Skip to main content

xrust/parser/xpath/
mod.rs

1/*! # Parse XPath expressions
2
3An XPath expression parser using the xrust parser combinator that produces a xrust transformation.
4
5```rust
6use xrust::parser::xpath::parse;
7# use xrust::item::Node;
8# fn do_parse<N: Node>() {
9let t = parse::<N>("/child::A/child::B/child::C", None, None).expect("unable to parse XPath expression");
10# }
11```
12
13"t" now contains a [Transform] that will return "C" elements that have a "B" parent and an "A" grandparent in the source document.
14
15To evaluate the transformation we need a Context with a source document as its current item.
16
17```rust
18# use std::rc::Rc;
19# use xrust::xdmerror::{Error, ErrorKind};
20use xrust::item::{Sequence, SequenceTrait, Item, Node, NodeType};
21use xrust::trees::smite::RNode;
22use xrust::parser::ParseError;
23use xrust::parser::xml::parse as xmlparse;
24use xrust::parser::xpath::parse;
25use xrust::transform::context::{Context, ContextBuilder, StaticContext, StaticContextBuilder};
26
27let t = parse("/child::A/child::B/child::C", None, None)
28    .expect("unable to parse XPath expression");
29
30let source = RNode::new_document();
31xmlparse(source.clone(), "<A><B><C/></B><B><C/></B></A>", Some(|_: &_| Err(ParseError::MissingNameSpace)))
32    .expect("unable to parse XML");
33let mut static_context = StaticContextBuilder::new()
34    .message(|_| Ok(()))
35    .fetcher(|_| Ok(String::new()))
36    .parser(|_| Err(Error::new(ErrorKind::NotImplemented, "not implemented")))
37    .build();
38let context = ContextBuilder::new()
39    .context(vec![Item::Node(source)])
40    .build();
41let sequence = context.dispatch(&mut static_context, &t)
42    .expect("evaluation failed");
43assert_eq!(sequence.len(), 2);
44assert_eq!(sequence.to_xml(), "<C/><C/>")
45```
46*/
47
48mod compare;
49mod context;
50mod expressions;
51mod flwr;
52mod functions;
53pub(crate) mod literals;
54mod logic;
55mod nodes;
56pub(crate) mod nodetests;
57mod numbers;
58pub(crate) mod predicates;
59mod strings;
60pub(crate) mod support;
61mod types;
62pub(crate) mod variables;
63
64use crate::parser::combinators::alt::alt4;
65//use crate::parser::combinators::debug::inspect;
66use crate::parser::combinators::list::separated_list1;
67use crate::parser::combinators::map::map;
68use crate::parser::combinators::tag::tag;
69use crate::parser::combinators::tuple::tuple3;
70use crate::parser::combinators::whitespace::xpwhitespace;
71use crate::parser::xpath::flwr::{for_expr, if_expr, let_expr};
72use crate::parser::xpath::logic::or_expr;
73use crate::parser::xpath::support::noop;
74use crate::parser::{
75    ParseError, ParseInput, ParserState, ParserStateBuilder, StaticState, StaticStateBuilder,
76};
77
78use crate::item::Node;
79use crate::transform::Transform;
80use crate::xdmerror::{Error, ErrorKind};
81use std::cell::RefCell;
82use std::rc::Rc;
83use qualname::{NamespaceMap, NamespacePrefix, NamespaceUri};
84
85/// Parse an XPath expression to produce a [Transform]. The optional [Node] or [NamespaceMap] may be used to resolve XML Namespaces (The [Node] will be searched first).
86pub fn parse<N: Node>(
87    input: &str,
88    n: Option<N>,
89    nmap: Option<NamespaceMap>,
90) -> Result<Transform<N>, Error> {
91    // Shortcut for empty
92    if input.is_empty() {
93        return Ok(Transform::Empty);
94    }
95
96    let state = n.clone().map_or_else(
97        || ParserState::new(),
98        |m| ParserStateBuilder::new().doc(m.clone()).current(m).build(),
99    );
100    let result = if let Some(m) = n.clone() {
101        let mut static_state = StaticStateBuilder::new()
102            .namespace(move |pre| {
103                m.namespace_iter()
104                    .find(|nsd| nsd.as_namespace_prefix().unwrap().is_some_and(|p| p == pre))
105                    .map_or_else(
106                        || Err(ParseError::MissingNameSpace),
107                        |nsd| Ok(nsd.as_namespace_uri().unwrap().clone()),
108                    )
109            })
110            .build();
111        xpath_expr((input, state), &mut static_state)
112    } else if let Some(nm) = nmap {
113        let mut static_state = StaticStateBuilder::new()
114            .namespace(move |pre: &NamespacePrefix| {
115                nm.namespace_uri(&Some(pre.clone()))
116                    .ok_or(ParseError::MissingNameSpace)
117            })
118            .build();
119        xpath_expr((input, state), &mut static_state)
120    } else {
121        let mut static_state = StaticStateBuilder::new()
122            .namespace(|_| Err(ParseError::MissingNameSpace))
123            .build();
124        xpath_expr((input, state), &mut static_state)
125    };
126    match result {
127        Ok((_, x)) => Ok(x),
128        Err(err) => match err {
129            ParseError::Combinator(f) => Err(Error::new(
130                ErrorKind::ParseError,
131                format!(
132                    "Unrecoverable parser error ({}) while parsing XPath expression \"{}\"",
133                    f, input
134                ),
135            )),
136            ParseError::NotWellFormed(e) => Err(Error::new(
137                ErrorKind::ParseError,
138                format!("Unrecognised extra characters: \"{}\"", e),
139            )),
140            ParseError::MissingNameSpace => Err(Error::new(
141                ErrorKind::ParseError,
142                "Missing namespace declaration.".to_string(),
143            )),
144            ParseError::Notimplemented => Err(Error::new(
145                ErrorKind::ParseError,
146                "Unimplemented feature.".to_string(),
147            )),
148            _ => Err(Error::new(ErrorKind::Unknown, "Unknown error".to_string())),
149        },
150    }
151}
152
153fn xpath_expr<'a, N: Node + 'a, L>(
154    input: ParseInput<'a, N>,
155    ss: &mut StaticState<L>,
156) -> Result<(ParseInput<'a, N>, Transform<N>), ParseError>
157where
158    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError> + 'a,
159{
160    match expr::<N, L>()(input, ss) {
161        Err(err) => Err(err),
162        Ok(((input1, state1), e)) => {
163            //Check nothing remaining in iterator, nothing after the end of the root node.
164            if input1.is_empty() {
165                Ok(((input1, state1), e))
166            } else {
167                Err(ParseError::NotWellFormed(format!(
168                    "Unrecognised extra characters: \"{}\"",
169                    input1
170                )))
171            }
172        }
173    }
174}
175// Implementation note: cannot use opaque type because XPath expressions are recursive, and Rust *really* doesn't like recursive opaque types. Dynamic trait objects aren't ideal, but compiling XPath expressions is a one-off operation so that shouldn't cause a major performance issue.
176// Implementation note 2: since XPath is recursive, must lazily evaluate arguments to avoid stack overflow.
177pub fn expr<'a, N: Node + 'a, L>() -> Box<
178    dyn Fn(
179            ParseInput<'a, N>,
180            &mut StaticState<L>,
181        ) -> Result<(ParseInput<'a, N>, Transform<N>), ParseError>
182        + 'a,
183>
184where
185    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError> + 'a,
186{
187    Box::new(map(
188        separated_list1(
189            map(tuple3(xpwhitespace(), tag(","), xpwhitespace()), |_| ()),
190            expr_single::<N, L>(),
191        ),
192        |mut v| {
193            if v.len() == 1 {
194                v.pop().unwrap()
195            } else {
196                Transform::SequenceItems(v)
197            }
198        },
199    ))
200}
201
202type ExprParser<'a, N, L> = Rc<
203    dyn Fn(
204            ParseInput<'a, N>,
205            &mut StaticState<L>,
206        ) -> Result<(ParseInput<'a, N>, Transform<N>), ParseError>
207        + 'a,
208>;
209
210pub(crate) fn expr_wrapper<'a, N: Node + 'a, L>(
211    b: bool,
212) -> Box<
213    dyn Fn(
214            ParseInput<'a, N>,
215            &mut StaticState<L>,
216        ) -> Result<(ParseInput<'a, N>, Transform<N>), ParseError>
217        + 'a,
218>
219where
220    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError> + 'a,
221{
222    // Building the (recursive) expr grammar is expensive, and it must be built
223    // lazily to avoid infinite recursion / stack overflow. Build it ONCE on
224    // first use and reuse it for every predicate/parenthesised sub-expression
225    // and every backtrack, instead of rebuilding the whole grammar each call.
226    let cache: RefCell<Option<ExprParser<'a, N, L>>> = RefCell::new(None);
227    Box::new(move |input, ss| {
228        if b {
229            let parser = {
230                let mut slot = cache.borrow_mut();
231                if slot.is_none() {
232                    *slot = Some(Rc::from(expr::<N, L>()));
233                }
234                slot.as_ref().unwrap().clone()
235            };
236            parser(input, ss)
237        } else {
238            noop::<N, L>()(input, ss)
239        }
240    })
241}
242
243// ExprSingle ::= ForExpr | LetExpr | QuantifiedExpr | IfExpr | OrExpr
244fn expr_single<'a, N: Node + 'a, L>() -> Box<
245    dyn Fn(
246            ParseInput<'a, N>,
247            &mut StaticState<L>,
248        ) -> Result<(ParseInput<'a, N>, Transform<N>), ParseError>
249        + 'a,
250>
251where
252    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError> + 'a,
253{
254    Box::new(alt4(let_expr(), for_expr(), if_expr(), or_expr()))
255}
256
257pub(crate) fn expr_single_wrapper<'a, N: Node + 'a, L>(
258    b: bool,
259) -> Box<
260    dyn Fn(
261            ParseInput<'a, N>,
262            &mut StaticState<L>,
263        ) -> Result<(ParseInput<'a, N>, Transform<N>), ParseError>
264        + 'a,
265>
266where
267    L: FnMut(&NamespacePrefix) -> Result<NamespaceUri, ParseError> + 'a,
268{
269    // Same one-time-build memoization as expr_wrapper: function arguments and
270    // for/let/if clauses go through here, and rebuilding the grammar per call
271    // dominated parse time for function-heavy expressions.
272    let cache: RefCell<Option<ExprParser<'a, N, L>>> = RefCell::new(None);
273    Box::new(move |input, ss| {
274        if b {
275            let parser = {
276                let mut slot = cache.borrow_mut();
277                if slot.is_none() {
278                    *slot = Some(Rc::from(expr_single::<N, L>()));
279                }
280                slot.as_ref().unwrap().clone()
281            };
282            parser(input, ss)
283        } else {
284            noop::<N, L>()(input, ss)
285        }
286    })
287}