// servo_xpath/lib.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
4
5mod ast;
6mod context;
7mod eval;
8mod functions;
9mod parser;
10mod tokenizer;
11mod value;
12
13use std::fmt;
14use std::hash::Hash;
15
16pub use ast::Expression;
17use ast::QName;
18use context::EvaluationCtx;
19use markup5ever::{LocalName, Namespace, Prefix};
20pub use parser::{Error as ParserError, parse};
21pub use value::{NodeSet, Value};
22
23pub trait Dom {
24    type Context;
25
26    type Node: Node<Context = Self::Context>;
27    type NamespaceResolver: NamespaceResolver<Context = Self::Context>;
28}
29
30/// A handle to a DOM node exposing all functionality needed by xpath.
31pub trait Node: Eq + Clone + fmt::Debug {
32    type Context;
33
34    type ProcessingInstruction: ProcessingInstruction;
35    type Document: Document<Node = Self>;
36    type Attribute: Attribute<Node = Self>;
37    type Element: Element<Node = Self, Context = Self::Context>;
38    type Opaque: Eq + Hash + 'static;
39
40    fn is_comment(&self) -> bool;
41    fn is_text(&self) -> bool;
42    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
43    fn text_content(&self) -> String;
44    /// <https://html.spec.whatwg.org/multipage/#language>
45    fn language(&self) -> Option<String>;
46    fn parent(&self) -> Option<Self>;
47    fn children(&self) -> impl Iterator<Item = Self>;
48    /// <https://dom.spec.whatwg.org/#concept-tree-order>
49    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
50    /// A non-shadow-including preorder traversal.
51    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
52    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;
53
54    /// Return an iterator over all nodes that come before `self` in [tree order],
55    /// excluding any ancestors and attribute nodes.
56    ///
57    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
58    fn preceding_nodes(&self) -> impl Iterator<Item = Self>;
59
60    /// Return an iterator over all nodes that come after `self` in [tree order],
61    /// excluding any descendants and attribute nodes.
62    ///
63    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
64    fn following_nodes(&self) -> impl Iterator<Item = Self>;
65    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
66    fn following_siblings(&self) -> impl Iterator<Item = Self>;
67    fn owner_document(&self) -> Self::Document;
68    fn to_opaque(&self) -> Self::Opaque;
69    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
70    fn as_attribute(&self) -> Option<Self::Attribute>;
71    fn as_element(&self) -> Option<Self::Element>;
72    fn get_root_node(&self) -> Self;
73}
74
/// Resolves namespace prefixes on behalf of the XPath machinery.
pub trait NamespaceResolver: Clone {
    /// State passed by `&mut` reference into every lookup.
    type Context;

    /// Looks up `prefix` and returns the result as an owned string.
    ///
    /// `None` presumably signals that the prefix has no binding; the exact
    /// meaning is decided by the implementor.
    fn resolve_namespace_prefix(&self, cx: &mut Self::Context, prefix: &str) -> Option<String>;
}
80
/// A handle to a processing instruction node.
pub trait ProcessingInstruction {
    /// The target of this processing instruction, as an owned string.
    fn target(&self) -> String;
}
84
85pub trait Document {
86    type Node: Node<Document = Self>;
87
88    /// Return an iterator over elements with the given ID in tree order.
89    fn get_elements_with_id(&self, id: &str)
90    -> impl Iterator<Item = <Self::Node as Node>::Element>;
91}
92
93pub trait Element {
94    type Context;
95
96    type Node: Node<Element = Self>;
97    type Attribute: Attribute<Node = Self::Node>;
98
99    fn as_node(&self) -> Self::Node;
100    fn prefix(&self) -> Option<Prefix>;
101    fn namespace(&self) -> Namespace;
102    fn local_name(&self) -> LocalName;
103    fn attributes(&self, cx: &mut Self::Context) -> impl Iterator<Item = Self::Attribute>;
104    fn is_html_element_in_html_document(&self) -> bool;
105}
106
107pub trait Attribute {
108    type Node: Node<Attribute = Self>;
109
110    fn as_node(&self) -> Self::Node;
111    fn prefix(&self) -> Option<Prefix>;
112    fn namespace(&self) -> Namespace;
113    fn local_name(&self) -> LocalName;
114}
115
116/// Evaluate an already-parsed XPath expression
117pub fn evaluate_parsed_xpath<D: Dom>(
118    cx: &mut D::Context,
119    expr: &Expression,
120    context_node: D::Node,
121) -> Result<Value<D::Node>, Error> {
122    let context = EvaluationCtx::<D>::new(context_node);
123    match expr.evaluate(cx, &context) {
124        Ok(mut value) => {
125            if let Value::NodeSet(node_set) = &mut value {
126                node_set.deduplicate();
127                node_set.sort();
128            }
129
130            log::debug!("Evaluated XPath: {value:?}");
131            Ok(value)
132        },
133        Err(error) => {
134            log::debug!("Unable to evaluate XPath: {error:?}");
135            Err(error)
136        },
137    }
138}
139
140#[derive(Clone, Debug)]
141pub enum Error {
142    NotANodeset,
143    /// It is not clear where variables used in XPath expression should come from.
144    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
145    /// an empty result. We also error out.
146    ///
147    /// See <https://github.com/whatwg/dom/issues/67>
148    CannotUseVariables,
149    InvalidQName {
150        qname: QName,
151    },
152    Internal {
153        msg: String,
154    },
155}
156
/// Returns `true` if `c` may begin an XML name, i.e. matches the
/// `NameStartChar` production.
///
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    matches!(
        c,
        ':' | 'A'..='Z'
            | '_'
            | 'a'..='z'
            | '\u{C0}'..='\u{D6}'
            | '\u{D8}'..='\u{F6}'
            | '\u{F8}'..='\u{2FF}'
            | '\u{370}'..='\u{37D}'
            | '\u{37F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
176
177/// <https://www.w3.org/TR/xml/#NT-NameChar>
178fn is_valid_continuation(c: char) -> bool {
179    is_valid_start(c) ||
180        matches!(c,
181            '-' |
182            '.' |
183            '0'..='9' |
184            '\u{B7}' |
185            '\u{300}'..='\u{36F}' |
186            '\u{203F}'..='\u{2040}')
187}
188
/// Provides a dummy DOM to be used for tests.
///
/// Every handle is a unit struct. Iterator-returning methods yield nothing,
/// `Option`-returning methods return `None`, and string-returning methods
/// return an empty string.
#[cfg(test)]
mod dummy_implementation {
    use std::{cmp, iter};

    use markup5ever::{LocalName, ns};

    use super::*;

    // FIXME: Expand this as more features are required
    #[derive(Clone, Eq, Debug, PartialEq)]
    pub(crate) struct DummyNode;
    pub(crate) struct DummyProcessingInstruction;
    pub(crate) struct DummyDocument;
    pub(crate) struct DummyAttribute;
    pub(crate) struct DummyElement;

    impl Node for DummyNode {
        type Context = ();
        type ProcessingInstruction = DummyProcessingInstruction;
        type Document = DummyDocument;
        type Attribute = DummyAttribute;
        type Element = DummyElement;
        type Opaque = usize;

        fn is_comment(&self) -> bool {
            false
        }
        fn is_text(&self) -> bool {
            false
        }
        fn text_content(&self) -> String {
            String::new()
        }
        fn language(&self) -> Option<String> {
            None
        }
        fn parent(&self) -> Option<Self> {
            None
        }
        fn children(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        // Always reports `Greater`, whatever the other node is.
        fn compare_tree_order(&self, _: &Self) -> cmp::Ordering {
            cmp::Ordering::Greater
        }
        fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn owner_document(&self) -> Self::Document {
            DummyDocument
        }
        // Every dummy node maps to the same opaque value.
        fn to_opaque(&self) -> Self::Opaque {
            0
        }
        fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
            None
        }
        fn as_attribute(&self) -> Option<Self::Attribute> {
            None
        }
        fn as_element(&self) -> Option<Self::Element> {
            None
        }
        // A dummy node is its own root.
        fn get_root_node(&self) -> Self {
            self.clone()
        }
    }

    impl ProcessingInstruction for DummyProcessingInstruction {
        fn target(&self) -> String {
            String::new()
        }
    }

    impl Document for DummyDocument {
        type Node = DummyNode;

        fn get_elements_with_id(
            &self,
            _: &str,
        ) -> impl Iterator<Item = <Self::Node as Node>::Element> {
            iter::empty()
        }
    }

    impl Element for DummyElement {
        type Context = ();
        type Node = DummyNode;
        type Attribute = DummyAttribute;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        // `ns!()` with no argument is the empty namespace.
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
        fn attributes(&self, _: &mut ()) -> impl Iterator<Item = Self::Attribute> {
            iter::empty()
        }
        fn is_html_element_in_html_document(&self) -> bool {
            true
        }
    }

    impl Attribute for DummyAttribute {
        type Node = DummyNode;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        // `ns!()` with no argument is the empty namespace.
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
    }
}