// servo-xpath 0.3.0
//
// A component of the servo web-engine.
// Documentation
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

mod ast;
mod context;
mod eval;
mod functions;
mod parser;
mod tokenizer;
mod value;

use std::fmt;
use std::hash::Hash;

pub use ast::Expression;
use ast::QName;
use context::EvaluationCtx;
use markup5ever::{LocalName, Namespace, Prefix};
pub use parser::{Error as ParserError, parse};
pub use value::{NodeSet, Value};

/// Bundles the concrete types of a DOM implementation that XPath expressions are
/// evaluated against (see [`evaluate_parsed_xpath`], which is generic over this trait).
pub trait Dom {
    /// Context type shared by [`Dom::Node`] and [`Dom::NamespaceResolver`]. It is handed
    /// by mutable reference to the operations that need it.
    type Context;

    /// The node handle type. Its `Context` must be the same as [`Dom::Context`].
    type Node: Node<Context = Self::Context>;
    /// The type used to resolve namespace prefixes. Its `Context` must be the same as
    /// [`Dom::Context`].
    type NamespaceResolver: NamespaceResolver<Context = Self::Context>;
}

/// A handle to a DOM node exposing all functionality needed by xpath.
///
/// Handles must be comparable for equality, cloneable and debug-printable.
pub trait Node: Eq + Clone + fmt::Debug {
    /// Context type; must be the same as the `Context` of [`Node::Element`].
    type Context;

    /// Handle type returned by [`Node::as_processing_instruction`].
    type ProcessingInstruction: ProcessingInstruction;
    /// Handle type returned by [`Node::owner_document`].
    type Document: Document<Node = Self>;
    /// Handle type returned by [`Node::as_attribute`].
    type Attribute: Attribute<Node = Self>;
    /// Handle type returned by [`Node::as_element`].
    type Element: Element<Node = Self, Context = Self::Context>;
    /// Hashable, equality-comparable value produced by [`Node::to_opaque`].
    type Opaque: Eq + Hash + 'static;

    /// Whether this node is a comment node.
    fn is_comment(&self) -> bool;
    /// Whether this node is a text node.
    fn is_text(&self) -> bool;
    /// Equivalent to [`textContent`](https://dom.spec.whatwg.org/#dom-node-textcontent) attribute.
    fn text_content(&self) -> String;
    /// <https://html.spec.whatwg.org/multipage/#language>
    fn language(&self) -> Option<String>;
    /// The parent of this node, if it has one.
    fn parent(&self) -> Option<Self>;
    /// Return an iterator over the children of this node.
    fn children(&self) -> impl Iterator<Item = Self>;
    /// <https://dom.spec.whatwg.org/#concept-tree-order>
    fn compare_tree_order(&self, other: &Self) -> std::cmp::Ordering;
    /// A non-shadow-including preorder traversal.
    fn traverse_preorder(&self) -> impl Iterator<Item = Self>;
    /// Return an iterator over the inclusive ancestors of this node.
    ///
    /// NOTE(review): presumably this yields the node itself followed by its ancestors;
    /// the iteration order is not stated here, confirm against implementors.
    fn inclusive_ancestors(&self) -> impl Iterator<Item = Self>;

    /// Return an iterator over all nodes that come before `self` in [tree order],
    /// excluding any ancestors and attribute nodes.
    ///
    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
    fn preceding_nodes(&self) -> impl Iterator<Item = Self>;

    /// Return an iterator over all nodes that come after `self` in [tree order],
    /// excluding any descendants and attribute nodes.
    ///
    /// [tree order]: https://dom.spec.whatwg.org/#concept-tree-order
    fn following_nodes(&self) -> impl Iterator<Item = Self>;
    /// Return an iterator over the siblings that precede this node.
    ///
    /// NOTE(review): the iteration order (nearest-first or tree order) is not stated
    /// here, confirm against implementors.
    fn preceding_siblings(&self) -> impl Iterator<Item = Self>;
    /// Return an iterator over the siblings that follow this node.
    fn following_siblings(&self) -> impl Iterator<Item = Self>;
    /// The document associated with this node.
    fn owner_document(&self) -> Self::Document;
    /// Convert this handle into its [`Node::Opaque`] representation.
    fn to_opaque(&self) -> Self::Opaque;
    /// This node as a processing instruction handle; `None` presumably means the node
    /// is not a processing instruction.
    fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction>;
    /// This node as an attribute handle; `None` presumably means the node is not an
    /// attribute.
    fn as_attribute(&self) -> Option<Self::Attribute>;
    /// This node as an element handle; `None` presumably means the node is not an
    /// element.
    fn as_element(&self) -> Option<Self::Element>;
    /// Return the root node for this node.
    ///
    /// NOTE(review): presumably the root of the tree containing `self` (cf. the DOM
    /// `getRootNode()` method); confirm against implementors.
    fn get_root_node(&self) -> Self;
}

/// Maps namespace prefixes to namespaces on behalf of the XPath machinery.
pub trait NamespaceResolver: Clone {
    /// Context type handed to [`NamespaceResolver::resolve_namespace_prefix`].
    type Context;

    /// Look up `prefix` and return the result as a string, if any.
    ///
    /// NOTE(review): `None` presumably means the prefix is not bound to a namespace;
    /// confirm against implementors.
    fn resolve_namespace_prefix(&self, cx: &mut Self::Context, prefix: &str) -> Option<String>;
}

/// A handle to a processing instruction node.
pub trait ProcessingInstruction {
    /// The target of this processing instruction, as an owned string.
    fn target(&self) -> String;
}

/// A handle to a document.
pub trait Document {
    /// The node handle type whose `Document` type is this type.
    type Node: Node<Document = Self>;

    /// Return an iterator over elements with the given ID in tree order.
    fn get_elements_with_id(&self, id: &str)
    -> impl Iterator<Item = <Self::Node as Node>::Element>;
}

/// A handle to an element node.
pub trait Element {
    /// Context type handed to [`Element::attributes`].
    type Context;

    /// The node handle type whose `Element` type is this type.
    type Node: Node<Element = Self>;
    /// The attribute handle type yielded by [`Element::attributes`].
    type Attribute: Attribute<Node = Self::Node>;

    /// Return this element as a generic node handle.
    fn as_node(&self) -> Self::Node;
    /// The namespace prefix of this element, if any.
    fn prefix(&self) -> Option<Prefix>;
    /// The namespace of this element.
    fn namespace(&self) -> Namespace;
    /// The local name of this element.
    fn local_name(&self) -> LocalName;
    /// Return an iterator over the attributes of this element.
    fn attributes(&self, cx: &mut Self::Context) -> impl Iterator<Item = Self::Attribute>;
    /// NOTE(review): presumably `true` when this is an HTML element whose document is
    /// an HTML document; confirm against implementors.
    fn is_html_element_in_html_document(&self) -> bool;
}

/// A handle to an attribute node.
pub trait Attribute {
    /// The node handle type whose `Attribute` type is this type.
    type Node: Node<Attribute = Self>;

    /// Return this attribute as a generic node handle.
    fn as_node(&self) -> Self::Node;
    /// The namespace prefix of this attribute, if any.
    fn prefix(&self) -> Option<Prefix>;
    /// The namespace of this attribute.
    fn namespace(&self) -> Namespace;
    /// The local name of this attribute.
    fn local_name(&self) -> LocalName;
}

/// Evaluate an XPath expression that has already been parsed into an [`Expression`].
///
/// A fresh evaluation context is built around `context_node` and the expression is
/// evaluated in it. If the result is a node-set, it is deduplicated and then sorted
/// before being returned. Both the success and the failure outcome are logged at
/// debug level.
///
/// # Errors
///
/// Returns the [`Error`] produced by the evaluation, unchanged.
pub fn evaluate_parsed_xpath<D: Dom>(
    cx: &mut D::Context,
    expr: &Expression,
    context_node: D::Node,
) -> Result<Value<D::Node>, Error> {
    let evaluation_ctx = EvaluationCtx::<D>::new(context_node);

    // Bail out early on failure so the success path below stays flat.
    let mut value = match expr.evaluate(cx, &evaluation_ctx) {
        Ok(value) => value,
        Err(error) => {
            log::debug!("Unable to evaluate XPath: {error:?}");
            return Err(error);
        },
    };

    // Only node-sets need normalising; every other value kind is returned as-is.
    if let Value::NodeSet(nodes) = &mut value {
        nodes.deduplicate();
        nodes.sort();
    }

    log::debug!("Evaluated XPath: {value:?}");
    Ok(value)
}

/// Errors returned by [`evaluate_parsed_xpath`].
#[derive(Clone, Debug)]
pub enum Error {
    /// NOTE(review): presumably raised when a node-set was required but a value of
    /// another type was found; the raising sites are not in this file.
    NotANodeset,
    /// It is not clear where variables used in XPath expression should come from.
    /// Firefox throws "NS_ERROR_ILLEGAL_VALUE" when using them, chrome seems to return
    /// an empty result. We also error out.
    ///
    /// See <https://github.com/whatwg/dom/issues/67>
    CannotUseVariables,
    /// A qualified name was rejected.
    ///
    /// NOTE(review): the exact rejection conditions are decided outside this file.
    InvalidQName {
        /// The offending qualified name.
        qname: QName,
    },
    /// NOTE(review): presumably an error that does not fit any other variant.
    Internal {
        /// Message carried with the error.
        msg: String,
    },
}

/// Whether `c` may begin an XML name.
///
/// The accepted code points are those of the `NameStartChar` production:
/// <https://www.w3.org/TR/xml/#NT-NameStartChar>
fn is_valid_start(c: char) -> bool {
    // Matching on the numeric code point keeps the table visually aligned with the
    // hexadecimal ranges listed in the XML specification.
    match u32::from(c) {
        // ':' and '_'
        0x3A | 0x5F => true,
        // 'A'..='Z' and 'a'..='z'
        0x41..=0x5A | 0x61..=0x7A => true,
        0xC0..=0xD6 | 0xD8..=0xF6 | 0xF8..=0x2FF => true,
        0x370..=0x37D | 0x37F..=0x1FFF => true,
        0x200C..=0x200D | 0x2070..=0x218F => true,
        0x2C00..=0x2FEF | 0x3001..=0xD7FF => true,
        0xF900..=0xFDCF | 0xFDF0..=0xFFFD => true,
        0x10000..=0xEFFFF => true,
        _ => false,
    }
}

/// Whether `c` may appear after the first character of an XML name.
///
/// This accepts everything [`is_valid_start`] accepts, plus the additional code
/// points of the `NameChar` production:
/// <https://www.w3.org/TR/xml/#NT-NameChar>
fn is_valid_continuation(c: char) -> bool {
    match c {
        '-' | '.' | '\u{B7}' => true,
        '0'..='9' => true,
        '\u{300}'..='\u{36F}' | '\u{203F}'..='\u{2040}' => true,
        // Anything else is valid exactly when it could also start a name.
        _ => is_valid_start(c),
    }
}

#[cfg(test)]
/// Provides a dummy DOM to be used for tests.
///
/// Every type in here is a field-less unit struct whose trait methods return fixed
/// placeholder values (empty iterators, `None`, empty strings).
mod dummy_implementation {
    use std::{cmp, iter};

    use markup5ever::{LocalName, ns};

    use super::*;

    // FIXME: Expand this as more features are required
    /// Placeholder node. It carries no data, so all values compare equal.
    #[derive(Clone, Eq, Debug, PartialEq)]
    pub(crate) struct DummyNode;
    /// Placeholder processing instruction handle.
    pub(crate) struct DummyProcessingInstruction;
    /// Placeholder document handle.
    pub(crate) struct DummyDocument;
    /// Placeholder attribute handle.
    pub(crate) struct DummyAttribute;
    /// Placeholder element handle.
    pub(crate) struct DummyElement;

    impl Node for DummyNode {
        type Context = ();
        type ProcessingInstruction = DummyProcessingInstruction;
        type Document = DummyDocument;
        type Attribute = DummyAttribute;
        type Element = DummyElement;
        type Opaque = usize;

        fn is_comment(&self) -> bool {
            false
        }
        fn is_text(&self) -> bool {
            false
        }
        fn text_content(&self) -> String {
            String::new()
        }
        fn language(&self) -> Option<String> {
            None
        }
        fn parent(&self) -> Option<Self> {
            None
        }
        fn children(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        /// All `DummyNode` values are equal under the derived `PartialEq`, so the only
        /// ordering consistent with `Eq` is `Equal`. Returning a fixed non-`Equal`
        /// ordering would make the comparison non-reflexive, which violates the
        /// total-order contract that sorting routines rely on.
        fn compare_tree_order(&self, _: &Self) -> cmp::Ordering {
            cmp::Ordering::Equal
        }
        fn traverse_preorder(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn inclusive_ancestors(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_nodes(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn preceding_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn following_siblings(&self) -> impl Iterator<Item = Self> {
            iter::empty()
        }
        fn owner_document(&self) -> Self::Document {
            DummyDocument
        }
        /// Every dummy node maps to the same opaque value.
        fn to_opaque(&self) -> Self::Opaque {
            0
        }
        fn as_processing_instruction(&self) -> Option<Self::ProcessingInstruction> {
            None
        }
        fn as_attribute(&self) -> Option<Self::Attribute> {
            None
        }
        fn as_element(&self) -> Option<Self::Element> {
            None
        }
        /// A dummy node has no parent, so it is its own root.
        fn get_root_node(&self) -> Self {
            self.clone()
        }
    }

    impl ProcessingInstruction for DummyProcessingInstruction {
        fn target(&self) -> String {
            String::new()
        }
    }

    impl Document for DummyDocument {
        type Node = DummyNode;

        fn get_elements_with_id(
            &self,
            _: &str,
        ) -> impl Iterator<Item = <Self::Node as Node>::Element> {
            iter::empty()
        }
    }

    impl Element for DummyElement {
        type Context = ();
        type Node = DummyNode;
        type Attribute = DummyAttribute;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
        fn attributes(&self, _: &mut ()) -> impl Iterator<Item = Self::Attribute> {
            iter::empty()
        }
        fn is_html_element_in_html_document(&self) -> bool {
            true
        }
    }

    impl Attribute for DummyAttribute {
        type Node = DummyNode;

        fn as_node(&self) -> Self::Node {
            DummyNode
        }
        fn prefix(&self) -> Option<Prefix> {
            None
        }
        fn namespace(&self) -> Namespace {
            ns!()
        }
        fn local_name(&self) -> LocalName {
            LocalName::from("")
        }
    }
}