// skyscraper 0.7.0
//
// XPath for HTML web scraping
// Documentation
//! <https://www.w3.org/TR/2017/REC-xpath-31-20170321/#id-primary-expressions>

use std::fmt::Display;

use nom::{branch::alt, character::complete::char, error::context};

use crate::{
    xpath::{
        grammar::{
            data_model::{Function, OwnedXpathValue, XpathItem},
            expressions::{
                maps_and_arrays::{
                    arrays::array_constructor, lookup_operator::unary_lookup::unary_lookup,
                    maps::map_constructor,
                },
                primary_expressions::{
                    inline_function_expressions::inline_function_expr, literals::literal,
                    named_function_references::named_function_ref,
                    parenthesized_expressions::parenthesized_expr,
                    static_function_calls::{func_data, function_call},
                    variable_references::var_ref,
                },
            },
            recipes::{max, Res},
        },
        xpath_item_set::XpathItemSet,
        ExpressionApplyError, XpathExpressionContext,
    },
    xpath_item_set,
};

use self::{
    inline_function_expressions::InlineFunctionExpr, literals::Literal,
    named_function_references::NamedFunctionRef, parenthesized_expressions::ParenthesizedExpr,
    static_function_calls::FunctionCall, variable_references::VarRef,
};

use super::maps_and_arrays::{
    arrays::ArrayConstructor, lookup_operator::unary_lookup::UnaryLookup, maps::MapConstructor,
};

pub mod enclosed_expressions;
mod inline_function_expressions;
mod literals;
mod named_function_references;
pub mod parenthesized_expressions;
pub mod static_function_calls;
pub mod variable_references;

/// Parses an XPath 3.1 `PrimaryExpr` from the start of `input`.
///
/// Each alternative of the production is parsed by its own sub-parser and
/// wrapped in the matching [`PrimaryExpr`] variant. All alternatives are handed
/// to the `max` recipe under the `"primary_expr"` error context.
///
/// On success the result holds the unconsumed remainder of `input` together
/// with the parsed expression.
///
/// NOTE(review): `max` comes from `recipes`; presumably it keeps the
/// alternative that consumes the most input — confirm against its definition.
pub fn primary_expr(input: &str) -> Res<&str, PrimaryExpr> {
    // https://www.w3.org/TR/2017/REC-xpath-31-20170321/#prod-xpath31-PrimaryExpr

    fn parse_literal(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = literal(input)?;
        Ok((rest, PrimaryExpr::Literal(parsed)))
    }

    fn parse_var_ref(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = var_ref(input)?;
        Ok((rest, PrimaryExpr::VarRef(parsed)))
    }

    fn parse_parenthesized(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = parenthesized_expr(input)?;
        Ok((rest, PrimaryExpr::ParenthesizedExpr(parsed)))
    }

    fn parse_context_item(input: &str) -> Res<&str, PrimaryExpr> {
        // https://www.w3.org/TR/2017/REC-xpath-31-20170321/#prod-xpath31-ContextItemExpr
        // The matched character carries no information; only the variant matters.
        let (rest, _dot) = char('.')(input)?;
        Ok((rest, PrimaryExpr::ContextItemExpr))
    }

    fn parse_function_call(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = function_call(input)?;
        Ok((rest, PrimaryExpr::FunctionCall(parsed)))
    }

    fn parse_function_item(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = function_item_expr(input)?;
        Ok((rest, PrimaryExpr::FunctionItemExpr(parsed)))
    }

    fn parse_map_constructor(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = map_constructor(input)?;
        Ok((rest, PrimaryExpr::MapConstructor(parsed)))
    }

    fn parse_array_constructor(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = array_constructor(input)?;
        Ok((rest, PrimaryExpr::ArrayConstructor(parsed)))
    }

    fn parse_unary_lookup(input: &str) -> Res<&str, PrimaryExpr> {
        let (rest, parsed) = unary_lookup(input)?;
        Ok((rest, PrimaryExpr::UnaryLookup(parsed)))
    }

    // The tuple order mirrors the order of alternatives in the grammar production.
    let alternatives = max((
        parse_literal,
        parse_var_ref,
        parse_parenthesized,
        parse_context_item,
        parse_function_call,
        parse_function_item,
        parse_map_constructor,
        parse_array_constructor,
        parse_unary_lookup,
    ));

    context("primary_expr", alternatives)(input)
}

/// A parsed XPath 3.1 primary expression, as produced by [`primary_expr`].
///
/// <https://www.w3.org/TR/2017/REC-xpath-31-20170321/#prod-xpath31-PrimaryExpr>
#[derive(PartialEq, Debug, Clone)]
pub enum PrimaryExpr {
    /// A literal value.
    Literal(Literal),
    /// A reference to a variable.
    VarRef(VarRef),
    /// An expression enclosed in parentheses.
    ParenthesizedExpr(ParenthesizedExpr),
    /// The context item expression, written `.`.
    ContextItemExpr,
    /// A static function call.
    FunctionCall(FunctionCall),
    /// A function item: either a named function reference or an inline function.
    FunctionItemExpr(FunctionItemExpr),
    /// A map constructor.
    MapConstructor(MapConstructor),
    /// An array constructor (square or curly form).
    ArrayConstructor(ArrayConstructor),
    /// A unary lookup (`?key`), which applies its key specifier to the context item.
    UnaryLookup(UnaryLookup),
}

impl Display for PrimaryExpr {
    /// Writes the expression by delegating to the wrapped node's `Display`
    /// implementation; the context item expression is written as `.`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the wrapped node once, then format it in a single place.
        let inner: &dyn Display = match self {
            // The only variant without a payload: emit its fixed spelling directly.
            PrimaryExpr::ContextItemExpr => return f.write_str("."),
            PrimaryExpr::Literal(node) => node,
            PrimaryExpr::VarRef(node) => node,
            PrimaryExpr::ParenthesizedExpr(node) => node,
            PrimaryExpr::FunctionCall(node) => node,
            PrimaryExpr::FunctionItemExpr(node) => node,
            PrimaryExpr::MapConstructor(node) => node,
            PrimaryExpr::ArrayConstructor(node) => node,
            PrimaryExpr::UnaryLookup(node) => node,
        };
        write!(f, "{}", inner)
    }
}

impl PrimaryExpr {
    pub(crate) fn eval<'tree>(
        &self,
        context: &XpathExpressionContext<'tree>,
    ) -> Result<XpathItemSet<'tree>, ExpressionApplyError> {
        match self {
            PrimaryExpr::Literal(literal) => {
                Ok(xpath_item_set![XpathItem::AnyAtomicType(literal.value())])
            }
            PrimaryExpr::VarRef(var_ref) => {
                let name = var_ref.name().to_string();
                match context.get_variable(&name) {
                    Some(value) => Ok(value.clone()),
                    None => Err(ExpressionApplyError::new(format!(
                        "Undefined variable: ${}",
                        name
                    ))),
                }
            }
            PrimaryExpr::ParenthesizedExpr(expr) => expr.eval(context),
            PrimaryExpr::ContextItemExpr => {
                // Context item expression is '.', which means select the current context item.
                Ok(xpath_item_set![context.item.clone()])
            }
            PrimaryExpr::FunctionCall(expr) => expr.eval(context),
            PrimaryExpr::FunctionItemExpr(expr) => match expr {
                FunctionItemExpr::NamedFunctionRef(named_ref) => {
                    Ok(xpath_item_set![XpathItem::Function(Function::Named {
                        name: named_ref.name.to_string(),
                        arity: named_ref.number,
                    })])
                }
                FunctionItemExpr::InlineFunctionExpr(inline) => {
                    let params = if let Some(param_list) = &inline.param_list {
                        param_list
                            .params()
                            .iter()
                            .map(|p| p.name.to_string())
                            .collect()
                    } else {
                        Vec::new()
                    };
                    let body_expr = inline.body.expr().cloned();
                    let body_source = body_expr
                        .as_ref()
                        .map(|e| e.to_string())
                        .unwrap_or_default();
                    Ok(xpath_item_set![XpathItem::Function(Function::Inline {
                        params,
                        body_source,
                        body: body_expr.map(Box::new),
                    })])
                }
            },
            PrimaryExpr::MapConstructor(mc) => {
                let mut entries = indexmap::IndexMap::new();
                for entry in &mc.entries {
                    // Evaluate the key — must produce a single atomic value.
                    let key_set = entry.key.eval(context)?;
                    let key_atoms = func_data(&key_set, context.item_tree)?;
                    if key_atoms.len() != 1 {
                        return Err(ExpressionApplyError::new(format!(
                            "Map key must be a single atomic value, got {} values",
                            key_atoms.len()
                        )));
                    }
                    let key = key_atoms.into_iter().next().unwrap();
                    // XPath 3.1 requires err:XQDY0137 for duplicate keys.
                    if entries.contains_key(&key) {
                        return Err(ExpressionApplyError::new(format!(
                            "Duplicate key in map constructor: {}",
                            key
                        )));
                    }
                    // Evaluate the value — preserve item types without atomization.
                    let value_set = entry.value.eval(context)?;
                    let value_items: Vec<OwnedXpathValue> = value_set
                        .iter()
                        .map(|item| OwnedXpathValue::from_xpath_item(item, context.item_tree))
                        .collect();
                    entries.insert(key, value_items);
                }
                Ok(xpath_item_set![XpathItem::Function(Function::Map {
                    entries
                })])
            }
            PrimaryExpr::ArrayConstructor(ac) => {
                let members = match ac {
                    ArrayConstructor::SquareArrayConstructor(sq) => {
                        // Each ExprSingle produces one member (a sequence).
                        let mut members = Vec::new();
                        for entry in &sq.entries {
                            let value_set = entry.eval(context)?;
                            let items: Vec<OwnedXpathValue> = value_set
                                .iter()
                                .map(|item| OwnedXpathValue::from_xpath_item(item, context.item_tree))
                                .collect();
                            members.push(items);
                        }
                        members
                    }
                    ArrayConstructor::CurlyArrayConstructor(cu) => {
                        // The enclosed expression produces a sequence; each item
                        // becomes one member (a singleton sequence).
                        if let Some(expr) = cu.enclosed_expr().expr() {
                            let value_set = expr.eval(context)?;
                            value_set
                                .iter()
                                .map(|item| vec![OwnedXpathValue::from_xpath_item(item, context.item_tree)])
                                .collect()
                        } else {
                            Vec::new()
                        }
                    }
                };
                Ok(xpath_item_set![XpathItem::Function(Function::Array {
                    members
                })])
            }
            PrimaryExpr::UnaryLookup(ul) => {
                // Unary lookup ?key is equivalent to .?key — it applies
                // the key specifier to the context item.
                let func = match &context.item {
                    XpathItem::Function(f) => f,
                    other => {
                        return Err(ExpressionApplyError::new(format!(
                            "Unary lookup requires context item to be a map or array, got {:?}",
                            other
                        )));
                    }
                };
                super::maps_and_arrays::lookup_operator::apply_key_specifier(
                    func, &ul.0, context,
                )
            }
        }
    }
}

/// Parses an XPath 3.1 `FunctionItemExpr` from the start of `input`.
///
/// A named function reference is attempted first, then an inline function
/// expression; the two parsers are combined with `alt` under the
/// `"function_item_expr"` error context.
fn function_item_expr(input: &str) -> Res<&str, FunctionItemExpr> {
    // https://www.w3.org/TR/2017/REC-xpath-31-20170321/#prod-xpath31-FunctionItemExpr

    fn parse_named_ref(input: &str) -> Res<&str, FunctionItemExpr> {
        let (rest, parsed) = named_function_ref(input)?;
        Ok((rest, FunctionItemExpr::NamedFunctionRef(parsed)))
    }

    fn parse_inline(input: &str) -> Res<&str, FunctionItemExpr> {
        let (rest, parsed) = inline_function_expr(input)?;
        Ok((rest, FunctionItemExpr::InlineFunctionExpr(parsed)))
    }

    let alternatives = alt((parse_named_ref, parse_inline));
    context("function_item_expr", alternatives)(input)
}

/// A parsed XPath 3.1 function item expression.
///
/// <https://www.w3.org/TR/2017/REC-xpath-31-20170321/#prod-xpath31-FunctionItemExpr>
#[derive(PartialEq, Debug, Clone)]
pub enum FunctionItemExpr {
    /// A reference to a named function.
    NamedFunctionRef(NamedFunctionRef),
    /// An inline function expression.
    InlineFunctionExpr(InlineFunctionExpr),
}

impl Display for FunctionItemExpr {
    /// Writes the expression by delegating to the wrapped node's `Display`
    /// implementation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner: &dyn Display = match self {
            FunctionItemExpr::NamedFunctionRef(node) => node,
            FunctionItemExpr::InlineFunctionExpr(node) => node,
        };
        write!(f, "{}", inner)
    }
}