preserves-path 5.996.1

Implementation of preserves-path, a query language for Preserves documents.
Documentation
use crate::context::Env;
use crate::schemas::path;
use crate::step::Node;
use crate::CompilationError;

use preserves::value::BinarySource;
use preserves::value::BytesBinarySource;
use preserves::value::IOValue;
use preserves::value::NestedValue;
use preserves::value::Reader;

use std::iter::Iterator;

/// Binary operator that may join the pieces of a predicate expression.
///
/// Produced by `split_binop` and consumed by `parse_predicate`.
#[derive(Debug)]
enum Binop {
    /// Pieces were separated by the `+` symbol; `parse_predicate` turns this
    /// into `path::Predicate::Or`.
    Union,
    /// Pieces were separated by the `&` symbol; `parse_predicate` turns this
    /// into `path::Predicate::And`.
    Intersection,
}

/// Cuts `tokens` into the runs that lie between occurrences of the symbol
/// `separator`.
///
/// A token counts as a separator only when it is a symbol whose text equals
/// `separator`; separator tokens are not part of any returned slice. Because
/// this is built on `slice::split`, the result always holds at least one
/// (possibly empty) slice.
fn split_values_by_symbol<'a>(tokens: &'a [IOValue], separator: &str) -> Vec<&'a [IOValue]> {
    let is_separator = |token: &IOValue| match token.value().as_symbol() {
        Some(sym) => sym == separator,
        None => false,
    };
    tokens.split(is_separator).collect()
}

/// Splits a predicate's tokens on its binary operator, if it has one.
///
/// Returns the pieces together with the operator that separated them:
/// `None` when neither `+` nor `&` occurs (a single piece is returned),
/// `Binop::Union` for `+`, and `Binop::Intersection` for `&`.
///
/// # Errors
///
/// `CompilationError::MixedOperators` when both `+` and `&` appear in
/// `tokens`.
fn split_binop(tokens: &[IOValue]) -> Result<(Vec<&[IOValue]>, Option<Binop>), CompilationError> {
    let by_plus = split_values_by_symbol(tokens, "+");
    let by_ampersand = split_values_by_symbol(tokens, "&");

    // A split that yields exactly one piece means the separator never occurred.
    let has_union = by_plus.len() != 1;
    let has_intersection = by_ampersand.len() != 1;

    match (has_union, has_intersection) {
        (false, false) => Ok((by_plus, None)),
        (true, false) => Ok((by_plus, Some(Binop::Union))),
        (false, true) => Ok((by_ampersand, Some(Binop::Intersection))),
        (true, true) => Err(CompilationError::MixedOperators),
    }
}

/// Parses `tokens` as a sequence of steps and wraps them in a
/// `path::Selector`.
///
/// Steps are read one after another with `parse_step`, each call continuing
/// from where the previous one stopped, until `parse_step` reports that no
/// step is left (`Ok(None)`).
///
/// # Errors
///
/// Propagates the first error returned by `parse_step`.
pub fn parse_selector(env: &Env, tokens: &[IOValue]) -> Result<path::Selector, CompilationError> {
    let mut rest = tokens;
    // Collecting into `Result` stops pulling from the iterator at the first
    // error, so `parse_step` is never called again after a failure.
    let steps = std::iter::from_fn(|| match parse_step(env, rest) {
        Ok(Some((step, tail))) => {
            rest = tail;
            Some(Ok(step))
        }
        Ok(None) => None,
        Err(e) => Some(Err(e)),
    })
    .collect::<Result<Vec<_>, _>>()?;
    Ok(path::Selector(steps))
}

/// Parses `tokens` as a predicate.
///
/// The tokens are first split on a binary operator via `split_binop`. With
/// no operator the whole input is parsed by `parse_non_binop`; otherwise
/// every piece is parsed by `parse_non_binop` and the results are combined
/// into `path::Predicate::Or` (for `+`) or `path::Predicate::And` (for `&`).
///
/// # Errors
///
/// Propagates errors from `split_binop` and `parse_non_binop`.
pub fn parse_predicate(env: &Env, tokens: &[IOValue]) -> Result<path::Predicate, CompilationError> {
    let (pieces, binop) = split_binop(tokens)?;

    let operator = match binop {
        Some(operator) => operator,
        // No operator: `pieces` holds exactly one slice.
        None => return parse_non_binop(env, pieces[0]),
    };

    let preds = pieces
        .into_iter()
        .map(|piece| parse_non_binop(env, piece))
        .collect::<Result<_, _>>()?;

    let combined = match operator {
        Binop::Union => path::Predicate::Or { preds },
        Binop::Intersection => path::Predicate::And { preds },
    };
    Ok(combined)
}

/// Parses a predicate that contains no binary operator.
///
/// A leading `!` symbol negates whatever follows it (recursively, so
/// repeated `!` tokens nest); anything else is parsed as a selector and
/// wrapped in `path::Predicate::Selector`.
///
/// # Errors
///
/// Propagates errors from `parse_selector`.
fn parse_non_binop(env: &Env, tokens: &[IOValue]) -> Result<path::Predicate, CompilationError> {
    if let Some((first, rest)) = tokens.split_first() {
        let is_negation = first
            .value()
            .as_symbol()
            .map_or(false, |s| s.as_str() == "!");
        if is_negation {
            let negated = parse_non_binop(env, rest)?;
            return Ok(path::Predicate::Not {
                pred: Box::new(negated),
            });
        }
    }

    let selector = parse_selector(env, tokens)?;
    Ok(path::Predicate::Selector(Box::new(selector)))
}

/// Interprets `token` as a dotted schema definition path and checks that it
/// is known to `env`.
///
/// The symbol text is split on `.`; the last component is the definition
/// name and the components before it form the module path. Returns
/// `(module, name)` when `env.lookup_definition` finds the definition.
///
/// # Errors
///
/// * `CompilationError::InvalidStep` when `token` is not a symbol.
/// * `CompilationError::UndefinedSchemaDefinitionName` (carrying the debug
///   rendering of `token`) when the lookup finds nothing.
fn parse_schema_definition_name(
    env: &Env,
    token: &IOValue,
) -> Result<(Vec<String>, String), CompilationError> {
    let dotted = token
        .value()
        .to_symbol()
        .map_err(|_| CompilationError::InvalidStep)?;

    let mut module: Vec<String> = dotted.split('.').map(String::from).collect();
    // `str::split` always yields at least one item, so `pop` cannot fail.
    let name = module
        .pop()
        .expect("at least one element in the Schema name");

    if env.lookup_definition(&module, &name).is_some() {
        Ok((module, name))
    } else {
        Err(CompilationError::UndefinedSchemaDefinitionName(format!(
            "{:?}",
            token
        )))
    }
}

fn parse_step<'a>(
    env: &Env,
    tokens: &'a [IOValue],
) -> Result<Option<(path::Step, &'a [IOValue])>, CompilationError> {
    if tokens.is_empty() {
        return Ok(None);
    }

    let remainder = &tokens[1..];

    if tokens[0].value().is_sequence() {
        return Ok(Some((
            path::Step::Filter(Box::new(path::Filter::Test {
                pred: Box::new(parse_predicate(
                    env,
                    tokens[0].value().as_sequence().unwrap(),
                )?),
            })),
            remainder,
        )));
    }

    match tokens[0].value().as_record(None) {
        None => (),
        Some(r) => match r.label().value().as_symbol() {
            None => return Err(CompilationError::InvalidStep),
            Some(t) => match t.as_str() {
                "count" => {
                    return Ok(Some((
                        path::Step::Function(Box::new(path::Function {
                            selector: parse_selector(env, r.fields())?,
                        })),
                        remainder,
                    )))
                }
                _ => return Err(CompilationError::InvalidStep),
            },
        },
    }

    match tokens[0].value().as_symbol() {
        None => return Err(CompilationError::InvalidStep),
        Some(t) => match t.as_str() {
            "/" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Values)),
                remainder,
            ))),
            "//" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Descendants)),
                remainder,
            ))),
            "." => {
                let (key, remainder) = pop_step_arg(remainder)?;
                Ok(Some((
                    path::Step::Axis(Box::new(path::Axis::At { key })),
                    remainder,
                )))
            }
            ".^" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Label)),
                remainder,
            ))),
            ".keys" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Keys)),
                remainder,
            ))),
            ".length" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Length)),
                remainder,
            ))),
            ".annotations" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Annotations)),
                remainder,
            ))),
            ".embedded" => Ok(Some((
                path::Step::Axis(Box::new(path::Axis::Embedded)),
                remainder,
            ))),
            "%" => {
                let (defpath, remainder) = pop_step_arg(remainder)?;
                let (module, name) = parse_schema_definition_name(env, &defpath)?;
                Ok(Some((
                    path::Step::Axis(Box::new(path::Axis::Parse { module, name })),
                    remainder,
                )))
            }
            "%-" => {
                let (defpath, remainder) = pop_step_arg(remainder)?;
                let (module, name) = parse_schema_definition_name(env, &defpath)?;
                Ok(Some((
                    path::Step::Axis(Box::new(path::Axis::Unparse { module, name })),
                    remainder,
                )))
            }
            "*" => Ok(Some((
                path::Step::Filter(Box::new(path::Filter::Nop)),
                remainder,
            ))),
            "eq" | "=" => parse_comparison(remainder, path::Comparison::Eq),
            "ne" | "!=" => parse_comparison(remainder, path::Comparison::Ne),
            "lt" => parse_comparison(remainder, path::Comparison::Lt),
            "gt" => parse_comparison(remainder, path::Comparison::Gt),
            "le" => parse_comparison(remainder, path::Comparison::Le),
            "ge" => parse_comparison(remainder, path::Comparison::Ge),
            "re" | "=r" => {
                let (regex_val, remainder) = pop_step_arg(remainder)?;
                let regex = regex_val
                    .value()
                    .to_string()
                    .map_err(|_| CompilationError::InvalidStep)?
                    .clone();
                let _ = regex::Regex::new(&regex)?;
                Ok(Some((
                    path::Step::Filter(Box::new(path::Filter::Regex { regex })),
                    remainder,
                )))
            }
            "^" => {
                let (literal, remainder) = pop_step_arg(remainder)?;
                Ok(Some((
                    path::Step::Filter(Box::new(path::Filter::Test {
                        pred: Box::new(path::Predicate::Selector(Box::new(path::Selector(vec![
                            path::Step::Axis(Box::new(path::Axis::Label)),
                            path::Step::Filter(Box::new(path::Filter::Compare {
                                op: Box::new(path::Comparison::Eq),
                                literal,
                            })),
                        ])))),
                    })),
                    remainder,
                )))
            }

            "~real" => Ok(Some((
                path::Step::Filter(Box::new(path::Filter::Real)),
                remainder,
            ))),
            "~int" => Ok(Some((
                path::Step::Filter(Box::new(path::Filter::Int)),
                remainder,
            ))),

            "bool" => Ok(Some((
                path::Step::from(path::ValueKind::Boolean),
                remainder,
            ))),
            "double" => Ok(Some((path::Step::from(path::ValueKind::Double), remainder))),
            "int" => Ok(Some((
                path::Step::from(path::ValueKind::SignedInteger),
                remainder,
            ))),
            "string" => Ok(Some((path::Step::from(path::ValueKind::String), remainder))),
            "bytes" => Ok(Some((
                path::Step::from(path::ValueKind::ByteString),
                remainder,
            ))),
            "symbol" => Ok(Some((path::Step::from(path::ValueKind::Symbol), remainder))),
            "rec" => Ok(Some((path::Step::from(path::ValueKind::Record), remainder))),
            "seq" => Ok(Some((
                path::Step::from(path::ValueKind::Sequence),
                remainder,
            ))),
            "set" => Ok(Some((path::Step::from(path::ValueKind::Set), remainder))),
            "dict" => Ok(Some((
                path::Step::from(path::ValueKind::Dictionary),
                remainder,
            ))),
            "embedded" => Ok(Some((
                path::Step::from(path::ValueKind::Embedded),
                remainder,
            ))),

            _ => Err(CompilationError::InvalidStep),
        },
    }
}

/// Lets a bare `path::ValueKind` be used wherever a step is expected by
/// wrapping it in a `Filter::Kind` filter step.
impl From<path::ValueKind> for path::Step {
    fn from(k: path::ValueKind) -> Self {
        let kind = Box::new(k);
        let kind_filter = path::Filter::Kind { kind };
        path::Step::Filter(Box::new(kind_filter))
    }
}

/// Takes the argument token that must follow a step keyword.
///
/// Returns a clone of the first token together with the remaining tokens.
///
/// # Errors
///
/// `CompilationError::InvalidStep` when `tokens` is empty.
fn pop_step_arg(tokens: &[IOValue]) -> Result<(IOValue, &[IOValue]), CompilationError> {
    match tokens.split_first() {
        Some((arg, rest)) => Ok((arg.clone(), rest)),
        None => Err(CompilationError::InvalidStep),
    }
}

/// Builds a comparison filter step for `op`, using the first of `tokens` as
/// the literal to compare against.
///
/// On success the result is always `Some`, pairing the
/// `Filter::Compare` step with the tokens after the literal.
///
/// # Errors
///
/// `CompilationError::InvalidStep` (from `pop_step_arg`) when no literal
/// token is available.
fn parse_comparison(
    tokens: &[IOValue],
    op: path::Comparison,
) -> Result<Option<(path::Step, &[IOValue])>, CompilationError> {
    pop_step_arg(tokens).map(|(literal, remainder)| {
        let compare = path::Filter::Compare {
            op: Box::new(op),
            literal,
        };
        Some((path::Step::Filter(Box::new(compare)), remainder))
    })
}

impl path::Selector {
    /// Parses the textual selector `s` into a `path::Selector`.
    ///
    /// The text is first read as a series of Preserves text values, which
    /// are then handed to `parse_selector` as the token list.
    ///
    /// # Errors
    ///
    /// Fails if the text cannot be read as Preserves values or if
    /// `parse_selector` rejects the resulting tokens.
    pub fn from_str(env: &Env, s: &str) -> Result<Self, CompilationError> {
        // NOTE(review): `configured(false)` presumably turns off reading of
        // annotations — confirm against the preserves `Reader` docs.
        let tokens = BytesBinarySource::new(s.as_bytes())
            .text_iovalues()
            .configured(false)
            .collect::<Result<Vec<_>, _>>()?;
        parse_selector(env, &tokens)
    }
}

impl Node {
    /// Parses the textual selector `s` and compiles it into a `Node`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `path::Selector::from_str` or the selector's
    /// `compile` method reports.
    pub fn from_str(env: &Env, s: &str) -> Result<Self, CompilationError> {
        path::Selector::from_str(env, s).and_then(|selector| selector.compile())
    }
}