// cipherstash-client 0.34.1-alpha.1
//
// The official CipherStash SDK
// Documentation
//! ejsonpath is a parser and evaluator for a subset of JSONPath.
mod parser;

use serde_json::Value;
use std::{
    iter::{empty, once},
    ops::Deref,
};

/// A JSON path selector (in EQL dialect, which is a subset of general JSON Path)
///
/// A selector is a left-nested chain: every non-root variant boxes the selector it is applied to (its "left
/// argument") together with the argument of the operator itself. The `Display` output is the textual path, built by
/// printing the left argument first and then the operator.
#[derive(Debug, Clone, PartialEq, Eq, derive_more::Display, Hash)]
pub enum Selector {
    /// The `$` operator. Matches the root of the document
    #[display("$")]
    Root,

    /// The `.` operator. Selects subfields of the nodes returned by the left argument.
    ///
    /// Displayed as the left argument, a `.`, and then the [`DotArg`].
    #[display("{}.{}", _0.deref(), _1)]
    Dot(Box<Selector>, DotArg),

    /// Selects subnodes of the left argument by numeric index, by field name or by wildcard.
    ///
    /// The array indexing operator `[idx]`, where `idx` can be a number or a string.
    ///
    /// Range indexes are not accepted.
    ///
    /// Displayed as the left argument followed by the [`IndexArg`] wrapped in square brackets.
    #[display("{}[{}]", _0, _1)]
    Index(Box<Selector>, IndexArg),
}

/// The right hand argument to the `.` operator.
#[derive(Debug, Clone, PartialEq, Eq, derive_more::Display, Hash)]
pub enum DotArg {
    /// A field name. Displayed verbatim, without any quoting.
    #[display("{}", _0)]
    Field(String),
}

/// The `idx` in `[idx]`.
///
/// Only indexing by numeric index, field name or wildcard is permitted. Recursion `..` and ranges `[start:end:step]`
/// are not supported.
#[derive(Debug, Clone, PartialEq, Eq, derive_more::Display, Hash)]
pub enum IndexArg {
    /// A number index
    #[display("{}", _0)]
    Number(usize),

    /// An index by name (works just like the dot operator)
    ///
    /// Displayed wrapped in single quotes, e.g. `'foo'`.
    #[display("'{}'", _0)]
    Field(String),

    /// Individual array item. Displayed as `@`.
    #[display("@")]
    Item,

    /// The wildcard index. Displayed as `*`.
    #[display("*")]
    Wildcard,
}

/// The error returned by [`Selector::parse`] when the input is not a valid selector.
///
/// The error message includes both the rejected input and the details reported by the parser.
#[derive(Debug, thiserror::Error)]
#[error("error parsing eJSONPath `{}`. Details: {}", .parser_input, .message)]
pub struct ParseError {
    /// A copy of the string that was handed to the parser.
    pub parser_input: String,
    /// The failure description produced by the parser.
    pub message: String,
}

/// Boxes `iter` as a trait object so that iterators of different concrete types can share one return type.
///
/// The items are yielded unchanged and in the same order.
fn dyn_iter<'a, I>(iter: I) -> Box<dyn Iterator<Item = &'a Value> + 'a>
where
    I: Iterator<Item = &'a Value> + 'a,
{
    Box::new(iter)
}

impl Selector {
    /// Parses a [`Selector`] from its textual form.
    ///
    /// # Errors
    ///
    /// Returns a [`ParseError`] holding a copy of `input` and the parser's message when the parser rejects `input`.
    pub fn parse(input: &str) -> Result<Selector, ParseError> {
        parser::parse_selector(input).map_err(|message| ParseError {
            parser_input: input.to_owned(),
            message,
        })
    }

    /// Evaluates the `Selector` against `value`.
    ///
    /// Every matched node is cloned and the clones are returned, in match order, inside a [`Value::Array`].
    pub fn eval(&self, value: &Value) -> Value {
        let root = Box::new(once(value));
        let matches: Vec<Value> = self.eval_iter(root).cloned().collect();
        Value::Array(matches)
    }

    /// Lazily evaluates the selector over the nodes produced by `value_iter`.
    ///
    /// The left argument of an operator is always evaluated first (recursively), and the operator is then applied to
    /// each node it produced.
    fn eval_iter<'a, I: Iterator<Item = &'a Value> + 'a>(
        &'a self,
        value_iter: Box<I>,
    ) -> impl Iterator<Item = &'a Value> {
        match self {
            // The root selects the input nodes themselves.
            Selector::Root => value_iter,

            // `.name` and `['name']` are evaluated identically.
            Selector::Dot(parent, DotArg::Field(name))
            | Selector::Index(parent, IndexArg::Field(name)) => dyn_iter(
                parent
                    .eval_iter(value_iter)
                    .flat_map(move |node| Self::get_field(node, name)),
            ),

            // `[n]` keeps the n-th element of each array node; non-arrays and out-of-range indexes yield nothing.
            Selector::Index(parent, IndexArg::Number(position)) => {
                let position = *position;
                dyn_iter(parent.eval_iter(value_iter).filter_map(move |node| {
                    if let Value::Array(items) = node {
                        items.get(position)
                    } else {
                        None
                    }
                }))
            }

            // `[*]` and `[@]` pass the nodes selected by the left argument through unchanged.
            Selector::Index(parent, IndexArg::Wildcard | IndexArg::Item) => {
                parent.eval_iter(value_iter)
            }
        }
    }

    /// Looks up `field` on `value`.
    ///
    /// * For an object, yields the value stored under `field`, if any.
    /// * For an array, yields the value stored under `field` in each element that is an object; other elements are
    ///   skipped.
    /// * For any other kind of value, yields nothing.
    fn get_field<'a>(value: &'a Value, field: &'a str) -> Box<dyn Iterator<Item = &'a Value> + 'a> {
        match value {
            Value::Object(members) => dyn_iter(members.get(field).into_iter()),

            Value::Array(items) => dyn_iter(items.iter().filter_map(move |item| {
                if let Value::Object(members) = item {
                    members.get(field)
                } else {
                    None
                }
            })),

            _ => dyn_iter(empty()),
        }
    }

    /// Returns the canonical form of the `Selector`, so that selectors which express the same intent with different
    /// syntax compare equal.
    ///
    /// In JSONPath `$.foo` and `$['foo']` mean the same thing. The bracketed form is chosen as the canonical one
    /// because it is the more flexible of the two: the field can contain "." as in `$['foo.bar']`.
    ///
    /// This is the only rewrite performed. It is applied along the whole chain of left arguments, and every other
    /// syntax form is preserved as is.
    pub fn canonicalise(&self) -> Self {
        let (parent, index_arg) = match self {
            Selector::Root => return Selector::Root,
            Selector::Dot(parent, DotArg::Field(name)) => (parent, IndexArg::Field(name.clone())),
            Selector::Index(parent, index_arg) => (parent, index_arg.clone()),
        };

        Selector::Index(Box::new(parent.canonicalise()), index_arg)
    }
}

/// Unit tests pinning the evaluation, display and canonicalisation behaviour of [`Selector`].
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// `$` evaluates to a one-element array holding the whole document.
    #[test]
    fn root_matches_entire_doc() -> Result<(), ParseError> {
        let doc = json!({
            "user": {
                "name": "Bob",
                "email": "bob@example.com"
            }
        });

        let selector = Selector::parse("$")?;
        let result = selector.eval(&doc);

        assert_eq!(result, Value::Array(vec![doc]));

        Ok(())
    }

    /// `[*]` yields the array node itself as a single match; its elements are not spread into the result.
    #[test]
    fn get_entire_array() -> Result<(), ParseError> {
        let doc = json!({
            "users": [
                {
                    "name": "Alice",
                    "email": "alice@example.com"
                },
                {
                    "name": "Bob",
                    "email": "bob@example.com"
                }
            ]
        });

        let selector = Selector::parse("$.users[*]")?;

        let result = selector.eval(&doc);

        assert_eq!(
            result,
            Value::Array(vec![Value::Array(vec![
                json!({
                    "name": "Alice",
                    "email": "alice@example.com"
                }),
                json!({
                    "name": "Bob",
                    "email": "bob@example.com"
                })
            ])])
        );

        Ok(())
    }

    /// A field access after `[*]` yields that field from every object in the array, in array order.
    #[test]
    fn extracting_fields() -> Result<(), ParseError> {
        let doc = json!({
            "users": [
                {
                    "name": "Alice",
                    "email": "alice@example.com"
                },
                {
                    "name": "Bob",
                    "email": "bob@example.com"
                }
            ]
        });

        let selector = Selector::parse("$.users[*].name")?;

        let result = selector.eval(&doc);

        assert_eq!(
            result,
            Value::Array(vec![
                Value::String("Alice".to_owned()),
                Value::String("Bob".to_owned())
            ])
        );

        Ok(())
    }

    /// Parsing a selector and displaying it again reproduces the original text.
    #[test]
    fn roundtrip() {
        assert_roundtrip("$.users[*].name");
        assert_roundtrip("$.users['foo'].name");
    }

    /// A double-quoted field index is displayed with single quotes.
    #[test]
    fn normalised_quotes() {
        assert_eq!(
            Selector::parse("$.users[\"foo\"].name")
                .unwrap()
                .to_string(),
            "$.users['foo'].name"
        );
    }

    /// Dot access and bracketed field access (with either quote style) canonicalise to the same selector.
    #[test]
    fn canonicalise_simple() {
        assert_eq!(
            Selector::parse("$.foo").unwrap().canonicalise(),
            Selector::parse("$['foo']").unwrap().canonicalise()
        );

        assert_eq!(
            Selector::parse("$[\"foo\"]").unwrap().canonicalise(),
            Selector::parse("$['foo']").unwrap().canonicalise()
        );
    }

    /// Canonicalisation is applied to every step of a multi-step path, not just the outermost one.
    #[test]
    fn canonicalise_recursive() {
        assert_eq!(
            Selector::parse("$.foo.bar").unwrap().canonicalise(),
            Selector::parse("$['foo']['bar']").unwrap().canonicalise()
        );

        assert_eq!(
            Selector::parse("$[\"foo\"][\"bar\"]")
                .unwrap()
                .canonicalise(),
            Selector::parse("$['foo']['bar']").unwrap().canonicalise()
        );
    }

    /// Asserts that `input` parses and that displaying the parsed selector gives back `input` exactly.
    fn assert_roundtrip(input: &str) {
        assert_eq!(input, Selector::parse(input).unwrap().to_string());
    }
}