datafu 0.0.7

A Rust library for extracting data from config objects and other arbitrary object graphs.
Documentation
/*
 * Datafu - Rust library for extracting data from object graphs.
 * Copyright (C) 2021  Soni L.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#![warn(rust_2018_idioms)]
#![cfg_attr(not(feature = "stable"), feature(label_break_value))]

//! Datafu is a regex-inspired query language. It was primarily
//! designed for processing object trees parsed from configuration files, but
//! can also be used with JSON APIs, and even XML.
//!
//! # Language Reference
//!
//! Datafu expressions have the ability to iterate, index, validate and filter
//! data structures, through the use of the syntax elements below.
//!
//! ## Syntax Elements of Datafu Expressions
//!
//! An arrow is `->` and indicates indexing/iteration. Whether indexing or
//! iteration is used is defined by the elements that follow, with iteration
//! being used by default.
//!
//! A variable is a sequence of alphanumeric characters, not starting with
//! a digit. A `(key, value)` tuple containing the respective matched
//! element will be identified by this name in the results map.
//!
//! A literal is a sequence of characters delimited by `'`, optionally
//! followed by `?`, with `%` as the escape character, and defines a
//! string-keyed indexing operation. A literal can contain any character,
//! except unescaped `%` or `'` symbols, which must be escaped as
//! `%%` and `%'`, respectively. The sequence of characters defined by
//! a literal is used as the string object in the indexing operation.
//!
//! A parameter is `$`, optionally followed by `?`, followed by a
//! sequence of alphanumeric characters, not starting with a digit, and
//! defines an object-keyed indexing operation. The sequence of characters
//! defined by a parameter is used to retrieve, from the pattern's
//! definitions, the object to be used in the indexing operation.
//!
//! A regex is a sequence of characters delimited by `/`, optionally
//! followed by `?`, with `%` as the escape character. A regex can
//! contain any character, except unescaped `%` or `/` symbols, which
//! must be escaped as `%%` and `%/`, respectively. The sequence of
//! characters defined by a regex is passed to the `regex` crate, which
//! may apply further restrictions on the characters used, and is used to
//! accept the respective keys processed by the iterator.
//!
//! A predicate is `:`, optionally followed by `?`, followed by a
//! `$` and a sequence of alphanumeric characters, not starting with a
//! digit, and is used to accept values to be processed based on an
//! external [`Predicate`].
//!
//! A key match is a datafu expression (including, but not limited to, the
//! empty datafu expression) enclosed within `[` and `]`, optionally
//! prefixed with one or more predicates, and applies the enclosed
//! predicates and datafu expression to the key (or index) being processed.
//! A key match enables additional validation of keys and/or extraction of
//! values from keys, and accepts a key if and only if the enclosed
//! predicates accept the key and the enclosed expression matches the key.
//!
//! A subvalue is a datafu expression (including, but not limited to, the
//! empty datafu expression) enclosed within `(` and `)`, and applies
//! the enclosed datafu expression to the value (or index) being processed.
//! A subvalue enables the ability to match multiple values on the same
//! object, and accepts a value if and only if the enclosed expression
//! matches the value. A subvalue can be made optional by the presence of
//! a `?` after the subvalue - in case of no match, it will just omit
//! the relevant keys in the result. Optional subvalues are unrelated to
//! non-validating syntax elements (see below), they just use the same
//! syntax.
//!
//! Some syntax elements can be validating or non-validating. Validating
//! syntax elements will return a [`errors::MatchError::ValidationError`]
//! whenever a non-accepted element is encountered, whereas non-validating
//! ones will skip them. Whether an element is validating is determined by
//! the absence of an optional `?` in the documented position. Note that
//! it is possible for a validating syntax element to still yield results
//! before returning a [`errors::MatchError::ValidationError`], so one
//! needs to be careful when writing code where such behaviour could
//! result in a security vulnerability.
//!
//! The empty pattern matches anything, but only does so once.
//!
//! ## Syntax of Datafu Expressions
//!
//! Datafu Expressions follow the given syntax, in (pseudo-)extended BNF:
//!
//! ```text
//! expression ::= {arrow tag} {subvalue}
//! tag ::= identifier [arg] {predicate} | arg {predicate}
//! arg ::= parameter | literal | regex | keymatch
//!
//! arrow ::= '->'
//! keymatch ::= '[' {predicate} expression ']'
//! subvalue ::= '(' {predicate} expression ')' ['?']
//! ```
//!
//! For a description of the terminals "parameter", "literal", "regex" and
//! "predicate", see "Syntax Elements of Datafu Expressions" above.
//!
//! # Examples
//!
//! <!-- TODO -->

extern crate regex;

#[cfg(test)]
extern crate proptest;

pub mod errors;
mod parser;
mod pattern;
mod vm;

pub use pattern::Pattern;

pub use vm::Matcher;

// TODO replace with GATs
/// A borrowed or owned value of various types.
///
/// `T` is the borrowed representation (it may be unsized) and `U` is the
/// owned representation. Borrowed string slices get a dedicated variant that
/// does not depend on `T`.
///
/// This exists purely as a workaround for Rust not having GATs yet.
#[derive(Debug)]
pub enum RefOwn<'b, T: ?Sized, U> {
    /// Borrowed T: a shared reference valid for `'b`.
    Ref(&'b T),
    /// Borrowed string: a string slice valid for `'b`.
    Str(&'b str),
    /// Owned U, held by value.
    Own(U),
}

impl<'b, T, U> PartialEq for RefOwn<'b, T, U> 
where
    T: ?Sized + PartialEq<T> + PartialEq<U> + PartialEq<str>,
    U: PartialEq<T> + PartialEq<U> + PartialEq<str>,
    str: PartialEq<T> + PartialEq<U> + PartialEq<str>
{
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (RefOwn::Ref(l), RefOwn::Ref(r)) => l.eq(r),
            (RefOwn::Own(l), RefOwn::Own(r)) => l.eq(r),
            (RefOwn::Str(l), RefOwn::Str(r)) => l.eq(r),
            (RefOwn::Ref(l), RefOwn::Own(r)) => PartialEq::eq(*l, r),
            (RefOwn::Own(l), RefOwn::Str(r)) => PartialEq::eq(l, *r),
            (RefOwn::Str(l), RefOwn::Ref(r)) => l.eq(r),
            (RefOwn::Ref(l), RefOwn::Str(r)) => l.eq(r),
            (RefOwn::Own(l), RefOwn::Ref(r)) => PartialEq::eq(l, *r),
            (RefOwn::Str(l), RefOwn::Own(r)) => PartialEq::eq(*l, r),
        }
    }
}

/// A `RefOwn` is `Copy` whenever its owned payload type is: the two borrowed
/// variants only hold shared references.
impl<'b, T, U> Copy for RefOwn<'b, T, U>
where
    T: ?Sized,
    U: Copy,
{
}

impl<'b, T: ?Sized, U: Clone> Clone for RefOwn<'b, T, U> {
    fn clone(&self) -> Self {
        match self {
            RefOwn::Ref(r) => RefOwn::Ref(r),
            RefOwn::Str(r) => RefOwn::Str(r),
            RefOwn::Own(v) => RefOwn::Own(v.clone()),
        }
    }
}

/// A key-value pair, as a `(key, value)` tuple.
///
/// Both elements are [`RefOwn`] values over the `Ref` and `Own` types of the
/// given [`PatternTypes`] implementation, and both borrow for `'b`.
pub type KVPair<'b, T> = (
    RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>,
    RefOwn<'b, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>,
);

/// Wraps a shared reference in [`RefOwn::Ref`].
///
/// `T` may be unsized (for example `str`, a slice or a trait object), matching
/// the `?Sized` bound on [`RefOwn`] itself. The result is always the `Ref`
/// variant, even when `T` is `str`.
impl<'b, T: ?Sized, U> From<&'b T> for RefOwn<'b, T, U> {
    fn from(x: &'b T) -> RefOwn<'b, T, U> {
        RefOwn::Ref(x)
    }
}

// TODO investigate if this should be PatternTypes: Default
/// Defines the types and operations used for matching.
///
/// An implementation picks a borrowed type (`Ref`) and an owned type (`Own`).
/// Every operation takes and returns values wrapped in [`RefOwn`] over those
/// two types. All operations are associated functions; none takes `self`.
pub trait PatternTypes {
    /// The borrowed type. It may be unsized.
    type Ref: ?Sized;

    // TODO replace with GATs.
    // TODO potentially relax with Clone?
    /// The owned type. It must be `Copy` and `'static`.
    type Own: Copy + 'static;

    /// Returns an iterator over key-value pairs contained within an item, or
    /// None if this operation is unsupported for the given value.
    ///
    /// The iterator is boxed and may borrow from `item` for `'b`; the pairs it
    /// yields borrow for `'b` as well.
    fn pairs<'b>(
        item: RefOwn<'b, Self::Ref, Self::Own>
    ) -> Option<Box<dyn Iterator<Item=KVPair<'b, Self>> + 'b>>;

    /// Returns an optional key-value pair keyed by the given key, or None if
    /// this operation is unsupported for the given value.
    ///
    /// The outer `Option` reports whether the operation is supported; the
    /// inner `Option` carries the pair, if any (presumably `None` when `item`
    /// has no entry for `key` - confirm against implementors). The returned
    /// pair borrows for `'b` (the lifetime of `item`), independently of the
    /// lifetime `'a` of `key`.
    fn get<'a, 'b>(
        item: RefOwn<'b, Self::Ref, Self::Own>,
        key: RefOwn<'a, Self::Ref, Self::Own>
    ) -> Option<Option<KVPair<'b, Self>>>;

    // TODO replace with GATs + newtypes
    /// Returns whether two keys/values are the same/equivalent. This must provide
    /// the same guarantees as PartialEq. In fact, this is a replacement for
    /// PartialEq for cases where it's not possible to just use PartialEq.
    ///
    /// The two arguments may have unrelated lifetimes.
    fn matches(
        left: RefOwn<'_, Self::Ref, Self::Own>,
        right: RefOwn<'_, Self::Ref, Self::Own>
    ) -> bool;

    /// Returns the value as an &str.
    ///
    /// The returned slice borrows for `'b`. `None` presumably means the value
    /// has no string representation - confirm against implementors.
    fn as_str<'b>(
        value: RefOwn<'b, Self::Ref, Self::Own>
    ) -> Option<&'b str>;
}

/// A predicate over keys and values.
///
/// This is an unsized trait-object type: a function from a [`RefOwn`] value
/// (of any lifetime) to `bool` that is also `Send` and `Sync`. Use it behind a
/// pointer such as `&Predicate<T>` or `Box<Predicate<T>>`.
pub type Predicate<T> =
    dyn (Fn(RefOwn<'_, <T as PatternTypes>::Ref, <T as PatternTypes>::Own>) -> bool)
        + Send
        + Sync;