datafu 0.0.7

A Rust library for extracting data from config objects and other arbitrary object graphs.
Documentation
/*
 * This file is part of Datafu
 * Copyright (C) 2021  Soni L.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

use std::collections::BTreeMap;
use std::iter::Peekable;

use regex::Regex;

use crate::KVPair;
use crate::RefOwn;
use crate::PatternTypes;
use crate::Predicate;
use crate::errors::MatchError;

pub(crate) const MAX_CALLS: usize = 250;

type Matches<'a, 'b, T> = BTreeMap<&'a str, KVPair<'b, T>>;

// TODO: use a builder for this?
/// The constant pool for a pattern.
pub(crate) struct PatternConstants<T: PatternTypes> {
    // last proto is implicitly the whole pattern.
    pub(crate) protos: Vec<Vec<PatternElement>>,
    // Note that we can borrow these when creating the output map.
    // https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=da26f9175e96273fa0b94971a4e6172f
    pub(crate) strings: Vec<String>,
    pub(crate) regices: Vec<Regex>,
    pub(crate) predicates: Vec<Box<Predicate<T>>>,
    // NOTE these are part of the constant pool and so have lifetime analogous
    // to 'a (consistently used to indicate constant pool lifetime) when used
    // elsewhere. In particular, they can't be yielded by the iterator.
    pub(crate) defs: Vec<T::Own>,
}

impl<T: PatternTypes> Default for PatternConstants<T> {
    fn default() -> Self {
        Self {
            protos: Default::default(),
            strings: Default::default(),
            regices: Default::default(),
            predicates: Default::default(),
            defs: Default::default(),
        }
    }
}

/// A pattern element.
#[derive(Copy, Clone)]
pub(crate) enum PatternElement {
    Arrow,

    Identifier(usize),

    StringKey(usize, bool),
    RegexKey(usize, bool),
    ParameterKey(usize, bool),
    KeySubtree(usize, bool),
    ValueSubtree(usize, bool),
    ApplyPredicate(usize, bool),

    End
}

struct Frame<'a, 'b, T: PatternTypes> {
    //obj: RefOwn<'b, T::Ref, T::Own>,
    ops: &'a [PatternElement],
    iar: Option<usize>,
    depth: usize,
    path: Vec<Holder<'a, 'b, T>>,
    in_key: bool,
}

impl<'a, 'b, T: PatternTypes> Frame<'a, 'b, T> {
    /// Advances the instruction address register.
    ///
    /// # Returns
    ///
    /// `true` if successful, `false` otherwise.
    fn next(&mut self) -> bool {
        let new = self.iar.map_or(0, |v| v + 1);
        new < self.ops.len() && {
            self.iar = Some(new);
            true
        }
    }

    /// Returns the current instruction.
    fn op(&self) -> PatternElement {
        self.ops[self.iar.expect("ops[iar]")]
    }

    /// Rewinds the instruction address register.
    ///
    /// # Returns
    ///
    /// `true` if successful, `false` otherwise.
    fn prev(&mut self) -> bool {
        let new = self.iar.expect("iar").checked_sub(1);
        new.is_some() && {
            self.iar = new;
            true
        }
    }
}

/// Stores a single match.
///
/// See also Holder.
enum HolderState<'a, 'b, T: PatternTypes> {
    /// Empty holder, for a key-value pair.
    EmptyKey,
    /// Empty holder, for a Matcher and a key-value pair.
    EmptyKeySubtree,
    // /// Empty holder, for a Matcher and a value.
    // EmptyValueSubtree,
    /// Occupied holder, for a key-value pair..
    Key(KVPair<'b, T>),
    /// Occupied holder, for a Matcher and a key-value pair.
    KeySubtree(Peekable<Matcher<'a, 'b, T>>, KVPair<'b, T>),
    /// Occupied holder, for a Matcher and a value. The empty variant is
    /// omitted as it would never be used otherwise.
    ValueSubtree(Peekable<Matcher<'a, 'b, T>>, RefOwn<'b, T::Ref, T::Own>),
    /// Occupied holder, for a value. The empty variant is omitted as it would
    /// never be used otherwise.
    Value(RefOwn<'b, T::Ref, T::Own>),
}

/// Helper enum for HolderState.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
enum HolderKind {
    Key,
    KeySubtree,
    ValueSubtree,
    Value
}

//impl<'a, 'b, T: PatternTypes> Clone for HolderState<'a, 'b, T> {
//    fn clone(&self) -> Self {
//        match self {
//            HolderState::EmptyKey => HolderState::EmptyKey,
//            HolderState::EmptySubtree => HolderState::EmptySubtree,
//            HolderState::Key(v) => HolderState::Key(*v),
//            HolderState::KeySubtree(m, v) => HolderState::KeySubtree(m.clone(), *v),
//            HolderState::ValueSubtree(m, v) => HolderState::ValueSubtree(m.clone(), *v),
//            HolderState::Value(v) => HolderState::Value(*v),
//        }
//    }
//}

impl<'a, 'b, T: PatternTypes> HolderState<'a, 'b, T> {
    #[rustfmt::skip]
    fn is_empty(&self) -> bool {
        match self {
            | HolderState::EmptyKey
            | HolderState::EmptyKeySubtree
            //| HolderState::EmptyValueSubtree
            => true, _ => false
        }
    }

    fn has_value(&self) -> bool {
        !self.is_empty()
    }

    fn kind(&self) -> HolderKind {
        match self {
            | HolderState::EmptyKey
            | HolderState::Key(_)
            => HolderKind::Key,
            | HolderState::EmptyKeySubtree
            | HolderState::KeySubtree(_, _)
            => HolderKind::KeySubtree,
            //| HolderState::EmptyValueSubtree
            | HolderState::ValueSubtree(_, _)
            => HolderKind::ValueSubtree,
            | HolderState::Value(_)
            => HolderKind::Value,
        }
    }

    fn value(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
        match *self {
            HolderState::Key((_, value)) => Some(value),
            HolderState::KeySubtree(_, (_, value)) => Some(value),
            HolderState::ValueSubtree(_, value) => Some(value),
            HolderState::Value(value) => Some(value),
            _ => None
        }
    }

    fn key(&self) -> Option<RefOwn<'b, T::Ref, T::Own>> {
        match *self {
            HolderState::Key((key, _)) => Some(key),
            HolderState::KeySubtree(_, (key, _)) => Some(key),
            _ => None
        }
    }

    fn pair(&self) -> Option<KVPair<'b, T>> {
        match *self {
            HolderState::Key(pair) => Some(pair),
            HolderState::KeySubtree(_, pair) => Some(pair),
            _ => None
        }
    }

    fn subtree(&mut self) -> Option<&mut Peekable<Matcher<'a, 'b, T>>> {
        match *self {
            HolderState::KeySubtree(ref mut subtree, _) => Some(subtree),
            HolderState::ValueSubtree(ref mut subtree, _) => Some(subtree),
            _ => None
        }
    }

    fn clear(&mut self) {
        *self = match self.kind() {
            HolderKind::Key => HolderState::EmptyKey,
            HolderKind::KeySubtree => HolderState::EmptyKeySubtree,
            HolderKind::ValueSubtree => unreachable!(), //HolderState::EmptyValueSubtree,
            HolderKind::Value => unreachable!(),
        };
        assert!(self.is_empty());
    }
}

/// Stores a single match and associated metadata.
///
/// A single match is generally a key-value pair, but may be a collection of
/// named pairs in the case of subtree matches, or just a value for the initial
/// holder.
struct Holder<'a, 'b, T: PatternTypes> {
     name: Option<&'a str>,
     value: HolderState<'a, 'b, T>,
     parent: Option<RefOwn<'b, T::Ref, T::Own>>,
     iterator: Option<Box<dyn Iterator<Item=KVPair<'b, T>> + 'b>>,
     filters: Vec<Box<dyn (for<'c> Fn(&'c mut HolderState<'a, 'b, T>) -> Result<(), MatchError>) + 'a>>,
}

impl<'a, 'b, T: PatternTypes> Holder<'a, 'b, T> {
    fn next(&mut self) -> Result<bool, MatchError> {
        self.ensure_iterator()?;
        if let Self {
            value: ref mut v,
            iterator: Some(ref mut it),
            ref filters,
            ..
        } = self {
            // check if we're in a subtree and (not) done.
            if let Some(matcher) = v.subtree() {
                if let Some(res) = matcher.peek() {
                    // report any errors
                    return res.as_ref().map(|_| true).map_err(|e| e.clone());
                }
            }
            let kind = v.kind();
            let mut next_v;
            loop {
                next_v = match it.next() {
                    Some(pair) => HolderState::Key(pair),
                    None => return Ok(false)
                };
                for filter in filters {
                    filter(&mut next_v)?;
                    if next_v.is_empty() {
                        break;
                    }
                }
                if next_v.has_value() {
                    break;
                }
            }
            assert!(next_v.has_value());
            assert_eq!(next_v.kind(), kind);
            *v = next_v;
            Ok(true)
        } else {
            unreachable!()
        }
    }

    /// Ensure `self.iterator.is_some()`, creating an iterator if necessary.
    fn ensure_iterator(&mut self) -> Result<(), MatchError> {
        if self.iterator.is_none() {
            let iter = T::pairs(self.parent.unwrap());
            if iter.is_none() {
                return Err(MatchError::UnsupportedOperation);
            }
            self.iterator = iter;
        }
        assert!(self.iterator.is_some());
        Ok(())
    }
}

impl<'a, 'b, T: PatternTypes> Default for Holder<'a, 'b, T> {
    fn default() -> Self {
        Self {
            name: Default::default(),
            value: HolderState::EmptyKey,
            parent: Default::default(),
            iterator: Default::default(),
            filters: Default::default(),
        }
    }
}

pub struct Matcher<'a, 'b, T: PatternTypes> {
    defs: &'a PatternConstants<T>,
    frame: Frame<'a, 'b, T>,
}

// TODO:
//
// [x] Arrow
// [x] StringKey
// [x] RegexKey
// [x] KeySubtree
// [x] ValueSubtree
// [x] Ident
// [x] Param (untested)
// [x] ApplyPredicate
// [x] End

/// Helper for `PatternElement::StringKey`.
fn on_string_key<'a, 'b, T: PatternTypes>(
    matcher: &mut Matcher<'a, 'b, T>,
    id: usize,
    skippable: bool,
) -> Result<bool, MatchError> {
    let path = matcher.frame.path.last_mut().unwrap();
    assert!(path.iterator.is_none());
    let key = &matcher.defs.strings[id];
    let iter = T::get(path.parent.unwrap(), RefOwn::Str(key));
    match iter {
        Some(None) if !skippable => Err(MatchError::ValidationError),
        Some(opt) => {
            path.iterator = Some(Box::new(opt.into_iter()));
            Ok(true)
        }
        None => Err(MatchError::UnsupportedOperation),
    }
}

/// Helper for `PatternElement::ParameterKey`.
fn on_parameter_key<'a, 'b, T: PatternTypes>(
    matcher: &mut Matcher<'a, 'b, T>,
    id: usize,
    skippable: bool,
) -> Result<bool, MatchError> {
    let path = matcher.frame.path.last_mut().unwrap();
    assert!(path.iterator.is_none());
    let key = matcher.defs.defs[id];
    let iter = T::get(path.parent.unwrap(), RefOwn::Own(key));
    match iter {
        Some(None) if !skippable => Err(MatchError::ValidationError),
        Some(opt) => {
            path.iterator = Some(Box::new(opt.into_iter()));
            Ok(true)
        }
        None => Err(MatchError::UnsupportedOperation),
    }
}

/// Helper for `PatternElement::RegexKey`.
fn on_regex_key<'a, 'b, T: PatternTypes>(
    matcher: &mut Matcher<'a, 'b, T>,
    id: usize,
    skippable: bool,
) -> Result<bool, MatchError> {
    matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
    let re = &matcher.defs.regices[id];
    let path = matcher.frame.path.last_mut().unwrap();
    path.filters.push(Box::new(move |value| {
        let s = T::as_str(value.key().unwrap());
        match (s.map_or(false, |s| re.is_match(s)), skippable) {
            (true, _) => Ok(()),
            (false, true) => {
                value.clear();
                Ok(())
            },
            (false, false) => Err(MatchError::ValidationError),
        }
    }));
    Ok(true)
}

/// Helper for `PatternElement::KeySubtree`.
fn on_key_subtree<'a, 'b, T: PatternTypes>(
    matcher: &mut Matcher<'a, 'b, T>,
    id: usize,
    skippable: bool,
) -> Result<bool, MatchError> {
    let _ = skippable; // FIXME what should a skippable KeySubtree even do?!
    matcher.frame.path.last_mut().unwrap().ensure_iterator()?;
    let defs = matcher.defs;
    let rlimit: usize = matcher.frame.depth;
    let path = matcher.frame.path.last_mut().unwrap();
    assert!(path.value.is_empty());
    assert_eq!(path.value.kind(), HolderKind::Key);
    path.value = HolderState::EmptyKeySubtree;
    path.filters.push(Box::new(move |value| {
        let key = value.key().unwrap();
        let mut subtree = Matcher::new(key, defs, id, rlimit)?.peekable();
        match subtree.peek() {
            Some(&Ok(_)) => {
                *value = HolderState::KeySubtree(subtree, value.pair().unwrap());
                Ok(())
            },
            Some(&Err(ref e)) => {
                Err(e.clone())
            },
            None => {
                value.clear();
                Ok(())
            }
        }
    }));
    Ok(true)
}

const DUMMY_OPS: &'static [PatternElement] = &[];

impl<'a, 'b, T: PatternTypes> Matcher<'a, 'b, T> {
    pub(crate) fn new(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, proto: usize, rlimit: usize) -> Result<Self, MatchError> {
        let ops: &[_] = &defs.protos[proto];
        Self::with_ops(obj, defs, ops, rlimit)
    }

    /// Constructs a Matcher that yields a single dummy result.
    fn with_ops(obj: RefOwn<'b, T::Ref, T::Own>, defs: &'a PatternConstants<T>, ops: &'a [PatternElement], rlimit: usize) -> Result<Self, MatchError> {
        let depth = rlimit.checked_sub(1).ok_or(MatchError::StackOverflow)?;
        Ok(Self {
            defs: defs,
            frame: Frame {
                //obj: obj,
                ops: ops,
                iar: None,
                depth: depth,
                path: {
                    let mut holder = Holder::default();
                    holder.value = HolderState::Value(obj);
                    holder.iterator = Some(Box::new(std::iter::empty()));
                    vec![holder]
                },
                in_key: false,
            },
        })
    }

    fn on_in_key(&mut self) -> Result<bool, MatchError> {
        match self.frame.op() {
            PatternElement::End => {
                let path = self.frame.path.last_mut().unwrap();
                if path.next()? {
                    Ok(false)
                } else {
                    drop(path);
                    self.frame.path.pop().unwrap();
                    // stop at previous End, or start of frame
                    while self.frame.prev() {
                        if matches!(self.frame.op(), PatternElement::End) {
                            break;
                        }
                    }
                    // is start of frame?
                    if !self.frame.prev() {
                        self.frame.path.clear();
                    }
                    Ok(true)
                }
            },
            PatternElement::ApplyPredicate(id, skippable) => {
                // failing on T::get() is already handled, but we may need a
                // T::pairs(). construct it here.
                self.frame.path.last_mut().unwrap().ensure_iterator()?;
                let pred = &self.defs.predicates[id];
                let path = self.frame.path.last_mut().unwrap();
                path.filters.push(Box::new(move |value| {
                    match (pred(value.value().unwrap()), skippable) {
                        (true, _) => Ok(()),
                        (false, true) => {
                            value.clear();
                            Ok(())
                        },
                        (false, false) => Err(MatchError::ValidationError),
                    }
                }));
                Ok(true)
            },
            PatternElement::StringKey(id, skippable) => {
                on_string_key(self, id, skippable)
            },
            PatternElement::ParameterKey(id, skippable) => {
                on_parameter_key(self, id, skippable)
            },
            PatternElement::RegexKey(id, skippable) => {
                on_regex_key(self, id, skippable)
            },
            PatternElement::KeySubtree(id, skippable) => {
                on_key_subtree(self, id, skippable)
            },
            _ => unreachable!("on_in_key")
        }
    }

    fn on_not_in_key(&mut self) -> Result<bool, MatchError> {
        match self.frame.op() {
            PatternElement::Arrow => {
                // this *should* always pass.
                assert!(self.frame.path.last().unwrap().iterator.is_some());
                let mut holder = Holder::default();
                holder.parent = self.frame.path.last().unwrap().value.value();
                assert!(holder.parent.is_some());
                self.frame.path.push(holder);
                Ok(false)
            },
            PatternElement::Identifier(id) => {
                let name = self.defs.strings.get(id).map(|s| &**s);
                let path = self.frame.path.last_mut().unwrap();
                path.name = name;
                assert!(path.iterator.is_none());
                // we don't actually create the iterator here,
                // as we may still wanna use T::get() instead.
                Ok(true)
            },
            PatternElement::ApplyPredicate(id, skippable) => {
                assert!(self.frame.path.len() == 1);
                let pred = &self.defs.predicates[id];
                let value = self.frame.path.last().unwrap().value.value();
                match (pred(value.unwrap()), skippable) {
                    (true, _) => Ok(false),
                    (false, true) => {
                        self.frame.path.clear();
                        // any Ok(_) will do
                        Ok(false)
                    },
                    (false, false) => Err(MatchError::ValidationError),
                }
            },
            PatternElement::StringKey(id, skippable) => {
                on_string_key(self, id, skippable)
            },
            PatternElement::ParameterKey(id, skippable) => {
                on_parameter_key(self, id, skippable)
            },
            PatternElement::RegexKey(id, skippable) => {
                on_regex_key(self, id, skippable)
            },
            PatternElement::KeySubtree(id, skippable) => {
                on_key_subtree(self, id, skippable)
            },
            PatternElement::ValueSubtree(id, skippable) => {
                let value = self.frame.path.last().unwrap().value.value().unwrap();
                let mut subtree = Matcher::new(
                    value,
                    self.defs,
                    id,
                    self.frame.depth
                )?.peekable();
                let mut dummy = Matcher::with_ops(
                    value,
                    self.defs,
                    DUMMY_OPS,
                    self.frame.depth
                )?.peekable();
                // may panic.
                let peeked = subtree.peek();
                // shouldn't panic.
                let _ = dummy.peek();
                // push Holder after peek.
                self.frame.path.push(Holder::default());
                let mut holder = self.frame.path.last_mut().unwrap();
                holder.parent = Some(value);
                holder.iterator = Some(Box::new(std::iter::empty()));
                match peeked {
                    None if skippable => {
                        holder.value = HolderState::ValueSubtree(dummy, value);
                        Ok(true)
                    },
                    Some(&Ok(_)) | None => {
                        drop(peeked);
                        holder.value = HolderState::ValueSubtree(subtree, value);
                        Ok(true)
                    },
                    Some(&Err(ref e)) => {
                        Err(e.clone())
                    },
                }
            },
            _ => unreachable!("on_not_in_key")
        }
    }

    fn collect_results(&mut self) -> Matches<'a, 'b, T> {
        let mut res: Matches<'a, 'b, T> = Default::default();
        for holder in &mut self.frame.path {
            // make sure it's not empty.
            assert!(holder.value.has_value());
            // handle subtrees.
            if let Some(matcher) = holder.value.subtree() {
                if let Some(matches) = matcher.next() {
                    // NOTE: we have checked these already.
                    // (and if we haven't, that's a bug.)
                    res.extend(matches.unwrap());
                }
            }
            // handle pairs.
            if let Some(pair) = holder.value.pair() {
                if let Some(name) = holder.name {
                    res.insert(name, pair);
                }
            }
        }
        res
    }

    fn on_end(&mut self) -> (bool, Matches<'a, 'b, T>) {
        match self.frame.op() {
            PatternElement::End => {
                assert!(!self.frame.path.last().expect("path").value.is_empty());
                let res = self.collect_results();
                if !self.frame.prev() {
                    // NOTE: frame.prev() must always be called, even if this
                    // gets replaced with debug_assert!() in the future.
                    assert!(false, "frame.prev()");
                }
                (true, res)
            }
            PatternElement::ApplyPredicate {..} => {
                assert!(!self.frame.in_key);
                let res = self.collect_results();
                self.frame.path.clear();
                (false, res)
            }
            _ => unreachable!("on_end")
        }
    }
}

impl<'a, 'b, T: PatternTypes> Iterator for Matcher<'a, 'b, T> {
    type Item = Result<BTreeMap<&'a str, KVPair<'b, T>>, MatchError>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.frame.ops.is_empty() {
            if !self.frame.path.is_empty() {
                self.frame.path.clear();
                return Some(Ok(Default::default()));
            }
        }
        while !self.frame.path.is_empty() {
            if !self.frame.next() {
                let (in_key, res) = self.on_end();
                self.frame.in_key = in_key;
                return Some(Ok(res));
            } else {
                let in_key = if self.frame.in_key {
                    self.on_in_key()
                } else {
                    self.on_not_in_key()
                };
                match in_key {
                    Ok(in_key) => self.frame.in_key = in_key,
                    Err(e) => {
                        self.frame.path.clear();
                        return Some(Err(e))
                    },
                }
            }
        }
        None
    }
}