haz-query-lang 0.2.0

Parser and AST for the haz task query language.
Documentation
//! The boolean filter-expression syntax tree.

use crate::span::Span;

/// A parsed boolean filter expression, generic over its atom
/// representation.
///
/// The parser produces an `Expr<RawAtom>`. Consumer crates lift
/// atoms to a typed representation (e.g. `Expr<TagName>`,
/// `Expr<PathPattern>`, `Expr<RelationalAtom>`) before evaluation.
///
/// Trees built by the parser are left-associative for binary
/// operators and follow the precedence order of `QRY-002`:
/// `!` tightest, then `&`, then `|`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr<A> {
    /// A leaf atom.
    Atom(A),
    /// Negation of an inner expression.
    Not(Box<Expr<A>>),
    /// Logical conjunction of two sub-expressions.
    And(Box<Expr<A>>, Box<Expr<A>>),
    /// Logical disjunction of two sub-expressions.
    Or(Box<Expr<A>>, Box<Expr<A>>),
}

/// The raw atom representation emitted by the parser: the
/// verbatim text of the atom token together with the byte-offset
/// span it occupied in the original input.
///
/// The parser does not interpret the text. Consumer crates apply
/// per-attribute validation (identifier rules, path-pattern
/// grammar, relational `kind:value` discrimination) before
/// evaluation.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RawAtom {
    /// The atom token's verbatim UTF-8 byte content.
    pub text: String,
    /// The atom token's byte-offset span in the original input.
    pub span: Span,
}

impl<A> Expr<A> {
    /// Apply `f` to every atom in the tree in canonical
    /// left-to-right order, returning a new expression whose
    /// atoms are of type `B`. The traversal is fail-fast: the
    /// first error returned by `f` short-circuits the rest.
    ///
    /// # Errors
    ///
    /// Returns the first `E` produced by `f`. Subsequent atoms
    /// are not visited; consumers that want to accumulate every
    /// atom-level error MUST layer their own collection on top.
    pub fn try_map<B, E, F>(self, mut f: F) -> Result<Expr<B>, E>
    where
        F: FnMut(A) -> Result<B, E>,
    {
        self.try_map_inner(&mut f)
    }

    fn try_map_inner<B, E, F>(self, f: &mut F) -> Result<Expr<B>, E>
    where
        F: FnMut(A) -> Result<B, E>,
    {
        match self {
            Expr::Atom(a) => Ok(Expr::Atom(f(a)?)),
            Expr::Not(inner) => {
                let inner = (*inner).try_map_inner(f)?;
                Ok(Expr::Not(Box::new(inner)))
            }
            Expr::And(left, right) => {
                let left = (*left).try_map_inner(f)?;
                let right = (*right).try_map_inner(f)?;
                Ok(Expr::And(Box::new(left), Box::new(right)))
            }
            Expr::Or(left, right) => {
                let left = (*left).try_map_inner(f)?;
                let right = (*right).try_map_inner(f)?;
                Ok(Expr::Or(Box::new(left), Box::new(right)))
            }
        }
    }

    /// Evaluate the expression by applying `f` to each atom and
    /// folding the results under the boolean operators of
    /// `QRY-002`.
    ///
    /// Short-circuits in the standard way: `And` stops at the
    /// first `false`, `Or` stops at the first `true`.
    pub fn eval<F>(&self, mut f: F) -> bool
    where
        F: FnMut(&A) -> bool,
    {
        self.eval_inner(&mut f)
    }

    fn eval_inner<F>(&self, f: &mut F) -> bool
    where
        F: FnMut(&A) -> bool,
    {
        match self {
            Expr::Atom(a) => f(a),
            Expr::Not(inner) => !inner.eval_inner(f),
            Expr::And(left, right) => left.eval_inner(f) && right.eval_inner(f),
            Expr::Or(left, right) => left.eval_inner(f) || right.eval_inner(f),
        }
    }

    /// Evaluate the expression by applying a fallible predicate
    /// `f` to each atom.
    ///
    /// Short-circuits in two ways: an `Err` propagates
    /// immediately, and `And` / `Or` stop as soon as the result
    /// is determined. Atoms past the short-circuit point are not
    /// visited and their predicates are not invoked.
    ///
    /// # Errors
    ///
    /// Returns the first `E` produced by `f`.
    pub fn try_eval<F, E>(&self, mut f: F) -> Result<bool, E>
    where
        F: FnMut(&A) -> Result<bool, E>,
    {
        self.try_eval_inner(&mut f)
    }

    fn try_eval_inner<F, E>(&self, f: &mut F) -> Result<bool, E>
    where
        F: FnMut(&A) -> Result<bool, E>,
    {
        match self {
            Expr::Atom(a) => f(a),
            Expr::Not(inner) => Ok(!inner.try_eval_inner(f)?),
            Expr::And(left, right) => Ok(left.try_eval_inner(f)? && right.try_eval_inner(f)?),
            Expr::Or(left, right) => Ok(left.try_eval_inner(f)? || right.try_eval_inner(f)?),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn atom(text: &str, start: usize, end: usize) -> Expr<RawAtom> {
        Expr::Atom(RawAtom {
            text: text.to_owned(),
            span: Span { start, end },
        })
    }

    #[test]
    fn expr_is_structurally_equal_to_a_clone() {
        let original = Expr::And(
            Box::new(atom("a", 0, 1)),
            Box::new(Expr::Or(
                Box::new(atom("b", 4, 5)),
                Box::new(Expr::Not(Box::new(atom("c", 9, 10)))),
            )),
        );
        let cloned = original.clone();
        assert_eq!(original, cloned);
    }

    #[test]
    fn raw_atom_preserves_byte_text_and_span() {
        let atom = RawAtom {
            text: "name:foo".to_owned(),
            span: Span { start: 2, end: 10 },
        };
        assert_eq!(atom.text, "name:foo");
        assert_eq!(atom.span, Span { start: 2, end: 10 });
    }

    #[test]
    fn try_map_transforms_a_single_atom() {
        let expr: Expr<&str> = Expr::Atom("42");
        let mapped: Expr<i32> = expr.try_map(str::parse::<i32>).unwrap();
        assert_eq!(mapped, Expr::Atom(42));
    }

    #[test]
    fn try_map_preserves_tree_structure() {
        let expr: Expr<&str> = Expr::And(
            Box::new(Expr::Or(
                Box::new(Expr::Atom("1")),
                Box::new(Expr::Not(Box::new(Expr::Atom("2")))),
            )),
            Box::new(Expr::Atom("3")),
        );
        let mapped: Expr<i32> = expr.try_map(str::parse::<i32>).unwrap();
        let expected: Expr<i32> = Expr::And(
            Box::new(Expr::Or(
                Box::new(Expr::Atom(1)),
                Box::new(Expr::Not(Box::new(Expr::Atom(2)))),
            )),
            Box::new(Expr::Atom(3)),
        );
        assert_eq!(mapped, expected);
    }

    #[test]
    fn try_map_propagates_first_error_and_stops() {
        let mut visited = Vec::new();
        let expr: Expr<&str> = Expr::And(
            Box::new(Expr::Atom("1")),
            Box::new(Expr::Or(
                Box::new(Expr::Atom("oops")),
                Box::new(Expr::Atom("3")),
            )),
        );
        let result: Result<Expr<i32>, _> = expr.try_map(|s| {
            visited.push(s.to_owned());
            s.parse::<i32>()
        });
        assert!(result.is_err());
        // The third atom ("3") MUST NOT be visited because "oops"
        // short-circuits the traversal.
        assert_eq!(visited, vec!["1", "oops"]);
    }

    #[test]
    fn try_map_visits_atoms_in_left_to_right_order() {
        let mut visited = Vec::new();
        let expr: Expr<&str> = Expr::Or(
            Box::new(Expr::And(
                Box::new(Expr::Atom("a")),
                Box::new(Expr::Atom("b")),
            )),
            Box::new(Expr::Not(Box::new(Expr::Atom("c")))),
        );
        let _: Expr<usize> = expr
            .try_map(|s: &str| {
                visited.push(s.to_owned());
                Ok::<usize, ()>(s.len())
            })
            .unwrap();
        assert_eq!(visited, vec!["a", "b", "c"]);
    }

    // --- Expr::eval ------------------------------------------

    #[test]
    fn eval_returns_predicate_result_for_single_atom() {
        let expr = Expr::Atom("yes");
        assert!(expr.eval(|a| *a == "yes"));
        assert!(!expr.eval(|a| *a == "no"));
    }

    #[test]
    fn eval_applies_negation() {
        let expr = Expr::Not(Box::new(Expr::Atom(true)));
        assert!(!expr.eval(|a| *a));
    }

    #[test]
    fn eval_folds_and_or_with_precedence() {
        // (a & !b) | c
        let expr = Expr::Or(
            Box::new(Expr::And(
                Box::new(Expr::Atom("a")),
                Box::new(Expr::Not(Box::new(Expr::Atom("b")))),
            )),
            Box::new(Expr::Atom("c")),
        );
        let truthy = |s: &&str| *s == "a" || *s == "c";
        assert!(expr.eval(truthy));
        let only_b = |s: &&str| *s == "b";
        assert!(!expr.eval(only_b));
    }

    #[test]
    fn eval_short_circuits_and_on_first_false() {
        let mut visited: Vec<&str> = Vec::new();
        let expr = Expr::And(
            Box::new(Expr::Atom("first")),
            Box::new(Expr::Atom("second")),
        );
        let _ = expr.eval(|a| {
            visited.push(*a);
            *a != "first"
        });
        assert_eq!(visited, vec!["first"]);
    }

    #[test]
    fn eval_short_circuits_or_on_first_true() {
        let mut visited: Vec<&str> = Vec::new();
        let expr = Expr::Or(
            Box::new(Expr::Atom("first")),
            Box::new(Expr::Atom("second")),
        );
        let _ = expr.eval(|a| {
            visited.push(*a);
            true
        });
        assert_eq!(visited, vec!["first"]);
    }

    // --- Expr::try_eval --------------------------------------

    #[test]
    fn try_eval_returns_ok_for_infallible_predicate() {
        let expr: Expr<i32> = Expr::And(Box::new(Expr::Atom(1)), Box::new(Expr::Atom(2)));
        let result: Result<bool, ()> = expr.try_eval(|n| Ok(*n > 0));
        assert_eq!(result, Ok(true));
    }

    #[test]
    fn try_eval_propagates_error_from_first_failing_atom() {
        let mut visited: Vec<i32> = Vec::new();
        let expr: Expr<i32> = Expr::Or(Box::new(Expr::Atom(1)), Box::new(Expr::Atom(2)));
        let result: Result<bool, &'static str> = expr.try_eval(|n| {
            visited.push(*n);
            if *n == 1 { Err("boom") } else { Ok(true) }
        });
        assert_eq!(result, Err("boom"));
        // Right side MUST NOT be visited.
        assert_eq!(visited, vec![1]);
    }

    #[test]
    fn try_eval_short_circuits_and_on_false_without_visiting_right() {
        let mut visited: Vec<i32> = Vec::new();
        let expr: Expr<i32> = Expr::And(Box::new(Expr::Atom(1)), Box::new(Expr::Atom(2)));
        let result: Result<bool, ()> = expr.try_eval(|n| {
            visited.push(*n);
            Ok(*n != 1)
        });
        assert_eq!(result, Ok(false));
        assert_eq!(visited, vec![1]);
    }
}