openvet-policy 0.6.0

//! Requirement expression: parse and evaluate.
//!
//! Grammar:
//! ```text
//! expr         := implies_expr
//! implies_expr := or_expr ('implies' implies_expr)?
//! or_expr      := and_expr ('or' and_expr)*
//! and_expr     := not_expr ('and' not_expr)*
//! not_expr     := 'not' not_expr | atom
//! atom         := claim | '(' expr ')'
//! claim        := [a-zA-Z_][a-zA-Z0-9_-]*
//! ```
//!
//! Standard precedence (`not` > `and` > `or` > `implies`); `implies`
//! is right-associative, so `a implies b implies c` parses as
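//! `a implies (b implies c)`. The reserved words are `and`, `or`,
//! `not`, and `implies`, matched ASCII case-insensitively (`AND`,
//! `And`, and `and` are all the operator), so none of them can be
//! used as a claim name.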
//!
//! `implies` is syntactic sugar: `a implies b` parses to the same
//! AST as `(not a) or b`, so vacuous truth (`False implies _ == True`)
//! and the Kleene truth table for implication fall out of the
//! existing `Or` and `Not` evaluation.
//!
//! [`Expr::And`] and [`Expr::Or`] are n-ary. The parser collects
//! same-operator chains into a single `Vec<Expr>` via the
//! [`Expr::and`] / [`Expr::or`] smart constructors, which splice
//! same-op children — so `a and b and c`, `(a and b) and c`, and
//! the `Or` produced by `a implies b or c` all flatten to one
//! level. Associativity makes the shape change invisible to
//! evaluation, but diagnostic tree rendering is one level shallower
//! per same-op chain.
//!
//! Evaluation is Kleene 3-valued logic over [`Tri`]:
//!
//! - `False` short-circuits `and`.
//! - `True` short-circuits `or`.
//! - Otherwise the result is `Unknown` if any operand is `Unknown`:
//!   `and` is `True` only when every operand is `True`, and `or` is
//!   `False` only when every operand is `False`.

use crate::error::PolicyError;

/// Abstract syntax tree of a requirement expression over claim names.
///
/// Conjunction and disjunction are n-ary: a run of the same operator
/// is stored as one flat `Vec<Expr>` instead of a tower of binary
/// nodes. Build nodes through [`Expr::and`], [`Expr::or`] and
/// [`Expr::not`]; the first two splice same-operator children, so
/// `a and b and c` becomes a single `And([a, b, c])` and
/// `(a or b) or c` a single `Or([a, b, c])`. Because `and` / `or` are
/// associative the flattening cannot change what an expression
/// evaluates to; it only makes a rendered tree one level shallower
/// per same-operator chain.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr {
    /// A claim, referenced by name.
    Claim(String),
    /// Negation of the boxed operand.
    Not(Box<Expr>),
    /// Conjunction of the children, in order. The variant itself does
    /// not enforce a child count; [`Expr::and`] never wraps exactly
    /// one child.
    And(Vec<Expr>),
    /// Disjunction of the children, in order. The variant itself does
    /// not enforce a child count; [`Expr::or`] never wraps exactly
    /// one child.
    Or(Vec<Expr>),
}

impl Expr {
    /// Conjoin `parts` into a flat [`Expr::And`].
    ///
    /// Any element of `parts` that is itself an `And` contributes its
    /// children in place (one level of splicing); every other element
    /// is kept as-is. If exactly one operand remains after splicing it
    /// is returned bare rather than wrapped. An empty `parts` yields
    /// `And([])`, the vacuously-true conjunction; the parser never
    /// asks for that, but the constructor accepts it.
    pub fn and(parts: Vec<Expr>) -> Expr {
        let capacity = parts.len();
        let mut operands =
            parts
                .into_iter()
                .fold(Vec::<Expr>::with_capacity(capacity), |mut acc, part| {
                    match part {
                        Expr::And(nested) => acc.extend(nested),
                        leaf => acc.push(leaf),
                    }
                    acc
                });
        match operands.len() {
            // A lone operand needs no wrapper.
            1 => operands.remove(0),
            _ => Expr::And(operands),
        }
    }

    /// Disjoin `parts` into a flat [`Expr::Or`].
    ///
    /// Any element of `parts` that is itself an `Or` contributes its
    /// children in place (one level of splicing); every other element
    /// is kept as-is. If exactly one operand remains after splicing it
    /// is returned bare rather than wrapped. An empty `parts` yields
    /// `Or([])`, the vacuously-false disjunction; the parser never
    /// asks for that, but the constructor accepts it.
    pub fn or(parts: Vec<Expr>) -> Expr {
        let capacity = parts.len();
        let mut operands =
            parts
                .into_iter()
                .fold(Vec::<Expr>::with_capacity(capacity), |mut acc, part| {
                    match part {
                        Expr::Or(nested) => acc.extend(nested),
                        leaf => acc.push(leaf),
                    }
                    acc
                });
        match operands.len() {
            // A lone operand needs no wrapper.
            1 => operands.remove(0),
            _ => Expr::Or(operands),
        }
    }

    /// Negate `inner` by wrapping it in an [`Expr::Not`].
    ///
    /// This is an inherent constructor rather than a `std::ops::Not`
    /// impl on purpose: call sites then read uniformly as
    /// `Expr::and` / `Expr::or` / `Expr::not` instead of mixing
    /// `Expr::and` with `!expr`.
    #[allow(clippy::should_implement_trait)]
    pub fn not(inner: Expr) -> Expr {
        let boxed = Box::new(inner);
        Expr::Not(boxed)
    }
}

/// Truth value in Kleene's three-valued logic.
///
/// The third state, `Unknown`, stands for "could not be decided". It
/// keeps "the audit never asserted this claim" separate from "the
/// audit asserted this claim as false" all the way through
/// evaluation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Tri {
    /// The claim was asserted and holds.
    True,
    /// The claim was asserted and does not hold.
    False,
    /// The claim was not asserted, or an expression's result could
    /// not be determined.
    Unknown,
}

impl std::ops::Not for Tri {
    type Output = Tri;

    /// Kleene negation: swaps `True` and `False`, leaves `Unknown`
    /// untouched.
    fn not(self) -> Self::Output {
        match self {
            Self::Unknown => Self::Unknown,
            Self::False => Self::True,
            Self::True => Self::False,
        }
    }
}

/// Compute the Kleene three-valued result of `expr`.
///
/// `lookup` maps a claim name to that claim's [`Tri`] state. Children
/// of an n-ary node are visited left to right, and evaluation of a
/// node stops at the first child that decides it — a `False` under
/// `and`, a `True` under `or` — so `lookup` is not called for claims
/// to the right of that child. Negating `Unknown` yields `Unknown`.
///
/// An empty `And` evaluates to `True` and an empty `Or` to `False`.
pub fn evaluate<F>(expr: &Expr, lookup: &F) -> Tri
where
    F: Fn(&str) -> Tri,
{
    match expr {
        Expr::Claim(name) => lookup(name),
        Expr::Not(operand) => !evaluate(operand, lookup),
        // The accumulator starts at `True` and degrades to `Unknown`
        // once any child is `Unknown`. A `False` child aborts the
        // fold (`None`), which then maps to `False`.
        Expr::And(operands) => operands
            .iter()
            .try_fold(Tri::True, |so_far, operand| {
                match evaluate(operand, lookup) {
                    Tri::False => None,
                    Tri::True => Some(so_far),
                    Tri::Unknown => Some(Tri::Unknown),
                }
            })
            .unwrap_or(Tri::False),
        // Mirror image of `And`: the accumulator starts at `False`,
        // degrades to `Unknown`, and a `True` child aborts the fold.
        Expr::Or(operands) => operands
            .iter()
            .try_fold(Tri::False, |so_far, operand| {
                match evaluate(operand, lookup) {
                    Tri::True => None,
                    Tri::False => Some(so_far),
                    Tri::Unknown => Some(Tri::Unknown),
                }
            })
            .unwrap_or(Tri::True),
    }
}

// ──────────────────────────────────────────────────────────────────
// Tokenizer
// ──────────────────────────────────────────────────────────────────

/// Lexical token of the requirement-expression grammar.
#[derive(Debug, PartialEq, Eq)]
enum Token {
    /// An identifier that is not a reserved word, with its original
    /// spelling preserved.
    Ident(String),
    /// The `and` keyword.
    And,
    /// The `or` keyword.
    Or,
    /// The `not` keyword.
    Not,
    /// The `implies` keyword.
    Implies,
    /// An opening parenthesis, `(`.
    LParen,
    /// A closing parenthesis, `)`.
    RParen,
}

/// Split `input` into [`Token`]s.
///
/// Whitespace separates tokens and is otherwise dropped. A word is a
/// maximal run starting at an `is_ident_start` character and
/// continuing over `is_ident_continue` characters; a word that
/// equals `and`, `or`, `not` or `implies` ignoring ASCII case becomes
/// the matching keyword token, any other word becomes
/// [`Token::Ident`] with its spelling unchanged.
///
/// # Errors
///
/// Returns [`PolicyError::ExprParse`] at the first character that is
/// neither whitespace, a parenthesis, nor the start of a word.
fn tokenize(input: &str) -> Result<Vec<Token>, PolicyError> {
    let mut tokens = Vec::new();
    let mut rest = input.chars().peekable();
    while let Some(&next) = rest.peek() {
        match next {
            ws if ws.is_whitespace() => {
                rest.next();
            }
            '(' => {
                rest.next();
                tokens.push(Token::LParen);
            }
            ')' => {
                rest.next();
                tokens.push(Token::RParen);
            }
            start if is_ident_start(start) => {
                // Every start character is also a continue character,
                // so this loop consumes the peeked `start` first.
                let mut word = String::new();
                while let Some(ch) = rest.next_if(|&ch| is_ident_continue(ch)) {
                    word.push(ch);
                }
                let token = match word.to_ascii_lowercase().as_str() {
                    "and" => Token::And,
                    "or" => Token::Or,
                    "not" => Token::Not,
                    "implies" => Token::Implies,
                    _ => Token::Ident(word),
                };
                tokens.push(token);
            }
            c => {
                return Err(PolicyError::ExprParse(format!(
                    "unexpected character {c:?} in expression"
                )));
            }
        }
    }
    Ok(tokens)
}

/// Whether `c` may begin a word (claim name or keyword): an ASCII
/// letter or an underscore.
fn is_ident_start(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}

/// Whether `c` may appear after the first character of a word: an
/// ASCII letter or digit, an underscore, or a hyphen.
fn is_ident_continue(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-')
}

// ──────────────────────────────────────────────────────────────────
// Recursive-descent parser
// ──────────────────────────────────────────────────────────────────

/// Recursive-descent parser over a token stream with one token of
/// lookahead.
///
/// There is one `parse_*` method per grammar rule, each returning the
/// [`Expr`] for the longest prefix of the remaining tokens that
/// matches its rule. Recursion depth grows with the nesting depth of
/// the input (parentheses, `not` chains, `implies` chains).
struct Parser {
    /// Tokens not yet consumed; `Peekable` provides the lookahead.
    tokens: std::iter::Peekable<std::vec::IntoIter<Token>>,
}

impl Parser {
    /// Create a parser positioned before the first of `tokens`.
    fn new(tokens: Vec<Token>) -> Self {
        Self {
            tokens: tokens.into_iter().peekable(),
        }
    }

    /// Borrow the next token without consuming it; `None` at end of
    /// input.
    fn peek(&mut self) -> Option<&Token> {
        self.tokens.peek()
    }

    /// Take the next token; `None` at end of input.
    fn consume(&mut self) -> Option<Token> {
        self.tokens.next()
    }

    /// Consume the next token if, and only if, it equals `expected`.
    /// Returns whether a token was consumed.
    fn eat(&mut self, expected: &Token) -> bool {
        self.tokens.next_if_eq(expected).is_some()
    }

    /// `expr := implies_expr`
    fn parse_expr(&mut self) -> Result<Expr, PolicyError> {
        self.parse_implies()
    }

    /// `implies_expr := or_expr ('implies' implies_expr)?`
    ///
    /// `a implies b` is returned as `(not a) or b`.
    fn parse_implies(&mut self) -> Result<Expr, PolicyError> {
        let antecedent = self.parse_or()?;
        if !self.eat(&Token::Implies) {
            return Ok(antecedent);
        }
        // Right-associative: the consequent is itself an implies_expr.
        // `Expr::or` splices when the consequent is an Or, so
        // `a implies b or c` flattens to `Or([Not(a), b, c])`.
        let consequent = self.parse_implies()?;
        Ok(Expr::or(vec![Expr::not(antecedent), consequent]))
    }

    /// `or_expr := and_expr ('or' and_expr)*`
    fn parse_or(&mut self) -> Result<Expr, PolicyError> {
        let mut parts = vec![self.parse_and()?];
        while self.eat(&Token::Or) {
            parts.push(self.parse_and()?);
        }
        Ok(Expr::or(parts))
    }

    /// `and_expr := not_expr ('and' not_expr)*`
    fn parse_and(&mut self) -> Result<Expr, PolicyError> {
        let mut parts = vec![self.parse_not()?];
        while self.eat(&Token::And) {
            parts.push(self.parse_not()?);
        }
        Ok(Expr::and(parts))
    }

    /// `not_expr := 'not' not_expr | atom`
    fn parse_not(&mut self) -> Result<Expr, PolicyError> {
        if self.eat(&Token::Not) {
            let inner = self.parse_not()?;
            return Ok(Expr::not(inner));
        }
        self.parse_atom()
    }

    /// `atom := claim | '(' expr ')'`
    ///
    /// # Errors
    ///
    /// Returns [`PolicyError::ExprParse`] when the input ends, when the
    /// next token cannot start an atom, or when a parenthesised
    /// expression is not followed by `)`.
    fn parse_atom(&mut self) -> Result<Expr, PolicyError> {
        match self.consume() {
            Some(Token::Ident(s)) => Ok(Expr::Claim(s)),
            Some(Token::LParen) => {
                let inner = self.parse_expr()?;
                match self.consume() {
                    Some(Token::RParen) => Ok(inner),
                    _ => Err(PolicyError::ExprParse("missing closing ')'".into())),
                }
            }
            Some(t) => Err(PolicyError::ExprParse(format!(
                "unexpected token {t:?}; expected claim or '('"
            ))),
            None => Err(PolicyError::ExprParse(
                "unexpected end of expression".into(),
            )),
        }
    }
}

/// Parse a requirement-expression string into an [`Expr`].
///
/// See the module docs for the grammar.
///
/// # Errors
///
/// Returns [`PolicyError::ExprParse`] when `input` cannot be
/// tokenized, is not a well-formed expression (including the empty
/// string), or has tokens left over after a complete expression
/// (e.g. `a b`).
pub fn parse(input: &str) -> Result<Expr, PolicyError> {
    let mut parser = Parser::new(tokenize(input)?);
    let expr = parser.parse_expr()?;
    // Report the offending token itself rather than an `Option`
    // wrapper, so the message reads `Ident("b")` and not
    // `Some(Ident("b"))`.
    if let Some(extra) = parser.peek() {
        return Err(PolicyError::ExprParse(format!(
            "trailing tokens after expression: {extra:?}"
        )));
    }
    Ok(expr)
}

// Unit tests: evaluation truth tables, operator precedence and
// associativity, AST shape produced by the smart constructors, and
// parse-error cases.
#[cfg(test)]
mod tests {
    use super::*;

    // Parse `expr` and evaluate it against `claims`: a listed name maps
    // to `True` / `False` per its bool, an unlisted name is `Unknown`.
    // Panics if `expr` does not parse.
    fn ev(expr: &str, claims: &[(&str, bool)]) -> Tri {
        let e = parse(expr).unwrap();
        let lookup = |name: &str| match claims.iter().find(|(n, _)| *n == name) {
            Some((_, true)) => Tri::True,
            Some((_, false)) => Tri::False,
            None => Tri::Unknown,
        };
        evaluate(&e, &lookup)
    }

    #[test]
    fn single_claim() {
        assert_eq!(ev("safe-to-deploy", &[("safe-to-deploy", true)]), Tri::True);
        assert_eq!(
            ev("safe-to-deploy", &[("safe-to-deploy", false)]),
            Tri::False
        );
        assert_eq!(ev("safe-to-deploy", &[]), Tri::Unknown);
    }

    #[test]
    fn and_basic() {
        assert_eq!(ev("a and b", &[("a", true), ("b", true)]), Tri::True);
        assert_eq!(ev("a and b", &[("a", true), ("b", false)]), Tri::False);
        assert_eq!(ev("a and b", &[("a", true)]), Tri::Unknown);
    }

    #[test]
    fn and_short_circuits_on_false() {
        // `b` is unknown but irrelevant since `a` is false.
        assert_eq!(ev("a and b", &[("a", false)]), Tri::False);
    }

    #[test]
    fn or_basic() {
        assert_eq!(ev("a or b", &[("a", false), ("b", true)]), Tri::True);
        assert_eq!(ev("a or b", &[("a", false), ("b", false)]), Tri::False);
        assert_eq!(ev("a or b", &[("a", false)]), Tri::Unknown);
    }

    #[test]
    fn or_short_circuits_on_true() {
        // `b` is unknown but irrelevant since `a` is true.
        assert_eq!(ev("a or b", &[("a", true)]), Tri::True);
    }

    #[test]
    fn not_inverts() {
        assert_eq!(ev("not a", &[("a", true)]), Tri::False);
        assert_eq!(ev("not a", &[("a", false)]), Tri::True);
        assert_eq!(ev("not a", &[]), Tri::Unknown);
    }

    #[test]
    fn precedence_not_binds_tightest() {
        // `not a and b` parses as `(not a) and b`, not `not (a and b)`.
        assert_eq!(ev("not a and b", &[("a", false), ("b", true)]), Tri::True);
        assert_eq!(ev("not a and b", &[("a", true), ("b", true)]), Tri::False);
    }

    #[test]
    fn precedence_and_binds_tighter_than_or() {
        // `a or b and c` parses as `a or (b and c)`.
        assert_eq!(
            ev("a or b and c", &[("a", false), ("b", true), ("c", true)]),
            Tri::True
        );
        assert_eq!(
            ev("a or b and c", &[("a", false), ("b", true), ("c", false)]),
            Tri::False
        );
    }

    #[test]
    fn parens_override_precedence() {
        // `(a or b) and c` — without parens this'd be `a or (b and c)`.
        assert_eq!(
            ev("(a or b) and c", &[("a", true), ("c", false)]),
            Tri::False
        );
    }

    #[test]
    fn nested_not_and_or() {
        assert_eq!(
            ev(
                "not (a and b) or c",
                &[("a", true), ("b", true), ("c", false)]
            ),
            Tri::False
        );
        assert_eq!(
            ev("not (a and b) or c", &[("a", true), ("b", false)]),
            Tri::True
        );
    }

    #[test]
    fn case_insensitive_keywords() {
        assert_eq!(ev("a AND b", &[("a", true), ("b", true)]), Tri::True);
        assert_eq!(ev("NOT a", &[("a", true)]), Tri::False);
        assert_eq!(ev("a IMPLIES b", &[("a", true), ("b", true)]), Tri::True);
    }

    #[test]
    fn implies_truth_table() {
        // Classical cases.
        assert_eq!(ev("a implies b", &[("a", true), ("b", true)]), Tri::True);
        assert_eq!(ev("a implies b", &[("a", true), ("b", false)]), Tri::False);
        // Vacuous truth: false antecedent → true regardless of consequent.
        assert_eq!(ev("a implies b", &[("a", false), ("b", false)]), Tri::True);
        assert_eq!(ev("a implies b", &[("a", false)]), Tri::True);
        // Kleene: true antecedent, unknown consequent → unknown.
        assert_eq!(ev("a implies b", &[("a", true)]), Tri::Unknown);
        // Kleene: unknown antecedent, true consequent → true
        // (the `or` short-circuits regardless of the negated unknown).
        assert_eq!(ev("a implies b", &[("b", true)]), Tri::True);
        // Kleene: unknown antecedent, false consequent → unknown.
        assert_eq!(ev("a implies b", &[("b", false)]), Tri::Unknown);
    }

    #[test]
    fn implies_lower_than_or_and_and() {
        // `a implies b and c` parses as `a implies (b and c)`.
        assert_eq!(
            ev(
                "a implies b and c",
                &[("a", true), ("b", true), ("c", false)]
            ),
            Tri::False
        );
        // `a or b implies c` parses as `(a or b) implies c`.
        // With a=true, antecedent is true; consequent c=false → false.
        assert_eq!(
            ev(
                "a or b implies c",
                &[("a", true), ("b", false), ("c", false)]
            ),
            Tri::False
        );
        // With a=false, b=false: antecedent false → vacuously true.
        assert_eq!(
            ev(
                "a or b implies c",
                &[("a", false), ("b", false), ("c", false)]
            ),
            Tri::True
        );
    }

    #[test]
    fn implies_right_associative() {
        // `a implies b implies c` parses as `a implies (b implies c)`.
        // Witness on which the two groupings disagree:
        // a=false, b=true, c=false.
        //   right-assoc: F → (T → F) = F → F = T
        //   left-assoc:  (F → T) → F = T → F = F
        // (a=true, b=false, c=false would not do: both groupings
        // give T.)
        assert_eq!(
            ev(
                "a implies b implies c",
                &[("a", false), ("b", true), ("c", false)]
            ),
            Tri::True
        );
    }

    #[test]
    fn implies_desugars_to_or_not() {
        // `a implies b` should parse to the same AST as `(not a) or b`.
        let lhs = parse("a implies b").unwrap();
        let rhs = parse("(not a) or b").unwrap();
        assert_eq!(lhs, rhs);
    }

    #[test]
    fn nary_and_chain_flattens() {
        // `a and b and c` parses to one n-ary And, not a left-folded
        // pair of binary Ands.
        let e = parse("a and b and c").unwrap();
        let Expr::And(children) = e else {
            panic!("expected top-level And");
        };
        assert_eq!(children.len(), 3);
        assert_eq!(children[0], Expr::Claim("a".into()));
        assert_eq!(children[1], Expr::Claim("b".into()));
        assert_eq!(children[2], Expr::Claim("c".into()));
    }

    #[test]
    fn nary_or_chain_flattens() {
        // `a or b or c` parses to one n-ary Or with three children.
        let e = parse("a or b or c").unwrap();
        let Expr::Or(children) = e else {
            panic!("expected top-level Or");
        };
        assert_eq!(children.len(), 3);
    }

    #[test]
    fn parens_with_same_op_get_spliced() {
        // `(a or b) or c` should flatten to one Or, not nest.
        let e = parse("(a or b) or c").unwrap();
        let Expr::Or(children) = e else {
            panic!("expected flat Or");
        };
        assert_eq!(children.len(), 3);

        // `a and (b and c)` same — parens can't change associativity,
        // so the shape collapses.
        let e = parse("a and (b and c)").unwrap();
        let Expr::And(children) = e else {
            panic!("expected flat And");
        };
        assert_eq!(children.len(), 3);
    }

    #[test]
    fn implies_with_or_rhs_splices_into_one_or() {
        // `a implies b or c` desugars to `or(not(a), or(b, c))` and
        // then the smart constructor splices the inner Or, giving
        // `Or([Not(a), b, c])` — three flat children.
        let e = parse("a implies b or c").unwrap();
        let Expr::Or(children) = e else {
            panic!("expected top-level Or");
        };
        assert_eq!(children.len(), 3);
        assert_eq!(children[0], Expr::not(Expr::Claim("a".into())));
        assert_eq!(children[1], Expr::Claim("b".into()));
        assert_eq!(children[2], Expr::Claim("c".into()));
    }

    #[test]
    fn mixed_op_nesting_is_preserved() {
        // `a and (b or c) and d`: the middle child is an Or, not an
        // And, so it stays nested under the outer And.
        let e = parse("a and (b or c) and d").unwrap();
        let Expr::And(children) = e else {
            panic!("expected top-level And");
        };
        assert_eq!(children.len(), 3);
        assert!(matches!(&children[1], Expr::Or(inner) if inner.len() == 2));
    }

    #[test]
    fn single_element_collapses() {
        // `Expr::and(vec![x])` returns `x` directly, not `And([x])`.
        let x = Expr::Claim("x".into());
        assert_eq!(Expr::and(vec![x.clone()]), x);
        assert_eq!(Expr::or(vec![x.clone()]), x);
    }

    #[test]
    fn parse_errors() {
        assert!(parse("a and").is_err()); // missing right operand
        assert!(parse("(a or b").is_err()); // unclosed paren
        assert!(parse("and a").is_err()); // missing left operand
        assert!(parse("a b").is_err()); // trailing token
        assert!(parse("").is_err()); // empty input
        assert!(parse("a #").is_err()); // unknown character
        assert!(parse("a implies").is_err()); // missing consequent
        assert!(parse("implies b").is_err()); // missing antecedent
    }
}