// cel-parser 0.8.1
//
// A parser for the Common Expression Language (CEL).
// Documentation
use crate::{RelationOp, ArithmeticOp, Expression, UnaryOp, Member, Atom, parse_bytes, parse_string};
use std::sync::Arc;

grammar;

// Lexer configuration. Patterns in the first group map to an empty action,
// so the text they match is discarded instead of being handed to the grammar.
match {
    // Skip whitespace and comments
   r"\s*" => { },
   r"//[^\n\r]*[\n\r]*" => { },
} else {
   // All remaining terminals used by the rules below.
   _
}

// Top-level rule: a complete expression. Parsing starts at the conditional
// rule, which in turn falls through to every tighter-binding rule.
pub Expression: Expression = {
    Conditional,
};

// Ternary conditional `cond ? a : b`. The else-branch recurses into
// `Conditional`, so nested ternaries group to the right.
pub Conditional: Expression = {
    <cond:LogicalOr> "?" <then_branch:LogicalOr> ":" <else_branch:Conditional> =>
        Expression::Ternary(cond.into(), then_branch.into(), else_branch.into()),
    LogicalOr,
};

// Logical disjunction `a || b`; left-recursive, hence left-associative.
pub LogicalOr: Expression = {
    <lhs:LogicalOr> "||" <rhs:LogicalAnd> =>
        Expression::Or(lhs.into(), rhs.into()),
    LogicalAnd,
};

// Logical conjunction `a && b`; left-recursive, hence left-associative.
pub LogicalAnd: Expression = {
    <lhs:LogicalAnd> "&&" <rhs:Relations> =>
        Expression::And(lhs.into(), rhs.into()),
    Relations,
};

// Relational operators (`<`, `<=`, `>`, `>=`, `==`, `!=`, `in`).
//
// The rule is left-recursive so that relations can be chained, e.g.
// `a == b == c` parses as `(a == b) == c`. This follows the CEL grammar,
// which defines `Relation = [Relation Relop] Addition`. Expressions with a
// single relation parse exactly as before.
pub Relations: Expression = {
    <left:Relations> <op:RelationOp> <right:ArithmeticAddSub> =>
        Expression::Relation(left.into(), op, right.into()),
    ArithmeticAddSub,
};

// Additive arithmetic (`+`, `-`); left-associative, binds looser than
// multiplication, division and modulus.
pub ArithmeticAddSub: Expression = {
    <lhs:ArithmeticAddSub> <operator:ArithmeticOpAddSub> <rhs:ArithmeticMulDivMod> =>
        Expression::Arithmetic(lhs.into(), operator, rhs.into()),
    ArithmeticMulDivMod,
};

// Multiplicative arithmetic (`*`, `/`, `%`); left-associative, binds looser
// than unary operators.
pub ArithmeticMulDivMod: Expression = {
    <lhs:ArithmeticMulDivMod> <operator:ArithmeticOpMulDivMod> <rhs:Unary> =>
        Expression::Arithmetic(lhs.into(), operator, rhs.into()),
    Unary,
};

// Prefix operators applied to a member expression. The operand is a `Member`,
// not another `Unary`, so only the operator tokens listed in `UnaryOp` can
// appear in front of it.
pub Unary: Expression = {
    <operator:UnaryOp> <operand:Member> =>
        Expression::Unary(operator, operand.into()),
    Member,
};

// Postfix forms applied to a receiver expression: attribute access, method
// call, indexing and field initialisation. Left-recursive, so postfix
// operations chain from left to right (`a.b[0].c()`).
pub Member: Expression = {
    // `receiver.name`
    <receiver:Member> "." <name:Ident> =>
        Expression::Member(receiver.into(), Member::Attribute(name.into()).into()),
    // `receiver.name(args...)`: a call carrying the receiver as its target.
    <receiver:Member> "." <name:Ident> "(" <args:CommaSeparated<Expression>> ")" =>
        Expression::FunctionCall(Expression::Ident(name).into(), Some(receiver.into()), args),
    // `receiver[index]`
    <receiver:Member> "[" <index:Expression> "]" =>
        Expression::Member(receiver.into(), Member::Index(index.into()).into()),
    // `receiver{field: value, ...}`
    <receiver:Member> "{" <inits:CommaSeparated<FieldInits>> "}" =>
        Expression::Member(receiver.into(), Member::Fields(inits.into()).into()),
    Primary,
}

// The tightest-binding expressions: identifiers, free function calls,
// literals, list and map constructors, and parenthesised sub-expressions.
pub Primary: Expression = {
    // Identifier; an optional leading `.` is accepted and discarded.
    "."? <name:Ident> => Expression::Ident(name.into()),
    // Function call without a receiver (target is `None`).
    "."? <name:Ident> "(" <args:CommaSeparated<Expression>> ")" =>
        Expression::FunctionCall(Expression::Ident(name).into(), None, args),
    // Literal value.
    <atom:Atom> => Expression::Atom(atom),
    // List constructor `[a, b, ...]`.
    "[" <members:CommaSeparated<Expression>> "]" => Expression::List(members),
    // Map constructor `{k: v, ...}`.
    "{" <fields:CommaSeparated<MapInits>> "}" => Expression::Map(fields),
    // Parenthesised expression; yields the inner expression unchanged.
    "(" <inner:Expression> ")" => inner
}

// One `name: value` entry of a field-initialiser list, as a
// (field name, value expression) pair.
pub FieldInits: (Arc<String>, Expression) = {
    <name:Ident> ":" <value:Expression> => (name, value)
}

// One `key: value` entry of a map constructor, as a
// (key expression, value expression) pair.
pub MapInits: (Expression, Expression) = {
    <key:Expression> ":" <value:Expression> => (key, value)
}

// Generic comma-separated list of `T`. Accepts zero or more `T ","` items
// followed by an optional final `T`, so both an empty list and a trailing
// comma are allowed.
CommaSeparated<T>: Vec<T> = {
    <head:(<T> ",")*> <tail:T?> => {
        let mut items = head;
        // `tail` is an Option: extending appends it when present, no-op otherwise.
        items.extend(tail);
        items
    }
};

// Binary operators of additive precedence.
ArithmeticOpAddSub: ArithmeticOp = {
    "+" => ArithmeticOp::Add,
    "-" => ArithmeticOp::Subtract,
};

// Binary operators of multiplicative precedence.
ArithmeticOpMulDivMod: ArithmeticOp = {
    "*" => ArithmeticOp::Multiply,
    "/" => ArithmeticOp::Divide,
    "%" => ArithmeticOp::Modulus,
};


// Prefix operators. The doubled forms (`!!`, `--`) are separate tokens with
// their own `UnaryOp` variants.
UnaryOp: UnaryOp = {
    // Logical negation.
    "!" => UnaryOp::Not,
    "!!" => UnaryOp::DoubleNot,
    // Arithmetic negation.
    "-" => UnaryOp::Minus,
    "--" => UnaryOp::DoubleMinus,
}

// Comparison, equality and membership operators.
RelationOp: RelationOp = {
    // Ordering.
    "<" => RelationOp::LessThan,
    "<=" => RelationOp::LessThanEq,
    ">" => RelationOp::GreaterThan,
    ">=" => RelationOp::GreaterThanEq,
    // Equality.
    "==" => RelationOp::Equals,
    "!=" => RelationOp::NotEquals,
    // Membership.
    "in" => RelationOp::In,
}

// Literal values: signed and unsigned integers, floats, strings, bytes,
// booleans and `null`.
//
// NOTE(review): the conversions below call `unwrap()`, so a literal that
// matches its pattern but fails conversion (for example a decimal integer with
// more digits than an `i64` can hold) panics instead of producing a parse
// error. Converting these into fallible actions would need the parser's error
// type, which is not visible from this file.
Atom: Atom = {
    // Signed integer literals, decimal and hexadecimal. The optional sign is
    // part of the token itself.
    // NOTE(review): because of that, input such as `1-2` presumably lexes as
    // the two integer tokens `1` and `-2` rather than a subtraction; confirm.
    <s:r"-?[0-9]+"> => Atom::Int(s.parse().unwrap()),
    // The `x`/`X` marker is filtered out so that `from_str_radix` sees only an
    // optional sign followed by hexadecimal digits.
    <s:r"-?0[xX][0-9a-fA-F]+"> => Atom::Int(i64::from_str_radix(&s.chars().filter(|&x| x != 'x' && x != 'X').collect::<String>(), 16).unwrap()),

    // Unsigned integer literals: digits followed by a `u`/`U` suffix, which is
    // stripped before conversion.
    <s:r"[0-9]+[uU]"> => Atom::UInt(s[..s.len()-1].parse().unwrap()),
    // Unsigned values carry no sign. The hexadecimal pattern used to accept a
    // leading `-`, which `u64::from_str_radix` rejects, so input like `-0x1u`
    // panicked in `unwrap()`. Without the sign in the pattern such input no
    // longer reaches this action.
    <s:r"0[xX][0-9a-fA-F]+[uU]"> => Atom::UInt(u64::from_str_radix(&s.chars().filter(|&x| x != 'x' && x != 'X' && x != 'u' && x != 'U').collect::<String>(), 16).unwrap()),

    // Float with decimals and optional exponent
    <s:r"([-+]?[0-9]*\.[0-9]+([eE][-+]?[0-9]+)?)"> => Atom::Float(s.parse().unwrap()),
    // Float with no decimals and required exponent
    <s:r"[-+]?[0-9]+[eE][-+]?[0-9]+"> => Atom::Float(s.parse().unwrap()),

    // NOTE: triple-quoted strings and bytes are not supported. The patterns
    // tried for them caused "attempt to subtract with overflow" errors within
    // the LALRPOP parser, so those rules were left out.

    // Double quoted string, optionally with an `r`/`R` prefix. The pattern
    // reads as follows:
    //   1. `"`: the opening double quote.
    //   2. `([^"\\]*(?:\\.[^"\\]*)*)`: the content between the quotes.
    //     a. `[^"\\]*`: any run of characters that are neither a double quote
    //        nor a backslash.
    //     b. `(?:\\.[^"\\]*)*`: an escaped character (backslash plus one
    //        character, e.g. \" or \\) followed by another such run, repeated
    //        any number of times.
    //   3. `"`: the closing double quote.
    // The whole token, quotes and prefix included, is passed to `parse_string`.
    <s:r#""([^"\\]*(?:\\.[^"\\]*)*)""#> => Atom::String(parse_string(s).unwrap().into()),
    <s:r#"[rR]"([^"\\]*(?:\\.[^"\\]*)*)""#> => Atom::String(parse_string(s).unwrap().into()),

    // Single quoted string: the same pattern with the double quote replaced by
    // a single quote.
    <s:r#"'([^'\\]*(?:\\.[^'\\]*)*)'"#> => Atom::String(parse_string(s).unwrap().into()),
    <s:r#"[rR]'([^'\\]*(?:\\.[^'\\]*)*)'"#> => Atom::String(parse_string(s).unwrap().into()),

    // Bytes literals, double or single quoted. Only the text between the
    // quotes is passed on: the first two characters (`b`/`B` and the opening
    // quote) and the closing quote are sliced off.
    <s:r#"[bB]"(\\.|[^"\n])*""#> => Atom::Bytes(parse_bytes(&s[2..s.len()-1]).unwrap().into()),
    <s:r#"[bB]'(\\.|[^'\n])*'"#> => Atom::Bytes(parse_bytes(&s[2..s.len()-1]).unwrap().into()),

    "true" => Atom::Bool(true),
    "false" => Atom::Bool(false),
    "null" => Atom::Null,
};

// Identifier: a letter or underscore followed by any number of letters,
// digits or underscores. The matched text is copied into a shared `Arc<String>`.
Ident: Arc<String> = {
    <name:r"[_a-zA-Z][_a-zA-Z0-9]*"> => Arc::new(name.to_string())
}