rust_cel_parser/parser/
mod.rs

1use lazy_static::lazy_static;
2use pest::Parser;
3use pest::iterators::{Pair, Pairs};
4use pest::pratt_parser::{Assoc, Op, PrattParser};
5use pest_derive::Parser;
6
7use crate::ast::{BinaryOperator, CelType, ComprehensionOp, Expr, Literal, UnaryOperator};
8use crate::error::CelParserError;
9
/// Parser type for CEL source text.
///
/// The `Parser` derive (imported from `pest_derive`) is driven by the grammar
/// file named in the `grammar` attribute. This module invokes it as
/// `CelParser::parse(Rule::program, input)` and matches on `Rule::*` variants
/// to walk the resulting pairs.
10#[derive(Parser)]
11#[grammar = "parser/cel_grammar.pest"]
12pub struct CelParser;
13
14// --- Pratt Parser Setup ---
15lazy_static! {
16    static ref PRATT_PARSER: PrattParser<Rule> = {
17        use Assoc::*; // Left, Right
18        use Rule::*;
19
20        // Configure precedence and associativity following CEL specification.
21        // Order for Pest PrattParser: LOWEST precedence group first, HIGHEST precedence group last.
22        PrattParser::new()
23            // Level 7: Logical OR (||) - Lowest infix precedence
24            .op(Op::infix(log_or, Left))
25            // Level 6: Logical AND (&&)
26            .op(Op::infix(log_and, Left))
27            // Level 5: Relations / Equality (==, !=, <, <=, >, >=, in)
28            .op(Op::infix(eq, Left) | Op::infix(ne, Left) |
29                Op::infix(lt, Left) | Op::infix(le, Left) |
30                Op::infix(gt, Left) | Op::infix(ge, Left) |
31                Op::infix(in_op, Left))
32            // Level 4: Add / Subtract (+, -)
33            .op(Op::infix(add, Left) | Op::infix(binary_minus, Left))
34            // Level 3: Multiply / Divide / Remainder (*, /, %) - Highest infix precedence
35            .op(Op::infix(div, Left) | Op::infix(rem, Left))
36            .op(Op::infix(mul, Left))
37            // Level 2: Unary Prefix Operators (!, -)
38            .op(Op::prefix(log_not) | Op::prefix(unary_minus))
39    };
40}
41
42/// Parses a full CEL program string into an AST.
43pub fn parse_cel_program(input: &str) -> Result<Expr, CelParserError> {
44    let mut top_level_pairs = CelParser::parse(Rule::program, input)?;
45
46    let program_pair = top_level_pairs.next().ok_or_else(|| {
47        CelParserError::InternalError("No top-level program match found".to_string())
48    })?;
49    if program_pair.as_rule() != Rule::program {
50        return Err(CelParserError::InternalError(format!(
51            "Expected Rule::program, got {:?}",
52            program_pair.as_rule()
53        )));
54    }
55    if top_level_pairs.next().is_some() {
56        return Err(CelParserError::InternalError(
57            "Unexpected extra data after main program".to_string(),
58        ));
59    }
60
61    // program = { SOI ~ expr ~ EOI }
62    let mut inner_program_pairs = program_pair.into_inner();
63
64    let expr_pair = inner_program_pairs
65        .next()
66        .ok_or_else(|| CelParserError::InternalError("Missing expr".to_string()))?;
67    let eoi_pair = inner_program_pairs
68        .next()
69        .ok_or_else(|| CelParserError::InternalError("Missing EOI".to_string()))?;
70
71    if eoi_pair.as_rule() != Rule::EOI {
72        return Err(CelParserError::InternalError(format!(
73            "Expected EOI, got {:?}",
74            eoi_pair.as_rule()
75        )));
76    }
77
78    // Ensure no extra pairs remain
79    if inner_program_pairs.next().is_some() {
80        return Err(CelParserError::InternalError(
81            "Unexpected extra pairs after EOI".to_string(),
82        ));
83    }
84
85    // Start the recursive build from the top-level 'expr' rule
86    build_ast_from_expr(expr_pair)
87}
88
89/// Builds an AST node from a Pair representing the 'expr' rule.
90fn build_ast_from_expr(pair: Pair<Rule>) -> Result<Expr, CelParserError> {
91    // expr = { pratt_operand_sequence ~ (cond ~ expr ~ cond_else ~ expr)? }
92    let mut inner = pair.into_inner();
93    let pratt_seq_pair = inner.next().ok_or_else(|| {
94        CelParserError::InternalError("Missing pratt sequence in expr".to_string())
95    })?;
96
97    if let Some(cond_op_pair) = inner.next() {
98        // Check if the optional '?' exists
99        // Ternary case: ? expr : expr
100        if cond_op_pair.as_rule() != Rule::cond {
101            return Err(CelParserError::InternalError(
102                "Expected '?' for conditional".to_string(),
103            ));
104        }
105        let true_branch_pair = inner.next().ok_or_else(|| {
106            CelParserError::InternalError("Missing true branch for conditional".to_string())
107        })?;
108        let else_op_pair = inner.next().ok_or_else(|| {
109            CelParserError::InternalError("Missing ':' for conditional".to_string())
110        })?;
111        if else_op_pair.as_rule() != Rule::cond_else {
112            return Err(CelParserError::InternalError(
113                "Expected ':' for conditional".to_string(),
114            ));
115        }
116        let false_branch_pair = inner.next().ok_or_else(|| {
117            CelParserError::InternalError("Missing false branch for conditional".to_string())
118        })?;
119
120        // Build the condition part using the Pratt parser
121        let condition_ast = build_ast_from_pratt_sequence(pratt_seq_pair.into_inner())?;
122        // Recursively build the branches
123        let true_branch_ast = build_ast_from_expr(true_branch_pair)?;
124        let false_branch_ast = build_ast_from_expr(false_branch_pair)?;
125
126        Ok(Expr::Conditional {
127            cond: Box::new(condition_ast),
128            true_branch: Box::new(true_branch_ast),
129            false_branch: Box::new(false_branch_ast),
130        })
131    } else {
132        // No ternary operator, just parse the Pratt sequence
133        build_ast_from_pratt_sequence(pratt_seq_pair.into_inner())
134    }
135}
136
137/// Builds an AST node using the Pratt parser from the inner pairs of 'pratt_operand_sequence'.
138fn build_ast_from_pratt_sequence(pairs: Pairs<Rule>) -> Result<Expr, CelParserError> {
139    PRATT_PARSER
140        .map_primary(|primary| {
141            let mut inner = primary.into_inner();
142            let base_pair = inner.next().unwrap();
143            let mut ast = build_ast_from_primary(base_pair)?;
144
145            for chain_part in inner {
146                ast = match chain_part.as_rule() {
147                    Rule::comprehension => {
148                        let mut inner_comp = chain_part.into_inner();
149                        let op_str = inner_comp.next().unwrap().as_str();
150                        let iter_var = inner_comp.next().unwrap().as_str().to_string();
151                        let predicate_pair = inner_comp.next().unwrap();
152                        let op = match op_str {
153                            "all" => ComprehensionOp::All,
154                            "exists" => ComprehensionOp::Exists,
155                            "exists_one" => ComprehensionOp::ExistsOne,
156                            "filter" => ComprehensionOp::Filter,
157                            _ => unreachable!(),
158                        };
159                        let predicate_ast = build_ast_from_expr(predicate_pair)?;
160                        Expr::Comprehension {
161                            op,
162                            target: Box::new(ast),
163                            iter_var,
164                            predicate: Box::new(predicate_ast),
165                        }
166                    }
167                    Rule::map_macro => {
168                        let mut inner_map = chain_part.into_inner();
169                        inner_map.next(); // Skip map_macro_word
170                        let iter_var = inner_map.next().unwrap().as_str().to_string();
171                        let first_expr_pair = inner_map.next().unwrap();
172
173                        let (filter_ast, transform_ast) =
174                            if let Some(second_expr_pair) = inner_map.next() {
175                                // Three-arg version: map(x, p, t)
176                                let filter = build_ast_from_expr(first_expr_pair)?;
177                                let transform = build_ast_from_expr(second_expr_pair)?;
178                                (Some(Box::new(filter)), Box::new(transform))
179                            } else {
180                                // Two-arg version: map(x, t)
181                                let transform = build_ast_from_expr(first_expr_pair)?;
182                                (None, Box::new(transform))
183                            };
184
185                        Expr::Map {
186                            target: Box::new(ast),
187                            iter_var,
188                            filter: filter_ast,
189                            transform: transform_ast,
190                        }
191                    }
192                    Rule::member_call => {
193                        let mut inner_call = chain_part.into_inner();
194                        let field_access_pair = inner_call.next().unwrap();
195                        let call_args_pair = inner_call.next().unwrap();
196
197                        let field_name = field_access_pair
198                            .into_inner()
199                            .next()
200                            .unwrap()
201                            .as_str()
202                            .to_string();
203                        let target = Expr::FieldAccess {
204                            base: Box::new(ast),
205                            field: field_name,
206                        };
207
208                        let mut args = Vec::new();
209                        if let Some(expr_list) = call_args_pair.into_inner().next() {
210                            for arg in expr_list.into_inner() {
211                                args.push(build_ast_from_expr(arg)?);
212                            }
213                        }
214                        Expr::Call {
215                            target: Box::new(target),
216                            args,
217                        }
218                    }
219                    Rule::field_access => {
220                        let field_name =
221                            chain_part.into_inner().next().unwrap().as_str().to_string();
222                        Expr::FieldAccess {
223                            base: Box::new(ast),
224                            field: field_name,
225                        }
226                    }
227                    Rule::index_access => {
228                        let index_pair = chain_part.into_inner().next().unwrap();
229                        let index_ast = build_ast_from_expr(index_pair)?;
230                        Expr::Index {
231                            base: Box::new(ast),
232                            index: Box::new(index_ast),
233                        }
234                    }
235                    _ => unreachable!(
236                        "Unexpected part in member_chain: {:?}",
237                        chain_part.as_rule()
238                    ),
239                }
240            }
241            Ok(ast)
242        })
243        .map_prefix(|op, rhs| {
244            let rhs_ast = rhs?;
245            match op.as_rule() {
246                Rule::log_not => Ok(Expr::UnaryOp {
247                    op: UnaryOperator::Not,
248                    operand: Box::new(rhs_ast),
249                }),
250                Rule::unary_minus => Ok(Expr::UnaryOp {
251                    op: UnaryOperator::Neg,
252                    operand: Box::new(rhs_ast),
253                }),
254                _ => Err(CelParserError::InternalError(format!(
255                    "Unexpected prefix operator: {:?}",
256                    op.as_rule()
257                ))),
258            }
259        })
260        .map_infix(|lhs, op, rhs| {
261            let lhs_ast = lhs?;
262            let rhs_ast = rhs?;
263            let binary_op = match op.as_rule() {
264                Rule::log_or => BinaryOperator::Or,
265                Rule::log_and => BinaryOperator::And,
266                Rule::eq => BinaryOperator::Eq,
267                Rule::ne => BinaryOperator::Ne,
268                Rule::lt => BinaryOperator::Lt,
269                Rule::le => BinaryOperator::Le,
270                Rule::gt => BinaryOperator::Gt,
271                Rule::ge => BinaryOperator::Ge,
272                Rule::in_op => BinaryOperator::In,
273                Rule::add => BinaryOperator::Add,
274                Rule::binary_minus => BinaryOperator::Sub,
275                Rule::mul => BinaryOperator::Mul,
276                Rule::div => BinaryOperator::Div,
277                Rule::rem => BinaryOperator::Rem,
278                _ => {
279                    return Err(CelParserError::InternalError(format!(
280                        "Unexpected infix operator: {:?}",
281                        op.as_rule()
282                    )));
283                }
284            };
285            Ok(Expr::BinaryOp {
286                op: binary_op,
287                left: Box::new(lhs_ast),
288                right: Box::new(rhs_ast),
289            })
290        })
291        .parse(pairs)
292}
293
294fn build_ast_from_primary(pair: Pair<Rule>) -> Result<Expr, CelParserError> {
295    // `primary` is a wrapper rule. We must look at its single inner pair
296    // to determine the actual expression type.
297    let inner_pair = pair
298        .into_inner()
299        .next()
300        .ok_or_else(|| CelParserError::InternalError("Empty primary rule".to_string()))?;
301
302    match inner_pair.as_rule() {
303        Rule::message_lit => {
304            let mut inner = inner_pair.into_inner();
305            let type_name_pair = inner.next().unwrap();
306            let type_name = type_name_pair.as_str().to_string();
307            let mut fields = Vec::new();
308            for field_pair in inner {
309                if field_pair.as_rule() == Rule::field_init {
310                    let mut inner_field = field_pair.into_inner();
311                    let field_name_pair = inner_field.next().unwrap();
312                    let value_pair = inner_field.next().unwrap();
313                    let field_name = field_name_pair.as_str().to_string();
314                    let value_ast = build_ast_from_expr(value_pair)?;
315                    fields.push((field_name, value_ast));
316                }
317            }
318            Ok(Expr::MessageLiteral { type_name, fields })
319        }
320        Rule::map_lit => {
321            let mut entries = Vec::new();
322            for entry_pair in inner_pair.into_inner() {
323                if entry_pair.as_rule() == Rule::map_entry {
324                    let mut inner_entry = entry_pair.into_inner();
325                    let key_pair = inner_entry.next().unwrap();
326                    let value_pair = inner_entry.next().unwrap();
327                    let key_ast = build_ast_from_expr(key_pair)?;
328                    let value_ast = build_ast_from_expr(value_pair)?;
329                    entries.push((key_ast, value_ast));
330                }
331            }
332            Ok(Expr::MapLiteral { entries })
333        }
334        Rule::list_lit => {
335            let mut elements = Vec::new();
336            if let Some(expr_list_pair) = inner_pair.into_inner().next() {
337                for expr_pair in expr_list_pair.into_inner() {
338                    elements.push(build_ast_from_expr(expr_pair)?);
339                }
340            }
341            Ok(Expr::List { elements })
342        }
343        Rule::has_macro => {
344            let target_expr_pair = inner_pair.into_inner().next().unwrap();
345            let target_ast = build_ast_from_expr(target_expr_pair)?;
346            Ok(Expr::Has {
347                target: Box::new(target_ast),
348            })
349        }
350        Rule::literal => build_ast_from_literal_rule(inner_pair.into_inner().next().unwrap()),
351        Rule::paren_expr => build_ast_from_expr(inner_pair.into_inner().next().unwrap()),
352        Rule::global_call => {
353            let mut inner = inner_pair.into_inner();
354            let ident_pair = inner.next().unwrap();
355            let target = Expr::Identifier(ident_pair.as_str().to_string());
356
357            let mut args = Vec::new();
358            // FIX: The expr_list is optional, so we use `if let` instead of `unwrap`.
359            if let Some(expr_list_pair) = inner.next() {
360                for expr_pair in expr_list_pair.into_inner() {
361                    args.push(build_ast_from_expr(expr_pair)?);
362                }
363            }
364            Ok(Expr::Call {
365                target: Box::new(target),
366                args,
367            })
368        }
369        Rule::ident => Ok(Expr::Identifier(inner_pair.as_str().to_string())),
370        rule => Err(CelParserError::InternalError(format!(
371            "Unexpected rule in build_ast_from_primary: {:?}",
372            rule
373        ))),
374    }
375}
376
377/// Builds an AST Expr::Literal node from a Pair representing a *specific* literal rule
378/// (e.g., int_lit, float_lit, string_lit, etc.), not the silent `literal` wrapper.
379fn build_ast_from_literal_rule(pair: Pair<Rule>) -> Result<Expr, CelParserError> {
380    match pair.as_rule() {
381        Rule::int_lit => {
382            // int_lit = { decimal_lit | hex_lit }
383            let inner_pair = pair
384                .into_inner()
385                .next()
386                .ok_or_else(|| CelParserError::InternalError("Empty int_lit".to_string()))?;
387            let num_str = inner_pair.as_str();
388            let val = if inner_pair.as_rule() == Rule::hex_lit {
389                // hex_lit     = @{ "0" ~ ("x" | "X") ~ hex_digit+ }
390                let hex_val = num_str.get(2..).ok_or_else(|| {
391                    CelParserError::InternalError("Invalid hex literal structure".to_string())
392                })?;
393                i64::from_str_radix(hex_val, 16)
394            } else {
395                // decimal_lit = @{ digit+ }
396                num_str.parse::<i64>()
397            }
398            .map_err(|e| CelParserError::InvalidIntegerLiteral(num_str.to_string(), e))?;
399            Ok(Expr::Literal(Literal::Int(val)))
400        }
401        Rule::uint_lit => {
402            // uint_lit = @{ int_lit ~ ("u" | "U") }
403            let int_lit_pair = pair
404                .into_inner()
405                .next()
406                .ok_or_else(|| CelParserError::InternalError("Empty uint_lit".to_string()))?;
407            // int_lit = { decimal_lit | hex_lit }
408            let inner_pair = int_lit_pair.into_inner().next().ok_or_else(|| {
409                CelParserError::InternalError("Empty int_lit inside uint_lit".to_string())
410            })?;
411            let num_str = inner_pair.as_str();
412            let val = if inner_pair.as_rule() == Rule::hex_lit {
413                let hex_val = num_str.get(2..).ok_or_else(|| {
414                    CelParserError::InternalError(
415                        "Invalid hex literal structure in uint".to_string(),
416                    )
417                })?;
418                u64::from_str_radix(hex_val, 16)
419            } else {
420                num_str.parse::<u64>()
421            }
422            .map_err(|e| CelParserError::InvalidUintLiteral(num_str.to_string(), e))?;
423            Ok(Expr::Literal(Literal::Uint(val)))
424        }
425        Rule::float_lit => {
426            let num_str = pair.as_str();
427            let val = num_str
428                .parse::<f64>()
429                .map_err(|e| CelParserError::InvalidFloatLiteral(num_str.to_string(), e))?;
430            Ok(Expr::Literal(Literal::Float(val)))
431        }
432        Rule::string_lit => {
433            // Simplified Grammar:
434            // string_lit = ${ (("r" | "R") ~ RAW_TYPES) | NORMAL_TYPES }
435            // Where RAW_TYPES = raw_... | ...
436            // And NORMAL_TYPES = normal_... | ...
437            let mut inner_pairs = pair.into_inner();
438            let first_inner = inner_pairs
439                .next()
440                .ok_or_else(|| CelParserError::InternalError("Empty string_lit".to_string()))?;
441
442            let (is_raw, specific_quote_type_pair) =
443                // Check if the *first inner pair* is the r/R text token itself
444                if first_inner.as_str() == "r" || first_inner.as_str() == "R" {
445                    // If yes, it's raw. The *next* inner pair is the specific raw quote type rule.
446                    (true, inner_pairs.next().ok_or_else(|| CelParserError::InternalError("Missing raw string type rule after r/R".to_string()))?)
447                } else {
448                    // If no, it's normal. The *first inner pair* IS the specific normal quote type rule.
449                    (false, first_inner)
450                };
451
452            // Extract the raw string content (between the outermost delimiters)
453            let quotes = specific_quote_type_pair.as_str();
454            let content_str = match specific_quote_type_pair.as_rule() {
455                // Add all specific types (raw and normal) here
456                Rule::normal_string_lit_double
457                | Rule::raw_string_lit_double
458                | Rule::normal_string_lit_single
459                | Rule::raw_string_lit_single => {
460                    quotes.get(1..quotes.len() - 1).ok_or_else(|| {
461                        CelParserError::InternalError(
462                            "Invalid single/double quote structure".to_string(),
463                        )
464                    })?
465                }
466                Rule::normal_string_lit_triple_double
467                | Rule::raw_string_lit_triple_double
468                | Rule::normal_string_lit_triple_single
469                | Rule::raw_string_lit_triple_single => {
470                    quotes.get(3..quotes.len() - 3).ok_or_else(|| {
471                        CelParserError::InternalError("Invalid triple quote structure".to_string())
472                    })?
473                }
474                _ => {
475                    return Err(CelParserError::InternalError(format!(
476                        "Unexpected rule matched inside string_lit: {:?}",
477                        specific_quote_type_pair.as_rule()
478                    )));
479                }
480            };
481
482            let final_string = if is_raw {
483                content_str.to_string() // No escape processing for raw strings
484            } else {
485                parse_string_escapes(content_str)? // Process escapes for normal strings
486            };
487            Ok(Expr::Literal(Literal::String(final_string)))
488        }
489        Rule::bytes_lit => {
490            // Grammar: bytes_lit = ${ ("b" | "B") ~ string_lit }
491            // Debug shows inner pair is ONLY the string_lit pair.
492            let mut inner = pair.into_inner();
493            // --- FIXED AREA START ---
494            // Directly get the single inner pair, which MUST be string_lit
495            let string_lit_pair = inner.next().ok_or_else(|| {
496                CelParserError::InternalError(
497                    "Missing inner string_lit pair for bytes_lit".to_string(),
498                )
499            })?;
500            // Optional sanity check
501            if string_lit_pair.as_rule() != Rule::string_lit {
502                return Err(CelParserError::InternalError(format!(
503                    "Expected string_lit inside bytes_lit, found {:?}",
504                    string_lit_pair.as_rule()
505                )));
506            }
507            if inner.next().is_some() {
508                return Err(CelParserError::InternalError(
509                    "Unexpected extra inner pair for bytes_lit".to_string(),
510                ));
511            }
512            // --- FIXED AREA END ---
513
514            // Now, inspect the retrieved string_lit_pair based on its structure
515            let mut inner_str_pairs = string_lit_pair.into_inner();
516            // ... rest of the logic remains the same ...
517
518            let first_inner_str = inner_str_pairs.next().ok_or_else(|| {
519                CelParserError::InternalError("Empty inner string_lit for bytes".to_string())
520            })?;
521
522            let (is_raw, specific_quote_type_pair) =
523                if first_inner_str.as_str() == "r" || first_inner_str.as_str() == "R" {
524                    // It's raw. The next pair is the specific raw quote type.
525                    (
526                        true,
527                        inner_str_pairs.next().ok_or_else(|| {
528                            CelParserError::InternalError(
529                                "Missing raw string type rule in bytes after r/R".to_string(),
530                            )
531                        })?,
532                    )
533                } else {
534                    // It's normal. The first pair IS the specific normal quote type.
535                    (false, first_inner_str)
536                };
537
538            // Extract content string
539            let quotes = specific_quote_type_pair.as_str();
540            let content_str = match specific_quote_type_pair.as_rule() {
541                Rule::normal_string_lit_double
542                | Rule::raw_string_lit_double
543                | Rule::normal_string_lit_single
544                | Rule::raw_string_lit_single => {
545                    quotes.get(1..quotes.len() - 1).ok_or_else(|| {
546                        CelParserError::InternalError(
547                            "Invalid quote structure in bytes".to_string(),
548                        )
549                    })?
550                }
551                Rule::normal_string_lit_triple_double
552                | Rule::raw_string_lit_triple_double
553                | Rule::normal_string_lit_triple_single
554                | Rule::raw_string_lit_triple_single => {
555                    quotes.get(3..quotes.len() - 3).ok_or_else(|| {
556                        CelParserError::InternalError(
557                            "Invalid triple quote structure in bytes".to_string(),
558                        )
559                    })?
560                }
561                _ => {
562                    return Err(CelParserError::InternalError(format!(
563                        "Unexpected rule matched inside bytes_lit's string_lit: {:?}",
564                        specific_quote_type_pair.as_rule()
565                    )));
566                }
567            };
568
569            let final_bytes = if is_raw {
570                // Raw bytes literal: content is literal UTF-8 bytes from source
571                content_str.as_bytes().to_vec()
572            } else {
573                // Normal bytes literal: process escapes as byte values
574                parse_bytes_escapes(content_str)?
575            };
576            Ok(Expr::Literal(Literal::Bytes(final_bytes)))
577        }
578        Rule::bool_lit => {
579            // bool_lit = @{ "true" | "false" }
580            let val = pair.as_str().parse::<bool>().unwrap(); // Grammar guarantees true/false
581            Ok(Expr::Literal(Literal::Bool(val)))
582        }
583        Rule::null_lit => {
584            // null_lit = @{ "null" }
585            Ok(Expr::Literal(Literal::Null))
586        }
587
588        Rule::type_lit => {
589            let type_str = pair.as_str();
590            let cel_type = match type_str {
591                "int" => CelType::Int,
592                "uint" => CelType::Uint,
593                "double" => CelType::Double,
594                "bool" => CelType::Bool,
595                "string" => CelType::String,
596                "bytes" => CelType::Bytes,
597                "list" => CelType::List,
598                "map" => CelType::Map,
599                "null_type" => CelType::NullType,
600                "type" => CelType::Type,
601                _ => {
602                    return Err(CelParserError::InternalError(format!(
603                        "Unknown type literal: {}",
604                        type_str
605                    )));
606                }
607            };
608            Ok(Expr::Type(cel_type))
609        }
610
611        _ => todo!("Handle other literal types: {:?}", pair.as_rule()),
612    }
613}
614
615// Helper to parse escape sequences within a non-raw string literal's content
616fn parse_string_escapes(s: &str) -> Result<String, CelParserError> {
617    let mut result = String::with_capacity(s.len());
618    let mut chars = s.chars().peekable();
619
620    while let Some(c) = chars.next() {
621        if c == '\\' {
622            let escaped_char = chars.next().ok_or_else(|| {
623                CelParserError::IncompleteEscapeSequence(format!("\\ at end of {}", s))
624            })?;
625            match escaped_char {
626                'a' => result.push('\u{07}'), // Bell
627                'b' => result.push('\u{08}'), // Backspace
628                'f' => result.push('\u{0C}'), // Form feed
629                'n' => result.push('\n'),     // Line feed
630                'r' => result.push('\r'),     // Carriage return
631                't' => result.push('\t'),     // Horizontal tab
632                'v' => result.push('\u{0B}'), // Vertical tab
633                '\\' => result.push('\\'),
634                '?' => result.push('?'),
635                '"' => result.push('"'),
636                '\'' => result.push('\''),
637                '`' => result.push('`'),
638                'x' | 'X' => {
639                    let h1 = chars.next().ok_or_else(|| {
640                        CelParserError::IncompleteEscapeSequence(format!("\\x{}", s))
641                    })?;
642                    let h2 = chars.next().ok_or_else(|| {
643                        CelParserError::IncompleteEscapeSequence(format!("\\x{}{}", h1, s))
644                    })?;
645                    let code_str = format!("{}{}", h1, h2);
646                    let code = u32::from_str_radix(&code_str, 16).map_err(|_| {
647                        CelParserError::InvalidEscapeSequence(format!("x{}", code_str))
648                    })?;
649                    result.push(char::from_u32(code).ok_or_else(|| {
650                        CelParserError::InvalidUnicodeEscape(format!("x{}", code_str))
651                    })?);
652                }
653                'u' => {
654                    let h1 = chars.next().ok_or_else(|| {
655                        CelParserError::IncompleteEscapeSequence(format!("\\u{}", s))
656                    })?;
657                    let h2 = chars.next().ok_or_else(|| {
658                        CelParserError::IncompleteEscapeSequence(format!("\\u{}{}", h1, s))
659                    })?;
660                    let h3 = chars.next().ok_or_else(|| {
661                        CelParserError::IncompleteEscapeSequence(format!("\\u{}{}{}", h1, h2, s))
662                    })?;
663                    let h4 = chars.next().ok_or_else(|| {
664                        CelParserError::IncompleteEscapeSequence(format!(
665                            "\\u{}{}{}{}",
666                            h1, h2, h3, s
667                        ))
668                    })?;
669                    let code_str = format!("{}{}{}{}", h1, h2, h3, h4);
670                    let code = u32::from_str_radix(&code_str, 16).map_err(|_| {
671                        CelParserError::InvalidEscapeSequence(format!("u{}", code_str))
672                    })?;
673                    if (0xD800..=0xDFFF).contains(&code) {
674                        return Err(CelParserError::InvalidUnicodeEscape(format!(
675                            "u{} (surrogate)",
676                            code_str
677                        )));
678                    }
679                    result.push(char::from_u32(code).ok_or_else(|| {
680                        CelParserError::InvalidUnicodeEscape(format!("u{}", code_str))
681                    })?);
682                }
683                'U' => {
684                    let h1 = chars.next().ok_or_else(|| {
685                        CelParserError::IncompleteEscapeSequence(format!("\\U{}", s))
686                    })?;
687                    let h2 = chars.next().ok_or_else(|| {
688                        CelParserError::IncompleteEscapeSequence(format!("\\U{}{}", h1, s))
689                    })?;
690                    let h3 = chars.next().ok_or_else(|| {
691                        CelParserError::IncompleteEscapeSequence(format!("\\U{}{}{}", h1, h2, s))
692                    })?;
693                    let h4 = chars.next().ok_or_else(|| {
694                        CelParserError::IncompleteEscapeSequence(format!(
695                            "\\U{}{}{}{}",
696                            h1, h2, h3, s
697                        ))
698                    })?;
699                    let h5 = chars.next().ok_or_else(|| {
700                        CelParserError::IncompleteEscapeSequence(format!(
701                            "\\U{}{}{}{}{}",
702                            h1, h2, h3, h4, s
703                        ))
704                    })?;
705                    let h6 = chars.next().ok_or_else(|| {
706                        CelParserError::IncompleteEscapeSequence(format!(
707                            "\\U{}{}{}{}{}{}",
708                            h1, h2, h3, h4, h5, s
709                        ))
710                    })?;
711                    let h7 = chars.next().ok_or_else(|| {
712                        CelParserError::IncompleteEscapeSequence(format!(
713                            "\\U{}{}{}{}{}{}{}",
714                            h1, h2, h3, h4, h5, h6, s
715                        ))
716                    })?;
717                    let h8 = chars.next().ok_or_else(|| {
718                        CelParserError::IncompleteEscapeSequence(format!(
719                            "\\U{}{}{}{}{}{}{}{}",
720                            h1, h2, h3, h4, h5, h6, h7, s
721                        ))
722                    })?;
723                    let code_str = format!("{}{}{}{}{}{}{}{}", h1, h2, h3, h4, h5, h6, h7, h8);
724                    let code = u32::from_str_radix(&code_str, 16).map_err(|_| {
725                        CelParserError::InvalidEscapeSequence(format!("U{}", code_str))
726                    })?;
727                    if (0xD800..=0xDFFF).contains(&code) || code > 0x10FFFF {
728                        return Err(CelParserError::InvalidUnicodeEscape(format!(
729                            "U{} (surrogate or > U+10FFFF)",
730                            code_str
731                        )));
732                    }
733                    result.push(char::from_u32(code).ok_or_else(|| {
734                        CelParserError::InvalidUnicodeEscape(format!("U{}", code_str))
735                    })?);
736                }
737                o @ '0'..='3' => {
738                    // Octal escape \ooo
739                    let o2 = chars.next().ok_or_else(|| {
740                        CelParserError::IncompleteEscapeSequence(format!("\\{}{}", o, s))
741                    })?;
742                    let o3 = chars.next().ok_or_else(|| {
743                        CelParserError::IncompleteEscapeSequence(format!("\\{}{}{}", o, o2, s))
744                    })?;
745                    if !('0'..='7').contains(&o2) || !('0'..='7').contains(&o3) {
746                        return Err(CelParserError::InvalidEscapeSequence(format!(
747                            "{}{}{}",
748                            o, o2, o3
749                        )));
750                    }
751                    let code_str = format!("{}{}{}", o, o2, o3);
752                    let code = u32::from_str_radix(&code_str, 8)
753                        .map_err(|_| CelParserError::InvalidEscapeSequence(code_str.clone()))?;
754                    result.push(
755                        char::from_u32(code)
756                            .ok_or_else(|| CelParserError::InvalidUnicodeEscape(code_str))?,
757                    );
758                }
759                other => return Err(CelParserError::InvalidEscapeSequence(other.to_string())),
760            }
761        } else {
762            result.push(c);
763        }
764    }
765    Ok(result)
766}
767
768// Helper to parse escape sequences within a non-raw bytes literal's content
769fn parse_bytes_escapes(s: &str) -> Result<Vec<u8>, CelParserError> {
770    let mut result = Vec::with_capacity(s.len());
771    let mut bytes = s.bytes().peekable(); // Process raw bytes
772
773    while let Some(b) = bytes.next() {
774        if b == b'\\' {
775            let escaped_byte = bytes.next().ok_or_else(|| {
776                CelParserError::IncompleteEscapeSequence(format!("\\ at end of {}", s))
777            })?;
778            match escaped_byte {
779                b'a' => result.push(0x07),  // Bell
780                b'b' => result.push(0x08),  // Backspace
781                b'f' => result.push(0x0C),  // Form feed
782                b'n' => result.push(b'\n'), // Line feed
783                b'r' => result.push(b'\r'), // Carriage return
784                b't' => result.push(b'\t'), // Horizontal tab
785                b'v' => result.push(0x0B),  // Vertical tab
786                b'\\' => result.push(b'\\'),
787                b'?' => result.push(b'?'),
788                b'"' => result.push(b'"'),
789                b'\'' => result.push(b'\''),
790                b'`' => result.push(b'`'),
791                b'x' | b'X' => {
792                    let h1_b = bytes.next().ok_or_else(|| {
793                        CelParserError::IncompleteEscapeSequence(format!("\\x{}", s))
794                    })?;
795                    let h2_b = bytes.next().ok_or_else(|| {
796                        CelParserError::IncompleteEscapeSequence(format!(
797                            "\\x{}{}",
798                            h1_b as char, s
799                        ))
800                    })?;
801                    let hex_str = String::from_utf8(vec![h1_b, h2_b]).map_err(|_| {
802                        CelParserError::InvalidEscapeSequence(
803                            "Non-UTF8 hex escape char".to_string(),
804                        )
805                    })?;
806                    let byte_val = u8::from_str_radix(&hex_str, 16).map_err(|_| {
807                        CelParserError::InvalidEscapeSequence(format!("x{}", hex_str))
808                    })?;
809                    result.push(byte_val);
810                }
811                // \u and \U are *not* valid escapes in CEL bytes literals
812                b'u' | b'U' => {
813                    return Err(CelParserError::InvalidEscapeSequence(
814                        if escaped_byte == b'u' {
815                            "u".to_string()
816                        } else {
817                            "U".to_string()
818                        },
819                    ));
820                }
821                o @ b'0'..=b'3' => {
822                    // Octal escape \ooo interpreted as byte value
823                    let o2 = bytes.next().ok_or_else(|| {
824                        CelParserError::IncompleteEscapeSequence(format!("\\{}{}", o as char, s))
825                    })?;
826                    let o3 = bytes.next().ok_or_else(|| {
827                        CelParserError::IncompleteEscapeSequence(format!(
828                            "\\{}{}{}",
829                            o as char, o2 as char, s
830                        ))
831                    })?;
832                    if !(b'0'..=b'7').contains(&o2) || !(b'0'..=b'7').contains(&o3) {
833                        let temp = vec![o, o2, o3];
834                        let invalid_oct = String::from_utf8_lossy(&temp);
835                        return Err(CelParserError::InvalidEscapeSequence(format!(
836                            "{}",
837                            invalid_oct
838                        )));
839                    }
840                    let oct_str = String::from_utf8(vec![o, o2, o3]).map_err(|_| {
841                        CelParserError::InvalidEscapeSequence(
842                            "Non-UTF8 octal escape char".to_string(),
843                        )
844                    })?;
845                    let byte_val = u8::from_str_radix(&oct_str, 8)
846                        .map_err(|_| CelParserError::InvalidEscapeSequence(oct_str))?;
847                    // Check if the octal value is > 377 (which is 255 decimal)
848                    // This check is inherently done by u8::from_str_radix, as it won't parse > 255 for base 8 from 3 digits.
849                    result.push(byte_val);
850                }
851                other => {
852                    return Err(CelParserError::InvalidEscapeSequence(
853                        (other as char).to_string(),
854                    ));
855                }
856            }
857        } else {
858            // Not an escape, just add the byte.
859            result.push(b);
860        }
861    }
862    // No need to check final UTF-8 validity here, we are constructing a Vec<u8>
863    Ok(result)
864}
865
866// --- Unit Tests ---
867#[cfg(test)]
868mod tests;
rust_cel_parser/parser/mod.rs

rust_cel_parser/parser/
mod.rs