1use lazy_static::lazy_static;
2use pest::Parser;
3use pest::iterators::{Pair, Pairs};
4use pest::pratt_parser::{Assoc, Op, PrattParser};
5use pest_derive::Parser;
6
7use crate::ast::{BinaryOperator, CelType, ComprehensionOp, Expr, Literal, UnaryOperator};
8use crate::error::CelParserError;
9
/// Parser type for CEL source text.
///
/// The `Parser` implementation and the accompanying `Rule` enum are generated
/// by `pest_derive` from the grammar file named in the `grammar` attribute.
#[derive(Parser)]
#[grammar = "parser/cel_grammar.pest"]
pub struct CelParser;
13
14lazy_static! {
16 static ref PRATT_PARSER: PrattParser<Rule> = {
17 use Assoc::*; use Rule::*;
19
20 PrattParser::new()
23 .op(Op::infix(log_or, Left))
25 .op(Op::infix(log_and, Left))
27 .op(Op::infix(eq, Left) | Op::infix(ne, Left) |
29 Op::infix(lt, Left) | Op::infix(le, Left) |
30 Op::infix(gt, Left) | Op::infix(ge, Left) |
31 Op::infix(in_op, Left))
32 .op(Op::infix(add, Left) | Op::infix(binary_minus, Left))
34 .op(Op::infix(div, Left) | Op::infix(rem, Left))
36 .op(Op::infix(mul, Left))
37 .op(Op::prefix(log_not) | Op::prefix(unary_minus))
39 };
40}
41
42pub fn parse_cel_program(input: &str) -> Result<Expr, CelParserError> {
44 let mut top_level_pairs = CelParser::parse(Rule::program, input)?;
45
46 let program_pair = top_level_pairs.next().ok_or_else(|| {
47 CelParserError::InternalError("No top-level program match found".to_string())
48 })?;
49 if program_pair.as_rule() != Rule::program {
50 return Err(CelParserError::InternalError(format!(
51 "Expected Rule::program, got {:?}",
52 program_pair.as_rule()
53 )));
54 }
55 if top_level_pairs.next().is_some() {
56 return Err(CelParserError::InternalError(
57 "Unexpected extra data after main program".to_string(),
58 ));
59 }
60
61 let mut inner_program_pairs = program_pair.into_inner();
63
64 let expr_pair = inner_program_pairs
65 .next()
66 .ok_or_else(|| CelParserError::InternalError("Missing expr".to_string()))?;
67 let eoi_pair = inner_program_pairs
68 .next()
69 .ok_or_else(|| CelParserError::InternalError("Missing EOI".to_string()))?;
70
71 if eoi_pair.as_rule() != Rule::EOI {
72 return Err(CelParserError::InternalError(format!(
73 "Expected EOI, got {:?}",
74 eoi_pair.as_rule()
75 )));
76 }
77
78 if inner_program_pairs.next().is_some() {
80 return Err(CelParserError::InternalError(
81 "Unexpected extra pairs after EOI".to_string(),
82 ));
83 }
84
85 build_ast_from_expr(expr_pair)
87}
88
89fn build_ast_from_expr(pair: Pair<Rule>) -> Result<Expr, CelParserError> {
91 let mut inner = pair.into_inner();
93 let pratt_seq_pair = inner.next().ok_or_else(|| {
94 CelParserError::InternalError("Missing pratt sequence in expr".to_string())
95 })?;
96
97 if let Some(cond_op_pair) = inner.next() {
98 if cond_op_pair.as_rule() != Rule::cond {
101 return Err(CelParserError::InternalError(
102 "Expected '?' for conditional".to_string(),
103 ));
104 }
105 let true_branch_pair = inner.next().ok_or_else(|| {
106 CelParserError::InternalError("Missing true branch for conditional".to_string())
107 })?;
108 let else_op_pair = inner.next().ok_or_else(|| {
109 CelParserError::InternalError("Missing ':' for conditional".to_string())
110 })?;
111 if else_op_pair.as_rule() != Rule::cond_else {
112 return Err(CelParserError::InternalError(
113 "Expected ':' for conditional".to_string(),
114 ));
115 }
116 let false_branch_pair = inner.next().ok_or_else(|| {
117 CelParserError::InternalError("Missing false branch for conditional".to_string())
118 })?;
119
120 let condition_ast = build_ast_from_pratt_sequence(pratt_seq_pair.into_inner())?;
122 let true_branch_ast = build_ast_from_expr(true_branch_pair)?;
124 let false_branch_ast = build_ast_from_expr(false_branch_pair)?;
125
126 Ok(Expr::Conditional {
127 cond: Box::new(condition_ast),
128 true_branch: Box::new(true_branch_ast),
129 false_branch: Box::new(false_branch_ast),
130 })
131 } else {
132 build_ast_from_pratt_sequence(pratt_seq_pair.into_inner())
134 }
135}
136
137fn build_ast_from_pratt_sequence(pairs: Pairs<Rule>) -> Result<Expr, CelParserError> {
139 PRATT_PARSER
140 .map_primary(|primary| {
141 let mut inner = primary.into_inner();
142 let base_pair = inner.next().unwrap();
143 let mut ast = build_ast_from_primary(base_pair)?;
144
145 for chain_part in inner {
146 ast = match chain_part.as_rule() {
147 Rule::comprehension => {
148 let mut inner_comp = chain_part.into_inner();
149 let op_str = inner_comp.next().unwrap().as_str();
150 let iter_var = inner_comp.next().unwrap().as_str().to_string();
151 let predicate_pair = inner_comp.next().unwrap();
152 let op = match op_str {
153 "all" => ComprehensionOp::All,
154 "exists" => ComprehensionOp::Exists,
155 "exists_one" => ComprehensionOp::ExistsOne,
156 "filter" => ComprehensionOp::Filter,
157 _ => unreachable!(),
158 };
159 let predicate_ast = build_ast_from_expr(predicate_pair)?;
160 Expr::Comprehension {
161 op,
162 target: Box::new(ast),
163 iter_var,
164 predicate: Box::new(predicate_ast),
165 }
166 }
167 Rule::map_macro => {
168 let mut inner_map = chain_part.into_inner();
169 inner_map.next(); let iter_var = inner_map.next().unwrap().as_str().to_string();
171 let first_expr_pair = inner_map.next().unwrap();
172
173 let (filter_ast, transform_ast) =
174 if let Some(second_expr_pair) = inner_map.next() {
175 let filter = build_ast_from_expr(first_expr_pair)?;
177 let transform = build_ast_from_expr(second_expr_pair)?;
178 (Some(Box::new(filter)), Box::new(transform))
179 } else {
180 let transform = build_ast_from_expr(first_expr_pair)?;
182 (None, Box::new(transform))
183 };
184
185 Expr::Map {
186 target: Box::new(ast),
187 iter_var,
188 filter: filter_ast,
189 transform: transform_ast,
190 }
191 }
192 Rule::member_call => {
193 let mut inner_call = chain_part.into_inner();
194 let field_access_pair = inner_call.next().unwrap();
195 let call_args_pair = inner_call.next().unwrap();
196
197 let field_name = field_access_pair
198 .into_inner()
199 .next()
200 .unwrap()
201 .as_str()
202 .to_string();
203 let target = Expr::FieldAccess {
204 base: Box::new(ast),
205 field: field_name,
206 };
207
208 let mut args = Vec::new();
209 if let Some(expr_list) = call_args_pair.into_inner().next() {
210 for arg in expr_list.into_inner() {
211 args.push(build_ast_from_expr(arg)?);
212 }
213 }
214 Expr::Call {
215 target: Box::new(target),
216 args,
217 }
218 }
219 Rule::field_access => {
220 let field_name =
221 chain_part.into_inner().next().unwrap().as_str().to_string();
222 Expr::FieldAccess {
223 base: Box::new(ast),
224 field: field_name,
225 }
226 }
227 Rule::index_access => {
228 let index_pair = chain_part.into_inner().next().unwrap();
229 let index_ast = build_ast_from_expr(index_pair)?;
230 Expr::Index {
231 base: Box::new(ast),
232 index: Box::new(index_ast),
233 }
234 }
235 _ => unreachable!(
236 "Unexpected part in member_chain: {:?}",
237 chain_part.as_rule()
238 ),
239 }
240 }
241 Ok(ast)
242 })
243 .map_prefix(|op, rhs| {
244 let rhs_ast = rhs?;
245 match op.as_rule() {
246 Rule::log_not => Ok(Expr::UnaryOp {
247 op: UnaryOperator::Not,
248 operand: Box::new(rhs_ast),
249 }),
250 Rule::unary_minus => Ok(Expr::UnaryOp {
251 op: UnaryOperator::Neg,
252 operand: Box::new(rhs_ast),
253 }),
254 _ => Err(CelParserError::InternalError(format!(
255 "Unexpected prefix operator: {:?}",
256 op.as_rule()
257 ))),
258 }
259 })
260 .map_infix(|lhs, op, rhs| {
261 let lhs_ast = lhs?;
262 let rhs_ast = rhs?;
263 let binary_op = match op.as_rule() {
264 Rule::log_or => BinaryOperator::Or,
265 Rule::log_and => BinaryOperator::And,
266 Rule::eq => BinaryOperator::Eq,
267 Rule::ne => BinaryOperator::Ne,
268 Rule::lt => BinaryOperator::Lt,
269 Rule::le => BinaryOperator::Le,
270 Rule::gt => BinaryOperator::Gt,
271 Rule::ge => BinaryOperator::Ge,
272 Rule::in_op => BinaryOperator::In,
273 Rule::add => BinaryOperator::Add,
274 Rule::binary_minus => BinaryOperator::Sub,
275 Rule::mul => BinaryOperator::Mul,
276 Rule::div => BinaryOperator::Div,
277 Rule::rem => BinaryOperator::Rem,
278 _ => {
279 return Err(CelParserError::InternalError(format!(
280 "Unexpected infix operator: {:?}",
281 op.as_rule()
282 )));
283 }
284 };
285 Ok(Expr::BinaryOp {
286 op: binary_op,
287 left: Box::new(lhs_ast),
288 right: Box::new(rhs_ast),
289 })
290 })
291 .parse(pairs)
292}
293
294fn build_ast_from_primary(pair: Pair<Rule>) -> Result<Expr, CelParserError> {
295 let inner_pair = pair
298 .into_inner()
299 .next()
300 .ok_or_else(|| CelParserError::InternalError("Empty primary rule".to_string()))?;
301
302 match inner_pair.as_rule() {
303 Rule::message_lit => {
304 let mut inner = inner_pair.into_inner();
305 let type_name_pair = inner.next().unwrap();
306 let type_name = type_name_pair.as_str().to_string();
307 let mut fields = Vec::new();
308 for field_pair in inner {
309 if field_pair.as_rule() == Rule::field_init {
310 let mut inner_field = field_pair.into_inner();
311 let field_name_pair = inner_field.next().unwrap();
312 let value_pair = inner_field.next().unwrap();
313 let field_name = field_name_pair.as_str().to_string();
314 let value_ast = build_ast_from_expr(value_pair)?;
315 fields.push((field_name, value_ast));
316 }
317 }
318 Ok(Expr::MessageLiteral { type_name, fields })
319 }
320 Rule::map_lit => {
321 let mut entries = Vec::new();
322 for entry_pair in inner_pair.into_inner() {
323 if entry_pair.as_rule() == Rule::map_entry {
324 let mut inner_entry = entry_pair.into_inner();
325 let key_pair = inner_entry.next().unwrap();
326 let value_pair = inner_entry.next().unwrap();
327 let key_ast = build_ast_from_expr(key_pair)?;
328 let value_ast = build_ast_from_expr(value_pair)?;
329 entries.push((key_ast, value_ast));
330 }
331 }
332 Ok(Expr::MapLiteral { entries })
333 }
334 Rule::list_lit => {
335 let mut elements = Vec::new();
336 if let Some(expr_list_pair) = inner_pair.into_inner().next() {
337 for expr_pair in expr_list_pair.into_inner() {
338 elements.push(build_ast_from_expr(expr_pair)?);
339 }
340 }
341 Ok(Expr::List { elements })
342 }
343 Rule::has_macro => {
344 let target_expr_pair = inner_pair.into_inner().next().unwrap();
345 let target_ast = build_ast_from_expr(target_expr_pair)?;
346 Ok(Expr::Has {
347 target: Box::new(target_ast),
348 })
349 }
350 Rule::literal => build_ast_from_literal_rule(inner_pair.into_inner().next().unwrap()),
351 Rule::paren_expr => build_ast_from_expr(inner_pair.into_inner().next().unwrap()),
352 Rule::global_call => {
353 let mut inner = inner_pair.into_inner();
354 let ident_pair = inner.next().unwrap();
355 let target = Expr::Identifier(ident_pair.as_str().to_string());
356
357 let mut args = Vec::new();
358 if let Some(expr_list_pair) = inner.next() {
360 for expr_pair in expr_list_pair.into_inner() {
361 args.push(build_ast_from_expr(expr_pair)?);
362 }
363 }
364 Ok(Expr::Call {
365 target: Box::new(target),
366 args,
367 })
368 }
369 Rule::ident => Ok(Expr::Identifier(inner_pair.as_str().to_string())),
370 rule => Err(CelParserError::InternalError(format!(
371 "Unexpected rule in build_ast_from_primary: {:?}",
372 rule
373 ))),
374 }
375}
376
377fn build_ast_from_literal_rule(pair: Pair<Rule>) -> Result<Expr, CelParserError> {
380 match pair.as_rule() {
381 Rule::int_lit => {
382 let inner_pair = pair
384 .into_inner()
385 .next()
386 .ok_or_else(|| CelParserError::InternalError("Empty int_lit".to_string()))?;
387 let num_str = inner_pair.as_str();
388 let val = if inner_pair.as_rule() == Rule::hex_lit {
389 let hex_val = num_str.get(2..).ok_or_else(|| {
391 CelParserError::InternalError("Invalid hex literal structure".to_string())
392 })?;
393 i64::from_str_radix(hex_val, 16)
394 } else {
395 num_str.parse::<i64>()
397 }
398 .map_err(|e| CelParserError::InvalidIntegerLiteral(num_str.to_string(), e))?;
399 Ok(Expr::Literal(Literal::Int(val)))
400 }
401 Rule::uint_lit => {
402 let int_lit_pair = pair
404 .into_inner()
405 .next()
406 .ok_or_else(|| CelParserError::InternalError("Empty uint_lit".to_string()))?;
407 let inner_pair = int_lit_pair.into_inner().next().ok_or_else(|| {
409 CelParserError::InternalError("Empty int_lit inside uint_lit".to_string())
410 })?;
411 let num_str = inner_pair.as_str();
412 let val = if inner_pair.as_rule() == Rule::hex_lit {
413 let hex_val = num_str.get(2..).ok_or_else(|| {
414 CelParserError::InternalError(
415 "Invalid hex literal structure in uint".to_string(),
416 )
417 })?;
418 u64::from_str_radix(hex_val, 16)
419 } else {
420 num_str.parse::<u64>()
421 }
422 .map_err(|e| CelParserError::InvalidUintLiteral(num_str.to_string(), e))?;
423 Ok(Expr::Literal(Literal::Uint(val)))
424 }
425 Rule::float_lit => {
426 let num_str = pair.as_str();
427 let val = num_str
428 .parse::<f64>()
429 .map_err(|e| CelParserError::InvalidFloatLiteral(num_str.to_string(), e))?;
430 Ok(Expr::Literal(Literal::Float(val)))
431 }
432 Rule::string_lit => {
433 let mut inner_pairs = pair.into_inner();
438 let first_inner = inner_pairs
439 .next()
440 .ok_or_else(|| CelParserError::InternalError("Empty string_lit".to_string()))?;
441
442 let (is_raw, specific_quote_type_pair) =
443 if first_inner.as_str() == "r" || first_inner.as_str() == "R" {
445 (true, inner_pairs.next().ok_or_else(|| CelParserError::InternalError("Missing raw string type rule after r/R".to_string()))?)
447 } else {
448 (false, first_inner)
450 };
451
452 let quotes = specific_quote_type_pair.as_str();
454 let content_str = match specific_quote_type_pair.as_rule() {
455 Rule::normal_string_lit_double
457 | Rule::raw_string_lit_double
458 | Rule::normal_string_lit_single
459 | Rule::raw_string_lit_single => {
460 quotes.get(1..quotes.len() - 1).ok_or_else(|| {
461 CelParserError::InternalError(
462 "Invalid single/double quote structure".to_string(),
463 )
464 })?
465 }
466 Rule::normal_string_lit_triple_double
467 | Rule::raw_string_lit_triple_double
468 | Rule::normal_string_lit_triple_single
469 | Rule::raw_string_lit_triple_single => {
470 quotes.get(3..quotes.len() - 3).ok_or_else(|| {
471 CelParserError::InternalError("Invalid triple quote structure".to_string())
472 })?
473 }
474 _ => {
475 return Err(CelParserError::InternalError(format!(
476 "Unexpected rule matched inside string_lit: {:?}",
477 specific_quote_type_pair.as_rule()
478 )));
479 }
480 };
481
482 let final_string = if is_raw {
483 content_str.to_string() } else {
485 parse_string_escapes(content_str)? };
487 Ok(Expr::Literal(Literal::String(final_string)))
488 }
489 Rule::bytes_lit => {
490 let mut inner = pair.into_inner();
493 let string_lit_pair = inner.next().ok_or_else(|| {
496 CelParserError::InternalError(
497 "Missing inner string_lit pair for bytes_lit".to_string(),
498 )
499 })?;
500 if string_lit_pair.as_rule() != Rule::string_lit {
502 return Err(CelParserError::InternalError(format!(
503 "Expected string_lit inside bytes_lit, found {:?}",
504 string_lit_pair.as_rule()
505 )));
506 }
507 if inner.next().is_some() {
508 return Err(CelParserError::InternalError(
509 "Unexpected extra inner pair for bytes_lit".to_string(),
510 ));
511 }
512 let mut inner_str_pairs = string_lit_pair.into_inner();
516 let first_inner_str = inner_str_pairs.next().ok_or_else(|| {
519 CelParserError::InternalError("Empty inner string_lit for bytes".to_string())
520 })?;
521
522 let (is_raw, specific_quote_type_pair) =
523 if first_inner_str.as_str() == "r" || first_inner_str.as_str() == "R" {
524 (
526 true,
527 inner_str_pairs.next().ok_or_else(|| {
528 CelParserError::InternalError(
529 "Missing raw string type rule in bytes after r/R".to_string(),
530 )
531 })?,
532 )
533 } else {
534 (false, first_inner_str)
536 };
537
538 let quotes = specific_quote_type_pair.as_str();
540 let content_str = match specific_quote_type_pair.as_rule() {
541 Rule::normal_string_lit_double
542 | Rule::raw_string_lit_double
543 | Rule::normal_string_lit_single
544 | Rule::raw_string_lit_single => {
545 quotes.get(1..quotes.len() - 1).ok_or_else(|| {
546 CelParserError::InternalError(
547 "Invalid quote structure in bytes".to_string(),
548 )
549 })?
550 }
551 Rule::normal_string_lit_triple_double
552 | Rule::raw_string_lit_triple_double
553 | Rule::normal_string_lit_triple_single
554 | Rule::raw_string_lit_triple_single => {
555 quotes.get(3..quotes.len() - 3).ok_or_else(|| {
556 CelParserError::InternalError(
557 "Invalid triple quote structure in bytes".to_string(),
558 )
559 })?
560 }
561 _ => {
562 return Err(CelParserError::InternalError(format!(
563 "Unexpected rule matched inside bytes_lit's string_lit: {:?}",
564 specific_quote_type_pair.as_rule()
565 )));
566 }
567 };
568
569 let final_bytes = if is_raw {
570 content_str.as_bytes().to_vec()
572 } else {
573 parse_bytes_escapes(content_str)?
575 };
576 Ok(Expr::Literal(Literal::Bytes(final_bytes)))
577 }
578 Rule::bool_lit => {
579 let val = pair.as_str().parse::<bool>().unwrap(); Ok(Expr::Literal(Literal::Bool(val)))
582 }
583 Rule::null_lit => {
584 Ok(Expr::Literal(Literal::Null))
586 }
587
588 Rule::type_lit => {
589 let type_str = pair.as_str();
590 let cel_type = match type_str {
591 "int" => CelType::Int,
592 "uint" => CelType::Uint,
593 "double" => CelType::Double,
594 "bool" => CelType::Bool,
595 "string" => CelType::String,
596 "bytes" => CelType::Bytes,
597 "list" => CelType::List,
598 "map" => CelType::Map,
599 "null_type" => CelType::NullType,
600 "type" => CelType::Type,
601 _ => {
602 return Err(CelParserError::InternalError(format!(
603 "Unknown type literal: {}",
604 type_str
605 )));
606 }
607 };
608 Ok(Expr::Type(cel_type))
609 }
610
611 _ => todo!("Handle other literal types: {:?}", pair.as_rule()),
612 }
613}
614
615fn parse_string_escapes(s: &str) -> Result<String, CelParserError> {
617 let mut result = String::with_capacity(s.len());
618 let mut chars = s.chars().peekable();
619
620 while let Some(c) = chars.next() {
621 if c == '\\' {
622 let escaped_char = chars.next().ok_or_else(|| {
623 CelParserError::IncompleteEscapeSequence(format!("\\ at end of {}", s))
624 })?;
625 match escaped_char {
626 'a' => result.push('\u{07}'), 'b' => result.push('\u{08}'), 'f' => result.push('\u{0C}'), 'n' => result.push('\n'), 'r' => result.push('\r'), 't' => result.push('\t'), 'v' => result.push('\u{0B}'), '\\' => result.push('\\'),
634 '?' => result.push('?'),
635 '"' => result.push('"'),
636 '\'' => result.push('\''),
637 '`' => result.push('`'),
638 'x' | 'X' => {
639 let h1 = chars.next().ok_or_else(|| {
640 CelParserError::IncompleteEscapeSequence(format!("\\x{}", s))
641 })?;
642 let h2 = chars.next().ok_or_else(|| {
643 CelParserError::IncompleteEscapeSequence(format!("\\x{}{}", h1, s))
644 })?;
645 let code_str = format!("{}{}", h1, h2);
646 let code = u32::from_str_radix(&code_str, 16).map_err(|_| {
647 CelParserError::InvalidEscapeSequence(format!("x{}", code_str))
648 })?;
649 result.push(char::from_u32(code).ok_or_else(|| {
650 CelParserError::InvalidUnicodeEscape(format!("x{}", code_str))
651 })?);
652 }
653 'u' => {
654 let h1 = chars.next().ok_or_else(|| {
655 CelParserError::IncompleteEscapeSequence(format!("\\u{}", s))
656 })?;
657 let h2 = chars.next().ok_or_else(|| {
658 CelParserError::IncompleteEscapeSequence(format!("\\u{}{}", h1, s))
659 })?;
660 let h3 = chars.next().ok_or_else(|| {
661 CelParserError::IncompleteEscapeSequence(format!("\\u{}{}{}", h1, h2, s))
662 })?;
663 let h4 = chars.next().ok_or_else(|| {
664 CelParserError::IncompleteEscapeSequence(format!(
665 "\\u{}{}{}{}",
666 h1, h2, h3, s
667 ))
668 })?;
669 let code_str = format!("{}{}{}{}", h1, h2, h3, h4);
670 let code = u32::from_str_radix(&code_str, 16).map_err(|_| {
671 CelParserError::InvalidEscapeSequence(format!("u{}", code_str))
672 })?;
673 if (0xD800..=0xDFFF).contains(&code) {
674 return Err(CelParserError::InvalidUnicodeEscape(format!(
675 "u{} (surrogate)",
676 code_str
677 )));
678 }
679 result.push(char::from_u32(code).ok_or_else(|| {
680 CelParserError::InvalidUnicodeEscape(format!("u{}", code_str))
681 })?);
682 }
683 'U' => {
684 let h1 = chars.next().ok_or_else(|| {
685 CelParserError::IncompleteEscapeSequence(format!("\\U{}", s))
686 })?;
687 let h2 = chars.next().ok_or_else(|| {
688 CelParserError::IncompleteEscapeSequence(format!("\\U{}{}", h1, s))
689 })?;
690 let h3 = chars.next().ok_or_else(|| {
691 CelParserError::IncompleteEscapeSequence(format!("\\U{}{}{}", h1, h2, s))
692 })?;
693 let h4 = chars.next().ok_or_else(|| {
694 CelParserError::IncompleteEscapeSequence(format!(
695 "\\U{}{}{}{}",
696 h1, h2, h3, s
697 ))
698 })?;
699 let h5 = chars.next().ok_or_else(|| {
700 CelParserError::IncompleteEscapeSequence(format!(
701 "\\U{}{}{}{}{}",
702 h1, h2, h3, h4, s
703 ))
704 })?;
705 let h6 = chars.next().ok_or_else(|| {
706 CelParserError::IncompleteEscapeSequence(format!(
707 "\\U{}{}{}{}{}{}",
708 h1, h2, h3, h4, h5, s
709 ))
710 })?;
711 let h7 = chars.next().ok_or_else(|| {
712 CelParserError::IncompleteEscapeSequence(format!(
713 "\\U{}{}{}{}{}{}{}",
714 h1, h2, h3, h4, h5, h6, s
715 ))
716 })?;
717 let h8 = chars.next().ok_or_else(|| {
718 CelParserError::IncompleteEscapeSequence(format!(
719 "\\U{}{}{}{}{}{}{}{}",
720 h1, h2, h3, h4, h5, h6, h7, s
721 ))
722 })?;
723 let code_str = format!("{}{}{}{}{}{}{}{}", h1, h2, h3, h4, h5, h6, h7, h8);
724 let code = u32::from_str_radix(&code_str, 16).map_err(|_| {
725 CelParserError::InvalidEscapeSequence(format!("U{}", code_str))
726 })?;
727 if (0xD800..=0xDFFF).contains(&code) || code > 0x10FFFF {
728 return Err(CelParserError::InvalidUnicodeEscape(format!(
729 "U{} (surrogate or > U+10FFFF)",
730 code_str
731 )));
732 }
733 result.push(char::from_u32(code).ok_or_else(|| {
734 CelParserError::InvalidUnicodeEscape(format!("U{}", code_str))
735 })?);
736 }
737 o @ '0'..='3' => {
738 let o2 = chars.next().ok_or_else(|| {
740 CelParserError::IncompleteEscapeSequence(format!("\\{}{}", o, s))
741 })?;
742 let o3 = chars.next().ok_or_else(|| {
743 CelParserError::IncompleteEscapeSequence(format!("\\{}{}{}", o, o2, s))
744 })?;
745 if !('0'..='7').contains(&o2) || !('0'..='7').contains(&o3) {
746 return Err(CelParserError::InvalidEscapeSequence(format!(
747 "{}{}{}",
748 o, o2, o3
749 )));
750 }
751 let code_str = format!("{}{}{}", o, o2, o3);
752 let code = u32::from_str_radix(&code_str, 8)
753 .map_err(|_| CelParserError::InvalidEscapeSequence(code_str.clone()))?;
754 result.push(
755 char::from_u32(code)
756 .ok_or_else(|| CelParserError::InvalidUnicodeEscape(code_str))?,
757 );
758 }
759 other => return Err(CelParserError::InvalidEscapeSequence(other.to_string())),
760 }
761 } else {
762 result.push(c);
763 }
764 }
765 Ok(result)
766}
767
768fn parse_bytes_escapes(s: &str) -> Result<Vec<u8>, CelParserError> {
770 let mut result = Vec::with_capacity(s.len());
771 let mut bytes = s.bytes().peekable(); while let Some(b) = bytes.next() {
774 if b == b'\\' {
775 let escaped_byte = bytes.next().ok_or_else(|| {
776 CelParserError::IncompleteEscapeSequence(format!("\\ at end of {}", s))
777 })?;
778 match escaped_byte {
779 b'a' => result.push(0x07), b'b' => result.push(0x08), b'f' => result.push(0x0C), b'n' => result.push(b'\n'), b'r' => result.push(b'\r'), b't' => result.push(b'\t'), b'v' => result.push(0x0B), b'\\' => result.push(b'\\'),
787 b'?' => result.push(b'?'),
788 b'"' => result.push(b'"'),
789 b'\'' => result.push(b'\''),
790 b'`' => result.push(b'`'),
791 b'x' | b'X' => {
792 let h1_b = bytes.next().ok_or_else(|| {
793 CelParserError::IncompleteEscapeSequence(format!("\\x{}", s))
794 })?;
795 let h2_b = bytes.next().ok_or_else(|| {
796 CelParserError::IncompleteEscapeSequence(format!(
797 "\\x{}{}",
798 h1_b as char, s
799 ))
800 })?;
801 let hex_str = String::from_utf8(vec![h1_b, h2_b]).map_err(|_| {
802 CelParserError::InvalidEscapeSequence(
803 "Non-UTF8 hex escape char".to_string(),
804 )
805 })?;
806 let byte_val = u8::from_str_radix(&hex_str, 16).map_err(|_| {
807 CelParserError::InvalidEscapeSequence(format!("x{}", hex_str))
808 })?;
809 result.push(byte_val);
810 }
811 b'u' | b'U' => {
813 return Err(CelParserError::InvalidEscapeSequence(
814 if escaped_byte == b'u' {
815 "u".to_string()
816 } else {
817 "U".to_string()
818 },
819 ));
820 }
821 o @ b'0'..=b'3' => {
822 let o2 = bytes.next().ok_or_else(|| {
824 CelParserError::IncompleteEscapeSequence(format!("\\{}{}", o as char, s))
825 })?;
826 let o3 = bytes.next().ok_or_else(|| {
827 CelParserError::IncompleteEscapeSequence(format!(
828 "\\{}{}{}",
829 o as char, o2 as char, s
830 ))
831 })?;
832 if !(b'0'..=b'7').contains(&o2) || !(b'0'..=b'7').contains(&o3) {
833 let temp = vec![o, o2, o3];
834 let invalid_oct = String::from_utf8_lossy(&temp);
835 return Err(CelParserError::InvalidEscapeSequence(format!(
836 "{}",
837 invalid_oct
838 )));
839 }
840 let oct_str = String::from_utf8(vec![o, o2, o3]).map_err(|_| {
841 CelParserError::InvalidEscapeSequence(
842 "Non-UTF8 octal escape char".to_string(),
843 )
844 })?;
845 let byte_val = u8::from_str_radix(&oct_str, 8)
846 .map_err(|_| CelParserError::InvalidEscapeSequence(oct_str))?;
847 result.push(byte_val);
850 }
851 other => {
852 return Err(CelParserError::InvalidEscapeSequence(
853 (other as char).to_string(),
854 ));
855 }
856 }
857 } else {
858 result.push(b);
860 }
861 }
862 Ok(result)
864}
865
866#[cfg(test)]
868mod tests;