1use nom::branch::alt;
3use nom::bytes::complete::tag;
4use nom::character::complete::{char, digit1};
5use nom::combinator::{cut, map, opt, value};
6use nom::error::{ErrorKind as NomErrorKind, ParseError};
7use nom::multi::{many1, separated_list1};
8use nom::sequence::{delimited, preceded, terminated};
9use nom::Parser;
10
11use super::error::{Error, ErrorKind};
12use super::nom_recipes::{map_res, rtrim};
13use super::types::{Input, ParseResult};
14
15const JUMP_LIMIT_IN_ALTERNATIVES: u32 = 200;
16
17#[derive(Clone, Debug, PartialEq)]
19pub enum Token {
20 Byte(u8),
22 NotByte(u8),
24 MaskedByte(u8, Mask),
26 NotMaskedByte(u8, Mask),
28 Jump(Jump),
30 Alternatives(Vec<Vec<Token>>),
32}
33
34#[derive(Clone, Debug, PartialEq, Eq)]
36pub enum Mask {
37 Left,
39 Right,
41 All,
43}
44
45#[derive(Clone, Debug, PartialEq, Eq)]
53pub struct Jump {
54 pub from: u32,
56 pub to: Option<u32>,
58}
59
60pub fn parse_hex_string(input: &str) -> Result<Vec<Token>, Error> {
68 use nom::Finish;
69
70 let input = Input::new(input);
71 let (_, tokens) = hex_string(input).finish()?;
72
73 Ok(tokens)
74}
75
76pub(crate) fn hex_string(input: Input) -> ParseResult<Vec<Token>> {
82 let (input, _) = rtrim(char('{')).parse(input)?;
83
84 cut(terminated(|input| tokens(input, false), rtrim(char('}')))).parse(input)
85}
86
87fn hex_digit(mut input: Input) -> ParseResult<u8> {
89 match input.cursor().chars().next().and_then(|c| {
90 #[allow(clippy::cast_possible_truncation)]
92 c.to_digit(16).map(|v| v as u8)
93 }) {
94 Some(v) => {
95 input.advance(1);
96 Ok((input, v))
97 }
98 _ => Err(nom::Err::Error(Error::from_error_kind(
99 input,
100 NomErrorKind::HexDigit,
101 ))),
102 }
103}
104
105fn byte(input: Input) -> ParseResult<u8> {
109 let (input, digit0) = hex_digit(input)?;
110
111 map(rtrim(hex_digit), move |digit1| (digit0 << 4) | digit1).parse(input)
112}
113
114fn not_token(input: Input) -> ParseResult<Token> {
116 let start = input.pos();
117
118 let (input, _) = char('~').parse(input)?;
119 let (input, token) = cut(alt((
120 map(byte, Token::NotByte),
121 map(masked_byte, |(b, mask)| Token::NotMaskedByte(b, mask)),
122 )))
123 .parse(input)?;
124
125 if let Token::NotMaskedByte(_, Mask::All) = &token {
126 return Err(nom::Err::Failure(Error::new(
127 input.get_span_from(start),
128 ErrorKind::CannotNegateMaskAll,
129 )));
130 }
131
132 Ok((input, token))
133}
134
135fn masked_byte(input: Input) -> ParseResult<(u8, Mask)> {
139 rtrim(alt((
140 map(tag("??"), |_| (0, Mask::All)),
141 map(preceded(char('?'), hex_digit), |v| (v, Mask::Left)),
142 map(terminated(hex_digit, char('?')), |v| (v, Mask::Right)),
143 )))
144 .parse(input)
145}
146
147fn range(input: Input) -> ParseResult<Jump> {
157 let start = input.pos();
158 let (input, _) = rtrim(char('[')).parse(input)?;
159
160 let (input, from) = opt(map_res(rtrim(digit1), |v| {
162 str::parse::<u32>(v.cursor()).map_err(ErrorKind::StrToIntError)
163 }))
164 .parse(input)?;
165
166 let (input, to) = match from {
167 Some(from) => {
168 alt((
169 delimited(
171 rtrim(char('-')),
172 opt(map_res(rtrim(digit1), |v| {
173 str::parse(v.cursor()).map_err(ErrorKind::StrToIntError)
174 })),
175 rtrim(char(']')),
176 ),
177 value(Some(from), rtrim(char(']'))),
179 ))
180 .parse(input)?
181 }
182 None => delimited(
183 rtrim(char('-')),
184 opt(map_res(rtrim(digit1), |v| {
185 str::parse(v.cursor()).map_err(ErrorKind::StrToIntError)
186 })),
187 rtrim(char(']')),
188 )
189 .parse(input)?,
190 };
191
192 let jump = Jump {
193 from: from.unwrap_or(0),
194 to,
195 };
196
197 if let Err(kind) = validate_jump(&jump) {
198 return Err(nom::Err::Failure(Error::new(
199 input.get_span_from(start),
200 kind,
201 )));
202 }
203 Ok((input, jump))
204}
205
206fn validate_jump(range: &Jump) -> Result<(), ErrorKind> {
208 if let Some(to) = range.to {
209 if range.from == 0 && to == 0 {
210 return Err(ErrorKind::JumpEmpty);
211 }
212 if range.from > to {
213 return Err(ErrorKind::JumpRangeInvalid {
214 from: range.from,
215 to,
216 });
217 }
218 }
219
220 Ok(())
221}
222
223fn alternatives(input: Input) -> ParseResult<Token> {
229 let (input, _) = rtrim(char('(')).parse(input)?;
230
231 cut(terminated(
232 map(
233 separated_list1(rtrim(char('|')), |input| tokens(input, true)),
234 Token::Alternatives,
235 ),
236 rtrim(char(')')),
237 ))
238 .parse(input)
239}
240
241fn range_as_hex_token(input: Input, in_alternatives: bool) -> ParseResult<Token> {
242 let start = input.pos();
243 let (input, range) = range(input)?;
244
245 if in_alternatives {
247 if let Err(kind) = validate_jump_in_alternatives(&range) {
248 return Err(nom::Err::Failure(Error::new(
249 input.get_span_from(start),
250 kind,
251 )));
252 }
253 }
254
255 if let Some(to) = &range.to {
257 if range.from == *to && range.from == 1 {
258 return Ok((input, Token::MaskedByte(0, Mask::All)));
259 }
260 }
261 Ok((input, Token::Jump(range)))
262}
263
264fn validate_jump_in_alternatives(jump: &Jump) -> Result<(), ErrorKind> {
265 match jump.to {
266 None => Err(ErrorKind::JumpUnboundedInAlternation),
267 Some(to) => {
268 if to > JUMP_LIMIT_IN_ALTERNATIVES {
270 Err(ErrorKind::JumpTooBigInAlternation {
271 limit: JUMP_LIMIT_IN_ALTERNATIVES,
272 })
273 } else {
274 Ok(())
275 }
276 }
277 }
278}
279
280fn hex_token(input: Input, in_alternatives: bool) -> ParseResult<Token> {
287 alt((
288 not_token,
289 map(byte, Token::Byte),
290 map(masked_byte, |(v, mask)| Token::MaskedByte(v, mask)),
291 |input| range_as_hex_token(input, in_alternatives),
292 alternatives,
293 ))
294 .parse(input)
295}
296
297fn tokens(mut input: Input, in_alternatives: bool) -> ParseResult<Vec<Token>> {
303 let start = input.pos();
304
305 if input.string_recursion_counter >= input.params.string_recursion_limit {
311 return Err(nom::Err::Failure(Error::new(
312 input.get_span_from(start),
313 ErrorKind::HexStringTooDeep,
314 )));
315 }
316
317 input.string_recursion_counter += 1;
318 let (mut input, tokens) = many1(|input| hex_token(input, in_alternatives)).parse(input)?;
319 input.string_recursion_counter -= 1;
320
321 if matches!(tokens[0], Token::Jump(_))
322 || (tokens.len() > 1 && matches!(tokens[tokens.len() - 1], Token::Jump(_)))
323 {
324 Err(nom::Err::Failure(Error::new(
325 input.get_span_from(start),
326 ErrorKind::JumpAtBound,
327 )))
328 } else {
329 Ok((input, tokens))
330 }
331}
332
333#[cfg(test)]
334mod tests {
335 use crate::test_helpers::{parse, parse_err, test_public_type};
336 use crate::types::Params;
337 use nom::Finish;
338
339 use super::*;
340
341 #[test]
342 fn test_parse_hex_byte() {
343 parse(byte, "AF", "", 0xAF);
344 parse(byte, "10F", "F", 0x10);
345 parse(byte, "9E 1", "1", 0x9E);
346
347 parse_err(byte, "G1");
348 parse_err(byte, "1G");
349 parse_err(byte, "1");
350 parse_err(byte, " ");
351 }
352
353 #[test]
354 fn test_parse_masked_byte() {
355 parse(masked_byte, "?1", "", (1, Mask::Left));
356 parse(masked_byte, "C??", "?", (0xC, Mask::Right));
357 parse(masked_byte, "?? ", "", (0, Mask::All));
358
359 parse_err(masked_byte, "AB");
360 parse_err(masked_byte, " ?");
361 parse_err(masked_byte, "G?");
362 parse_err(masked_byte, "?G");
363 }
364
365 #[test]
366 fn test_parse_not_token() {
367 parse(not_token, "~23a", "a", Token::NotByte(0x23));
368 parse(
369 not_token,
370 "~?3b",
371 "b",
372 Token::NotMaskedByte(0x03, Mask::Left),
373 );
374 parse(
375 not_token,
376 "~F?",
377 "",
378 Token::NotMaskedByte(0x0F, Mask::Right),
379 );
380
381 parse_err(not_token, "~");
382 parse_err(not_token, "~1");
383 parse_err(not_token, "~1 2");
384 parse_err(not_token, "~ 12");
385 parse_err(not_token, "~??");
386 parse_err(not_token, "~g1");
387 parse_err(not_token, "~1g");
388 parse_err(not_token, "12");
389 parse_err(not_token, "?a");
390 parse_err(not_token, "a?");
391 parse_err(not_token, "??");
392 }
393
394 #[test]
395 fn test_range() {
396 parse(range, "[-] a", "a", Jump { from: 0, to: None });
397 parse(
398 range,
399 "[ 15 -35]",
400 "",
401 Jump {
402 from: 15,
403 to: Some(35),
404 },
405 );
406 parse(range, "[1- ]", "", Jump { from: 1, to: None });
407 parse(
408 range,
409 "[1-2]]",
410 "]",
411 Jump {
412 from: 1,
413 to: Some(2),
414 },
415 );
416 parse(
417 range,
418 "[ 1 - 2 ]",
419 "",
420 Jump {
421 from: 1,
422 to: Some(2),
423 },
424 );
425 parse(
426 range,
427 "[-1]",
428 "",
429 Jump {
430 from: 0,
431 to: Some(1),
432 },
433 );
434 parse(
435 range,
436 "[12 ]",
437 "",
438 Jump {
439 from: 12,
440 to: Some(12),
441 },
442 );
443
444 parse_err(range, "[");
445 parse_err(range, "[]");
446 parse_err(range, "[--]");
447 parse_err(range, "[1-2-3]");
448 parse_err(range, "[1-2-]");
449 parse_err(range, "[-2-]");
450 parse_err(range, "[d-e]");
451 parse_err(range, "[1 2]");
452 parse_err(range, "[999999999999-]");
453 parse_err(range, "[1-999999999999]");
454 parse_err(range, "[-999999999999]");
455
456 parse_err(range, "[4-2]");
458 parse_err(range, "[4-3]");
459 parse(
460 range,
461 "[4-4]",
462 "",
463 Jump {
464 from: 4,
465 to: Some(4),
466 },
467 );
468 parse_err(range, "[0]");
469 parse_err(range, "[0-0]");
470 parse(
471 range,
472 "[1]",
473 "",
474 Jump {
475 from: 1,
476 to: Some(1),
477 },
478 );
479 }
480
481 #[test]
482 fn test_alternatives() {
483 parse(
484 alternatives,
485 "( AB | 56 ?F ) ",
486 "",
487 Token::Alternatives(vec![
488 vec![Token::Byte(0xAB)],
489 vec![Token::Byte(0x56), Token::MaskedByte(0x0F, Mask::Left)],
490 ]),
491 );
492 parse(
493 alternatives,
494 "(12[1]C?|??[3-5]33)",
495 "",
496 Token::Alternatives(vec![
497 vec![
498 Token::Byte(0x12),
499 Token::MaskedByte(0, Mask::All),
500 Token::MaskedByte(0x0C, Mask::Right),
501 ],
502 vec![
503 Token::MaskedByte(0x00, Mask::All),
504 Token::Jump(Jump {
505 from: 3,
506 to: Some(5),
507 }),
508 Token::Byte(0x33),
509 ],
510 ]),
511 );
512 parse(
513 alternatives,
514 "( ( ?D | 23)| 15) ",
515 "",
516 Token::Alternatives(vec![
517 vec![Token::Alternatives(vec![
518 vec![Token::MaskedByte(0x0D, Mask::Left)],
519 vec![Token::Byte(0x23)],
520 ])],
521 vec![Token::Byte(0x15)],
522 ]),
523 );
524 parse(
525 alternatives,
526 "( AA (BB | CC) | DD | EE FF )",
527 "",
528 Token::Alternatives(vec![
529 vec![
530 Token::Byte(0xAA),
531 Token::Alternatives(vec![vec![Token::Byte(0xBB)], vec![Token::Byte(0xCC)]]),
532 ],
533 vec![Token::Byte(0xDD)],
534 vec![Token::Byte(0xEE), Token::Byte(0xFF)],
535 ]),
536 );
537
538 parse_err(alternatives, "( AB | [-] )");
539 parse_err(alternatives, "( AB | [1-] )");
540 parse_err(alternatives, "( AB | [1-250] )");
541 parse_err(alternatives, "( AB | [199-201] )");
542 parse_err(alternatives, "( AB | [200-201] )");
543 parse_err(alternatives, ")");
544 parse_err(alternatives, "()");
545 parse_err(alternatives, "(");
546 parse_err(alternatives, "(|)");
547 parse_err(alternatives, "(|");
548 parse_err(alternatives, "(AB|)");
549 parse_err(alternatives, "(|12)");
550 parse_err(alternatives, "(|123)");
551
552 parse_err(alternatives, "( [-] AB | CD )");
553 parse_err(alternatives, "( AB [1-2] | CD )");
554 parse_err(alternatives, "( AB | [3-] CD )");
555 parse_err(alternatives, "( AB | CD EF [-5] )");
556 }
557
558 #[test]
559 fn test_hex_string() {
560 parse(hex_string, "{ AB }", "", vec![Token::Byte(0xAB)]);
561
562 parse(
563 hex_string,
564 "{ DE AD BE EF }",
565 "",
566 vec![
567 Token::Byte(0xDE),
568 Token::Byte(0xAD),
569 Token::Byte(0xBE),
570 Token::Byte(0xEF),
571 ],
572 );
573 parse(
574 hex_string,
575 "{ 01 ?2 ?? 3? [1-] ( AF | DC ) }",
576 "",
577 vec![
578 Token::Byte(1),
579 Token::MaskedByte(2, Mask::Left),
580 Token::MaskedByte(0, Mask::All),
581 Token::MaskedByte(3, Mask::Right),
582 Token::Jump(Jump { from: 1, to: None }),
583 Token::Alternatives(vec![vec![Token::Byte(0xAF)], vec![Token::Byte(0xDC)]]),
584 ],
585 );
586
587 parse(
588 hex_string,
589 "{ 01 [1] 02 [2] 03 } a",
590 "a",
591 vec![
592 Token::Byte(1),
593 Token::MaskedByte(0, Mask::All),
594 Token::Byte(2),
595 Token::Jump(Jump {
596 from: 2,
597 to: Some(2),
598 }),
599 Token::Byte(3),
600 ],
601 );
602
603 parse_err(hex_string, "{ [-] }");
604 parse_err(hex_string, "{ [-] AB }");
605 parse_err(hex_string, "{ AB CD [-] }");
606
607 parse_err(hex_string, "AB");
608 parse_err(hex_string, "{");
609 parse_err(hex_string, "{}");
610 parse_err(hex_string, "{A}");
611 parse_err(hex_string, "{ABA}");
612 parse_err(hex_string, "{AB");
613 }
614
615 #[test]
616 fn test_stack_overflow() {
617 const STRING_RECURSION_TEST_LIMIT: u8 = 10;
621
622 let mut hex = String::new();
625 hex.push_str("{ AB ");
626 for _ in 0..10_000 {
627 hex.push_str("( CD | ");
628 }
629 for _ in 0..10_000 {
630 hex.push(')');
631 }
632 hex.push('}');
633
634 let input = Input::with_params(
635 &hex,
636 Params::default().string_recursion_limit(STRING_RECURSION_TEST_LIMIT),
637 );
638 let res = hex_string(input).finish();
639 assert_eq!(
640 &res.unwrap_err(),
641 &Error::new(70..70, ErrorKind::HexStringTooDeep)
642 );
643
644 let input = Input::with_params(
646 &hex,
647 Params::default().string_recursion_limit(STRING_RECURSION_TEST_LIMIT + 2),
648 );
649 let res = hex_string(input).finish();
650 assert_eq!(
651 &res.unwrap_err(),
652 &Error::new(84..84, ErrorKind::HexStringTooDeep)
653 );
654
655 let mut hex = String::new();
658 hex.push_str("{ AB ");
659 let nb = STRING_RECURSION_TEST_LIMIT - 1;
660 for _ in 0..nb {
661 hex.push_str("( CD | ");
662 }
663 for _ in 0..nb {
664 hex.push_str(" EF )");
665 }
666 hex.push_str(" EF ");
667 for _ in 0..nb {
668 hex.push_str("( CD | ");
669 }
670 for _ in 0..nb {
671 hex.push_str("EF )");
672 }
673 hex.push('}');
674
675 let input = Input::with_params(
676 &hex,
677 Params::default().string_recursion_limit(STRING_RECURSION_TEST_LIMIT),
678 );
679 let _res = hex_string(input).unwrap();
680 assert_eq!(input.string_recursion_counter, 0);
681 }
682
683 #[test]
684 fn test_parse_hex_string() {
685 assert!(parse_hex_string(r"{ AB }").is_ok());
686 assert!(parse_hex_string(r"AB").is_err());
687 }
688
689 #[test]
690 fn test_public_types() {
691 test_public_type(Token::Byte(3));
692 test_public_type(Mask::Left);
693 test_public_type(Jump { from: 3, to: None });
694 }
695}