1use alloc::vec::Vec;
2use core::num::ParseIntError;
3
4use thiserror_no_std::Error;
5
6use super::{
7 Lexer,
8 PositionedError,
9 Token,
10};
11use crate::{
12 Atom,
13 GenericBinaryPattern,
14 JumpType,
15 ReadWidth,
16};
17
18#[derive(Debug, Error, PartialEq, Eq, Clone)]
19pub enum ParseError {
20 #[error("unexpected token encountered")]
21 UnexpectedToken,
22
23 #[error("unexpected end of input")]
24 UnexpectedEnd,
25
26 #[error("invalid binary value")]
27 BinaryValueInvalid,
28
29 #[error("incomplete binary value; bits missing")]
30 BinaryValueIncomplete,
31
32 #[error("group not properly closed (missing ')')")]
33 GroupNotClosed,
34
35 #[error("block not properly closed (missing '}}')")]
36 BlockNotClosed,
37
38 #[error("invalid range bound: {0}")]
39 RangeBoundInvalid(ParseIntError),
40
41 #[error("range end must be greater than start")]
42 RangeEndMustBeGraterThenStart,
43
44 #[error("sequence exceeds maximum allowed size")]
45 SequenceTooLarge,
46}
47
/// A single digit position of a textual byte value.
#[derive(Clone, Copy, Debug)]
enum ByteSegment {
    /// A concrete digit, stored as its numeric value.
    Value(u8),
    /// A `?` placeholder whose bits are excluded from the match mask.
    Whildcard,
}
53
/// Numeric base a textual byte value is written in.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Radix {
    /// Hexadecimal digits, four bits each.
    Hex,
    /// Binary digits, one bit each.
    Bin,
}

impl Radix {
    /// Returns the radix whose marker (`0b` or `0x`) `value` starts with, or
    /// `None` when `value` carries neither marker.
    pub fn from_prefix(value: &str) -> Option<Self> {
        [Self::Bin, Self::Hex]
            .iter()
            .copied()
            .find(|radix| value.starts_with(radix.prefix()))
    }

    /// Returns the textual marker that introduces a value of this radix.
    pub fn prefix(&self) -> &'static str {
        if matches!(self, Self::Hex) {
            "0x"
        } else {
            "0b"
        }
    }

    /// Returns how many bits a single digit of this radix contributes.
    pub fn segment_bit_width(&self) -> usize {
        match *self {
            Self::Hex => 4,
            Self::Bin => 1,
        }
    }
}
85
86pub struct PatternParser<'a> {
87 lexer: Lexer<'a>,
88 peeked_token: Option<Token<'a>>,
89
90 byte_sequence: Vec<u8>,
91 atoms: Vec<Atom>,
92}
93
94impl<'a> PatternParser<'a> {
95 pub fn new(input: &'a str) -> Self {
96 Self {
97 lexer: Lexer::new(input),
98 peeked_token: None,
99
100 atoms: Vec::with_capacity(128),
101 byte_sequence: Vec::with_capacity(256),
102 }
103 }
104
105 fn peek_token(&mut self) -> Option<&Token<'a>> {
106 if self.peeked_token.is_none() {
107 self.peeked_token = self.lexer.next_token();
108 }
109
110 self.peeked_token.as_ref()
111 }
112
113 fn pop_token(&mut self) -> Result<Token<'a>, PositionedError<ParseError>> {
114 if let Some(token) = self.peeked_token.take() {
115 Ok(token)
116 } else if let Some(token) = self.lexer.next_token() {
117 Ok(token)
118 } else {
119 Err(PositionedError::new(
120 self.lexer.token_range(),
121 ParseError::UnexpectedEnd,
122 ))
123 }
124 }
125
126 fn bytes_commit(
127 &mut self,
128 sequence_start: usize,
129 ) -> Result<(u16, u16), PositionedError<ParseError>> {
130 assert!(sequence_start <= self.byte_sequence.len());
131
132 let bytes_end = self.byte_sequence.len();
133 if sequence_start > u16::MAX as usize || bytes_end > u16::MAX as usize {
134 return Err(PositionedError::new(
135 self.lexer.token_range(),
136 ParseError::SequenceTooLarge,
137 ));
138 }
139
140 Ok((sequence_start as u16, bytes_end as u16))
141 }
142
143 pub fn parse(mut self) -> Result<GenericBinaryPattern<'static>, PositionedError<ParseError>> {
144 let _ = self.parse_until(|_| false)?;
146 Ok(GenericBinaryPattern::new(self.atoms, self.byte_sequence))
147 }
148
149 fn parse_until(
150 &mut self,
151 matcher: impl Fn(&Token<'a>) -> bool,
152 ) -> Result<bool, PositionedError<ParseError>> {
153 while let Some(token) = self.peek_token() {
154 if matcher(token) {
155 return Ok(true);
156 }
157
158 match token {
159 Token::Text(_) => self.parse_bytes()?,
160 Token::Whildcard => self.parse_wildcard()?,
161
162 Token::PositionSave => self.parse_position_save()?,
163
164 Token::JumpRel1 => self.parse_jump()?,
165 Token::JumpRel4 => self.parse_jump()?,
166 Token::JumpAbs64 => self.parse_jump()?,
167
168 Token::Read1 => self.parse_read()?,
169 Token::Read2 => self.parse_read()?,
170 Token::Read4 => self.parse_read()?,
171
172 Token::GroupOpen => self.parse_group()?,
173 Token::RangeOpen => self.parse_range()?,
174
175 _ => {
176 return Err(PositionedError::new(
177 self.lexer.token_range(),
178 ParseError::UnexpectedToken,
179 ))
180 }
181 }
182 }
183
184 Ok(false)
185 }
186
187 fn parse_bytes_with_radix(&mut self, radix: Radix) -> Result<(), PositionedError<ParseError>> {
188 let Token::Text(value) = self.pop_token()? else {
189 return Err(PositionedError::new(
190 self.lexer.token_range(),
191 ParseError::UnexpectedToken,
192 ));
193 };
194
195 let radix_prefix = radix.prefix();
196 let (chars_token_index, chars) = if value.starts_with(radix_prefix) {
197 (
198 self.lexer.token_range().start + radix_prefix.len(),
199 value[radix_prefix.len()..].char_indices(),
200 )
201 } else {
202 (self.lexer.token_range().start, value.char_indices())
203 };
204
205 let mut byte_segments = Vec::new();
206 for (char_index, c) in chars {
207 if c == '?' {
208 byte_segments.push(ByteSegment::Whildcard);
209 continue;
210 }
211
212 let Some(value) = c.to_digit(1 << radix.segment_bit_width()) else {
213 return Err(PositionedError::new(
214 chars_token_index + char_index..chars_token_index + char_index + 1,
215 ParseError::BinaryValueInvalid,
216 ));
217 };
218
219 byte_segments.push(ByteSegment::Value(value as u8));
220 }
221
222 let byte_chunk_size = 8 / radix.segment_bit_width();
223 if byte_segments.len() % byte_chunk_size > 0 {
224 return Err(PositionedError::new(
225 self.lexer.token_range(),
226 ParseError::BinaryValueIncomplete,
227 ));
228 }
229
230 let bytes_start = self.byte_sequence.len();
231 for chunk in byte_segments.chunks_exact(byte_chunk_size) {
232 let mut byte = 0;
233 for segment in chunk {
234 byte <<= radix.segment_bit_width();
235 if let ByteSegment::Value(bits) = *segment {
236 byte |= bits;
237 }
238 }
239
240 self.byte_sequence.push(byte);
241 }
242
243 let (seq_start, seq_end) = self.bytes_commit(bytes_start)?;
244 if byte_segments
245 .iter()
246 .any(|seg| matches!(seg, ByteSegment::Whildcard))
247 {
248 let bytes_mask_start = self.byte_sequence.len();
249 for chunk in byte_segments.chunks_exact(byte_chunk_size) {
250 let mut byte = 0;
251 for segment in chunk {
252 byte <<= radix.segment_bit_width();
253 match segment {
254 ByteSegment::Value(_) => {
255 byte |= (1 << radix.segment_bit_width()) - 1;
256 }
257 ByteSegment::Whildcard => {
258 byte |= 0;
259 }
260 }
261 }
262
263 self.byte_sequence.push(byte);
264 }
265
266 let (mask_start, _mask_end) = self.bytes_commit(bytes_mask_start)?;
267 self.atoms.push(Atom::ByteSequenceMasked {
268 seq_start,
269 mask_start,
270 len: (byte_segments.len() / byte_chunk_size) as u16,
271 });
272 } else {
273 self.atoms.push(Atom::ByteSequence { seq_start, seq_end });
274 }
275 Ok(())
276 }
277
278 fn parse_bytes(&mut self) -> Result<(), PositionedError<ParseError>> {
279 let Some(Token::Text(value)) = self.peek_token() else {
280 return Err(PositionedError::new(
281 self.lexer.token_range(),
282 ParseError::UnexpectedToken,
283 ));
284 };
285
286 let radix = Radix::from_prefix(&value).unwrap_or(Radix::Hex);
287 self.parse_bytes_with_radix(radix)
288 }
289
290 fn parse_position_save(&mut self) -> Result<(), PositionedError<ParseError>> {
291 let Token::PositionSave = self.pop_token()? else {
292 return Err(PositionedError::new(
293 self.lexer.token_range(),
294 ParseError::UnexpectedToken,
295 ));
296 };
297
298 self.atoms.push(Atom::SaveCursor);
299 Ok(())
300 }
301
302 fn parse_wildcard(&mut self) -> Result<(), PositionedError<ParseError>> {
303 let Token::Whildcard = self.pop_token()? else {
304 return Err(PositionedError::new(
305 self.lexer.token_range(),
306 ParseError::UnexpectedToken,
307 ));
308 };
309
310 self.atoms.push(Atom::WildcardFixed(1));
311 Ok(())
312 }
313
314 fn parse_read(&mut self) -> Result<(), PositionedError<ParseError>> {
315 let read_width = match self.pop_token()? {
316 Token::Read1 => ReadWidth::Byte,
317 Token::Read2 => ReadWidth::Word,
318 Token::Read4 => ReadWidth::DWord,
319 _ => {
320 return Err(PositionedError::new(
321 self.lexer.token_range(),
322 ParseError::UnexpectedToken,
323 ))
324 }
325 };
326
327 self.atoms.push(Atom::Read(read_width));
328 Ok(())
329 }
330
331 fn parse_jump(&mut self) -> Result<(), PositionedError<ParseError>> {
332 let (jump_type, width) = match self.pop_token()? {
333 Token::JumpRel1 => (JumpType::RelByte, 1),
334 Token::JumpRel4 => (JumpType::RelDWord, 4),
335 Token::JumpAbs64 => (JumpType::AbsQWord, 8),
336 _ => {
337 return Err(PositionedError::new(
338 self.lexer.token_range(),
339 ParseError::UnexpectedToken,
340 ))
341 }
342 };
343
344 if matches!(self.peek_token(), Some(Token::BlockOpen)) {
345 let _ = self.pop_token()?;
346 self.atoms.push(Atom::CursorPush);
347 self.atoms.push(Atom::Jump(jump_type));
348
349 let block_start = self.lexer.token_range();
350 if !self.parse_until(|token| matches!(token, Token::BlockClose))? {
351 return Err(PositionedError::new(
352 block_start,
353 ParseError::BlockNotClosed,
354 ));
355 }
356
357 self.atoms.push(Atom::CursorPop { advance: width });
358 let _ = self.pop_token()?;
359 } else {
360 self.atoms.push(Atom::Jump(jump_type));
361 }
362
363 Ok(())
364 }
365
366 fn parse_group(&mut self) -> Result<(), PositionedError<ParseError>> {
367 let Token::GroupOpen = self.pop_token()? else {
368 return Err(PositionedError::new(
369 self.lexer.token_range(),
370 ParseError::UnexpectedToken,
371 ));
372 };
373
374 let group_start = self.lexer.token_range();
375 let mut branch_atoms = Vec::with_capacity(8);
376 loop {
377 let branch_atom_index = self.atoms.len();
378 self.atoms.push(Atom::Branch {
379 left_len: 0,
380 right_len: 0,
381 });
382
383 if !self.parse_until(|token| matches!(token, Token::GroupClose | Token::GroupPipe))? {
384 return Err(PositionedError::new(
385 group_start,
386 ParseError::GroupNotClosed,
387 ));
388 }
389
390 if matches!(self.pop_token()?, Token::GroupClose) {
391 self.atoms.remove(branch_atom_index);
393 break;
394 }
395
396 let left_branch_len = self.atoms.len() - branch_atom_index - 1;
397 if left_branch_len > u16::MAX as usize {
398 return Err(PositionedError::new(
399 self.lexer.token_range(),
400 ParseError::SequenceTooLarge,
401 ));
402 }
403
404 if let Atom::Branch { left_len, .. } = &mut self.atoms[branch_atom_index] {
405 *left_len = left_branch_len as u16;
406 } else {
407 unreachable!("atom should be a branch");
408 }
409
410 branch_atoms.push(branch_atom_index);
411 }
412
413 let atom_count = self.atoms.len();
414 for branch_atom_index in branch_atoms {
415 if let Atom::Branch {
416 left_len,
417 right_len,
418 } = &mut self.atoms[branch_atom_index]
419 {
420 let right_branch_len = atom_count - *left_len as usize - branch_atom_index - 1;
421 if right_branch_len > u16::MAX as usize {
422 return Err(PositionedError::new(
423 self.lexer.token_range(),
424 ParseError::SequenceTooLarge,
425 ));
426 }
427
428 *right_len = right_branch_len as u16;
429 } else {
430 unreachable!("atom should be a branch");
431 }
432 }
433
434 Ok(())
435 }
436
437 fn parse_range(&mut self) -> Result<(), PositionedError<ParseError>> {
438 let Token::RangeOpen = self.pop_token()? else {
439 return Err(PositionedError::new(
440 self.lexer.token_range(),
441 ParseError::UnexpectedToken,
442 ));
443 };
444
445 let Token::Text(range_start) = self.pop_token()? else {
446 return Err(PositionedError::new(
447 self.lexer.token_range(),
448 ParseError::UnexpectedToken,
449 ));
450 };
451
452 let range_start = range_start.parse::<u16>().map_err(|err| {
453 PositionedError::new(self.lexer.token_range(), ParseError::RangeBoundInvalid(err))
454 })?;
455
456 match self.pop_token()? {
457 Token::RangeClose => {
458 self.atoms.push(Atom::WildcardFixed(range_start));
459 Ok(())
460 }
461 Token::RangeSeperator => {
462 let Token::Text(range_end) = self.pop_token()? else {
463 return Err(PositionedError::new(
464 self.lexer.token_range(),
465 ParseError::UnexpectedToken,
466 ));
467 };
468
469 let range_end = range_end.parse::<u16>().map_err(|err| {
470 PositionedError::new(
471 self.lexer.token_range(),
472 ParseError::RangeBoundInvalid(err),
473 )
474 })?;
475
476 if range_end <= range_start {
477 return Err(PositionedError::new(
478 self.lexer.token_range(),
479 ParseError::RangeEndMustBeGraterThenStart,
480 ));
481 }
482
483 self.atoms.push(Atom::WildcardRange {
484 min: range_start,
485 max: range_end,
486 });
487 if !matches!(self.pop_token()?, Token::RangeClose) {
488 Err(PositionedError::new(
489 self.lexer.token_range(),
490 ParseError::UnexpectedToken,
491 ))
492 } else {
493 Ok(())
494 }
495 }
496 _ => Err(PositionedError::new(
497 self.lexer.token_range(),
498 ParseError::UnexpectedToken,
499 )),
500 }
501 }
502}
503
504pub fn parse_pattern(
506 pattern: &str,
507) -> Result<GenericBinaryPattern<'static>, PositionedError<ParseError>> {
508 let parser = PatternParser::new(pattern);
509 parser.parse()
510}
511
#[cfg(test)]
mod test {
    use super::PatternParser;
    use crate::{
        compiler::{
            parser::ParseError,
            PositionedError,
        },
        pattern::BinaryPattern,
        Atom,
        JumpType,
    };

    /// Parses `input` and checks both the emitted atoms and the byte buffer.
    #[track_caller]
    fn assert_parses(input: &str, atoms: &[Atom], bytes: &[u8]) {
        let pattern = PatternParser::new(input).parse().unwrap();
        assert_eq!(pattern.atoms(), atoms);
        assert_eq!(pattern.byte_sequence(), bytes);
    }

    /// Parses `input`, which must be rejected, and returns the error.
    #[track_caller]
    fn parse_err(input: &str) -> PositionedError<ParseError> {
        PatternParser::new(input).parse().unwrap_err()
    }

    /// Shorthand for an unmasked byte sequence atom.
    fn seq(seq_start: u16, seq_end: u16) -> Atom {
        Atom::ByteSequence { seq_start, seq_end }
    }

    /// Shorthand for a masked byte sequence atom.
    fn masked(seq_start: u16, mask_start: u16, len: u16) -> Atom {
        Atom::ByteSequenceMasked {
            seq_start,
            mask_start,
            len,
        }
    }

    /// Shorthand for a branch atom.
    fn branch(left_len: u16, right_len: u16) -> Atom {
        Atom::Branch {
            left_len,
            right_len,
        }
    }

    #[test]
    fn test_byte_sequence_bin() {
        assert_parses("0b10011100", &[seq(0, 1)], &[0b10011100]);
        assert_parses(
            "0b1001110011110000",
            &[seq(0, 2)],
            &[0b10011100, 0b11110000],
        );
    }

    #[test]
    fn test_byte_sequence_hex() {
        assert_parses(
            "FF 00 12",
            &[seq(0, 1), seq(1, 2), seq(2, 3)],
            &[0xFF, 0x00, 0x12],
        );
        assert_parses("FF00 12", &[seq(0, 2), seq(2, 3)], &[0xFF, 0x00, 0x12]);
        assert_parses("0xDEADBEEF", &[seq(0, 4)], &[0xDE, 0xAD, 0xBE, 0xEF]);

        assert_eq!(
            parse_err("FF0"),
            PositionedError::new(0..3, ParseError::BinaryValueIncomplete)
        );
        assert_eq!(
            parse_err("FX"),
            PositionedError::new(1..2, ParseError::BinaryValueInvalid)
        );
    }

    #[test]
    fn test_byte_sequence_mask_hex() {
        assert_parses("A?", &[masked(0, 1, 1)], &[0xA0, 0xF0]);
        assert_parses("F?E?", &[masked(0, 2, 2)], &[0xF0, 0xE0, 0xF0, 0xF0]);
    }

    #[test]
    fn test_byte_sequence_mask_bin() {
        assert_parses("0b100??001", &[masked(0, 1, 1)], &[0x81, 0xE7]);
        assert_parses(
            "0b100??001?????111",
            &[masked(0, 2, 2)],
            &[0x81, 0x07, 0xE7, 0x07],
        );
    }

    #[test]
    fn test_byte_wildcard() {
        assert_parses("?", &[Atom::WildcardFixed(1)], &[]);
        assert_parses(
            "AB ? CD",
            &[seq(0x00, 0x01), Atom::WildcardFixed(1), seq(0x01, 0x02)],
            &[0xAB, 0xCD],
        );
        assert_parses(
            "AB ?? CD",
            &[seq(0x00, 0x01), masked(0x01, 0x02, 0x01), seq(0x03, 0x04)],
            &[0xAB, 0x00, 0x00, 0xCD],
        );

        assert_eq!(
            parse_err("AB ??? CD"),
            PositionedError::new(3..6, ParseError::BinaryValueIncomplete)
        );
    }

    #[test]
    fn test_byte_wildcard_nl() {
        assert_parses(
            "?\n?",
            &[Atom::WildcardFixed(1), Atom::WildcardFixed(1)],
            &[],
        );
    }

    #[test]
    fn test_jump() {
        assert_parses(
            "%$* FF * % $",
            &[
                Atom::Jump(JumpType::RelByte),
                Atom::Jump(JumpType::RelDWord),
                Atom::Jump(JumpType::AbsQWord),
                seq(0, 1),
                Atom::Jump(JumpType::AbsQWord),
                Atom::Jump(JumpType::RelByte),
                Atom::Jump(JumpType::RelDWord),
            ],
            &[0xFF],
        );
    }

    #[test]
    fn test_jump_block() {
        assert_parses(
            "$ { FE }",
            &[
                Atom::CursorPush,
                Atom::Jump(JumpType::RelDWord),
                seq(0, 1),
                Atom::CursorPop { advance: 4 },
            ],
            &[0xFE],
        );
        assert_parses(
            "$ { FE $ { FF } }",
            &[
                Atom::CursorPush,
                Atom::Jump(JumpType::RelDWord),
                seq(0, 1),
                Atom::CursorPush,
                Atom::Jump(JumpType::RelDWord),
                seq(1, 2),
                Atom::CursorPop { advance: 4 },
                Atom::CursorPop { advance: 4 },
            ],
            &[0xFE, 0xFF],
        );

        assert_eq!(
            parse_err("$ { FE"),
            PositionedError::new(2..3, ParseError::BlockNotClosed)
        );
    }

    #[test]
    fn test_group() {
        assert_parses("()", &[], &[]);
        assert_parses("( FF00 )", &[seq(0, 2)], &[0xFF, 0x00]);
        assert_parses(
            "( 01 | 02 03 )",
            &[branch(1, 2), seq(0, 1), seq(1, 2), seq(2, 3)],
            &[0x01, 0x02, 0x03],
        );
        assert_parses(
            "( 01 | 02 03 | FF )",
            &[
                branch(1, 4),
                seq(0, 1),
                branch(2, 1),
                seq(1, 2),
                seq(2, 3),
                seq(3, 4),
            ],
            &[0x01, 0x02, 0x03, 0xFF],
        );
        assert_parses(
            "( 01 | ( 02 | 03 ) )",
            &[branch(1, 3), seq(0, 1), branch(1, 1), seq(1, 2), seq(2, 3)],
            &[0x01, 0x02, 0x03],
        );
        assert_parses(
            "( (01 | 02) | 03 )",
            &[branch(3, 1), branch(1, 1), seq(0, 1), seq(1, 2), seq(2, 3)],
            &[0x01, 0x02, 0x03],
        );

        assert_eq!(
            parse_err("("),
            PositionedError::new(0..1, ParseError::GroupNotClosed)
        );
        assert_eq!(
            parse_err("( FF 00"),
            PositionedError::new(0..1, ParseError::GroupNotClosed)
        );
        assert_eq!(
            parse_err(" (|"),
            PositionedError::new(1..2, ParseError::GroupNotClosed)
        );
    }

    #[test]
    fn test_range() {
        assert_parses(
            "[0] [123]",
            &[Atom::WildcardFixed(0), Atom::WildcardFixed(123)],
            &[],
        );
        assert_parses(
            "[0-10] [123- 999]",
            &[
                Atom::WildcardRange { min: 0, max: 10 },
                Atom::WildcardRange { min: 123, max: 999 },
            ],
            &[],
        );

        assert_eq!(
            parse_err("[0-]"),
            PositionedError::new(3..4, ParseError::UnexpectedToken)
        );

        let invalid_bound = parse_err("[FF 0-3]");
        assert_eq!(*invalid_bound.position(), 1..3);
        assert!(matches!(
            invalid_bound.inner(),
            ParseError::RangeBoundInvalid(_)
        ));

        assert_eq!(
            parse_err("[0-3"),
            PositionedError::new(3..4, ParseError::UnexpectedEnd)
        );
        assert_eq!(
            parse_err("[3-0]"),
            PositionedError::new(3..4, ParseError::RangeEndMustBeGraterThenStart)
        );
    }
}