bmatcher_core/compiler/
parser.rs

1use alloc::vec::Vec;
2use core::num::ParseIntError;
3
4use thiserror_no_std::Error;
5
6use super::{
7    Lexer,
8    PositionedError,
9    Token,
10};
11use crate::{
12    Atom,
13    GenericBinaryPattern,
14    JumpType,
15    ReadWidth,
16};
17
/// Errors raised by the pattern parser in this module.
#[derive(Debug, Error, PartialEq, Eq, Clone)]
pub enum ParseError {
    /// A token appeared at a position where the parser does not accept it.
    #[error("unexpected token encountered")]
    UnexpectedToken,

    /// Another token was required but the lexer had none left.
    #[error("unexpected end of input")]
    UnexpectedEnd,

    /// A byte literal contains a character that is neither `?` nor a digit
    /// of the literal's radix.
    #[error("invalid binary value")]
    BinaryValueInvalid,

    /// The digits of a byte literal do not add up to whole bytes.
    #[error("incomplete binary value; bits missing")]
    BinaryValueIncomplete,

    /// The input ended before the `)` of an opened group was found.
    #[error("group not properly closed (missing ')')")]
    GroupNotClosed,

    /// The input ended before the `}` of an opened jump block was found.
    /* `}}` is the escaped form of a single `}` within the format string */
    #[error("block not properly closed (missing '}}')")]
    BlockNotClosed,

    /// A range bound could not be parsed as `u16`.
    #[error("invalid range bound: {0}")]
    RangeBoundInvalid(ParseIntError),

    /// The end of a range is less than or equal to its start.
    #[error("range end must be greater than start")]
    RangeEndMustBeGraterThenStart,

    /// A byte offset or a branch length does not fit into `u16`.
    #[error("sequence exceeds maximum allowed size")]
    SequenceTooLarge,
}
47
/// One digit position of a byte literal.
#[derive(Debug, Clone, Copy)]
enum ByteSegment {
    /// A concrete digit; holds the parsed digit value.
    Value(u8),
    /// A `?` digit, i.e. a position without a fixed value.
    Whildcard,
}
53
/// Numeric base in which a byte literal is written.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Radix {
    Hex,
    Bin,
}

impl Radix {
    /// Detects the radix from the leading marker of `value`.
    ///
    /// Returns `None` when `value` starts with neither `0b` nor `0x`.
    pub fn from_prefix(value: &str) -> Option<Self> {
        /* the two markers are mutually exclusive, the probing order is kept nevertheless */
        [Self::Bin, Self::Hex]
            .iter()
            .copied()
            .find(|radix| value.starts_with(radix.prefix()))
    }

    /// The textual marker which introduces a literal of this radix.
    pub fn prefix(&self) -> &'static str {
        match *self {
            Self::Hex => "0x",
            Self::Bin => "0b",
        }
    }

    /// Number of bits a single digit of this radix contributes.
    pub fn segment_bit_width(&self) -> usize {
        match *self {
            Self::Hex => 4,
            Self::Bin => 1,
        }
    }
}
85
/// Parser which turns the tokens of a textual pattern into atoms plus the
/// byte data those atoms refer to.
pub struct PatternParser<'a> {
    /// Token source for the pattern text.
    lexer: Lexer<'a>,
    /// Single token of lookahead; filled by `peek_token`, drained by `pop_token`.
    peeked_token: Option<Token<'a>>,

    /// Literal bytes and mask bytes, addressed by the atoms via `u16` offsets.
    byte_sequence: Vec<u8>,
    /// Atoms emitted so far, in the order they were parsed.
    atoms: Vec<Atom>,
}
93
94impl<'a> PatternParser<'a> {
95    pub fn new(input: &'a str) -> Self {
96        Self {
97            lexer: Lexer::new(input),
98            peeked_token: None,
99
100            atoms: Vec::with_capacity(128),
101            byte_sequence: Vec::with_capacity(256),
102        }
103    }
104
105    fn peek_token(&mut self) -> Option<&Token<'a>> {
106        if self.peeked_token.is_none() {
107            self.peeked_token = self.lexer.next_token();
108        }
109
110        self.peeked_token.as_ref()
111    }
112
113    fn pop_token(&mut self) -> Result<Token<'a>, PositionedError<ParseError>> {
114        if let Some(token) = self.peeked_token.take() {
115            Ok(token)
116        } else if let Some(token) = self.lexer.next_token() {
117            Ok(token)
118        } else {
119            Err(PositionedError::new(
120                self.lexer.token_range(),
121                ParseError::UnexpectedEnd,
122            ))
123        }
124    }
125
126    fn bytes_commit(
127        &mut self,
128        sequence_start: usize,
129    ) -> Result<(u16, u16), PositionedError<ParseError>> {
130        assert!(sequence_start <= self.byte_sequence.len());
131
132        let bytes_end = self.byte_sequence.len();
133        if sequence_start > u16::MAX as usize || bytes_end > u16::MAX as usize {
134            return Err(PositionedError::new(
135                self.lexer.token_range(),
136                ParseError::SequenceTooLarge,
137            ));
138        }
139
140        Ok((sequence_start as u16, bytes_end as u16))
141    }
142
143    pub fn parse(mut self) -> Result<GenericBinaryPattern<'static>, PositionedError<ParseError>> {
144        /* parse until the end :) */
145        let _ = self.parse_until(|_| false)?;
146        Ok(GenericBinaryPattern::new(self.atoms, self.byte_sequence))
147    }
148
149    fn parse_until(
150        &mut self,
151        matcher: impl Fn(&Token<'a>) -> bool,
152    ) -> Result<bool, PositionedError<ParseError>> {
153        while let Some(token) = self.peek_token() {
154            if matcher(token) {
155                return Ok(true);
156            }
157
158            match token {
159                Token::Text(_) => self.parse_bytes()?,
160                Token::Whildcard => self.parse_wildcard()?,
161
162                Token::PositionSave => self.parse_position_save()?,
163
164                Token::JumpRel1 => self.parse_jump()?,
165                Token::JumpRel4 => self.parse_jump()?,
166                Token::JumpAbs64 => self.parse_jump()?,
167
168                Token::Read1 => self.parse_read()?,
169                Token::Read2 => self.parse_read()?,
170                Token::Read4 => self.parse_read()?,
171
172                Token::GroupOpen => self.parse_group()?,
173                Token::RangeOpen => self.parse_range()?,
174
175                _ => {
176                    return Err(PositionedError::new(
177                        self.lexer.token_range(),
178                        ParseError::UnexpectedToken,
179                    ))
180                }
181            }
182        }
183
184        Ok(false)
185    }
186
187    fn parse_bytes_with_radix(&mut self, radix: Radix) -> Result<(), PositionedError<ParseError>> {
188        let Token::Text(value) = self.pop_token()? else {
189            return Err(PositionedError::new(
190                self.lexer.token_range(),
191                ParseError::UnexpectedToken,
192            ));
193        };
194
195        let radix_prefix = radix.prefix();
196        let (chars_token_index, chars) = if value.starts_with(radix_prefix) {
197            (
198                self.lexer.token_range().start + radix_prefix.len(),
199                value[radix_prefix.len()..].char_indices(),
200            )
201        } else {
202            (self.lexer.token_range().start, value.char_indices())
203        };
204
205        let mut byte_segments = Vec::new();
206        for (char_index, c) in chars {
207            if c == '?' {
208                byte_segments.push(ByteSegment::Whildcard);
209                continue;
210            }
211
212            let Some(value) = c.to_digit(1 << radix.segment_bit_width()) else {
213                return Err(PositionedError::new(
214                    chars_token_index + char_index..chars_token_index + char_index + 1,
215                    ParseError::BinaryValueInvalid,
216                ));
217            };
218
219            byte_segments.push(ByteSegment::Value(value as u8));
220        }
221
222        let byte_chunk_size = 8 / radix.segment_bit_width();
223        if byte_segments.len() % byte_chunk_size > 0 {
224            return Err(PositionedError::new(
225                self.lexer.token_range(),
226                ParseError::BinaryValueIncomplete,
227            ));
228        }
229
230        let bytes_start = self.byte_sequence.len();
231        for chunk in byte_segments.chunks_exact(byte_chunk_size) {
232            let mut byte = 0;
233            for segment in chunk {
234                byte <<= radix.segment_bit_width();
235                if let ByteSegment::Value(bits) = *segment {
236                    byte |= bits;
237                }
238            }
239
240            self.byte_sequence.push(byte);
241        }
242
243        let (seq_start, seq_end) = self.bytes_commit(bytes_start)?;
244        if byte_segments
245            .iter()
246            .any(|seg| matches!(seg, ByteSegment::Whildcard))
247        {
248            let bytes_mask_start = self.byte_sequence.len();
249            for chunk in byte_segments.chunks_exact(byte_chunk_size) {
250                let mut byte = 0;
251                for segment in chunk {
252                    byte <<= radix.segment_bit_width();
253                    match segment {
254                        ByteSegment::Value(_) => {
255                            byte |= (1 << radix.segment_bit_width()) - 1;
256                        }
257                        ByteSegment::Whildcard => {
258                            byte |= 0;
259                        }
260                    }
261                }
262
263                self.byte_sequence.push(byte);
264            }
265
266            let (mask_start, _mask_end) = self.bytes_commit(bytes_mask_start)?;
267            self.atoms.push(Atom::ByteSequenceMasked {
268                seq_start,
269                mask_start,
270                len: (byte_segments.len() / byte_chunk_size) as u16,
271            });
272        } else {
273            self.atoms.push(Atom::ByteSequence { seq_start, seq_end });
274        }
275        Ok(())
276    }
277
278    fn parse_bytes(&mut self) -> Result<(), PositionedError<ParseError>> {
279        let Some(Token::Text(value)) = self.peek_token() else {
280            return Err(PositionedError::new(
281                self.lexer.token_range(),
282                ParseError::UnexpectedToken,
283            ));
284        };
285
286        let radix = Radix::from_prefix(&value).unwrap_or(Radix::Hex);
287        self.parse_bytes_with_radix(radix)
288    }
289
290    fn parse_position_save(&mut self) -> Result<(), PositionedError<ParseError>> {
291        let Token::PositionSave = self.pop_token()? else {
292            return Err(PositionedError::new(
293                self.lexer.token_range(),
294                ParseError::UnexpectedToken,
295            ));
296        };
297
298        self.atoms.push(Atom::SaveCursor);
299        Ok(())
300    }
301
302    fn parse_wildcard(&mut self) -> Result<(), PositionedError<ParseError>> {
303        let Token::Whildcard = self.pop_token()? else {
304            return Err(PositionedError::new(
305                self.lexer.token_range(),
306                ParseError::UnexpectedToken,
307            ));
308        };
309
310        self.atoms.push(Atom::WildcardFixed(1));
311        Ok(())
312    }
313
314    fn parse_read(&mut self) -> Result<(), PositionedError<ParseError>> {
315        let read_width = match self.pop_token()? {
316            Token::Read1 => ReadWidth::Byte,
317            Token::Read2 => ReadWidth::Word,
318            Token::Read4 => ReadWidth::DWord,
319            _ => {
320                return Err(PositionedError::new(
321                    self.lexer.token_range(),
322                    ParseError::UnexpectedToken,
323                ))
324            }
325        };
326
327        self.atoms.push(Atom::Read(read_width));
328        Ok(())
329    }
330
331    fn parse_jump(&mut self) -> Result<(), PositionedError<ParseError>> {
332        let (jump_type, width) = match self.pop_token()? {
333            Token::JumpRel1 => (JumpType::RelByte, 1),
334            Token::JumpRel4 => (JumpType::RelDWord, 4),
335            Token::JumpAbs64 => (JumpType::AbsQWord, 8),
336            _ => {
337                return Err(PositionedError::new(
338                    self.lexer.token_range(),
339                    ParseError::UnexpectedToken,
340                ))
341            }
342        };
343
344        if matches!(self.peek_token(), Some(Token::BlockOpen)) {
345            let _ = self.pop_token()?;
346            self.atoms.push(Atom::CursorPush);
347            self.atoms.push(Atom::Jump(jump_type));
348
349            let block_start = self.lexer.token_range();
350            if !self.parse_until(|token| matches!(token, Token::BlockClose))? {
351                return Err(PositionedError::new(
352                    block_start,
353                    ParseError::BlockNotClosed,
354                ));
355            }
356
357            self.atoms.push(Atom::CursorPop { advance: width });
358            let _ = self.pop_token()?;
359        } else {
360            self.atoms.push(Atom::Jump(jump_type));
361        }
362
363        Ok(())
364    }
365
366    fn parse_group(&mut self) -> Result<(), PositionedError<ParseError>> {
367        let Token::GroupOpen = self.pop_token()? else {
368            return Err(PositionedError::new(
369                self.lexer.token_range(),
370                ParseError::UnexpectedToken,
371            ));
372        };
373
374        let group_start = self.lexer.token_range();
375        let mut branch_atoms = Vec::with_capacity(8);
376        loop {
377            let branch_atom_index = self.atoms.len();
378            self.atoms.push(Atom::Branch {
379                left_len: 0,
380                right_len: 0,
381            });
382
383            if !self.parse_until(|token| matches!(token, Token::GroupClose | Token::GroupPipe))? {
384                return Err(PositionedError::new(
385                    group_start,
386                    ParseError::GroupNotClosed,
387                ));
388            }
389
390            if matches!(self.pop_token()?, Token::GroupClose) {
391                /* group end, no more entries */
392                self.atoms.remove(branch_atom_index);
393                break;
394            }
395
396            let left_branch_len = self.atoms.len() - branch_atom_index - 1;
397            if left_branch_len > u16::MAX as usize {
398                return Err(PositionedError::new(
399                    self.lexer.token_range(),
400                    ParseError::SequenceTooLarge,
401                ));
402            }
403
404            if let Atom::Branch { left_len, .. } = &mut self.atoms[branch_atom_index] {
405                *left_len = left_branch_len as u16;
406            } else {
407                unreachable!("atom should be a branch");
408            }
409
410            branch_atoms.push(branch_atom_index);
411        }
412
413        let atom_count = self.atoms.len();
414        for branch_atom_index in branch_atoms {
415            if let Atom::Branch {
416                left_len,
417                right_len,
418            } = &mut self.atoms[branch_atom_index]
419            {
420                let right_branch_len = atom_count - *left_len as usize - branch_atom_index - 1;
421                if right_branch_len > u16::MAX as usize {
422                    return Err(PositionedError::new(
423                        self.lexer.token_range(),
424                        ParseError::SequenceTooLarge,
425                    ));
426                }
427
428                *right_len = right_branch_len as u16;
429            } else {
430                unreachable!("atom should be a branch");
431            }
432        }
433
434        Ok(())
435    }
436
437    fn parse_range(&mut self) -> Result<(), PositionedError<ParseError>> {
438        let Token::RangeOpen = self.pop_token()? else {
439            return Err(PositionedError::new(
440                self.lexer.token_range(),
441                ParseError::UnexpectedToken,
442            ));
443        };
444
445        let Token::Text(range_start) = self.pop_token()? else {
446            return Err(PositionedError::new(
447                self.lexer.token_range(),
448                ParseError::UnexpectedToken,
449            ));
450        };
451
452        let range_start = range_start.parse::<u16>().map_err(|err| {
453            PositionedError::new(self.lexer.token_range(), ParseError::RangeBoundInvalid(err))
454        })?;
455
456        match self.pop_token()? {
457            Token::RangeClose => {
458                self.atoms.push(Atom::WildcardFixed(range_start));
459                Ok(())
460            }
461            Token::RangeSeperator => {
462                let Token::Text(range_end) = self.pop_token()? else {
463                    return Err(PositionedError::new(
464                        self.lexer.token_range(),
465                        ParseError::UnexpectedToken,
466                    ));
467                };
468
469                let range_end = range_end.parse::<u16>().map_err(|err| {
470                    PositionedError::new(
471                        self.lexer.token_range(),
472                        ParseError::RangeBoundInvalid(err),
473                    )
474                })?;
475
476                if range_end <= range_start {
477                    return Err(PositionedError::new(
478                        self.lexer.token_range(),
479                        ParseError::RangeEndMustBeGraterThenStart,
480                    ));
481                }
482
483                self.atoms.push(Atom::WildcardRange {
484                    min: range_start,
485                    max: range_end,
486                });
487                if !matches!(self.pop_token()?, Token::RangeClose) {
488                    Err(PositionedError::new(
489                        self.lexer.token_range(),
490                        ParseError::UnexpectedToken,
491                    ))
492                } else {
493                    Ok(())
494                }
495            }
496            _ => Err(PositionedError::new(
497                self.lexer.token_range(),
498                ParseError::UnexpectedToken,
499            )),
500        }
501    }
502}
503
504/// Parse the given string as pattern.
505pub fn parse_pattern(
506    pattern: &str,
507) -> Result<GenericBinaryPattern<'static>, PositionedError<ParseError>> {
508    let parser = PatternParser::new(pattern);
509    parser.parse()
510}
511
512#[cfg(test)]
513mod test {
514    use super::PatternParser;
515    use crate::{
516        compiler::{
517            parser::ParseError,
518            PositionedError,
519        },
520        pattern::BinaryPattern,
521        Atom,
522        JumpType,
523    };
524
525    #[test]
526    fn test_byte_sequence_bin() {
527        {
528            let parser = PatternParser::new("0b10011100");
529            let result = parser.parse().unwrap();
530            assert_eq!(
531                result.atoms(),
532                &[Atom::ByteSequence {
533                    seq_start: 0,
534                    seq_end: 1
535                },]
536            );
537            assert_eq!(result.byte_sequence(), &[0b10011100]);
538        }
539
540        {
541            let parser = PatternParser::new("0b1001110011110000");
542            let result = parser.parse().unwrap();
543            assert_eq!(
544                result.atoms(),
545                &[Atom::ByteSequence {
546                    seq_start: 0,
547                    seq_end: 2
548                },]
549            );
550            assert_eq!(result.byte_sequence(), &[0b10011100, 0b11110000]);
551        }
552    }
553
554    #[test]
555    fn test_byte_sequence_hex() {
556        {
557            let parser = PatternParser::new("FF 00 12");
558            let result = parser.parse().unwrap();
559            assert_eq!(
560                result.atoms(),
561                &[
562                    Atom::ByteSequence {
563                        seq_start: 0,
564                        seq_end: 1
565                    },
566                    Atom::ByteSequence {
567                        seq_start: 1,
568                        seq_end: 2
569                    },
570                    Atom::ByteSequence {
571                        seq_start: 2,
572                        seq_end: 3
573                    }
574                ]
575            );
576            assert_eq!(result.byte_sequence(), &[0xFF, 0x00, 0x12]);
577        }
578
579        {
580            let parser = PatternParser::new("FF00 12");
581            let result = parser.parse().unwrap();
582            assert_eq!(
583                result.atoms(),
584                &[
585                    Atom::ByteSequence {
586                        seq_start: 0,
587                        seq_end: 2
588                    },
589                    Atom::ByteSequence {
590                        seq_start: 2,
591                        seq_end: 3
592                    }
593                ]
594            );
595            assert_eq!(result.byte_sequence(), &[0xFF, 0x00, 0x12]);
596        }
597
598        {
599            let parser = PatternParser::new("0xDEADBEEF");
600            let result = parser.parse().unwrap();
601            assert_eq!(
602                result.atoms(),
603                &[Atom::ByteSequence {
604                    seq_start: 0,
605                    seq_end: 4
606                },]
607            );
608            assert_eq!(result.byte_sequence(), &[0xDE, 0xAD, 0xBE, 0xEF]);
609        }
610
611        {
612            let parser = PatternParser::new("FF0");
613            let result = parser.parse().unwrap_err();
614            assert_eq!(
615                &result,
616                &PositionedError::new(0..3, ParseError::BinaryValueIncomplete)
617            );
618        }
619
620        {
621            let parser = PatternParser::new("FX");
622            let result = parser.parse().unwrap_err();
623            assert_eq!(
624                &result,
625                &PositionedError::new(1..2, ParseError::BinaryValueInvalid)
626            );
627        }
628    }
629
630    #[test]
631    fn test_byte_sequence_mask_hex() {
632        {
633            let parser = PatternParser::new("A?");
634            let result = parser.parse().unwrap();
635            assert_eq!(
636                result.atoms(),
637                &[Atom::ByteSequenceMasked {
638                    seq_start: 0,
639                    mask_start: 1,
640                    len: 1
641                }]
642            );
643            assert_eq!(result.byte_sequence(), &[0xA0, 0xF0]);
644        }
645
646        {
647            let parser = PatternParser::new("F?E?");
648            let result = parser.parse().unwrap();
649            assert_eq!(
650                result.atoms(),
651                &[Atom::ByteSequenceMasked {
652                    seq_start: 0,
653                    mask_start: 2,
654                    len: 2
655                }]
656            );
657            assert_eq!(result.byte_sequence(), &[0xF0, 0xE0, 0xF0, 0xF0]);
658        }
659    }
660
661    #[test]
662    fn test_byte_sequence_mask_bin() {
663        {
664            let parser = PatternParser::new("0b100??001");
665            let result = parser.parse().unwrap();
666            assert_eq!(
667                result.atoms(),
668                &[Atom::ByteSequenceMasked {
669                    seq_start: 0,
670                    mask_start: 1,
671                    len: 1
672                }]
673            );
674            assert_eq!(result.byte_sequence(), &[0x81, 0xE7]);
675        }
676
677        {
678            let parser = PatternParser::new("0b100??001?????111");
679            let result = parser.parse().unwrap();
680            assert_eq!(
681                result.atoms(),
682                &[Atom::ByteSequenceMasked {
683                    seq_start: 0,
684                    mask_start: 2,
685                    len: 2
686                }]
687            );
688            assert_eq!(result.byte_sequence(), &[0x81, 0x07, 0xE7, 0x07]);
689        }
690    }
691
692    #[test]
693    fn test_byte_wildcard() {
694        {
695            let parser = PatternParser::new("?");
696            let result = parser.parse().unwrap();
697            assert_eq!(result.atoms(), &[Atom::WildcardFixed(1),]);
698            assert_eq!(result.byte_sequence(), &[]);
699        }
700
701        {
702            let parser = PatternParser::new("AB ? CD");
703            let result = parser.parse().unwrap();
704            assert_eq!(
705                result.atoms(),
706                &[
707                    Atom::ByteSequence {
708                        seq_start: 0x00,
709                        seq_end: 0x01
710                    },
711                    Atom::WildcardFixed(1),
712                    Atom::ByteSequence {
713                        seq_start: 0x01,
714                        seq_end: 0x02
715                    }
716                ]
717            );
718            assert_eq!(result.byte_sequence(), &[0xAB, 0xCD]);
719        }
720
721        /* double wildcards are being interpreted as nibbles */
722        {
723            let parser = PatternParser::new("AB ?? CD");
724            let result = parser.parse().unwrap();
725            assert_eq!(
726                result.atoms(),
727                &[
728                    Atom::ByteSequence {
729                        seq_start: 0x00,
730                        seq_end: 0x01
731                    },
732                    Atom::ByteSequenceMasked {
733                        seq_start: 0x01,
734                        mask_start: 0x02,
735                        len: 0x01
736                    },
737                    Atom::ByteSequence {
738                        seq_start: 0x03,
739                        seq_end: 0x04
740                    }
741                ]
742            );
743            assert_eq!(result.byte_sequence(), &[0xAB, 0x00, 0x00, 0xCD]);
744        }
745
746        /* this is just invalid */
747        {
748            let parser = PatternParser::new("AB ??? CD");
749            let result = parser.parse().unwrap_err();
750            assert_eq!(
751                &result,
752                &PositionedError::new(3..6, ParseError::BinaryValueIncomplete)
753            );
754        }
755    }
756
757    #[test]
758    fn test_byte_wildcard_nl() {
759        let parser = PatternParser::new("?\n?");
760        let result = parser.parse().unwrap();
761        assert_eq!(
762            result.atoms(),
763            &[Atom::WildcardFixed(1), Atom::WildcardFixed(1),]
764        );
765        assert_eq!(result.byte_sequence(), &[]);
766    }
767
768    #[test]
769    fn test_jump() {
770        {
771            let parser = PatternParser::new("%$* FF * % $");
772            let result = parser.parse().unwrap();
773            assert_eq!(
774                result.atoms(),
775                &[
776                    Atom::Jump(JumpType::RelByte),
777                    Atom::Jump(JumpType::RelDWord),
778                    Atom::Jump(JumpType::AbsQWord),
779                    Atom::ByteSequence {
780                        seq_start: 0,
781                        seq_end: 1
782                    },
783                    Atom::Jump(JumpType::AbsQWord),
784                    Atom::Jump(JumpType::RelByte),
785                    Atom::Jump(JumpType::RelDWord),
786                ]
787            );
788            assert_eq!(result.byte_sequence(), &[0xFF]);
789        }
790    }
791
792    #[test]
793    fn test_jump_block() {
794        {
795            let parser = PatternParser::new("$ { FE }");
796            let result = parser.parse().unwrap();
797            assert_eq!(
798                result.atoms(),
799                &[
800                    Atom::CursorPush,
801                    Atom::Jump(JumpType::RelDWord),
802                    Atom::ByteSequence {
803                        seq_start: 0,
804                        seq_end: 1
805                    },
806                    Atom::CursorPop { advance: 4 },
807                ]
808            );
809            assert_eq!(result.byte_sequence(), &[0xFE]);
810        }
811
812        {
813            let parser = PatternParser::new("$ { FE $ { FF } }");
814            let result = parser.parse().unwrap();
815            assert_eq!(
816                result.atoms(),
817                &[
818                    Atom::CursorPush,
819                    Atom::Jump(JumpType::RelDWord),
820                    Atom::ByteSequence {
821                        seq_start: 0,
822                        seq_end: 1
823                    },
824                    Atom::CursorPush,
825                    Atom::Jump(JumpType::RelDWord),
826                    Atom::ByteSequence {
827                        seq_start: 1,
828                        seq_end: 2
829                    },
830                    Atom::CursorPop { advance: 4 },
831                    Atom::CursorPop { advance: 4 },
832                ]
833            );
834            assert_eq!(result.byte_sequence(), &[0xFE, 0xFF]);
835        }
836
837        {
838            let parser = PatternParser::new("$ { FE");
839            let result = parser.parse().unwrap_err();
840            assert_eq!(
841                &result,
842                &PositionedError::new(2..3, ParseError::BlockNotClosed)
843            );
844        }
845    }
846
847    #[test]
848    fn test_group() {
849        {
850            /* empty group */
851            let parser = PatternParser::new("()");
852            let result = parser.parse().unwrap();
853            assert_eq!(result.atoms(), &[]);
854            assert_eq!(result.byte_sequence(), &[]);
855        }
856
857        {
858            /* single entry group -> just ignores the group */
859            let parser = PatternParser::new("( FF00 )");
860            let result = parser.parse().unwrap();
861            assert_eq!(
862                result.atoms(),
863                &[Atom::ByteSequence {
864                    seq_start: 0,
865                    seq_end: 2
866                }]
867            );
868            assert_eq!(result.byte_sequence(), &[0xFF, 0x00]);
869        }
870
871        {
872            /* group with two entries */
873            let parser = PatternParser::new("( 01 | 02 03 )");
874            let result = parser.parse().unwrap();
875            assert_eq!(
876                result.atoms(),
877                &[
878                    Atom::Branch {
879                        left_len: 1,
880                        right_len: 2
881                    },
882                    Atom::ByteSequence {
883                        seq_start: 0,
884                        seq_end: 1
885                    },
886                    Atom::ByteSequence {
887                        seq_start: 1,
888                        seq_end: 2
889                    },
890                    Atom::ByteSequence {
891                        seq_start: 2,
892                        seq_end: 3
893                    }
894                ]
895            );
896            assert_eq!(result.byte_sequence(), &[0x01, 0x02, 0x03]);
897        }
898
899        {
900            /* group with tree entries */
901            let parser = PatternParser::new("( 01 | 02 03 | FF )");
902            let result = parser.parse().unwrap();
903            assert_eq!(
904                result.atoms(),
905                &[
906                    Atom::Branch {
907                        left_len: 1,
908                        right_len: 4
909                    },
910                    Atom::ByteSequence {
911                        seq_start: 0,
912                        seq_end: 1
913                    },
914                    Atom::Branch {
915                        left_len: 2,
916                        right_len: 1
917                    },
918                    Atom::ByteSequence {
919                        seq_start: 1,
920                        seq_end: 2
921                    },
922                    Atom::ByteSequence {
923                        seq_start: 2,
924                        seq_end: 3
925                    },
926                    Atom::ByteSequence {
927                        seq_start: 3,
928                        seq_end: 4
929                    }
930                ]
931            );
932            assert_eq!(result.byte_sequence(), &[0x01, 0x02, 0x03, 0xFF]);
933        }
934
935        {
936            /* nested group (right) */
937            let parser = PatternParser::new("( 01 | ( 02 | 03 ) )");
938            let result = parser.parse().unwrap();
939            assert_eq!(
940                result.atoms(),
941                &[
942                    Atom::Branch {
943                        left_len: 1,
944                        right_len: 3
945                    },
946                    Atom::ByteSequence {
947                        seq_start: 0,
948                        seq_end: 1
949                    },
950                    Atom::Branch {
951                        left_len: 1,
952                        right_len: 1
953                    },
954                    Atom::ByteSequence {
955                        seq_start: 1,
956                        seq_end: 2
957                    },
958                    Atom::ByteSequence {
959                        seq_start: 2,
960                        seq_end: 3
961                    }
962                ]
963            );
964            assert_eq!(result.byte_sequence(), &[0x01, 0x02, 0x03]);
965        }
966
967        {
968            /* nested group (left) */
969            let parser = PatternParser::new("( (01 | 02) | 03 )");
970            let result = parser.parse().unwrap();
971            assert_eq!(
972                result.atoms(),
973                &[
974                    Atom::Branch {
975                        left_len: 3,
976                        right_len: 1
977                    },
978                    Atom::Branch {
979                        left_len: 1,
980                        right_len: 1
981                    },
982                    Atom::ByteSequence {
983                        seq_start: 0,
984                        seq_end: 1
985                    },
986                    Atom::ByteSequence {
987                        seq_start: 1,
988                        seq_end: 2
989                    },
990                    Atom::ByteSequence {
991                        seq_start: 2,
992                        seq_end: 3
993                    },
994                ]
995            );
996            assert_eq!(result.byte_sequence(), &[0x01, 0x02, 0x03]);
997        }
998
999        {
1000            /* unclosed group */
1001            let parser = PatternParser::new("(");
1002            let result = parser.parse().unwrap_err();
1003            assert_eq!(
1004                &result,
1005                &PositionedError::new(0..1, ParseError::GroupNotClosed)
1006            );
1007        }
1008
1009        {
1010            /* unclosed group with contents */
1011            let parser = PatternParser::new("( FF 00");
1012            let result = parser.parse().unwrap_err();
1013            assert_eq!(
1014                &result,
1015                &PositionedError::new(0..1, ParseError::GroupNotClosed)
1016            );
1017        }
1018
1019        {
1020            /* unclosed group with pipe */
1021            let parser = PatternParser::new(" (|");
1022            let result = parser.parse().unwrap_err();
1023            assert_eq!(
1024                &result,
1025                &PositionedError::new(1..2, ParseError::GroupNotClosed)
1026            );
1027        }
1028    }
1029
1030    #[test]
1031    fn test_range() {
1032        {
1033            /* widecard fixed */
1034            let parser = PatternParser::new("[0] [123]");
1035            let result = parser.parse().unwrap();
1036            assert_eq!(
1037                result.atoms(),
1038                &[Atom::WildcardFixed(0), Atom::WildcardFixed(123)]
1039            );
1040            assert_eq!(result.byte_sequence(), &[]);
1041        }
1042
1043        {
1044            /* widecard range */
1045            let parser = PatternParser::new("[0-10] [123- 999]");
1046            let result = parser.parse().unwrap();
1047            assert_eq!(
1048                result.atoms(),
1049                &[
1050                    Atom::WildcardRange { min: 0, max: 10 },
1051                    Atom::WildcardRange { min: 123, max: 999 }
1052                ]
1053            );
1054            assert_eq!(result.byte_sequence(), &[]);
1055        }
1056
1057        {
1058            /* widecard range error */
1059            let parser = PatternParser::new("[0-]");
1060            let result = parser.parse().unwrap_err();
1061            assert_eq!(
1062                &result,
1063                &PositionedError::new(3..4, ParseError::UnexpectedToken)
1064            );
1065        }
1066
1067        {
1068            /* widecard range error */
1069            let parser = PatternParser::new("[FF 0-3]");
1070            let result = parser.parse().unwrap_err();
1071            assert_eq!(*result.position(), 1..3);
1072            assert!(matches!(result.inner(), ParseError::RangeBoundInvalid(_)));
1073        }
1074
1075        {
1076            /* not closed */
1077            let parser = PatternParser::new("[0-3");
1078            let result = parser.parse().unwrap_err();
1079            assert_eq!(
1080                &result,
1081                &PositionedError::new(3..4, ParseError::UnexpectedEnd)
1082            );
1083        }
1084
1085        {
1086            /* end less then start */
1087            let parser = PatternParser::new("[3-0]");
1088            let result = parser.parse().unwrap_err();
1089            assert_eq!(
1090                &result,
1091                &PositionedError::new(3..4, ParseError::RangeEndMustBeGraterThenStart)
1092            );
1093        }
1094    }
1095}