1use alloc::vec::Vec;
2use core::num::ParseIntError;
3
4use super::{
5 Lexer,
6 PositionedError,
7 Token,
8};
9use crate::{
10 Atom,
11 GenericBinaryPattern,
12 JumpType,
13 ReadWidth,
14};
15
/// Errors the pattern parser reports while turning pattern text into atoms.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ParseError {
    /// A token appeared where the parser expected a different kind of token.
    UnexpectedToken,
    /// The lexer ran out of tokens while the parser still required one.
    UnexpectedEnd,

    // NOTE(review): not produced by any parser code visible in this file;
    // presumably raised by a different component — confirm.
    MaskByteLenMismatch,

    /// A byte literal contained a character that is neither `?` nor a digit of its radix.
    BinaryValueInvalid,
    /// The digits of a byte literal do not add up to a whole number of bytes.
    BinaryValueIncomplete,

    /// The input ended before an opened group was closed.
    GroupNotClosed,
    /// The input ended before an opened jump block was closed.
    BlockNotClosed,

    /// A range bound could not be parsed as a `u16`; carries the integer parse error.
    RangeBoundInvalid(ParseIntError),
    /// The upper bound of a range was less than or equal to its lower bound.
    RangeEndMustBeGraterThenStart,

    /// A byte-sequence offset or a branch length does not fit into a `u16`.
    SequenceTooLarge,
}
34
/// One digit position of a byte literal: a concrete digit value or a `?` placeholder.
#[derive(Debug, Clone, Copy)]
enum ByteSegment {
    /// Numeric value of a single digit in the literal's radix.
    Value(u8),
    /// A `?` digit; contributes zero bits to both the value byte and the mask byte.
    Whildcard,
}
40
/// Numeric base in which a byte literal is written.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Radix {
    Hex,
    Bin,
}

impl Radix {
    /// Detects the radix from the leading `0b` / `0x` marker of `value`.
    ///
    /// Returns `None` when `value` starts with neither marker.
    pub fn from_prefix(value: &str) -> Option<Self> {
        [Self::Bin, Self::Hex]
            .iter()
            .copied()
            .find(|radix| value.starts_with(radix.prefix()))
    }

    /// The textual marker that introduces a literal of this radix.
    pub fn prefix(&self) -> &'static str {
        match *self {
            Self::Hex => "0x",
            Self::Bin => "0b",
        }
    }

    /// Number of bits encoded by a single digit of this radix.
    pub fn segment_bit_width(&self) -> usize {
        match *self {
            Self::Hex => 4,
            Self::Bin => 1,
        }
    }
}
72
/// Parser that turns pattern text into a list of atoms plus a shared byte buffer.
pub struct PatternParser<'a> {
    /// Token source for the pattern text.
    lexer: Lexer<'a>,
    /// One-token lookahead: filled by `peek_token`, drained by `pop_token`.
    peeked_token: Option<Token<'a>>,

    /// Value and mask bytes of every byte literal parsed so far; atoms refer to it by offset.
    byte_sequence: Vec<u8>,
    /// Atoms emitted so far, in parse order.
    atoms: Vec<Atom>,
}
80
81impl<'a> PatternParser<'a> {
82 pub fn new(input: &'a str) -> Self {
83 Self {
84 lexer: Lexer::new(input),
85 peeked_token: None,
86
87 atoms: Vec::with_capacity(128),
88 byte_sequence: Vec::with_capacity(256),
89 }
90 }
91
92 fn peek_token(&mut self) -> Option<&Token<'a>> {
93 if self.peeked_token.is_none() {
94 self.peeked_token = self.lexer.next_token();
95 }
96
97 self.peeked_token.as_ref()
98 }
99
100 fn pop_token(&mut self) -> Result<Token<'a>, PositionedError<ParseError>> {
101 if let Some(token) = self.peeked_token.take() {
102 Ok(token)
103 } else if let Some(token) = self.lexer.next_token() {
104 Ok(token)
105 } else {
106 Err(PositionedError::new(
107 self.lexer.token_range(),
108 ParseError::UnexpectedEnd,
109 ))
110 }
111 }
112
113 fn bytes_commit(
114 &mut self,
115 sequence_start: usize,
116 ) -> Result<(u16, u16), PositionedError<ParseError>> {
117 assert!(sequence_start <= self.byte_sequence.len());
118
119 let bytes_end = self.byte_sequence.len();
120 if sequence_start > u16::MAX as usize || bytes_end > u16::MAX as usize {
121 return Err(PositionedError::new(
122 self.lexer.token_range(),
123 ParseError::SequenceTooLarge,
124 ));
125 }
126
127 Ok((sequence_start as u16, bytes_end as u16))
128 }
129
130 pub fn parse(mut self) -> Result<GenericBinaryPattern<'static>, PositionedError<ParseError>> {
131 let _ = self.parse_until(|_| false)?;
133 Ok(GenericBinaryPattern::new(self.atoms, self.byte_sequence))
134 }
135
136 fn parse_until(
137 &mut self,
138 matcher: impl Fn(&Token<'a>) -> bool,
139 ) -> Result<bool, PositionedError<ParseError>> {
140 while let Some(token) = self.peek_token() {
141 if matcher(token) {
142 return Ok(true);
143 }
144
145 match token {
146 Token::Text(_) => self.parse_bytes()?,
147 Token::Whildcard => self.parse_wildcard()?,
148
149 Token::PositionSave => self.parse_position_save()?,
150
151 Token::JumpRel1 => self.parse_jump()?,
152 Token::JumpRel4 => self.parse_jump()?,
153 Token::JumpAbs64 => self.parse_jump()?,
154
155 Token::Read1 => self.parse_read()?,
156 Token::Read2 => self.parse_read()?,
157 Token::Read4 => self.parse_read()?,
158
159 Token::GroupOpen => self.parse_group()?,
160 Token::RangeOpen => self.parse_range()?,
161
162 _ => {
163 return Err(PositionedError::new(
164 self.lexer.token_range(),
165 ParseError::UnexpectedToken,
166 ))
167 }
168 }
169 }
170
171 Ok(false)
172 }
173
174 fn parse_bytes_with_radix(&mut self, radix: Radix) -> Result<(), PositionedError<ParseError>> {
175 let Token::Text(value) = self.pop_token()? else {
176 return Err(PositionedError::new(
177 self.lexer.token_range(),
178 ParseError::UnexpectedToken,
179 ));
180 };
181
182 let radix_prefix = radix.prefix();
183 let (chars_token_index, chars) = if value.starts_with(radix_prefix) {
184 (
185 self.lexer.token_range().start + radix_prefix.len(),
186 value[radix_prefix.len()..].char_indices(),
187 )
188 } else {
189 (self.lexer.token_range().start, value.char_indices())
190 };
191
192 let mut byte_segments = Vec::new();
193 for (char_index, c) in chars {
194 if c == '?' {
195 byte_segments.push(ByteSegment::Whildcard);
196 continue;
197 }
198
199 let Some(value) = c.to_digit(1 << radix.segment_bit_width()) else {
200 return Err(PositionedError::new(
201 chars_token_index + char_index..chars_token_index + char_index + 1,
202 ParseError::BinaryValueInvalid,
203 ));
204 };
205
206 byte_segments.push(ByteSegment::Value(value as u8));
207 }
208
209 let byte_chunk_size = 8 / radix.segment_bit_width();
210 if byte_segments.len() % byte_chunk_size > 0 {
211 return Err(PositionedError::new(
212 self.lexer.token_range(),
213 ParseError::BinaryValueIncomplete,
214 ));
215 }
216
217 let bytes_start = self.byte_sequence.len();
218 for chunk in byte_segments.chunks_exact(byte_chunk_size) {
219 let mut byte = 0;
220 for segment in chunk {
221 byte <<= radix.segment_bit_width();
222 if let ByteSegment::Value(bits) = *segment {
223 byte |= bits;
224 }
225 }
226
227 self.byte_sequence.push(byte);
228 }
229
230 let (seq_start, seq_end) = self.bytes_commit(bytes_start)?;
231 if byte_segments
232 .iter()
233 .any(|seg| matches!(seg, ByteSegment::Whildcard))
234 {
235 let bytes_mask_start = self.byte_sequence.len();
236 for chunk in byte_segments.chunks_exact(byte_chunk_size) {
237 let mut byte = 0;
238 for segment in chunk {
239 byte <<= radix.segment_bit_width();
240 match segment {
241 ByteSegment::Value(_) => {
242 byte |= (1 << radix.segment_bit_width()) - 1;
243 }
244 ByteSegment::Whildcard => {
245 byte |= 0;
246 }
247 }
248 }
249
250 self.byte_sequence.push(byte);
251 }
252
253 let (mask_start, _mask_end) = self.bytes_commit(bytes_mask_start)?;
254 self.atoms.push(Atom::ByteSequenceMasked {
255 seq_start,
256 mask_start,
257 len: (byte_segments.len() / byte_chunk_size) as u16,
258 });
259 } else {
260 self.atoms.push(Atom::ByteSequence { seq_start, seq_end });
261 }
262 Ok(())
263 }
264
265 fn parse_bytes(&mut self) -> Result<(), PositionedError<ParseError>> {
266 let Some(Token::Text(value)) = self.peek_token() else {
267 return Err(PositionedError::new(
268 self.lexer.token_range(),
269 ParseError::UnexpectedToken,
270 ));
271 };
272
273 let radix = Radix::from_prefix(&value).unwrap_or(Radix::Hex);
274 self.parse_bytes_with_radix(radix)
275 }
276
277 fn parse_position_save(&mut self) -> Result<(), PositionedError<ParseError>> {
278 let Token::PositionSave = self.pop_token()? else {
279 return Err(PositionedError::new(
280 self.lexer.token_range(),
281 ParseError::UnexpectedToken,
282 ));
283 };
284
285 self.atoms.push(Atom::SaveCursor);
286 Ok(())
287 }
288
289 fn parse_wildcard(&mut self) -> Result<(), PositionedError<ParseError>> {
290 let Token::Whildcard = self.pop_token()? else {
291 return Err(PositionedError::new(
292 self.lexer.token_range(),
293 ParseError::UnexpectedToken,
294 ));
295 };
296
297 self.atoms.push(Atom::WildcardFixed(1));
298 Ok(())
299 }
300
301 fn parse_read(&mut self) -> Result<(), PositionedError<ParseError>> {
302 let read_width = match self.pop_token()? {
303 Token::Read1 => ReadWidth::Byte,
304 Token::Read2 => ReadWidth::Word,
305 Token::Read4 => ReadWidth::DWord,
306 _ => {
307 return Err(PositionedError::new(
308 self.lexer.token_range(),
309 ParseError::UnexpectedToken,
310 ))
311 }
312 };
313
314 self.atoms.push(Atom::Read(read_width));
315 Ok(())
316 }
317
318 fn parse_jump(&mut self) -> Result<(), PositionedError<ParseError>> {
319 let (jump_type, width) = match self.pop_token()? {
320 Token::JumpRel1 => (JumpType::RelByte, 1),
321 Token::JumpRel4 => (JumpType::RelDWord, 4),
322 Token::JumpAbs64 => (JumpType::AbsQWord, 8),
323 _ => {
324 return Err(PositionedError::new(
325 self.lexer.token_range(),
326 ParseError::UnexpectedToken,
327 ))
328 }
329 };
330
331 if matches!(self.peek_token(), Some(Token::BlockOpen)) {
332 let _ = self.pop_token()?;
333 self.atoms.push(Atom::CursorPush);
334 self.atoms.push(Atom::Jump(jump_type));
335
336 let block_start = self.lexer.token_range();
337 if !self.parse_until(|token| matches!(token, Token::BlockClose))? {
338 return Err(PositionedError::new(
339 block_start,
340 ParseError::BlockNotClosed,
341 ));
342 }
343
344 self.atoms.push(Atom::CursorPop { advance: width });
345 let _ = self.pop_token()?;
346 } else {
347 self.atoms.push(Atom::Jump(jump_type));
348 }
349
350 Ok(())
351 }
352
353 fn parse_group(&mut self) -> Result<(), PositionedError<ParseError>> {
354 let Token::GroupOpen = self.pop_token()? else {
355 return Err(PositionedError::new(
356 self.lexer.token_range(),
357 ParseError::UnexpectedToken,
358 ));
359 };
360
361 let group_start = self.lexer.token_range();
362 let mut branch_atoms = Vec::with_capacity(8);
363 loop {
364 let branch_atom_index = self.atoms.len();
365 self.atoms.push(Atom::Branch {
366 left_len: 0,
367 right_len: 0,
368 });
369
370 if !self.parse_until(|token| matches!(token, Token::GroupClose | Token::GroupPipe))? {
371 return Err(PositionedError::new(
372 group_start,
373 ParseError::GroupNotClosed,
374 ));
375 }
376
377 if matches!(self.pop_token()?, Token::GroupClose) {
378 self.atoms.remove(branch_atom_index);
380 break;
381 }
382
383 let left_branch_len = self.atoms.len() - branch_atom_index - 1;
384 if left_branch_len > u16::MAX as usize {
385 return Err(PositionedError::new(
386 self.lexer.token_range(),
387 ParseError::SequenceTooLarge,
388 ));
389 }
390
391 if let Atom::Branch { left_len, .. } = &mut self.atoms[branch_atom_index] {
392 *left_len = left_branch_len as u16;
393 } else {
394 unreachable!("atom should be a branch");
395 }
396
397 branch_atoms.push(branch_atom_index);
398 }
399
400 let atom_count = self.atoms.len();
401 for branch_atom_index in branch_atoms {
402 if let Atom::Branch {
403 left_len,
404 right_len,
405 } = &mut self.atoms[branch_atom_index]
406 {
407 let right_branch_len = atom_count - *left_len as usize - branch_atom_index - 1;
408 if right_branch_len > u16::MAX as usize {
409 return Err(PositionedError::new(
410 self.lexer.token_range(),
411 ParseError::SequenceTooLarge,
412 ));
413 }
414
415 *right_len = right_branch_len as u16;
416 } else {
417 unreachable!("atom should be a branch");
418 }
419 }
420
421 Ok(())
422 }
423
424 fn parse_range(&mut self) -> Result<(), PositionedError<ParseError>> {
425 let Token::RangeOpen = self.pop_token()? else {
426 return Err(PositionedError::new(
427 self.lexer.token_range(),
428 ParseError::UnexpectedToken,
429 ));
430 };
431
432 let Token::Text(range_start) = self.pop_token()? else {
433 return Err(PositionedError::new(
434 self.lexer.token_range(),
435 ParseError::UnexpectedToken,
436 ));
437 };
438
439 let range_start = range_start.parse::<u16>().map_err(|err| {
440 PositionedError::new(self.lexer.token_range(), ParseError::RangeBoundInvalid(err))
441 })?;
442
443 match self.pop_token()? {
444 Token::RangeClose => {
445 self.atoms.push(Atom::WildcardFixed(range_start));
446 Ok(())
447 }
448 Token::RangeSeperator => {
449 let Token::Text(range_end) = self.pop_token()? else {
450 return Err(PositionedError::new(
451 self.lexer.token_range(),
452 ParseError::UnexpectedToken,
453 ));
454 };
455
456 let range_end = range_end.parse::<u16>().map_err(|err| {
457 PositionedError::new(
458 self.lexer.token_range(),
459 ParseError::RangeBoundInvalid(err),
460 )
461 })?;
462
463 if range_end <= range_start {
464 return Err(PositionedError::new(
465 self.lexer.token_range(),
466 ParseError::RangeEndMustBeGraterThenStart,
467 ));
468 }
469
470 self.atoms.push(Atom::WildcardRange {
471 min: range_start,
472 max: range_end,
473 });
474 if !matches!(self.pop_token()?, Token::RangeClose) {
475 Err(PositionedError::new(
476 self.lexer.token_range(),
477 ParseError::UnexpectedToken,
478 ))
479 } else {
480 Ok(())
481 }
482 }
483 _ => Err(PositionedError::new(
484 self.lexer.token_range(),
485 ParseError::UnexpectedToken,
486 )),
487 }
488 }
489}
490
491pub fn parse_pattern(
493 pattern: &str,
494) -> Result<GenericBinaryPattern<'static>, PositionedError<ParseError>> {
495 let parser = PatternParser::new(pattern);
496 parser.parse()
497}
498
499#[cfg(test)]
500mod test {
501 use super::PatternParser;
502 use crate::{
503 compiler::{
504 parser::ParseError,
505 PositionedError,
506 },
507 pattern::BinaryPattern,
508 Atom,
509 JumpType,
510 };
511
#[test]
fn test_byte_sequence_bin() {
    // Eight binary digits form exactly one byte.
    let one_byte = PatternParser::new("0b10011100").parse().unwrap();
    assert_eq!(
        one_byte.atoms(),
        &[Atom::ByteSequence {
            seq_start: 0,
            seq_end: 1
        },]
    );
    assert_eq!(one_byte.byte_sequence(), &[0b10011100]);

    // Sixteen binary digits form one two-byte sequence.
    let two_bytes = PatternParser::new("0b1001110011110000").parse().unwrap();
    assert_eq!(
        two_bytes.atoms(),
        &[Atom::ByteSequence {
            seq_start: 0,
            seq_end: 2
        },]
    );
    assert_eq!(two_bytes.byte_sequence(), &[0b10011100, 0b11110000]);
}
540
#[test]
fn test_byte_sequence_hex() {
    // Space-separated bytes become one atom each.
    let separate = PatternParser::new("FF 00 12").parse().unwrap();
    assert_eq!(
        separate.atoms(),
        &[
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::ByteSequence {
                seq_start: 1,
                seq_end: 2
            },
            Atom::ByteSequence {
                seq_start: 2,
                seq_end: 3
            }
        ]
    );
    assert_eq!(separate.byte_sequence(), &[0xFF, 0x00, 0x12]);

    // Adjacent bytes share a single atom.
    let joined = PatternParser::new("FF00 12").parse().unwrap();
    assert_eq!(
        joined.atoms(),
        &[
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 2
            },
            Atom::ByteSequence {
                seq_start: 2,
                seq_end: 3
            }
        ]
    );
    assert_eq!(joined.byte_sequence(), &[0xFF, 0x00, 0x12]);

    // An explicit `0x` prefix is accepted and skipped.
    let prefixed = PatternParser::new("0xDEADBEEF").parse().unwrap();
    assert_eq!(
        prefixed.atoms(),
        &[Atom::ByteSequence {
            seq_start: 0,
            seq_end: 4
        },]
    );
    assert_eq!(prefixed.byte_sequence(), &[0xDE, 0xAD, 0xBE, 0xEF]);

    // An odd number of hex digits is rejected for the whole token.
    let incomplete = PatternParser::new("FF0").parse().unwrap_err();
    assert_eq!(
        &incomplete,
        &PositionedError::new(0..3, ParseError::BinaryValueIncomplete)
    );

    // A non-hex character is reported at its own position.
    let invalid = PatternParser::new("FX").parse().unwrap_err();
    assert_eq!(
        &invalid,
        &PositionedError::new(1..2, ParseError::BinaryValueInvalid)
    );
}
616
#[test]
fn test_byte_sequence_mask_hex() {
    // One masked byte: the value byte is followed by its mask byte.
    let single = PatternParser::new("A?").parse().unwrap();
    assert_eq!(
        single.atoms(),
        &[Atom::ByteSequenceMasked {
            seq_start: 0,
            mask_start: 1,
            len: 1
        }]
    );
    assert_eq!(single.byte_sequence(), &[0xA0, 0xF0]);

    // Two masked bytes: both value bytes first, then both mask bytes.
    let double = PatternParser::new("F?E?").parse().unwrap();
    assert_eq!(
        double.atoms(),
        &[Atom::ByteSequenceMasked {
            seq_start: 0,
            mask_start: 2,
            len: 2
        }]
    );
    assert_eq!(double.byte_sequence(), &[0xF0, 0xE0, 0xF0, 0xF0]);
}
647
#[test]
fn test_byte_sequence_mask_bin() {
    // Wildcard bits inside a single binary byte.
    let single = PatternParser::new("0b100??001").parse().unwrap();
    assert_eq!(
        single.atoms(),
        &[Atom::ByteSequenceMasked {
            seq_start: 0,
            mask_start: 1,
            len: 1
        }]
    );
    assert_eq!(single.byte_sequence(), &[0x81, 0xE7]);

    // Wildcard bits spread over two binary bytes.
    let double = PatternParser::new("0b100??001?????111").parse().unwrap();
    assert_eq!(
        double.atoms(),
        &[Atom::ByteSequenceMasked {
            seq_start: 0,
            mask_start: 2,
            len: 2
        }]
    );
    assert_eq!(double.byte_sequence(), &[0x81, 0x07, 0xE7, 0x07]);
}
678
#[test]
fn test_byte_wildcard() {
    // A lone `?` is a one-byte wildcard and stores no bytes.
    let lone = PatternParser::new("?").parse().unwrap();
    assert_eq!(lone.atoms(), &[Atom::WildcardFixed(1),]);
    assert_eq!(lone.byte_sequence(), &[]);

    // A `?` between two bytes.
    let between = PatternParser::new("AB ? CD").parse().unwrap();
    assert_eq!(
        between.atoms(),
        &[
            Atom::ByteSequence {
                seq_start: 0x00,
                seq_end: 0x01
            },
            Atom::WildcardFixed(1),
            Atom::ByteSequence {
                seq_start: 0x01,
                seq_end: 0x02
            }
        ]
    );
    assert_eq!(between.byte_sequence(), &[0xAB, 0xCD]);

    // `??` is parsed as a fully masked byte rather than a wildcard atom.
    let masked = PatternParser::new("AB ?? CD").parse().unwrap();
    assert_eq!(
        masked.atoms(),
        &[
            Atom::ByteSequence {
                seq_start: 0x00,
                seq_end: 0x01
            },
            Atom::ByteSequenceMasked {
                seq_start: 0x01,
                mask_start: 0x02,
                len: 0x01
            },
            Atom::ByteSequence {
                seq_start: 0x03,
                seq_end: 0x04
            }
        ]
    );
    assert_eq!(masked.byte_sequence(), &[0xAB, 0x00, 0x00, 0xCD]);

    // `???` is an odd number of hex digits and therefore incomplete.
    let incomplete = PatternParser::new("AB ??? CD").parse().unwrap_err();
    assert_eq!(
        &incomplete,
        &PositionedError::new(3..6, ParseError::BinaryValueIncomplete)
    );
}
743
#[test]
fn test_jump() {
    // Jump tokens are recognised with and without separating whitespace.
    let pattern = PatternParser::new("%$* FF * % $").parse().unwrap();
    assert_eq!(
        pattern.atoms(),
        &[
            Atom::Jump(JumpType::RelByte),
            Atom::Jump(JumpType::RelDWord),
            Atom::Jump(JumpType::AbsQWord),
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::Jump(JumpType::AbsQWord),
            Atom::Jump(JumpType::RelByte),
            Atom::Jump(JumpType::RelDWord),
        ]
    );
    assert_eq!(pattern.byte_sequence(), &[0xFF]);
}
767
#[test]
fn test_jump_block() {
    // A jump with a block is wrapped in cursor push / pop.
    let simple = PatternParser::new("$ { FE }").parse().unwrap();
    assert_eq!(
        simple.atoms(),
        &[
            Atom::CursorPush,
            Atom::Jump(JumpType::RelDWord),
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::CursorPop { advance: 4 },
        ]
    );
    assert_eq!(simple.byte_sequence(), &[0xFE]);

    // Blocks nest.
    let nested = PatternParser::new("$ { FE $ { FF } }").parse().unwrap();
    assert_eq!(
        nested.atoms(),
        &[
            Atom::CursorPush,
            Atom::Jump(JumpType::RelDWord),
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::CursorPush,
            Atom::Jump(JumpType::RelDWord),
            Atom::ByteSequence {
                seq_start: 1,
                seq_end: 2
            },
            Atom::CursorPop { advance: 4 },
            Atom::CursorPop { advance: 4 },
        ]
    );
    assert_eq!(nested.byte_sequence(), &[0xFE, 0xFF]);

    // A missing block close is reported at the block-open token.
    let unclosed = PatternParser::new("$ { FE").parse().unwrap_err();
    assert_eq!(
        &unclosed,
        &PositionedError::new(2..3, ParseError::BlockNotClosed)
    );
}
822
#[test]
fn test_group() {
    // An empty group emits nothing.
    let empty = PatternParser::new("()").parse().unwrap();
    assert_eq!(empty.atoms(), &[]);
    assert_eq!(empty.byte_sequence(), &[]);

    // A group with a single alternative emits no branch atom.
    let single = PatternParser::new("( FF00 )").parse().unwrap();
    assert_eq!(
        single.atoms(),
        &[Atom::ByteSequence {
            seq_start: 0,
            seq_end: 2
        }]
    );
    assert_eq!(single.byte_sequence(), &[0xFF, 0x00]);

    // Two alternatives: one branch atom in front of the first alternative.
    let two_way = PatternParser::new("( 01 | 02 03 )").parse().unwrap();
    assert_eq!(
        two_way.atoms(),
        &[
            Atom::Branch {
                left_len: 1,
                right_len: 2
            },
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::ByteSequence {
                seq_start: 1,
                seq_end: 2
            },
            Atom::ByteSequence {
                seq_start: 2,
                seq_end: 3
            }
        ]
    );
    assert_eq!(two_way.byte_sequence(), &[0x01, 0x02, 0x03]);

    // Three alternatives: a branch atom in front of each but the last.
    let three_way = PatternParser::new("( 01 | 02 03 | FF )").parse().unwrap();
    assert_eq!(
        three_way.atoms(),
        &[
            Atom::Branch {
                left_len: 1,
                right_len: 4
            },
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::Branch {
                left_len: 2,
                right_len: 1
            },
            Atom::ByteSequence {
                seq_start: 1,
                seq_end: 2
            },
            Atom::ByteSequence {
                seq_start: 2,
                seq_end: 3
            },
            Atom::ByteSequence {
                seq_start: 3,
                seq_end: 4
            }
        ]
    );
    assert_eq!(three_way.byte_sequence(), &[0x01, 0x02, 0x03, 0xFF]);

    // A nested group as the right alternative.
    let nested_right = PatternParser::new("( 01 | ( 02 | 03 ) )").parse().unwrap();
    assert_eq!(
        nested_right.atoms(),
        &[
            Atom::Branch {
                left_len: 1,
                right_len: 3
            },
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::Branch {
                left_len: 1,
                right_len: 1
            },
            Atom::ByteSequence {
                seq_start: 1,
                seq_end: 2
            },
            Atom::ByteSequence {
                seq_start: 2,
                seq_end: 3
            }
        ]
    );
    assert_eq!(nested_right.byte_sequence(), &[0x01, 0x02, 0x03]);

    // A nested group as the left alternative.
    let nested_left = PatternParser::new("( (01 | 02) | 03 )").parse().unwrap();
    assert_eq!(
        nested_left.atoms(),
        &[
            Atom::Branch {
                left_len: 3,
                right_len: 1
            },
            Atom::Branch {
                left_len: 1,
                right_len: 1
            },
            Atom::ByteSequence {
                seq_start: 0,
                seq_end: 1
            },
            Atom::ByteSequence {
                seq_start: 1,
                seq_end: 2
            },
            Atom::ByteSequence {
                seq_start: 2,
                seq_end: 3
            },
        ]
    );
    assert_eq!(nested_left.byte_sequence(), &[0x01, 0x02, 0x03]);

    // Unclosed groups are reported at the group-open token.
    let only_open = PatternParser::new("(").parse().unwrap_err();
    assert_eq!(
        &only_open,
        &PositionedError::new(0..1, ParseError::GroupNotClosed)
    );

    let open_with_bytes = PatternParser::new("( FF 00").parse().unwrap_err();
    assert_eq!(
        &open_with_bytes,
        &PositionedError::new(0..1, ParseError::GroupNotClosed)
    );

    let open_with_pipe = PatternParser::new(" (|").parse().unwrap_err();
    assert_eq!(
        &open_with_pipe,
        &PositionedError::new(1..2, ParseError::GroupNotClosed)
    );
}
1005
#[test]
fn test_range() {
    // A single bound is a fixed-length wildcard.
    let fixed = PatternParser::new("[0] [123]").parse().unwrap();
    assert_eq!(
        fixed.atoms(),
        &[Atom::WildcardFixed(0), Atom::WildcardFixed(123)]
    );
    assert_eq!(fixed.byte_sequence(), &[]);

    // Two bounds are a ranged wildcard; whitespace inside the brackets is tolerated.
    let ranged = PatternParser::new("[0-10] [123- 999]").parse().unwrap();
    assert_eq!(
        ranged.atoms(),
        &[
            Atom::WildcardRange { min: 0, max: 10 },
            Atom::WildcardRange { min: 123, max: 999 }
        ]
    );
    assert_eq!(ranged.byte_sequence(), &[]);

    // Missing upper bound.
    let missing_end = PatternParser::new("[0-]").parse().unwrap_err();
    assert_eq!(
        &missing_end,
        &PositionedError::new(3..4, ParseError::UnexpectedToken)
    );

    // A bound that is not a decimal number.
    let invalid_bound = PatternParser::new("[FF 0-3]").parse().unwrap_err();
    assert_eq!(*invalid_bound.position(), 1..3);
    assert!(matches!(
        invalid_bound.inner(),
        ParseError::RangeBoundInvalid(_)
    ));

    // Input ends before the closing bracket.
    let unclosed = PatternParser::new("[0-3").parse().unwrap_err();
    assert_eq!(
        &unclosed,
        &PositionedError::new(3..4, ParseError::UnexpectedEnd)
    );

    // The upper bound must be greater than the lower bound.
    let reversed = PatternParser::new("[3-0]").parse().unwrap_err();
    assert_eq!(
        &reversed,
        &PositionedError::new(3..4, ParseError::RangeEndMustBeGraterThenStart)
    );
}
1071}