1pub trait Pattern: Clone + Copy {
4 fn eval(&self, input: &[u8]) -> Option<usize>;
10
11 fn then<P>(self, other: P) -> Sequence<Self, P>
20 where
21 Self: Sized,
22 P: Pattern,
23 {
24 Sequence {
25 pat1: self,
26 pat2: other,
27 }
28 }
29
30 fn or<P>(self, other: P) -> Choice<Self, P>
39 where
40 Self: Sized,
41 P: Pattern,
42 {
43 Choice {
44 pat1: self,
45 pat2: other,
46 }
47 }
48}
49
50impl<P> Pattern for &P
51where
52 P: Pattern,
53{
54 fn eval(&self, input: &[u8]) -> Option<usize> {
55 (*self).eval(input)
56 }
57}
58
59#[derive(Clone, Copy, Debug)]
61pub struct Choice<C1, C2> {
62 pat1: C1,
63 pat2: C2,
64}
65
66impl<P1, P2> Pattern for Choice<P1, P2>
67where
68 P1: Pattern,
69 P2: Pattern,
70{
71 fn eval(&self, input: &[u8]) -> Option<usize> {
72 match self.pat1.eval(input) {
73 Some(res) => Some(res),
74 None => self.pat2.eval(input),
75 }
76 }
77}
78
79#[derive(Clone, Copy, Debug)]
81pub struct Sequence<P1, P2> {
82 pat1: P1,
83 pat2: P2,
84}
85
86impl<P1, P2> Pattern for Sequence<P1, P2>
87where
88 P1: Pattern,
89 P2: Pattern,
90{
91 fn eval(&self, input: &[u8]) -> Option<usize> {
92 if let Some(a) = self.pat1.eval(input) {
93 if let Some(b) = self.pat2.eval(&input[a..]) {
94 return Some(a + b);
95 }
96 }
97 None
98 }
99}
100
101pub fn byte() -> One {
109 One
110}
111
112#[derive(Debug, Clone, Copy)]
114pub struct One;
115
116impl Pattern for One {
117 fn eval(&self, input: &[u8]) -> Option<usize> {
118 if input.is_empty() {
119 return None;
120 }
121
122 Some(1)
123 }
124}
125
126pub fn codepoint() -> Codepoint {
134 Codepoint
135}
136
137#[derive(Debug, Clone, Copy)]
139pub struct Codepoint;
140
141impl Pattern for Codepoint {
142 fn eval(&self, input: &[u8]) -> Option<usize> {
143 let lookahead = std::cmp::min(input.len(), 4);
145 let chunk = String::from_utf8_lossy(&input[..lookahead]);
146 let size = match chunk.chars().next() {
147 Some(c) if c == char::REPLACEMENT_CHARACTER => return None,
148 None => return None,
149 Some(c) => c.len_utf8(),
150 };
151 Some(size)
152 }
153}
154
155pub fn prefix(slice: &[u8]) -> Prefix<'_> {
174 Prefix(slice)
175}
176
177pub fn utf8(s: &str) -> Prefix<'_> {
194 Prefix(s.as_bytes())
195}
196
197impl Pattern for &str {
198 fn eval(&self, input: &[u8]) -> Option<usize> {
199 utf8(self).eval(input)
200 }
201}
202
203impl Pattern for &[u8] {
204 fn eval(&self, input: &[u8]) -> Option<usize> {
205 Prefix(self).eval(input)
206 }
207}
208
209impl Pattern for u8 {
210 fn eval(&self, input: &[u8]) -> Option<usize> {
211 Prefix(&[*self]).eval(input)
212 }
213}
214
215#[derive(Debug, Clone, Copy)]
217pub struct Prefix<'p>(&'p [u8]);
218
219impl Pattern for Prefix<'_> {
220 fn eval(&self, input: &[u8]) -> Option<usize> {
221 if !input.starts_with(self.0) {
222 return None;
223 }
224
225 Some(self.0.len())
226 }
227}
228
229pub fn oneof(bytes: &[u8]) -> ByteLookupTable {
240 let mut set: [bool; 256] = [false; 256];
241
242 let mut i = 0;
243 while i < bytes.len() {
244 set[bytes[i] as usize] = true;
245 i += 1;
246 }
247
248 ByteLookupTable(set)
249}
250
251pub fn noneof(bytes: &[u8]) -> ByteLookupTable {
253 let mut set = oneof(bytes).0;
254 let mut i = 0;
255 while i < set.len() {
256 set[i] = !set[i];
257 i += 1;
258 }
259
260 ByteLookupTable(set)
261}
262
263pub fn digit() -> ByteLookupTable {
272 oneof(b"0123456789")
273}
274
275pub fn alpha() -> ByteLookupTable {
284 oneof(b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
285}
286
287pub fn hex() -> Choice<ByteLookupTable, ByteLookupTable> {
296 oneof(b"abcdefABCDEF").or(digit())
297}
298
299#[derive(Debug, Clone, Copy)]
301pub struct ByteLookupTable([bool; 256]);
302
303impl Pattern for ByteLookupTable {
304 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
305 let first = *input.first()?;
306
307 if self.0[first as usize] {
308 return Some(1);
309 }
310 None
311 }
312}
313pub fn range(lo: u8, hi: u8) -> ElementRange {
322 ElementRange(lo, hi)
323}
324
325#[derive(Debug, Clone, Copy)]
327pub struct ElementRange(u8, u8);
328
329impl Pattern for ElementRange {
330 fn eval(&self, input: &[u8]) -> Option<usize> {
331 let first = input.first()?;
332
333 if first >= &self.0 && first <= &self.1 {
334 return Some(1);
335 }
336
337 None
338 }
339}
340
341pub fn any<P>(pattern: P) -> Repetition<P>
351where
352 P: Pattern,
353{
354 Repetition {
355 lo: 0,
356 hi: None,
357 pattern,
358 }
359}
360
361pub fn at_least<P>(n: usize, pattern: P) -> Repetition<P>
370where
371 P: Pattern,
372{
373 Repetition {
374 lo: n,
375 hi: None,
376 pattern,
377 }
378}
379
380pub fn at_most<P>(n: usize, pattern: P) -> Repetition<P>
390where
391 P: Pattern,
392{
393 Repetition {
394 lo: 0,
395 hi: Some(n),
396 pattern,
397 }
398}
399
400pub fn optional<P>(pattern: P) -> Repetition<P>
411where
412 P: Pattern,
413{
414 Repetition {
415 lo: 0,
416 hi: Some(1),
417 pattern,
418 }
419}
420
421pub fn count<P>(n: usize, pattern: P) -> Repetition<P>
430where
431 P: Pattern,
432{
433 Repetition {
434 lo: n,
435 hi: Some(n),
436 pattern,
437 }
438}
439
440pub fn between<P>(lo: usize, hi: usize, pattern: P) -> Repetition<P>
444where
445 P: Pattern,
446{
447 Repetition {
448 lo,
449 hi: Some(hi),
450 pattern,
451 }
452}
453
454#[derive(Debug, Clone, Copy)]
458pub struct Repetition<P> {
459 pattern: P,
460 lo: usize,
461 hi: Option<usize>,
462}
463
464impl<P> Pattern for Repetition<P>
465where
466 P: Pattern,
467{
468 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
469 let mut match_count = 0;
470 let mut offset = 0;
471
472 if let Some(upper_bound) = self.hi {
473 assert!(
474 upper_bound >= self.lo,
475 "upper bound should be greater than or equal to the lower bound"
476 );
477 };
478
479 loop {
480 if let Some(upper_bound) = self.hi {
482 if match_count == upper_bound {
483 return Some(offset);
484 }
485 };
486
487 let Some(value) = self.pattern.eval(&input[offset..]) else {
488 if match_count < self.lo {
489 return None;
490 }
491 return Some(offset);
492 };
493
494 match_count += 1;
495 offset += value;
496 }
497 }
498}
499
500pub fn not<P>(pattern: P) -> Not<P>
504where
505 P: Pattern,
506{
507 Not(pattern)
508}
509
510#[derive(Debug, Clone, Copy)]
512pub struct Not<P>(P);
513
514impl<P> Pattern for Not<P>
515where
516 P: Pattern,
517{
518 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
519 if self.0.eval(input).is_some() {
520 return None;
521 }
522
523 Some(0)
524 }
525}
526
527pub fn end() -> Eof {
529 Eof
530}
531
532#[derive(Debug, Clone, Copy)]
534pub struct Eof;
535
536impl Pattern for Eof {
537 fn eval(&self, input: &[u8]) -> Option<usize> {
538 input.is_empty().then_some(0)
539 }
540}
541
542#[cfg(test)]
543mod tests {
544 use super::*;
545
546 #[test]
547 fn match_prefix() {
548 assert_eq!("".eval(b""), Some(0));
550 assert_eq!("".eval(b"aa"), Some(0));
552 assert_eq!("aa".eval(b""), None);
554 assert_eq!("aa".eval(b"a"), None);
556 assert_eq!("aa".eval(b"b"), None);
557 assert_eq!("aa".eval(b"aa"), Some(2));
559 assert_eq!("aa".eval(b"bb"), None);
560 assert_eq!("aa".eval(b"aaa"), Some(2));
562 assert_eq!("aa".eval(b"bbb"), None);
563 }
564
565 #[test]
566 fn match_codepoint() {
567 assert_eq!(codepoint().eval(b"a"), Some(1));
568 assert_eq!(codepoint().eval("๐ป".as_bytes()), Some(4));
569 assert_eq!(codepoint().eval(b"\xff"), None);
570 }
571
572 #[test]
573 fn match_range() {
574 assert_eq!(range(b'5', b'5').eval(b""), None);
576 assert_eq!(range(b'5', b'5').eval(b"5"), Some(1));
578 assert_eq!(range(b'5', b'5').eval(b"4"), None);
580 assert_eq!(range(b'5', b'5').eval(b"6"), None);
581 assert_eq!(range(b'3', b'5').eval(b""), None);
583 assert_eq!(range(b'3', b'5').eval(b"3"), Some(1));
585 assert_eq!(range(b'3', b'5').eval(b"5"), Some(1));
586 assert_eq!(range(b'3', b'5').eval(b"4"), Some(1));
588 assert_eq!(range(b'3', b'5').eval(b"2"), None);
590 assert_eq!(range(b'3', b'5').eval(b"6"), None);
591 }
592
593 #[test]
594 fn match_oneof_noneof() {
595 assert_eq!(oneof(b"").eval(b""), None);
597 assert_eq!(noneof(b"").eval(b""), None);
598
599 assert_eq!(oneof(b"").eval(b"a"), None);
601 assert_eq!(noneof(b"").eval(b"a"), Some(1));
602
603 assert_eq!(oneof(b"ab").eval(b""), None);
605 assert_eq!(noneof(b"ab").eval(b""), None);
606
607 assert_eq!(oneof(b"ab").eval(b"a"), Some(1));
609 assert_eq!(oneof(b"ab").eval(b"b"), Some(1));
610
611 assert_eq!(noneof(b"ab").eval(b"a"), None);
612 assert_eq!(noneof(b"ab").eval(b"b"), None);
613
614 assert_eq!(oneof(b"ab").eval(b"c"), None);
616 assert_eq!(noneof(b"ab").eval(b"c"), Some(1));
617 }
618
619 #[test]
620 fn match_choice() {
621 assert_eq!("a".or("aa").eval(b"aa"), Some(1));
623 assert_eq!("aa".or("a").eval(b"aa"), Some(2));
624 assert_eq!("a".or("b").eval(b"a"), Some(1));
626
627 assert_eq!("b".or("a").eval(b"a"), Some(1));
629 assert_eq!("b".or("a").eval(b"c"), None);
631 }
632
633 #[test]
634 fn match_sequence() {
635 assert_eq!("a".then("b").eval(b"ab"), Some(2));
637 assert_eq!("a".then("c").eval(b"ab"), None);
639
640 assert_eq!("a".then("").eval(b"b"), None);
642 assert_eq!("a".then("c").eval(b"b"), None);
644 }
645
646 #[test]
647 fn patterns_are_reusable() {
648 let pattern = "a".then("b".or("c"));
649 assert_eq!(pattern.eval(b"ac"), Some(2));
650 assert_eq!(pattern.eval(b"ab"), Some(2));
651 }
652
653 #[test]
654 fn repetition_patterns() {
655 assert_eq!(count(0, "a").eval(b""), Some(0));
657 assert_eq!(count(0, "a").eval(b"a"), Some(0));
658 assert_eq!(count(0, "a").eval(b"aa"), Some(0));
659
660 assert_eq!(count(3, "a").eval(b""), None);
662 assert_eq!(count(3, "a").eval(b"aa"), None);
663 assert_eq!(count(3, "a").eval(b"aaa"), Some(3));
664 assert_eq!(count(3, "a").eval(b"aaaa"), Some(3));
665
666 assert_eq!(any("a").eval(b""), Some(0));
668 assert_eq!(any("a").eval(b"b"), Some(0));
669 assert_eq!(any("a").eval(b"aa"), Some(2));
670 assert_eq!(any("a").eval(b"aab"), Some(2));
671
672 assert_eq!(at_least(0, "a").eval(b""), Some(0));
674 assert_eq!(at_least(0, "a").eval(b"a"), Some(1));
675 assert_eq!(at_least(0, "a").eval(b"aaaa"), Some(4));
676
677 assert_eq!(at_least(3, "a").eval(b""), None);
679 assert_eq!(at_least(3, "a").eval(b"aa"), None);
680 assert_eq!(at_least(3, "a").eval(b"aaa"), Some(3));
681 assert_eq!(at_least(3, "a").eval(b"aaaaa"), Some(5));
682
683 assert_eq!(at_most(0, "a").eval(b""), Some(0));
685 assert_eq!(at_most(0, "a").eval(b"a"), Some(0));
686 assert_eq!(at_most(0, "a").eval(b"aa"), Some(0));
687
688 assert_eq!(at_most(3, "a").eval(b""), Some(0));
690 assert_eq!(at_most(3, "a").eval(b"b"), Some(0));
691 assert_eq!(at_most(3, "a").eval(b"aa"), Some(2));
692 assert_eq!(at_most(3, "a").eval(b"aaa"), Some(3));
693 assert_eq!(at_most(3, "a").eval(b"aaaaa"), Some(3));
694
695 assert_eq!(between(2, 4, "a").eval(b""), None);
697 assert_eq!(between(2, 4, "a").eval(b"a"), None);
698 assert_eq!(between(2, 4, "a").eval(b"aa"), Some(2));
699 assert_eq!(between(2, 4, "a").eval(b"aaa"), Some(3));
700 assert_eq!(between(2, 4, "a").eval(b"aaaa"), Some(4));
701 assert_eq!(between(2, 4, "a").eval(b"aaaaa"), Some(4));
702 }
703}