1pub trait Pattern: Clone + Copy {
4 fn eval(&self, input: &[u8]) -> Option<usize>;
10
11 fn then<P>(self, other: P) -> Sequence<Self, P>
20 where
21 Self: Sized,
22 P: Pattern,
23 {
24 Sequence {
25 pat1: self,
26 pat2: other,
27 }
28 }
29
30 fn or<P>(self, other: P) -> Choice<Self, P>
39 where
40 Self: Sized,
41 P: Pattern,
42 {
43 Choice {
44 pat1: self,
45 pat2: other,
46 }
47 }
48}
49
50impl<P> Pattern for &P
51where
52 P: Pattern,
53{
54 fn eval(&self, input: &[u8]) -> Option<usize> {
55 (*self).eval(input)
56 }
57}
58
59#[derive(Clone, Copy, Debug)]
61pub struct Choice<C1, C2> {
62 pat1: C1,
63 pat2: C2,
64}
65
66impl<P1, P2> Pattern for Choice<P1, P2>
67where
68 P1: Pattern,
69 P2: Pattern,
70{
71 fn eval(&self, input: &[u8]) -> Option<usize> {
72 match self.pat1.eval(input) {
73 Some(res) => Some(res),
74 None => self.pat2.eval(input),
75 }
76 }
77}
78
79#[derive(Clone, Copy, Debug)]
81pub struct Sequence<P1, P2> {
82 pat1: P1,
83 pat2: P2,
84}
85
86impl<P1, P2> Pattern for Sequence<P1, P2>
87where
88 P1: Pattern,
89 P2: Pattern,
90{
91 fn eval(&self, input: &[u8]) -> Option<usize> {
92 if let Some(a) = self.pat1.eval(input) {
93 if let Some(b) = self.pat2.eval(&input[a..]) {
94 return Some(a + b);
95 }
96 }
97 None
98 }
99}
100
101pub fn one() -> One {
109 One
110}
111
112#[derive(Debug, Clone, Copy)]
114pub struct One;
115
116impl Pattern for One {
117 fn eval(&self, input: &[u8]) -> Option<usize> {
118 if input.is_empty() {
119 return None;
120 }
121
122 Some(1)
123 }
124}
125
126pub fn prefix(slice: &[u8]) -> Prefix {
145 Prefix(slice)
146}
147
148pub fn utf8(s: &str) -> Prefix {
165 Prefix(s.as_bytes())
166}
167
168impl Pattern for &str {
169 fn eval(&self, input: &[u8]) -> Option<usize> {
170 utf8(self).eval(input)
171 }
172}
173
174impl Pattern for &[u8] {
175 fn eval(&self, input: &[u8]) -> Option<usize> {
176 Prefix(self).eval(input)
177 }
178}
179
180impl Pattern for u8 {
181 fn eval(&self, input: &[u8]) -> Option<usize> {
182 Prefix(&[*self]).eval(input)
183 }
184}
185
186#[derive(Debug, Clone, Copy)]
188pub struct Prefix<'p>(&'p [u8]);
189
190impl Pattern for Prefix<'_> {
191 fn eval(&self, input: &[u8]) -> Option<usize> {
192 if !input.starts_with(self.0) {
193 return None;
194 }
195
196 Some(self.0.len())
197 }
198}
199
200pub fn oneof(bytes: &[u8]) -> ByteLookupTable {
211 let mut set: [bool; 256] = [false; 256];
212
213 let mut i = 0;
214 while i < bytes.len() {
215 set[bytes[i] as usize] = true;
216 i += 1;
217 }
218
219 ByteLookupTable(set)
220}
221
222pub fn noneof(bytes: &[u8]) -> ByteLookupTable {
224 let mut set = oneof(bytes).0;
225 let mut i = 0;
226 while i < set.len() {
227 set[i] = !set[i];
228 i += 1;
229 }
230
231 ByteLookupTable(set)
232}
233
234pub fn digit() -> ByteLookupTable {
243 oneof(b"0123456789")
244}
245
246pub fn alpha() -> ByteLookupTable {
255 oneof(b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
256}
257
258pub fn hex() -> Choice<ByteLookupTable, ByteLookupTable> {
267 oneof(b"abcdefABCDEF").or(digit())
268}
269
270#[derive(Debug, Clone, Copy)]
272pub struct ByteLookupTable([bool; 256]);
273
274impl Pattern for ByteLookupTable {
275 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
276 let first = *input.first()?;
277
278 if self.0[first as usize] {
279 return Some(1);
280 }
281 None
282 }
283}
284pub fn range(lo: u8, hi: u8) -> ElementRange {
293 ElementRange(lo, hi)
294}
295
296#[derive(Debug, Clone, Copy)]
298pub struct ElementRange(u8, u8);
299
300impl Pattern for ElementRange {
301 fn eval(&self, input: &[u8]) -> Option<usize> {
302 let first = input.first()?;
303
304 if first >= &self.0 && first <= &self.1 {
305 return Some(1);
306 }
307
308 None
309 }
310}
311
312pub fn any<P>(pattern: P) -> Repetition<P>
322where
323 P: Pattern,
324{
325 Repetition {
326 lo: 0,
327 hi: None,
328 pattern,
329 }
330}
331
332pub fn at_least<P>(n: usize, pattern: P) -> Repetition<P>
341where
342 P: Pattern,
343{
344 Repetition {
345 lo: n,
346 hi: None,
347 pattern,
348 }
349}
350
351pub fn at_most<P>(n: usize, pattern: P) -> Repetition<P>
361where
362 P: Pattern,
363{
364 Repetition {
365 lo: 0,
366 hi: Some(n),
367 pattern,
368 }
369}
370
371pub fn optional<P>(pattern: P) -> Repetition<P>
382where
383 P: Pattern,
384{
385 Repetition {
386 lo: 0,
387 hi: Some(1),
388 pattern,
389 }
390}
391
392pub fn count<P>(n: usize, pattern: P) -> Repetition<P>
401where
402 P: Pattern,
403{
404 Repetition {
405 lo: n,
406 hi: Some(n),
407 pattern,
408 }
409}
410
411pub fn between<P>(lo: usize, hi: usize, pattern: P) -> Repetition<P>
415where
416 P: Pattern,
417{
418 Repetition {
419 lo,
420 hi: Some(hi),
421 pattern,
422 }
423}
424
425#[derive(Debug, Clone, Copy)]
429pub struct Repetition<P> {
430 pattern: P,
431 lo: usize,
432 hi: Option<usize>,
433}
434
435impl<P> Pattern for Repetition<P>
436where
437 P: Pattern,
438{
439 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
440 let mut match_count = 0;
441 let mut offset = 0;
442
443 if let Some(upper_bound) = self.hi {
444 assert!(
445 upper_bound >= self.lo,
446 "upper bound should be greater than or equal to the lower bound"
447 );
448 };
449
450 loop {
451 if let Some(upper_bound) = self.hi {
453 if match_count == upper_bound {
454 return Some(offset);
455 }
456 };
457
458 let Some(value) = self.pattern.eval(&input[offset..]) else {
459 if match_count < self.lo {
460 return None;
461 }
462 return Some(offset);
463 };
464
465 match_count += 1;
466 offset += value;
467 }
468 }
469}
470
471pub fn not<P>(pattern: P) -> Not<P>
475where
476 P: Pattern,
477{
478 Not(pattern)
479}
480
481#[derive(Debug, Clone, Copy)]
483pub struct Not<P>(P);
484
485impl<P> Pattern for Not<P>
486where
487 P: Pattern,
488{
489 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
490 if self.0.eval(input).is_some() {
491 return None;
492 }
493
494 Some(0)
495 }
496}
497
498pub fn end() -> Eof {
500 Eof
501}
502
503#[derive(Debug, Clone, Copy)]
505pub struct Eof;
506
507impl Pattern for Eof {
508 fn eval(&self, input: &[u8]) -> Option<usize> {
509 input.is_empty().then_some(0)
510 }
511}
512
513#[cfg(test)]
514mod tests {
515 use super::*;
516
517 #[test]
518 fn match_prefix() {
519 assert_eq!("".eval(b""), Some(0));
521 assert_eq!("".eval(b"aa"), Some(0));
523 assert_eq!("aa".eval(b""), None);
525 assert_eq!("aa".eval(b"a"), None);
527 assert_eq!("aa".eval(b"b"), None);
528 assert_eq!("aa".eval(b"aa"), Some(2));
530 assert_eq!("aa".eval(b"bb"), None);
531 assert_eq!("aa".eval(b"aaa"), Some(2));
533 assert_eq!("aa".eval(b"bbb"), None);
534 }
535
536 #[test]
537 fn match_range() {
538 assert_eq!(range(b'5', b'5').eval(b""), None);
540 assert_eq!(range(b'5', b'5').eval(b"5"), Some(1));
542 assert_eq!(range(b'5', b'5').eval(b"4"), None);
544 assert_eq!(range(b'5', b'5').eval(b"6"), None);
545 assert_eq!(range(b'3', b'5').eval(b""), None);
547 assert_eq!(range(b'3', b'5').eval(b"3"), Some(1));
549 assert_eq!(range(b'3', b'5').eval(b"5"), Some(1));
550 assert_eq!(range(b'3', b'5').eval(b"4"), Some(1));
552 assert_eq!(range(b'3', b'5').eval(b"2"), None);
554 assert_eq!(range(b'3', b'5').eval(b"6"), None);
555 }
556
557 #[test]
558 fn match_oneof_noneof() {
559 assert_eq!(oneof(b"").eval(b""), None);
561 assert_eq!(noneof(b"").eval(b""), None);
562
563 assert_eq!(oneof(b"").eval(b"a"), None);
565 assert_eq!(noneof(b"").eval(b"a"), Some(1));
566
567 assert_eq!(oneof(b"ab").eval(b""), None);
569 assert_eq!(noneof(b"ab").eval(b""), None);
570
571 assert_eq!(oneof(b"ab").eval(b"a"), Some(1));
573 assert_eq!(oneof(b"ab").eval(b"b"), Some(1));
574
575 assert_eq!(noneof(b"ab").eval(b"a"), None);
576 assert_eq!(noneof(b"ab").eval(b"b"), None);
577
578 assert_eq!(oneof(b"ab").eval(b"c"), None);
580 assert_eq!(noneof(b"ab").eval(b"c"), Some(1));
581 }
582
583 #[test]
584 fn match_choice() {
585 assert_eq!("a".or("aa").eval(b"aa"), Some(1));
587 assert_eq!("aa".or("a").eval(b"aa"), Some(2));
588 assert_eq!("a".or("b").eval(b"a"), Some(1));
590
591 assert_eq!("b".or("a").eval(b"a"), Some(1));
593 assert_eq!("b".or("a").eval(b"c"), None);
595 }
596
597 #[test]
598 fn match_sequence() {
599 assert_eq!("a".then("b").eval(b"ab"), Some(2));
601 assert_eq!("a".then("c").eval(b"ab"), None);
603
604 assert_eq!("a".then("").eval(b"b"), None);
606 assert_eq!("a".then("c").eval(b"b"), None);
608 }
609
610 #[test]
611 fn patterns_are_reusable() {
612 let pattern = "a".then("b".or("c"));
613 assert_eq!(pattern.eval(b"ac"), Some(2));
614 assert_eq!(pattern.eval(b"ab"), Some(2));
615 }
616
617 #[test]
618 fn repetition_patterns() {
619 assert_eq!(count(0, "a").eval(b""), Some(0));
621 assert_eq!(count(0, "a").eval(b"a"), Some(0));
622 assert_eq!(count(0, "a").eval(b"aa"), Some(0));
623
624 assert_eq!(count(3, "a").eval(b""), None);
626 assert_eq!(count(3, "a").eval(b"aa"), None);
627 assert_eq!(count(3, "a").eval(b"aaa"), Some(3));
628 assert_eq!(count(3, "a").eval(b"aaaa"), Some(3));
629
630 assert_eq!(any("a").eval(b""), Some(0));
632 assert_eq!(any("a").eval(b"b"), Some(0));
633 assert_eq!(any("a").eval(b"aa"), Some(2));
634 assert_eq!(any("a").eval(b"aab"), Some(2));
635
636 assert_eq!(at_least(0, "a").eval(b""), Some(0));
638 assert_eq!(at_least(0, "a").eval(b"a"), Some(1));
639 assert_eq!(at_least(0, "a").eval(b"aaaa"), Some(4));
640
641 assert_eq!(at_least(3, "a").eval(b""), None);
643 assert_eq!(at_least(3, "a").eval(b"aa"), None);
644 assert_eq!(at_least(3, "a").eval(b"aaa"), Some(3));
645 assert_eq!(at_least(3, "a").eval(b"aaaaa"), Some(5));
646
647 assert_eq!(at_most(0, "a").eval(b""), Some(0));
649 assert_eq!(at_most(0, "a").eval(b"a"), Some(0));
650 assert_eq!(at_most(0, "a").eval(b"aa"), Some(0));
651
652 assert_eq!(at_most(3, "a").eval(b""), Some(0));
654 assert_eq!(at_most(3, "a").eval(b"b"), Some(0));
655 assert_eq!(at_most(3, "a").eval(b"aa"), Some(2));
656 assert_eq!(at_most(3, "a").eval(b"aaa"), Some(3));
657 assert_eq!(at_most(3, "a").eval(b"aaaaa"), Some(3));
658
659 assert_eq!(between(2, 4, "a").eval(b""), None);
661 assert_eq!(between(2, 4, "a").eval(b"a"), None);
662 assert_eq!(between(2, 4, "a").eval(b"aa"), Some(2));
663 assert_eq!(between(2, 4, "a").eval(b"aaa"), Some(3));
664 assert_eq!(between(2, 4, "a").eval(b"aaaa"), Some(4));
665 assert_eq!(between(2, 4, "a").eval(b"aaaaa"), Some(4));
666 }
667}