1use bstr::ByteSlice;
2
3pub trait Pattern: Clone + Copy {
6 fn eval(&self, input: &[u8]) -> Option<usize>;
12
13 fn then<P>(self, other: P) -> Sequence<Self, P>
22 where
23 Self: Sized,
24 P: Pattern,
25 {
26 Sequence {
27 pat1: self,
28 pat2: other,
29 }
30 }
31
32 fn or<P>(self, other: P) -> Choice<Self, P>
41 where
42 Self: Sized,
43 P: Pattern,
44 {
45 Choice {
46 pat1: self,
47 pat2: other,
48 }
49 }
50}
51
52impl<P> Pattern for &P
53where
54 P: Pattern,
55{
56 fn eval(&self, input: &[u8]) -> Option<usize> {
57 (*self).eval(input)
58 }
59}
60
61#[derive(Clone, Copy, Debug)]
63pub struct Choice<C1, C2> {
64 pat1: C1,
65 pat2: C2,
66}
67
68impl<P1, P2> Pattern for Choice<P1, P2>
69where
70 P1: Pattern,
71 P2: Pattern,
72{
73 fn eval(&self, input: &[u8]) -> Option<usize> {
74 match self.pat1.eval(input) {
75 Some(res) => Some(res),
76 None => self.pat2.eval(input),
77 }
78 }
79}
80
81#[derive(Clone, Copy, Debug)]
83pub struct Sequence<P1, P2> {
84 pat1: P1,
85 pat2: P2,
86}
87
88impl<P1, P2> Pattern for Sequence<P1, P2>
89where
90 P1: Pattern,
91 P2: Pattern,
92{
93 fn eval(&self, input: &[u8]) -> Option<usize> {
94 if let Some(a) = self.pat1.eval(input) {
95 if let Some(b) = self.pat2.eval(&input[a..]) {
96 return Some(a + b);
97 }
98 }
99 None
100 }
101}
102
103pub fn byte() -> One {
111 One
112}
113
114#[derive(Debug, Clone, Copy)]
116pub struct One;
117
118impl Pattern for One {
119 fn eval(&self, input: &[u8]) -> Option<usize> {
120 if input.is_empty() {
121 return None;
122 }
123
124 Some(1)
125 }
126}
127
128pub fn codepoint() -> Codepoint {
136 Codepoint
137}
138
139#[derive(Debug, Clone, Copy)]
141pub struct Codepoint;
142
143impl Pattern for Codepoint {
144 fn eval(&self, input: &[u8]) -> Option<usize> {
145 input.chars().next().map(|c| c.len_utf8())
146 }
147}
148
149pub fn prefix(slice: &[u8]) -> Prefix {
168 Prefix(slice)
169}
170
171pub fn utf8(s: &str) -> Prefix {
188 Prefix(s.as_bytes())
189}
190
191impl Pattern for &str {
192 fn eval(&self, input: &[u8]) -> Option<usize> {
193 utf8(self).eval(input)
194 }
195}
196
197impl Pattern for &[u8] {
198 fn eval(&self, input: &[u8]) -> Option<usize> {
199 Prefix(self).eval(input)
200 }
201}
202
203impl Pattern for u8 {
204 fn eval(&self, input: &[u8]) -> Option<usize> {
205 Prefix(&[*self]).eval(input)
206 }
207}
208
209#[derive(Debug, Clone, Copy)]
211pub struct Prefix<'p>(&'p [u8]);
212
213impl Pattern for Prefix<'_> {
214 fn eval(&self, input: &[u8]) -> Option<usize> {
215 if !input.starts_with(self.0) {
216 return None;
217 }
218
219 Some(self.0.len())
220 }
221}
222
223pub fn oneof(bytes: &[u8]) -> ByteLookupTable {
234 let mut set: [bool; 256] = [false; 256];
235
236 let mut i = 0;
237 while i < bytes.len() {
238 set[bytes[i] as usize] = true;
239 i += 1;
240 }
241
242 ByteLookupTable(set)
243}
244
245pub fn noneof(bytes: &[u8]) -> ByteLookupTable {
247 let mut set = oneof(bytes).0;
248 let mut i = 0;
249 while i < set.len() {
250 set[i] = !set[i];
251 i += 1;
252 }
253
254 ByteLookupTable(set)
255}
256
257pub fn digit() -> ByteLookupTable {
266 oneof(b"0123456789")
267}
268
269pub fn alpha() -> ByteLookupTable {
278 oneof(b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
279}
280
281pub fn hex() -> Choice<ByteLookupTable, ByteLookupTable> {
290 oneof(b"abcdefABCDEF").or(digit())
291}
292
293#[derive(Debug, Clone, Copy)]
295pub struct ByteLookupTable([bool; 256]);
296
297impl Pattern for ByteLookupTable {
298 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
299 let first = *input.first()?;
300
301 if self.0[first as usize] {
302 return Some(1);
303 }
304 None
305 }
306}
307pub fn range(lo: u8, hi: u8) -> ElementRange {
316 ElementRange(lo, hi)
317}
318
319#[derive(Debug, Clone, Copy)]
321pub struct ElementRange(u8, u8);
322
323impl Pattern for ElementRange {
324 fn eval(&self, input: &[u8]) -> Option<usize> {
325 let first = input.first()?;
326
327 if first >= &self.0 && first <= &self.1 {
328 return Some(1);
329 }
330
331 None
332 }
333}
334
335pub fn any<P>(pattern: P) -> Repetition<P>
345where
346 P: Pattern,
347{
348 Repetition {
349 lo: 0,
350 hi: None,
351 pattern,
352 }
353}
354
355pub fn at_least<P>(n: usize, pattern: P) -> Repetition<P>
364where
365 P: Pattern,
366{
367 Repetition {
368 lo: n,
369 hi: None,
370 pattern,
371 }
372}
373
374pub fn at_most<P>(n: usize, pattern: P) -> Repetition<P>
384where
385 P: Pattern,
386{
387 Repetition {
388 lo: 0,
389 hi: Some(n),
390 pattern,
391 }
392}
393
394pub fn optional<P>(pattern: P) -> Repetition<P>
405where
406 P: Pattern,
407{
408 Repetition {
409 lo: 0,
410 hi: Some(1),
411 pattern,
412 }
413}
414
415pub fn count<P>(n: usize, pattern: P) -> Repetition<P>
424where
425 P: Pattern,
426{
427 Repetition {
428 lo: n,
429 hi: Some(n),
430 pattern,
431 }
432}
433
434pub fn between<P>(lo: usize, hi: usize, pattern: P) -> Repetition<P>
438where
439 P: Pattern,
440{
441 Repetition {
442 lo,
443 hi: Some(hi),
444 pattern,
445 }
446}
447
448#[derive(Debug, Clone, Copy)]
452pub struct Repetition<P> {
453 pattern: P,
454 lo: usize,
455 hi: Option<usize>,
456}
457
458impl<P> Pattern for Repetition<P>
459where
460 P: Pattern,
461{
462 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
463 let mut match_count = 0;
464 let mut offset = 0;
465
466 if let Some(upper_bound) = self.hi {
467 assert!(
468 upper_bound >= self.lo,
469 "upper bound should be greater than or equal to the lower bound"
470 );
471 };
472
473 loop {
474 if let Some(upper_bound) = self.hi {
476 if match_count == upper_bound {
477 return Some(offset);
478 }
479 };
480
481 let Some(value) = self.pattern.eval(&input[offset..]) else {
482 if match_count < self.lo {
483 return None;
484 }
485 return Some(offset);
486 };
487
488 match_count += 1;
489 offset += value;
490 }
491 }
492}
493
494pub fn not<P>(pattern: P) -> Not<P>
498where
499 P: Pattern,
500{
501 Not(pattern)
502}
503
504#[derive(Debug, Clone, Copy)]
506pub struct Not<P>(P);
507
508impl<P> Pattern for Not<P>
509where
510 P: Pattern,
511{
512 fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
513 if self.0.eval(input).is_some() {
514 return None;
515 }
516
517 Some(0)
518 }
519}
520
521pub fn end() -> Eof {
523 Eof
524}
525
526#[derive(Debug, Clone, Copy)]
528pub struct Eof;
529
530impl Pattern for Eof {
531 fn eval(&self, input: &[u8]) -> Option<usize> {
532 input.is_empty().then_some(0)
533 }
534}
535
536#[cfg(test)]
537mod tests {
538 use super::*;
539
540 #[test]
541 fn match_prefix() {
542 assert_eq!("".eval(b""), Some(0));
544 assert_eq!("".eval(b"aa"), Some(0));
546 assert_eq!("aa".eval(b""), None);
548 assert_eq!("aa".eval(b"a"), None);
550 assert_eq!("aa".eval(b"b"), None);
551 assert_eq!("aa".eval(b"aa"), Some(2));
553 assert_eq!("aa".eval(b"bb"), None);
554 assert_eq!("aa".eval(b"aaa"), Some(2));
556 assert_eq!("aa".eval(b"bbb"), None);
557 }
558
559 #[test]
560 fn match_range() {
561 assert_eq!(range(b'5', b'5').eval(b""), None);
563 assert_eq!(range(b'5', b'5').eval(b"5"), Some(1));
565 assert_eq!(range(b'5', b'5').eval(b"4"), None);
567 assert_eq!(range(b'5', b'5').eval(b"6"), None);
568 assert_eq!(range(b'3', b'5').eval(b""), None);
570 assert_eq!(range(b'3', b'5').eval(b"3"), Some(1));
572 assert_eq!(range(b'3', b'5').eval(b"5"), Some(1));
573 assert_eq!(range(b'3', b'5').eval(b"4"), Some(1));
575 assert_eq!(range(b'3', b'5').eval(b"2"), None);
577 assert_eq!(range(b'3', b'5').eval(b"6"), None);
578 }
579
580 #[test]
581 fn match_oneof_noneof() {
582 assert_eq!(oneof(b"").eval(b""), None);
584 assert_eq!(noneof(b"").eval(b""), None);
585
586 assert_eq!(oneof(b"").eval(b"a"), None);
588 assert_eq!(noneof(b"").eval(b"a"), Some(1));
589
590 assert_eq!(oneof(b"ab").eval(b""), None);
592 assert_eq!(noneof(b"ab").eval(b""), None);
593
594 assert_eq!(oneof(b"ab").eval(b"a"), Some(1));
596 assert_eq!(oneof(b"ab").eval(b"b"), Some(1));
597
598 assert_eq!(noneof(b"ab").eval(b"a"), None);
599 assert_eq!(noneof(b"ab").eval(b"b"), None);
600
601 assert_eq!(oneof(b"ab").eval(b"c"), None);
603 assert_eq!(noneof(b"ab").eval(b"c"), Some(1));
604 }
605
606 #[test]
607 fn match_choice() {
608 assert_eq!("a".or("aa").eval(b"aa"), Some(1));
610 assert_eq!("aa".or("a").eval(b"aa"), Some(2));
611 assert_eq!("a".or("b").eval(b"a"), Some(1));
613
614 assert_eq!("b".or("a").eval(b"a"), Some(1));
616 assert_eq!("b".or("a").eval(b"c"), None);
618 }
619
620 #[test]
621 fn match_sequence() {
622 assert_eq!("a".then("b").eval(b"ab"), Some(2));
624 assert_eq!("a".then("c").eval(b"ab"), None);
626
627 assert_eq!("a".then("").eval(b"b"), None);
629 assert_eq!("a".then("c").eval(b"b"), None);
631 }
632
633 #[test]
634 fn patterns_are_reusable() {
635 let pattern = "a".then("b".or("c"));
636 assert_eq!(pattern.eval(b"ac"), Some(2));
637 assert_eq!(pattern.eval(b"ab"), Some(2));
638 }
639
640 #[test]
641 fn repetition_patterns() {
642 assert_eq!(count(0, "a").eval(b""), Some(0));
644 assert_eq!(count(0, "a").eval(b"a"), Some(0));
645 assert_eq!(count(0, "a").eval(b"aa"), Some(0));
646
647 assert_eq!(count(3, "a").eval(b""), None);
649 assert_eq!(count(3, "a").eval(b"aa"), None);
650 assert_eq!(count(3, "a").eval(b"aaa"), Some(3));
651 assert_eq!(count(3, "a").eval(b"aaaa"), Some(3));
652
653 assert_eq!(any("a").eval(b""), Some(0));
655 assert_eq!(any("a").eval(b"b"), Some(0));
656 assert_eq!(any("a").eval(b"aa"), Some(2));
657 assert_eq!(any("a").eval(b"aab"), Some(2));
658
659 assert_eq!(at_least(0, "a").eval(b""), Some(0));
661 assert_eq!(at_least(0, "a").eval(b"a"), Some(1));
662 assert_eq!(at_least(0, "a").eval(b"aaaa"), Some(4));
663
664 assert_eq!(at_least(3, "a").eval(b""), None);
666 assert_eq!(at_least(3, "a").eval(b"aa"), None);
667 assert_eq!(at_least(3, "a").eval(b"aaa"), Some(3));
668 assert_eq!(at_least(3, "a").eval(b"aaaaa"), Some(5));
669
670 assert_eq!(at_most(0, "a").eval(b""), Some(0));
672 assert_eq!(at_most(0, "a").eval(b"a"), Some(0));
673 assert_eq!(at_most(0, "a").eval(b"aa"), Some(0));
674
675 assert_eq!(at_most(3, "a").eval(b""), Some(0));
677 assert_eq!(at_most(3, "a").eval(b"b"), Some(0));
678 assert_eq!(at_most(3, "a").eval(b"aa"), Some(2));
679 assert_eq!(at_most(3, "a").eval(b"aaa"), Some(3));
680 assert_eq!(at_most(3, "a").eval(b"aaaaa"), Some(3));
681
682 assert_eq!(between(2, 4, "a").eval(b""), None);
684 assert_eq!(between(2, 4, "a").eval(b"a"), None);
685 assert_eq!(between(2, 4, "a").eval(b"aa"), Some(2));
686 assert_eq!(between(2, 4, "a").eval(b"aaa"), Some(3));
687 assert_eq!(between(2, 4, "a").eval(b"aaaa"), Some(4));
688 assert_eq!(between(2, 4, "a").eval(b"aaaaa"), Some(4));
689 }
690}