bparse/
pattern.rs

1use bstr::ByteSlice;
2
3// The Clone & Copy super traits allow patterns to be re-used even when a function returns `impl Pattern`
4/// Expresses that the implementing type may be used to match a byte slice.
5pub trait Pattern: Clone + Copy {
6    /// Evaluates the pattern against a byte slice.
7    /// If the pattern matches, the length of matching slice should be returned.
8    /// Otherwise, `None` should be returned.
9    ///
10    /// It is assumed that the pattern begins matching from the start of the slice.
11    fn eval(&self, input: &[u8]) -> Option<usize>;
12
13    /// Returns a new pattern that will match if `self` and `other` match in sequence.
14    ///
15    /// ```
16    /// use bparse::Pattern;
17    ///
18    /// let pattern = "a".then("b");
19    /// assert_eq!(pattern.eval(b"abc"), Some(2));
20    /// ```
21    fn then<P>(self, other: P) -> Sequence<Self, P>
22    where
23        Self: Sized,
24        P: Pattern,
25    {
26        Sequence {
27            pat1: self,
28            pat2: other,
29        }
30    }
31
32    /// Returns a new pattern that will match if either `self` or `other` match.
33    ///
34    /// ```
35    /// use bparse::Pattern;
36    ///
37    /// let pattern = "a".or("b");
38    /// assert_eq!(pattern.eval(b"ba"), Some(1));
39    /// ```
40    fn or<P>(self, other: P) -> Choice<Self, P>
41    where
42        Self: Sized,
43        P: Pattern,
44    {
45        Choice {
46            pat1: self,
47            pat2: other,
48        }
49    }
50}
51
52impl<P> Pattern for &P
53where
54    P: Pattern,
55{
56    fn eval(&self, input: &[u8]) -> Option<usize> {
57        (*self).eval(input)
58    }
59}
60
61/// See [`Pattern::or`]
62#[derive(Clone, Copy, Debug)]
63pub struct Choice<C1, C2> {
64    pat1: C1,
65    pat2: C2,
66}
67
68impl<P1, P2> Pattern for Choice<P1, P2>
69where
70    P1: Pattern,
71    P2: Pattern,
72{
73    fn eval(&self, input: &[u8]) -> Option<usize> {
74        match self.pat1.eval(input) {
75            Some(res) => Some(res),
76            None => self.pat2.eval(input),
77        }
78    }
79}
80
81/// See [`Pattern::then`]
82#[derive(Clone, Copy, Debug)]
83pub struct Sequence<P1, P2> {
84    pat1: P1,
85    pat2: P2,
86}
87
88impl<P1, P2> Pattern for Sequence<P1, P2>
89where
90    P1: Pattern,
91    P2: Pattern,
92{
93    fn eval(&self, input: &[u8]) -> Option<usize> {
94        if let Some(a) = self.pat1.eval(input) {
95            if let Some(b) = self.pat2.eval(&input[a..]) {
96                return Some(a + b);
97            }
98        }
99        None
100    }
101}
102
103/// Returns a new pattern that always matches the next byte in the input if it exists.
104///
105/// ```
106/// use bparse::{Pattern, byte};
107///
108/// assert_eq!(byte().eval(&[1, 2, 3]), Some(1));
109/// ```
110pub fn byte() -> One {
111    One
112}
113
114/// See [`one`]
115#[derive(Debug, Clone, Copy)]
116pub struct One;
117
118impl Pattern for One {
119    fn eval(&self, input: &[u8]) -> Option<usize> {
120        if input.is_empty() {
121            return None;
122        }
123
124        Some(1)
125    }
126}
127
128/// Returns a new pattern that matches the next utf-8 codepoint if it exists.
129///
130/// ```
131/// use bparse::{Pattern, codepoint};
132///
133/// assert_eq!(codepoint().eval("๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆa".as_bytes()), Some(4))
134/// ```
135pub fn codepoint() -> Codepoint {
136    Codepoint
137}
138
139/// See [`codepoint`]
140#[derive(Debug, Clone, Copy)]
141pub struct Codepoint;
142
143impl Pattern for Codepoint {
144    fn eval(&self, input: &[u8]) -> Option<usize> {
145        input.chars().next().map(|c| c.len_utf8())
146    }
147}
148
149/// Returns a new pattern that will match if `slice` is a prefix of the input.
150///
151/// ```
152/// use bparse::{Pattern, prefix};
153///
154/// let pattern = prefix(&[1]);
155/// assert_eq!(pattern.eval(&[1, 2, 3]), Some(1));
156/// ```
157///
158/// As a convenience, the `Pattern` trait is implemented for slices and bytes with the same effect:
159///
160/// ```
161/// use bparse::Pattern;
162///
163/// assert_eq!((&b"ab"[..]).eval(b"abc"), Some(2));
164/// ```
165///
166/// If you are working with ascii or utf8 byte slices, consider [`utf8`]
167pub fn prefix(slice: &[u8]) -> Prefix {
168    Prefix(slice)
169}
170
171/// Returns a new pattern that will match the utf8 string slice `s` at the start of the input.
172///
173/// ```
174/// use bparse::{Pattern, utf8};
175///
176/// let pattern = utf8("he");
177/// assert_eq!(pattern.eval(b"hello"), Some(2));
178/// ```
179///
180/// As a convenience, the `Pattern` trait is implemented for string slices with the same effect:
181///
182/// ```
183/// use bparse::Pattern;
184///
185/// assert_eq!("he".eval(b"hello"), Some(2));
186/// ```
187pub fn utf8(s: &str) -> Prefix {
188    Prefix(s.as_bytes())
189}
190
191impl Pattern for &str {
192    fn eval(&self, input: &[u8]) -> Option<usize> {
193        utf8(self).eval(input)
194    }
195}
196
197impl Pattern for &[u8] {
198    fn eval(&self, input: &[u8]) -> Option<usize> {
199        Prefix(self).eval(input)
200    }
201}
202
203impl Pattern for u8 {
204    fn eval(&self, input: &[u8]) -> Option<usize> {
205        Prefix(&[*self]).eval(input)
206    }
207}
208
209/// See [`prefix`]
210#[derive(Debug, Clone, Copy)]
211pub struct Prefix<'p>(&'p [u8]);
212
213impl Pattern for Prefix<'_> {
214    fn eval(&self, input: &[u8]) -> Option<usize> {
215        if !input.starts_with(self.0) {
216            return None;
217        }
218
219        Some(self.0.len())
220    }
221}
222
223/// Returns a pattern that will match any byte in `bytes` at the start of the input
224///
225/// ```
226/// use bparse::{Pattern, oneof};
227///
228/// let pattern = oneof(b"?!.:");
229/// assert_eq!(pattern.eval(b"hi"), None);
230/// assert_eq!(pattern.eval(b"!!"), Some(1));
231/// assert_eq!(pattern.eval(b":"), Some(1));
232/// ```
233pub fn oneof(bytes: &[u8]) -> ByteLookupTable {
234    let mut set: [bool; 256] = [false; 256];
235
236    let mut i = 0;
237    while i < bytes.len() {
238        set[bytes[i] as usize] = true;
239        i += 1;
240    }
241
242    ByteLookupTable(set)
243}
244
245/// Inverse of [`oneof`].
246pub fn noneof(bytes: &[u8]) -> ByteLookupTable {
247    let mut set = oneof(bytes).0;
248    let mut i = 0;
249    while i < set.len() {
250        set[i] = !set[i];
251        i += 1;
252    }
253
254    ByteLookupTable(set)
255}
256
257/// Returns a pattern that will match any ascii digit at the start of the input
258///
259/// ```
260/// use bparse::{Pattern, digit};
261///
262/// assert_eq!(digit().eval(b"a"), None);
263/// assert_eq!(digit().eval(b"8"), Some(1));
264/// ```
265pub fn digit() -> ByteLookupTable {
266    oneof(b"0123456789")
267}
268
269/// Returns a pattern that will match any ascii letter at the start of the input
270///
271/// ```
272/// use bparse::{Pattern, alpha};
273///
274/// assert_eq!(alpha().eval(b"1"), None);
275/// assert_eq!(alpha().eval(b"a"), Some(1));
276/// ```
277pub fn alpha() -> ByteLookupTable {
278    oneof(b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
279}
280
281/// Returns a pattern that will match any hexadecimal character at the start of the input
282///
283/// ```
284/// use bparse::{Pattern, hex};
285///
286/// assert_eq!(hex().eval(b"z"), None);
287/// assert_eq!(hex().eval(b"f"), Some(1));
288/// ```
289pub fn hex() -> Choice<ByteLookupTable, ByteLookupTable> {
290    oneof(b"abcdefABCDEF").or(digit())
291}
292
293/// See [`oneof`]
294#[derive(Debug, Clone, Copy)]
295pub struct ByteLookupTable([bool; 256]);
296
297impl Pattern for ByteLookupTable {
298    fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
299        let first = *input.first()?;
300
301        if self.0[first as usize] {
302            return Some(1);
303        }
304        None
305    }
306}
307/// Returns a new pattern that will match an element in the closed interval `[lo, hi]`
308///
309/// ```
310/// use bparse::{Pattern, range};
311///
312/// let pattern = range(b'a', b'z');
313/// assert_eq!(pattern.eval(b"b"), Some(1));
314/// ```
315pub fn range(lo: u8, hi: u8) -> ElementRange {
316    ElementRange(lo, hi)
317}
318
319/// See [`range()`]
320#[derive(Debug, Clone, Copy)]
321pub struct ElementRange(u8, u8);
322
323impl Pattern for ElementRange {
324    fn eval(&self, input: &[u8]) -> Option<usize> {
325        let first = input.first()?;
326
327        if first >= &self.0 && first <= &self.1 {
328            return Some(1);
329        }
330
331        None
332    }
333}
334
335/// Returns a new pattern that matches as many repetitions as possible of the given `pattern`, including 0.
336///
337/// ```
338/// use bparse::{Pattern, any};
339///
340/// assert_eq!(any("a").eval(b"aaa"), Some(3));
341/// assert_eq!(any("a").eval(b"aa"), Some(2));
342/// assert_eq!(any("a").eval(b""), Some(0));
343/// ```
344pub fn any<P>(pattern: P) -> Repetition<P>
345where
346    P: Pattern,
347{
348    Repetition {
349        lo: 0,
350        hi: None,
351        pattern,
352    }
353}
354
355/// Returns a new pattern that matches at least `n` repetitions of `pattern`.
356///
357/// ```
358/// use bparse::{Pattern, at_least};
359///
360/// assert_eq!(at_least(2, "a").eval(b"a"), None);
361/// assert_eq!(at_least(2, "a").eval(b"aaa"), Some(3));
362/// ```
363pub fn at_least<P>(n: usize, pattern: P) -> Repetition<P>
364where
365    P: Pattern,
366{
367    Repetition {
368        lo: n,
369        hi: None,
370        pattern,
371    }
372}
373
374/// Returns a new pattern that matches at most `n` repetitions of `pattern`.
375///
376/// ```
377/// use bparse::{Pattern, at_most};
378///
379/// assert_eq!(at_most(2, "b").eval(b"b"), Some(1));
380/// assert_eq!(at_most(2, "b").eval(b"bbbb"), Some(2));
381/// assert_eq!(at_most(2, "b").eval(b"aa"), Some(0));
382/// ```
383pub fn at_most<P>(n: usize, pattern: P) -> Repetition<P>
384where
385    P: Pattern,
386{
387    Repetition {
388        lo: 0,
389        hi: Some(n),
390        pattern,
391    }
392}
393
394/// Returns a new pattern that matches 0 or 1 repetitions of `pattern`
395///
396/// This implies the new pattern always matches the input.
397///
398/// ```
399/// use bparse::{Pattern, optional};
400///
401/// assert_eq!(optional("a").eval(b"b"), Some(0));
402/// assert_eq!(optional("a").eval(b"a"), Some(1));
403/// ```
404pub fn optional<P>(pattern: P) -> Repetition<P>
405where
406    P: Pattern,
407{
408    Repetition {
409        lo: 0,
410        hi: Some(1),
411        pattern,
412    }
413}
414
415/// Returns a new pattern that matches exactly `n` repetitions of `pattern`.
416///
417/// ```
418/// use bparse::{Pattern, count};
419///
420/// assert_eq!(count(2, "z").eval(b"zzz"), Some(2));
421/// assert_eq!(count(2, "z").eval(b"z"), None);
422/// ```
423pub fn count<P>(n: usize, pattern: P) -> Repetition<P>
424where
425    P: Pattern,
426{
427    Repetition {
428        lo: n,
429        hi: Some(n),
430        pattern,
431    }
432}
433
434/// Returns a new pattern that matches between `lo` and `hi` repetitions of `pattern`.
435///
436/// Both bounds are inclusive.
437pub fn between<P>(lo: usize, hi: usize, pattern: P) -> Repetition<P>
438where
439    P: Pattern,
440{
441    Repetition {
442        lo,
443        hi: Some(hi),
444        pattern,
445    }
446}
447
448/// Exppresses pattern repetition.
449///
450/// Many patterns (e.g. [`between`], [`optional`], [`at_least`]) are expressed in terms of this pattern.
451#[derive(Debug, Clone, Copy)]
452pub struct Repetition<P> {
453    pattern: P,
454    lo: usize,
455    hi: Option<usize>,
456}
457
458impl<P> Pattern for Repetition<P>
459where
460    P: Pattern,
461{
462    fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
463        let mut match_count = 0;
464        let mut offset = 0;
465
466        if let Some(upper_bound) = self.hi {
467            assert!(
468                upper_bound >= self.lo,
469                "upper bound should be greater than or equal to the lower bound"
470            );
471        };
472
473        loop {
474            // We hit the upper bound of pattern repetition
475            if let Some(upper_bound) = self.hi {
476                if match_count == upper_bound {
477                    return Some(offset);
478                }
479            };
480
481            let Some(value) = self.pattern.eval(&input[offset..]) else {
482                if match_count < self.lo {
483                    return None;
484                }
485                return Some(offset);
486            };
487
488            match_count += 1;
489            offset += value;
490        }
491    }
492}
493
494/// Returns a new pattern that matches only if `pattern` does not match.
495///
496/// This pattern returns `Some(0)` if the underlying pattern did not match which makes it function as a negative lookahead.
497pub fn not<P>(pattern: P) -> Not<P>
498where
499    P: Pattern,
500{
501    Not(pattern)
502}
503
504/// See [`not`]
505#[derive(Debug, Clone, Copy)]
506pub struct Not<P>(P);
507
508impl<P> Pattern for Not<P>
509where
510    P: Pattern,
511{
512    fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
513        if self.0.eval(input).is_some() {
514            return None;
515        }
516
517        Some(0)
518    }
519}
520
521/// Returns a pattern that matches the end of the slice.
522pub fn end() -> Eof {
523    Eof
524}
525
526/// See [`end`]
527#[derive(Debug, Clone, Copy)]
528pub struct Eof;
529
530impl Pattern for Eof {
531    fn eval(&self, input: &[u8]) -> Option<usize> {
532        input.is_empty().then_some(0)
533    }
534}
535
536#[cfg(test)]
537mod tests {
538    use super::*;
539
540    #[test]
541    fn match_prefix() {
542        // Pattern is empty, input is empty:
543        assert_eq!("".eval(b""), Some(0));
544        // Pattern is empty, input is not empty:
545        assert_eq!("".eval(b"aa"), Some(0));
546        // Pattern is not empty, input is empty:
547        assert_eq!("aa".eval(b""), None);
548        // Pattern is not empty, input length is less than pattern length:
549        assert_eq!("aa".eval(b"a"), None);
550        assert_eq!("aa".eval(b"b"), None);
551        // Pattern is not empty, input length is equal to pattern length:
552        assert_eq!("aa".eval(b"aa"), Some(2));
553        assert_eq!("aa".eval(b"bb"), None);
554        // Pattern is not empty, input length is greater than pattern length:
555        assert_eq!("aa".eval(b"aaa"), Some(2));
556        assert_eq!("aa".eval(b"bbb"), None);
557    }
558
559    #[test]
560    fn match_range() {
561        // Range bounds are the same, input is empty:
562        assert_eq!(range(b'5', b'5').eval(b""), None);
563        // Range bounds are the same, input begins with a byte identical to bounds:
564        assert_eq!(range(b'5', b'5').eval(b"5"), Some(1));
565        // Range bounds are the same, input begins with outside bounds:
566        assert_eq!(range(b'5', b'5').eval(b"4"), None);
567        assert_eq!(range(b'5', b'5').eval(b"6"), None);
568        // Range bounds are different, input is empty:
569        assert_eq!(range(b'3', b'5').eval(b""), None);
570        // Range bounds are different, input begins with byte identical to bounds:
571        assert_eq!(range(b'3', b'5').eval(b"3"), Some(1));
572        assert_eq!(range(b'3', b'5').eval(b"5"), Some(1));
573        // Range bounds are different, input begins with byte within bounds:
574        assert_eq!(range(b'3', b'5').eval(b"4"), Some(1));
575        // Range bounds are different, input begins with byte outside bounds:
576        assert_eq!(range(b'3', b'5').eval(b"2"), None);
577        assert_eq!(range(b'3', b'5').eval(b"6"), None);
578    }
579
580    #[test]
581    fn match_oneof_noneof() {
582        // Set is empty, input is empty:
583        assert_eq!(oneof(b"").eval(b""), None);
584        assert_eq!(noneof(b"").eval(b""), None);
585
586        // Set is empty, input is not empty:
587        assert_eq!(oneof(b"").eval(b"a"), None);
588        assert_eq!(noneof(b"").eval(b"a"), Some(1));
589
590        // Set is not empty, input is empty:
591        assert_eq!(oneof(b"ab").eval(b""), None);
592        assert_eq!(noneof(b"ab").eval(b""), None);
593
594        // Set is not empty, input begins with byte in set
595        assert_eq!(oneof(b"ab").eval(b"a"), Some(1));
596        assert_eq!(oneof(b"ab").eval(b"b"), Some(1));
597
598        assert_eq!(noneof(b"ab").eval(b"a"), None);
599        assert_eq!(noneof(b"ab").eval(b"b"), None);
600
601        // Set is not empty, input begins with byte not in set
602        assert_eq!(oneof(b"ab").eval(b"c"), None);
603        assert_eq!(noneof(b"ab").eval(b"c"), Some(1));
604    }
605
606    #[test]
607    fn match_choice() {
608        // left matches, right matches:
609        assert_eq!("a".or("aa").eval(b"aa"), Some(1));
610        assert_eq!("aa".or("a").eval(b"aa"), Some(2));
611        // left matches, right does not match:
612        assert_eq!("a".or("b").eval(b"a"), Some(1));
613
614        // left does not match, right matches:
615        assert_eq!("b".or("a").eval(b"a"), Some(1));
616        // left does not match, right does not match:
617        assert_eq!("b".or("a").eval(b"c"), None);
618    }
619
620    #[test]
621    fn match_sequence() {
622        // left matches, right matches:
623        assert_eq!("a".then("b").eval(b"ab"), Some(2));
624        // left matches, right does not match:
625        assert_eq!("a".then("c").eval(b"ab"), None);
626
627        // left does not match, right matches:
628        assert_eq!("a".then("").eval(b"b"), None);
629        // left does not match, right does not match:
630        assert_eq!("a".then("c").eval(b"b"), None);
631    }
632
633    #[test]
634    fn patterns_are_reusable() {
635        let pattern = "a".then("b".or("c"));
636        assert_eq!(pattern.eval(b"ac"), Some(2));
637        assert_eq!(pattern.eval(b"ab"), Some(2));
638    }
639
640    #[test]
641    fn repetition_patterns() {
642        // repeats exactly 0 times
643        assert_eq!(count(0, "a").eval(b""), Some(0));
644        assert_eq!(count(0, "a").eval(b"a"), Some(0));
645        assert_eq!(count(0, "a").eval(b"aa"), Some(0));
646
647        // repeats exactly n times
648        assert_eq!(count(3, "a").eval(b""), None);
649        assert_eq!(count(3, "a").eval(b"aa"), None);
650        assert_eq!(count(3, "a").eval(b"aaa"), Some(3));
651        assert_eq!(count(3, "a").eval(b"aaaa"), Some(3));
652
653        // repeats any number of times
654        assert_eq!(any("a").eval(b""), Some(0));
655        assert_eq!(any("a").eval(b"b"), Some(0));
656        assert_eq!(any("a").eval(b"aa"), Some(2));
657        assert_eq!(any("a").eval(b"aab"), Some(2));
658
659        // repeats at least 0 times
660        assert_eq!(at_least(0, "a").eval(b""), Some(0));
661        assert_eq!(at_least(0, "a").eval(b"a"), Some(1));
662        assert_eq!(at_least(0, "a").eval(b"aaaa"), Some(4));
663
664        // repeats at least n times
665        assert_eq!(at_least(3, "a").eval(b""), None);
666        assert_eq!(at_least(3, "a").eval(b"aa"), None);
667        assert_eq!(at_least(3, "a").eval(b"aaa"), Some(3));
668        assert_eq!(at_least(3, "a").eval(b"aaaaa"), Some(5));
669
670        // repeats at most 0 times
671        assert_eq!(at_most(0, "a").eval(b""), Some(0));
672        assert_eq!(at_most(0, "a").eval(b"a"), Some(0));
673        assert_eq!(at_most(0, "a").eval(b"aa"), Some(0));
674
675        // repeats at most n times
676        assert_eq!(at_most(3, "a").eval(b""), Some(0));
677        assert_eq!(at_most(3, "a").eval(b"b"), Some(0));
678        assert_eq!(at_most(3, "a").eval(b"aa"), Some(2));
679        assert_eq!(at_most(3, "a").eval(b"aaa"), Some(3));
680        assert_eq!(at_most(3, "a").eval(b"aaaaa"), Some(3));
681
682        // repeats between n & m times
683        assert_eq!(between(2, 4, "a").eval(b""), None);
684        assert_eq!(between(2, 4, "a").eval(b"a"), None);
685        assert_eq!(between(2, 4, "a").eval(b"aa"), Some(2));
686        assert_eq!(between(2, 4, "a").eval(b"aaa"), Some(3));
687        assert_eq!(between(2, 4, "a").eval(b"aaaa"), Some(4));
688        assert_eq!(between(2, 4, "a").eval(b"aaaaa"), Some(4));
689    }
690}