bparse/
pattern.rs

1// The Clone & Copy super traits allow patterns to be re-used even when a function returns `impl Pattern`
2/// Expresses that the implementing type may be used to match a byte slice.
3pub trait Pattern: Clone + Copy {
4    /// Evaluates the pattern against a byte slice.
5    /// If the pattern matches, the length of matching slice should be returned.
6    /// Otherwise, `None` should be returned.
7    ///
8    /// It is assumed that the pattern begins matching from the start of the slice.
9    fn eval(&self, input: &[u8]) -> Option<usize>;
10
11    /// Returns a new pattern that will match if `self` and `other` match in sequence.
12    ///
13    /// ```
14    /// use bparse::Pattern;
15    ///
16    /// let pattern = "a".then("b");
17    /// assert_eq!(pattern.eval(b"abc"), Some(2));
18    /// ```
19    fn then<P>(self, other: P) -> Sequence<Self, P>
20    where
21        Self: Sized,
22        P: Pattern,
23    {
24        Sequence {
25            pat1: self,
26            pat2: other,
27        }
28    }
29
30    /// Returns a new pattern that will match if either `self` or `other` match.
31    ///
32    /// ```
33    /// use bparse::Pattern;
34    ///
35    /// let pattern = "a".or("b");
36    /// assert_eq!(pattern.eval(b"ba"), Some(1));
37    /// ```
38    fn or<P>(self, other: P) -> Choice<Self, P>
39    where
40        Self: Sized,
41        P: Pattern,
42    {
43        Choice {
44            pat1: self,
45            pat2: other,
46        }
47    }
48}
49
50impl<P> Pattern for &P
51where
52    P: Pattern,
53{
54    fn eval(&self, input: &[u8]) -> Option<usize> {
55        (*self).eval(input)
56    }
57}
58
59/// See [`Pattern::or`]
60#[derive(Clone, Copy, Debug)]
61pub struct Choice<C1, C2> {
62    pat1: C1,
63    pat2: C2,
64}
65
66impl<P1, P2> Pattern for Choice<P1, P2>
67where
68    P1: Pattern,
69    P2: Pattern,
70{
71    fn eval(&self, input: &[u8]) -> Option<usize> {
72        match self.pat1.eval(input) {
73            Some(res) => Some(res),
74            None => self.pat2.eval(input),
75        }
76    }
77}
78
79/// See [`Pattern::then`]
80#[derive(Clone, Copy, Debug)]
81pub struct Sequence<P1, P2> {
82    pat1: P1,
83    pat2: P2,
84}
85
86impl<P1, P2> Pattern for Sequence<P1, P2>
87where
88    P1: Pattern,
89    P2: Pattern,
90{
91    fn eval(&self, input: &[u8]) -> Option<usize> {
92        if let Some(a) = self.pat1.eval(input) {
93            if let Some(b) = self.pat2.eval(&input[a..]) {
94                return Some(a + b);
95            }
96        }
97        None
98    }
99}
100
101/// Returns a new pattern that always matches the next element in the input if it exists.
102///
103/// ```
104/// use bparse::{Pattern, one};
105///
106/// assert_eq!(one().eval(&[1, 2, 3]), Some(1));
107/// ```
108pub fn one() -> One {
109    One
110}
111
112/// See [`one`]
113#[derive(Debug, Clone, Copy)]
114pub struct One;
115
116impl Pattern for One {
117    fn eval(&self, input: &[u8]) -> Option<usize> {
118        if input.is_empty() {
119            return None;
120        }
121
122        Some(1)
123    }
124}
125
126/// Returns a new pattern that will match if `slice` is a prefix of the input.
127///
128/// ```
129/// use bparse::{Pattern, prefix};
130///
131/// let pattern = prefix(&[1]);
132/// assert_eq!(pattern.eval(&[1, 2, 3]), Some(1));
133/// ```
134///
135/// As a convenience, the `Pattern` trait is implemented for slices and bytes with the same effect:
136///
137/// ```
138/// use bparse::Pattern;
139///
140/// assert_eq!((&b"ab"[..]).eval(b"abc"), Some(2));
141/// ```
142///
143/// If you are working with ascii or utf8 byte slices, consider [`utf8`]
144pub fn prefix(slice: &[u8]) -> Prefix {
145    Prefix(slice)
146}
147
148/// Returns a new pattern that will match the utf8 string slice `s` at the start of the input.
149///
150/// ```
151/// use bparse::{Pattern, utf8};
152///
153/// let pattern = utf8("he");
154/// assert_eq!(pattern.eval(b"hello"), Some(2));
155/// ```
156///
157/// As a convenience, the `Pattern` trait is implemented for string slices with the same effect:
158///
159/// ```
160/// use bparse::Pattern;
161///
162/// assert_eq!("he".eval(b"hello"), Some(2));
163/// ```
164pub fn utf8(s: &str) -> Prefix {
165    Prefix(s.as_bytes())
166}
167
168impl Pattern for &str {
169    fn eval(&self, input: &[u8]) -> Option<usize> {
170        utf8(self).eval(input)
171    }
172}
173
174impl Pattern for &[u8] {
175    fn eval(&self, input: &[u8]) -> Option<usize> {
176        Prefix(self).eval(input)
177    }
178}
179
180impl Pattern for u8 {
181    fn eval(&self, input: &[u8]) -> Option<usize> {
182        Prefix(&[*self]).eval(input)
183    }
184}
185
186/// See [`prefix`]
187#[derive(Debug, Clone, Copy)]
188pub struct Prefix<'p>(&'p [u8]);
189
190impl Pattern for Prefix<'_> {
191    fn eval(&self, input: &[u8]) -> Option<usize> {
192        if !input.starts_with(self.0) {
193            return None;
194        }
195
196        Some(self.0.len())
197    }
198}
199
200/// Returns a pattern that will match any byte in `bytes` at the start of the input
201///
202/// ```
203/// use bparse::{Pattern, oneof};
204///
205/// let pattern = oneof(b"?!.:");
206/// assert_eq!(pattern.eval(b"hi"), None);
207/// assert_eq!(pattern.eval(b"!!"), Some(1));
208/// assert_eq!(pattern.eval(b":"), Some(1));
209/// ```
210pub fn oneof(bytes: &[u8]) -> ByteLookupTable {
211    let mut set: [bool; 256] = [false; 256];
212
213    let mut i = 0;
214    while i < bytes.len() {
215        set[bytes[i] as usize] = true;
216        i += 1;
217    }
218
219    ByteLookupTable(set)
220}
221
222/// Inverse of [`oneof`].
223pub fn noneof(bytes: &[u8]) -> ByteLookupTable {
224    let mut set = oneof(bytes).0;
225    let mut i = 0;
226    while i < set.len() {
227        set[i] = !set[i];
228        i += 1;
229    }
230
231    ByteLookupTable(set)
232}
233
234/// Returns a pattern that will match any ascii digit at the start of the input
235///
236/// ```
237/// use bparse::{Pattern, digit};
238///
239/// assert_eq!(digit().eval(b"a"), None);
240/// assert_eq!(digit().eval(b"8"), Some(1));
241/// ```
242pub fn digit() -> ByteLookupTable {
243    oneof(b"0123456789")
244}
245
246/// Returns a pattern that will match any ascii letter at the start of the input
247///
248/// ```
249/// use bparse::{Pattern, alpha};
250///
251/// assert_eq!(alpha().eval(b"1"), None);
252/// assert_eq!(alpha().eval(b"a"), Some(1));
253/// ```
254pub fn alpha() -> ByteLookupTable {
255    oneof(b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
256}
257
258/// Returns a pattern that will match any hexadecimal character at the start of the input
259///
260/// ```
261/// use bparse::{Pattern, hex};
262///
263/// assert_eq!(hex().eval(b"z"), None);
264/// assert_eq!(hex().eval(b"f"), Some(1));
265/// ```
266pub fn hex() -> Choice<ByteLookupTable, ByteLookupTable> {
267    oneof(b"abcdefABCDEF").or(digit())
268}
269
270/// See [`oneof`]
271#[derive(Debug, Clone, Copy)]
272pub struct ByteLookupTable([bool; 256]);
273
274impl Pattern for ByteLookupTable {
275    fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
276        let first = *input.first()?;
277
278        if self.0[first as usize] {
279            return Some(1);
280        }
281        None
282    }
283}
284/// Returns a new pattern that will match an element in the closed interval `[lo, hi]`
285///
286/// ```
287/// use bparse::{Pattern, range};
288///
289/// let pattern = range(b'a', b'z');
290/// assert_eq!(pattern.eval(b"b"), Some(1));
291/// ```
292pub fn range(lo: u8, hi: u8) -> ElementRange {
293    ElementRange(lo, hi)
294}
295
296/// See [`range()`]
297#[derive(Debug, Clone, Copy)]
298pub struct ElementRange(u8, u8);
299
300impl Pattern for ElementRange {
301    fn eval(&self, input: &[u8]) -> Option<usize> {
302        let first = input.first()?;
303
304        if first >= &self.0 && first <= &self.1 {
305            return Some(1);
306        }
307
308        None
309    }
310}
311
312/// Returns a new pattern that matches as many repetitions as possible of the given `pattern`, including 0.
313///
314/// ```
315/// use bparse::{Pattern, any};
316///
317/// assert_eq!(any("a").eval(b"aaa"), Some(3));
318/// assert_eq!(any("a").eval(b"aa"), Some(2));
319/// assert_eq!(any("a").eval(b""), Some(0));
320/// ```
321pub fn any<P>(pattern: P) -> Repetition<P>
322where
323    P: Pattern,
324{
325    Repetition {
326        lo: 0,
327        hi: None,
328        pattern,
329    }
330}
331
332/// Returns a new pattern that matches at least `n` repetitions of `pattern`.
333///
334/// ```
335/// use bparse::{Pattern, at_least};
336///
337/// assert_eq!(at_least(2, "a").eval(b"a"), None);
338/// assert_eq!(at_least(2, "a").eval(b"aaa"), Some(3));
339/// ```
340pub fn at_least<P>(n: usize, pattern: P) -> Repetition<P>
341where
342    P: Pattern,
343{
344    Repetition {
345        lo: n,
346        hi: None,
347        pattern,
348    }
349}
350
351/// Returns a new pattern that matches at most `n` repetitions of `pattern`.
352///
353/// ```
354/// use bparse::{Pattern, at_most};
355///
356/// assert_eq!(at_most(2, "b").eval(b"b"), Some(1));
357/// assert_eq!(at_most(2, "b").eval(b"bbbb"), Some(2));
358/// assert_eq!(at_most(2, "b").eval(b"aa"), Some(0));
359/// ```
360pub fn at_most<P>(n: usize, pattern: P) -> Repetition<P>
361where
362    P: Pattern,
363{
364    Repetition {
365        lo: 0,
366        hi: Some(n),
367        pattern,
368    }
369}
370
371/// Returns a new pattern that matches 0 or 1 repetitions of `pattern`
372///
373/// This implies the new pattern always matches the input.
374///
375/// ```
376/// use bparse::{Pattern, optional};
377///
378/// assert_eq!(optional("a").eval(b"b"), Some(0));
379/// assert_eq!(optional("a").eval(b"a"), Some(1));
380/// ```
381pub fn optional<P>(pattern: P) -> Repetition<P>
382where
383    P: Pattern,
384{
385    Repetition {
386        lo: 0,
387        hi: Some(1),
388        pattern,
389    }
390}
391
392/// Returns a new pattern that matches exactly `n` repetitions of `pattern`.
393///
394/// ```
395/// use bparse::{Pattern, count};
396///
397/// assert_eq!(count(2, "z").eval(b"zzz"), Some(2));
398/// assert_eq!(count(2, "z").eval(b"z"), None);
399/// ```
400pub fn count<P>(n: usize, pattern: P) -> Repetition<P>
401where
402    P: Pattern,
403{
404    Repetition {
405        lo: n,
406        hi: Some(n),
407        pattern,
408    }
409}
410
411/// Returns a new pattern that matches between `lo` and `hi` repetitions of `pattern`.
412///
413/// Both bounds are inclusive.
414pub fn between<P>(lo: usize, hi: usize, pattern: P) -> Repetition<P>
415where
416    P: Pattern,
417{
418    Repetition {
419        lo,
420        hi: Some(hi),
421        pattern,
422    }
423}
424
425/// Exppresses pattern repetition.
426///
427/// Many patterns (e.g. [`between`], [`optional`], [`at_least`]) are expressed in terms of this pattern.
428#[derive(Debug, Clone, Copy)]
429pub struct Repetition<P> {
430    pattern: P,
431    lo: usize,
432    hi: Option<usize>,
433}
434
435impl<P> Pattern for Repetition<P>
436where
437    P: Pattern,
438{
439    fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
440        let mut match_count = 0;
441        let mut offset = 0;
442
443        if let Some(upper_bound) = self.hi {
444            assert!(
445                upper_bound >= self.lo,
446                "upper bound should be greater than or equal to the lower bound"
447            );
448        };
449
450        loop {
451            // We hit the upper bound of pattern repetition
452            if let Some(upper_bound) = self.hi {
453                if match_count == upper_bound {
454                    return Some(offset);
455                }
456            };
457
458            let Some(value) = self.pattern.eval(&input[offset..]) else {
459                if match_count < self.lo {
460                    return None;
461                }
462                return Some(offset);
463            };
464
465            match_count += 1;
466            offset += value;
467        }
468    }
469}
470
471/// Returns a new pattern that matches only if `pattern` does not match.
472///
473/// This pattern returns `Some(0)` if the underlying pattern did not match which makes it function as a negative lookahead.
474pub fn not<P>(pattern: P) -> Not<P>
475where
476    P: Pattern,
477{
478    Not(pattern)
479}
480
481/// See [`not`]
482#[derive(Debug, Clone, Copy)]
483pub struct Not<P>(P);
484
485impl<P> Pattern for Not<P>
486where
487    P: Pattern,
488{
489    fn eval<'i>(&self, input: &[u8]) -> Option<usize> {
490        if self.0.eval(input).is_some() {
491            return None;
492        }
493
494        Some(0)
495    }
496}
497
498/// Returns a pattern that matches the end of the slice.
499pub fn end() -> Eof {
500    Eof
501}
502
503/// See [`end`]
504#[derive(Debug, Clone, Copy)]
505pub struct Eof;
506
507impl Pattern for Eof {
508    fn eval(&self, input: &[u8]) -> Option<usize> {
509        input.is_empty().then_some(0)
510    }
511}
512
513#[cfg(test)]
514mod tests {
515    use super::*;
516
517    #[test]
518    fn match_prefix() {
519        // Pattern is empty, input is empty:
520        assert_eq!("".eval(b""), Some(0));
521        // Pattern is empty, input is not empty:
522        assert_eq!("".eval(b"aa"), Some(0));
523        // Pattern is not empty, input is empty:
524        assert_eq!("aa".eval(b""), None);
525        // Pattern is not empty, input length is less than pattern length:
526        assert_eq!("aa".eval(b"a"), None);
527        assert_eq!("aa".eval(b"b"), None);
528        // Pattern is not empty, input length is equal to pattern length:
529        assert_eq!("aa".eval(b"aa"), Some(2));
530        assert_eq!("aa".eval(b"bb"), None);
531        // Pattern is not empty, input length is greater than pattern length:
532        assert_eq!("aa".eval(b"aaa"), Some(2));
533        assert_eq!("aa".eval(b"bbb"), None);
534    }
535
536    #[test]
537    fn match_range() {
538        // Range bounds are the same, input is empty:
539        assert_eq!(range(b'5', b'5').eval(b""), None);
540        // Range bounds are the same, input begins with a byte identical to bounds:
541        assert_eq!(range(b'5', b'5').eval(b"5"), Some(1));
542        // Range bounds are the same, input begins with outside bounds:
543        assert_eq!(range(b'5', b'5').eval(b"4"), None);
544        assert_eq!(range(b'5', b'5').eval(b"6"), None);
545        // Range bounds are different, input is empty:
546        assert_eq!(range(b'3', b'5').eval(b""), None);
547        // Range bounds are different, input begins with byte identical to bounds:
548        assert_eq!(range(b'3', b'5').eval(b"3"), Some(1));
549        assert_eq!(range(b'3', b'5').eval(b"5"), Some(1));
550        // Range bounds are different, input begins with byte within bounds:
551        assert_eq!(range(b'3', b'5').eval(b"4"), Some(1));
552        // Range bounds are different, input begins with byte outside bounds:
553        assert_eq!(range(b'3', b'5').eval(b"2"), None);
554        assert_eq!(range(b'3', b'5').eval(b"6"), None);
555    }
556
557    #[test]
558    fn match_oneof_noneof() {
559        // Set is empty, input is empty:
560        assert_eq!(oneof(b"").eval(b""), None);
561        assert_eq!(noneof(b"").eval(b""), None);
562
563        // Set is empty, input is not empty:
564        assert_eq!(oneof(b"").eval(b"a"), None);
565        assert_eq!(noneof(b"").eval(b"a"), Some(1));
566
567        // Set is not empty, input is empty:
568        assert_eq!(oneof(b"ab").eval(b""), None);
569        assert_eq!(noneof(b"ab").eval(b""), None);
570
571        // Set is not empty, input begins with byte in set
572        assert_eq!(oneof(b"ab").eval(b"a"), Some(1));
573        assert_eq!(oneof(b"ab").eval(b"b"), Some(1));
574
575        assert_eq!(noneof(b"ab").eval(b"a"), None);
576        assert_eq!(noneof(b"ab").eval(b"b"), None);
577
578        // Set is not empty, input begins with byte not in set
579        assert_eq!(oneof(b"ab").eval(b"c"), None);
580        assert_eq!(noneof(b"ab").eval(b"c"), Some(1));
581    }
582
583    #[test]
584    fn match_choice() {
585        // left matches, right matches:
586        assert_eq!("a".or("aa").eval(b"aa"), Some(1));
587        assert_eq!("aa".or("a").eval(b"aa"), Some(2));
588        // left matches, right does not match:
589        assert_eq!("a".or("b").eval(b"a"), Some(1));
590
591        // left does not match, right matches:
592        assert_eq!("b".or("a").eval(b"a"), Some(1));
593        // left does not match, right does not match:
594        assert_eq!("b".or("a").eval(b"c"), None);
595    }
596
597    #[test]
598    fn match_sequence() {
599        // left matches, right matches:
600        assert_eq!("a".then("b").eval(b"ab"), Some(2));
601        // left matches, right does not match:
602        assert_eq!("a".then("c").eval(b"ab"), None);
603
604        // left does not match, right matches:
605        assert_eq!("a".then("").eval(b"b"), None);
606        // left does not match, right does not match:
607        assert_eq!("a".then("c").eval(b"b"), None);
608    }
609
610    #[test]
611    fn patterns_are_reusable() {
612        let pattern = "a".then("b".or("c"));
613        assert_eq!(pattern.eval(b"ac"), Some(2));
614        assert_eq!(pattern.eval(b"ab"), Some(2));
615    }
616
617    #[test]
618    fn repetition_patterns() {
619        // repeats exactly 0 times
620        assert_eq!(count(0, "a").eval(b""), Some(0));
621        assert_eq!(count(0, "a").eval(b"a"), Some(0));
622        assert_eq!(count(0, "a").eval(b"aa"), Some(0));
623
624        // repeats exactly n times
625        assert_eq!(count(3, "a").eval(b""), None);
626        assert_eq!(count(3, "a").eval(b"aa"), None);
627        assert_eq!(count(3, "a").eval(b"aaa"), Some(3));
628        assert_eq!(count(3, "a").eval(b"aaaa"), Some(3));
629
630        // repeats any number of times
631        assert_eq!(any("a").eval(b""), Some(0));
632        assert_eq!(any("a").eval(b"b"), Some(0));
633        assert_eq!(any("a").eval(b"aa"), Some(2));
634        assert_eq!(any("a").eval(b"aab"), Some(2));
635
636        // repeats at least 0 times
637        assert_eq!(at_least(0, "a").eval(b""), Some(0));
638        assert_eq!(at_least(0, "a").eval(b"a"), Some(1));
639        assert_eq!(at_least(0, "a").eval(b"aaaa"), Some(4));
640
641        // repeats at least n times
642        assert_eq!(at_least(3, "a").eval(b""), None);
643        assert_eq!(at_least(3, "a").eval(b"aa"), None);
644        assert_eq!(at_least(3, "a").eval(b"aaa"), Some(3));
645        assert_eq!(at_least(3, "a").eval(b"aaaaa"), Some(5));
646
647        // repeats at most 0 times
648        assert_eq!(at_most(0, "a").eval(b""), Some(0));
649        assert_eq!(at_most(0, "a").eval(b"a"), Some(0));
650        assert_eq!(at_most(0, "a").eval(b"aa"), Some(0));
651
652        // repeats at most n times
653        assert_eq!(at_most(3, "a").eval(b""), Some(0));
654        assert_eq!(at_most(3, "a").eval(b"b"), Some(0));
655        assert_eq!(at_most(3, "a").eval(b"aa"), Some(2));
656        assert_eq!(at_most(3, "a").eval(b"aaa"), Some(3));
657        assert_eq!(at_most(3, "a").eval(b"aaaaa"), Some(3));
658
659        // repeats between n & m times
660        assert_eq!(between(2, 4, "a").eval(b""), None);
661        assert_eq!(between(2, 4, "a").eval(b"a"), None);
662        assert_eq!(between(2, 4, "a").eval(b"aa"), Some(2));
663        assert_eq!(between(2, 4, "a").eval(b"aaa"), Some(3));
664        assert_eq!(between(2, 4, "a").eval(b"aaaa"), Some(4));
665        assert_eq!(between(2, 4, "a").eval(b"aaaaa"), Some(4));
666    }
667}