fst_subseq_ascii_caseless/
lib.rs

1#![no_std]
2
3extern crate fst;
4
5use fst::Automaton;
6
/// Automaton accepting any input that contains a given byte subsequence,
/// compared without regard to ASCII case.
///
/// This is the ASCII-case-insensitive counterpart of
/// `fst::automaton::Subsequence` and is enough to drive a simple fuzzy
/// finder over ASCII-only keys.
#[derive(Clone, Debug)]
pub struct SubseqAsciiCaseless<'a> {
    /// Bytes of the pattern that must appear, in order, in the input.
    subseq: &'a [u8],
}

impl<'a> SubseqAsciiCaseless<'a> {
    /// Builds an automaton matching inputs that contain `subseq` as a
    /// subsequence, ignoring ASCII case.
    ///
    /// # Panics
    ///
    /// Panics if `subseq` contains any ASCII uppercase character.
    pub fn new(subseq: &'a str) -> Self {
        assert!(!subseq.bytes().any(|b| b.is_ascii_uppercase()));
        SubseqAsciiCaseless {
            subseq: subseq.as_bytes(),
        }
    }

    /// Like `new()`, but skips the check for ASCII uppercase characters.
    ///
    /// If `subseq` does contain an ASCII uppercase character, the
    /// resulting automaton never matches.
    pub fn new_unchecked(subseq: &'a str) -> Self {
        let pattern = subseq.as_bytes();
        Self { subseq: pattern }
    }
}
39
40impl<'a> Automaton for SubseqAsciiCaseless<'a> {
41    type State = usize;
42
43    fn start(&self) -> usize {
44        0
45    }
46
47    fn is_match(&self, state: &usize) -> bool {
48        *state == self.subseq.len()
49    }
50
51    fn can_match(&self, _: &usize) -> bool {
52        true
53    }
54
55    fn will_always_match(&self, state: &usize) -> bool {
56        self.is_match(state)
57    }
58
59    fn accept(&self, state: &usize, byte: u8) -> usize {
60        if self.is_match(state) {
61            return *state;
62        }
63        state + (byte.to_ascii_lowercase() == self.subseq[*state]) as usize
64    }
65}
66
#[cfg(test)]
mod tests {
    use super::*;

    /// Lowercase pattern shared by the tests below.
    const PATTERN1: &str = "sqaicl";

    /// Walks the automaton through matching and non-matching bytes,
    /// in both upper and lower case.
    #[test]
    fn test_states() {
        let subseq = SubseqAsciiCaseless::new(PATTERN1);
        assert_eq!(subseq.start(), 0);

        // (state before, input byte, expected state after)
        let transitions: [(usize, u8, usize); 9] = [
            (0, b'q', 0),
            (0, b'S', 1),
            (1, b'x', 1),
            (1, b'q', 2),
            (2, b'a', 3),
            (3, b'I', 4),
            (4, b'l', 4),
            (4, b'C', 5),
            (5, b'L', 6),
        ];
        for &(before, input, after) in transitions.iter() {
            assert_eq!(subseq.accept(&before, input), after);
        }
    }

    /// Only the state equal to the pattern length is a match.
    #[test]
    fn test_is_match() {
        let subseq = SubseqAsciiCaseless::new(PATTERN1);
        let complete = PATTERN1.len();
        for partial in 0..complete {
            assert!(!subseq.is_match(&partial));
        }
        assert!(subseq.is_match(&complete));
    }

    /// `new()` must reject a pattern containing ASCII uppercase.
    #[test]
    #[should_panic]
    fn test_new_check() {
        let _ = SubseqAsciiCaseless::new("SqAiCl");
    }
}