pattern_lexer/
pattern.rs

1use regex::Regex;
2
/// A located occurrence of a [Pattern] inside a string.
#[derive(Debug, Clone, PartialEq)]
pub struct Match<'a> {
    /// The string that was searched in
    pub base: &'a str,
    /// Byte offset in `base` at which the match starts (inclusive)
    pub start: usize,
    /// Byte offset in `base` at which the match ends (exclusive)
    pub end: usize,
}

impl<'a> Match<'a> {
    /// Build a match covering the `start..end` range of `base`.
    ///
    /// # Panics
    /// When `start >= end` or `base.len() < end`.
    ///
    /// # Example
    /// ```should_panic
    /// # use pattern_lexer::pattern::Match;
    /// #
    /// let mat = Match::new("don't go too far...", 0, 100000);
    /// ```
    pub fn new(base: &'a str, start: usize, end: usize) -> Self {
        // A match is never empty and never reaches past the searched string.
        assert!(end > start);
        assert!(end <= base.len());
        Match { base, start, end }
    }

    /// Borrow the matched text out of the searched string.
    ///
    /// # Example
    /// ```rust
    /// # use pattern_lexer::pattern::Match;
    /// #
    /// let mat = Match::new("it's here not here", 5, 9);
    ///
    /// assert_eq!(mat.as_str(), "here");
    /// ```
    pub fn as_str(&self) -> &'a str {
        let span = self.start..self.end;
        &self.base[span]
    }
}
46
47/// A string Pattern.
48///
49/// The type implementing it can be used as a pattern for `&str`,
50/// by default it is implemented for the following types:
51///
52/// | Pattern type              | Match condition                         |
53/// |---------------------------|-----------------------------------------|
54/// | ```char```                | is contained in string                  |
55/// | ```&str```                | is substring                            |
56/// | ```String```              | is substring                            |
57/// | ```&[char]```             | any `char` is contained in string         |
58/// | ```&[&str]```             | any `&str` is substring                   |
59/// | ```F: Fn(&str) -> bool``` | `F` returns `true` for substring |
60/// | ```Regex```               | `Regex` match substring        |
61pub trait Pattern<'a> {
62    /// Find all occurences of the pattern in the given `&str`.
63    ///
64    /// # Examples
65    /// ```rust
66    /// # use pattern_lexer::pattern::{Match, Pattern};
67    /// #
68    /// assert!("ab".find_in("cd").is_empty());
69    /// assert_eq!("ab".find_in("cabd"), vec![Match::new("cabd", 1, 3)]);
70    /// ```
71    fn find_in(&self, value: &'a str) -> Vec<Match<'a>>;
72
73    /// Find all occurences of the pattern in the given `&str` that are prefixes.
74    ///
75    /// # Examples
76    /// ```rust
77    /// # use pattern_lexer::pattern::{Match, Pattern};
78    /// #
79    /// assert!("ab".find_prefix_in("cdab").is_empty());
80    /// assert_eq!("ab".find_prefix_in("abcd"), vec![Match::new("abcd", 0, 2)]);
81    /// ```
82    fn find_prefix_in(&self, value: &'a str) -> Vec<Match<'a>> {
83        self.find_in(value)
84            .into_iter()
85            .filter(|mat| mat.start == 0)
86            .collect()
87    }
88
89    /// Find all occurences of the pattern in the given `&str` that are suffixes.
90    ///
91    /// # Examples
92    /// ```rust
93    /// # use pattern_lexer::pattern::{Match, Pattern};
94    /// #
95    /// assert!("ab".find_suffix_in("abcd").is_empty());
96    /// assert_eq!("ab".find_suffix_in("cdab"), vec![Match::new("cdab", 2, 4)]);
97    /// ```
98    fn find_suffix_in(&self, value: &'a str) -> Vec<Match<'a>> {
99        let len = value.len();
100        self.find_in(value)
101            .into_iter()
102            .filter(|mat| mat.end == len)
103            .collect()
104    }
105
106    /// Find one occurrence of the pattern in the given `&str`.
107    ///
108    /// # Examples
109    /// ```rust
110    /// # use pattern_lexer::pattern::{Match, Pattern};
111    /// #
112    /// assert!("ab".find_one_in("cd").is_none());
113    /// assert_eq!("ab".find_one_in("cdab"), Some(Match::new("cdab", 2, 4)));
114    /// ```
115    fn find_one_in(&self, value: &'a str) -> Option<Match<'a>> {
116        self.find_in(value).into_iter().next()
117    }
118
119    /// Find one occurrence of the pattern in the given `&str` that is prefix.
120    ///
121    /// # Examples
122    /// ```rust
123    /// # use pattern_lexer::pattern::{Match, Pattern};
124    /// #
125    /// assert!("ab".find_one_prefix_in("cdab").is_none());
126    /// assert_eq!("ab".find_one_prefix_in("abcd"), Some(Match::new("abcd", 0, 2)));
127    /// ```
128    fn find_one_prefix_in(&self, value: &'a str) -> Option<Match<'a>> {
129        self.find_prefix_in(value).into_iter().next()
130    }
131
132    /// Find one occurrence of the pattern in the given `&str` that is suffix.
133    ///
134    /// # Examples
135    /// ```rust
136    /// # use pattern_lexer::pattern::{Match, Pattern};
137    /// #
138    /// assert!("ab".find_one_suffix_in("abcd").is_none());
139    /// assert_eq!("ab".find_one_suffix_in("cdab"), Some(Match::new("cdab", 2, 4)));
140    /// ```
141    fn find_one_suffix_in(&self, value: &'a str) -> Option<Match<'a>> {
142        self.find_suffix_in(value).into_iter().next()
143    }
144}
145
146impl<'a> Pattern<'a> for char {
147    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
148        value
149            .match_indices(&self.to_string())
150            .map(|(i, mat)| Match::new(value, i, i + mat.len()))
151            .collect()
152    }
153}
154
155impl<'a> Pattern<'a> for [char] {
156    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
157        self.iter().flat_map(|ch| ch.find_in(value)).collect()
158    }
159}
160
161impl<'a, const N: usize> Pattern<'a> for [char; N] {
162    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
163        self.as_slice().find_in(value)
164    }
165}
166
167impl<'a, const N: usize> Pattern<'a> for &[char; N] {
168    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
169        self.as_slice().find_in(value)
170    }
171}
172
173impl<'a> Pattern<'a> for String {
174    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
175        value
176            .match_indices(self)
177            .map(|(i, mat)| Match::new(value, i, i + mat.len()))
178            .collect()
179    }
180}
181
182impl<'a> Pattern<'a> for &str {
183    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
184        self.to_string().find_in(value)
185    }
186}
187
188impl<'a> Pattern<'a> for [&str] {
189    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
190        self.iter().flat_map(|ch| ch.find_in(value)).collect()
191    }
192}
193
194impl<'a, const N: usize> Pattern<'a> for [&str; N] {
195    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
196        self.as_slice().find_in(value)
197    }
198}
199
200impl<'a, const N: usize> Pattern<'a> for &[&str; N] {
201    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
202        self.as_slice().find_in(value)
203    }
204}
205
206impl<'a: 'b, 'b, F> Pattern<'a> for F
207where
208    F: Fn(&'b str) -> bool,
209{
210    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
211        let mut matches = Vec::new();
212        let mut cur_1 = 0;
213        // The goal is to check from left to right and to take the largest match
214        while cur_1 < value.len() {
215            let mut cur_2 = value.len();
216            while cur_2 > cur_1 {
217                let sub = &value[cur_1..cur_2];
218                if (self)(sub) {
219                    matches.push(Match::new(value, cur_1, cur_2));
220                    cur_1 = cur_2;
221                }
222                cur_2 -= 1
223            }
224            cur_1 += 1;
225        }
226        matches
227    }
228}
229
230impl<'a> Pattern<'a> for Regex {
231    fn find_in(&self, value: &'a str) -> Vec<Match<'a>> {
232        self.find_iter(value)
233            .map(|mat| Match::new(value, mat.start(), mat.end()))
234            .collect()
235    }
236}