sub_strs/
glob.rs

1/*
2==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--
3
4sub-strs
5
6Copyright (C) 2019-2024  Anonymous
7
8There are several releases over multiple years,
9they are listed as ranges, such as: "2019-2024".
10
11This program is free software: you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation, either version 3 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19GNU Lesser General Public License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with this program.  If not, see <https://www.gnu.org/licenses/>.
23
24::--::--::--::--::--::--::--::--::--::--::--::--::--::--::--::--
25*/
26
27//! # Glob
28
29use {
30    alloc::{
31        borrow::Cow,
32        collections::BTreeMap,
33        string::String,
34        vec::Vec,
35    },
36    core::{
37        fmt::{self, Display, Formatter},
38        str::FromStr,
39    },
40};
41
42use crate::Error;
43
44const ANY: char = '*';
45const ONE_CHAR: char = '?';
46
47/// # Parts of a `&str`
48#[derive(Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
49enum Part<'a> {
50
51    /// # A string
52    Str(Cow<'a, str>),
53
54    /// # Any (`*`)
55    Any,
56
57    /// # One character (`?`)
58    OneChar,
59}
60
61impl<'a> Part<'a> {
62
63    /// # Parses parts
64    fn parse<'b, F>(mut s: &'b str, str_fn: F) -> Vec<Self> where F: Fn(&'b str) -> Cow<'a, str> {
65        let mut result = alloc::vec![];
66        loop {
67            match s.find(|c| match c { self::ANY | self::ONE_CHAR => true, _ => false }) {
68                Some(i) => {
69                    if i > 0 {
70                        result.push(Part::Str(str_fn(&s[..i])));
71                    }
72                    match s.as_bytes()[i] as char {
73                        self::ANY => if result.last() != Some(&Part::Any) {
74                            result.push(Part::Any);
75                        },
76                        self::ONE_CHAR => result.push(Part::OneChar),
77                        _ => {},
78                    };
79                    if i + 1 == s.len() {
80                        break;
81                    }
82                    s = &s[i + 1..];
83                },
84                None => {
85                    if s.is_empty() == false {
86                        result.push(Part::Str(str_fn(s)));
87                    }
88                    break;
89                },
90            };
91        }
92
93        result
94    }
95
96}
97
98impl<'a> Part<'a> {
99
100    /// # Parses a `&str`
101    fn parse_str(s: &'a str) -> Vec<Self> {
102        Part::parse(s, |s| Cow::from(s))
103    }
104
105    /// # Parses a [`String`][r://String]
106    ///
107    /// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
108    fn parse_string(s: String) -> Vec<Self> {
109        Part::parse(&s, |s| Cow::from(String::from(s)))
110    }
111
112}
113
114/// # Sub string, used in Glob::matches()
115#[derive(Debug)]
116struct SubStr {
117    fixed: bool,
118    idx: usize,
119}
120
121/// # Glob
122///
123/// This struct is used to find matches from a pattern string against some string.
124///
125/// The pattern string supports 2 special characters: `*` and `?`:
126///
127/// - `*`: matches any characters or nothing at all.
128/// - `?`: matches one single character.
129///
130/// ## Notes
131///
132/// - The idea is inspired by <https://en.wikipedia.org/wiki/Glob_%28programming%29>, but this is _not_ an implementation of that or any other
133///   specifications.
134/// - Matches are _case sensitive_. If you want to ignore case, consider using [`to_lowercase()`][r://String/to_lowercase()] (or
135///   [`to_uppercase()`][r://String/to_uppercase()]) on _both_ pattern and target string.
136/// - [`Display`][r://Display] implementation prints _parsed_ pattern, not the original one.
137/// - Implementations of `From<&'a str>` and `From<&'a String>` always borrow the source string.
138/// - Implementations of `FromStr` and `From<String>` will _clone_ the source string.
139///
140/// ## Examples
141///
142/// <!-- NOTE: these examples are also *essential* tests, do NOT change or remove them. -->
143///
144/// ```
145/// use sub_strs::Glob;
146///
147/// let g = Glob::from("*r?st.rs");
148/// for s in &["rust.rs", "rEst.rs", "it's rust.rs"] {
149///     assert!(g.matches(s));
150/// }
151/// for s in &["it's not Rust", "rest", "rust!.rs"] {
152///     assert!(g.matches(s) == false);
153/// }
154/// ```
155///
156/// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
157/// [r://String/to_lowercase()]: https://doc.rust-lang.org/std/string/struct.String.html#method.to_lowercase
158/// [r://String/to_uppercase()]: https://doc.rust-lang.org/std/string/struct.String.html#method.to_uppercase
159/// [r://Display]: https://doc.rust-lang.org/std/fmt/trait.Display.html
160#[derive(Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
161pub struct Glob<'a> {
162    parts: Vec<Part<'a>>,
163}
164
165impl<'a> Glob<'a> {
166
167    /// # Makes new instance which can be used for lazy search
168    ///
169    /// -   Input will be converted into lowercase.
170    /// -   Whitespaces will be replaced with `*`.
171    /// -   `*` will be inserted into the start, or prepended to the end of input, if necessary.
172    pub fn lazy<S>(s: S) -> Self where S: AsRef<str> {
173        let s = s.as_ref();
174        let mut s = s.trim().split_whitespace().fold(String::with_capacity(s.len().saturating_add(9)), |mut result, next| {
175            if result.ends_with(ANY) == false && next.starts_with(ANY) == false {
176                result.push(ANY);
177            }
178            result.push_str(&next.to_lowercase());
179            result
180        });
181        if s.ends_with(ANY) == false {
182            s.push(ANY);
183        }
184        s.into()
185    }
186
187    /// # Makes new instance
188    const fn new(parts: Vec<Part<'a>>) -> Self {
189        Self {
190            parts,
191        }
192    }
193
194    /// # Checks if this glob matches a string
195    pub fn matches<S>(&self, s: S) -> bool where S: AsRef<str> {
196        let s = s.as_ref();
197
198        if self.parts.is_empty() {
199            return s.is_empty();
200        }
201
202        let mut map = match self.make_map(s) {
203            Some(map) => map,
204            None => return false,
205        };
206        loop {
207            let mut char_count: usize = 0;
208            for (part_idx, part) in self.parts.iter().enumerate() {
209                match part {
210                    Part::Any => {},
211                    Part::OneChar => char_count += 1,
212                    Part::Str(sub) => match map.get_mut(&part_idx) {
213                        Some(SubStr { fixed, idx: sub_idx }) => match char_count == 0 || s.chars().take(char_count).count() >= char_count {
214                            true => if *fixed && part_idx + 1 == self.parts.len() && sub.len() + *sub_idx != s.len() {
215                                return false;
216                            },
217                            false => match fixed {
218                                true => return false,
219                                false => match s[*sub_idx..].chars().next() {
220                                    Some(c) => match s[*sub_idx + c.len_utf8()..].find(sub.as_ref()) {
221                                        Some(i) => {
222                                            *sub_idx = i;
223                                            break;
224                                        },
225                                        None => return false,
226                                    },
227                                    None => return false,
228                                },
229                            },
230                        },
231                        // This is internal error
232                        None => return false,
233                    },
234                };
235
236                if part_idx + 1 == self.parts.len() {
237                    return true;
238                }
239            }
240        }
241    }
242
243    /// # Makes map
244    fn make_map(&self, s: &str) -> Option<BTreeMap<usize, SubStr>> {
245        let mut result = BTreeMap::new();
246
247        let mut dynamic = false;
248        let mut idx = 0;
249        for (part_idx, part) in self.parts.iter().enumerate() {
250            match part {
251                Part::Str(sub) => {
252                    let fixed;
253                    let sub_idx;
254                    if part_idx == 0 {
255                        match s.starts_with(sub.as_ref()) {
256                            true => {
257                                fixed = true;
258                                sub_idx = 0;
259                            },
260                            false => return None,
261                        };
262                    } else if part_idx + 1 == self.parts.len() {
263                        match s.ends_with(sub.as_ref()) {
264                            true => {
265                                fixed = true;
266                                sub_idx = s.len() - sub.len();
267                            },
268                            false => return None,
269                        };
270                    } else {
271                        fixed = dynamic == false;
272                        sub_idx = match s[idx..].find(sub.as_ref()) {
273                            Some(i) => {
274                                idx = i + sub.len();
275                                i
276                            },
277                            None => return None,
278                        };
279                    }
280                    result.insert(part_idx, SubStr { fixed, idx: sub_idx });
281                },
282                Part::Any => dynamic = true,
283                Part::OneChar => match s[idx..].chars().next() {
284                    Some(c) => idx += c.len_utf8(),
285                    None => return None,
286                },
287            };
288        }
289
290        Some(result)
291    }
292
293}
294
295/// # Converts from a `&str` to [`Glob`][::Glob]
296///
297/// [::Glob]: struct.Glob.html
298impl<'a> From<&'a str> for Glob<'a> {
299
300    fn from(src: &'a str) -> Self {
301        Self::new(Part::parse_str(src))
302    }
303
304}
305
306/// # Converts from a [`&String`][r://String] to [`Glob`][::Glob]
307///
308/// [::Glob]: struct.Glob.html
309/// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
310impl<'a> From<&'a String> for Glob<'a> {
311
312    fn from(src: &'a String) -> Self {
313        Self::from(src.as_str())
314    }
315
316}
317
318/// # Converts from a [`String`][r://String] to [`Glob`][::Glob]
319///
320/// [::Glob]: struct.Glob.html
321/// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
322impl From<String> for Glob<'_> {
323
324    fn from(src: String) -> Self {
325        Self::new(Part::parse_string(src))
326    }
327
328}
329
330impl<'a> From<Cow<'a, str>> for Glob<'a> {
331
332    fn from(s: Cow<'a, str>) -> Self {
333        match s {
334            Cow::Borrowed(s) => Self::from(s),
335            Cow::Owned(s) => Self::from(s),
336        }
337    }
338
339}
340
341impl FromStr for Glob<'_> {
342
343    type Err = Error;
344
345    fn from_str(s: &str) -> core::result::Result<Self, Self::Err> {
346        Ok(Self::from(String::from(s)))
347    }
348
349}
350
351impl Display for Glob<'_> {
352
353    fn fmt(&self, f: &mut Formatter) -> core::result::Result<(), fmt::Error> {
354        use fmt::Write;
355
356        for p in self.parts.iter() {
357            match p {
358                Part::Str(s) => f.write_str(s)?,
359                Part::Any => f.write_char(ANY)?,
360                Part::OneChar => f.write_char(ONE_CHAR)?,
361            };
362        }
363
364        Ok(())
365    }
366
367}