sub_strs/glob.rs
1/*
2==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--==--
3
4sub-strs
5
6Copyright (C) 2019-2024 Anonymous
7
8There are several releases over multiple years,
9they are listed as ranges, such as: "2019-2024".
10
11This program is free software: you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation, either version 3 of the License, or
14(at your option) any later version.
15
16This program is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU Lesser General Public License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with this program. If not, see <https://www.gnu.org/licenses/>.
23
24::--::--::--::--::--::--::--::--::--::--::--::--::--::--::--::--
25*/
26
27//! # Glob
28
29use {
30 alloc::{
31 borrow::Cow,
32 collections::BTreeMap,
33 string::String,
34 vec::Vec,
35 },
36 core::{
37 fmt::{self, Display, Formatter},
38 str::FromStr,
39 },
40};
41
42use crate::Error;
43
44const ANY: char = '*';
45const ONE_CHAR: char = '?';
46
47/// # Parts of a `&str`
48#[derive(Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
49enum Part<'a> {
50
51 /// # A string
52 Str(Cow<'a, str>),
53
54 /// # Any (`*`)
55 Any,
56
57 /// # One character (`?`)
58 OneChar,
59}
60
61impl<'a> Part<'a> {
62
63 /// # Parses parts
64 fn parse<'b, F>(mut s: &'b str, str_fn: F) -> Vec<Self> where F: Fn(&'b str) -> Cow<'a, str> {
65 let mut result = alloc::vec![];
66 loop {
67 match s.find(|c| match c { self::ANY | self::ONE_CHAR => true, _ => false }) {
68 Some(i) => {
69 if i > 0 {
70 result.push(Part::Str(str_fn(&s[..i])));
71 }
72 match s.as_bytes()[i] as char {
73 self::ANY => if result.last() != Some(&Part::Any) {
74 result.push(Part::Any);
75 },
76 self::ONE_CHAR => result.push(Part::OneChar),
77 _ => {},
78 };
79 if i + 1 == s.len() {
80 break;
81 }
82 s = &s[i + 1..];
83 },
84 None => {
85 if s.is_empty() == false {
86 result.push(Part::Str(str_fn(s)));
87 }
88 break;
89 },
90 };
91 }
92
93 result
94 }
95
96}
97
98impl<'a> Part<'a> {
99
100 /// # Parses a `&str`
101 fn parse_str(s: &'a str) -> Vec<Self> {
102 Part::parse(s, |s| Cow::from(s))
103 }
104
105 /// # Parses a [`String`][r://String]
106 ///
107 /// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
108 fn parse_string(s: String) -> Vec<Self> {
109 Part::parse(&s, |s| Cow::from(String::from(s)))
110 }
111
112}
113
114/// # Sub string, used in Glob::matches()
115#[derive(Debug)]
116struct SubStr {
117 fixed: bool,
118 idx: usize,
119}
120
121/// # Glob
122///
123/// This struct is used to find matches from a pattern string against some string.
124///
125/// The pattern string supports 2 special characters: `*` and `?`:
126///
127/// - `*`: matches any characters or nothing at all.
128/// - `?`: matches one single character.
129///
130/// ## Notes
131///
132/// - The idea is inspired by <https://en.wikipedia.org/wiki/Glob_%28programming%29>, but this is _not_ an implementation of that or any other
133/// specifications.
134/// - Matches are _case sensitive_. If you want to ignore case, consider using [`to_lowercase()`][r://String/to_lowercase()] (or
135/// [`to_uppercase()`][r://String/to_uppercase()]) on _both_ pattern and target string.
136/// - [`Display`][r://Display] implementation prints _parsed_ pattern, not the original one.
137/// - Implementations of `From<&'a str>` and `From<&'a String>` always borrow the source string.
138/// - Implementations of `FromStr` and `From<String>` will _clone_ the source string.
139///
140/// ## Examples
141///
142/// <!-- NOTE: these examples are also *essential* tests, do NOT change or remove them. -->
143///
144/// ```
145/// use sub_strs::Glob;
146///
147/// let g = Glob::from("*r?st.rs");
148/// for s in &["rust.rs", "rEst.rs", "it's rust.rs"] {
149/// assert!(g.matches(s));
150/// }
151/// for s in &["it's not Rust", "rest", "rust!.rs"] {
152/// assert!(g.matches(s) == false);
153/// }
154/// ```
155///
156/// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
157/// [r://String/to_lowercase()]: https://doc.rust-lang.org/std/string/struct.String.html#method.to_lowercase
158/// [r://String/to_uppercase()]: https://doc.rust-lang.org/std/string/struct.String.html#method.to_uppercase
159/// [r://Display]: https://doc.rust-lang.org/std/fmt/trait.Display.html
160#[derive(Debug, Eq, PartialEq, Hash, Ord, PartialOrd)]
161pub struct Glob<'a> {
162 parts: Vec<Part<'a>>,
163}
164
165impl<'a> Glob<'a> {
166
167 /// # Makes new instance which can be used for lazy search
168 ///
169 /// - Input will be converted into lowercase.
170 /// - Whitespaces will be replaced with `*`.
171 /// - `*` will be inserted into the start, or prepended to the end of input, if necessary.
172 pub fn lazy<S>(s: S) -> Self where S: AsRef<str> {
173 let s = s.as_ref();
174 let mut s = s.trim().split_whitespace().fold(String::with_capacity(s.len().saturating_add(9)), |mut result, next| {
175 if result.ends_with(ANY) == false && next.starts_with(ANY) == false {
176 result.push(ANY);
177 }
178 result.push_str(&next.to_lowercase());
179 result
180 });
181 if s.ends_with(ANY) == false {
182 s.push(ANY);
183 }
184 s.into()
185 }
186
187 /// # Makes new instance
188 const fn new(parts: Vec<Part<'a>>) -> Self {
189 Self {
190 parts,
191 }
192 }
193
194 /// # Checks if this glob matches a string
195 pub fn matches<S>(&self, s: S) -> bool where S: AsRef<str> {
196 let s = s.as_ref();
197
198 if self.parts.is_empty() {
199 return s.is_empty();
200 }
201
202 let mut map = match self.make_map(s) {
203 Some(map) => map,
204 None => return false,
205 };
206 loop {
207 let mut char_count: usize = 0;
208 for (part_idx, part) in self.parts.iter().enumerate() {
209 match part {
210 Part::Any => {},
211 Part::OneChar => char_count += 1,
212 Part::Str(sub) => match map.get_mut(&part_idx) {
213 Some(SubStr { fixed, idx: sub_idx }) => match char_count == 0 || s.chars().take(char_count).count() >= char_count {
214 true => if *fixed && part_idx + 1 == self.parts.len() && sub.len() + *sub_idx != s.len() {
215 return false;
216 },
217 false => match fixed {
218 true => return false,
219 false => match s[*sub_idx..].chars().next() {
220 Some(c) => match s[*sub_idx + c.len_utf8()..].find(sub.as_ref()) {
221 Some(i) => {
222 *sub_idx = i;
223 break;
224 },
225 None => return false,
226 },
227 None => return false,
228 },
229 },
230 },
231 // This is internal error
232 None => return false,
233 },
234 };
235
236 if part_idx + 1 == self.parts.len() {
237 return true;
238 }
239 }
240 }
241 }
242
243 /// # Makes map
244 fn make_map(&self, s: &str) -> Option<BTreeMap<usize, SubStr>> {
245 let mut result = BTreeMap::new();
246
247 let mut dynamic = false;
248 let mut idx = 0;
249 for (part_idx, part) in self.parts.iter().enumerate() {
250 match part {
251 Part::Str(sub) => {
252 let fixed;
253 let sub_idx;
254 if part_idx == 0 {
255 match s.starts_with(sub.as_ref()) {
256 true => {
257 fixed = true;
258 sub_idx = 0;
259 },
260 false => return None,
261 };
262 } else if part_idx + 1 == self.parts.len() {
263 match s.ends_with(sub.as_ref()) {
264 true => {
265 fixed = true;
266 sub_idx = s.len() - sub.len();
267 },
268 false => return None,
269 };
270 } else {
271 fixed = dynamic == false;
272 sub_idx = match s[idx..].find(sub.as_ref()) {
273 Some(i) => {
274 idx = i + sub.len();
275 i
276 },
277 None => return None,
278 };
279 }
280 result.insert(part_idx, SubStr { fixed, idx: sub_idx });
281 },
282 Part::Any => dynamic = true,
283 Part::OneChar => match s[idx..].chars().next() {
284 Some(c) => idx += c.len_utf8(),
285 None => return None,
286 },
287 };
288 }
289
290 Some(result)
291 }
292
293}
294
295/// # Converts from a `&str` to [`Glob`][::Glob]
296///
297/// [::Glob]: struct.Glob.html
298impl<'a> From<&'a str> for Glob<'a> {
299
300 fn from(src: &'a str) -> Self {
301 Self::new(Part::parse_str(src))
302 }
303
304}
305
306/// # Converts from a [`&String`][r://String] to [`Glob`][::Glob]
307///
308/// [::Glob]: struct.Glob.html
309/// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
310impl<'a> From<&'a String> for Glob<'a> {
311
312 fn from(src: &'a String) -> Self {
313 Self::from(src.as_str())
314 }
315
316}
317
318/// # Converts from a [`String`][r://String] to [`Glob`][::Glob]
319///
320/// [::Glob]: struct.Glob.html
321/// [r://String]: https://doc.rust-lang.org/std/string/struct.String.html
322impl From<String> for Glob<'_> {
323
324 fn from(src: String) -> Self {
325 Self::new(Part::parse_string(src))
326 }
327
328}
329
330impl<'a> From<Cow<'a, str>> for Glob<'a> {
331
332 fn from(s: Cow<'a, str>) -> Self {
333 match s {
334 Cow::Borrowed(s) => Self::from(s),
335 Cow::Owned(s) => Self::from(s),
336 }
337 }
338
339}
340
341impl FromStr for Glob<'_> {
342
343 type Err = Error;
344
345 fn from_str(s: &str) -> core::result::Result<Self, Self::Err> {
346 Ok(Self::from(String::from(s)))
347 }
348
349}
350
351impl Display for Glob<'_> {
352
353 fn fmt(&self, f: &mut Formatter) -> core::result::Result<(), fmt::Error> {
354 use fmt::Write;
355
356 for p in self.parts.iter() {
357 match p {
358 Part::Str(s) => f.write_str(s)?,
359 Part::Any => f.write_char(ANY)?,
360 Part::OneChar => f.write_char(ONE_CHAR)?,
361 };
362 }
363
364 Ok(())
365 }
366
367}