1use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::ops::Deref;
23
24use bstr::ByteSlice as _;
25use either::Either;
26use globset::Glob;
27use globset::GlobBuilder;
28use thiserror::Error;
29
30#[derive(Debug, Error)]
32pub enum StringPatternParseError {
33 #[error("Invalid string pattern kind `{0}:`")]
35 InvalidKind(String),
36 #[error(transparent)]
38 GlobPattern(globset::Error),
39 #[error(transparent)]
41 Regex(regex::Error),
42}
43
44#[derive(Clone)]
46pub struct GlobPattern {
47 glob: Glob,
48 regex: regex::bytes::Regex,
51}
52
53impl GlobPattern {
54 pub fn is_match(&self, haystack: &[u8]) -> bool {
56 self.regex.is_match(haystack)
57 }
58
59 pub fn as_str(&self) -> &str {
61 self.glob.glob()
62 }
63
64 pub fn to_regex(&self) -> regex::bytes::Regex {
66 self.regex.clone()
67 }
68}
69
70impl Debug for GlobPattern {
71 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72 f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
73 }
74}
75
76fn parse_glob(src: &str, icase: bool) -> Result<GlobPattern, StringPatternParseError> {
77 let glob = GlobBuilder::new(src)
78 .case_insensitive(icase)
79 .backslash_escape(true)
83 .build()
84 .map_err(StringPatternParseError::GlobPattern)?;
85 let regex = regex::bytes::RegexBuilder::new(glob.regex())
88 .dot_matches_new_line(true)
89 .build()
90 .expect("glob regex should be valid");
91 Ok(GlobPattern { glob, regex })
92}
93
94#[derive(Clone, Debug)]
97pub enum StringPattern {
98 Exact(String),
100 ExactI(String),
102 Substring(String),
104 SubstringI(String),
106 Glob(Box<GlobPattern>),
108 GlobI(Box<GlobPattern>),
110 Regex(regex::bytes::Regex),
112 RegexI(regex::bytes::Regex),
114}
115
116impl StringPattern {
117 pub const fn everything() -> Self {
119 Self::Substring(String::new())
120 }
121
122 pub fn parse(src: &str) -> Result<Self, StringPatternParseError> {
129 if let Some((kind, pat)) = src.split_once(':') {
130 Self::from_str_kind(pat, kind)
131 } else {
132 Ok(Self::exact(src))
133 }
134 }
135
136 pub fn exact(src: impl Into<String>) -> Self {
138 Self::Exact(src.into())
139 }
140
141 pub fn exact_i(src: impl Into<String>) -> Self {
143 Self::ExactI(src.into())
144 }
145
146 pub fn substring(src: impl Into<String>) -> Self {
148 Self::Substring(src.into())
149 }
150
151 pub fn substring_i(src: impl Into<String>) -> Self {
153 Self::SubstringI(src.into())
154 }
155
156 pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
158 Ok(Self::Glob(Box::new(parse_glob(src, false)?)))
160 }
161
162 pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
164 Ok(Self::GlobI(Box::new(parse_glob(src, true)?)))
165 }
166
167 pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
169 let pattern = regex::bytes::Regex::new(src).map_err(StringPatternParseError::Regex)?;
170 Ok(Self::Regex(pattern))
171 }
172
173 pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
175 let pattern = regex::bytes::RegexBuilder::new(src)
176 .case_insensitive(true)
177 .build()
178 .map_err(StringPatternParseError::Regex)?;
179 Ok(Self::RegexI(pattern))
180 }
181
182 pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
184 match kind {
185 "exact" => Ok(Self::exact(src)),
186 "exact-i" => Ok(Self::exact_i(src)),
187 "substring" => Ok(Self::substring(src)),
188 "substring-i" => Ok(Self::substring_i(src)),
189 "glob" => Self::glob(src),
190 "glob-i" => Self::glob_i(src),
191 "regex" => Self::regex(src),
192 "regex-i" => Self::regex_i(src),
193 _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
194 }
195 }
196
197 pub fn is_exact(&self) -> bool {
199 self.as_exact().is_some()
200 }
201
202 pub fn as_exact(&self) -> Option<&str> {
206 match self {
210 Self::Exact(literal) => Some(literal),
211 _ => None,
212 }
213 }
214
215 pub fn as_str(&self) -> &str {
217 match self {
218 Self::Exact(literal) => literal,
219 Self::ExactI(literal) => literal,
220 Self::Substring(needle) => needle,
221 Self::SubstringI(needle) => needle,
222 Self::Glob(pattern) => pattern.as_str(),
223 Self::GlobI(pattern) => pattern.as_str(),
224 Self::Regex(pattern) => pattern.as_str(),
225 Self::RegexI(pattern) => pattern.as_str(),
226 }
227 }
228
229 pub fn to_glob(&self) -> Option<Cow<'_, str>> {
232 match self {
236 Self::Exact(literal) => Some(globset::escape(literal).into()),
237 Self::Substring(needle) => {
238 if needle.is_empty() {
239 Some("*".into())
240 } else {
241 Some(format!("*{}*", globset::escape(needle)).into())
242 }
243 }
244 Self::Glob(pattern) => Some(pattern.as_str().into()),
245 Self::ExactI(_) => None,
246 Self::SubstringI(_) => None,
247 Self::GlobI(_) => None,
248 Self::Regex(_) => None,
249 Self::RegexI(_) => None,
250 }
251 }
252
253 pub fn is_match(&self, haystack: &str) -> bool {
258 self.is_match_bytes(haystack.as_bytes())
259 }
260
261 pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
263 match self {
281 Self::Exact(literal) => haystack == literal.as_bytes(),
282 Self::ExactI(literal) => haystack.eq_ignore_ascii_case(literal.as_bytes()),
283 Self::Substring(needle) => haystack.contains_str(needle),
284 Self::SubstringI(needle) => haystack
285 .to_ascii_lowercase()
286 .contains_str(needle.to_ascii_lowercase()),
287 Self::Glob(pattern) => pattern.is_match(haystack),
291 Self::GlobI(pattern) => pattern.is_match(haystack),
292 Self::Regex(pattern) => pattern.is_match(haystack),
293 Self::RegexI(pattern) => pattern.is_match(haystack),
294 }
295 }
296
297 pub fn to_regex(&self) -> regex::bytes::Regex {
299 match self {
300 Self::Exact(literal) => {
301 regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
302 .build()
303 .expect("impossible to fail to compile regex of literal")
304 }
305 Self::ExactI(literal) => {
306 regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
307 .case_insensitive(true)
308 .build()
309 .expect("impossible to fail to compile regex of literal")
310 }
311 Self::Substring(literal) => regex::bytes::RegexBuilder::new(®ex::escape(literal))
312 .build()
313 .expect("impossible to fail to compile regex of literal"),
314 Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(®ex::escape(literal))
315 .case_insensitive(true)
316 .build()
317 .expect("impossible to fail to compile regex of literal"),
318 Self::Glob(glob_pattern) => glob_pattern.to_regex(),
319 Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
321 Self::Regex(regex) => regex.clone(),
322 Self::RegexI(regex) => regex.clone(),
323 }
324 }
325
326 pub fn filter_btree_map<'a, 'b, K: Borrow<str> + Ord, V>(
329 &'b self,
330 map: &'a BTreeMap<K, V>,
331 ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, V> {
332 self.filter_btree_map_with(map, |key| key, |key| key)
333 }
334
335 pub fn filter_btree_map_as_deref<'a, 'b, K, V>(
341 &'b self,
342 map: &'a BTreeMap<K, V>,
343 ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, V>
344 where
345 K: Borrow<K::Target> + Deref + Ord,
346 K::Target: AsRef<str> + Ord,
347 str: AsRef<K::Target>,
348 {
349 self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
350 }
351
352 fn filter_btree_map_with<'a, 'b, K, Q, V, FromKey, ToKey>(
353 &'b self,
354 map: &'a BTreeMap<K, V>,
355 from_key: FromKey,
356 to_key: ToKey,
357 ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, Q, V, FromKey, ToKey>
361 where
362 K: Borrow<Q> + Ord,
363 Q: Ord + ?Sized,
364 FromKey: Fn(&Q) -> &str,
365 ToKey: Fn(&str) -> &Q,
366 {
367 if let Some(key) = self.as_exact() {
368 Either::Left(map.get_key_value(to_key(key)).into_iter())
369 } else {
370 Either::Right(
371 map.iter()
372 .filter(move |&(key, _)| self.is_match(from_key(key.borrow()))),
373 )
374 }
375 }
376}
377
378impl fmt::Display for StringPattern {
379 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
381 write!(f, "{}", self.as_str())
382 }
383}
384
385#[cfg(test)]
386mod tests {
387 use assert_matches::assert_matches;
388
389 use super::*;
390
391 #[test]
392 fn test_string_pattern_to_glob() {
393 assert_eq!(StringPattern::everything().to_glob(), Some("*".into()));
394 assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
395 assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
396 assert_eq!(
397 StringPattern::glob("*").unwrap().to_glob(),
398 Some("*".into())
399 );
400 assert_eq!(
401 StringPattern::Substring("a".into()).to_glob(),
402 Some("*a*".into())
403 );
404 assert_eq!(
405 StringPattern::Substring("*".into()).to_glob(),
406 Some("*[*]*".into())
407 );
408 }
409
410 #[test]
411 fn test_parse() {
412 assert_matches!(
414 StringPattern::parse("exact:foo"),
415 Ok(StringPattern::Exact(s)) if s == "foo"
416 );
417 assert_matches!(
418 StringPattern::from_str_kind("foo", "exact"),
419 Ok(StringPattern::Exact(s)) if s == "foo"
420 );
421 assert_matches!(
422 StringPattern::parse("glob:foo*"),
423 Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
424 );
425 assert_matches!(
426 StringPattern::from_str_kind("foo*", "glob"),
427 Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
428 );
429 assert_matches!(
430 StringPattern::parse("substring:foo"),
431 Ok(StringPattern::Substring(s)) if s == "foo"
432 );
433 assert_matches!(
434 StringPattern::from_str_kind("foo", "substring"),
435 Ok(StringPattern::Substring(s)) if s == "foo"
436 );
437 assert_matches!(
438 StringPattern::parse("substring-i:foo"),
439 Ok(StringPattern::SubstringI(s)) if s == "foo"
440 );
441 assert_matches!(
442 StringPattern::from_str_kind("foo", "substring-i"),
443 Ok(StringPattern::SubstringI(s)) if s == "foo"
444 );
445 assert_matches!(
446 StringPattern::parse("regex:foo"),
447 Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
448 );
449 assert_matches!(
450 StringPattern::from_str_kind("foo", "regex"),
451 Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
452 );
453 assert_matches!(
454 StringPattern::parse("regex-i:foo"),
455 Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
456 );
457 assert_matches!(
458 StringPattern::from_str_kind("foo", "regex-i"),
459 Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
460 );
461
462 assert_matches!(
464 StringPattern::parse("exact:foo:bar"),
465 Ok(StringPattern::Exact(s)) if s == "foo:bar"
466 );
467
468 assert_matches!(
470 StringPattern::parse("foo"),
471 Ok(StringPattern::Exact(s)) if s == "foo"
472 );
473
474 assert_matches!(
476 StringPattern::parse("unknown-prefix:foo"),
477 Err(StringPatternParseError::InvalidKind(_))
478 );
479 }
480
481 #[test]
482 fn test_glob_is_match() {
483 assert!(StringPattern::glob("foo").unwrap().is_match("foo"));
484 assert!(!StringPattern::glob("foo").unwrap().is_match("foobar"));
485
486 assert!(StringPattern::glob("*").unwrap().is_match(".foo"));
488
489 assert!(StringPattern::glob("*").unwrap().is_match("foo/bar"));
491 assert!(StringPattern::glob(r"*/*").unwrap().is_match("foo/bar"));
492 assert!(!StringPattern::glob(r"*/*").unwrap().is_match(r"foo\bar"));
493
494 assert!(!StringPattern::glob(r"*\*").unwrap().is_match("foo/bar"));
496 assert!(StringPattern::glob(r"*\*").unwrap().is_match("foo*"));
497 assert!(StringPattern::glob(r"\\").unwrap().is_match(r"\"));
498
499 assert!(StringPattern::glob(r"*").unwrap().is_match("foo\nbar"));
501
502 assert!(!StringPattern::glob("f?O").unwrap().is_match("Foo"));
503 assert!(StringPattern::glob_i("f?O").unwrap().is_match("Foo"));
504 }
505
506 #[test]
507 fn test_regex_is_match() {
508 assert!(StringPattern::regex(r"^\w$").unwrap().is_match("\u{c0}"));
510 assert!(StringPattern::regex(r"^.$").unwrap().is_match("\u{c0}"));
511 assert!(StringPattern::regex(r"^(?-u)\w$").unwrap().is_match("a"));
513 assert!(
514 !StringPattern::regex(r"^(?-u)\w$")
515 .unwrap()
516 .is_match("\u{c0}")
517 );
518 assert!(
519 StringPattern::regex(r"^(?-u).{2}$")
520 .unwrap()
521 .is_match("\u{c0}")
522 );
523 }
524
525 #[test]
526 fn test_string_pattern_to_regex() {
527 let check = |pattern: StringPattern, match_to: &str| {
528 let regex = pattern.to_regex();
529 regex.is_match(match_to.as_bytes())
530 };
531 assert!(check(StringPattern::exact("$a"), "$a"));
532 assert!(!check(StringPattern::exact("$a"), "$A"));
533 assert!(!check(StringPattern::exact("a"), "aa"));
534 assert!(!check(StringPattern::exact("a"), "aa"));
535 assert!(check(StringPattern::exact_i("a"), "A"));
536 assert!(check(StringPattern::substring("$a"), "$abc"));
537 assert!(!check(StringPattern::substring("$a"), "$Abc"));
538 assert!(check(StringPattern::substring_i("$a"), "$Abc"));
539 assert!(!check(StringPattern::glob("a").unwrap(), "A"));
540 assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
541 assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
542 assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
543 assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
544 }
545}