jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::ops::Deref;
23
24use bstr::ByteSlice as _;
25use either::Either;
26use globset::Glob;
27use globset::GlobBuilder;
28use thiserror::Error;
29
30/// Error occurred during pattern string parsing.
31#[derive(Debug, Error)]
32pub enum StringPatternParseError {
33    /// Unknown pattern kind is specified.
34    #[error("Invalid string pattern kind `{0}:`")]
35    InvalidKind(String),
36    /// Failed to parse glob pattern.
37    #[error(transparent)]
38    GlobPattern(globset::Error),
39    /// Failed to parse regular expression.
40    #[error(transparent)]
41    Regex(regex::Error),
42}
43
44/// A wrapper for [`Glob`] and its matcher with a more concise `Debug` impl.
45#[derive(Clone)]
46pub struct GlobPattern {
47    glob: Glob,
48    // TODO: Maybe better to add StringPattern::to_matcher(), and move regex
49    // compilation there.
50    regex: regex::bytes::Regex,
51}
52
53impl GlobPattern {
54    /// Returns true if this pattern matches `haystack`.
55    pub fn is_match(&self, haystack: &[u8]) -> bool {
56        self.regex.is_match(haystack)
57    }
58
59    /// Returns the original glob pattern.
60    pub fn as_str(&self) -> &str {
61        self.glob.glob()
62    }
63
64    /// Converts this glob pattern to a bytes regex.
65    pub fn to_regex(&self) -> regex::bytes::Regex {
66        self.regex.clone()
67    }
68}
69
70impl Debug for GlobPattern {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
73    }
74}
75
76fn parse_glob(src: &str, icase: bool) -> Result<GlobPattern, StringPatternParseError> {
77    let glob = GlobBuilder::new(src)
78        .case_insensitive(icase)
79        // Don't use platform-dependent default. This pattern isn't meant for
80        // testing file-system paths. If backslash escape were disabled, "\" in
81        // pattern would be normalized to "/" on Windows.
82        .backslash_escape(true)
83        .build()
84        .map_err(StringPatternParseError::GlobPattern)?;
85    // Based on new_regex() in globset. We don't use GlobMatcher::is_match(path)
86    // because the input string shouldn't be normalized as path.
87    let regex = regex::bytes::RegexBuilder::new(glob.regex())
88        .dot_matches_new_line(true)
89        .build()
90        .expect("glob regex should be valid");
91    Ok(GlobPattern { glob, regex })
92}
93
94/// Pattern to be tested against string property like commit description or
95/// bookmark name.
96#[derive(Clone, Debug)]
97pub enum StringPattern {
98    /// Matches strings exactly.
99    Exact(String),
100    /// Matches strings case‐insensitively.
101    ExactI(String),
102    /// Matches strings that contain a substring.
103    Substring(String),
104    /// Matches strings that case‐insensitively contain a substring.
105    SubstringI(String),
106    /// Matches with a Unix‐style shell wildcard pattern.
107    Glob(Box<GlobPattern>),
108    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
109    GlobI(Box<GlobPattern>),
110    /// Matches substrings with a regular expression.
111    Regex(regex::bytes::Regex),
112    /// Matches substrings with a case‐insensitive regular expression.
113    RegexI(regex::bytes::Regex),
114}
115
116impl StringPattern {
117    /// Pattern that matches any string.
118    pub const fn all() -> Self {
119        Self::Substring(String::new())
120    }
121
122    /// Parses the given string as a [`StringPattern`]. Everything before the
123    /// first ":" is considered the string's prefix. If the prefix is
124    /// "exact[-i]:", "glob[-i]:", or "substring[-i]:", a pattern of the
125    /// specified kind is returned. Returns an error if the string has an
126    /// unrecognized prefix. Otherwise, a `StringPattern::Exact` is
127    /// returned.
128    pub fn parse(src: &str) -> Result<Self, StringPatternParseError> {
129        if let Some((kind, pat)) = src.split_once(':') {
130            Self::from_str_kind(pat, kind)
131        } else {
132            Ok(Self::exact(src))
133        }
134    }
135
136    /// Constructs a pattern that matches exactly.
137    pub fn exact(src: impl Into<String>) -> Self {
138        Self::Exact(src.into())
139    }
140
141    /// Constructs a pattern that matches case‐insensitively.
142    pub fn exact_i(src: impl Into<String>) -> Self {
143        Self::ExactI(src.into())
144    }
145
146    /// Constructs a pattern that matches a substring.
147    pub fn substring(src: impl Into<String>) -> Self {
148        Self::Substring(src.into())
149    }
150
151    /// Constructs a pattern that case‐insensitively matches a substring.
152    pub fn substring_i(src: impl Into<String>) -> Self {
153        Self::SubstringI(src.into())
154    }
155
156    /// Parses the given string as a glob pattern.
157    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
158        // TODO: if no meta character found, it can be mapped to Exact.
159        Ok(Self::Glob(Box::new(parse_glob(src, false)?)))
160    }
161
162    /// Parses the given string as a case‐insensitive glob pattern.
163    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
164        Ok(Self::GlobI(Box::new(parse_glob(src, true)?)))
165    }
166
167    /// Parses the given string as a regular expression.
168    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
169        let pattern = regex::bytes::Regex::new(src).map_err(StringPatternParseError::Regex)?;
170        Ok(Self::Regex(pattern))
171    }
172
173    /// Parses the given string as a case-insensitive regular expression.
174    pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
175        let pattern = regex::bytes::RegexBuilder::new(src)
176            .case_insensitive(true)
177            .build()
178            .map_err(StringPatternParseError::Regex)?;
179        Ok(Self::RegexI(pattern))
180    }
181
182    /// Parses the given string as a pattern of the specified `kind`.
183    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
184        match kind {
185            "exact" => Ok(Self::exact(src)),
186            "exact-i" => Ok(Self::exact_i(src)),
187            "substring" => Ok(Self::substring(src)),
188            "substring-i" => Ok(Self::substring_i(src)),
189            "glob" => Self::glob(src),
190            "glob-i" => Self::glob_i(src),
191            "regex" => Self::regex(src),
192            "regex-i" => Self::regex_i(src),
193            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
194        }
195    }
196
197    /// Returns true if this pattern trivially matches any input strings.
198    fn is_all(&self) -> bool {
199        match self {
200            Self::Exact(_) | Self::ExactI(_) => false,
201            Self::Substring(needle) | Self::SubstringI(needle) => needle.is_empty(),
202            Self::Glob(pattern) | Self::GlobI(pattern) => pattern.as_str() == "*",
203            Self::Regex(pattern) | Self::RegexI(pattern) => pattern.as_str().is_empty(),
204        }
205    }
206
207    /// Returns true if this pattern matches input strings exactly.
208    pub fn is_exact(&self) -> bool {
209        self.as_exact().is_some()
210    }
211
212    /// Returns a literal pattern if this should match input strings exactly.
213    ///
214    /// This can be used to optimize map lookup by exact key.
215    pub fn as_exact(&self) -> Option<&str> {
216        // TODO: Handle trivial case‐insensitive patterns here? It might make people
217        // expect they can use case‐insensitive patterns in contexts where they
218        // generally can’t.
219        match self {
220            Self::Exact(literal) => Some(literal),
221            _ => None,
222        }
223    }
224
225    /// Returns the original string of this pattern.
226    pub fn as_str(&self) -> &str {
227        match self {
228            Self::Exact(literal) => literal,
229            Self::ExactI(literal) => literal,
230            Self::Substring(needle) => needle,
231            Self::SubstringI(needle) => needle,
232            Self::Glob(pattern) => pattern.as_str(),
233            Self::GlobI(pattern) => pattern.as_str(),
234            Self::Regex(pattern) => pattern.as_str(),
235            Self::RegexI(pattern) => pattern.as_str(),
236        }
237    }
238
239    /// Converts this pattern to a glob string. Returns `None` if the pattern
240    /// can't be represented as a glob.
241    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
242        // TODO: Handle trivial case‐insensitive patterns here? It might make people
243        // expect they can use case‐insensitive patterns in contexts where they
244        // generally can’t.
245        match self {
246            Self::Exact(literal) => Some(globset::escape(literal).into()),
247            Self::Substring(needle) => {
248                if needle.is_empty() {
249                    Some("*".into())
250                } else {
251                    Some(format!("*{}*", globset::escape(needle)).into())
252                }
253            }
254            Self::Glob(pattern) => Some(pattern.as_str().into()),
255            Self::ExactI(_) => None,
256            Self::SubstringI(_) => None,
257            Self::GlobI(_) => None,
258            Self::Regex(_) => None,
259            Self::RegexI(_) => None,
260        }
261    }
262
263    /// Returns true if this pattern matches the `haystack` string.
264    ///
265    /// When matching against a case‐insensitive pattern, only ASCII case
266    /// differences are currently folded. This may change in the future.
267    pub fn is_match(&self, haystack: &str) -> bool {
268        self.is_match_bytes(haystack.as_bytes())
269    }
270
271    /// Returns true if this pattern matches the `haystack` bytes.
272    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
273        // TODO: Unicode case folding is complicated and can be
274        // locale‐specific. The `globset` crate and Gitoxide only deal with
275        // ASCII case folding, so we do the same here; a more elaborate case
276        // folding system will require making sure those behave in a matching
277        // manner where relevant. That said, regex patterns are unicode-aware by
278        // default, so we already have some inconsistencies.
279        //
280        // Care will need to be taken regarding normalization and the choice of an
281        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
282        // that it is compatible with the standard case‐insensitivity of haystack
283        // components (like internationalized domain names in email addresses). The
284        // availability of normalization and case folding schemes in database backends
285        // will also need to be considered. A locale‐specific case folding
286        // scheme would likely not be appropriate for Jujutsu.
287        //
288        // For some discussion of this topic, see:
289        // <https://github.com/unicode-org/icu4x/issues/3151>
290        match self {
291            Self::Exact(literal) => haystack == literal.as_bytes(),
292            Self::ExactI(literal) => haystack.eq_ignore_ascii_case(literal.as_bytes()),
293            Self::Substring(needle) => haystack.contains_str(needle),
294            Self::SubstringI(needle) => haystack
295                .to_ascii_lowercase()
296                .contains_str(needle.to_ascii_lowercase()),
297            // (Glob, GlobI) and (Regex, RegexI) pairs are identical here, but
298            // callers might want to translate these to backend-specific query
299            // differently.
300            Self::Glob(pattern) => pattern.is_match(haystack),
301            Self::GlobI(pattern) => pattern.is_match(haystack),
302            Self::Regex(pattern) => pattern.is_match(haystack),
303            Self::RegexI(pattern) => pattern.is_match(haystack),
304        }
305    }
306
307    /// Creates matcher object from this pattern.
308    pub fn to_matcher(&self) -> StringMatcher {
309        if self.is_all() {
310            StringMatcher::All
311        } else if let Some(literal) = self.as_exact() {
312            StringMatcher::Exact(literal.to_owned())
313        } else {
314            // TODO: fully migrate is_match*() to StringMatcher, and add
315            // pattern.to_match_fn()?
316            let pattern = self.clone();
317            StringMatcher::Fn(Box::new(move |haystack| pattern.is_match_bytes(haystack)))
318        }
319    }
320
321    /// Converts the pattern into a bytes regex.
322    pub fn to_regex(&self) -> regex::bytes::Regex {
323        match self {
324            Self::Exact(literal) => {
325                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
326                    .build()
327                    .expect("impossible to fail to compile regex of literal")
328            }
329            Self::ExactI(literal) => {
330                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
331                    .case_insensitive(true)
332                    .build()
333                    .expect("impossible to fail to compile regex of literal")
334            }
335            Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
336                .build()
337                .expect("impossible to fail to compile regex of literal"),
338            Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
339                .case_insensitive(true)
340                .build()
341                .expect("impossible to fail to compile regex of literal"),
342            Self::Glob(glob_pattern) => glob_pattern.to_regex(),
343            // The regex generated represents the case insensitivity itself
344            Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
345            Self::Regex(regex) => regex.clone(),
346            Self::RegexI(regex) => regex.clone(),
347        }
348    }
349}
350
351impl fmt::Display for StringPattern {
352    /// Shows the original string of this pattern.
353    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
354        write!(f, "{}", self.as_str())
355    }
356}
357
358/// AST-level representation of the string matcher expression.
359#[derive(Clone, Debug)]
360pub enum StringExpression {
361    // None and All can be represented by using Pattern. Add them if needed.
362    /// Matches pattern.
363    Pattern(Box<StringPattern>),
364    /// Matches anything other than the expression.
365    NotIn(Box<Self>),
366    /// Matches one of the expressions.
367    Union(Box<Self>, Box<Self>),
368    /// Matches both expressions.
369    Intersection(Box<Self>, Box<Self>),
370}
371
372impl StringExpression {
373    /// Expression that matches nothing.
374    pub fn none() -> Self {
375        Self::all().negated()
376    }
377
378    /// Expression that matches everything.
379    pub fn all() -> Self {
380        Self::pattern(StringPattern::all())
381    }
382
383    /// Expression that matches the given pattern.
384    pub fn pattern(pattern: StringPattern) -> Self {
385        Self::Pattern(Box::new(pattern))
386    }
387
388    /// Expression that matches strings exactly.
389    pub fn exact(src: impl Into<String>) -> Self {
390        Self::pattern(StringPattern::exact(src))
391    }
392
393    /// Expression that matches substrings.
394    pub fn substring(src: impl Into<String>) -> Self {
395        Self::pattern(StringPattern::substring(src))
396    }
397
398    /// Expression that matches anything other than this expression.
399    pub fn negated(self) -> Self {
400        Self::NotIn(Box::new(self))
401    }
402
403    /// Expression that matches `self` or `other` (or both).
404    pub fn union(self, other: Self) -> Self {
405        Self::Union(Box::new(self), Box::new(other))
406    }
407
408    /// Expression that matches any of the given `expressions`.
409    pub fn union_all(expressions: Vec<Self>) -> Self {
410        to_binary_expression(expressions, &Self::none, &Self::union)
411    }
412
413    /// Expression that matches both `self` and `other`.
414    pub fn intersection(self, other: Self) -> Self {
415        Self::Intersection(Box::new(self), Box::new(other))
416    }
417
418    /// Transforms the expression tree to matcher object.
419    pub fn to_matcher(&self) -> StringMatcher {
420        match self {
421            Self::Pattern(pattern) => pattern.to_matcher(),
422            Self::NotIn(expr) => {
423                let p = expr.to_matcher().into_match_fn();
424                StringMatcher::Fn(Box::new(move |haystack| !p(haystack)))
425            }
426            Self::Union(expr1, expr2) => {
427                let p1 = expr1.to_matcher().into_match_fn();
428                let p2 = expr2.to_matcher().into_match_fn();
429                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) || p2(haystack)))
430            }
431            Self::Intersection(expr1, expr2) => {
432                let p1 = expr1.to_matcher().into_match_fn();
433                let p2 = expr2.to_matcher().into_match_fn();
434                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) && p2(haystack)))
435            }
436        }
437    }
438}
439
/// Folds `expressions` into a binary tree using the associative `binary`
/// operation.
///
/// An empty list produces `unit()`, and a single element is returned as is.
/// Longer lists are halved recursively so the resulting tree is balanced.
fn to_binary_expression<T>(
    expressions: Vec<T>,
    unit: &impl Fn() -> T,
    binary: &impl Fn(T, T) -> T,
) -> T {
    let mut left = expressions;
    if left.len() < 2 {
        // Zero or one element: nothing to combine.
        return left.pop().unwrap_or_else(unit);
    }
    // Build balanced tree to minimize the recursion depth.
    let right = left.split_off(left.len() / 2);
    let lhs = to_binary_expression(left, unit, binary);
    let rhs = to_binary_expression(right, unit, binary);
    binary(lhs, rhs)
}
461
// Unsized predicate over haystack bytes; used boxed by `StringMatcher`.
type DynMatchFn = dyn Fn(&[u8]) -> bool;

/// Matcher that can be tested against strings and bytes.
pub enum StringMatcher {
    /// Every input matches.
    All,
    /// Only the input equal to the stored string matches.
    Exact(String),
    /// The boxed predicate decides whether the input matches.
    Fn(Box<DynMatchFn>),
}
473
474impl StringMatcher {
475    /// Matcher that matches any strings.
476    pub const fn all() -> Self {
477        Self::All
478    }
479
480    /// Matcher that matches `src` exactly.
481    pub fn exact(src: impl Into<String>) -> Self {
482        Self::Exact(src.into())
483    }
484
485    /// Returns true if this matches the `haystack` string.
486    pub fn is_match(&self, haystack: &str) -> bool {
487        self.is_match_bytes(haystack.as_bytes())
488    }
489
490    /// Returns true if this matches the `haystack` bytes.
491    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
492        match self {
493            Self::All => true,
494            Self::Exact(needle) => haystack == needle.as_bytes(),
495            Self::Fn(predicate) => predicate(haystack),
496        }
497    }
498
499    fn into_match_fn(self) -> Box<DynMatchFn> {
500        match self {
501            Self::All => Box::new(|_haystack| true),
502            Self::Exact(needle) => Box::new(move |haystack| haystack == needle.as_bytes()),
503            Self::Fn(predicate) => predicate,
504        }
505    }
506
507    /// Iterates entries of the given `map` whose string keys match this.
508    pub fn filter_btree_map<'a, K: Borrow<str> + Ord, V>(
509        &self,
510        map: &'a BTreeMap<K, V>,
511    ) -> impl Iterator<Item = (&'a K, &'a V)> {
512        self.filter_btree_map_with(map, |key| key, |key| key)
513    }
514
515    /// Iterates entries of the given `map` whose string-like keys match this.
516    ///
517    /// The borrowed key type is constrained by the `Deref::Target`. It must be
518    /// convertible to/from `str`.
519    pub fn filter_btree_map_as_deref<'a, K, V>(
520        &self,
521        map: &'a BTreeMap<K, V>,
522    ) -> impl Iterator<Item = (&'a K, &'a V)>
523    where
524        K: Borrow<K::Target> + Deref + Ord,
525        K::Target: AsRef<str> + Ord,
526        str: AsRef<K::Target>,
527    {
528        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
529    }
530
531    fn filter_btree_map_with<'a, K, Q, V>(
532        &self,
533        map: &'a BTreeMap<K, V>,
534        from_key: impl Fn(&Q) -> &str,
535        to_key: impl Fn(&str) -> &Q,
536    ) -> impl Iterator<Item = (&'a K, &'a V)>
537    where
538        K: Borrow<Q> + Ord,
539        Q: Ord + ?Sized,
540    {
541        match self {
542            Self::All => Either::Left(map.iter()),
543            Self::Exact(key) => {
544                Either::Right(Either::Left(map.get_key_value(to_key(key)).into_iter()))
545            }
546            Self::Fn(predicate) => {
547                Either::Right(Either::Right(map.iter().filter(move |&(key, _)| {
548                    predicate(from_key(key.borrow()).as_bytes())
549                })))
550            }
551        }
552    }
553}
554
555impl Debug for StringMatcher {
556    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
557        match self {
558            Self::All => write!(f, "All"),
559            Self::Exact(needle) => f.debug_tuple("Exact").field(needle).finish(),
560            Self::Fn(_) => f.debug_tuple("Fn").finish_non_exhaustive(),
561        }
562    }
563}
564
565#[cfg(test)]
566mod tests {
567    use assert_matches::assert_matches;
568    use itertools::Itertools as _;
569    use maplit::btreemap;
570
571    use super::*;
572
573    fn insta_settings() -> insta::Settings {
574        let mut settings = insta::Settings::clone_current();
575        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
576        // the output more readable.
577        for _ in 0..4 {
578            settings.add_filter(
579                r"(?x)
580                \b([A-Z]\w*)\(\n
581                    \s*(.{1,60}),\n
582                \s*\)",
583                "$1($2)",
584            );
585        }
586        settings
587    }
588
589    #[test]
590    fn test_string_pattern_to_glob() {
591        assert_eq!(StringPattern::all().to_glob(), Some("*".into()));
592        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
593        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
594        assert_eq!(
595            StringPattern::glob("*").unwrap().to_glob(),
596            Some("*".into())
597        );
598        assert_eq!(
599            StringPattern::Substring("a".into()).to_glob(),
600            Some("*a*".into())
601        );
602        assert_eq!(
603            StringPattern::Substring("*".into()).to_glob(),
604            Some("*[*]*".into())
605        );
606    }
607
608    #[test]
609    fn test_parse() {
610        // Parse specific pattern kinds.
611        assert_matches!(
612            StringPattern::parse("exact:foo"),
613            Ok(StringPattern::Exact(s)) if s == "foo"
614        );
615        assert_matches!(
616            StringPattern::from_str_kind("foo", "exact"),
617            Ok(StringPattern::Exact(s)) if s == "foo"
618        );
619        assert_matches!(
620            StringPattern::parse("glob:foo*"),
621            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
622        );
623        assert_matches!(
624            StringPattern::from_str_kind("foo*", "glob"),
625            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
626        );
627        assert_matches!(
628            StringPattern::parse("substring:foo"),
629            Ok(StringPattern::Substring(s)) if s == "foo"
630        );
631        assert_matches!(
632            StringPattern::from_str_kind("foo", "substring"),
633            Ok(StringPattern::Substring(s)) if s == "foo"
634        );
635        assert_matches!(
636            StringPattern::parse("substring-i:foo"),
637            Ok(StringPattern::SubstringI(s)) if s == "foo"
638        );
639        assert_matches!(
640            StringPattern::from_str_kind("foo", "substring-i"),
641            Ok(StringPattern::SubstringI(s)) if s == "foo"
642        );
643        assert_matches!(
644            StringPattern::parse("regex:foo"),
645            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
646        );
647        assert_matches!(
648            StringPattern::from_str_kind("foo", "regex"),
649            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
650        );
651        assert_matches!(
652            StringPattern::parse("regex-i:foo"),
653            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
654        );
655        assert_matches!(
656            StringPattern::from_str_kind("foo", "regex-i"),
657            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
658        );
659
660        // Parse a pattern that contains a : itself.
661        assert_matches!(
662            StringPattern::parse("exact:foo:bar"),
663            Ok(StringPattern::Exact(s)) if s == "foo:bar"
664        );
665
666        // If no kind is specified, the input is treated as an exact pattern.
667        assert_matches!(
668            StringPattern::parse("foo"),
669            Ok(StringPattern::Exact(s)) if s == "foo"
670        );
671
672        // Parsing an unknown prefix results in an error.
673        assert_matches!(
674            StringPattern::parse("unknown-prefix:foo"),
675            Err(StringPatternParseError::InvalidKind(_))
676        );
677    }
678
679    #[test]
680    fn test_glob_is_match() {
681        assert!(StringPattern::glob("foo").unwrap().is_match("foo"));
682        assert!(!StringPattern::glob("foo").unwrap().is_match("foobar"));
683
684        // "." in string isn't any special
685        assert!(StringPattern::glob("*").unwrap().is_match(".foo"));
686
687        // "/" in string isn't any special
688        assert!(StringPattern::glob("*").unwrap().is_match("foo/bar"));
689        assert!(StringPattern::glob(r"*/*").unwrap().is_match("foo/bar"));
690        assert!(!StringPattern::glob(r"*/*").unwrap().is_match(r"foo\bar"));
691
692        // "\" is an escape character
693        assert!(!StringPattern::glob(r"*\*").unwrap().is_match("foo/bar"));
694        assert!(StringPattern::glob(r"*\*").unwrap().is_match("foo*"));
695        assert!(StringPattern::glob(r"\\").unwrap().is_match(r"\"));
696
697        // "*" matches newline
698        assert!(StringPattern::glob(r"*").unwrap().is_match("foo\nbar"));
699
700        assert!(!StringPattern::glob("f?O").unwrap().is_match("Foo"));
701        assert!(StringPattern::glob_i("f?O").unwrap().is_match("Foo"));
702    }
703
704    #[test]
705    fn test_regex_is_match() {
706        // Unicode mode is enabled by default
707        assert!(StringPattern::regex(r"^\w$").unwrap().is_match("\u{c0}"));
708        assert!(StringPattern::regex(r"^.$").unwrap().is_match("\u{c0}"));
709        // ASCII-compatible mode should also work
710        assert!(StringPattern::regex(r"^(?-u)\w$").unwrap().is_match("a"));
711        assert!(
712            !StringPattern::regex(r"^(?-u)\w$")
713                .unwrap()
714                .is_match("\u{c0}")
715        );
716        assert!(
717            StringPattern::regex(r"^(?-u).{2}$")
718                .unwrap()
719                .is_match("\u{c0}")
720        );
721    }
722
723    #[test]
724    fn test_string_pattern_to_regex() {
725        let check = |pattern: StringPattern, match_to: &str| {
726            let regex = pattern.to_regex();
727            regex.is_match(match_to.as_bytes())
728        };
729        assert!(check(StringPattern::exact("$a"), "$a"));
730        assert!(!check(StringPattern::exact("$a"), "$A"));
731        assert!(!check(StringPattern::exact("a"), "aa"));
732        assert!(!check(StringPattern::exact("a"), "aa"));
733        assert!(check(StringPattern::exact_i("a"), "A"));
734        assert!(check(StringPattern::substring("$a"), "$abc"));
735        assert!(!check(StringPattern::substring("$a"), "$Abc"));
736        assert!(check(StringPattern::substring_i("$a"), "$Abc"));
737        assert!(!check(StringPattern::glob("a").unwrap(), "A"));
738        assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
739        assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
740        assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
741        assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
742    }
743
744    #[test]
745    fn test_exact_pattern_to_matcher() {
746        assert_matches!(
747            StringPattern::exact("").to_matcher(),
748            StringMatcher::Exact(needle) if needle.is_empty()
749        );
750        assert_matches!(
751            StringPattern::exact("x").to_matcher(),
752            StringMatcher::Exact(needle) if needle == "x"
753        );
754
755        assert_matches!(
756            StringPattern::exact_i("").to_matcher(),
757            StringMatcher::Fn(_) // or Exact
758        );
759        assert_matches!(
760            StringPattern::exact_i("x").to_matcher(),
761            StringMatcher::Fn(_)
762        );
763    }
764
765    #[test]
766    fn test_substring_pattern_to_matcher() {
767        assert_matches!(
768            StringPattern::substring("").to_matcher(),
769            StringMatcher::All
770        );
771        assert_matches!(
772            StringPattern::substring("x").to_matcher(),
773            StringMatcher::Fn(_)
774        );
775
776        assert_matches!(
777            StringPattern::substring_i("").to_matcher(),
778            StringMatcher::All
779        );
780        assert_matches!(
781            StringPattern::substring_i("x").to_matcher(),
782            StringMatcher::Fn(_)
783        );
784    }
785
786    #[test]
787    fn test_glob_pattern_to_matcher() {
788        assert_matches!(
789            StringPattern::glob("").unwrap().to_matcher(),
790            StringMatcher::Fn(_) // or Exact
791        );
792        assert_matches!(
793            StringPattern::glob("x").unwrap().to_matcher(),
794            StringMatcher::Fn(_) // or Exact
795        );
796        assert_matches!(
797            StringPattern::glob("x?").unwrap().to_matcher(),
798            StringMatcher::Fn(_)
799        );
800        assert_matches!(
801            StringPattern::glob("*").unwrap().to_matcher(),
802            StringMatcher::All
803        );
804
805        assert_matches!(
806            StringPattern::glob_i("").unwrap().to_matcher(),
807            StringMatcher::Fn(_) // or Exact
808        );
809        assert_matches!(
810            StringPattern::glob_i("x").unwrap().to_matcher(),
811            StringMatcher::Fn(_)
812        );
813        assert_matches!(
814            StringPattern::glob_i("x?").unwrap().to_matcher(),
815            StringMatcher::Fn(_)
816        );
817        assert_matches!(
818            StringPattern::glob_i("*").unwrap().to_matcher(),
819            StringMatcher::All
820        );
821    }
822
823    #[test]
824    fn test_regex_pattern_to_matcher() {
825        assert_matches!(
826            StringPattern::regex("").unwrap().to_matcher(),
827            StringMatcher::All
828        );
829        assert_matches!(
830            StringPattern::regex("x").unwrap().to_matcher(),
831            StringMatcher::Fn(_)
832        );
833        assert_matches!(
834            StringPattern::regex(".").unwrap().to_matcher(),
835            StringMatcher::Fn(_)
836        );
837
838        assert_matches!(
839            StringPattern::regex_i("").unwrap().to_matcher(),
840            StringMatcher::All
841        );
842        assert_matches!(
843            StringPattern::regex_i("x").unwrap().to_matcher(),
844            StringMatcher::Fn(_)
845        );
846        assert_matches!(
847            StringPattern::regex_i(".").unwrap().to_matcher(),
848            StringMatcher::Fn(_)
849        );
850    }
851
852    #[test]
853    fn test_union_all_expressions() {
854        let settings = insta_settings();
855        let _guard = settings.bind_to_scope();
856
857        insta::assert_debug_snapshot!(
858            StringExpression::union_all(vec![]),
859            @r#"NotIn(Pattern(Substring("")))"#);
860        insta::assert_debug_snapshot!(
861            StringExpression::union_all(vec![StringExpression::exact("a")]),
862            @r#"Pattern(Exact("a"))"#);
863        insta::assert_debug_snapshot!(
864            StringExpression::union_all(vec![
865                StringExpression::exact("a"),
866                StringExpression::exact("b"),
867            ]),
868            @r#"
869        Union(
870            Pattern(Exact("a")),
871            Pattern(Exact("b")),
872        )
873        "#);
874        insta::assert_debug_snapshot!(
875            StringExpression::union_all(vec![
876                StringExpression::exact("a"),
877                StringExpression::exact("b"),
878                StringExpression::exact("c"),
879            ]),
880            @r#"
881        Union(
882            Pattern(Exact("a")),
883            Union(
884                Pattern(Exact("b")),
885                Pattern(Exact("c")),
886            ),
887        )
888        "#);
889        insta::assert_debug_snapshot!(
890            StringExpression::union_all(vec![
891                StringExpression::exact("a"),
892                StringExpression::exact("b"),
893                StringExpression::exact("c"),
894                StringExpression::exact("d"),
895            ]),
896            @r#"
897        Union(
898            Union(
899                Pattern(Exact("a")),
900                Pattern(Exact("b")),
901            ),
902            Union(
903                Pattern(Exact("c")),
904                Pattern(Exact("d")),
905            ),
906        )
907        "#);
908    }
909
910    #[test]
911    fn test_trivial_expression_to_matcher() {
912        assert_matches!(StringExpression::all().to_matcher(), StringMatcher::All);
913        assert_matches!(
914            StringExpression::exact("x").to_matcher(),
915            StringMatcher::Exact(needle) if needle == "x"
916        );
917    }
918
919    #[test]
920    fn test_compound_expression_to_matcher() {
921        let matcher = StringExpression::exact("foo").negated().to_matcher();
922        assert!(!matcher.is_match("foo"));
923        assert!(matcher.is_match("bar"));
924
925        let matcher = StringExpression::union(
926            StringExpression::exact("foo"),
927            StringExpression::exact("bar"),
928        )
929        .to_matcher();
930        assert!(matcher.is_match("foo"));
931        assert!(matcher.is_match("bar"));
932        assert!(!matcher.is_match("baz"));
933
934        let matcher = StringExpression::intersection(
935            StringExpression::substring("a"),
936            StringExpression::substring("r"),
937        )
938        .to_matcher();
939        assert!(!matcher.is_match("foo"));
940        assert!(matcher.is_match("bar"));
941        assert!(!matcher.is_match("baz"));
942    }
943
944    #[test]
945    fn test_matcher_is_match() {
946        assert!(StringMatcher::all().is_match(""));
947        assert!(StringMatcher::all().is_match("foo"));
948        assert!(!StringMatcher::exact("o").is_match(""));
949        assert!(!StringMatcher::exact("o").is_match("foo"));
950        assert!(StringMatcher::exact("foo").is_match("foo"));
951        assert!(StringPattern::substring("o").to_matcher().is_match("foo"));
952    }
953
954    #[test]
955    fn test_matcher_filter_btree_map() {
956        let data = btreemap! {
957            "bar" => (),
958            "baz" => (),
959            "foo" => (),
960        };
961        let filter = |matcher: &StringMatcher| {
962            matcher
963                .filter_btree_map(&data)
964                .map(|(&key, ())| key)
965                .collect_vec()
966        };
967        assert_eq!(filter(&StringMatcher::all()), vec!["bar", "baz", "foo"]);
968        assert_eq!(filter(&StringMatcher::exact("o")), vec![""; 0]);
969        assert_eq!(filter(&StringMatcher::exact("foo")), vec!["foo"]);
970        assert_eq!(
971            filter(&StringPattern::substring("o").to_matcher()),
972            vec!["foo"]
973        );
974        assert_eq!(
975            filter(&StringPattern::substring("a").to_matcher()),
976            vec!["bar", "baz"]
977        );
978    }
979}