jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::iter;
23use std::ops::Deref;
24
25use bstr::ByteSlice as _;
26use either::Either;
27use globset::Glob;
28use globset::GlobBuilder;
29use thiserror::Error;
30
31/// Error occurred during pattern string parsing.
32#[derive(Debug, Error)]
33pub enum StringPatternParseError {
34    /// Unknown pattern kind is specified.
35    #[error("Invalid string pattern kind `{0}:`")]
36    InvalidKind(String),
37    /// Failed to parse glob pattern.
38    #[error(transparent)]
39    GlobPattern(globset::Error),
40    /// Failed to parse regular expression.
41    #[error(transparent)]
42    Regex(regex::Error),
43}
44
45/// A wrapper for [`Glob`] and its matcher with a more concise `Debug` impl.
46#[derive(Clone)]
47pub struct GlobPattern {
48    glob: Glob,
49    // TODO: Maybe better to add StringPattern::to_matcher(), and move regex
50    // compilation there.
51    regex: regex::bytes::Regex,
52}
53
54impl GlobPattern {
55    /// Returns true if this pattern matches `haystack`.
56    pub fn is_match(&self, haystack: &[u8]) -> bool {
57        self.regex.is_match(haystack)
58    }
59
60    /// Returns the original glob pattern.
61    pub fn as_str(&self) -> &str {
62        self.glob.glob()
63    }
64
65    /// Converts this glob pattern to a bytes regex.
66    pub fn to_regex(&self) -> regex::bytes::Regex {
67        self.regex.clone()
68    }
69}
70
71impl Debug for GlobPattern {
72    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
73        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
74    }
75}
76
77fn parse_glob(src: &str, icase: bool) -> Result<GlobPattern, StringPatternParseError> {
78    let glob = GlobBuilder::new(src)
79        .case_insensitive(icase)
80        // Don't use platform-dependent default. This pattern isn't meant for
81        // testing file-system paths. If backslash escape were disabled, "\" in
82        // pattern would be normalized to "/" on Windows.
83        .backslash_escape(true)
84        .build()
85        .map_err(StringPatternParseError::GlobPattern)?;
86    // Based on new_regex() in globset. We don't use GlobMatcher::is_match(path)
87    // because the input string shouldn't be normalized as path.
88    let regex = regex::bytes::RegexBuilder::new(glob.regex())
89        .dot_matches_new_line(true)
90        .build()
91        .expect("glob regex should be valid");
92    Ok(GlobPattern { glob, regex })
93}
94
95fn is_glob_char(c: char) -> bool {
96    // See globset::escape(). In addition to that, backslash is parsed as an
97    // escape sequence on all platforms.
98    matches!(c, '?' | '*' | '[' | ']' | '{' | '}' | '\\')
99}
100
101/// Pattern to be tested against string property like commit description or
102/// bookmark name.
103#[derive(Clone, Debug)]
104pub enum StringPattern {
105    /// Matches strings exactly.
106    Exact(String),
107    /// Matches strings case‐insensitively.
108    ExactI(String),
109    /// Matches strings that contain a substring.
110    Substring(String),
111    /// Matches strings that case‐insensitively contain a substring.
112    SubstringI(String),
113    /// Matches with a Unix‐style shell wildcard pattern.
114    Glob(Box<GlobPattern>),
115    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
116    GlobI(Box<GlobPattern>),
117    /// Matches substrings with a regular expression.
118    Regex(regex::bytes::Regex),
119    /// Matches substrings with a case‐insensitive regular expression.
120    RegexI(regex::bytes::Regex),
121}
122
123impl StringPattern {
124    /// Pattern that matches any string.
125    pub const fn all() -> Self {
126        Self::Substring(String::new())
127    }
128
129    /// Parses the given string as a [`StringPattern`]. Everything before the
130    /// first ":" is considered the string's prefix. If the prefix is
131    /// "exact[-i]:", "glob[-i]:", or "substring[-i]:", a pattern of the
132    /// specified kind is returned. Returns an error if the string has an
133    /// unrecognized prefix. Otherwise, a `StringPattern::Exact` is
134    /// returned.
135    pub fn parse(src: &str) -> Result<Self, StringPatternParseError> {
136        if let Some((kind, pat)) = src.split_once(':') {
137            Self::from_str_kind(pat, kind)
138        } else {
139            Ok(Self::exact(src))
140        }
141    }
142
143    /// Constructs a pattern that matches exactly.
144    pub fn exact(src: impl Into<String>) -> Self {
145        Self::Exact(src.into())
146    }
147
148    /// Constructs a pattern that matches case‐insensitively.
149    pub fn exact_i(src: impl Into<String>) -> Self {
150        Self::ExactI(src.into())
151    }
152
153    /// Constructs a pattern that matches a substring.
154    pub fn substring(src: impl Into<String>) -> Self {
155        Self::Substring(src.into())
156    }
157
158    /// Constructs a pattern that case‐insensitively matches a substring.
159    pub fn substring_i(src: impl Into<String>) -> Self {
160        Self::SubstringI(src.into())
161    }
162
163    /// Parses the given string as a glob pattern.
164    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
165        if !src.contains(is_glob_char) {
166            return Ok(Self::exact(src));
167        }
168        Ok(Self::Glob(Box::new(parse_glob(src, false)?)))
169    }
170
171    /// Parses the given string as a case‐insensitive glob pattern.
172    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
173        // No special case for !src.contains(is_glob_char) because it's unclear
174        // whether we'll use unicode case comparison for "exact-i" patterns.
175        // "glob-i" should always be ASCII-based.
176        Ok(Self::GlobI(Box::new(parse_glob(src, true)?)))
177    }
178
179    /// Parses the given string as a regular expression.
180    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
181        let pattern = regex::bytes::Regex::new(src).map_err(StringPatternParseError::Regex)?;
182        Ok(Self::Regex(pattern))
183    }
184
185    /// Parses the given string as a case-insensitive regular expression.
186    pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
187        let pattern = regex::bytes::RegexBuilder::new(src)
188            .case_insensitive(true)
189            .build()
190            .map_err(StringPatternParseError::Regex)?;
191        Ok(Self::RegexI(pattern))
192    }
193
194    /// Parses the given string as a pattern of the specified `kind`.
195    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
196        match kind {
197            "exact" => Ok(Self::exact(src)),
198            "exact-i" => Ok(Self::exact_i(src)),
199            "substring" => Ok(Self::substring(src)),
200            "substring-i" => Ok(Self::substring_i(src)),
201            "glob" => Self::glob(src),
202            "glob-i" => Self::glob_i(src),
203            "regex" => Self::regex(src),
204            "regex-i" => Self::regex_i(src),
205            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
206        }
207    }
208
209    /// Returns true if this pattern trivially matches any input strings.
210    fn is_all(&self) -> bool {
211        match self {
212            Self::Exact(_) | Self::ExactI(_) => false,
213            Self::Substring(needle) | Self::SubstringI(needle) => needle.is_empty(),
214            Self::Glob(pattern) | Self::GlobI(pattern) => pattern.as_str() == "*",
215            Self::Regex(pattern) | Self::RegexI(pattern) => pattern.as_str().is_empty(),
216        }
217    }
218
219    /// Returns true if this pattern matches input strings exactly.
220    pub fn is_exact(&self) -> bool {
221        self.as_exact().is_some()
222    }
223
224    /// Returns a literal pattern if this should match input strings exactly.
225    ///
226    /// This can be used to optimize map lookup by exact key.
227    pub fn as_exact(&self) -> Option<&str> {
228        // TODO: Handle trivial case‐insensitive patterns here? It might make people
229        // expect they can use case‐insensitive patterns in contexts where they
230        // generally can’t.
231        match self {
232            Self::Exact(literal) => Some(literal),
233            _ => None,
234        }
235    }
236
237    /// Returns the original string of this pattern.
238    pub fn as_str(&self) -> &str {
239        match self {
240            Self::Exact(literal) => literal,
241            Self::ExactI(literal) => literal,
242            Self::Substring(needle) => needle,
243            Self::SubstringI(needle) => needle,
244            Self::Glob(pattern) => pattern.as_str(),
245            Self::GlobI(pattern) => pattern.as_str(),
246            Self::Regex(pattern) => pattern.as_str(),
247            Self::RegexI(pattern) => pattern.as_str(),
248        }
249    }
250
251    /// Converts this pattern to a glob string. Returns `None` if the pattern
252    /// can't be represented as a glob.
253    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
254        // TODO: Handle trivial case‐insensitive patterns here? It might make people
255        // expect they can use case‐insensitive patterns in contexts where they
256        // generally can’t.
257        match self {
258            Self::Exact(literal) => Some(globset::escape(literal).into()),
259            Self::Substring(needle) => {
260                if needle.is_empty() {
261                    Some("*".into())
262                } else {
263                    Some(format!("*{}*", globset::escape(needle)).into())
264                }
265            }
266            Self::Glob(pattern) => Some(pattern.as_str().into()),
267            Self::ExactI(_) => None,
268            Self::SubstringI(_) => None,
269            Self::GlobI(_) => None,
270            Self::Regex(_) => None,
271            Self::RegexI(_) => None,
272        }
273    }
274
275    /// Returns true if this pattern matches the `haystack` string.
276    ///
277    /// When matching against a case‐insensitive pattern, only ASCII case
278    /// differences are currently folded. This may change in the future.
279    pub fn is_match(&self, haystack: &str) -> bool {
280        self.is_match_bytes(haystack.as_bytes())
281    }
282
283    /// Returns true if this pattern matches the `haystack` bytes.
284    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
285        // TODO: Unicode case folding is complicated and can be
286        // locale‐specific. The `globset` crate and Gitoxide only deal with
287        // ASCII case folding, so we do the same here; a more elaborate case
288        // folding system will require making sure those behave in a matching
289        // manner where relevant. That said, regex patterns are unicode-aware by
290        // default, so we already have some inconsistencies.
291        //
292        // Care will need to be taken regarding normalization and the choice of an
293        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
294        // that it is compatible with the standard case‐insensitivity of haystack
295        // components (like internationalized domain names in email addresses). The
296        // availability of normalization and case folding schemes in database backends
297        // will also need to be considered. A locale‐specific case folding
298        // scheme would likely not be appropriate for Jujutsu.
299        //
300        // For some discussion of this topic, see:
301        // <https://github.com/unicode-org/icu4x/issues/3151>
302        match self {
303            Self::Exact(literal) => haystack == literal.as_bytes(),
304            Self::ExactI(literal) => haystack.eq_ignore_ascii_case(literal.as_bytes()),
305            Self::Substring(needle) => haystack.contains_str(needle),
306            Self::SubstringI(needle) => haystack
307                .to_ascii_lowercase()
308                .contains_str(needle.to_ascii_lowercase()),
309            // (Glob, GlobI) and (Regex, RegexI) pairs are identical here, but
310            // callers might want to translate these to backend-specific query
311            // differently.
312            Self::Glob(pattern) => pattern.is_match(haystack),
313            Self::GlobI(pattern) => pattern.is_match(haystack),
314            Self::Regex(pattern) => pattern.is_match(haystack),
315            Self::RegexI(pattern) => pattern.is_match(haystack),
316        }
317    }
318
319    /// Creates matcher object from this pattern.
320    pub fn to_matcher(&self) -> StringMatcher {
321        if self.is_all() {
322            StringMatcher::All
323        } else if let Some(literal) = self.as_exact() {
324            StringMatcher::Exact(literal.to_owned())
325        } else {
326            // TODO: fully migrate is_match*() to StringMatcher, and add
327            // pattern.to_match_fn()?
328            let pattern = self.clone();
329            StringMatcher::Fn(Box::new(move |haystack| pattern.is_match_bytes(haystack)))
330        }
331    }
332
333    /// Converts the pattern into a bytes regex.
334    pub fn to_regex(&self) -> regex::bytes::Regex {
335        match self {
336            Self::Exact(literal) => {
337                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
338                    .build()
339                    .expect("impossible to fail to compile regex of literal")
340            }
341            Self::ExactI(literal) => {
342                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
343                    .case_insensitive(true)
344                    .build()
345                    .expect("impossible to fail to compile regex of literal")
346            }
347            Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
348                .build()
349                .expect("impossible to fail to compile regex of literal"),
350            Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
351                .case_insensitive(true)
352                .build()
353                .expect("impossible to fail to compile regex of literal"),
354            Self::Glob(glob_pattern) => glob_pattern.to_regex(),
355            // The regex generated represents the case insensitivity itself
356            Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
357            Self::Regex(regex) => regex.clone(),
358            Self::RegexI(regex) => regex.clone(),
359        }
360    }
361}
362
363impl fmt::Display for StringPattern {
364    /// Shows the original string of this pattern.
365    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
366        write!(f, "{}", self.as_str())
367    }
368}
369
370/// AST-level representation of the string matcher expression.
371#[derive(Clone, Debug)]
372pub enum StringExpression {
373    // None and All can be represented by using Pattern. Add them if needed.
374    /// Matches pattern.
375    Pattern(Box<StringPattern>),
376    /// Matches anything other than the expression.
377    NotIn(Box<Self>),
378    /// Matches one of the expressions.
379    Union(Box<Self>, Box<Self>),
380    /// Matches both expressions.
381    Intersection(Box<Self>, Box<Self>),
382}
383
384impl StringExpression {
385    /// Expression that matches nothing.
386    pub fn none() -> Self {
387        Self::all().negated()
388    }
389
390    /// Expression that matches everything.
391    pub fn all() -> Self {
392        Self::pattern(StringPattern::all())
393    }
394
395    /// Expression that matches the given pattern.
396    pub fn pattern(pattern: StringPattern) -> Self {
397        Self::Pattern(Box::new(pattern))
398    }
399
400    /// Expression that matches strings exactly.
401    pub fn exact(src: impl Into<String>) -> Self {
402        Self::pattern(StringPattern::exact(src))
403    }
404
405    /// Expression that matches substrings.
406    pub fn substring(src: impl Into<String>) -> Self {
407        Self::pattern(StringPattern::substring(src))
408    }
409
410    /// Expression that matches anything other than this expression.
411    pub fn negated(self) -> Self {
412        Self::NotIn(Box::new(self))
413    }
414
415    /// Expression that matches `self` or `other` (or both).
416    pub fn union(self, other: Self) -> Self {
417        Self::Union(Box::new(self), Box::new(other))
418    }
419
420    /// Expression that matches any of the given `expressions`.
421    pub fn union_all(expressions: Vec<Self>) -> Self {
422        to_binary_expression(expressions, &Self::none, &Self::union)
423    }
424
425    /// Expression that matches both `self` and `other`.
426    pub fn intersection(self, other: Self) -> Self {
427        Self::Intersection(Box::new(self), Box::new(other))
428    }
429
430    fn dfs_pre(&self) -> impl Iterator<Item = &Self> {
431        let mut stack: Vec<&Self> = vec![self];
432        iter::from_fn(move || {
433            let expr = stack.pop()?;
434            match expr {
435                Self::Pattern(_) => {}
436                Self::NotIn(expr) => stack.push(expr),
437                Self::Union(expr1, expr2) | Self::Intersection(expr1, expr2) => {
438                    stack.push(expr2);
439                    stack.push(expr1);
440                }
441            }
442            Some(expr)
443        })
444    }
445
446    /// Iterates exact string patterns recursively from this expression.
447    ///
448    /// For example, `"a", "b", "c"` will be yielded in that order for
449    /// expression `"a" | glob:"?" & "b" | ~"c"`.
450    pub fn exact_strings(&self) -> impl Iterator<Item = &str> {
451        // pre/post-ordering doesn't matter so long as children are visited from
452        // left to right.
453        self.dfs_pre().filter_map(|expr| match expr {
454            Self::Pattern(pattern) => pattern.as_exact(),
455            _ => None,
456        })
457    }
458
459    /// Transforms the expression tree to matcher object.
460    pub fn to_matcher(&self) -> StringMatcher {
461        match self {
462            Self::Pattern(pattern) => pattern.to_matcher(),
463            Self::NotIn(expr) => {
464                let p = expr.to_matcher().into_match_fn();
465                StringMatcher::Fn(Box::new(move |haystack| !p(haystack)))
466            }
467            Self::Union(expr1, expr2) => {
468                let p1 = expr1.to_matcher().into_match_fn();
469                let p2 = expr2.to_matcher().into_match_fn();
470                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) || p2(haystack)))
471            }
472            Self::Intersection(expr1, expr2) => {
473                let p1 = expr1.to_matcher().into_match_fn();
474                let p2 = expr2.to_matcher().into_match_fn();
475                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) && p2(haystack)))
476            }
477        }
478    }
479}
480
481/// Constructs binary tree from `expressions` list, `unit` node, and associative
482/// `binary` operation.
483fn to_binary_expression<T>(
484    expressions: Vec<T>,
485    unit: &impl Fn() -> T,
486    binary: &impl Fn(T, T) -> T,
487) -> T {
488    match expressions.len() {
489        0 => unit(),
490        1 => expressions.into_iter().next().unwrap(),
491        _ => {
492            // Build balanced tree to minimize the recursion depth.
493            let mut left = expressions;
494            let right = left.split_off(left.len() / 2);
495            binary(
496                to_binary_expression(left, unit, binary),
497                to_binary_expression(right, unit, binary),
498            )
499        }
500    }
501}
502
503type DynMatchFn = dyn Fn(&[u8]) -> bool;
504
505/// Matcher for strings and bytes.
506pub enum StringMatcher {
507    /// Matches any strings.
508    All,
509    /// Matches strings exactly.
510    Exact(String),
511    /// Tests matches by arbitrary function.
512    Fn(Box<DynMatchFn>),
513}
514
515impl StringMatcher {
516    /// Matcher that matches any strings.
517    pub const fn all() -> Self {
518        Self::All
519    }
520
521    /// Matcher that matches `src` exactly.
522    pub fn exact(src: impl Into<String>) -> Self {
523        Self::Exact(src.into())
524    }
525
526    /// Returns true if this matches the `haystack` string.
527    pub fn is_match(&self, haystack: &str) -> bool {
528        self.is_match_bytes(haystack.as_bytes())
529    }
530
531    /// Returns true if this matches the `haystack` bytes.
532    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
533        match self {
534            Self::All => true,
535            Self::Exact(needle) => haystack == needle.as_bytes(),
536            Self::Fn(predicate) => predicate(haystack),
537        }
538    }
539
540    fn into_match_fn(self) -> Box<DynMatchFn> {
541        match self {
542            Self::All => Box::new(|_haystack| true),
543            Self::Exact(needle) => Box::new(move |haystack| haystack == needle.as_bytes()),
544            Self::Fn(predicate) => predicate,
545        }
546    }
547
548    /// Iterates entries of the given `map` whose string keys match this.
549    pub fn filter_btree_map<'a, K: Borrow<str> + Ord, V>(
550        &self,
551        map: &'a BTreeMap<K, V>,
552    ) -> impl Iterator<Item = (&'a K, &'a V)> {
553        self.filter_btree_map_with(map, |key| key, |key| key)
554    }
555
556    /// Iterates entries of the given `map` whose string-like keys match this.
557    ///
558    /// The borrowed key type is constrained by the `Deref::Target`. It must be
559    /// convertible to/from `str`.
560    pub fn filter_btree_map_as_deref<'a, K, V>(
561        &self,
562        map: &'a BTreeMap<K, V>,
563    ) -> impl Iterator<Item = (&'a K, &'a V)>
564    where
565        K: Borrow<K::Target> + Deref + Ord,
566        K::Target: AsRef<str> + Ord,
567        str: AsRef<K::Target>,
568    {
569        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
570    }
571
572    fn filter_btree_map_with<'a, K, Q, V>(
573        &self,
574        map: &'a BTreeMap<K, V>,
575        from_key: impl Fn(&Q) -> &str,
576        to_key: impl Fn(&str) -> &Q,
577    ) -> impl Iterator<Item = (&'a K, &'a V)>
578    where
579        K: Borrow<Q> + Ord,
580        Q: Ord + ?Sized,
581    {
582        match self {
583            Self::All => Either::Left(map.iter()),
584            Self::Exact(key) => {
585                Either::Right(Either::Left(map.get_key_value(to_key(key)).into_iter()))
586            }
587            Self::Fn(predicate) => {
588                Either::Right(Either::Right(map.iter().filter(move |&(key, _)| {
589                    predicate(from_key(key.borrow()).as_bytes())
590                })))
591            }
592        }
593    }
594}
595
596impl Debug for StringMatcher {
597    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
598        match self {
599            Self::All => write!(f, "All"),
600            Self::Exact(needle) => f.debug_tuple("Exact").field(needle).finish(),
601            Self::Fn(_) => f.debug_tuple("Fn").finish_non_exhaustive(),
602        }
603    }
604}
605
606#[cfg(test)]
607mod tests {
608    use assert_matches::assert_matches;
609    use itertools::Itertools as _;
610    use maplit::btreemap;
611
612    use super::*;
613
614    fn insta_settings() -> insta::Settings {
615        let mut settings = insta::Settings::clone_current();
616        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
617        // the output more readable.
618        for _ in 0..4 {
619            settings.add_filter(
620                r"(?x)
621                \b([A-Z]\w*)\(\n
622                    \s*(.{1,60}),\n
623                \s*\)",
624                "$1($2)",
625            );
626        }
627        settings
628    }
629
630    #[test]
631    fn test_string_pattern_to_glob() {
632        assert_eq!(StringPattern::all().to_glob(), Some("*".into()));
633        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
634        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
635        assert_eq!(
636            StringPattern::glob("*").unwrap().to_glob(),
637            Some("*".into())
638        );
639        assert_eq!(
640            StringPattern::Substring("a".into()).to_glob(),
641            Some("*a*".into())
642        );
643        assert_eq!(
644            StringPattern::Substring("*".into()).to_glob(),
645            Some("*[*]*".into())
646        );
647    }
648
649    #[test]
650    fn test_parse() {
651        // Parse specific pattern kinds.
652        assert_matches!(
653            StringPattern::parse("exact:foo"),
654            Ok(StringPattern::Exact(s)) if s == "foo"
655        );
656        assert_matches!(
657            StringPattern::from_str_kind("foo", "exact"),
658            Ok(StringPattern::Exact(s)) if s == "foo"
659        );
660        assert_matches!(
661            StringPattern::parse("glob:foo*"),
662            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
663        );
664        assert_matches!(
665            StringPattern::from_str_kind("foo*", "glob"),
666            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
667        );
668        assert_matches!(
669            StringPattern::parse("substring:foo"),
670            Ok(StringPattern::Substring(s)) if s == "foo"
671        );
672        assert_matches!(
673            StringPattern::from_str_kind("foo", "substring"),
674            Ok(StringPattern::Substring(s)) if s == "foo"
675        );
676        assert_matches!(
677            StringPattern::parse("substring-i:foo"),
678            Ok(StringPattern::SubstringI(s)) if s == "foo"
679        );
680        assert_matches!(
681            StringPattern::from_str_kind("foo", "substring-i"),
682            Ok(StringPattern::SubstringI(s)) if s == "foo"
683        );
684        assert_matches!(
685            StringPattern::parse("regex:foo"),
686            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
687        );
688        assert_matches!(
689            StringPattern::from_str_kind("foo", "regex"),
690            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
691        );
692        assert_matches!(
693            StringPattern::parse("regex-i:foo"),
694            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
695        );
696        assert_matches!(
697            StringPattern::from_str_kind("foo", "regex-i"),
698            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
699        );
700
701        // Parse a pattern that contains a : itself.
702        assert_matches!(
703            StringPattern::parse("exact:foo:bar"),
704            Ok(StringPattern::Exact(s)) if s == "foo:bar"
705        );
706
707        // If no kind is specified, the input is treated as an exact pattern.
708        assert_matches!(
709            StringPattern::parse("foo"),
710            Ok(StringPattern::Exact(s)) if s == "foo"
711        );
712
713        // Parsing an unknown prefix results in an error.
714        assert_matches!(
715            StringPattern::parse("unknown-prefix:foo"),
716            Err(StringPatternParseError::InvalidKind(_))
717        );
718    }
719
720    #[test]
721    fn test_glob_is_match() {
722        assert!(StringPattern::glob("foo").unwrap().is_match("foo"));
723        assert!(!StringPattern::glob("foo").unwrap().is_match("foobar"));
724
725        // "." in string isn't any special
726        assert!(StringPattern::glob("*").unwrap().is_match(".foo"));
727
728        // "/" in string isn't any special
729        assert!(StringPattern::glob("*").unwrap().is_match("foo/bar"));
730        assert!(StringPattern::glob(r"*/*").unwrap().is_match("foo/bar"));
731        assert!(!StringPattern::glob(r"*/*").unwrap().is_match(r"foo\bar"));
732
733        // "\" is an escape character
734        assert!(!StringPattern::glob(r"*\*").unwrap().is_match("foo/bar"));
735        assert!(StringPattern::glob(r"*\*").unwrap().is_match("foo*"));
736        assert!(StringPattern::glob(r"\\").unwrap().is_match(r"\"));
737
738        // "*" matches newline
739        assert!(StringPattern::glob(r"*").unwrap().is_match("foo\nbar"));
740
741        assert!(!StringPattern::glob("f?O").unwrap().is_match("Foo"));
742        assert!(StringPattern::glob_i("f?O").unwrap().is_match("Foo"));
743    }
744
745    #[test]
746    fn test_regex_is_match() {
747        // Unicode mode is enabled by default
748        assert!(StringPattern::regex(r"^\w$").unwrap().is_match("\u{c0}"));
749        assert!(StringPattern::regex(r"^.$").unwrap().is_match("\u{c0}"));
750        // ASCII-compatible mode should also work
751        assert!(StringPattern::regex(r"^(?-u)\w$").unwrap().is_match("a"));
752        assert!(
753            !StringPattern::regex(r"^(?-u)\w$")
754                .unwrap()
755                .is_match("\u{c0}")
756        );
757        assert!(
758            StringPattern::regex(r"^(?-u).{2}$")
759                .unwrap()
760                .is_match("\u{c0}")
761        );
762    }
763
764    #[test]
765    fn test_string_pattern_to_regex() {
766        let check = |pattern: StringPattern, match_to: &str| {
767            let regex = pattern.to_regex();
768            regex.is_match(match_to.as_bytes())
769        };
770        assert!(check(StringPattern::exact("$a"), "$a"));
771        assert!(!check(StringPattern::exact("$a"), "$A"));
772        assert!(!check(StringPattern::exact("a"), "aa"));
773        assert!(!check(StringPattern::exact("a"), "aa"));
774        assert!(check(StringPattern::exact_i("a"), "A"));
775        assert!(check(StringPattern::substring("$a"), "$abc"));
776        assert!(!check(StringPattern::substring("$a"), "$Abc"));
777        assert!(check(StringPattern::substring_i("$a"), "$Abc"));
778        assert!(!check(StringPattern::glob("a").unwrap(), "A"));
779        assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
780        assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
781        assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
782        assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
783    }
784
785    #[test]
786    fn test_exact_pattern_to_matcher() {
787        assert_matches!(
788            StringPattern::exact("").to_matcher(),
789            StringMatcher::Exact(needle) if needle.is_empty()
790        );
791        assert_matches!(
792            StringPattern::exact("x").to_matcher(),
793            StringMatcher::Exact(needle) if needle == "x"
794        );
795
796        assert_matches!(
797            StringPattern::exact_i("").to_matcher(),
798            StringMatcher::Fn(_) // or Exact
799        );
800        assert_matches!(
801            StringPattern::exact_i("x").to_matcher(),
802            StringMatcher::Fn(_)
803        );
804    }
805
806    #[test]
807    fn test_substring_pattern_to_matcher() {
808        assert_matches!(
809            StringPattern::substring("").to_matcher(),
810            StringMatcher::All
811        );
812        assert_matches!(
813            StringPattern::substring("x").to_matcher(),
814            StringMatcher::Fn(_)
815        );
816
817        assert_matches!(
818            StringPattern::substring_i("").to_matcher(),
819            StringMatcher::All
820        );
821        assert_matches!(
822            StringPattern::substring_i("x").to_matcher(),
823            StringMatcher::Fn(_)
824        );
825    }
826
827    #[test]
828    fn test_glob_pattern_to_matcher() {
829        assert_matches!(
830            StringPattern::glob("").unwrap().to_matcher(),
831            StringMatcher::Exact(_)
832        );
833        assert_matches!(
834            StringPattern::glob("x").unwrap().to_matcher(),
835            StringMatcher::Exact(_)
836        );
837        assert_matches!(
838            StringPattern::glob("x?").unwrap().to_matcher(),
839            StringMatcher::Fn(_)
840        );
841        assert_matches!(
842            StringPattern::glob("*").unwrap().to_matcher(),
843            StringMatcher::All
844        );
845        assert_matches!(
846            StringPattern::glob(r"\\").unwrap().to_matcher(),
847            StringMatcher::Fn(_) // or Exact(r"\")
848        );
849
850        assert_matches!(
851            StringPattern::glob_i("").unwrap().to_matcher(),
852            StringMatcher::Fn(_) // or Exact
853        );
854        assert_matches!(
855            StringPattern::glob_i("x").unwrap().to_matcher(),
856            StringMatcher::Fn(_)
857        );
858        assert_matches!(
859            StringPattern::glob_i("x?").unwrap().to_matcher(),
860            StringMatcher::Fn(_)
861        );
862        assert_matches!(
863            StringPattern::glob_i("*").unwrap().to_matcher(),
864            StringMatcher::All
865        );
866    }
867
868    #[test]
869    fn test_regex_pattern_to_matcher() {
870        assert_matches!(
871            StringPattern::regex("").unwrap().to_matcher(),
872            StringMatcher::All
873        );
874        assert_matches!(
875            StringPattern::regex("x").unwrap().to_matcher(),
876            StringMatcher::Fn(_)
877        );
878        assert_matches!(
879            StringPattern::regex(".").unwrap().to_matcher(),
880            StringMatcher::Fn(_)
881        );
882
883        assert_matches!(
884            StringPattern::regex_i("").unwrap().to_matcher(),
885            StringMatcher::All
886        );
887        assert_matches!(
888            StringPattern::regex_i("x").unwrap().to_matcher(),
889            StringMatcher::Fn(_)
890        );
891        assert_matches!(
892            StringPattern::regex_i(".").unwrap().to_matcher(),
893            StringMatcher::Fn(_)
894        );
895    }
896
897    #[test]
898    fn test_union_all_expressions() {
899        let settings = insta_settings();
900        let _guard = settings.bind_to_scope();
901
902        insta::assert_debug_snapshot!(
903            StringExpression::union_all(vec![]),
904            @r#"NotIn(Pattern(Substring("")))"#);
905        insta::assert_debug_snapshot!(
906            StringExpression::union_all(vec![StringExpression::exact("a")]),
907            @r#"Pattern(Exact("a"))"#);
908        insta::assert_debug_snapshot!(
909            StringExpression::union_all(vec![
910                StringExpression::exact("a"),
911                StringExpression::exact("b"),
912            ]),
913            @r#"
914        Union(
915            Pattern(Exact("a")),
916            Pattern(Exact("b")),
917        )
918        "#);
919        insta::assert_debug_snapshot!(
920            StringExpression::union_all(vec![
921                StringExpression::exact("a"),
922                StringExpression::exact("b"),
923                StringExpression::exact("c"),
924            ]),
925            @r#"
926        Union(
927            Pattern(Exact("a")),
928            Union(
929                Pattern(Exact("b")),
930                Pattern(Exact("c")),
931            ),
932        )
933        "#);
934        insta::assert_debug_snapshot!(
935            StringExpression::union_all(vec![
936                StringExpression::exact("a"),
937                StringExpression::exact("b"),
938                StringExpression::exact("c"),
939                StringExpression::exact("d"),
940            ]),
941            @r#"
942        Union(
943            Union(
944                Pattern(Exact("a")),
945                Pattern(Exact("b")),
946            ),
947            Union(
948                Pattern(Exact("c")),
949                Pattern(Exact("d")),
950            ),
951        )
952        "#);
953    }
954
955    #[test]
956    fn test_exact_strings_in_expression() {
957        assert_eq!(
958            StringExpression::all().exact_strings().collect_vec(),
959            [""; 0]
960        );
961        assert_eq!(
962            StringExpression::union_all(vec![
963                StringExpression::exact("a"),
964                StringExpression::substring("b"),
965                StringExpression::intersection(
966                    StringExpression::exact("c"),
967                    StringExpression::exact("d").negated(),
968                ),
969            ])
970            .exact_strings()
971            .collect_vec(),
972            ["a", "c", "d"]
973        );
974    }
975
976    #[test]
977    fn test_trivial_expression_to_matcher() {
978        assert_matches!(StringExpression::all().to_matcher(), StringMatcher::All);
979        assert_matches!(
980            StringExpression::exact("x").to_matcher(),
981            StringMatcher::Exact(needle) if needle == "x"
982        );
983    }
984
985    #[test]
986    fn test_compound_expression_to_matcher() {
987        let matcher = StringExpression::exact("foo").negated().to_matcher();
988        assert!(!matcher.is_match("foo"));
989        assert!(matcher.is_match("bar"));
990
991        let matcher = StringExpression::union(
992            StringExpression::exact("foo"),
993            StringExpression::exact("bar"),
994        )
995        .to_matcher();
996        assert!(matcher.is_match("foo"));
997        assert!(matcher.is_match("bar"));
998        assert!(!matcher.is_match("baz"));
999
1000        let matcher = StringExpression::intersection(
1001            StringExpression::substring("a"),
1002            StringExpression::substring("r"),
1003        )
1004        .to_matcher();
1005        assert!(!matcher.is_match("foo"));
1006        assert!(matcher.is_match("bar"));
1007        assert!(!matcher.is_match("baz"));
1008    }
1009
1010    #[test]
1011    fn test_matcher_is_match() {
1012        assert!(StringMatcher::all().is_match(""));
1013        assert!(StringMatcher::all().is_match("foo"));
1014        assert!(!StringMatcher::exact("o").is_match(""));
1015        assert!(!StringMatcher::exact("o").is_match("foo"));
1016        assert!(StringMatcher::exact("foo").is_match("foo"));
1017        assert!(StringPattern::substring("o").to_matcher().is_match("foo"));
1018    }
1019
1020    #[test]
1021    fn test_matcher_filter_btree_map() {
1022        let data = btreemap! {
1023            "bar" => (),
1024            "baz" => (),
1025            "foo" => (),
1026        };
1027        let filter = |matcher: &StringMatcher| {
1028            matcher
1029                .filter_btree_map(&data)
1030                .map(|(&key, ())| key)
1031                .collect_vec()
1032        };
1033        assert_eq!(filter(&StringMatcher::all()), vec!["bar", "baz", "foo"]);
1034        assert_eq!(filter(&StringMatcher::exact("o")), vec![""; 0]);
1035        assert_eq!(filter(&StringMatcher::exact("foo")), vec!["foo"]);
1036        assert_eq!(
1037            filter(&StringPattern::substring("o").to_matcher()),
1038            vec!["foo"]
1039        );
1040        assert_eq!(
1041            filter(&StringPattern::substring("a").to_matcher()),
1042            vec!["bar", "baz"]
1043        );
1044    }
1045}