Skip to main content

jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::iter;
23use std::ops::Deref;
24
25use bstr::ByteSlice as _;
26use either::Either;
27use globset::Glob;
28use globset::GlobBuilder;
29use thiserror::Error;
30
/// Error occurred during pattern string parsing.
///
/// Returned by the fallible [`StringPattern`] constructors in this module.
#[derive(Debug, Error)]
pub enum StringPatternParseError {
    /// Unknown pattern kind is specified.
    ///
    /// The payload is the rejected kind name; it is echoed in the message.
    #[error("Invalid string pattern kind `{0}:`")]
    InvalidKind(String),
    /// Failed to parse glob pattern.
    ///
    /// Wraps the [`globset::Error`] reported while building the glob.
    #[error(transparent)]
    GlobPattern(globset::Error),
    /// Failed to parse regular expression.
    ///
    /// Wraps the [`regex::Error`] reported while compiling the expression.
    #[error(transparent)]
    Regex(regex::Error),
}
44
/// A wrapper for [`Glob`] with a more concise `Debug` impl.
///
/// The `Debug` output only shows the original glob source text.
#[derive(Clone)]
pub struct GlobPattern {
    // Parsed glob; its source text and regex translation are read from here.
    glob: Glob,
}
50
51impl GlobPattern {
52    /// Returns the original glob pattern.
53    pub fn as_str(&self) -> &str {
54        self.glob.glob()
55    }
56
57    /// Converts this glob pattern to a bytes regex.
58    pub fn to_regex(&self) -> regex::bytes::Regex {
59        // Based on new_regex() in globset. We don't use GlobMatcher::is_match(path)
60        // because the input string shouldn't be normalized as path.
61        regex::bytes::RegexBuilder::new(self.glob.regex())
62            .dot_matches_new_line(true)
63            .build()
64            .expect("glob regex should be valid")
65    }
66}
67
68impl Debug for GlobPattern {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
71    }
72}
73
74fn parse_glob(src: &str, icase: bool) -> Result<GlobPattern, StringPatternParseError> {
75    let glob = GlobBuilder::new(src)
76        .case_insensitive(icase)
77        // Don't use platform-dependent default. This pattern isn't meant for
78        // testing file-system paths. If backslash escape were disabled, "\" in
79        // pattern would be normalized to "/" on Windows.
80        .backslash_escape(true)
81        .build()
82        .map_err(StringPatternParseError::GlobPattern)?;
83    Ok(GlobPattern { glob })
84}
85
/// Returns true if `c` has a special meaning in the glob syntax used here.
fn is_glob_char(c: char) -> bool {
    // Same set as globset::escape(), plus backslash because it is parsed as
    // an escape sequence on all platforms.
    const GLOB_CHARS: [char; 7] = ['?', '*', '[', ']', '{', '}', '\\'];
    GLOB_CHARS.contains(&c)
}
91
/// Pattern to be tested against string property like commit description or
/// bookmark name.
///
/// Each matching mode has a case-sensitive variant and a case-insensitive
/// variant (suffixed with `I`).
#[derive(Clone, Debug)]
pub enum StringPattern {
    /// Matches strings exactly.
    Exact(String),
    /// Matches strings case-insensitively.
    ExactI(String),
    /// Matches strings that contain a substring.
    Substring(String),
    /// Matches strings that case-insensitively contain a substring.
    SubstringI(String),
    /// Matches with a Unix-style shell wildcard pattern.
    Glob(Box<GlobPattern>),
    /// Matches with a case-insensitive Unix-style shell wildcard pattern.
    GlobI(Box<GlobPattern>),
    /// Matches substrings with a regular expression.
    Regex(regex::bytes::Regex),
    /// Matches substrings with a case-insensitive regular expression.
    RegexI(regex::bytes::Regex),
}
113
114impl StringPattern {
115    /// Pattern that matches any string.
116    pub const fn all() -> Self {
117        Self::Substring(String::new())
118    }
119
120    /// Constructs a pattern that matches exactly.
121    pub fn exact(src: impl Into<String>) -> Self {
122        Self::Exact(src.into())
123    }
124
125    /// Constructs a pattern that matches case‐insensitively.
126    pub fn exact_i(src: impl Into<String>) -> Self {
127        Self::ExactI(src.into())
128    }
129
130    /// Constructs a pattern that matches a substring.
131    pub fn substring(src: impl Into<String>) -> Self {
132        Self::Substring(src.into())
133    }
134
135    /// Constructs a pattern that case‐insensitively matches a substring.
136    pub fn substring_i(src: impl Into<String>) -> Self {
137        Self::SubstringI(src.into())
138    }
139
140    /// Parses the given string as a glob pattern.
141    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
142        if !src.contains(is_glob_char) {
143            return Ok(Self::exact(src));
144        }
145        Ok(Self::Glob(Box::new(parse_glob(src, false)?)))
146    }
147
148    /// Parses the given string as a case‐insensitive glob pattern.
149    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
150        // No special case for !src.contains(is_glob_char) because it's unclear
151        // whether we'll use unicode case comparison for "exact-i" patterns.
152        // "glob-i" should always be ASCII-based.
153        Ok(Self::GlobI(Box::new(parse_glob(src, true)?)))
154    }
155
156    /// Parses the given string as a regular expression.
157    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
158        let pattern = regex::bytes::Regex::new(src).map_err(StringPatternParseError::Regex)?;
159        Ok(Self::Regex(pattern))
160    }
161
162    /// Parses the given string as a case-insensitive regular expression.
163    pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
164        let pattern = regex::bytes::RegexBuilder::new(src)
165            .case_insensitive(true)
166            .build()
167            .map_err(StringPatternParseError::Regex)?;
168        Ok(Self::RegexI(pattern))
169    }
170
171    /// Parses the given string as a pattern of the specified `kind`.
172    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
173        match kind {
174            "exact" => Ok(Self::exact(src)),
175            "exact-i" => Ok(Self::exact_i(src)),
176            "substring" => Ok(Self::substring(src)),
177            "substring-i" => Ok(Self::substring_i(src)),
178            "glob" => Self::glob(src),
179            "glob-i" => Self::glob_i(src),
180            "regex" => Self::regex(src),
181            "regex-i" => Self::regex_i(src),
182            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
183        }
184    }
185
186    /// Returns true if this pattern trivially matches any input strings.
187    pub fn is_all(&self) -> bool {
188        match self {
189            Self::Exact(_) | Self::ExactI(_) => false,
190            Self::Substring(needle) | Self::SubstringI(needle) => needle.is_empty(),
191            Self::Glob(pattern) | Self::GlobI(pattern) => pattern.as_str() == "*",
192            Self::Regex(pattern) | Self::RegexI(pattern) => pattern.as_str().is_empty(),
193        }
194    }
195
196    /// Returns true if this pattern matches input strings exactly.
197    pub fn is_exact(&self) -> bool {
198        self.as_exact().is_some()
199    }
200
201    /// Returns a literal pattern if this should match input strings exactly.
202    ///
203    /// This can be used to optimize map lookup by exact key.
204    pub fn as_exact(&self) -> Option<&str> {
205        // TODO: Handle trivial case‐insensitive patterns here? It might make people
206        // expect they can use case‐insensitive patterns in contexts where they
207        // generally can’t.
208        match self {
209            Self::Exact(literal) => Some(literal),
210            _ => None,
211        }
212    }
213
214    /// Returns the original string of this pattern.
215    pub fn as_str(&self) -> &str {
216        match self {
217            Self::Exact(literal) => literal,
218            Self::ExactI(literal) => literal,
219            Self::Substring(needle) => needle,
220            Self::SubstringI(needle) => needle,
221            Self::Glob(pattern) => pattern.as_str(),
222            Self::GlobI(pattern) => pattern.as_str(),
223            Self::Regex(pattern) => pattern.as_str(),
224            Self::RegexI(pattern) => pattern.as_str(),
225        }
226    }
227
228    /// Converts this pattern to a glob string. Returns `None` if the pattern
229    /// can't be represented as a glob.
230    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
231        // TODO: Handle trivial case‐insensitive patterns here? It might make people
232        // expect they can use case‐insensitive patterns in contexts where they
233        // generally can’t.
234        match self {
235            Self::Exact(literal) => Some(globset::escape(literal).into()),
236            Self::Substring(needle) => {
237                if needle.is_empty() {
238                    Some("*".into())
239                } else {
240                    Some(format!("*{}*", globset::escape(needle)).into())
241                }
242            }
243            Self::Glob(pattern) => Some(pattern.as_str().into()),
244            Self::ExactI(_) => None,
245            Self::SubstringI(_) => None,
246            Self::GlobI(_) => None,
247            Self::Regex(_) => None,
248            Self::RegexI(_) => None,
249        }
250    }
251
252    fn to_match_fn(&self) -> Box<DynMatchFn> {
253        // TODO: Unicode case folding is complicated and can be
254        // locale‐specific. The `globset` crate and Gitoxide only deal with
255        // ASCII case folding, so we do the same here; a more elaborate case
256        // folding system will require making sure those behave in a matching
257        // manner where relevant. That said, regex patterns are unicode-aware by
258        // default, so we already have some inconsistencies.
259        //
260        // Care will need to be taken regarding normalization and the choice of an
261        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
262        // that it is compatible with the standard case‐insensitivity of haystack
263        // components (like internationalized domain names in email addresses). The
264        // availability of normalization and case folding schemes in database backends
265        // will also need to be considered. A locale‐specific case folding
266        // scheme would likely not be appropriate for Jujutsu.
267        //
268        // For some discussion of this topic, see:
269        // <https://github.com/unicode-org/icu4x/issues/3151>
270        match self {
271            Self::Exact(literal) => {
272                let literal = literal.clone();
273                Box::new(move |haystack| haystack == literal.as_bytes())
274            }
275            Self::ExactI(literal) => {
276                let literal = literal.clone();
277                Box::new(move |haystack| haystack.eq_ignore_ascii_case(literal.as_bytes()))
278            }
279            Self::Substring(needle) => {
280                let needle = needle.clone();
281                Box::new(move |haystack| haystack.contains_str(&needle))
282            }
283            Self::SubstringI(needle) => {
284                let needle = needle.to_ascii_lowercase();
285                Box::new(move |haystack| haystack.to_ascii_lowercase().contains_str(&needle))
286            }
287            // (Glob, GlobI) and (Regex, RegexI) pairs are identical here, but
288            // callers might want to translate these to backend-specific query
289            // differently.
290            Self::Glob(pattern) | Self::GlobI(pattern) => {
291                let pattern = pattern.to_regex();
292                Box::new(move |haystack| pattern.is_match(haystack))
293            }
294            Self::Regex(pattern) | Self::RegexI(pattern) => {
295                let pattern = pattern.clone();
296                Box::new(move |haystack| pattern.is_match(haystack))
297            }
298        }
299    }
300
301    /// Creates matcher object from this pattern.
302    pub fn to_matcher(&self) -> StringMatcher {
303        if self.is_all() {
304            StringMatcher::All
305        } else if let Some(literal) = self.as_exact() {
306            StringMatcher::Exact(literal.to_owned())
307        } else {
308            StringMatcher::Fn(self.to_match_fn())
309        }
310    }
311
312    /// Converts the pattern into a bytes regex.
313    pub fn to_regex(&self) -> regex::bytes::Regex {
314        match self {
315            Self::Exact(literal) => {
316                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
317                    .build()
318                    .expect("impossible to fail to compile regex of literal")
319            }
320            Self::ExactI(literal) => {
321                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
322                    .case_insensitive(true)
323                    .build()
324                    .expect("impossible to fail to compile regex of literal")
325            }
326            Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
327                .build()
328                .expect("impossible to fail to compile regex of literal"),
329            Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
330                .case_insensitive(true)
331                .build()
332                .expect("impossible to fail to compile regex of literal"),
333            Self::Glob(glob_pattern) => glob_pattern.to_regex(),
334            // The regex generated represents the case insensitivity itself
335            Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
336            Self::Regex(regex) => regex.clone(),
337            Self::RegexI(regex) => regex.clone(),
338        }
339    }
340}
341
/// AST-level representation of the string matcher expression.
///
/// Operands are boxed so the expression can nest recursively.
#[derive(Clone, Debug)]
pub enum StringExpression {
    // None and All can be represented by using Pattern. Add them if needed.
    /// Matches pattern.
    Pattern(Box<StringPattern>),
    /// Matches anything other than the expression.
    NotIn(Box<Self>),
    /// Matches one of the expressions.
    Union(Box<Self>, Box<Self>),
    /// Matches both expressions.
    Intersection(Box<Self>, Box<Self>),
}
355
356impl StringExpression {
357    /// Expression that matches nothing.
358    pub fn none() -> Self {
359        Self::all().negated()
360    }
361
362    /// Expression that matches everything.
363    pub fn all() -> Self {
364        Self::pattern(StringPattern::all())
365    }
366
367    /// Expression that matches the given pattern.
368    pub fn pattern(pattern: StringPattern) -> Self {
369        Self::Pattern(Box::new(pattern))
370    }
371
372    /// Expression that matches strings exactly.
373    pub fn exact(src: impl Into<String>) -> Self {
374        Self::pattern(StringPattern::exact(src))
375    }
376
377    /// Expression that matches substrings.
378    pub fn substring(src: impl Into<String>) -> Self {
379        Self::pattern(StringPattern::substring(src))
380    }
381
382    /// Expression that matches anything other than this expression.
383    pub fn negated(self) -> Self {
384        Self::NotIn(Box::new(self))
385    }
386
387    /// Expression that matches `self` or `other` (or both).
388    pub fn union(self, other: Self) -> Self {
389        Self::Union(Box::new(self), Box::new(other))
390    }
391
392    /// Expression that matches any of the given `expressions`.
393    pub fn union_all(expressions: Vec<Self>) -> Self {
394        to_binary_expression(expressions, &Self::none, &Self::union)
395    }
396
397    /// Expression that matches both `self` and `other`.
398    pub fn intersection(self, other: Self) -> Self {
399        Self::Intersection(Box::new(self), Box::new(other))
400    }
401
402    fn dfs_pre(&self) -> impl Iterator<Item = &Self> {
403        let mut stack: Vec<&Self> = vec![self];
404        iter::from_fn(move || {
405            let expr = stack.pop()?;
406            match expr {
407                Self::Pattern(_) => {}
408                Self::NotIn(expr) => stack.push(expr),
409                Self::Union(expr1, expr2) | Self::Intersection(expr1, expr2) => {
410                    stack.push(expr2);
411                    stack.push(expr1);
412                }
413            }
414            Some(expr)
415        })
416    }
417
418    /// Iterates exact string patterns recursively from this expression.
419    ///
420    /// For example, `"a", "b", "c"` will be yielded in that order for
421    /// expression `"a" | glob:"?" & "b" | ~"c"`.
422    pub fn exact_strings(&self) -> impl Iterator<Item = &str> {
423        // pre/post-ordering doesn't matter so long as children are visited from
424        // left to right.
425        self.dfs_pre().filter_map(|expr| match expr {
426            Self::Pattern(pattern) => pattern.as_exact(),
427            _ => None,
428        })
429    }
430
431    /// Transforms the expression tree to matcher object.
432    pub fn to_matcher(&self) -> StringMatcher {
433        match self {
434            Self::Pattern(pattern) => pattern.to_matcher(),
435            Self::NotIn(expr) => {
436                let p = expr.to_matcher().into_match_fn();
437                StringMatcher::Fn(Box::new(move |haystack| !p(haystack)))
438            }
439            Self::Union(expr1, expr2) => {
440                let p1 = expr1.to_matcher().into_match_fn();
441                let p2 = expr2.to_matcher().into_match_fn();
442                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) || p2(haystack)))
443            }
444            Self::Intersection(expr1, expr2) => {
445                let p1 = expr1.to_matcher().into_match_fn();
446                let p2 = expr2.to_matcher().into_match_fn();
447                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) && p2(haystack)))
448            }
449        }
450    }
451}
452
/// Folds the `expressions` list into a binary tree using the associative
/// `binary` operation. An empty list evaluates to the `unit` node, and a
/// single-element list evaluates to that element unchanged.
fn to_binary_expression<T>(
    mut expressions: Vec<T>,
    unit: &impl Fn() -> T,
    binary: &impl Fn(T, T) -> T,
) -> T {
    if expressions.len() > 1 {
        // Split in halves so the resulting tree is balanced, which minimizes
        // the recursion depth.
        let upper_half = expressions.split_off(expressions.len() / 2);
        let lhs = to_binary_expression(expressions, unit, binary);
        let rhs = to_binary_expression(upper_half, unit, binary);
        return binary(lhs, rhs);
    }
    // At most one element is left at this point.
    expressions.pop().unwrap_or_else(unit)
}
474
// Signature of the type-erased predicates used by the matchers below: takes
// the haystack bytes and returns whether they match.
type DynMatchFn = dyn Fn(&[u8]) -> bool;
476
/// Matcher for strings and bytes.
///
/// `All` and `Exact` are kept as separate variants so that map lookups can
/// skip the predicate call (see `filter_btree_map_with()`).
pub enum StringMatcher {
    /// Matches any strings.
    All,
    /// Matches strings exactly.
    Exact(String),
    /// Tests matches by arbitrary function.
    Fn(Box<DynMatchFn>),
}
486
487impl StringMatcher {
488    /// Matcher that matches any strings.
489    pub const fn all() -> Self {
490        Self::All
491    }
492
493    /// Matcher that matches `src` exactly.
494    pub fn exact(src: impl Into<String>) -> Self {
495        Self::Exact(src.into())
496    }
497
498    /// Returns true if this matches the `haystack` string.
499    pub fn is_match(&self, haystack: &str) -> bool {
500        self.is_match_bytes(haystack.as_bytes())
501    }
502
503    /// Returns true if this matches the `haystack` bytes.
504    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
505        match self {
506            Self::All => true,
507            Self::Exact(needle) => haystack == needle.as_bytes(),
508            Self::Fn(predicate) => predicate(haystack),
509        }
510    }
511
512    /// Iterates over matching lines in `text`.
513    pub fn match_lines<'a>(&self, text: &'a [u8]) -> impl Iterator<Item = &'a [u8]> {
514        // The pattern is matched line by line so that it can be anchored to line
515        // start/end. For example, exact:"" will match blank lines.
516        text.split_inclusive(|b| *b == b'\n').filter(|line| {
517            let line = line.strip_suffix(b"\n").unwrap_or(line);
518            self.is_match_bytes(line)
519        })
520    }
521
522    fn into_match_fn(self) -> Box<DynMatchFn> {
523        match self {
524            Self::All => Box::new(|_haystack| true),
525            Self::Exact(needle) => Box::new(move |haystack| haystack == needle.as_bytes()),
526            Self::Fn(predicate) => predicate,
527        }
528    }
529
530    /// Iterates entries of the given `map` whose string keys match this.
531    pub fn filter_btree_map<'a, K: Borrow<str> + Ord, V>(
532        &self,
533        map: &'a BTreeMap<K, V>,
534    ) -> impl Iterator<Item = (&'a K, &'a V)> {
535        self.filter_btree_map_with(map, |key| key, |key| key)
536    }
537
538    /// Iterates entries of the given `map` whose string-like keys match this.
539    ///
540    /// The borrowed key type is constrained by the `Deref::Target`. It must be
541    /// convertible to/from `str`.
542    pub fn filter_btree_map_as_deref<'a, K, V>(
543        &self,
544        map: &'a BTreeMap<K, V>,
545    ) -> impl Iterator<Item = (&'a K, &'a V)>
546    where
547        K: Borrow<K::Target> + Deref + Ord,
548        K::Target: AsRef<str> + Ord,
549        str: AsRef<K::Target>,
550    {
551        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
552    }
553
554    fn filter_btree_map_with<'a, K, Q, V>(
555        &self,
556        map: &'a BTreeMap<K, V>,
557        from_key: impl Fn(&Q) -> &str,
558        to_key: impl Fn(&str) -> &Q,
559    ) -> impl Iterator<Item = (&'a K, &'a V)>
560    where
561        K: Borrow<Q> + Ord,
562        Q: Ord + ?Sized,
563    {
564        match self {
565            Self::All => Either::Left(map.iter()),
566            Self::Exact(key) => {
567                Either::Right(Either::Left(map.get_key_value(to_key(key)).into_iter()))
568            }
569            Self::Fn(predicate) => {
570                Either::Right(Either::Right(map.iter().filter(move |&(key, _)| {
571                    predicate(from_key(key.borrow()).as_bytes())
572                })))
573            }
574        }
575    }
576}
577
578impl Debug for StringMatcher {
579    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
580        match self {
581            Self::All => write!(f, "All"),
582            Self::Exact(needle) => f.debug_tuple("Exact").field(needle).finish(),
583            Self::Fn(_) => f.debug_tuple("Fn").finish_non_exhaustive(),
584        }
585    }
586}
587
588#[cfg(test)]
589mod tests {
590    use assert_matches::assert_matches;
591    use itertools::Itertools as _;
592    use maplit::btreemap;
593
594    use super::*;
595    use crate::tests::TestResult;
596
597    fn insta_settings() -> insta::Settings {
598        let mut settings = insta::Settings::clone_current();
599        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
600        // the output more readable.
601        for _ in 0..4 {
602            settings.add_filter(
603                r"(?x)
604                \b([A-Z]\w*)\(\n
605                    \s*(.{1,60}),\n
606                \s*\)",
607                "$1($2)",
608            );
609        }
610        settings
611    }
612
613    #[test]
614    fn test_string_pattern_to_glob() -> TestResult {
615        assert_eq!(StringPattern::all().to_glob(), Some("*".into()));
616        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
617        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
618        assert_eq!(StringPattern::glob("*")?.to_glob(), Some("*".into()));
619        assert_eq!(
620            StringPattern::Substring("a".into()).to_glob(),
621            Some("*a*".into())
622        );
623        assert_eq!(
624            StringPattern::Substring("*".into()).to_glob(),
625            Some("*[*]*".into())
626        );
627        Ok(())
628    }
629
630    #[test]
631    fn test_parse() {
632        // Parse specific pattern kinds.
633        assert_matches!(
634            StringPattern::from_str_kind("foo", "exact"),
635            Ok(StringPattern::Exact(s)) if s == "foo"
636        );
637        assert_matches!(
638            StringPattern::from_str_kind("foo*", "glob"),
639            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
640        );
641        assert_matches!(
642            StringPattern::from_str_kind("foo", "substring"),
643            Ok(StringPattern::Substring(s)) if s == "foo"
644        );
645        assert_matches!(
646            StringPattern::from_str_kind("foo", "substring-i"),
647            Ok(StringPattern::SubstringI(s)) if s == "foo"
648        );
649        assert_matches!(
650            StringPattern::from_str_kind("foo", "regex"),
651            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
652        );
653        assert_matches!(
654            StringPattern::from_str_kind("foo", "regex-i"),
655            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
656        );
657    }
658
659    #[test]
660    fn test_glob_is_match() {
661        let glob = |src: &str| StringPattern::glob(src).unwrap().to_matcher();
662        let glob_i = |src: &str| StringPattern::glob_i(src).unwrap().to_matcher();
663
664        assert!(glob("foo").is_match("foo"));
665        assert!(!glob("foo").is_match("foobar"));
666
667        // "." in string isn't any special
668        assert!(glob("*").is_match(".foo"));
669
670        // "/" in string isn't any special
671        assert!(glob("*").is_match("foo/bar"));
672        assert!(glob(r"*/*").is_match("foo/bar"));
673        assert!(!glob(r"*/*").is_match(r"foo\bar"));
674
675        // "\" is an escape character
676        assert!(!glob(r"*\*").is_match("foo/bar"));
677        assert!(glob(r"*\*").is_match("foo*"));
678        assert!(glob(r"\\").is_match(r"\"));
679
680        // "*" matches newline
681        assert!(glob(r"*").is_match("foo\nbar"));
682
683        assert!(!glob("f?O").is_match("Foo"));
684        assert!(glob_i("f?O").is_match("Foo"));
685    }
686
687    #[test]
688    fn test_regex_is_match() {
689        let regex = |src: &str| StringPattern::regex(src).unwrap().to_matcher();
690        // Unicode mode is enabled by default
691        assert!(regex(r"^\w$").is_match("\u{c0}"));
692        assert!(regex(r"^.$").is_match("\u{c0}"));
693        // ASCII-compatible mode should also work
694        assert!(regex(r"^(?-u)\w$").is_match("a"));
695        assert!(!regex(r"^(?-u)\w$").is_match("\u{c0}"));
696        assert!(regex(r"^(?-u).{2}$").is_match("\u{c0}"));
697    }
698
699    #[test]
700    fn test_string_pattern_to_regex() {
701        let check = |pattern: StringPattern, match_to: &str| {
702            let regex = pattern.to_regex();
703            regex.is_match(match_to.as_bytes())
704        };
705        assert!(check(StringPattern::exact("$a"), "$a"));
706        assert!(!check(StringPattern::exact("$a"), "$A"));
707        assert!(!check(StringPattern::exact("a"), "aa"));
708        assert!(!check(StringPattern::exact("a"), "aa"));
709        assert!(check(StringPattern::exact_i("a"), "A"));
710        assert!(check(StringPattern::substring("$a"), "$abc"));
711        assert!(!check(StringPattern::substring("$a"), "$Abc"));
712        assert!(check(StringPattern::substring_i("$a"), "$Abc"));
713        assert!(!check(StringPattern::glob("a").unwrap(), "A"));
714        assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
715        assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
716        assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
717        assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
718    }
719
720    #[test]
721    fn test_exact_pattern_to_matcher() {
722        assert_matches!(
723            StringPattern::exact("").to_matcher(),
724            StringMatcher::Exact(needle) if needle.is_empty()
725        );
726        assert_matches!(
727            StringPattern::exact("x").to_matcher(),
728            StringMatcher::Exact(needle) if needle == "x"
729        );
730
731        assert_matches!(
732            StringPattern::exact_i("").to_matcher(),
733            StringMatcher::Fn(_) // or Exact
734        );
735        assert_matches!(
736            StringPattern::exact_i("x").to_matcher(),
737            StringMatcher::Fn(_)
738        );
739    }
740
741    #[test]
742    fn test_substring_pattern_to_matcher() {
743        assert_matches!(
744            StringPattern::substring("").to_matcher(),
745            StringMatcher::All
746        );
747        assert_matches!(
748            StringPattern::substring("x").to_matcher(),
749            StringMatcher::Fn(_)
750        );
751
752        assert_matches!(
753            StringPattern::substring_i("").to_matcher(),
754            StringMatcher::All
755        );
756        assert_matches!(
757            StringPattern::substring_i("x").to_matcher(),
758            StringMatcher::Fn(_)
759        );
760    }
761
762    #[test]
763    fn test_glob_pattern_to_matcher() -> TestResult {
764        assert_matches!(
765            StringPattern::glob("")?.to_matcher(),
766            StringMatcher::Exact(_)
767        );
768        assert_matches!(
769            StringPattern::glob("x")?.to_matcher(),
770            StringMatcher::Exact(_)
771        );
772        assert_matches!(
773            StringPattern::glob("x?")?.to_matcher(),
774            StringMatcher::Fn(_)
775        );
776        assert_matches!(StringPattern::glob("*")?.to_matcher(), StringMatcher::All);
777        assert_matches!(
778            StringPattern::glob(r"\\")?.to_matcher(),
779            StringMatcher::Fn(_) // or Exact(r"\")
780        );
781
782        assert_matches!(
783            StringPattern::glob_i("")?.to_matcher(),
784            StringMatcher::Fn(_) // or Exact
785        );
786        assert_matches!(
787            StringPattern::glob_i("x")?.to_matcher(),
788            StringMatcher::Fn(_)
789        );
790        assert_matches!(
791            StringPattern::glob_i("x?")?.to_matcher(),
792            StringMatcher::Fn(_)
793        );
794        assert_matches!(StringPattern::glob_i("*")?.to_matcher(), StringMatcher::All);
795        Ok(())
796    }
797
798    #[test]
799    fn test_regex_pattern_to_matcher() -> TestResult {
800        assert_matches!(StringPattern::regex("")?.to_matcher(), StringMatcher::All);
801        assert_matches!(
802            StringPattern::regex("x")?.to_matcher(),
803            StringMatcher::Fn(_)
804        );
805        assert_matches!(
806            StringPattern::regex(".")?.to_matcher(),
807            StringMatcher::Fn(_)
808        );
809
810        assert_matches!(StringPattern::regex_i("")?.to_matcher(), StringMatcher::All);
811        assert_matches!(
812            StringPattern::regex_i("x")?.to_matcher(),
813            StringMatcher::Fn(_)
814        );
815        assert_matches!(
816            StringPattern::regex_i(".")?.to_matcher(),
817            StringMatcher::Fn(_)
818        );
819        Ok(())
820    }
821
    /// Snapshots the tree built by `StringExpression::union_all()` for lists
    /// of zero to four expressions.
    #[test]
    fn test_union_all_expressions() {
        // Keep the guard alive so the filters from insta_settings() apply to
        // every snapshot assertion in this test.
        let settings = insta_settings();
        let _guard = settings.bind_to_scope();

        // No operands: the match-nothing expression.
        insta::assert_debug_snapshot!(
            StringExpression::union_all(vec![]),
            @r#"NotIn(Pattern(Substring("")))"#);
        // One operand: returned as is, without a Union node.
        insta::assert_debug_snapshot!(
            StringExpression::union_all(vec![StringExpression::exact("a")]),
            @r#"Pattern(Exact("a"))"#);
        // Two operands: a single Union node.
        insta::assert_debug_snapshot!(
            StringExpression::union_all(vec![
                StringExpression::exact("a"),
                StringExpression::exact("b"),
            ]),
            @r#"
        Union(
            Pattern(Exact("a")),
            Pattern(Exact("b")),
        )
        "#);
        // Three operands: the right half receives the extra operand.
        insta::assert_debug_snapshot!(
            StringExpression::union_all(vec![
                StringExpression::exact("a"),
                StringExpression::exact("b"),
                StringExpression::exact("c"),
            ]),
            @r#"
        Union(
            Pattern(Exact("a")),
            Union(
                Pattern(Exact("b")),
                Pattern(Exact("c")),
            ),
        )
        "#);
        // Four operands: two Union nodes of two operands each.
        insta::assert_debug_snapshot!(
            StringExpression::union_all(vec![
                StringExpression::exact("a"),
                StringExpression::exact("b"),
                StringExpression::exact("c"),
                StringExpression::exact("d"),
            ]),
            @r#"
        Union(
            Union(
                Pattern(Exact("a")),
                Pattern(Exact("b")),
            ),
            Union(
                Pattern(Exact("c")),
                Pattern(Exact("d")),
            ),
        )
        "#);
    }
879
880    #[test]
881    fn test_exact_strings_in_expression() {
882        assert_eq!(
883            StringExpression::all().exact_strings().collect_vec(),
884            [""; 0]
885        );
886        assert_eq!(
887            StringExpression::union_all(vec![
888                StringExpression::exact("a"),
889                StringExpression::substring("b"),
890                StringExpression::intersection(
891                    StringExpression::exact("c"),
892                    StringExpression::exact("d").negated(),
893                ),
894            ])
895            .exact_strings()
896            .collect_vec(),
897            ["a", "c", "d"]
898        );
899    }
900
901    #[test]
902    fn test_trivial_expression_to_matcher() {
903        assert_matches!(StringExpression::all().to_matcher(), StringMatcher::All);
904        assert_matches!(
905            StringExpression::exact("x").to_matcher(),
906            StringMatcher::Exact(needle) if needle == "x"
907        );
908    }
909
910    #[test]
911    fn test_compound_expression_to_matcher() {
912        let matcher = StringExpression::exact("foo").negated().to_matcher();
913        assert!(!matcher.is_match("foo"));
914        assert!(matcher.is_match("bar"));
915
916        let matcher = StringExpression::union(
917            StringExpression::exact("foo"),
918            StringExpression::exact("bar"),
919        )
920        .to_matcher();
921        assert!(matcher.is_match("foo"));
922        assert!(matcher.is_match("bar"));
923        assert!(!matcher.is_match("baz"));
924
925        let matcher = StringExpression::intersection(
926            StringExpression::substring("a"),
927            StringExpression::substring("r"),
928        )
929        .to_matcher();
930        assert!(!matcher.is_match("foo"));
931        assert!(matcher.is_match("bar"));
932        assert!(!matcher.is_match("baz"));
933    }
934
935    #[test]
936    fn test_matcher_is_match() {
937        assert!(StringMatcher::all().is_match(""));
938        assert!(StringMatcher::all().is_match("foo"));
939        assert!(!StringMatcher::exact("o").is_match(""));
940        assert!(!StringMatcher::exact("o").is_match("foo"));
941        assert!(StringMatcher::exact("foo").is_match("foo"));
942        assert!(StringPattern::substring("o").to_matcher().is_match("foo"));
943    }
944
945    #[test]
946    fn test_matcher_match_lines() {
947        assert_eq!(
948            StringMatcher::all().match_lines(b"").collect_vec(),
949            Vec::<&[u8]>::new()
950        );
951        assert_eq!(
952            StringMatcher::all().match_lines(b"\n").collect_vec(),
953            vec![b"\n"]
954        );
955        assert_eq!(
956            StringMatcher::all().match_lines(b"foo").collect_vec(),
957            vec![b"foo"]
958        );
959        assert_eq!(
960            StringMatcher::all().match_lines(b"foo\n").collect_vec(),
961            vec![b"foo\n"]
962        );
963        assert_eq!(
964            StringMatcher::exact("foo")
965                .match_lines(b"foo\nbar\n")
966                .collect_vec(),
967            vec![b"foo\n"]
968        );
969        assert_eq!(
970            StringMatcher::exact("foo\n")
971                .match_lines(b"foo\nbar\n")
972                .collect_vec(),
973            Vec::<&[u8]>::new()
974        );
975    }
976
977    #[test]
978    fn test_matcher_filter_btree_map() {
979        let data = btreemap! {
980            "bar" => (),
981            "baz" => (),
982            "foo" => (),
983        };
984        let filter = |matcher: &StringMatcher| {
985            matcher
986                .filter_btree_map(&data)
987                .map(|(&key, ())| key)
988                .collect_vec()
989        };
990        assert_eq!(filter(&StringMatcher::all()), vec!["bar", "baz", "foo"]);
991        assert_eq!(filter(&StringMatcher::exact("o")), vec![""; 0]);
992        assert_eq!(filter(&StringMatcher::exact("foo")), vec!["foo"]);
993        assert_eq!(
994            filter(&StringPattern::substring("o").to_matcher()),
995            vec!["foo"]
996        );
997        assert_eq!(
998            filter(&StringPattern::substring("a").to_matcher()),
999            vec!["bar", "baz"]
1000        );
1001    }
1002}