Skip to main content

jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::iter;
23use std::ops::Deref;
24
25use bstr::ByteSlice as _;
26use either::Either;
27use globset::Glob;
28use globset::GlobBuilder;
29use thiserror::Error;
30
31/// Error occurred during pattern string parsing.
32#[derive(Debug, Error)]
33pub enum StringPatternParseError {
34    /// Unknown pattern kind is specified.
35    #[error("Invalid string pattern kind `{0}:`")]
36    InvalidKind(String),
37    /// Failed to parse glob pattern.
38    #[error(transparent)]
39    GlobPattern(globset::Error),
40    /// Failed to parse regular expression.
41    #[error(transparent)]
42    Regex(regex::Error),
43}
44
45/// A wrapper for [`Glob`] with a more concise `Debug` impl.
46#[derive(Clone)]
47pub struct GlobPattern {
48    glob: Glob,
49}
50
51impl GlobPattern {
52    /// Returns the original glob pattern.
53    pub fn as_str(&self) -> &str {
54        self.glob.glob()
55    }
56
57    /// Converts this glob pattern to a bytes regex.
58    pub fn to_regex(&self) -> regex::bytes::Regex {
59        // Based on new_regex() in globset. We don't use GlobMatcher::is_match(path)
60        // because the input string shouldn't be normalized as path.
61        regex::bytes::RegexBuilder::new(self.glob.regex())
62            .dot_matches_new_line(true)
63            .build()
64            .expect("glob regex should be valid")
65    }
66}
67
68impl Debug for GlobPattern {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
71    }
72}
73
74fn parse_glob(src: &str, icase: bool) -> Result<GlobPattern, StringPatternParseError> {
75    let glob = GlobBuilder::new(src)
76        .case_insensitive(icase)
77        // Don't use platform-dependent default. This pattern isn't meant for
78        // testing file-system paths. If backslash escape were disabled, "\" in
79        // pattern would be normalized to "/" on Windows.
80        .backslash_escape(true)
81        .build()
82        .map_err(StringPatternParseError::GlobPattern)?;
83    Ok(GlobPattern { glob })
84}
85
/// Returns true if `c` has a special meaning in a glob pattern.
fn is_glob_char(c: char) -> bool {
    // The set escaped by globset::escape(), plus backslash, which is parsed
    // as an escape sequence on all platforms.
    const GLOB_CHARS: [char; 7] = ['?', '*', '[', ']', '{', '}', '\\'];
    GLOB_CHARS.contains(&c)
}
91
92/// Pattern to be tested against string property like commit description or
93/// bookmark name.
94#[derive(Clone, Debug)]
95pub enum StringPattern {
96    /// Matches strings exactly.
97    Exact(String),
98    /// Matches strings case‐insensitively.
99    ExactI(String),
100    /// Matches strings that contain a substring.
101    Substring(String),
102    /// Matches strings that case‐insensitively contain a substring.
103    SubstringI(String),
104    /// Matches with a Unix‐style shell wildcard pattern.
105    Glob(Box<GlobPattern>),
106    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
107    GlobI(Box<GlobPattern>),
108    /// Matches substrings with a regular expression.
109    Regex(regex::bytes::Regex),
110    /// Matches substrings with a case‐insensitive regular expression.
111    RegexI(regex::bytes::Regex),
112}
113
114impl StringPattern {
115    /// Pattern that matches any string.
116    pub const fn all() -> Self {
117        Self::Substring(String::new())
118    }
119
120    /// Constructs a pattern that matches exactly.
121    pub fn exact(src: impl Into<String>) -> Self {
122        Self::Exact(src.into())
123    }
124
125    /// Constructs a pattern that matches case‐insensitively.
126    pub fn exact_i(src: impl Into<String>) -> Self {
127        Self::ExactI(src.into())
128    }
129
130    /// Constructs a pattern that matches a substring.
131    pub fn substring(src: impl Into<String>) -> Self {
132        Self::Substring(src.into())
133    }
134
135    /// Constructs a pattern that case‐insensitively matches a substring.
136    pub fn substring_i(src: impl Into<String>) -> Self {
137        Self::SubstringI(src.into())
138    }
139
140    /// Parses the given string as a glob pattern.
141    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
142        if !src.contains(is_glob_char) {
143            return Ok(Self::exact(src));
144        }
145        Ok(Self::Glob(Box::new(parse_glob(src, false)?)))
146    }
147
148    /// Parses the given string as a case‐insensitive glob pattern.
149    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
150        // No special case for !src.contains(is_glob_char) because it's unclear
151        // whether we'll use unicode case comparison for "exact-i" patterns.
152        // "glob-i" should always be ASCII-based.
153        Ok(Self::GlobI(Box::new(parse_glob(src, true)?)))
154    }
155
156    /// Parses the given string as a regular expression.
157    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
158        let pattern = regex::bytes::Regex::new(src).map_err(StringPatternParseError::Regex)?;
159        Ok(Self::Regex(pattern))
160    }
161
162    /// Parses the given string as a case-insensitive regular expression.
163    pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
164        let pattern = regex::bytes::RegexBuilder::new(src)
165            .case_insensitive(true)
166            .build()
167            .map_err(StringPatternParseError::Regex)?;
168        Ok(Self::RegexI(pattern))
169    }
170
171    /// Parses the given string as a pattern of the specified `kind`.
172    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
173        match kind {
174            "exact" => Ok(Self::exact(src)),
175            "exact-i" => Ok(Self::exact_i(src)),
176            "substring" => Ok(Self::substring(src)),
177            "substring-i" => Ok(Self::substring_i(src)),
178            "glob" => Self::glob(src),
179            "glob-i" => Self::glob_i(src),
180            "regex" => Self::regex(src),
181            "regex-i" => Self::regex_i(src),
182            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
183        }
184    }
185
186    /// Returns true if this pattern trivially matches any input strings.
187    pub fn is_all(&self) -> bool {
188        match self {
189            Self::Exact(_) | Self::ExactI(_) => false,
190            Self::Substring(needle) | Self::SubstringI(needle) => needle.is_empty(),
191            Self::Glob(pattern) | Self::GlobI(pattern) => pattern.as_str() == "*",
192            Self::Regex(pattern) | Self::RegexI(pattern) => pattern.as_str().is_empty(),
193        }
194    }
195
196    /// Returns true if this pattern matches input strings exactly.
197    pub fn is_exact(&self) -> bool {
198        self.as_exact().is_some()
199    }
200
201    /// Returns a literal pattern if this should match input strings exactly.
202    ///
203    /// This can be used to optimize map lookup by exact key.
204    pub fn as_exact(&self) -> Option<&str> {
205        // TODO: Handle trivial case‐insensitive patterns here? It might make people
206        // expect they can use case‐insensitive patterns in contexts where they
207        // generally can’t.
208        match self {
209            Self::Exact(literal) => Some(literal),
210            _ => None,
211        }
212    }
213
214    /// Returns the original string of this pattern.
215    pub fn as_str(&self) -> &str {
216        match self {
217            Self::Exact(literal) => literal,
218            Self::ExactI(literal) => literal,
219            Self::Substring(needle) => needle,
220            Self::SubstringI(needle) => needle,
221            Self::Glob(pattern) => pattern.as_str(),
222            Self::GlobI(pattern) => pattern.as_str(),
223            Self::Regex(pattern) => pattern.as_str(),
224            Self::RegexI(pattern) => pattern.as_str(),
225        }
226    }
227
228    /// Converts this pattern to a glob string. Returns `None` if the pattern
229    /// can't be represented as a glob.
230    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
231        // TODO: Handle trivial case‐insensitive patterns here? It might make people
232        // expect they can use case‐insensitive patterns in contexts where they
233        // generally can’t.
234        match self {
235            Self::Exact(literal) => Some(globset::escape(literal).into()),
236            Self::Substring(needle) => {
237                if needle.is_empty() {
238                    Some("*".into())
239                } else {
240                    Some(format!("*{}*", globset::escape(needle)).into())
241                }
242            }
243            Self::Glob(pattern) => Some(pattern.as_str().into()),
244            Self::ExactI(_) => None,
245            Self::SubstringI(_) => None,
246            Self::GlobI(_) => None,
247            Self::Regex(_) => None,
248            Self::RegexI(_) => None,
249        }
250    }
251
252    fn to_match_fn(&self) -> Box<DynMatchFn> {
253        // TODO: Unicode case folding is complicated and can be
254        // locale‐specific. The `globset` crate and Gitoxide only deal with
255        // ASCII case folding, so we do the same here; a more elaborate case
256        // folding system will require making sure those behave in a matching
257        // manner where relevant. That said, regex patterns are unicode-aware by
258        // default, so we already have some inconsistencies.
259        //
260        // Care will need to be taken regarding normalization and the choice of an
261        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
262        // that it is compatible with the standard case‐insensitivity of haystack
263        // components (like internationalized domain names in email addresses). The
264        // availability of normalization and case folding schemes in database backends
265        // will also need to be considered. A locale‐specific case folding
266        // scheme would likely not be appropriate for Jujutsu.
267        //
268        // For some discussion of this topic, see:
269        // <https://github.com/unicode-org/icu4x/issues/3151>
270        match self {
271            Self::Exact(literal) => {
272                let literal = literal.clone();
273                Box::new(move |haystack| haystack == literal.as_bytes())
274            }
275            Self::ExactI(literal) => {
276                let literal = literal.clone();
277                Box::new(move |haystack| haystack.eq_ignore_ascii_case(literal.as_bytes()))
278            }
279            Self::Substring(needle) => {
280                let needle = needle.clone();
281                Box::new(move |haystack| haystack.contains_str(&needle))
282            }
283            Self::SubstringI(needle) => {
284                let needle = needle.to_ascii_lowercase();
285                Box::new(move |haystack| haystack.to_ascii_lowercase().contains_str(&needle))
286            }
287            // (Glob, GlobI) and (Regex, RegexI) pairs are identical here, but
288            // callers might want to translate these to backend-specific query
289            // differently.
290            Self::Glob(pattern) | Self::GlobI(pattern) => {
291                let pattern = pattern.to_regex();
292                Box::new(move |haystack| pattern.is_match(haystack))
293            }
294            Self::Regex(pattern) | Self::RegexI(pattern) => {
295                let pattern = pattern.clone();
296                Box::new(move |haystack| pattern.is_match(haystack))
297            }
298        }
299    }
300
301    /// Creates matcher object from this pattern.
302    pub fn to_matcher(&self) -> StringMatcher {
303        if self.is_all() {
304            StringMatcher::All
305        } else if let Some(literal) = self.as_exact() {
306            StringMatcher::Exact(literal.to_owned())
307        } else {
308            StringMatcher::Fn(self.to_match_fn())
309        }
310    }
311
312    /// Converts the pattern into a bytes regex.
313    pub fn to_regex(&self) -> regex::bytes::Regex {
314        match self {
315            Self::Exact(literal) => {
316                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
317                    .build()
318                    .expect("impossible to fail to compile regex of literal")
319            }
320            Self::ExactI(literal) => {
321                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
322                    .case_insensitive(true)
323                    .build()
324                    .expect("impossible to fail to compile regex of literal")
325            }
326            Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
327                .build()
328                .expect("impossible to fail to compile regex of literal"),
329            Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
330                .case_insensitive(true)
331                .build()
332                .expect("impossible to fail to compile regex of literal"),
333            Self::Glob(glob_pattern) => glob_pattern.to_regex(),
334            // The regex generated represents the case insensitivity itself
335            Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
336            Self::Regex(regex) => regex.clone(),
337            Self::RegexI(regex) => regex.clone(),
338        }
339    }
340}
341
342impl fmt::Display for StringPattern {
343    /// Shows the original string of this pattern.
344    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345        write!(f, "{}", self.as_str())
346    }
347}
348
349/// AST-level representation of the string matcher expression.
350#[derive(Clone, Debug)]
351pub enum StringExpression {
352    // None and All can be represented by using Pattern. Add them if needed.
353    /// Matches pattern.
354    Pattern(Box<StringPattern>),
355    /// Matches anything other than the expression.
356    NotIn(Box<Self>),
357    /// Matches one of the expressions.
358    Union(Box<Self>, Box<Self>),
359    /// Matches both expressions.
360    Intersection(Box<Self>, Box<Self>),
361}
362
363impl StringExpression {
364    /// Expression that matches nothing.
365    pub fn none() -> Self {
366        Self::all().negated()
367    }
368
369    /// Expression that matches everything.
370    pub fn all() -> Self {
371        Self::pattern(StringPattern::all())
372    }
373
374    /// Expression that matches the given pattern.
375    pub fn pattern(pattern: StringPattern) -> Self {
376        Self::Pattern(Box::new(pattern))
377    }
378
379    /// Expression that matches strings exactly.
380    pub fn exact(src: impl Into<String>) -> Self {
381        Self::pattern(StringPattern::exact(src))
382    }
383
384    /// Expression that matches substrings.
385    pub fn substring(src: impl Into<String>) -> Self {
386        Self::pattern(StringPattern::substring(src))
387    }
388
389    /// Expression that matches anything other than this expression.
390    pub fn negated(self) -> Self {
391        Self::NotIn(Box::new(self))
392    }
393
394    /// Expression that matches `self` or `other` (or both).
395    pub fn union(self, other: Self) -> Self {
396        Self::Union(Box::new(self), Box::new(other))
397    }
398
399    /// Expression that matches any of the given `expressions`.
400    pub fn union_all(expressions: Vec<Self>) -> Self {
401        to_binary_expression(expressions, &Self::none, &Self::union)
402    }
403
404    /// Expression that matches both `self` and `other`.
405    pub fn intersection(self, other: Self) -> Self {
406        Self::Intersection(Box::new(self), Box::new(other))
407    }
408
409    fn dfs_pre(&self) -> impl Iterator<Item = &Self> {
410        let mut stack: Vec<&Self> = vec![self];
411        iter::from_fn(move || {
412            let expr = stack.pop()?;
413            match expr {
414                Self::Pattern(_) => {}
415                Self::NotIn(expr) => stack.push(expr),
416                Self::Union(expr1, expr2) | Self::Intersection(expr1, expr2) => {
417                    stack.push(expr2);
418                    stack.push(expr1);
419                }
420            }
421            Some(expr)
422        })
423    }
424
425    /// Iterates exact string patterns recursively from this expression.
426    ///
427    /// For example, `"a", "b", "c"` will be yielded in that order for
428    /// expression `"a" | glob:"?" & "b" | ~"c"`.
429    pub fn exact_strings(&self) -> impl Iterator<Item = &str> {
430        // pre/post-ordering doesn't matter so long as children are visited from
431        // left to right.
432        self.dfs_pre().filter_map(|expr| match expr {
433            Self::Pattern(pattern) => pattern.as_exact(),
434            _ => None,
435        })
436    }
437
438    /// Transforms the expression tree to matcher object.
439    pub fn to_matcher(&self) -> StringMatcher {
440        match self {
441            Self::Pattern(pattern) => pattern.to_matcher(),
442            Self::NotIn(expr) => {
443                let p = expr.to_matcher().into_match_fn();
444                StringMatcher::Fn(Box::new(move |haystack| !p(haystack)))
445            }
446            Self::Union(expr1, expr2) => {
447                let p1 = expr1.to_matcher().into_match_fn();
448                let p2 = expr2.to_matcher().into_match_fn();
449                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) || p2(haystack)))
450            }
451            Self::Intersection(expr1, expr2) => {
452                let p1 = expr1.to_matcher().into_match_fn();
453                let p2 = expr2.to_matcher().into_match_fn();
454                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) && p2(haystack)))
455            }
456        }
457    }
458}
459
/// Folds the `expressions` list into a binary tree by using the associative
/// `binary` operation. An empty list produces the `unit` node, and a single
/// element is returned as is.
fn to_binary_expression<T>(
    expressions: Vec<T>,
    unit: &impl Fn() -> T,
    binary: &impl Fn(T, T) -> T,
) -> T {
    let mut head = expressions;
    if head.len() < 2 {
        // Zero or one element: nothing to combine.
        return head.pop().unwrap_or_else(unit);
    }
    // Halve the list so that the tree is balanced, which minimizes the
    // recursion depth.
    let tail = head.split_off(head.len() / 2);
    binary(
        to_binary_expression(head, unit, binary),
        to_binary_expression(tail, unit, binary),
    )
}
481
/// Type-erased predicate over haystack bytes.
type DynMatchFn = dyn Fn(&[u8]) -> bool;

/// Matcher that can be tested against strings and bytes.
pub enum StringMatcher {
    /// Accepts any string.
    All,
    /// Accepts only strings equal to the needle.
    Exact(String),
    /// Delegates the decision to an arbitrary function.
    Fn(Box<DynMatchFn>),
}
493
494impl StringMatcher {
495    /// Matcher that matches any strings.
496    pub const fn all() -> Self {
497        Self::All
498    }
499
500    /// Matcher that matches `src` exactly.
501    pub fn exact(src: impl Into<String>) -> Self {
502        Self::Exact(src.into())
503    }
504
505    /// Returns true if this matches the `haystack` string.
506    pub fn is_match(&self, haystack: &str) -> bool {
507        self.is_match_bytes(haystack.as_bytes())
508    }
509
510    /// Returns true if this matches the `haystack` bytes.
511    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
512        match self {
513            Self::All => true,
514            Self::Exact(needle) => haystack == needle.as_bytes(),
515            Self::Fn(predicate) => predicate(haystack),
516        }
517    }
518
519    /// Iterates over matching lines in `text`.
520    pub fn match_lines<'a>(&self, text: &'a [u8]) -> impl Iterator<Item = &'a [u8]> {
521        // The pattern is matched line by line so that it can be anchored to line
522        // start/end. For example, exact:"" will match blank lines.
523        text.split_inclusive(|b| *b == b'\n').filter(|line| {
524            let line = line.strip_suffix(b"\n").unwrap_or(line);
525            self.is_match_bytes(line)
526        })
527    }
528
529    fn into_match_fn(self) -> Box<DynMatchFn> {
530        match self {
531            Self::All => Box::new(|_haystack| true),
532            Self::Exact(needle) => Box::new(move |haystack| haystack == needle.as_bytes()),
533            Self::Fn(predicate) => predicate,
534        }
535    }
536
537    /// Iterates entries of the given `map` whose string keys match this.
538    pub fn filter_btree_map<'a, K: Borrow<str> + Ord, V>(
539        &self,
540        map: &'a BTreeMap<K, V>,
541    ) -> impl Iterator<Item = (&'a K, &'a V)> {
542        self.filter_btree_map_with(map, |key| key, |key| key)
543    }
544
545    /// Iterates entries of the given `map` whose string-like keys match this.
546    ///
547    /// The borrowed key type is constrained by the `Deref::Target`. It must be
548    /// convertible to/from `str`.
549    pub fn filter_btree_map_as_deref<'a, K, V>(
550        &self,
551        map: &'a BTreeMap<K, V>,
552    ) -> impl Iterator<Item = (&'a K, &'a V)>
553    where
554        K: Borrow<K::Target> + Deref + Ord,
555        K::Target: AsRef<str> + Ord,
556        str: AsRef<K::Target>,
557    {
558        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
559    }
560
561    fn filter_btree_map_with<'a, K, Q, V>(
562        &self,
563        map: &'a BTreeMap<K, V>,
564        from_key: impl Fn(&Q) -> &str,
565        to_key: impl Fn(&str) -> &Q,
566    ) -> impl Iterator<Item = (&'a K, &'a V)>
567    where
568        K: Borrow<Q> + Ord,
569        Q: Ord + ?Sized,
570    {
571        match self {
572            Self::All => Either::Left(map.iter()),
573            Self::Exact(key) => {
574                Either::Right(Either::Left(map.get_key_value(to_key(key)).into_iter()))
575            }
576            Self::Fn(predicate) => {
577                Either::Right(Either::Right(map.iter().filter(move |&(key, _)| {
578                    predicate(from_key(key.borrow()).as_bytes())
579                })))
580            }
581        }
582    }
583}
584
585impl Debug for StringMatcher {
586    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
587        match self {
588            Self::All => write!(f, "All"),
589            Self::Exact(needle) => f.debug_tuple("Exact").field(needle).finish(),
590            Self::Fn(_) => f.debug_tuple("Fn").finish_non_exhaustive(),
591        }
592    }
593}
594
595#[cfg(test)]
596mod tests {
597    use assert_matches::assert_matches;
598    use itertools::Itertools as _;
599    use maplit::btreemap;
600
601    use super::*;
602    use crate::tests::TestResult;
603
604    fn insta_settings() -> insta::Settings {
605        let mut settings = insta::Settings::clone_current();
606        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
607        // the output more readable.
608        for _ in 0..4 {
609            settings.add_filter(
610                r"(?x)
611                \b([A-Z]\w*)\(\n
612                    \s*(.{1,60}),\n
613                \s*\)",
614                "$1($2)",
615            );
616        }
617        settings
618    }
619
620    #[test]
621    fn test_string_pattern_to_glob() -> TestResult {
622        assert_eq!(StringPattern::all().to_glob(), Some("*".into()));
623        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
624        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
625        assert_eq!(StringPattern::glob("*")?.to_glob(), Some("*".into()));
626        assert_eq!(
627            StringPattern::Substring("a".into()).to_glob(),
628            Some("*a*".into())
629        );
630        assert_eq!(
631            StringPattern::Substring("*".into()).to_glob(),
632            Some("*[*]*".into())
633        );
634        Ok(())
635    }
636
637    #[test]
638    fn test_parse() {
639        // Parse specific pattern kinds.
640        assert_matches!(
641            StringPattern::from_str_kind("foo", "exact"),
642            Ok(StringPattern::Exact(s)) if s == "foo"
643        );
644        assert_matches!(
645            StringPattern::from_str_kind("foo*", "glob"),
646            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
647        );
648        assert_matches!(
649            StringPattern::from_str_kind("foo", "substring"),
650            Ok(StringPattern::Substring(s)) if s == "foo"
651        );
652        assert_matches!(
653            StringPattern::from_str_kind("foo", "substring-i"),
654            Ok(StringPattern::SubstringI(s)) if s == "foo"
655        );
656        assert_matches!(
657            StringPattern::from_str_kind("foo", "regex"),
658            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
659        );
660        assert_matches!(
661            StringPattern::from_str_kind("foo", "regex-i"),
662            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
663        );
664    }
665
666    #[test]
667    fn test_glob_is_match() {
668        let glob = |src: &str| StringPattern::glob(src).unwrap().to_matcher();
669        let glob_i = |src: &str| StringPattern::glob_i(src).unwrap().to_matcher();
670
671        assert!(glob("foo").is_match("foo"));
672        assert!(!glob("foo").is_match("foobar"));
673
674        // "." in string isn't any special
675        assert!(glob("*").is_match(".foo"));
676
677        // "/" in string isn't any special
678        assert!(glob("*").is_match("foo/bar"));
679        assert!(glob(r"*/*").is_match("foo/bar"));
680        assert!(!glob(r"*/*").is_match(r"foo\bar"));
681
682        // "\" is an escape character
683        assert!(!glob(r"*\*").is_match("foo/bar"));
684        assert!(glob(r"*\*").is_match("foo*"));
685        assert!(glob(r"\\").is_match(r"\"));
686
687        // "*" matches newline
688        assert!(glob(r"*").is_match("foo\nbar"));
689
690        assert!(!glob("f?O").is_match("Foo"));
691        assert!(glob_i("f?O").is_match("Foo"));
692    }
693
694    #[test]
695    fn test_regex_is_match() {
696        let regex = |src: &str| StringPattern::regex(src).unwrap().to_matcher();
697        // Unicode mode is enabled by default
698        assert!(regex(r"^\w$").is_match("\u{c0}"));
699        assert!(regex(r"^.$").is_match("\u{c0}"));
700        // ASCII-compatible mode should also work
701        assert!(regex(r"^(?-u)\w$").is_match("a"));
702        assert!(!regex(r"^(?-u)\w$").is_match("\u{c0}"));
703        assert!(regex(r"^(?-u).{2}$").is_match("\u{c0}"));
704    }
705
706    #[test]
707    fn test_string_pattern_to_regex() {
708        let check = |pattern: StringPattern, match_to: &str| {
709            let regex = pattern.to_regex();
710            regex.is_match(match_to.as_bytes())
711        };
712        assert!(check(StringPattern::exact("$a"), "$a"));
713        assert!(!check(StringPattern::exact("$a"), "$A"));
714        assert!(!check(StringPattern::exact("a"), "aa"));
715        assert!(!check(StringPattern::exact("a"), "aa"));
716        assert!(check(StringPattern::exact_i("a"), "A"));
717        assert!(check(StringPattern::substring("$a"), "$abc"));
718        assert!(!check(StringPattern::substring("$a"), "$Abc"));
719        assert!(check(StringPattern::substring_i("$a"), "$Abc"));
720        assert!(!check(StringPattern::glob("a").unwrap(), "A"));
721        assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
722        assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
723        assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
724        assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
725    }
726
727    #[test]
728    fn test_exact_pattern_to_matcher() {
729        assert_matches!(
730            StringPattern::exact("").to_matcher(),
731            StringMatcher::Exact(needle) if needle.is_empty()
732        );
733        assert_matches!(
734            StringPattern::exact("x").to_matcher(),
735            StringMatcher::Exact(needle) if needle == "x"
736        );
737
738        assert_matches!(
739            StringPattern::exact_i("").to_matcher(),
740            StringMatcher::Fn(_) // or Exact
741        );
742        assert_matches!(
743            StringPattern::exact_i("x").to_matcher(),
744            StringMatcher::Fn(_)
745        );
746    }
747
748    #[test]
749    fn test_substring_pattern_to_matcher() {
750        assert_matches!(
751            StringPattern::substring("").to_matcher(),
752            StringMatcher::All
753        );
754        assert_matches!(
755            StringPattern::substring("x").to_matcher(),
756            StringMatcher::Fn(_)
757        );
758
759        assert_matches!(
760            StringPattern::substring_i("").to_matcher(),
761            StringMatcher::All
762        );
763        assert_matches!(
764            StringPattern::substring_i("x").to_matcher(),
765            StringMatcher::Fn(_)
766        );
767    }
768
769    #[test]
770    fn test_glob_pattern_to_matcher() -> TestResult {
771        assert_matches!(
772            StringPattern::glob("")?.to_matcher(),
773            StringMatcher::Exact(_)
774        );
775        assert_matches!(
776            StringPattern::glob("x")?.to_matcher(),
777            StringMatcher::Exact(_)
778        );
779        assert_matches!(
780            StringPattern::glob("x?")?.to_matcher(),
781            StringMatcher::Fn(_)
782        );
783        assert_matches!(StringPattern::glob("*")?.to_matcher(), StringMatcher::All);
784        assert_matches!(
785            StringPattern::glob(r"\\")?.to_matcher(),
786            StringMatcher::Fn(_) // or Exact(r"\")
787        );
788
789        assert_matches!(
790            StringPattern::glob_i("")?.to_matcher(),
791            StringMatcher::Fn(_) // or Exact
792        );
793        assert_matches!(
794            StringPattern::glob_i("x")?.to_matcher(),
795            StringMatcher::Fn(_)
796        );
797        assert_matches!(
798            StringPattern::glob_i("x?")?.to_matcher(),
799            StringMatcher::Fn(_)
800        );
801        assert_matches!(StringPattern::glob_i("*")?.to_matcher(), StringMatcher::All);
802        Ok(())
803    }
804
805    #[test]
806    fn test_regex_pattern_to_matcher() -> TestResult {
807        assert_matches!(StringPattern::regex("")?.to_matcher(), StringMatcher::All);
808        assert_matches!(
809            StringPattern::regex("x")?.to_matcher(),
810            StringMatcher::Fn(_)
811        );
812        assert_matches!(
813            StringPattern::regex(".")?.to_matcher(),
814            StringMatcher::Fn(_)
815        );
816
817        assert_matches!(StringPattern::regex_i("")?.to_matcher(), StringMatcher::All);
818        assert_matches!(
819            StringPattern::regex_i("x")?.to_matcher(),
820            StringMatcher::Fn(_)
821        );
822        assert_matches!(
823            StringPattern::regex_i(".")?.to_matcher(),
824            StringMatcher::Fn(_)
825        );
826        Ok(())
827    }
828
829    #[test]
830    fn test_union_all_expressions() {
831        let settings = insta_settings();
832        let _guard = settings.bind_to_scope();
833
834        insta::assert_debug_snapshot!(
835            StringExpression::union_all(vec![]),
836            @r#"NotIn(Pattern(Substring("")))"#);
837        insta::assert_debug_snapshot!(
838            StringExpression::union_all(vec![StringExpression::exact("a")]),
839            @r#"Pattern(Exact("a"))"#);
840        insta::assert_debug_snapshot!(
841            StringExpression::union_all(vec![
842                StringExpression::exact("a"),
843                StringExpression::exact("b"),
844            ]),
845            @r#"
846        Union(
847            Pattern(Exact("a")),
848            Pattern(Exact("b")),
849        )
850        "#);
851        insta::assert_debug_snapshot!(
852            StringExpression::union_all(vec![
853                StringExpression::exact("a"),
854                StringExpression::exact("b"),
855                StringExpression::exact("c"),
856            ]),
857            @r#"
858        Union(
859            Pattern(Exact("a")),
860            Union(
861                Pattern(Exact("b")),
862                Pattern(Exact("c")),
863            ),
864        )
865        "#);
866        insta::assert_debug_snapshot!(
867            StringExpression::union_all(vec![
868                StringExpression::exact("a"),
869                StringExpression::exact("b"),
870                StringExpression::exact("c"),
871                StringExpression::exact("d"),
872            ]),
873            @r#"
874        Union(
875            Union(
876                Pattern(Exact("a")),
877                Pattern(Exact("b")),
878            ),
879            Union(
880                Pattern(Exact("c")),
881                Pattern(Exact("d")),
882            ),
883        )
884        "#);
885    }
886
887    #[test]
888    fn test_exact_strings_in_expression() {
889        assert_eq!(
890            StringExpression::all().exact_strings().collect_vec(),
891            [""; 0]
892        );
893        assert_eq!(
894            StringExpression::union_all(vec![
895                StringExpression::exact("a"),
896                StringExpression::substring("b"),
897                StringExpression::intersection(
898                    StringExpression::exact("c"),
899                    StringExpression::exact("d").negated(),
900                ),
901            ])
902            .exact_strings()
903            .collect_vec(),
904            ["a", "c", "d"]
905        );
906    }
907
908    #[test]
909    fn test_trivial_expression_to_matcher() {
910        assert_matches!(StringExpression::all().to_matcher(), StringMatcher::All);
911        assert_matches!(
912            StringExpression::exact("x").to_matcher(),
913            StringMatcher::Exact(needle) if needle == "x"
914        );
915    }
916
917    #[test]
918    fn test_compound_expression_to_matcher() {
919        let matcher = StringExpression::exact("foo").negated().to_matcher();
920        assert!(!matcher.is_match("foo"));
921        assert!(matcher.is_match("bar"));
922
923        let matcher = StringExpression::union(
924            StringExpression::exact("foo"),
925            StringExpression::exact("bar"),
926        )
927        .to_matcher();
928        assert!(matcher.is_match("foo"));
929        assert!(matcher.is_match("bar"));
930        assert!(!matcher.is_match("baz"));
931
932        let matcher = StringExpression::intersection(
933            StringExpression::substring("a"),
934            StringExpression::substring("r"),
935        )
936        .to_matcher();
937        assert!(!matcher.is_match("foo"));
938        assert!(matcher.is_match("bar"));
939        assert!(!matcher.is_match("baz"));
940    }
941
942    #[test]
943    fn test_matcher_is_match() {
944        assert!(StringMatcher::all().is_match(""));
945        assert!(StringMatcher::all().is_match("foo"));
946        assert!(!StringMatcher::exact("o").is_match(""));
947        assert!(!StringMatcher::exact("o").is_match("foo"));
948        assert!(StringMatcher::exact("foo").is_match("foo"));
949        assert!(StringPattern::substring("o").to_matcher().is_match("foo"));
950    }
951
952    #[test]
953    fn test_matcher_match_lines() {
954        assert_eq!(
955            StringMatcher::all().match_lines(b"").collect_vec(),
956            Vec::<&[u8]>::new()
957        );
958        assert_eq!(
959            StringMatcher::all().match_lines(b"\n").collect_vec(),
960            vec![b"\n"]
961        );
962        assert_eq!(
963            StringMatcher::all().match_lines(b"foo").collect_vec(),
964            vec![b"foo"]
965        );
966        assert_eq!(
967            StringMatcher::all().match_lines(b"foo\n").collect_vec(),
968            vec![b"foo\n"]
969        );
970        assert_eq!(
971            StringMatcher::exact("foo")
972                .match_lines(b"foo\nbar\n")
973                .collect_vec(),
974            vec![b"foo\n"]
975        );
976        assert_eq!(
977            StringMatcher::exact("foo\n")
978                .match_lines(b"foo\nbar\n")
979                .collect_vec(),
980            Vec::<&[u8]>::new()
981        );
982    }
983
984    #[test]
985    fn test_matcher_filter_btree_map() {
986        let data = btreemap! {
987            "bar" => (),
988            "baz" => (),
989            "foo" => (),
990        };
991        let filter = |matcher: &StringMatcher| {
992            matcher
993                .filter_btree_map(&data)
994                .map(|(&key, ())| key)
995                .collect_vec()
996        };
997        assert_eq!(filter(&StringMatcher::all()), vec!["bar", "baz", "foo"]);
998        assert_eq!(filter(&StringMatcher::exact("o")), vec![""; 0]);
999        assert_eq!(filter(&StringMatcher::exact("foo")), vec!["foo"]);
1000        assert_eq!(
1001            filter(&StringPattern::substring("o").to_matcher()),
1002            vec!["foo"]
1003        );
1004        assert_eq!(
1005            filter(&StringPattern::substring("a").to_matcher()),
1006            vec!["bar", "baz"]
1007        );
1008    }
1009}