jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::iter;
23use std::ops::Deref;
24
25use bstr::ByteSlice as _;
26use either::Either;
27use globset::Glob;
28use globset::GlobBuilder;
29use thiserror::Error;
30
/// Error occurred during pattern string parsing.
///
/// Returned by the fallible [`StringPattern`] constructors such as
/// [`StringPattern::from_str_kind()`].
#[derive(Debug, Error)]
pub enum StringPatternParseError {
    /// Unknown pattern kind is specified.
    ///
    /// The payload is the kind name that wasn't recognized.
    #[error("Invalid string pattern kind `{0}:`")]
    InvalidKind(String),
    /// Failed to parse glob pattern.
    ///
    /// The error message is forwarded from the wrapped `globset` error.
    #[error(transparent)]
    GlobPattern(globset::Error),
    /// Failed to parse regular expression.
    ///
    /// The error message is forwarded from the wrapped `regex` error.
    #[error(transparent)]
    Regex(regex::Error),
}
44
/// A wrapper for [`Glob`] with a more concise `Debug` impl.
///
/// The `Debug` output shows only the original glob string.
#[derive(Clone)]
pub struct GlobPattern {
    // Parsed glob; also retains the original pattern text.
    glob: Glob,
}
50
51impl GlobPattern {
52    /// Returns the original glob pattern.
53    pub fn as_str(&self) -> &str {
54        self.glob.glob()
55    }
56
57    /// Converts this glob pattern to a bytes regex.
58    pub fn to_regex(&self) -> regex::bytes::Regex {
59        // Based on new_regex() in globset. We don't use GlobMatcher::is_match(path)
60        // because the input string shouldn't be normalized as path.
61        regex::bytes::RegexBuilder::new(self.glob.regex())
62            .dot_matches_new_line(true)
63            .build()
64            .expect("glob regex should be valid")
65    }
66}
67
68impl Debug for GlobPattern {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
71    }
72}
73
74fn parse_glob(src: &str, icase: bool) -> Result<GlobPattern, StringPatternParseError> {
75    let glob = GlobBuilder::new(src)
76        .case_insensitive(icase)
77        // Don't use platform-dependent default. This pattern isn't meant for
78        // testing file-system paths. If backslash escape were disabled, "\" in
79        // pattern would be normalized to "/" on Windows.
80        .backslash_escape(true)
81        .build()
82        .map_err(StringPatternParseError::GlobPattern)?;
83    Ok(GlobPattern { glob })
84}
85
/// Returns true if `c` has a special meaning in glob patterns.
fn is_glob_char(c: char) -> bool {
    // The set escaped by globset::escape(), plus backslash, which is parsed
    // as an escape sequence on all platforms.
    const GLOB_META_CHARS: [char; 7] = ['?', '*', '[', ']', '{', '}', '\\'];
    GLOB_META_CHARS.contains(&c)
}
91
/// Pattern to be tested against string property like commit description or
/// bookmark name.
///
/// Each matching strategy comes as a case-sensitive variant and a
/// case-insensitive (`...I`) counterpart.
#[derive(Clone, Debug)]
pub enum StringPattern {
    /// Matches strings exactly.
    Exact(String),
    /// Matches strings case‐insensitively.
    ExactI(String),
    /// Matches strings that contain a substring.
    Substring(String),
    /// Matches strings that case‐insensitively contain a substring.
    SubstringI(String),
    /// Matches with a Unix‐style shell wildcard pattern.
    Glob(Box<GlobPattern>),
    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
    GlobI(Box<GlobPattern>),
    /// Matches substrings with a regular expression.
    ///
    /// The regex operates on bytes rather than on `str`.
    Regex(regex::bytes::Regex),
    /// Matches substrings with a case‐insensitive regular expression.
    ///
    /// The regex operates on bytes rather than on `str`.
    RegexI(regex::bytes::Regex),
}
113
114impl StringPattern {
    /// Pattern that matches any string.
    ///
    /// This is represented as an empty substring pattern.
    pub const fn all() -> Self {
        Self::Substring(String::new())
    }
119
120    /// Constructs a pattern that matches exactly.
121    pub fn exact(src: impl Into<String>) -> Self {
122        Self::Exact(src.into())
123    }
124
125    /// Constructs a pattern that matches case‐insensitively.
126    pub fn exact_i(src: impl Into<String>) -> Self {
127        Self::ExactI(src.into())
128    }
129
130    /// Constructs a pattern that matches a substring.
131    pub fn substring(src: impl Into<String>) -> Self {
132        Self::Substring(src.into())
133    }
134
135    /// Constructs a pattern that case‐insensitively matches a substring.
136    pub fn substring_i(src: impl Into<String>) -> Self {
137        Self::SubstringI(src.into())
138    }
139
140    /// Parses the given string as a glob pattern.
141    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
142        if !src.contains(is_glob_char) {
143            return Ok(Self::exact(src));
144        }
145        Ok(Self::Glob(Box::new(parse_glob(src, false)?)))
146    }
147
148    /// Parses the given string as a case‐insensitive glob pattern.
149    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
150        // No special case for !src.contains(is_glob_char) because it's unclear
151        // whether we'll use unicode case comparison for "exact-i" patterns.
152        // "glob-i" should always be ASCII-based.
153        Ok(Self::GlobI(Box::new(parse_glob(src, true)?)))
154    }
155
156    /// Parses the given string as a regular expression.
157    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
158        let pattern = regex::bytes::Regex::new(src).map_err(StringPatternParseError::Regex)?;
159        Ok(Self::Regex(pattern))
160    }
161
162    /// Parses the given string as a case-insensitive regular expression.
163    pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
164        let pattern = regex::bytes::RegexBuilder::new(src)
165            .case_insensitive(true)
166            .build()
167            .map_err(StringPatternParseError::Regex)?;
168        Ok(Self::RegexI(pattern))
169    }
170
171    /// Parses the given string as a pattern of the specified `kind`.
172    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
173        match kind {
174            "exact" => Ok(Self::exact(src)),
175            "exact-i" => Ok(Self::exact_i(src)),
176            "substring" => Ok(Self::substring(src)),
177            "substring-i" => Ok(Self::substring_i(src)),
178            "glob" => Self::glob(src),
179            "glob-i" => Self::glob_i(src),
180            "regex" => Self::regex(src),
181            "regex-i" => Self::regex_i(src),
182            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
183        }
184    }
185
186    /// Returns true if this pattern trivially matches any input strings.
187    fn is_all(&self) -> bool {
188        match self {
189            Self::Exact(_) | Self::ExactI(_) => false,
190            Self::Substring(needle) | Self::SubstringI(needle) => needle.is_empty(),
191            Self::Glob(pattern) | Self::GlobI(pattern) => pattern.as_str() == "*",
192            Self::Regex(pattern) | Self::RegexI(pattern) => pattern.as_str().is_empty(),
193        }
194    }
195
196    /// Returns true if this pattern matches input strings exactly.
197    pub fn is_exact(&self) -> bool {
198        self.as_exact().is_some()
199    }
200
201    /// Returns a literal pattern if this should match input strings exactly.
202    ///
203    /// This can be used to optimize map lookup by exact key.
204    pub fn as_exact(&self) -> Option<&str> {
205        // TODO: Handle trivial case‐insensitive patterns here? It might make people
206        // expect they can use case‐insensitive patterns in contexts where they
207        // generally can’t.
208        match self {
209            Self::Exact(literal) => Some(literal),
210            _ => None,
211        }
212    }
213
214    /// Returns the original string of this pattern.
215    pub fn as_str(&self) -> &str {
216        match self {
217            Self::Exact(literal) => literal,
218            Self::ExactI(literal) => literal,
219            Self::Substring(needle) => needle,
220            Self::SubstringI(needle) => needle,
221            Self::Glob(pattern) => pattern.as_str(),
222            Self::GlobI(pattern) => pattern.as_str(),
223            Self::Regex(pattern) => pattern.as_str(),
224            Self::RegexI(pattern) => pattern.as_str(),
225        }
226    }
227
228    /// Converts this pattern to a glob string. Returns `None` if the pattern
229    /// can't be represented as a glob.
230    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
231        // TODO: Handle trivial case‐insensitive patterns here? It might make people
232        // expect they can use case‐insensitive patterns in contexts where they
233        // generally can’t.
234        match self {
235            Self::Exact(literal) => Some(globset::escape(literal).into()),
236            Self::Substring(needle) => {
237                if needle.is_empty() {
238                    Some("*".into())
239                } else {
240                    Some(format!("*{}*", globset::escape(needle)).into())
241                }
242            }
243            Self::Glob(pattern) => Some(pattern.as_str().into()),
244            Self::ExactI(_) => None,
245            Self::SubstringI(_) => None,
246            Self::GlobI(_) => None,
247            Self::Regex(_) => None,
248            Self::RegexI(_) => None,
249        }
250    }
251
252    fn to_match_fn(&self) -> Box<DynMatchFn> {
253        // TODO: Unicode case folding is complicated and can be
254        // locale‐specific. The `globset` crate and Gitoxide only deal with
255        // ASCII case folding, so we do the same here; a more elaborate case
256        // folding system will require making sure those behave in a matching
257        // manner where relevant. That said, regex patterns are unicode-aware by
258        // default, so we already have some inconsistencies.
259        //
260        // Care will need to be taken regarding normalization and the choice of an
261        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
262        // that it is compatible with the standard case‐insensitivity of haystack
263        // components (like internationalized domain names in email addresses). The
264        // availability of normalization and case folding schemes in database backends
265        // will also need to be considered. A locale‐specific case folding
266        // scheme would likely not be appropriate for Jujutsu.
267        //
268        // For some discussion of this topic, see:
269        // <https://github.com/unicode-org/icu4x/issues/3151>
270        match self {
271            Self::Exact(literal) => {
272                let literal = literal.clone();
273                Box::new(move |haystack| haystack == literal.as_bytes())
274            }
275            Self::ExactI(literal) => {
276                let literal = literal.clone();
277                Box::new(move |haystack| haystack.eq_ignore_ascii_case(literal.as_bytes()))
278            }
279            Self::Substring(needle) => {
280                let needle = needle.clone();
281                Box::new(move |haystack| haystack.contains_str(&needle))
282            }
283            Self::SubstringI(needle) => {
284                let needle = needle.to_ascii_lowercase();
285                Box::new(move |haystack| haystack.to_ascii_lowercase().contains_str(&needle))
286            }
287            // (Glob, GlobI) and (Regex, RegexI) pairs are identical here, but
288            // callers might want to translate these to backend-specific query
289            // differently.
290            Self::Glob(pattern) | Self::GlobI(pattern) => {
291                let pattern = pattern.to_regex();
292                Box::new(move |haystack| pattern.is_match(haystack))
293            }
294            Self::Regex(pattern) | Self::RegexI(pattern) => {
295                let pattern = pattern.clone();
296                Box::new(move |haystack| pattern.is_match(haystack))
297            }
298        }
299    }
300
301    /// Creates matcher object from this pattern.
302    pub fn to_matcher(&self) -> StringMatcher {
303        if self.is_all() {
304            StringMatcher::All
305        } else if let Some(literal) = self.as_exact() {
306            StringMatcher::Exact(literal.to_owned())
307        } else {
308            StringMatcher::Fn(self.to_match_fn())
309        }
310    }
311
312    /// Converts the pattern into a bytes regex.
313    pub fn to_regex(&self) -> regex::bytes::Regex {
314        match self {
315            Self::Exact(literal) => {
316                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
317                    .build()
318                    .expect("impossible to fail to compile regex of literal")
319            }
320            Self::ExactI(literal) => {
321                regex::bytes::RegexBuilder::new(&format!("^{}$", regex::escape(literal)))
322                    .case_insensitive(true)
323                    .build()
324                    .expect("impossible to fail to compile regex of literal")
325            }
326            Self::Substring(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
327                .build()
328                .expect("impossible to fail to compile regex of literal"),
329            Self::SubstringI(literal) => regex::bytes::RegexBuilder::new(&regex::escape(literal))
330                .case_insensitive(true)
331                .build()
332                .expect("impossible to fail to compile regex of literal"),
333            Self::Glob(glob_pattern) => glob_pattern.to_regex(),
334            // The regex generated represents the case insensitivity itself
335            Self::GlobI(glob_pattern) => glob_pattern.to_regex(),
336            Self::Regex(regex) => regex.clone(),
337            Self::RegexI(regex) => regex.clone(),
338        }
339    }
340}
341
342impl fmt::Display for StringPattern {
343    /// Shows the original string of this pattern.
344    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345        write!(f, "{}", self.as_str())
346    }
347}
348
/// AST-level representation of the string matcher expression.
///
/// Child nodes are boxed since the type is recursive.
#[derive(Clone, Debug)]
pub enum StringExpression {
    // None and All can be represented by using Pattern. Add them if needed.
    /// Matches pattern.
    Pattern(Box<StringPattern>),
    /// Matches anything other than the expression.
    NotIn(Box<Self>),
    /// Matches one of the expressions.
    Union(Box<Self>, Box<Self>),
    /// Matches both expressions.
    Intersection(Box<Self>, Box<Self>),
}
362
363impl StringExpression {
364    /// Expression that matches nothing.
365    pub fn none() -> Self {
366        Self::all().negated()
367    }
368
369    /// Expression that matches everything.
370    pub fn all() -> Self {
371        Self::pattern(StringPattern::all())
372    }
373
374    /// Expression that matches the given pattern.
375    pub fn pattern(pattern: StringPattern) -> Self {
376        Self::Pattern(Box::new(pattern))
377    }
378
379    /// Expression that matches strings exactly.
380    pub fn exact(src: impl Into<String>) -> Self {
381        Self::pattern(StringPattern::exact(src))
382    }
383
384    /// Expression that matches substrings.
385    pub fn substring(src: impl Into<String>) -> Self {
386        Self::pattern(StringPattern::substring(src))
387    }
388
389    /// Expression that matches anything other than this expression.
390    pub fn negated(self) -> Self {
391        Self::NotIn(Box::new(self))
392    }
393
394    /// Expression that matches `self` or `other` (or both).
395    pub fn union(self, other: Self) -> Self {
396        Self::Union(Box::new(self), Box::new(other))
397    }
398
399    /// Expression that matches any of the given `expressions`.
400    pub fn union_all(expressions: Vec<Self>) -> Self {
401        to_binary_expression(expressions, &Self::none, &Self::union)
402    }
403
404    /// Expression that matches both `self` and `other`.
405    pub fn intersection(self, other: Self) -> Self {
406        Self::Intersection(Box::new(self), Box::new(other))
407    }
408
409    fn dfs_pre(&self) -> impl Iterator<Item = &Self> {
410        let mut stack: Vec<&Self> = vec![self];
411        iter::from_fn(move || {
412            let expr = stack.pop()?;
413            match expr {
414                Self::Pattern(_) => {}
415                Self::NotIn(expr) => stack.push(expr),
416                Self::Union(expr1, expr2) | Self::Intersection(expr1, expr2) => {
417                    stack.push(expr2);
418                    stack.push(expr1);
419                }
420            }
421            Some(expr)
422        })
423    }
424
425    /// Iterates exact string patterns recursively from this expression.
426    ///
427    /// For example, `"a", "b", "c"` will be yielded in that order for
428    /// expression `"a" | glob:"?" & "b" | ~"c"`.
429    pub fn exact_strings(&self) -> impl Iterator<Item = &str> {
430        // pre/post-ordering doesn't matter so long as children are visited from
431        // left to right.
432        self.dfs_pre().filter_map(|expr| match expr {
433            Self::Pattern(pattern) => pattern.as_exact(),
434            _ => None,
435        })
436    }
437
438    /// Transforms the expression tree to matcher object.
439    pub fn to_matcher(&self) -> StringMatcher {
440        match self {
441            Self::Pattern(pattern) => pattern.to_matcher(),
442            Self::NotIn(expr) => {
443                let p = expr.to_matcher().into_match_fn();
444                StringMatcher::Fn(Box::new(move |haystack| !p(haystack)))
445            }
446            Self::Union(expr1, expr2) => {
447                let p1 = expr1.to_matcher().into_match_fn();
448                let p2 = expr2.to_matcher().into_match_fn();
449                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) || p2(haystack)))
450            }
451            Self::Intersection(expr1, expr2) => {
452                let p1 = expr1.to_matcher().into_match_fn();
453                let p2 = expr2.to_matcher().into_match_fn();
454                StringMatcher::Fn(Box::new(move |haystack| p1(haystack) && p2(haystack)))
455            }
456        }
457    }
458}
459
/// Folds the `expressions` list into a binary tree by using the associative
/// `binary` operation.
///
/// An empty list yields the `unit` node, and a single expression is returned
/// as is.
fn to_binary_expression<T>(
    expressions: Vec<T>,
    unit: &impl Fn() -> T,
    binary: &impl Fn(T, T) -> T,
) -> T {
    let mut lower = expressions;
    if lower.len() < 2 {
        // Zero or one item: nothing to combine.
        return lower.pop().unwrap_or_else(unit);
    }
    // Split in halves so the tree is balanced, which minimizes the recursion
    // depth.
    let upper = lower.split_off(lower.len() / 2);
    let lhs = to_binary_expression(lower, unit, binary);
    let rhs = to_binary_expression(upper, unit, binary);
    binary(lhs, rhs)
}
481
/// Type-erased predicate that tests whether a byte haystack matches.
type DynMatchFn = dyn Fn(&[u8]) -> bool;
483
/// Matcher for strings and bytes.
///
/// The `All` and `Exact` cases are kept as separate variants (rather than
/// being folded into `Fn`) so that map lookups can take a shortcut for them.
pub enum StringMatcher {
    /// Matches any strings.
    All,
    /// Matches strings exactly.
    Exact(String),
    /// Tests matches by arbitrary function.
    Fn(Box<DynMatchFn>),
}
493
494impl StringMatcher {
495    /// Matcher that matches any strings.
496    pub const fn all() -> Self {
497        Self::All
498    }
499
500    /// Matcher that matches `src` exactly.
501    pub fn exact(src: impl Into<String>) -> Self {
502        Self::Exact(src.into())
503    }
504
505    /// Returns true if this matches the `haystack` string.
506    pub fn is_match(&self, haystack: &str) -> bool {
507        self.is_match_bytes(haystack.as_bytes())
508    }
509
510    /// Returns true if this matches the `haystack` bytes.
511    pub fn is_match_bytes(&self, haystack: &[u8]) -> bool {
512        match self {
513            Self::All => true,
514            Self::Exact(needle) => haystack == needle.as_bytes(),
515            Self::Fn(predicate) => predicate(haystack),
516        }
517    }
518
519    /// Iterates over matching lines in `text`.
520    pub fn match_lines<'a>(&self, text: &'a [u8]) -> impl Iterator<Item = &'a [u8]> {
521        // The pattern is matched line by line so that it can be anchored to line
522        // start/end. For example, exact:"" will match blank lines.
523        text.split_inclusive(|b| *b == b'\n').filter(|line| {
524            let line = line.strip_suffix(b"\n").unwrap_or(line);
525            self.is_match_bytes(line)
526        })
527    }
528
529    fn into_match_fn(self) -> Box<DynMatchFn> {
530        match self {
531            Self::All => Box::new(|_haystack| true),
532            Self::Exact(needle) => Box::new(move |haystack| haystack == needle.as_bytes()),
533            Self::Fn(predicate) => predicate,
534        }
535    }
536
537    /// Iterates entries of the given `map` whose string keys match this.
538    pub fn filter_btree_map<'a, K: Borrow<str> + Ord, V>(
539        &self,
540        map: &'a BTreeMap<K, V>,
541    ) -> impl Iterator<Item = (&'a K, &'a V)> {
542        self.filter_btree_map_with(map, |key| key, |key| key)
543    }
544
545    /// Iterates entries of the given `map` whose string-like keys match this.
546    ///
547    /// The borrowed key type is constrained by the `Deref::Target`. It must be
548    /// convertible to/from `str`.
549    pub fn filter_btree_map_as_deref<'a, K, V>(
550        &self,
551        map: &'a BTreeMap<K, V>,
552    ) -> impl Iterator<Item = (&'a K, &'a V)>
553    where
554        K: Borrow<K::Target> + Deref + Ord,
555        K::Target: AsRef<str> + Ord,
556        str: AsRef<K::Target>,
557    {
558        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
559    }
560
561    fn filter_btree_map_with<'a, K, Q, V>(
562        &self,
563        map: &'a BTreeMap<K, V>,
564        from_key: impl Fn(&Q) -> &str,
565        to_key: impl Fn(&str) -> &Q,
566    ) -> impl Iterator<Item = (&'a K, &'a V)>
567    where
568        K: Borrow<Q> + Ord,
569        Q: Ord + ?Sized,
570    {
571        match self {
572            Self::All => Either::Left(map.iter()),
573            Self::Exact(key) => {
574                Either::Right(Either::Left(map.get_key_value(to_key(key)).into_iter()))
575            }
576            Self::Fn(predicate) => {
577                Either::Right(Either::Right(map.iter().filter(move |&(key, _)| {
578                    predicate(from_key(key.borrow()).as_bytes())
579                })))
580            }
581        }
582    }
583}
584
585impl Debug for StringMatcher {
586    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
587        match self {
588            Self::All => write!(f, "All"),
589            Self::Exact(needle) => f.debug_tuple("Exact").field(needle).finish(),
590            Self::Fn(_) => f.debug_tuple("Fn").finish_non_exhaustive(),
591        }
592    }
593}
594
595#[cfg(test)]
596mod tests {
597    use assert_matches::assert_matches;
598    use itertools::Itertools as _;
599    use maplit::btreemap;
600
601    use super::*;
602
603    fn insta_settings() -> insta::Settings {
604        let mut settings = insta::Settings::clone_current();
605        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
606        // the output more readable.
607        for _ in 0..4 {
608            settings.add_filter(
609                r"(?x)
610                \b([A-Z]\w*)\(\n
611                    \s*(.{1,60}),\n
612                \s*\)",
613                "$1($2)",
614            );
615        }
616        settings
617    }
618
    /// Checks the glob strings produced for convertible patterns, including
    /// escaping of `*` in literals.
    #[test]
    fn test_string_pattern_to_glob() {
        assert_eq!(StringPattern::all().to_glob(), Some("*".into()));
        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
        // Metacharacters in literals are escaped with brackets.
        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
        assert_eq!(
            StringPattern::glob("*").unwrap().to_glob(),
            Some("*".into())
        );
        // Substrings are surrounded by wildcards.
        assert_eq!(
            StringPattern::Substring("a".into()).to_glob(),
            Some("*a*".into())
        );
        assert_eq!(
            StringPattern::Substring("*".into()).to_glob(),
            Some("*[*]*".into())
        );
    }
637
    /// Checks that each kind name is parsed into the corresponding variant
    /// and that the source string is retained.
    #[test]
    fn test_parse() {
        // Parse specific pattern kinds.
        assert_matches!(
            StringPattern::from_str_kind("foo", "exact"),
            Ok(StringPattern::Exact(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo*", "glob"),
            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "substring"),
            Ok(StringPattern::Substring(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "substring-i"),
            Ok(StringPattern::SubstringI(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "regex"),
            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "regex-i"),
            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
        );
    }
666
    /// Checks glob matching semantics: no path-specific handling of "." and
    /// "/", backslash escapes, newlines, and case sensitivity.
    #[test]
    fn test_glob_is_match() {
        let glob = |src: &str| StringPattern::glob(src).unwrap().to_matcher();
        let glob_i = |src: &str| StringPattern::glob_i(src).unwrap().to_matcher();

        // A glob without wildcards has to match the whole string.
        assert!(glob("foo").is_match("foo"));
        assert!(!glob("foo").is_match("foobar"));

        // "." in string isn't any special
        assert!(glob("*").is_match(".foo"));

        // "/" in string isn't any special
        assert!(glob("*").is_match("foo/bar"));
        assert!(glob(r"*/*").is_match("foo/bar"));
        assert!(!glob(r"*/*").is_match(r"foo\bar"));

        // "\" is an escape character
        assert!(!glob(r"*\*").is_match("foo/bar"));
        assert!(glob(r"*\*").is_match("foo*"));
        assert!(glob(r"\\").is_match(r"\"));

        // "*" matches newline
        assert!(glob(r"*").is_match("foo\nbar"));

        // Only the "-i" variant ignores case.
        assert!(!glob("f?O").is_match("Foo"));
        assert!(glob_i("f?O").is_match("Foo"));
    }
694
    /// Checks regex matching in both the default Unicode mode and the
    /// ASCII-compatible `(?-u)` mode.
    #[test]
    fn test_regex_is_match() {
        let regex = |src: &str| StringPattern::regex(src).unwrap().to_matcher();
        // Unicode mode is enabled by default
        assert!(regex(r"^\w$").is_match("\u{c0}"));
        assert!(regex(r"^.$").is_match("\u{c0}"));
        // ASCII-compatible mode should also work
        assert!(regex(r"^(?-u)\w$").is_match("a"));
        assert!(!regex(r"^(?-u)\w$").is_match("\u{c0}"));
        // U+00C0 is two bytes long in UTF-8.
        assert!(regex(r"^(?-u).{2}$").is_match("\u{c0}"));
    }
706
707    #[test]
708    fn test_string_pattern_to_regex() {
709        let check = |pattern: StringPattern, match_to: &str| {
710            let regex = pattern.to_regex();
711            regex.is_match(match_to.as_bytes())
712        };
713        assert!(check(StringPattern::exact("$a"), "$a"));
714        assert!(!check(StringPattern::exact("$a"), "$A"));
715        assert!(!check(StringPattern::exact("a"), "aa"));
716        assert!(!check(StringPattern::exact("a"), "aa"));
717        assert!(check(StringPattern::exact_i("a"), "A"));
718        assert!(check(StringPattern::substring("$a"), "$abc"));
719        assert!(!check(StringPattern::substring("$a"), "$Abc"));
720        assert!(check(StringPattern::substring_i("$a"), "$Abc"));
721        assert!(!check(StringPattern::glob("a").unwrap(), "A"));
722        assert!(check(StringPattern::glob_i("a").unwrap(), "A"));
723        assert!(check(StringPattern::regex("^a{1,3}").unwrap(), "abcde"));
724        assert!(!check(StringPattern::regex("^a{1,3}").unwrap(), "Abcde"));
725        assert!(check(StringPattern::regex_i("^a{1,3}").unwrap(), "Abcde"));
726    }
727
    /// Checks which matcher variant is chosen for exact patterns.
    #[test]
    fn test_exact_pattern_to_matcher() {
        // Case-sensitive literals are kept as Exact, even if empty.
        assert_matches!(
            StringPattern::exact("").to_matcher(),
            StringMatcher::Exact(needle) if needle.is_empty()
        );
        assert_matches!(
            StringPattern::exact("x").to_matcher(),
            StringMatcher::Exact(needle) if needle == "x"
        );

        // Case-insensitive literals are matched by function.
        assert_matches!(
            StringPattern::exact_i("").to_matcher(),
            StringMatcher::Fn(_) // or Exact
        );
        assert_matches!(
            StringPattern::exact_i("x").to_matcher(),
            StringMatcher::Fn(_)
        );
    }
748
    /// Checks which matcher variant is chosen for substring patterns: the
    /// empty needle matches everything, anything else is matched by function.
    #[test]
    fn test_substring_pattern_to_matcher() {
        assert_matches!(
            StringPattern::substring("").to_matcher(),
            StringMatcher::All
        );
        assert_matches!(
            StringPattern::substring("x").to_matcher(),
            StringMatcher::Fn(_)
        );

        // The same applies to the case-insensitive variant.
        assert_matches!(
            StringPattern::substring_i("").to_matcher(),
            StringMatcher::All
        );
        assert_matches!(
            StringPattern::substring_i("x").to_matcher(),
            StringMatcher::Fn(_)
        );
    }
769
    /// Checks which matcher variant is chosen for glob patterns.
    #[test]
    fn test_glob_pattern_to_matcher() {
        // Globs without metacharacters are parsed as exact patterns.
        assert_matches!(
            StringPattern::glob("").unwrap().to_matcher(),
            StringMatcher::Exact(_)
        );
        assert_matches!(
            StringPattern::glob("x").unwrap().to_matcher(),
            StringMatcher::Exact(_)
        );
        assert_matches!(
            StringPattern::glob("x?").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );
        // A lone "*" is recognized as matching everything.
        assert_matches!(
            StringPattern::glob("*").unwrap().to_matcher(),
            StringMatcher::All
        );
        assert_matches!(
            StringPattern::glob(r"\\").unwrap().to_matcher(),
            StringMatcher::Fn(_) // or Exact(r"\")
        );

        // Case-insensitive globs are never reduced to exact patterns.
        assert_matches!(
            StringPattern::glob_i("").unwrap().to_matcher(),
            StringMatcher::Fn(_) // or Exact
        );
        assert_matches!(
            StringPattern::glob_i("x").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );
        assert_matches!(
            StringPattern::glob_i("x?").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );
        assert_matches!(
            StringPattern::glob_i("*").unwrap().to_matcher(),
            StringMatcher::All
        );
    }
810
    /// Checks which matcher variant is chosen for regex patterns: only the
    /// empty regex is recognized as matching everything.
    #[test]
    fn test_regex_pattern_to_matcher() {
        assert_matches!(
            StringPattern::regex("").unwrap().to_matcher(),
            StringMatcher::All
        );
        assert_matches!(
            StringPattern::regex("x").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );
        assert_matches!(
            StringPattern::regex(".").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );

        // The same applies to the case-insensitive variant.
        assert_matches!(
            StringPattern::regex_i("").unwrap().to_matcher(),
            StringMatcher::All
        );
        assert_matches!(
            StringPattern::regex_i("x").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );
        assert_matches!(
            StringPattern::regex_i(".").unwrap().to_matcher(),
            StringMatcher::Fn(_)
        );
    }
839
840    #[test]
841    fn test_union_all_expressions() {
842        let settings = insta_settings();
843        let _guard = settings.bind_to_scope();
844
845        insta::assert_debug_snapshot!(
846            StringExpression::union_all(vec![]),
847            @r#"NotIn(Pattern(Substring("")))"#);
848        insta::assert_debug_snapshot!(
849            StringExpression::union_all(vec![StringExpression::exact("a")]),
850            @r#"Pattern(Exact("a"))"#);
851        insta::assert_debug_snapshot!(
852            StringExpression::union_all(vec![
853                StringExpression::exact("a"),
854                StringExpression::exact("b"),
855            ]),
856            @r#"
857        Union(
858            Pattern(Exact("a")),
859            Pattern(Exact("b")),
860        )
861        "#);
862        insta::assert_debug_snapshot!(
863            StringExpression::union_all(vec![
864                StringExpression::exact("a"),
865                StringExpression::exact("b"),
866                StringExpression::exact("c"),
867            ]),
868            @r#"
869        Union(
870            Pattern(Exact("a")),
871            Union(
872                Pattern(Exact("b")),
873                Pattern(Exact("c")),
874            ),
875        )
876        "#);
877        insta::assert_debug_snapshot!(
878            StringExpression::union_all(vec![
879                StringExpression::exact("a"),
880                StringExpression::exact("b"),
881                StringExpression::exact("c"),
882                StringExpression::exact("d"),
883            ]),
884            @r#"
885        Union(
886            Union(
887                Pattern(Exact("a")),
888                Pattern(Exact("b")),
889            ),
890            Union(
891                Pattern(Exact("c")),
892                Pattern(Exact("d")),
893            ),
894        )
895        "#);
896    }
897
898    #[test]
899    fn test_exact_strings_in_expression() {
900        assert_eq!(
901            StringExpression::all().exact_strings().collect_vec(),
902            [""; 0]
903        );
904        assert_eq!(
905            StringExpression::union_all(vec![
906                StringExpression::exact("a"),
907                StringExpression::substring("b"),
908                StringExpression::intersection(
909                    StringExpression::exact("c"),
910                    StringExpression::exact("d").negated(),
911                ),
912            ])
913            .exact_strings()
914            .collect_vec(),
915            ["a", "c", "d"]
916        );
917    }
918
919    #[test]
920    fn test_trivial_expression_to_matcher() {
921        assert_matches!(StringExpression::all().to_matcher(), StringMatcher::All);
922        assert_matches!(
923            StringExpression::exact("x").to_matcher(),
924            StringMatcher::Exact(needle) if needle == "x"
925        );
926    }
927
928    #[test]
929    fn test_compound_expression_to_matcher() {
930        let matcher = StringExpression::exact("foo").negated().to_matcher();
931        assert!(!matcher.is_match("foo"));
932        assert!(matcher.is_match("bar"));
933
934        let matcher = StringExpression::union(
935            StringExpression::exact("foo"),
936            StringExpression::exact("bar"),
937        )
938        .to_matcher();
939        assert!(matcher.is_match("foo"));
940        assert!(matcher.is_match("bar"));
941        assert!(!matcher.is_match("baz"));
942
943        let matcher = StringExpression::intersection(
944            StringExpression::substring("a"),
945            StringExpression::substring("r"),
946        )
947        .to_matcher();
948        assert!(!matcher.is_match("foo"));
949        assert!(matcher.is_match("bar"));
950        assert!(!matcher.is_match("baz"));
951    }
952
953    #[test]
954    fn test_matcher_is_match() {
955        assert!(StringMatcher::all().is_match(""));
956        assert!(StringMatcher::all().is_match("foo"));
957        assert!(!StringMatcher::exact("o").is_match(""));
958        assert!(!StringMatcher::exact("o").is_match("foo"));
959        assert!(StringMatcher::exact("foo").is_match("foo"));
960        assert!(StringPattern::substring("o").to_matcher().is_match("foo"));
961    }
962
963    #[test]
964    fn test_matcher_match_lines() {
965        // TODO: Yield a match for the empty line?
966        assert_eq!(
967            StringMatcher::all().match_lines(b"").collect_vec(),
968            Vec::<&[u8]>::new()
969        );
970        assert_eq!(
971            StringMatcher::all().match_lines(b"\n").collect_vec(),
972            vec![b"\n"]
973        );
974        assert_eq!(
975            StringMatcher::all().match_lines(b"foo").collect_vec(),
976            vec![b"foo"]
977        );
978        assert_eq!(
979            StringMatcher::all().match_lines(b"foo\n").collect_vec(),
980            vec![b"foo\n"]
981        );
982        assert_eq!(
983            StringMatcher::exact("foo")
984                .match_lines(b"foo\nbar\n")
985                .collect_vec(),
986            vec![b"foo\n"]
987        );
988        assert_eq!(
989            StringMatcher::exact("foo\n")
990                .match_lines(b"foo\nbar\n")
991                .collect_vec(),
992            Vec::<&[u8]>::new()
993        );
994    }
995
996    #[test]
997    fn test_matcher_filter_btree_map() {
998        let data = btreemap! {
999            "bar" => (),
1000            "baz" => (),
1001            "foo" => (),
1002        };
1003        let filter = |matcher: &StringMatcher| {
1004            matcher
1005                .filter_btree_map(&data)
1006                .map(|(&key, ())| key)
1007                .collect_vec()
1008        };
1009        assert_eq!(filter(&StringMatcher::all()), vec!["bar", "baz", "foo"]);
1010        assert_eq!(filter(&StringMatcher::exact("o")), vec![""; 0]);
1011        assert_eq!(filter(&StringMatcher::exact("foo")), vec!["foo"]);
1012        assert_eq!(
1013            filter(&StringPattern::substring("o").to_matcher()),
1014            vec!["foo"]
1015        );
1016        assert_eq!(
1017            filter(&StringPattern::substring("a").to_matcher()),
1018            vec!["bar", "baz"]
1019        );
1020    }
1021}