jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::ops::Deref;
23
24use either::Either;
25use thiserror::Error;
26
27/// Error occurred during pattern string parsing.
28#[derive(Debug, Error)]
29pub enum StringPatternParseError {
30    /// Unknown pattern kind is specified.
31    #[error("Invalid string pattern kind `{0}:`")]
32    InvalidKind(String),
33    /// Failed to parse glob pattern.
34    #[error(transparent)]
35    GlobPattern(glob::PatternError),
36    /// Failed to parse regular expression.
37    #[error(transparent)]
38    Regex(regex::Error),
39}
40
41/// A wrapper for [`glob::Pattern`] with a more concise Debug impl
42#[derive(Clone)]
43pub struct GlobPattern(pub glob::Pattern);
44
45impl GlobPattern {
46    fn as_str(&self) -> &str {
47        self.0.as_str()
48    }
49}
50
51impl Debug for GlobPattern {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
54    }
55}
56
57fn parse_glob(src: &str) -> Result<GlobPattern, StringPatternParseError> {
58    glob::Pattern::new(src)
59        .map(GlobPattern)
60        .map_err(StringPatternParseError::GlobPattern)
61}
62
63/// Pattern to be tested against string property like commit description or
64/// bookmark name.
65#[derive(Clone, Debug)]
66pub enum StringPattern {
67    /// Matches strings exactly.
68    Exact(String),
69    /// Matches strings case‐insensitively.
70    ExactI(String),
71    /// Matches strings that contain a substring.
72    Substring(String),
73    /// Matches strings that case‐insensitively contain a substring.
74    SubstringI(String),
75    /// Matches with a Unix‐style shell wildcard pattern.
76    Glob(GlobPattern),
77    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
78    GlobI(GlobPattern),
79    /// Matches substrings with a regular expression.
80    Regex(regex::Regex),
81    /// Matches substrings with a case‐insensitive regular expression.
82    RegexI(regex::Regex),
83}
84
85impl StringPattern {
86    /// Pattern that matches any string.
87    pub const fn everything() -> Self {
88        StringPattern::Substring(String::new())
89    }
90
91    /// Parses the given string as a [`StringPattern`]. Everything before the
92    /// first ":" is considered the string's prefix. If the prefix is
93    /// "exact[-i]:", "glob[-i]:", or "substring[-i]:", a pattern of the
94    /// specified kind is returned. Returns an error if the string has an
95    /// unrecognized prefix. Otherwise, a `StringPattern::Exact` is
96    /// returned.
97    pub fn parse(src: &str) -> Result<StringPattern, StringPatternParseError> {
98        if let Some((kind, pat)) = src.split_once(':') {
99            StringPattern::from_str_kind(pat, kind)
100        } else {
101            Ok(StringPattern::exact(src))
102        }
103    }
104
105    /// Constructs a pattern that matches exactly.
106    pub fn exact(src: impl Into<String>) -> Self {
107        StringPattern::Exact(src.into())
108    }
109
110    /// Constructs a pattern that matches case‐insensitively.
111    pub fn exact_i(src: impl Into<String>) -> Self {
112        StringPattern::ExactI(src.into())
113    }
114
115    /// Constructs a pattern that matches a substring.
116    pub fn substring(src: impl Into<String>) -> Self {
117        StringPattern::Substring(src.into())
118    }
119
120    /// Constructs a pattern that case‐insensitively matches a substring.
121    pub fn substring_i(src: impl Into<String>) -> Self {
122        StringPattern::SubstringI(src.into())
123    }
124
125    /// Parses the given string as a glob pattern.
126    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
127        // TODO: might be better to do parsing and compilation separately since
128        // not all backends would use the compiled pattern object.
129        // TODO: if no meta character found, it can be mapped to Exact.
130        Ok(StringPattern::Glob(parse_glob(src)?))
131    }
132
133    /// Parses the given string as a case‐insensitive glob pattern.
134    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
135        Ok(StringPattern::GlobI(parse_glob(src)?))
136    }
137
138    /// Parses the given string as a regular expression.
139    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
140        let pattern = regex::Regex::new(src).map_err(StringPatternParseError::Regex)?;
141        Ok(StringPattern::Regex(pattern))
142    }
143
144    /// Parses the given string as a case-insensitive regular expression.
145    pub fn regex_i(src: &str) -> Result<Self, StringPatternParseError> {
146        let pattern = regex::RegexBuilder::new(src)
147            .case_insensitive(true)
148            .build()
149            .map_err(StringPatternParseError::Regex)?;
150        Ok(StringPattern::RegexI(pattern))
151    }
152
153    /// Parses the given string as a pattern of the specified `kind`.
154    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
155        match kind {
156            "exact" => Ok(StringPattern::exact(src)),
157            "exact-i" => Ok(StringPattern::exact_i(src)),
158            "substring" => Ok(StringPattern::substring(src)),
159            "substring-i" => Ok(StringPattern::substring_i(src)),
160            "glob" => StringPattern::glob(src),
161            "glob-i" => StringPattern::glob_i(src),
162            "regex" => StringPattern::regex(src),
163            "regex-i" => StringPattern::regex_i(src),
164            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
165        }
166    }
167
168    /// Returns true if this pattern matches input strings exactly.
169    pub fn is_exact(&self) -> bool {
170        self.as_exact().is_some()
171    }
172
173    /// Returns a literal pattern if this should match input strings exactly.
174    ///
175    /// This can be used to optimize map lookup by exact key.
176    pub fn as_exact(&self) -> Option<&str> {
177        // TODO: Handle trivial case‐insensitive patterns here? It might make people
178        // expect they can use case‐insensitive patterns in contexts where they
179        // generally can’t.
180        match self {
181            StringPattern::Exact(literal) => Some(literal),
182            _ => None,
183        }
184    }
185
186    /// Returns the original string of this pattern.
187    pub fn as_str(&self) -> &str {
188        match self {
189            StringPattern::Exact(literal) => literal,
190            StringPattern::ExactI(literal) => literal,
191            StringPattern::Substring(needle) => needle,
192            StringPattern::SubstringI(needle) => needle,
193            StringPattern::Glob(pattern) => pattern.as_str(),
194            StringPattern::GlobI(pattern) => pattern.as_str(),
195            StringPattern::Regex(pattern) => pattern.as_str(),
196            StringPattern::RegexI(pattern) => pattern.as_str(),
197        }
198    }
199
200    /// Converts this pattern to a glob string. Returns `None` if the pattern
201    /// can't be represented as a glob.
202    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
203        // TODO: Handle trivial case‐insensitive patterns here? It might make people
204        // expect they can use case‐insensitive patterns in contexts where they
205        // generally can’t.
206        match self {
207            StringPattern::Exact(literal) => Some(glob::Pattern::escape(literal).into()),
208            StringPattern::Substring(needle) => {
209                if needle.is_empty() {
210                    Some("*".into())
211                } else {
212                    Some(format!("*{}*", glob::Pattern::escape(needle)).into())
213                }
214            }
215            StringPattern::Glob(pattern) => Some(pattern.as_str().into()),
216            StringPattern::ExactI(_) => None,
217            StringPattern::SubstringI(_) => None,
218            StringPattern::GlobI(_) => None,
219            StringPattern::Regex(_) => None,
220            StringPattern::RegexI(_) => None,
221        }
222    }
223
224    /// Returns true if this pattern matches the `haystack`.
225    ///
226    /// When matching against a case‐insensitive pattern, only ASCII case
227    /// differences are currently folded. This may change in the future.
228    pub fn matches(&self, haystack: &str) -> bool {
229        // TODO: Unicode case folding is complicated and can be
230        // locale‐specific. The `glob` crate and Gitoxide only deal with ASCII
231        // case folding, so we do the same here; a more elaborate case folding
232        // system will require making sure those behave in a matching manner
233        // where relevant. That said, regex patterns are unicode-aware by
234        // default, so we already have some inconsistencies.
235        //
236        // Care will need to be taken regarding normalization and the choice of an
237        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
238        // that it is compatible with the standard case‐insensitivity of haystack
239        // components (like internationalized domain names in email addresses). The
240        // availability of normalization and case folding schemes in database backends
241        // will also need to be considered. A locale‐specific case folding
242        // scheme would likely not be appropriate for Jujutsu.
243        //
244        // For some discussion of this topic, see:
245        // <https://github.com/unicode-org/icu4x/issues/3151>
246        match self {
247            StringPattern::Exact(literal) => haystack == literal,
248            StringPattern::ExactI(literal) => haystack.eq_ignore_ascii_case(literal),
249            StringPattern::Substring(needle) => haystack.contains(needle),
250            StringPattern::SubstringI(needle) => haystack
251                .to_ascii_lowercase()
252                .contains(&needle.to_ascii_lowercase()),
253            StringPattern::Glob(pattern) => pattern.0.matches(haystack),
254            StringPattern::GlobI(pattern) => pattern.0.matches_with(
255                haystack,
256                glob::MatchOptions {
257                    case_sensitive: false,
258                    ..glob::MatchOptions::new()
259                },
260            ),
261            // Regex and RegexI are identical here, but callers might want to
262            // translate these to backend-specific query differently.
263            StringPattern::Regex(pattern) => pattern.is_match(haystack),
264            StringPattern::RegexI(pattern) => pattern.is_match(haystack),
265        }
266    }
267
268    /// Iterates entries of the given `map` whose string keys match this
269    /// pattern.
270    pub fn filter_btree_map<'a, 'b, K: Borrow<str> + Ord, V>(
271        &'b self,
272        map: &'a BTreeMap<K, V>,
273    ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, V> {
274        self.filter_btree_map_with(map, |key| key, |key| key)
275    }
276
277    /// Iterates entries of the given `map` whose string-like keys match this
278    /// pattern.
279    ///
280    /// The borrowed key type is constrained by the `Deref::Target`. It must be
281    /// convertible to/from `str`.
282    pub fn filter_btree_map_as_deref<'a, 'b, K, V>(
283        &'b self,
284        map: &'a BTreeMap<K, V>,
285    ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, V>
286    where
287        K: Borrow<K::Target> + Deref + Ord,
288        K::Target: AsRef<str> + Ord,
289        str: AsRef<K::Target>,
290    {
291        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
292    }
293
294    fn filter_btree_map_with<'a, 'b, K, Q, V, FromKey, ToKey>(
295        &'b self,
296        map: &'a BTreeMap<K, V>,
297        from_key: FromKey,
298        to_key: ToKey,
299        // TODO: Q, FromKey, and ToKey don't have to be captured, but
300        // "currently, all type parameters are required to be mentioned in the
301        // precise captures list" as of rustc 1.85.0.
302    ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, Q, V, FromKey, ToKey>
303    where
304        K: Borrow<Q> + Ord,
305        Q: Ord + ?Sized,
306        FromKey: Fn(&Q) -> &str,
307        ToKey: Fn(&str) -> &Q,
308    {
309        if let Some(key) = self.as_exact() {
310            Either::Left(map.get_key_value(to_key(key)).into_iter())
311        } else {
312            Either::Right(
313                map.iter()
314                    .filter(move |&(key, _)| self.matches(from_key(key.borrow()))),
315            )
316        }
317    }
318}
319
320impl fmt::Display for StringPattern {
321    /// Shows the original string of this pattern.
322    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323        write!(f, "{}", self.as_str())
324    }
325}
326
327#[cfg(test)]
328mod tests {
329    use assert_matches::assert_matches;
330
331    use super::*;
332
333    #[test]
334    fn test_string_pattern_to_glob() {
335        assert_eq!(StringPattern::everything().to_glob(), Some("*".into()));
336        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
337        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
338        assert_eq!(
339            StringPattern::glob("*").unwrap().to_glob(),
340            Some("*".into())
341        );
342        assert_eq!(
343            StringPattern::Substring("a".into()).to_glob(),
344            Some("*a*".into())
345        );
346        assert_eq!(
347            StringPattern::Substring("*".into()).to_glob(),
348            Some("*[*]*".into())
349        );
350    }
351
352    #[test]
353    fn test_parse() {
354        // Parse specific pattern kinds.
355        assert_matches!(
356            StringPattern::parse("exact:foo"),
357            Ok(StringPattern::Exact(s)) if s == "foo"
358        );
359        assert_matches!(
360            StringPattern::from_str_kind("foo", "exact"),
361            Ok(StringPattern::Exact(s)) if s == "foo"
362        );
363        assert_matches!(
364            StringPattern::parse("glob:foo*"),
365            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
366        );
367        assert_matches!(
368            StringPattern::from_str_kind("foo*", "glob"),
369            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
370        );
371        assert_matches!(
372            StringPattern::parse("substring:foo"),
373            Ok(StringPattern::Substring(s)) if s == "foo"
374        );
375        assert_matches!(
376            StringPattern::from_str_kind("foo", "substring"),
377            Ok(StringPattern::Substring(s)) if s == "foo"
378        );
379        assert_matches!(
380            StringPattern::parse("substring-i:foo"),
381            Ok(StringPattern::SubstringI(s)) if s == "foo"
382        );
383        assert_matches!(
384            StringPattern::from_str_kind("foo", "substring-i"),
385            Ok(StringPattern::SubstringI(s)) if s == "foo"
386        );
387        assert_matches!(
388            StringPattern::parse("regex:foo"),
389            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
390        );
391        assert_matches!(
392            StringPattern::from_str_kind("foo", "regex"),
393            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
394        );
395        assert_matches!(
396            StringPattern::parse("regex-i:foo"),
397            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
398        );
399        assert_matches!(
400            StringPattern::from_str_kind("foo", "regex-i"),
401            Ok(StringPattern::RegexI(p)) if p.as_str() == "foo"
402        );
403
404        // Parse a pattern that contains a : itself.
405        assert_matches!(
406            StringPattern::parse("exact:foo:bar"),
407            Ok(StringPattern::Exact(s)) if s == "foo:bar"
408        );
409
410        // If no kind is specified, the input is treated as an exact pattern.
411        assert_matches!(
412            StringPattern::parse("foo"),
413            Ok(StringPattern::Exact(s)) if s == "foo"
414        );
415
416        // Parsing an unknown prefix results in an error.
417        assert_matches!(
418            StringPattern::parse("unknown-prefix:foo"),
419            Err(StringPatternParseError::InvalidKind(_))
420        );
421    }
422}