jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21use std::fmt::Debug;
22use std::ops::Deref;
23
24use either::Either;
25use thiserror::Error;
26
/// Error occurred during pattern string parsing.
///
/// Returned by [`StringPattern::parse`], [`StringPattern::from_str_kind`], and
/// the fallible `glob`/`glob_i`/`regex` constructors of [`StringPattern`].
#[derive(Debug, Error)]
pub enum StringPatternParseError {
    /// Unknown pattern kind is specified. Holds the unrecognized kind name.
    #[error("Invalid string pattern kind `{0}:`")]
    InvalidKind(String),
    /// Failed to parse glob pattern.
    #[error(transparent)]
    GlobPattern(glob::PatternError),
    /// Failed to parse regular expression.
    #[error(transparent)]
    Regex(regex::Error),
}
40
/// A wrapper for [`glob::Pattern`] with a more concise `Debug` impl: only the
/// pattern string is printed.
#[derive(Clone)]
pub struct GlobPattern(pub glob::Pattern);
44
impl GlobPattern {
    /// Returns the pattern string of the wrapped [`glob::Pattern`].
    fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
50
51impl Debug for GlobPattern {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        f.debug_tuple("GlobPattern").field(&self.as_str()).finish()
54    }
55}
56
57fn parse_glob(src: &str) -> Result<GlobPattern, StringPatternParseError> {
58    glob::Pattern::new(src)
59        .map(GlobPattern)
60        .map_err(StringPatternParseError::GlobPattern)
61}
62
/// Pattern to be tested against string property like commit description or
/// bookmark name.
///
/// The case‐insensitive variants currently fold ASCII case only; see
/// [`StringPattern::matches`].
#[derive(Clone, Debug)]
pub enum StringPattern {
    /// Matches strings exactly.
    Exact(String),
    /// Matches strings case‐insensitively.
    ExactI(String),
    /// Matches strings that contain a substring.
    Substring(String),
    /// Matches strings that case‐insensitively contain a substring.
    SubstringI(String),
    /// Matches with a Unix‐style shell wildcard pattern.
    Glob(GlobPattern),
    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
    GlobI(GlobPattern),
    /// Matches substrings with a regular expression.
    Regex(regex::Regex),
    // TODO: Should we add RegexI and "regex-i" prefix?
}
83
84impl StringPattern {
85    /// Pattern that matches any string.
86    pub const fn everything() -> Self {
87        StringPattern::Substring(String::new())
88    }
89
90    /// Parses the given string as a [`StringPattern`]. Everything before the
91    /// first ":" is considered the string's prefix. If the prefix is
92    /// "exact[-i]:", "glob[-i]:", or "substring[-i]:", a pattern of the
93    /// specified kind is returned. Returns an error if the string has an
94    /// unrecognized prefix. Otherwise, a `StringPattern::Exact` is
95    /// returned.
96    pub fn parse(src: &str) -> Result<StringPattern, StringPatternParseError> {
97        if let Some((kind, pat)) = src.split_once(':') {
98            StringPattern::from_str_kind(pat, kind)
99        } else {
100            Ok(StringPattern::exact(src))
101        }
102    }
103
104    /// Constructs a pattern that matches exactly.
105    pub fn exact(src: impl Into<String>) -> Self {
106        StringPattern::Exact(src.into())
107    }
108
109    /// Constructs a pattern that matches case‐insensitively.
110    pub fn exact_i(src: impl Into<String>) -> Self {
111        StringPattern::ExactI(src.into())
112    }
113
114    /// Constructs a pattern that matches a substring.
115    pub fn substring(src: impl Into<String>) -> Self {
116        StringPattern::Substring(src.into())
117    }
118
119    /// Constructs a pattern that case‐insensitively matches a substring.
120    pub fn substring_i(src: impl Into<String>) -> Self {
121        StringPattern::SubstringI(src.into())
122    }
123
124    /// Parses the given string as a glob pattern.
125    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
126        // TODO: might be better to do parsing and compilation separately since
127        // not all backends would use the compiled pattern object.
128        // TODO: if no meta character found, it can be mapped to Exact.
129        Ok(StringPattern::Glob(parse_glob(src)?))
130    }
131
132    /// Parses the given string as a case‐insensitive glob pattern.
133    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
134        Ok(StringPattern::GlobI(parse_glob(src)?))
135    }
136
137    /// Parses the given string as a regular expression.
138    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
139        let pattern = regex::Regex::new(src).map_err(StringPatternParseError::Regex)?;
140        Ok(StringPattern::Regex(pattern))
141    }
142
143    /// Parses the given string as a pattern of the specified `kind`.
144    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
145        match kind {
146            "exact" => Ok(StringPattern::exact(src)),
147            "exact-i" => Ok(StringPattern::exact_i(src)),
148            "substring" => Ok(StringPattern::substring(src)),
149            "substring-i" => Ok(StringPattern::substring_i(src)),
150            "glob" => StringPattern::glob(src),
151            "glob-i" => StringPattern::glob_i(src),
152            "regex" => StringPattern::regex(src),
153            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
154        }
155    }
156
    /// Returns true if this pattern matches input strings exactly.
    ///
    /// This is true precisely when [`StringPattern::as_exact`] returns a
    /// literal.
    pub fn is_exact(&self) -> bool {
        self.as_exact().is_some()
    }
161
162    /// Returns a literal pattern if this should match input strings exactly.
163    ///
164    /// This can be used to optimize map lookup by exact key.
165    pub fn as_exact(&self) -> Option<&str> {
166        // TODO: Handle trivial case‐insensitive patterns here? It might make people
167        // expect they can use case‐insensitive patterns in contexts where they
168        // generally can’t.
169        match self {
170            StringPattern::Exact(literal) => Some(literal),
171            _ => None,
172        }
173    }
174
175    /// Returns the original string of this pattern.
176    pub fn as_str(&self) -> &str {
177        match self {
178            StringPattern::Exact(literal) => literal,
179            StringPattern::ExactI(literal) => literal,
180            StringPattern::Substring(needle) => needle,
181            StringPattern::SubstringI(needle) => needle,
182            StringPattern::Glob(pattern) => pattern.as_str(),
183            StringPattern::GlobI(pattern) => pattern.as_str(),
184            StringPattern::Regex(pattern) => pattern.as_str(),
185        }
186    }
187
188    /// Converts this pattern to a glob string. Returns `None` if the pattern
189    /// can't be represented as a glob.
190    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
191        // TODO: Handle trivial case‐insensitive patterns here? It might make people
192        // expect they can use case‐insensitive patterns in contexts where they
193        // generally can’t.
194        match self {
195            StringPattern::Exact(literal) => Some(glob::Pattern::escape(literal).into()),
196            StringPattern::Substring(needle) => {
197                if needle.is_empty() {
198                    Some("*".into())
199                } else {
200                    Some(format!("*{}*", glob::Pattern::escape(needle)).into())
201                }
202            }
203            StringPattern::Glob(pattern) => Some(pattern.as_str().into()),
204            StringPattern::ExactI(_) => None,
205            StringPattern::SubstringI(_) => None,
206            StringPattern::GlobI(_) => None,
207            StringPattern::Regex(_) => None,
208        }
209    }
210
211    /// Returns true if this pattern matches the `haystack`.
212    ///
213    /// When matching against a case‐insensitive pattern, only ASCII case
214    /// differences are currently folded. This may change in the future.
215    pub fn matches(&self, haystack: &str) -> bool {
216        // TODO: Unicode case folding is complicated and can be locale‐specific. The
217        // `glob` crate and Gitoxide only deal with ASCII case folding, so we do
218        // the same here; a more elaborate case folding system will require
219        // making sure those behave in a matching manner where relevant.
220        //
221        // Care will need to be taken regarding normalization and the choice of an
222        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
223        // that it is compatible with the standard case‐insensitivity of haystack
224        // components (like internationalized domain names in email addresses). The
225        // availability of normalization and case folding schemes in database backends
226        // will also need to be considered. A locale‐specific case folding
227        // scheme would likely not be appropriate for Jujutsu.
228        //
229        // For some discussion of this topic, see:
230        // <https://github.com/unicode-org/icu4x/issues/3151>
231        match self {
232            StringPattern::Exact(literal) => haystack == literal,
233            StringPattern::ExactI(literal) => haystack.eq_ignore_ascii_case(literal),
234            StringPattern::Substring(needle) => haystack.contains(needle),
235            StringPattern::SubstringI(needle) => haystack
236                .to_ascii_lowercase()
237                .contains(&needle.to_ascii_lowercase()),
238            StringPattern::Glob(pattern) => pattern.0.matches(haystack),
239            StringPattern::GlobI(pattern) => pattern.0.matches_with(
240                haystack,
241                glob::MatchOptions {
242                    case_sensitive: false,
243                    ..glob::MatchOptions::new()
244                },
245            ),
246            StringPattern::Regex(pattern) => pattern.is_match(haystack),
247        }
248    }
249
    /// Iterates entries of the given `map` whose string keys match this
    /// pattern.
    ///
    /// Keys are viewed as `str` through `K: Borrow<str>`, so both conversion
    /// closures handed to the shared implementation are the identity.
    pub fn filter_btree_map<'a, 'b, K: Borrow<str> + Ord, V>(
        &'b self,
        map: &'a BTreeMap<K, V>,
    ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, V> {
        self.filter_btree_map_with(map, |key| key, |key| key)
    }
258
    /// Iterates entries of the given `map` whose string-like keys match this
    /// pattern.
    ///
    /// The borrowed key type is constrained by the `Deref::Target`. It must be
    /// convertible to/from `str`; both conversions are done with
    /// `AsRef::as_ref`.
    pub fn filter_btree_map_as_deref<'a, 'b, K, V>(
        &'b self,
        map: &'a BTreeMap<K, V>,
    ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, V>
    where
        K: Borrow<K::Target> + Deref + Ord,
        K::Target: AsRef<str> + Ord,
        str: AsRef<K::Target>,
    {
        self.filter_btree_map_with(map, AsRef::as_ref, AsRef::as_ref)
    }
275
276    fn filter_btree_map_with<'a, 'b, K, Q, V, FromKey, ToKey>(
277        &'b self,
278        map: &'a BTreeMap<K, V>,
279        from_key: FromKey,
280        to_key: ToKey,
281        // TODO: Q, FromKey, and ToKey don't have to be captured, but
282        // "currently, all type parameters are required to be mentioned in the
283        // precise captures list" as of rustc 1.85.0.
284    ) -> impl Iterator<Item = (&'a K, &'a V)> + use<'a, 'b, K, Q, V, FromKey, ToKey>
285    where
286        K: Borrow<Q> + Ord,
287        Q: Ord + ?Sized,
288        FromKey: Fn(&Q) -> &str,
289        ToKey: Fn(&str) -> &Q,
290    {
291        if let Some(key) = self.as_exact() {
292            Either::Left(map.get_key_value(to_key(key)).into_iter())
293        } else {
294            Either::Right(
295                map.iter()
296                    .filter(move |&(key, _)| self.matches(from_key(key.borrow()))),
297            )
298        }
299    }
300}
301
302impl fmt::Display for StringPattern {
303    /// Shows the original string of this pattern.
304    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
305        write!(f, "{}", self.as_str())
306    }
307}
308
#[cfg(test)]
mod tests {
    use assert_matches::assert_matches;

    use super::*;

    /// Checks the glob string produced for each convertible pattern kind,
    /// including escaping of glob meta characters in literals.
    #[test]
    fn test_string_pattern_to_glob() {
        assert_eq!(StringPattern::everything().to_glob(), Some("*".into()));
        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
        assert_eq!(
            StringPattern::glob("*").unwrap().to_glob(),
            Some("*".into())
        );
        assert_eq!(
            StringPattern::Substring("a".into()).to_glob(),
            Some("*a*".into())
        );
        assert_eq!(
            StringPattern::Substring("*".into()).to_glob(),
            Some("*[*]*".into())
        );
    }

    /// Checks that every supported kind prefix is parsed into the matching
    /// variant, and that malformed input is reported as an error.
    #[test]
    fn test_parse() {
        // Parse specific pattern kinds.
        assert_matches!(
            StringPattern::parse("exact:foo"),
            Ok(StringPattern::Exact(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "exact"),
            Ok(StringPattern::Exact(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::parse("exact-i:foo"),
            Ok(StringPattern::ExactI(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "exact-i"),
            Ok(StringPattern::ExactI(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::parse("glob:foo*"),
            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo*", "glob"),
            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
        );
        assert_matches!(
            StringPattern::parse("glob-i:foo*"),
            Ok(StringPattern::GlobI(p)) if p.as_str() == "foo*"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo*", "glob-i"),
            Ok(StringPattern::GlobI(p)) if p.as_str() == "foo*"
        );
        assert_matches!(
            StringPattern::parse("substring:foo"),
            Ok(StringPattern::Substring(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "substring"),
            Ok(StringPattern::Substring(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::parse("substring-i:foo"),
            Ok(StringPattern::SubstringI(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "substring-i"),
            Ok(StringPattern::SubstringI(s)) if s == "foo"
        );
        assert_matches!(
            StringPattern::parse("regex:foo"),
            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
        );
        assert_matches!(
            StringPattern::from_str_kind("foo", "regex"),
            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
        );

        // Parse a pattern that contains a : itself.
        assert_matches!(
            StringPattern::parse("exact:foo:bar"),
            Ok(StringPattern::Exact(s)) if s == "foo:bar"
        );

        // If no kind is specified, the input is treated as an exact pattern.
        assert_matches!(
            StringPattern::parse("foo"),
            Ok(StringPattern::Exact(s)) if s == "foo"
        );

        // Parsing an unknown prefix results in an error.
        assert_matches!(
            StringPattern::parse("unknown-prefix:foo"),
            Err(StringPatternParseError::InvalidKind(_))
        );

        // Malformed glob and regex patterns are reported as compile errors.
        assert_matches!(
            StringPattern::parse("glob:abc["),
            Err(StringPatternParseError::GlobPattern(_))
        );
        assert_matches!(
            StringPattern::parse("regex:("),
            Err(StringPatternParseError::Regex(_))
        );
    }

    /// Checks the matching semantics of each pattern kind, including ASCII
    /// case folding for the case-insensitive kinds.
    #[test]
    fn test_matches() {
        assert!(StringPattern::everything().matches(""));
        assert!(StringPattern::everything().matches("foo"));

        assert!(StringPattern::exact("foo").matches("foo"));
        assert!(!StringPattern::exact("foo").matches("Foo"));
        assert!(!StringPattern::exact("foo").matches("foobar"));

        assert!(StringPattern::exact_i("foo").matches("FOO"));
        assert!(!StringPattern::exact_i("foo").matches("FOObar"));

        assert!(StringPattern::substring("oo").matches("foo"));
        assert!(!StringPattern::substring("OO").matches("foo"));

        assert!(StringPattern::substring_i("OO").matches("foo"));
        assert!(StringPattern::substring_i("oo").matches("FOO"));
        assert!(!StringPattern::substring_i("bar").matches("foo"));

        assert!(StringPattern::glob("f*").unwrap().matches("foo"));
        assert!(!StringPattern::glob("f*").unwrap().matches("Foo"));

        assert!(StringPattern::glob_i("f*").unwrap().matches("FOO"));
        assert!(!StringPattern::glob_i("f*").unwrap().matches("bar"));

        // Regex patterns are unanchored unless the pattern says otherwise.
        assert!(StringPattern::regex("oo").unwrap().matches("foo"));
        assert!(StringPattern::regex("^f.o$").unwrap().matches("foo"));
        assert!(!StringPattern::regex("^oo").unwrap().matches("foo"));
    }
}