jj_lib/
str_util.rs

1// Copyright 2021-2023 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String helpers.
16
17use std::borrow::Borrow;
18use std::borrow::Cow;
19use std::collections::BTreeMap;
20use std::fmt;
21
22use either::Either;
23use thiserror::Error;
24
25/// Error occurred during pattern string parsing.
26#[derive(Debug, Error)]
27pub enum StringPatternParseError {
28    /// Unknown pattern kind is specified.
29    #[error("Invalid string pattern kind `{0}:`")]
30    InvalidKind(String),
31    /// Failed to parse glob pattern.
32    #[error(transparent)]
33    GlobPattern(glob::PatternError),
34    /// Failed to parse regular expression.
35    #[error(transparent)]
36    Regex(regex::Error),
37}
38
39fn parse_glob(src: &str) -> Result<glob::Pattern, StringPatternParseError> {
40    glob::Pattern::new(src).map_err(StringPatternParseError::GlobPattern)
41}
42
43/// Pattern to be tested against string property like commit description or
44/// bookmark name.
45#[derive(Clone, Debug)]
46pub enum StringPattern {
47    /// Matches strings exactly.
48    Exact(String),
49    /// Matches strings case‐insensitively.
50    ExactI(String),
51    /// Matches strings that contain a substring.
52    Substring(String),
53    /// Matches strings that case‐insensitively contain a substring.
54    SubstringI(String),
55    /// Matches with a Unix‐style shell wildcard pattern.
56    Glob(glob::Pattern),
57    /// Matches with a case‐insensitive Unix‐style shell wildcard pattern.
58    GlobI(glob::Pattern),
59    /// Matches substrings with a regular expression.
60    Regex(regex::Regex),
61    // TODO: Should we add RegexI and "regex-i" prefix?
62}
63
64impl StringPattern {
65    /// Pattern that matches any string.
66    pub const fn everything() -> Self {
67        StringPattern::Substring(String::new())
68    }
69
70    /// Parses the given string as a [`StringPattern`]. Everything before the
71    /// first ":" is considered the string's prefix. If the prefix is
72    /// "exact[-i]:", "glob[-i]:", or "substring[-i]:", a pattern of the
73    /// specified kind is returned. Returns an error if the string has an
74    /// unrecognized prefix. Otherwise, a `StringPattern::Exact` is
75    /// returned.
76    pub fn parse(src: &str) -> Result<StringPattern, StringPatternParseError> {
77        if let Some((kind, pat)) = src.split_once(':') {
78            StringPattern::from_str_kind(pat, kind)
79        } else {
80            Ok(StringPattern::exact(src))
81        }
82    }
83
84    /// Constructs a pattern that matches exactly.
85    pub fn exact(src: impl Into<String>) -> Self {
86        StringPattern::Exact(src.into())
87    }
88
89    /// Constructs a pattern that matches case‐insensitively.
90    pub fn exact_i(src: impl Into<String>) -> Self {
91        StringPattern::ExactI(src.into())
92    }
93
94    /// Constructs a pattern that matches a substring.
95    pub fn substring(src: impl Into<String>) -> Self {
96        StringPattern::Substring(src.into())
97    }
98
99    /// Constructs a pattern that case‐insensitively matches a substring.
100    pub fn substring_i(src: impl Into<String>) -> Self {
101        StringPattern::SubstringI(src.into())
102    }
103
104    /// Parses the given string as a glob pattern.
105    pub fn glob(src: &str) -> Result<Self, StringPatternParseError> {
106        // TODO: might be better to do parsing and compilation separately since
107        // not all backends would use the compiled pattern object.
108        // TODO: if no meta character found, it can be mapped to Exact.
109        Ok(StringPattern::Glob(parse_glob(src)?))
110    }
111
112    /// Parses the given string as a case‐insensitive glob pattern.
113    pub fn glob_i(src: &str) -> Result<Self, StringPatternParseError> {
114        Ok(StringPattern::GlobI(parse_glob(src)?))
115    }
116
117    /// Parses the given string as a regular expression.
118    pub fn regex(src: &str) -> Result<Self, StringPatternParseError> {
119        let pattern = regex::Regex::new(src).map_err(StringPatternParseError::Regex)?;
120        Ok(StringPattern::Regex(pattern))
121    }
122
123    /// Parses the given string as a pattern of the specified `kind`.
124    pub fn from_str_kind(src: &str, kind: &str) -> Result<Self, StringPatternParseError> {
125        match kind {
126            "exact" => Ok(StringPattern::exact(src)),
127            "exact-i" => Ok(StringPattern::exact_i(src)),
128            "substring" => Ok(StringPattern::substring(src)),
129            "substring-i" => Ok(StringPattern::substring_i(src)),
130            "glob" => StringPattern::glob(src),
131            "glob-i" => StringPattern::glob_i(src),
132            "regex" => StringPattern::regex(src),
133            _ => Err(StringPatternParseError::InvalidKind(kind.to_owned())),
134        }
135    }
136
137    /// Returns true if this pattern matches input strings exactly.
138    pub fn is_exact(&self) -> bool {
139        self.as_exact().is_some()
140    }
141
142    /// Returns a literal pattern if this should match input strings exactly.
143    ///
144    /// This can be used to optimize map lookup by exact key.
145    pub fn as_exact(&self) -> Option<&str> {
146        // TODO: Handle trivial case‐insensitive patterns here? It might make people
147        // expect they can use case‐insensitive patterns in contexts where they
148        // generally can’t.
149        match self {
150            StringPattern::Exact(literal) => Some(literal),
151            _ => None,
152        }
153    }
154
155    /// Returns the original string of this pattern.
156    pub fn as_str(&self) -> &str {
157        match self {
158            StringPattern::Exact(literal) => literal,
159            StringPattern::ExactI(literal) => literal,
160            StringPattern::Substring(needle) => needle,
161            StringPattern::SubstringI(needle) => needle,
162            StringPattern::Glob(pattern) => pattern.as_str(),
163            StringPattern::GlobI(pattern) => pattern.as_str(),
164            StringPattern::Regex(pattern) => pattern.as_str(),
165        }
166    }
167
168    /// Converts this pattern to a glob string. Returns `None` if the pattern
169    /// can't be represented as a glob.
170    pub fn to_glob(&self) -> Option<Cow<'_, str>> {
171        // TODO: Handle trivial case‐insensitive patterns here? It might make people
172        // expect they can use case‐insensitive patterns in contexts where they
173        // generally can’t.
174        match self {
175            StringPattern::Exact(literal) => Some(glob::Pattern::escape(literal).into()),
176            StringPattern::Substring(needle) => {
177                if needle.is_empty() {
178                    Some("*".into())
179                } else {
180                    Some(format!("*{}*", glob::Pattern::escape(needle)).into())
181                }
182            }
183            StringPattern::Glob(pattern) => Some(pattern.as_str().into()),
184            StringPattern::ExactI(_) => None,
185            StringPattern::SubstringI(_) => None,
186            StringPattern::GlobI(_) => None,
187            StringPattern::Regex(_) => None,
188        }
189    }
190
191    /// Returns true if this pattern matches the `haystack`.
192    ///
193    /// When matching against a case‐insensitive pattern, only ASCII case
194    /// differences are currently folded. This may change in the future.
195    pub fn matches(&self, haystack: &str) -> bool {
196        // TODO: Unicode case folding is complicated and can be locale‐specific. The
197        // `glob` crate and Gitoxide only deal with ASCII case folding, so we do
198        // the same here; a more elaborate case folding system will require
199        // making sure those behave in a matching manner where relevant.
200        //
201        // Care will need to be taken regarding normalization and the choice of an
202        // appropriate case‐insensitive comparison scheme (`toNFKC_Casefold`?) to ensure
203        // that it is compatible with the standard case‐insensitivity of haystack
204        // components (like internationalized domain names in email addresses). The
205        // availability of normalization and case folding schemes in database backends
206        // will also need to be considered. A locale‐specific case folding
207        // scheme would likely not be appropriate for Jujutsu.
208        //
209        // For some discussion of this topic, see:
210        // <https://github.com/unicode-org/icu4x/issues/3151>
211        match self {
212            StringPattern::Exact(literal) => haystack == literal,
213            StringPattern::ExactI(literal) => haystack.eq_ignore_ascii_case(literal),
214            StringPattern::Substring(needle) => haystack.contains(needle),
215            StringPattern::SubstringI(needle) => haystack
216                .to_ascii_lowercase()
217                .contains(&needle.to_ascii_lowercase()),
218            StringPattern::Glob(pattern) => pattern.matches(haystack),
219            StringPattern::GlobI(pattern) => pattern.matches_with(
220                haystack,
221                glob::MatchOptions {
222                    case_sensitive: false,
223                    ..glob::MatchOptions::new()
224                },
225            ),
226            StringPattern::Regex(pattern) => pattern.is_match(haystack),
227        }
228    }
229
230    /// Iterates entries of the given `map` whose keys matches this pattern.
231    pub fn filter_btree_map<'a: 'b, 'b, K: Borrow<str> + Ord, V>(
232        &'b self,
233        map: &'a BTreeMap<K, V>,
234    ) -> impl Iterator<Item = (&'a K, &'a V)> + 'b {
235        if let Some(key) = self.as_exact() {
236            Either::Left(map.get_key_value(key).into_iter())
237        } else {
238            Either::Right(map.iter().filter(|&(key, _)| self.matches(key.borrow())))
239        }
240    }
241}
242
243impl fmt::Display for StringPattern {
244    /// Shows the original string of this pattern.
245    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246        write!(f, "{}", self.as_str())
247    }
248}
249
250#[cfg(test)]
251mod tests {
252    use assert_matches::assert_matches;
253
254    use super::*;
255
256    #[test]
257    fn test_string_pattern_to_glob() {
258        assert_eq!(StringPattern::everything().to_glob(), Some("*".into()));
259        assert_eq!(StringPattern::exact("a").to_glob(), Some("a".into()));
260        assert_eq!(StringPattern::exact("*").to_glob(), Some("[*]".into()));
261        assert_eq!(
262            StringPattern::glob("*").unwrap().to_glob(),
263            Some("*".into())
264        );
265        assert_eq!(
266            StringPattern::Substring("a".into()).to_glob(),
267            Some("*a*".into())
268        );
269        assert_eq!(
270            StringPattern::Substring("*".into()).to_glob(),
271            Some("*[*]*".into())
272        );
273    }
274
275    #[test]
276    fn test_parse() {
277        // Parse specific pattern kinds.
278        assert_matches!(
279            StringPattern::parse("exact:foo"),
280            Ok(StringPattern::Exact(s)) if s == "foo"
281        );
282        assert_matches!(
283            StringPattern::from_str_kind("foo", "exact"),
284            Ok(StringPattern::Exact(s)) if s == "foo"
285        );
286        assert_matches!(
287            StringPattern::parse("glob:foo*"),
288            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
289        );
290        assert_matches!(
291            StringPattern::from_str_kind("foo*", "glob"),
292            Ok(StringPattern::Glob(p)) if p.as_str() == "foo*"
293        );
294        assert_matches!(
295            StringPattern::parse("substring:foo"),
296            Ok(StringPattern::Substring(s)) if s == "foo"
297        );
298        assert_matches!(
299            StringPattern::from_str_kind("foo", "substring"),
300            Ok(StringPattern::Substring(s)) if s == "foo"
301        );
302        assert_matches!(
303            StringPattern::parse("substring-i:foo"),
304            Ok(StringPattern::SubstringI(s)) if s == "foo"
305        );
306        assert_matches!(
307            StringPattern::from_str_kind("foo", "substring-i"),
308            Ok(StringPattern::SubstringI(s)) if s == "foo"
309        );
310        assert_matches!(
311            StringPattern::parse("regex:foo"),
312            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
313        );
314        assert_matches!(
315            StringPattern::from_str_kind("foo", "regex"),
316            Ok(StringPattern::Regex(p)) if p.as_str() == "foo"
317        );
318
319        // Parse a pattern that contains a : itself.
320        assert_matches!(
321            StringPattern::parse("exact:foo:bar"),
322            Ok(StringPattern::Exact(s)) if s == "foo:bar"
323        );
324
325        // If no kind is specified, the input is treated as an exact pattern.
326        assert_matches!(
327            StringPattern::parse("foo"),
328            Ok(StringPattern::Exact(s)) if s == "foo"
329        );
330
331        // Parsing an unknown prefix results in an error.
332        assert_matches!(
333            StringPattern::parse("unknown-prefix:foo"),
334            Err(StringPatternParseError::InvalidKind(_))
335        );
336    }
337}