Skip to main content

rippy_cli/
pattern.rs

1/// A glob-style pattern compatible with Dippy's matching semantics.
2///
3/// Supports:
4/// - `*` matches any sequence of characters
5/// - `?` matches exactly one character
6/// - `[abc]` and `[!abc]` character classes
7/// - `**` matches any sequence (including across word boundaries)
8/// - Trailing `|` means exact match only
9/// - Default (no `|`): prefix match — pattern `git` matches `git add .`
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct Pattern {
12    raw: String,
13    exact: bool,
14    literal: bool,
15}
16
17impl Pattern {
18    /// Create a pattern from a raw string. A trailing `|` forces exact matching.
19    #[must_use]
20    pub fn new(raw: &str) -> Self {
21        let (raw_str, exact) = raw
22            .strip_suffix('|')
23            .map_or((raw, false), |stripped| (stripped, true));
24        let literal = !raw_str.contains(['*', '?', '[']);
25        Self {
26            raw: raw_str.to_owned(),
27            exact,
28            literal,
29        }
30    }
31
32    /// Create a pattern that matches any input. Used for structured-only rules.
33    #[must_use]
34    pub fn any() -> Self {
35        Self {
36            raw: "*".to_owned(),
37            exact: false,
38            literal: false,
39        }
40    }
41
42    /// Return the raw pattern string (without the trailing `|` if it was exact).
43    #[must_use]
44    pub fn raw(&self) -> &str {
45        &self.raw
46    }
47
48    /// Returns `true` if this pattern is the wildcard `*` (matches anything).
49    #[must_use]
50    pub fn is_any(&self) -> bool {
51        self.raw == "*"
52    }
53
54    /// Test whether `input` matches this pattern.
55    #[must_use]
56    pub fn matches(&self, input: &str) -> bool {
57        if self.literal {
58            return self.matches_literal(input);
59        }
60        if self.exact {
61            return glob_match(self.raw.as_bytes(), input.as_bytes());
62        }
63        if self.raw.is_empty() {
64            return true;
65        }
66        if glob_match(self.raw.as_bytes(), input.as_bytes()) {
67            return true;
68        }
69        for (i, _) in input.match_indices(' ') {
70            if glob_match(
71                self.raw.as_bytes(),
72                input.as_bytes().get(..i).unwrap_or_default(),
73            ) {
74                return true;
75            }
76        }
77        false
78    }
79
80    /// Fast path for patterns without glob metacharacters.
81    fn matches_literal(&self, input: &str) -> bool {
82        if self.exact {
83            return self.raw == input;
84        }
85        if self.raw.is_empty() {
86            return true;
87        }
88        // Exact match or prefix match at a space boundary
89        input == self.raw
90            || (input.len() > self.raw.len()
91                && input.as_bytes()[self.raw.len()] == b' '
92                && input.starts_with(&self.raw))
93    }
94
95    /// Return the raw pattern string (without trailing `|` if it was exact).
96    #[must_use]
97    pub fn as_str(&self) -> &str {
98        &self.raw
99    }
100}
101
102/// Iterative glob matching with backtracking. O(n*m) worst case.
103/// Since `*` already crosses `/`, `**/` is normalized to `*`.
104fn glob_match(pattern: &[u8], text: &[u8]) -> bool {
105    let normalized = normalize_stars(pattern);
106    let pat = &normalized;
107    let mut pidx = 0;
108    let mut tidx = 0;
109    // Saved positions for `*` backtracking
110    let mut saved_pat = usize::MAX;
111    let mut saved_txt: usize = 0;
112
113    while tidx < text.len() {
114        if pidx < pat.len() && pat[pidx] == b'*' {
115            saved_pat = pidx + 1;
116            saved_txt = tidx;
117            pidx += 1;
118        } else if pidx < pat.len() && pat[pidx] == b'?' {
119            pidx += 1;
120            tidx += 1;
121        } else if pidx < pat.len() && pat[pidx] == b'[' {
122            if let Some((true, rest)) = match_char_class(&pat[pidx + 1..], text[tidx]) {
123                pidx = pat.len() - rest.len();
124                tidx += 1;
125                continue;
126            }
127            if !bt(&mut pidx, &mut tidx, saved_pat, &mut saved_txt, text.len()) {
128                return false;
129            }
130        } else if pidx < pat.len() && pat[pidx] == text[tidx] {
131            pidx += 1;
132            tidx += 1;
133        } else if !bt(&mut pidx, &mut tidx, saved_pat, &mut saved_txt, text.len()) {
134            return false;
135        }
136    }
137
138    while pidx < pat.len() && pat[pidx] == b'*' {
139        pidx += 1;
140    }
141    pidx == pat.len()
142}
143
144/// Backtrack to the last `*` position, advancing the text cursor by one.
145const fn bt(
146    pidx: &mut usize,
147    tidx: &mut usize,
148    saved_pat: usize,
149    saved_txt: &mut usize,
150    text_len: usize,
151) -> bool {
152    if saved_pat == usize::MAX || *saved_txt >= text_len {
153        return false;
154    }
155    *saved_txt += 1;
156    *pidx = saved_pat;
157    *tidx = *saved_txt;
158    true
159}
160
161/// Normalize `**/` to `*` and collapse consecutive `*`s.
162fn normalize_stars(pattern: &[u8]) -> Vec<u8> {
163    let mut result = Vec::with_capacity(pattern.len());
164    let mut i = 0;
165    while i < pattern.len() {
166        if pattern[i] == b'*' {
167            result.push(b'*');
168            // Skip all consecutive stars and `**/` sequences
169            while i < pattern.len() && pattern[i] == b'*' {
170                i += 1;
171            }
172            // `**/` is equivalent to `*` since our `*` crosses `/` — skip the `/`
173            if i < pattern.len() && pattern[i] == b'/' {
174                i += 1;
175            }
176        } else {
177            result.push(pattern[i]);
178            i += 1;
179        }
180    }
181    result
182}
183
184/// Parse a character class after the opening `[`.
185/// Returns `Some((matched, remaining_pattern_after_]))` or `None` if malformed.
186fn match_char_class(pattern: &[u8], ch: u8) -> Option<(bool, &[u8])> {
187    let (negated, mut pat) = if pattern.first() == Some(&b'!') {
188        (true, &pattern[1..])
189    } else {
190        (false, pattern)
191    };
192
193    let mut matched = false;
194
195    // Allow `]` as the first character in the class
196    if pat.first() == Some(&b']') {
197        if ch == b']' {
198            matched = true;
199        }
200        pat = &pat[1..];
201    }
202
203    loop {
204        match pat {
205            [] => return None,
206            [b']', rest @ ..] => return Some((matched ^ negated, rest)),
207            [a, b'-', b, rest @ ..] if *b != b']' => {
208                if ch >= *a && ch <= *b {
209                    matched = true;
210                }
211                pat = rest;
212            }
213            [c, rest @ ..] => {
214                if ch == *c {
215                    matched = true;
216                }
217                pat = rest;
218            }
219        }
220    }
221}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226
227    #[test]
228    fn literal_exact() {
229        let p = Pattern::new("git|");
230        assert!(p.matches("git"));
231        assert!(!p.matches("git status"));
232        assert!(!p.matches("gi"));
233    }
234
235    #[test]
236    fn literal_prefix() {
237        let p = Pattern::new("git");
238        assert!(p.matches("git"));
239        assert!(p.matches("git status"));
240        assert!(p.matches("git add ."));
241        assert!(!p.matches("gitk"));
242        assert!(!p.matches("g"));
243    }
244
245    #[test]
246    fn wildcard_star() {
247        let p = Pattern::new("git *|");
248        assert!(p.matches("git status"));
249        assert!(p.matches("git add"));
250        assert!(!p.matches("git"));
251    }
252
253    #[test]
254    fn wildcard_question() {
255        let p = Pattern::new("ca?|");
256        assert!(p.matches("cat"));
257        assert!(p.matches("car"));
258        assert!(!p.matches("ca"));
259        assert!(!p.matches("cats"));
260    }
261
262    #[test]
263    fn char_class() {
264        let p = Pattern::new("[abc]at|");
265        assert!(p.matches("cat"));
266        assert!(p.matches("bat"));
267        assert!(!p.matches("dat"));
268    }
269
270    #[test]
271    fn negated_char_class() {
272        let p = Pattern::new("[!abc]at|");
273        assert!(!p.matches("cat"));
274        assert!(p.matches("dat"));
275    }
276
277    #[test]
278    fn char_class_range() {
279        let p = Pattern::new("[a-z]|");
280        assert!(p.matches("m"));
281        assert!(!p.matches("M"));
282        assert!(!p.matches("5"));
283    }
284
285    #[test]
286    fn double_star() {
287        let p = Pattern::new("**/.env*|");
288        assert!(p.matches(".env"));
289        assert!(p.matches("foo/.env"));
290        assert!(p.matches("foo/bar/.env.local"));
291    }
292
293    #[test]
294    fn prefix_matching_at_word_boundaries() {
295        let p = Pattern::new("rm -rf");
296        assert!(p.matches("rm -rf /"));
297        assert!(p.matches("rm -rf /tmp"));
298        assert!(p.matches("rm -rf"));
299    }
300
301    #[test]
302    fn empty_pattern() {
303        let p = Pattern::new("");
304        assert!(p.matches(""));
305        assert!(p.matches("anything"));
306    }
307
308    #[test]
309    fn empty_exact_pattern() {
310        let p = Pattern::new("|");
311        assert!(p.matches(""));
312        assert!(!p.matches("anything"));
313    }
314
315    #[test]
316    fn double_star_at_end() {
317        let p = Pattern::new("/tmp/**|");
318        assert!(p.matches("/tmp/foo"));
319        assert!(p.matches("/tmp/foo/bar"));
320    }
321}