jj_lib/
fileset.rs

1// Copyright 2024 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Functional language for selecting a set of paths.
16
17use std::collections::HashMap;
18use std::iter;
19use std::path;
20use std::slice;
21use std::sync::LazyLock;
22
23use globset::Glob;
24use globset::GlobBuilder;
25use itertools::Itertools as _;
26use thiserror::Error;
27
28use crate::dsl_util::collect_similar;
29use crate::fileset_parser;
30use crate::fileset_parser::BinaryOp;
31use crate::fileset_parser::ExpressionKind;
32use crate::fileset_parser::ExpressionNode;
33pub use crate::fileset_parser::FilesetDiagnostics;
34pub use crate::fileset_parser::FilesetParseError;
35pub use crate::fileset_parser::FilesetParseErrorKind;
36pub use crate::fileset_parser::FilesetParseResult;
37use crate::fileset_parser::FunctionCallNode;
38use crate::fileset_parser::UnaryOp;
39use crate::matchers::DifferenceMatcher;
40use crate::matchers::EverythingMatcher;
41use crate::matchers::FilesMatcher;
42use crate::matchers::GlobsMatcher;
43use crate::matchers::IntersectionMatcher;
44use crate::matchers::Matcher;
45use crate::matchers::NothingMatcher;
46use crate::matchers::PrefixMatcher;
47use crate::matchers::UnionMatcher;
48use crate::repo_path::RelativePathParseError;
49use crate::repo_path::RepoPath;
50use crate::repo_path::RepoPathBuf;
51use crate::repo_path::RepoPathUiConverter;
52use crate::repo_path::UiPathParseError;
53
/// Error occurred during file pattern parsing.
///
/// This is the error type returned by the [`FilePattern`] constructors. Apart
/// from `InvalidKind`, each variant transparently wraps the error of the
/// underlying path or glob parser.
#[derive(Debug, Error)]
pub enum FilePatternParseError {
    /// Unknown pattern kind is specified.
    ///
    /// Holds the unrecognized kind name, which is echoed in the message.
    #[error("Invalid file pattern kind `{0}:`")]
    InvalidKind(String),
    /// Failed to parse input UI path.
    #[error(transparent)]
    UiPath(#[from] UiPathParseError),
    /// Failed to parse input workspace-relative path.
    #[error(transparent)]
    RelativePath(#[from] RelativePathParseError),
    /// Failed to parse glob pattern.
    #[error(transparent)]
    GlobPattern(#[from] globset::Error),
}
70
/// Basic pattern to match `RepoPath`.
///
/// Values are usually built through the constructor functions of this type,
/// such as [`FilePattern::from_str_kind`].
#[derive(Clone, Debug)]
pub enum FilePattern {
    /// Matches file (or exact) path.
    FilePath(RepoPathBuf),
    /// Matches path prefix.
    PrefixPath(RepoPathBuf),
    /// Matches file (or exact) path with glob pattern.
    ///
    /// The glob constructors fall back to `FilePath` when no pattern part
    /// remains after splitting off the literal directory prefix.
    FileGlob {
        /// Prefix directory path where the `pattern` will be evaluated.
        dir: RepoPathBuf,
        /// Glob pattern relative to `dir`.
        pattern: Box<Glob>,
    },
    // TODO: add more patterns:
    // - FilesInPath: files in directory, non-recursively?
    // - NameGlob or SuffixGlob: file name with glob?
}
89
90impl FilePattern {
91    /// Parses the given `input` string as pattern of the specified `kind`.
92    pub fn from_str_kind(
93        path_converter: &RepoPathUiConverter,
94        input: &str,
95        kind: &str,
96    ) -> Result<Self, FilePatternParseError> {
97        // Naming convention:
98        // * path normalization
99        //   * cwd: cwd-relative path (default)
100        //   * root: workspace-relative path
101        // * where to anchor
102        //   * file: exact file path
103        //   * prefix: path prefix (files under directory recursively)
104        //   * files-in: files in directory non-recursively
105        //   * name: file name component (or suffix match?)
106        //   * substring: substring match?
107        // * string pattern syntax (+ case sensitivity?)
108        //   * path: literal path (default) (default anchor: prefix)
109        //   * glob: glob pattern (default anchor: file)
110        //   * regex?
111        match kind {
112            "cwd" => Self::cwd_prefix_path(path_converter, input),
113            "cwd-file" | "file" => Self::cwd_file_path(path_converter, input),
114            "cwd-glob" | "glob" => Self::cwd_file_glob(path_converter, input),
115            "cwd-glob-i" | "glob-i" => Self::cwd_file_glob_i(path_converter, input),
116            "root" => Self::root_prefix_path(input),
117            "root-file" => Self::root_file_path(input),
118            "root-glob" => Self::root_file_glob(input),
119            "root-glob-i" => Self::root_file_glob_i(input),
120            _ => Err(FilePatternParseError::InvalidKind(kind.to_owned())),
121        }
122    }
123
124    /// Pattern that matches cwd-relative file (or exact) path.
125    pub fn cwd_file_path(
126        path_converter: &RepoPathUiConverter,
127        input: impl AsRef<str>,
128    ) -> Result<Self, FilePatternParseError> {
129        let path = path_converter.parse_file_path(input.as_ref())?;
130        Ok(Self::FilePath(path))
131    }
132
133    /// Pattern that matches cwd-relative path prefix.
134    pub fn cwd_prefix_path(
135        path_converter: &RepoPathUiConverter,
136        input: impl AsRef<str>,
137    ) -> Result<Self, FilePatternParseError> {
138        let path = path_converter.parse_file_path(input.as_ref())?;
139        Ok(Self::PrefixPath(path))
140    }
141
142    /// Pattern that matches cwd-relative file path glob.
143    pub fn cwd_file_glob(
144        path_converter: &RepoPathUiConverter,
145        input: impl AsRef<str>,
146    ) -> Result<Self, FilePatternParseError> {
147        let (dir, pattern) = split_glob_path(input.as_ref());
148        let dir = path_converter.parse_file_path(dir)?;
149        Self::file_glob_at(dir, pattern, false)
150    }
151
152    /// Pattern that matches cwd-relative file path glob (case-insensitive).
153    pub fn cwd_file_glob_i(
154        path_converter: &RepoPathUiConverter,
155        input: impl AsRef<str>,
156    ) -> Result<Self, FilePatternParseError> {
157        let (dir, pattern) = split_glob_path_i(input.as_ref());
158        let dir = path_converter.parse_file_path(dir)?;
159        Self::file_glob_at(dir, pattern, true)
160    }
161
162    /// Pattern that matches workspace-relative file (or exact) path.
163    pub fn root_file_path(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
164        // TODO: Let caller pass in converter for root-relative paths too
165        let path = RepoPathBuf::from_relative_path(input.as_ref())?;
166        Ok(Self::FilePath(path))
167    }
168
169    /// Pattern that matches workspace-relative path prefix.
170    pub fn root_prefix_path(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
171        let path = RepoPathBuf::from_relative_path(input.as_ref())?;
172        Ok(Self::PrefixPath(path))
173    }
174
175    /// Pattern that matches workspace-relative file path glob.
176    pub fn root_file_glob(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
177        let (dir, pattern) = split_glob_path(input.as_ref());
178        let dir = RepoPathBuf::from_relative_path(dir)?;
179        Self::file_glob_at(dir, pattern, false)
180    }
181
182    /// Pattern that matches workspace-relative file path glob
183    /// (case-insensitive).
184    pub fn root_file_glob_i(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
185        let (dir, pattern) = split_glob_path_i(input.as_ref());
186        let dir = RepoPathBuf::from_relative_path(dir)?;
187        Self::file_glob_at(dir, pattern, true)
188    }
189
190    fn file_glob_at(
191        dir: RepoPathBuf,
192        input: &str,
193        icase: bool,
194    ) -> Result<Self, FilePatternParseError> {
195        if input.is_empty() {
196            return Ok(Self::FilePath(dir));
197        }
198        // Normalize separator to '/', reject ".." which will never match
199        let normalized = RepoPathBuf::from_relative_path(input)?;
200        let pattern = Box::new(parse_file_glob(
201            normalized.as_internal_file_string(),
202            icase,
203        )?);
204        Ok(Self::FileGlob { dir, pattern })
205    }
206
207    /// Returns path if this pattern represents a literal path in a workspace.
208    /// Returns `None` if this is a glob pattern for example.
209    pub fn as_path(&self) -> Option<&RepoPath> {
210        match self {
211            Self::FilePath(path) => Some(path),
212            Self::PrefixPath(path) => Some(path),
213            Self::FileGlob { .. } => None,
214        }
215    }
216}
217
218pub(super) fn parse_file_glob(input: &str, icase: bool) -> Result<Glob, globset::Error> {
219    GlobBuilder::new(input)
220        .literal_separator(true)
221        .case_insensitive(icase)
222        .build()
223}
224
/// Tells whether `c` has a special meaning in a glob pattern.
fn is_glob_char(c: char) -> bool {
    // See globset::escape(). In addition to that, backslash is parsed as an
    // escape sequence on Unix.
    match c {
        '?' | '*' | '[' | ']' | '{' | '}' => true,
        '\\' => !cfg!(windows),
        _ => false,
    }
}
236
237/// Splits `input` path into literal directory path and glob pattern.
238fn split_glob_path(input: &str) -> (&str, &str) {
239    let prefix_len = input
240        .split_inclusive(path::is_separator)
241        .take_while(|component| !component.contains(is_glob_char))
242        .map(|component| component.len())
243        .sum();
244    input.split_at(prefix_len)
245}
246
247/// Splits `input` path into literal directory path and glob pattern, for
248/// case-insensitive patterns.
249fn split_glob_path_i(input: &str) -> (&str, &str) {
250    let prefix_len = input
251        .split_inclusive(path::is_separator)
252        .take_while(|component| {
253            !component.contains(|c: char| c.is_ascii_alphabetic() || is_glob_char(c))
254        })
255        .map(|component| component.len())
256        .sum();
257    input.split_at(prefix_len)
258}
259
/// AST-level representation of the fileset expression.
///
/// An expression tree can be turned into a `Matcher` with
/// [`FilesetExpression::to_matcher`].
#[derive(Clone, Debug)]
pub enum FilesetExpression {
    /// Matches nothing.
    None,
    /// Matches everything.
    All,
    /// Matches basic pattern.
    Pattern(FilePattern),
    /// Matches any of the expressions.
    ///
    /// Use `FilesetExpression::union_all()` to construct a union expression.
    /// It will normalize 0-ary or 1-ary union.
    UnionAll(Vec<Self>),
    /// Matches both expressions.
    Intersection(Box<Self>, Box<Self>),
    /// Matches the first expression, but not the second expression.
    Difference(Box<Self>, Box<Self>),
}
279
280impl FilesetExpression {
281    /// Expression that matches nothing.
282    pub fn none() -> Self {
283        Self::None
284    }
285
286    /// Expression that matches everything.
287    pub fn all() -> Self {
288        Self::All
289    }
290
291    /// Expression that matches the given `pattern`.
292    pub fn pattern(pattern: FilePattern) -> Self {
293        Self::Pattern(pattern)
294    }
295
296    /// Expression that matches file (or exact) path.
297    pub fn file_path(path: RepoPathBuf) -> Self {
298        Self::Pattern(FilePattern::FilePath(path))
299    }
300
301    /// Expression that matches path prefix.
302    pub fn prefix_path(path: RepoPathBuf) -> Self {
303        Self::Pattern(FilePattern::PrefixPath(path))
304    }
305
306    /// Expression that matches any of the given `expressions`.
307    pub fn union_all(expressions: Vec<Self>) -> Self {
308        match expressions.len() {
309            0 => Self::none(),
310            1 => expressions.into_iter().next().unwrap(),
311            _ => Self::UnionAll(expressions),
312        }
313    }
314
315    /// Expression that matches both `self` and `other`.
316    pub fn intersection(self, other: Self) -> Self {
317        Self::Intersection(Box::new(self), Box::new(other))
318    }
319
320    /// Expression that matches `self` but not `other`.
321    pub fn difference(self, other: Self) -> Self {
322        Self::Difference(Box::new(self), Box::new(other))
323    }
324
325    /// Flattens union expression at most one level.
326    fn as_union_all(&self) -> &[Self] {
327        match self {
328            Self::None => &[],
329            Self::UnionAll(exprs) => exprs,
330            _ => slice::from_ref(self),
331        }
332    }
333
334    fn dfs_pre(&self) -> impl Iterator<Item = &Self> {
335        let mut stack: Vec<&Self> = vec![self];
336        iter::from_fn(move || {
337            let expr = stack.pop()?;
338            match expr {
339                Self::None | Self::All | Self::Pattern(_) => {}
340                Self::UnionAll(exprs) => stack.extend(exprs.iter().rev()),
341                Self::Intersection(expr1, expr2) | Self::Difference(expr1, expr2) => {
342                    stack.push(expr2);
343                    stack.push(expr1);
344                }
345            }
346            Some(expr)
347        })
348    }
349
350    /// Iterates literal paths recursively from this expression.
351    ///
352    /// For example, `"a", "b", "c"` will be yielded in that order for
353    /// expression `"a" | all() & "b" | ~"c"`.
354    pub fn explicit_paths(&self) -> impl Iterator<Item = &RepoPath> {
355        // pre/post-ordering doesn't matter so long as children are visited from
356        // left to right.
357        self.dfs_pre().filter_map(|expr| match expr {
358            Self::Pattern(pattern) => pattern.as_path(),
359            _ => None,
360        })
361    }
362
363    /// Transforms the expression tree to `Matcher` object.
364    pub fn to_matcher(&self) -> Box<dyn Matcher> {
365        build_union_matcher(self.as_union_all())
366    }
367}
368
369/// Transforms the union `expressions` to `Matcher` object.
370///
371/// Since `Matcher` typically accepts a set of patterns to be OR-ed, this
372/// function takes a list of union `expressions` as input.
373fn build_union_matcher(expressions: &[FilesetExpression]) -> Box<dyn Matcher> {
374    let mut file_paths = Vec::new();
375    let mut prefix_paths = Vec::new();
376    let mut file_globs = GlobsMatcher::builder();
377    let mut matchers: Vec<Option<Box<dyn Matcher>>> = Vec::new();
378    for expr in expressions {
379        let matcher: Box<dyn Matcher> = match expr {
380            // None and All are supposed to be simplified by caller.
381            FilesetExpression::None => Box::new(NothingMatcher),
382            FilesetExpression::All => Box::new(EverythingMatcher),
383            FilesetExpression::Pattern(pattern) => {
384                match pattern {
385                    FilePattern::FilePath(path) => file_paths.push(path),
386                    FilePattern::PrefixPath(path) => prefix_paths.push(path),
387                    FilePattern::FileGlob { dir, pattern } => file_globs.add(dir, pattern),
388                }
389                continue;
390            }
391            // UnionAll is supposed to be flattened by caller.
392            FilesetExpression::UnionAll(exprs) => build_union_matcher(exprs),
393            FilesetExpression::Intersection(expr1, expr2) => {
394                let m1 = build_union_matcher(expr1.as_union_all());
395                let m2 = build_union_matcher(expr2.as_union_all());
396                Box::new(IntersectionMatcher::new(m1, m2))
397            }
398            FilesetExpression::Difference(expr1, expr2) => {
399                let m1 = build_union_matcher(expr1.as_union_all());
400                let m2 = build_union_matcher(expr2.as_union_all());
401                Box::new(DifferenceMatcher::new(m1, m2))
402            }
403        };
404        matchers.push(Some(matcher));
405    }
406
407    if !file_paths.is_empty() {
408        matchers.push(Some(Box::new(FilesMatcher::new(file_paths))));
409    }
410    if !prefix_paths.is_empty() {
411        matchers.push(Some(Box::new(PrefixMatcher::new(prefix_paths))));
412    }
413    if !file_globs.is_empty() {
414        matchers.push(Some(Box::new(file_globs.build())));
415    }
416    union_all_matchers(&mut matchers)
417}
418
419/// Concatenates all `matchers` as union.
420///
421/// Each matcher element must be wrapped in `Some` so the matchers can be moved
422/// in arbitrary order.
423fn union_all_matchers(matchers: &mut [Option<Box<dyn Matcher>>]) -> Box<dyn Matcher> {
424    match matchers {
425        [] => Box::new(NothingMatcher),
426        [matcher] => matcher.take().expect("matcher should still be available"),
427        _ => {
428            // Build balanced tree to minimize the recursion depth.
429            let (left, right) = matchers.split_at_mut(matchers.len() / 2);
430            let m1 = union_all_matchers(left);
431            let m2 = union_all_matchers(right);
432            Box::new(UnionMatcher::new(m1, m2))
433        }
434    }
435}
436
/// Signature shared by the built-in fileset function implementations.
///
/// An implementation receives the `FilesetDiagnostics`, the
/// `RepoPathUiConverter`, and the function call node being resolved, and
/// returns the resulting expression or a parse error.
type FilesetFunction = fn(
    &mut FilesetDiagnostics,
    &RepoPathUiConverter,
    &FunctionCallNode,
) -> FilesetParseResult<FilesetExpression>;
442
443static BUILTIN_FUNCTION_MAP: LazyLock<HashMap<&str, FilesetFunction>> = LazyLock::new(|| {
444    // Not using maplit::hashmap!{} or custom declarative macro here because
445    // code completion inside macro is quite restricted.
446    let mut map: HashMap<&str, FilesetFunction> = HashMap::new();
447    map.insert("none", |_diagnostics, _path_converter, function| {
448        function.expect_no_arguments()?;
449        Ok(FilesetExpression::none())
450    });
451    map.insert("all", |_diagnostics, _path_converter, function| {
452        function.expect_no_arguments()?;
453        Ok(FilesetExpression::all())
454    });
455    map
456});
457
458fn resolve_function(
459    diagnostics: &mut FilesetDiagnostics,
460    path_converter: &RepoPathUiConverter,
461    function: &FunctionCallNode,
462) -> FilesetParseResult<FilesetExpression> {
463    if let Some(func) = BUILTIN_FUNCTION_MAP.get(function.name) {
464        func(diagnostics, path_converter, function)
465    } else {
466        Err(FilesetParseError::new(
467            FilesetParseErrorKind::NoSuchFunction {
468                name: function.name.to_owned(),
469                candidates: collect_similar(function.name, BUILTIN_FUNCTION_MAP.keys()),
470            },
471            function.name_span,
472        ))
473    }
474}
475
476fn resolve_expression(
477    diagnostics: &mut FilesetDiagnostics,
478    path_converter: &RepoPathUiConverter,
479    node: &ExpressionNode,
480) -> FilesetParseResult<FilesetExpression> {
481    let wrap_pattern_error =
482        |err| FilesetParseError::expression("Invalid file pattern", node.span).with_source(err);
483    match &node.kind {
484        ExpressionKind::Identifier(name) => {
485            let pattern =
486                FilePattern::cwd_prefix_path(path_converter, name).map_err(wrap_pattern_error)?;
487            Ok(FilesetExpression::pattern(pattern))
488        }
489        ExpressionKind::String(name) => {
490            let pattern =
491                FilePattern::cwd_prefix_path(path_converter, name).map_err(wrap_pattern_error)?;
492            Ok(FilesetExpression::pattern(pattern))
493        }
494        ExpressionKind::StringPattern { kind, value } => {
495            let pattern = FilePattern::from_str_kind(path_converter, value, kind)
496                .map_err(wrap_pattern_error)?;
497            Ok(FilesetExpression::pattern(pattern))
498        }
499        ExpressionKind::Unary(op, arg_node) => {
500            let arg = resolve_expression(diagnostics, path_converter, arg_node)?;
501            match op {
502                UnaryOp::Negate => Ok(FilesetExpression::all().difference(arg)),
503            }
504        }
505        ExpressionKind::Binary(op, lhs_node, rhs_node) => {
506            let lhs = resolve_expression(diagnostics, path_converter, lhs_node)?;
507            let rhs = resolve_expression(diagnostics, path_converter, rhs_node)?;
508            match op {
509                BinaryOp::Intersection => Ok(lhs.intersection(rhs)),
510                BinaryOp::Difference => Ok(lhs.difference(rhs)),
511            }
512        }
513        ExpressionKind::UnionAll(nodes) => {
514            let expressions = nodes
515                .iter()
516                .map(|node| resolve_expression(diagnostics, path_converter, node))
517                .try_collect()?;
518            Ok(FilesetExpression::union_all(expressions))
519        }
520        ExpressionKind::FunctionCall(function) => {
521            resolve_function(diagnostics, path_converter, function)
522        }
523    }
524}
525
526/// Parses text into `FilesetExpression` without bare string fallback.
527pub fn parse(
528    diagnostics: &mut FilesetDiagnostics,
529    text: &str,
530    path_converter: &RepoPathUiConverter,
531) -> FilesetParseResult<FilesetExpression> {
532    let node = fileset_parser::parse_program(text)?;
533    // TODO: add basic tree substitution pass to eliminate redundant expressions
534    resolve_expression(diagnostics, path_converter, &node)
535}
536
537/// Parses text into `FilesetExpression` with bare string fallback.
538///
539/// If the text can't be parsed as a fileset expression, and if it doesn't
540/// contain any operator-like characters, it will be parsed as a file path.
541pub fn parse_maybe_bare(
542    diagnostics: &mut FilesetDiagnostics,
543    text: &str,
544    path_converter: &RepoPathUiConverter,
545) -> FilesetParseResult<FilesetExpression> {
546    let node = fileset_parser::parse_program_or_bare_string(text)?;
547    // TODO: add basic tree substitution pass to eliminate redundant expressions
548    resolve_expression(diagnostics, path_converter, &node)
549}
550
551#[cfg(test)]
552mod tests {
553    use std::path::PathBuf;
554
555    use super::*;
556
    /// Test helper that builds a `RepoPathBuf` from an internal path string,
    /// panicking if the conversion fails.
    fn repo_path_buf(value: impl Into<String>) -> RepoPathBuf {
        RepoPathBuf::from_internal_string(value).unwrap()
    }
560
    /// Returns a copy of the current insta settings with extra filters that
    /// shorten the debug snapshots used by the tests in this module.
    fn insta_settings() -> insta::Settings {
        let mut settings = insta::Settings::clone_current();
        // Elide parsed glob options and tokens, which aren't interesting.
        settings.add_filter(
            r"(?m)^(\s{12}opts):\s*GlobOptions\s*\{\n(\s{16}.*\n)*\s{12}\},",
            "$1: _,",
        );
        settings.add_filter(
            r"(?m)^(\s{12}tokens):\s*Tokens\(\n(\s{16}.*\n)*\s{12}\),",
            "$1: _,",
        );
        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
        // the output more readable.
        // The same filter is registered several times so that nested wrappers
        // can be collapsed level by level.
        for _ in 0..4 {
            settings.add_filter(
                r"(?x)
                \b([A-Z]\w*)\(\n
                    \s*(.{1,60}),\n
                \s*\)",
                "$1($2)",
            );
        }
        settings
    }
585
    // Checks how literal path patterns are resolved by `parse_maybe_bare()`
    // with `cwd` set to `/ws/cur` and `base` set to `/ws`.
    #[test]
    fn test_parse_file_pattern() {
        let settings = insta_settings();
        let _guard = settings.bind_to_scope();
        let path_converter = RepoPathUiConverter::Fs {
            cwd: PathBuf::from("/ws/cur"),
            base: PathBuf::from("/ws"),
        };
        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);

        // cwd-relative patterns
        insta::assert_debug_snapshot!(
            parse(".").unwrap(),
            @r#"Pattern(PrefixPath("cur"))"#);
        insta::assert_debug_snapshot!(
            parse("..").unwrap(),
            @r#"Pattern(PrefixPath(""))"#);
        // Escaping above `base` is an error.
        assert!(parse("../..").is_err());
        insta::assert_debug_snapshot!(
            parse("foo").unwrap(),
            @r#"Pattern(PrefixPath("cur/foo"))"#);
        insta::assert_debug_snapshot!(
            parse("cwd:.").unwrap(),
            @r#"Pattern(PrefixPath("cur"))"#);
        insta::assert_debug_snapshot!(
            parse("cwd-file:foo").unwrap(),
            @r#"Pattern(FilePath("cur/foo"))"#);
        insta::assert_debug_snapshot!(
            parse("file:../foo/bar").unwrap(),
            @r#"Pattern(FilePath("foo/bar"))"#);

        // workspace-relative patterns
        insta::assert_debug_snapshot!(
            parse("root:.").unwrap(),
            @r#"Pattern(PrefixPath(""))"#);
        assert!(parse("root:..").is_err());
        insta::assert_debug_snapshot!(
            parse("root:foo/bar").unwrap(),
            @r#"Pattern(PrefixPath("foo/bar"))"#);
        insta::assert_debug_snapshot!(
            parse("root-file:bar").unwrap(),
            @r#"Pattern(FilePath("bar"))"#);
    }
629
    // Checks how glob patterns are split into a literal directory and a glob
    // part, for both cwd-relative and workspace-relative kinds.
    #[test]
    fn test_parse_glob_pattern() {
        let settings = insta_settings();
        let _guard = settings.bind_to_scope();
        let path_converter = RepoPathUiConverter::Fs {
            // meta character in cwd path shouldn't be expanded
            cwd: PathBuf::from("/ws/cur*"),
            base: PathBuf::from("/ws"),
        };
        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);

        // cwd-relative, without meta characters
        insta::assert_debug_snapshot!(
            parse(r#"cwd-glob:"foo""#).unwrap(),
            @r#"Pattern(FilePath("cur*/foo"))"#);
        // Strictly speaking, glob:"" shouldn't match a file named <cwd>, but
        // file pattern doesn't distinguish "foo/" from "foo".
        insta::assert_debug_snapshot!(
            parse(r#"glob:"""#).unwrap(),
            @r#"Pattern(FilePath("cur*"))"#);
        insta::assert_debug_snapshot!(
            parse(r#"glob:".""#).unwrap(),
            @r#"Pattern(FilePath("cur*"))"#);
        insta::assert_debug_snapshot!(
            parse(r#"glob:"..""#).unwrap(),
            @r#"Pattern(FilePath(""))"#);

        // cwd-relative, with meta characters
        insta::assert_debug_snapshot!(
            parse(r#"glob:"*""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "cur*",
                pattern: Glob {
                    glob: "*",
                    re: "(?-u)^[^/]*$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        insta::assert_debug_snapshot!(
            parse(r#"glob:"./*""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "cur*",
                pattern: Glob {
                    glob: "*",
                    re: "(?-u)^[^/]*$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        insta::assert_debug_snapshot!(
            parse(r#"glob:"../*""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "",
                pattern: Glob {
                    glob: "*",
                    re: "(?-u)^[^/]*$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        // glob:"**" is equivalent to root-glob:"<cwd>/**", not root-glob:"**"
        insta::assert_debug_snapshot!(
            parse(r#"glob:"**""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "cur*",
                pattern: Glob {
                    glob: "**",
                    re: "(?-u)^.*$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        insta::assert_debug_snapshot!(
            parse(r#"glob:"../foo/b?r/baz""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "foo",
                pattern: Glob {
                    glob: "b?r/baz",
                    re: "(?-u)^b[^/]r/baz$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        assert!(parse(r#"glob:"../../*""#).is_err());
        assert!(parse(r#"glob-i:"../../*""#).is_err());
        assert!(parse(r#"glob:"/*""#).is_err());
        assert!(parse(r#"glob-i:"/*""#).is_err());
        // no support for relative path component after glob meta character
        assert!(parse(r#"glob:"*/..""#).is_err());
        assert!(parse(r#"glob-i:"*/..""#).is_err());

        if cfg!(windows) {
            // cwd-relative, with Windows path separators
            insta::assert_debug_snapshot!(
                parse(r#"glob:"..\\foo\\*\\bar""#).unwrap(), @r#"
            Pattern(
                FileGlob {
                    dir: "foo",
                    pattern: Glob {
                        glob: "*/bar",
                        re: "(?-u)^[^/]*/bar$",
                        opts: _,
                        tokens: _,
                    },
                },
            )
            "#);
        } else {
            // backslash is an escape character on Unix
            insta::assert_debug_snapshot!(
                parse(r#"glob:"..\\foo\\*\\bar""#).unwrap(), @r#"
            Pattern(
                FileGlob {
                    dir: "cur*",
                    pattern: Glob {
                        glob: "..\\foo\\*\\bar",
                        re: "(?-u)^\\.\\.foo\\*bar$",
                        opts: _,
                        tokens: _,
                    },
                },
            )
            "#);
        }

        // workspace-relative, without meta characters
        insta::assert_debug_snapshot!(
            parse(r#"root-glob:"foo""#).unwrap(),
            @r#"Pattern(FilePath("foo"))"#);
        insta::assert_debug_snapshot!(
            parse(r#"root-glob:"""#).unwrap(),
            @r#"Pattern(FilePath(""))"#);
        insta::assert_debug_snapshot!(
            parse(r#"root-glob:".""#).unwrap(),
            @r#"Pattern(FilePath(""))"#);

        // workspace-relative, with meta characters
        insta::assert_debug_snapshot!(
            parse(r#"root-glob:"*""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "",
                pattern: Glob {
                    glob: "*",
                    re: "(?-u)^[^/]*$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        insta::assert_debug_snapshot!(
            parse(r#"root-glob:"foo/bar/b[az]""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "foo/bar",
                pattern: Glob {
                    glob: "b[az]",
                    re: "(?-u)^b[az]$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        insta::assert_debug_snapshot!(
            parse(r#"root-glob:"foo/bar/b{ar,az}""#).unwrap(), @r#"
        Pattern(
            FileGlob {
                dir: "foo/bar",
                pattern: Glob {
                    glob: "b{ar,az}",
                    re: "(?-u)^b(?:ar|az)$",
                    opts: _,
                    tokens: _,
                },
            },
        )
        "#);
        assert!(parse(r#"root-glob:"../*""#).is_err());
        assert!(parse(r#"root-glob-i:"../*""#).is_err());
        assert!(parse(r#"root-glob:"/*""#).is_err());
        assert!(parse(r#"root-glob-i:"/*""#).is_err());

        // workspace-relative, backslash escape without meta characters
        if cfg!(not(windows)) {
            insta::assert_debug_snapshot!(
                parse(r#"root-glob:'foo/bar\baz'"#).unwrap(), @r#"
            Pattern(
                FileGlob {
                    dir: "foo",
                    pattern: Glob {
                        glob: "bar\\baz",
                        re: "(?-u)^barbaz$",
                        opts: _,
                        tokens: _,
                    },
                },
            )
            "#);
        }
    }
848
849    #[test]
850    fn test_parse_glob_pattern_case_insensitive() {
851        let settings = insta_settings();
852        let _guard = settings.bind_to_scope();
853        let path_converter = RepoPathUiConverter::Fs {
854            cwd: PathBuf::from("/ws/cur"),
855            base: PathBuf::from("/ws"),
856        };
857        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
858
859        // cwd-relative case-insensitive glob
860        insta::assert_debug_snapshot!(
861            parse(r#"glob-i:"*.TXT""#).unwrap(), @r#"
862        Pattern(
863            FileGlob {
864                dir: "cur",
865                pattern: Glob {
866                    glob: "*.TXT",
867                    re: "(?-u)(?i)^[^/]*\\.TXT$",
868                    opts: _,
869                    tokens: _,
870                },
871            },
872        )
873        "#);
874
875        // cwd-relative case-insensitive glob with more specific pattern
876        insta::assert_debug_snapshot!(
877            parse(r#"cwd-glob-i:"[Ff]oo""#).unwrap(), @r#"
878        Pattern(
879            FileGlob {
880                dir: "cur",
881                pattern: Glob {
882                    glob: "[Ff]oo",
883                    re: "(?-u)(?i)^[Ff]oo$",
884                    opts: _,
885                    tokens: _,
886                },
887            },
888        )
889        "#);
890
891        // workspace-relative case-insensitive glob
892        insta::assert_debug_snapshot!(
893            parse(r#"root-glob-i:"*.Rs""#).unwrap(), @r#"
894        Pattern(
895            FileGlob {
896                dir: "",
897                pattern: Glob {
898                    glob: "*.Rs",
899                    re: "(?-u)(?i)^[^/]*\\.Rs$",
900                    opts: _,
901                    tokens: _,
902                },
903            },
904        )
905        "#);
906
907        // case-insensitive pattern with directory component (should not split the path)
908        insta::assert_debug_snapshot!(
909            parse(r#"glob-i:"SubDir/*.rs""#).unwrap(), @r#"
910        Pattern(
911            FileGlob {
912                dir: "cur",
913                pattern: Glob {
914                    glob: "SubDir/*.rs",
915                    re: "(?-u)(?i)^SubDir/[^/]*\\.rs$",
916                    opts: _,
917                    tokens: _,
918                },
919            },
920        )
921        "#);
922
923        // case-sensitive pattern with directory component (should split the path)
924        insta::assert_debug_snapshot!(
925            parse(r#"glob:"SubDir/*.rs""#).unwrap(), @r#"
926        Pattern(
927            FileGlob {
928                dir: "cur/SubDir",
929                pattern: Glob {
930                    glob: "*.rs",
931                    re: "(?-u)^[^/]*\\.rs$",
932                    opts: _,
933                    tokens: _,
934                },
935            },
936        )
937        "#);
938
939        // case-insensitive pattern with leading dots (should split dots but not dirs)
940        insta::assert_debug_snapshot!(
941            parse(r#"glob-i:"../SomeDir/*.rs""#).unwrap(), @r#"
942        Pattern(
943            FileGlob {
944                dir: "",
945                pattern: Glob {
946                    glob: "SomeDir/*.rs",
947                    re: "(?-u)(?i)^SomeDir/[^/]*\\.rs$",
948                    opts: _,
949                    tokens: _,
950                },
951            },
952        )
953        "#);
954
955        // case-insensitive pattern with single leading dot
956        insta::assert_debug_snapshot!(
957            parse(r#"glob-i:"./SomeFile*.txt""#).unwrap(), @r#"
958        Pattern(
959            FileGlob {
960                dir: "cur",
961                pattern: Glob {
962                    glob: "SomeFile*.txt",
963                    re: "(?-u)(?i)^SomeFile[^/]*\\.txt$",
964                    opts: _,
965                    tokens: _,
966                },
967            },
968        )
969        "#);
970    }
971
972    #[test]
973    fn test_parse_function() {
974        let settings = insta_settings();
975        let _guard = settings.bind_to_scope();
976        let path_converter = RepoPathUiConverter::Fs {
977            cwd: PathBuf::from("/ws/cur"),
978            base: PathBuf::from("/ws"),
979        };
980        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
981
982        insta::assert_debug_snapshot!(parse("all()").unwrap(), @"All");
983        insta::assert_debug_snapshot!(parse("none()").unwrap(), @"None");
984        insta::assert_debug_snapshot!(parse("all(x)").unwrap_err().kind(), @r#"
985        InvalidArguments {
986            name: "all",
987            message: "Expected 0 arguments",
988        }
989        "#);
990        insta::assert_debug_snapshot!(parse("ale()").unwrap_err().kind(), @r#"
991        NoSuchFunction {
992            name: "ale",
993            candidates: [
994                "all",
995            ],
996        }
997        "#);
998    }
999
1000    #[test]
1001    fn test_parse_compound_expression() {
1002        let settings = insta_settings();
1003        let _guard = settings.bind_to_scope();
1004        let path_converter = RepoPathUiConverter::Fs {
1005            cwd: PathBuf::from("/ws/cur"),
1006            base: PathBuf::from("/ws"),
1007        };
1008        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
1009
1010        insta::assert_debug_snapshot!(parse("~x").unwrap(), @r#"
1011        Difference(
1012            All,
1013            Pattern(PrefixPath("cur/x")),
1014        )
1015        "#);
1016        insta::assert_debug_snapshot!(parse("x|y|root:z").unwrap(), @r#"
1017        UnionAll(
1018            [
1019                Pattern(PrefixPath("cur/x")),
1020                Pattern(PrefixPath("cur/y")),
1021                Pattern(PrefixPath("z")),
1022            ],
1023        )
1024        "#);
1025        insta::assert_debug_snapshot!(parse("x|y&z").unwrap(), @r#"
1026        UnionAll(
1027            [
1028                Pattern(PrefixPath("cur/x")),
1029                Intersection(
1030                    Pattern(PrefixPath("cur/y")),
1031                    Pattern(PrefixPath("cur/z")),
1032                ),
1033            ],
1034        )
1035        "#);
1036    }
1037
1038    #[test]
1039    fn test_explicit_paths() {
1040        let collect = |expr: &FilesetExpression| -> Vec<RepoPathBuf> {
1041            expr.explicit_paths().map(|path| path.to_owned()).collect()
1042        };
1043        let file_expr = |path: &str| FilesetExpression::file_path(repo_path_buf(path));
1044        assert!(collect(&FilesetExpression::none()).is_empty());
1045        assert_eq!(collect(&file_expr("a")), ["a"].map(repo_path_buf));
1046        assert_eq!(
1047            collect(&FilesetExpression::union_all(vec![
1048                file_expr("a"),
1049                file_expr("b"),
1050                file_expr("c"),
1051            ])),
1052            ["a", "b", "c"].map(repo_path_buf)
1053        );
1054        assert_eq!(
1055            collect(&FilesetExpression::intersection(
1056                FilesetExpression::union_all(vec![
1057                    file_expr("a"),
1058                    FilesetExpression::none(),
1059                    file_expr("b"),
1060                    file_expr("c"),
1061                ]),
1062                FilesetExpression::difference(
1063                    file_expr("d"),
1064                    FilesetExpression::union_all(vec![file_expr("e"), file_expr("f")])
1065                )
1066            )),
1067            ["a", "b", "c", "d", "e", "f"].map(repo_path_buf)
1068        );
1069    }
1070
1071    #[test]
1072    fn test_build_matcher_simple() {
1073        let settings = insta_settings();
1074        let _guard = settings.bind_to_scope();
1075
1076        insta::assert_debug_snapshot!(FilesetExpression::none().to_matcher(), @"NothingMatcher");
1077        insta::assert_debug_snapshot!(FilesetExpression::all().to_matcher(), @"EverythingMatcher");
1078        insta::assert_debug_snapshot!(
1079            FilesetExpression::file_path(repo_path_buf("foo")).to_matcher(),
1080            @r#"
1081        FilesMatcher {
1082            tree: Dir {
1083                "foo": File {},
1084            },
1085        }
1086        "#);
1087        insta::assert_debug_snapshot!(
1088            FilesetExpression::prefix_path(repo_path_buf("foo")).to_matcher(),
1089            @r#"
1090        PrefixMatcher {
1091            tree: Dir {
1092                "foo": Prefix {},
1093            },
1094        }
1095        "#);
1096    }
1097
1098    #[test]
1099    fn test_build_matcher_glob_pattern() {
1100        let settings = insta_settings();
1101        let _guard = settings.bind_to_scope();
1102        let glob_expr = |dir: &str, pattern: &str| {
1103            FilesetExpression::pattern(FilePattern::FileGlob {
1104                dir: repo_path_buf(dir),
1105                pattern: Box::new(parse_file_glob(pattern, false).unwrap()),
1106            })
1107        };
1108
1109        insta::assert_debug_snapshot!(glob_expr("", "*").to_matcher(), @r#"
1110        GlobsMatcher {
1111            tree: Some(RegexSet(["(?-u)^[^/]*$"])) {},
1112        }
1113        "#);
1114
1115        let expr = FilesetExpression::union_all(vec![
1116            glob_expr("foo", "*"),
1117            glob_expr("foo/bar", "*"),
1118            glob_expr("foo", "?"),
1119        ]);
1120        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1121        GlobsMatcher {
1122            tree: None {
1123                "foo": Some(RegexSet(["(?-u)^[^/]*$", "(?-u)^[^/]$"])) {
1124                    "bar": Some(RegexSet(["(?-u)^[^/]*$"])) {},
1125                },
1126            },
1127        }
1128        "#);
1129    }
1130
1131    #[test]
1132    fn test_build_matcher_union_patterns_of_same_kind() {
1133        let settings = insta_settings();
1134        let _guard = settings.bind_to_scope();
1135
1136        let expr = FilesetExpression::union_all(vec![
1137            FilesetExpression::file_path(repo_path_buf("foo")),
1138            FilesetExpression::file_path(repo_path_buf("foo/bar")),
1139        ]);
1140        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1141        FilesMatcher {
1142            tree: Dir {
1143                "foo": File {
1144                    "bar": File {},
1145                },
1146            },
1147        }
1148        "#);
1149
1150        let expr = FilesetExpression::union_all(vec![
1151            FilesetExpression::prefix_path(repo_path_buf("bar")),
1152            FilesetExpression::prefix_path(repo_path_buf("bar/baz")),
1153        ]);
1154        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1155        PrefixMatcher {
1156            tree: Dir {
1157                "bar": Prefix {
1158                    "baz": Prefix {},
1159                },
1160            },
1161        }
1162        "#);
1163    }
1164
1165    #[test]
1166    fn test_build_matcher_union_patterns_of_different_kind() {
1167        let settings = insta_settings();
1168        let _guard = settings.bind_to_scope();
1169
1170        let expr = FilesetExpression::union_all(vec![
1171            FilesetExpression::file_path(repo_path_buf("foo")),
1172            FilesetExpression::prefix_path(repo_path_buf("bar")),
1173        ]);
1174        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1175        UnionMatcher {
1176            input1: FilesMatcher {
1177                tree: Dir {
1178                    "foo": File {},
1179                },
1180            },
1181            input2: PrefixMatcher {
1182                tree: Dir {
1183                    "bar": Prefix {},
1184                },
1185            },
1186        }
1187        "#);
1188    }
1189
1190    #[test]
1191    fn test_build_matcher_unnormalized_union() {
1192        let settings = insta_settings();
1193        let _guard = settings.bind_to_scope();
1194
1195        let expr = FilesetExpression::UnionAll(vec![]);
1196        insta::assert_debug_snapshot!(expr.to_matcher(), @"NothingMatcher");
1197
1198        let expr =
1199            FilesetExpression::UnionAll(vec![FilesetExpression::None, FilesetExpression::All]);
1200        insta::assert_debug_snapshot!(expr.to_matcher(), @r"
1201        UnionMatcher {
1202            input1: NothingMatcher,
1203            input2: EverythingMatcher,
1204        }
1205        ");
1206    }
1207
1208    #[test]
1209    fn test_build_matcher_combined() {
1210        let settings = insta_settings();
1211        let _guard = settings.bind_to_scope();
1212
1213        let expr = FilesetExpression::union_all(vec![
1214            FilesetExpression::intersection(FilesetExpression::all(), FilesetExpression::none()),
1215            FilesetExpression::difference(FilesetExpression::none(), FilesetExpression::all()),
1216            FilesetExpression::file_path(repo_path_buf("foo")),
1217            FilesetExpression::prefix_path(repo_path_buf("bar")),
1218        ]);
1219        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1220        UnionMatcher {
1221            input1: UnionMatcher {
1222                input1: IntersectionMatcher {
1223                    input1: EverythingMatcher,
1224                    input2: NothingMatcher,
1225                },
1226                input2: DifferenceMatcher {
1227                    wanted: NothingMatcher,
1228                    unwanted: EverythingMatcher,
1229                },
1230            },
1231            input2: UnionMatcher {
1232                input1: FilesMatcher {
1233                    tree: Dir {
1234                        "foo": File {},
1235                    },
1236                },
1237                input2: PrefixMatcher {
1238                    tree: Dir {
1239                        "bar": Prefix {},
1240                    },
1241                },
1242            },
1243        }
1244        "#);
1245    }
1246}