jj_lib/
fileset.rs

1// Copyright 2024 The Jujutsu Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Functional language for selecting a set of paths.
16
17use std::collections::HashMap;
18use std::iter;
19use std::path;
20use std::slice;
21use std::sync::LazyLock;
22
23use globset::Glob;
24use globset::GlobBuilder;
25use itertools::Itertools as _;
26use thiserror::Error;
27
28use crate::dsl_util::collect_similar;
29use crate::fileset_parser;
30use crate::fileset_parser::BinaryOp;
31use crate::fileset_parser::ExpressionKind;
32use crate::fileset_parser::ExpressionNode;
33pub use crate::fileset_parser::FilesetDiagnostics;
34pub use crate::fileset_parser::FilesetParseError;
35pub use crate::fileset_parser::FilesetParseErrorKind;
36pub use crate::fileset_parser::FilesetParseResult;
37use crate::fileset_parser::FunctionCallNode;
38use crate::fileset_parser::UnaryOp;
39use crate::matchers::DifferenceMatcher;
40use crate::matchers::EverythingMatcher;
41use crate::matchers::FileGlobsMatcher;
42use crate::matchers::FilesMatcher;
43use crate::matchers::IntersectionMatcher;
44use crate::matchers::Matcher;
45use crate::matchers::NothingMatcher;
46use crate::matchers::PrefixMatcher;
47use crate::matchers::UnionMatcher;
48use crate::repo_path::RelativePathParseError;
49use crate::repo_path::RepoPath;
50use crate::repo_path::RepoPathBuf;
51use crate::repo_path::RepoPathUiConverter;
52use crate::repo_path::UiPathParseError;
53
54/// Error occurred during file pattern parsing.
55#[derive(Debug, Error)]
56pub enum FilePatternParseError {
57    /// Unknown pattern kind is specified.
58    #[error("Invalid file pattern kind `{0}:`")]
59    InvalidKind(String),
60    /// Failed to parse input UI path.
61    #[error(transparent)]
62    UiPath(#[from] UiPathParseError),
63    /// Failed to parse input workspace-relative path.
64    #[error(transparent)]
65    RelativePath(#[from] RelativePathParseError),
66    /// Failed to parse glob pattern.
67    #[error(transparent)]
68    GlobPattern(#[from] globset::Error),
69}
70
71/// Basic pattern to match `RepoPath`.
72#[derive(Clone, Debug)]
73pub enum FilePattern {
74    /// Matches file (or exact) path.
75    FilePath(RepoPathBuf),
76    /// Matches path prefix.
77    PrefixPath(RepoPathBuf),
78    /// Matches file (or exact) path with glob pattern.
79    FileGlob {
80        /// Prefix directory path where the `pattern` will be evaluated.
81        dir: RepoPathBuf,
82        /// Glob pattern relative to `dir`.
83        pattern: Box<Glob>,
84    },
85    // TODO: add more patterns:
86    // - FilesInPath: files in directory, non-recursively?
87    // - NameGlob or SuffixGlob: file name with glob?
88}
89
90impl FilePattern {
91    /// Parses the given `input` string as pattern of the specified `kind`.
92    pub fn from_str_kind(
93        path_converter: &RepoPathUiConverter,
94        input: &str,
95        kind: &str,
96    ) -> Result<Self, FilePatternParseError> {
97        // Naming convention:
98        // * path normalization
99        //   * cwd: cwd-relative path (default)
100        //   * root: workspace-relative path
101        // * where to anchor
102        //   * file: exact file path
103        //   * prefix: path prefix (files under directory recursively)
104        //   * files-in: files in directory non-recursively
105        //   * name: file name component (or suffix match?)
106        //   * substring: substring match?
107        // * string pattern syntax (+ case sensitivity?)
108        //   * path: literal path (default) (default anchor: prefix)
109        //   * glob: glob pattern (default anchor: file)
110        //   * regex?
111        match kind {
112            "cwd" => Self::cwd_prefix_path(path_converter, input),
113            "cwd-file" | "file" => Self::cwd_file_path(path_converter, input),
114            "cwd-glob" | "glob" => Self::cwd_file_glob(path_converter, input),
115            "cwd-glob-i" | "glob-i" => Self::cwd_file_glob_i(path_converter, input),
116            "root" => Self::root_prefix_path(input),
117            "root-file" => Self::root_file_path(input),
118            "root-glob" => Self::root_file_glob(input),
119            "root-glob-i" => Self::root_file_glob_i(input),
120            _ => Err(FilePatternParseError::InvalidKind(kind.to_owned())),
121        }
122    }
123
124    /// Pattern that matches cwd-relative file (or exact) path.
125    pub fn cwd_file_path(
126        path_converter: &RepoPathUiConverter,
127        input: impl AsRef<str>,
128    ) -> Result<Self, FilePatternParseError> {
129        let path = path_converter.parse_file_path(input.as_ref())?;
130        Ok(Self::FilePath(path))
131    }
132
133    /// Pattern that matches cwd-relative path prefix.
134    pub fn cwd_prefix_path(
135        path_converter: &RepoPathUiConverter,
136        input: impl AsRef<str>,
137    ) -> Result<Self, FilePatternParseError> {
138        let path = path_converter.parse_file_path(input.as_ref())?;
139        Ok(Self::PrefixPath(path))
140    }
141
142    /// Pattern that matches cwd-relative file path glob.
143    pub fn cwd_file_glob(
144        path_converter: &RepoPathUiConverter,
145        input: impl AsRef<str>,
146    ) -> Result<Self, FilePatternParseError> {
147        let (dir, pattern) = split_glob_path(input.as_ref());
148        let dir = path_converter.parse_file_path(dir)?;
149        Self::file_glob_at(dir, pattern, false)
150    }
151
152    /// Pattern that matches cwd-relative file path glob (case-insensitive).
153    pub fn cwd_file_glob_i(
154        path_converter: &RepoPathUiConverter,
155        input: impl AsRef<str>,
156    ) -> Result<Self, FilePatternParseError> {
157        let (dir, pattern) = split_glob_path_i(input.as_ref());
158        let dir = path_converter.parse_file_path(dir)?;
159        Self::file_glob_at(dir, pattern, true)
160    }
161
162    /// Pattern that matches workspace-relative file (or exact) path.
163    pub fn root_file_path(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
164        // TODO: Let caller pass in converter for root-relative paths too
165        let path = RepoPathBuf::from_relative_path(input.as_ref())?;
166        Ok(Self::FilePath(path))
167    }
168
169    /// Pattern that matches workspace-relative path prefix.
170    pub fn root_prefix_path(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
171        let path = RepoPathBuf::from_relative_path(input.as_ref())?;
172        Ok(Self::PrefixPath(path))
173    }
174
175    /// Pattern that matches workspace-relative file path glob.
176    pub fn root_file_glob(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
177        let (dir, pattern) = split_glob_path(input.as_ref());
178        let dir = RepoPathBuf::from_relative_path(dir)?;
179        Self::file_glob_at(dir, pattern, false)
180    }
181
182    /// Pattern that matches workspace-relative file path glob
183    /// (case-insensitive).
184    pub fn root_file_glob_i(input: impl AsRef<str>) -> Result<Self, FilePatternParseError> {
185        let (dir, pattern) = split_glob_path_i(input.as_ref());
186        let dir = RepoPathBuf::from_relative_path(dir)?;
187        Self::file_glob_at(dir, pattern, true)
188    }
189
190    fn file_glob_at(
191        dir: RepoPathBuf,
192        input: &str,
193        icase: bool,
194    ) -> Result<Self, FilePatternParseError> {
195        if input.is_empty() {
196            return Ok(Self::FilePath(dir));
197        }
198        // Normalize separator to '/', reject ".." which will never match
199        let normalized = RepoPathBuf::from_relative_path(input)?;
200        let pattern = Box::new(parse_file_glob(
201            normalized.as_internal_file_string(),
202            icase,
203        )?);
204        Ok(Self::FileGlob { dir, pattern })
205    }
206
207    /// Returns path if this pattern represents a literal path in a workspace.
208    /// Returns `None` if this is a glob pattern for example.
209    pub fn as_path(&self) -> Option<&RepoPath> {
210        match self {
211            Self::FilePath(path) => Some(path),
212            Self::PrefixPath(path) => Some(path),
213            Self::FileGlob { .. } => None,
214        }
215    }
216}
217
218pub(super) fn parse_file_glob(input: &str, icase: bool) -> Result<Glob, globset::Error> {
219    GlobBuilder::new(input)
220        .literal_separator(true)
221        .case_insensitive(icase)
222        .build()
223}
224
/// Returns `true` if `c` is treated as a glob metacharacter.
fn is_glob_char(c: char) -> bool {
    match c {
        // See globset::escape().
        '?' | '*' | '[' | ']' | '{' | '}' => true,
        // In addition to that, backslash is parsed as an escape sequence on
        // Unix.
        '\\' => !cfg!(windows),
        _ => false,
    }
}
236
237/// Splits `input` path into literal directory path and glob pattern.
238fn split_glob_path(input: &str) -> (&str, &str) {
239    let prefix_len = input
240        .split_inclusive(path::is_separator)
241        .take_while(|component| !component.contains(is_glob_char))
242        .map(|component| component.len())
243        .sum();
244    input.split_at(prefix_len)
245}
246
247/// Splits `input` path into literal directory path and glob pattern, for
248/// case-insensitive patterns.
249fn split_glob_path_i(input: &str) -> (&str, &str) {
250    let prefix_len = input
251        .split_inclusive(path::is_separator)
252        .take_while(|component| {
253            !component.contains(|c: char| c.is_ascii_alphabetic() || is_glob_char(c))
254        })
255        .map(|component| component.len())
256        .sum();
257    input.split_at(prefix_len)
258}
259
260/// AST-level representation of the fileset expression.
261#[derive(Clone, Debug)]
262pub enum FilesetExpression {
263    /// Matches nothing.
264    None,
265    /// Matches everything.
266    All,
267    /// Matches basic pattern.
268    Pattern(FilePattern),
269    /// Matches any of the expressions.
270    ///
271    /// Use `FilesetExpression::union_all()` to construct a union expression.
272    /// It will normalize 0-ary or 1-ary union.
273    UnionAll(Vec<FilesetExpression>),
274    /// Matches both expressions.
275    Intersection(Box<FilesetExpression>, Box<FilesetExpression>),
276    /// Matches the first expression, but not the second expression.
277    Difference(Box<FilesetExpression>, Box<FilesetExpression>),
278}
279
280impl FilesetExpression {
281    /// Expression that matches nothing.
282    pub fn none() -> Self {
283        Self::None
284    }
285
286    /// Expression that matches everything.
287    pub fn all() -> Self {
288        Self::All
289    }
290
291    /// Expression that matches the given `pattern`.
292    pub fn pattern(pattern: FilePattern) -> Self {
293        Self::Pattern(pattern)
294    }
295
296    /// Expression that matches file (or exact) path.
297    pub fn file_path(path: RepoPathBuf) -> Self {
298        Self::Pattern(FilePattern::FilePath(path))
299    }
300
301    /// Expression that matches path prefix.
302    pub fn prefix_path(path: RepoPathBuf) -> Self {
303        Self::Pattern(FilePattern::PrefixPath(path))
304    }
305
306    /// Expression that matches any of the given `expressions`.
307    pub fn union_all(expressions: Vec<Self>) -> Self {
308        match expressions.len() {
309            0 => Self::none(),
310            1 => expressions.into_iter().next().unwrap(),
311            _ => Self::UnionAll(expressions),
312        }
313    }
314
315    /// Expression that matches both `self` and `other`.
316    pub fn intersection(self, other: Self) -> Self {
317        Self::Intersection(Box::new(self), Box::new(other))
318    }
319
320    /// Expression that matches `self` but not `other`.
321    pub fn difference(self, other: Self) -> Self {
322        Self::Difference(Box::new(self), Box::new(other))
323    }
324
325    /// Flattens union expression at most one level.
326    fn as_union_all(&self) -> &[Self] {
327        match self {
328            Self::None => &[],
329            Self::UnionAll(exprs) => exprs,
330            _ => slice::from_ref(self),
331        }
332    }
333
334    fn dfs_pre(&self) -> impl Iterator<Item = &Self> {
335        let mut stack: Vec<&Self> = vec![self];
336        iter::from_fn(move || {
337            let expr = stack.pop()?;
338            match expr {
339                Self::None | Self::All | Self::Pattern(_) => {}
340                Self::UnionAll(exprs) => stack.extend(exprs.iter().rev()),
341                Self::Intersection(expr1, expr2) | Self::Difference(expr1, expr2) => {
342                    stack.push(expr2);
343                    stack.push(expr1);
344                }
345            }
346            Some(expr)
347        })
348    }
349
350    /// Iterates literal paths recursively from this expression.
351    ///
352    /// For example, `"a", "b", "c"` will be yielded in that order for
353    /// expression `"a" | all() & "b" | ~"c"`.
354    pub fn explicit_paths(&self) -> impl Iterator<Item = &RepoPath> {
355        // pre/post-ordering doesn't matter so long as children are visited from
356        // left to right.
357        self.dfs_pre().filter_map(|expr| match expr {
358            Self::Pattern(pattern) => pattern.as_path(),
359            _ => None,
360        })
361    }
362
363    /// Transforms the expression tree to `Matcher` object.
364    pub fn to_matcher(&self) -> Box<dyn Matcher> {
365        build_union_matcher(self.as_union_all())
366    }
367}
368
369/// Transforms the union `expressions` to `Matcher` object.
370///
371/// Since `Matcher` typically accepts a set of patterns to be OR-ed, this
372/// function takes a list of union `expressions` as input.
373fn build_union_matcher(expressions: &[FilesetExpression]) -> Box<dyn Matcher> {
374    let mut file_paths = Vec::new();
375    let mut prefix_paths = Vec::new();
376    let mut file_globs = Vec::new();
377    let mut matchers: Vec<Option<Box<dyn Matcher>>> = Vec::new();
378    for expr in expressions {
379        let matcher: Box<dyn Matcher> = match expr {
380            // None and All are supposed to be simplified by caller.
381            FilesetExpression::None => Box::new(NothingMatcher),
382            FilesetExpression::All => Box::new(EverythingMatcher),
383            FilesetExpression::Pattern(pattern) => {
384                match pattern {
385                    FilePattern::FilePath(path) => file_paths.push(path),
386                    FilePattern::PrefixPath(path) => prefix_paths.push(path),
387                    FilePattern::FileGlob { dir, pattern } => {
388                        file_globs.push((dir, pattern.clone()));
389                    }
390                }
391                continue;
392            }
393            // UnionAll is supposed to be flattened by caller.
394            FilesetExpression::UnionAll(exprs) => build_union_matcher(exprs),
395            FilesetExpression::Intersection(expr1, expr2) => {
396                let m1 = build_union_matcher(expr1.as_union_all());
397                let m2 = build_union_matcher(expr2.as_union_all());
398                Box::new(IntersectionMatcher::new(m1, m2))
399            }
400            FilesetExpression::Difference(expr1, expr2) => {
401                let m1 = build_union_matcher(expr1.as_union_all());
402                let m2 = build_union_matcher(expr2.as_union_all());
403                Box::new(DifferenceMatcher::new(m1, m2))
404            }
405        };
406        matchers.push(Some(matcher));
407    }
408
409    if !file_paths.is_empty() {
410        matchers.push(Some(Box::new(FilesMatcher::new(file_paths))));
411    }
412    if !prefix_paths.is_empty() {
413        matchers.push(Some(Box::new(PrefixMatcher::new(prefix_paths))));
414    }
415    if !file_globs.is_empty() {
416        matchers.push(Some(Box::new(FileGlobsMatcher::new(file_globs))));
417    }
418    union_all_matchers(&mut matchers)
419}
420
421/// Concatenates all `matchers` as union.
422///
423/// Each matcher element must be wrapped in `Some` so the matchers can be moved
424/// in arbitrary order.
425fn union_all_matchers(matchers: &mut [Option<Box<dyn Matcher>>]) -> Box<dyn Matcher> {
426    match matchers {
427        [] => Box::new(NothingMatcher),
428        [matcher] => matcher.take().expect("matcher should still be available"),
429        _ => {
430            // Build balanced tree to minimize the recursion depth.
431            let (left, right) = matchers.split_at_mut(matchers.len() / 2);
432            let m1 = union_all_matchers(left);
433            let m2 = union_all_matchers(right);
434            Box::new(UnionMatcher::new(m1, m2))
435        }
436    }
437}
438
439type FilesetFunction = fn(
440    &mut FilesetDiagnostics,
441    &RepoPathUiConverter,
442    &FunctionCallNode,
443) -> FilesetParseResult<FilesetExpression>;
444
445static BUILTIN_FUNCTION_MAP: LazyLock<HashMap<&str, FilesetFunction>> = LazyLock::new(|| {
446    // Not using maplit::hashmap!{} or custom declarative macro here because
447    // code completion inside macro is quite restricted.
448    let mut map: HashMap<&str, FilesetFunction> = HashMap::new();
449    map.insert("none", |_diagnostics, _path_converter, function| {
450        function.expect_no_arguments()?;
451        Ok(FilesetExpression::none())
452    });
453    map.insert("all", |_diagnostics, _path_converter, function| {
454        function.expect_no_arguments()?;
455        Ok(FilesetExpression::all())
456    });
457    map
458});
459
460fn resolve_function(
461    diagnostics: &mut FilesetDiagnostics,
462    path_converter: &RepoPathUiConverter,
463    function: &FunctionCallNode,
464) -> FilesetParseResult<FilesetExpression> {
465    if let Some(func) = BUILTIN_FUNCTION_MAP.get(function.name) {
466        func(diagnostics, path_converter, function)
467    } else {
468        Err(FilesetParseError::new(
469            FilesetParseErrorKind::NoSuchFunction {
470                name: function.name.to_owned(),
471                candidates: collect_similar(function.name, BUILTIN_FUNCTION_MAP.keys()),
472            },
473            function.name_span,
474        ))
475    }
476}
477
478fn resolve_expression(
479    diagnostics: &mut FilesetDiagnostics,
480    path_converter: &RepoPathUiConverter,
481    node: &ExpressionNode,
482) -> FilesetParseResult<FilesetExpression> {
483    let wrap_pattern_error =
484        |err| FilesetParseError::expression("Invalid file pattern", node.span).with_source(err);
485    match &node.kind {
486        ExpressionKind::Identifier(name) => {
487            let pattern =
488                FilePattern::cwd_prefix_path(path_converter, name).map_err(wrap_pattern_error)?;
489            Ok(FilesetExpression::pattern(pattern))
490        }
491        ExpressionKind::String(name) => {
492            let pattern =
493                FilePattern::cwd_prefix_path(path_converter, name).map_err(wrap_pattern_error)?;
494            Ok(FilesetExpression::pattern(pattern))
495        }
496        ExpressionKind::StringPattern { kind, value } => {
497            let pattern = FilePattern::from_str_kind(path_converter, value, kind)
498                .map_err(wrap_pattern_error)?;
499            Ok(FilesetExpression::pattern(pattern))
500        }
501        ExpressionKind::Unary(op, arg_node) => {
502            let arg = resolve_expression(diagnostics, path_converter, arg_node)?;
503            match op {
504                UnaryOp::Negate => Ok(FilesetExpression::all().difference(arg)),
505            }
506        }
507        ExpressionKind::Binary(op, lhs_node, rhs_node) => {
508            let lhs = resolve_expression(diagnostics, path_converter, lhs_node)?;
509            let rhs = resolve_expression(diagnostics, path_converter, rhs_node)?;
510            match op {
511                BinaryOp::Intersection => Ok(lhs.intersection(rhs)),
512                BinaryOp::Difference => Ok(lhs.difference(rhs)),
513            }
514        }
515        ExpressionKind::UnionAll(nodes) => {
516            let expressions = nodes
517                .iter()
518                .map(|node| resolve_expression(diagnostics, path_converter, node))
519                .try_collect()?;
520            Ok(FilesetExpression::union_all(expressions))
521        }
522        ExpressionKind::FunctionCall(function) => {
523            resolve_function(diagnostics, path_converter, function)
524        }
525    }
526}
527
528/// Parses text into `FilesetExpression` without bare string fallback.
529pub fn parse(
530    diagnostics: &mut FilesetDiagnostics,
531    text: &str,
532    path_converter: &RepoPathUiConverter,
533) -> FilesetParseResult<FilesetExpression> {
534    let node = fileset_parser::parse_program(text)?;
535    // TODO: add basic tree substitution pass to eliminate redundant expressions
536    resolve_expression(diagnostics, path_converter, &node)
537}
538
539/// Parses text into `FilesetExpression` with bare string fallback.
540///
541/// If the text can't be parsed as a fileset expression, and if it doesn't
542/// contain any operator-like characters, it will be parsed as a file path.
543pub fn parse_maybe_bare(
544    diagnostics: &mut FilesetDiagnostics,
545    text: &str,
546    path_converter: &RepoPathUiConverter,
547) -> FilesetParseResult<FilesetExpression> {
548    let node = fileset_parser::parse_program_or_bare_string(text)?;
549    // TODO: add basic tree substitution pass to eliminate redundant expressions
550    resolve_expression(diagnostics, path_converter, &node)
551}
552
553#[cfg(test)]
554mod tests {
555    use std::path::PathBuf;
556
557    use super::*;
558
559    fn repo_path_buf(value: impl Into<String>) -> RepoPathBuf {
560        RepoPathBuf::from_internal_string(value).unwrap()
561    }
562
563    fn insta_settings() -> insta::Settings {
564        let mut settings = insta::Settings::clone_current();
565        // Elide parsed glob options and tokens, which aren't interesting.
566        settings.add_filter(
567            r"(?m)^(\s{12}opts):\s*GlobOptions\s*\{\n(\s{16}.*\n)*\s{12}\},",
568            "$1: _,",
569        );
570        settings.add_filter(
571            r"(?m)^(\s{12}tokens):\s*Tokens\(\n(\s{16}.*\n)*\s{12}\),",
572            "$1: _,",
573        );
574        // Collapse short "Thing(_,)" repeatedly to save vertical space and make
575        // the output more readable.
576        for _ in 0..4 {
577            settings.add_filter(
578                r"(?x)
579                \b([A-Z]\w*)\(\n
580                    \s*(.{1,60}),\n
581                \s*\)",
582                "$1($2)",
583            );
584        }
585        settings
586    }
587
588    #[test]
589    fn test_parse_file_pattern() {
590        let settings = insta_settings();
591        let _guard = settings.bind_to_scope();
592        let path_converter = RepoPathUiConverter::Fs {
593            cwd: PathBuf::from("/ws/cur"),
594            base: PathBuf::from("/ws"),
595        };
596        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
597
598        // cwd-relative patterns
599        insta::assert_debug_snapshot!(
600            parse(".").unwrap(),
601            @r#"Pattern(PrefixPath("cur"))"#);
602        insta::assert_debug_snapshot!(
603            parse("..").unwrap(),
604            @r#"Pattern(PrefixPath(""))"#);
605        assert!(parse("../..").is_err());
606        insta::assert_debug_snapshot!(
607            parse("foo").unwrap(),
608            @r#"Pattern(PrefixPath("cur/foo"))"#);
609        insta::assert_debug_snapshot!(
610            parse("cwd:.").unwrap(),
611            @r#"Pattern(PrefixPath("cur"))"#);
612        insta::assert_debug_snapshot!(
613            parse("cwd-file:foo").unwrap(),
614            @r#"Pattern(FilePath("cur/foo"))"#);
615        insta::assert_debug_snapshot!(
616            parse("file:../foo/bar").unwrap(),
617            @r#"Pattern(FilePath("foo/bar"))"#);
618
619        // workspace-relative patterns
620        insta::assert_debug_snapshot!(
621            parse("root:.").unwrap(),
622            @r#"Pattern(PrefixPath(""))"#);
623        assert!(parse("root:..").is_err());
624        insta::assert_debug_snapshot!(
625            parse("root:foo/bar").unwrap(),
626            @r#"Pattern(PrefixPath("foo/bar"))"#);
627        insta::assert_debug_snapshot!(
628            parse("root-file:bar").unwrap(),
629            @r#"Pattern(FilePath("bar"))"#);
630    }
631
632    #[test]
633    fn test_parse_glob_pattern() {
634        let settings = insta_settings();
635        let _guard = settings.bind_to_scope();
636        let path_converter = RepoPathUiConverter::Fs {
637            // meta character in cwd path shouldn't be expanded
638            cwd: PathBuf::from("/ws/cur*"),
639            base: PathBuf::from("/ws"),
640        };
641        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
642
643        // cwd-relative, without meta characters
644        insta::assert_debug_snapshot!(
645            parse(r#"cwd-glob:"foo""#).unwrap(),
646            @r#"Pattern(FilePath("cur*/foo"))"#);
647        // Strictly speaking, glob:"" shouldn't match a file named <cwd>, but
648        // file pattern doesn't distinguish "foo/" from "foo".
649        insta::assert_debug_snapshot!(
650            parse(r#"glob:"""#).unwrap(),
651            @r#"Pattern(FilePath("cur*"))"#);
652        insta::assert_debug_snapshot!(
653            parse(r#"glob:".""#).unwrap(),
654            @r#"Pattern(FilePath("cur*"))"#);
655        insta::assert_debug_snapshot!(
656            parse(r#"glob:"..""#).unwrap(),
657            @r#"Pattern(FilePath(""))"#);
658
659        // cwd-relative, with meta characters
660        insta::assert_debug_snapshot!(
661            parse(r#"glob:"*""#).unwrap(), @r#"
662        Pattern(
663            FileGlob {
664                dir: "cur*",
665                pattern: Glob {
666                    glob: "*",
667                    re: "(?-u)^[^/]*$",
668                    opts: _,
669                    tokens: _,
670                },
671            },
672        )
673        "#);
674        insta::assert_debug_snapshot!(
675            parse(r#"glob:"./*""#).unwrap(), @r#"
676        Pattern(
677            FileGlob {
678                dir: "cur*",
679                pattern: Glob {
680                    glob: "*",
681                    re: "(?-u)^[^/]*$",
682                    opts: _,
683                    tokens: _,
684                },
685            },
686        )
687        "#);
688        insta::assert_debug_snapshot!(
689            parse(r#"glob:"../*""#).unwrap(), @r#"
690        Pattern(
691            FileGlob {
692                dir: "",
693                pattern: Glob {
694                    glob: "*",
695                    re: "(?-u)^[^/]*$",
696                    opts: _,
697                    tokens: _,
698                },
699            },
700        )
701        "#);
702        // glob:"**" is equivalent to root-glob:"<cwd>/**", not root-glob:"**"
703        insta::assert_debug_snapshot!(
704            parse(r#"glob:"**""#).unwrap(), @r#"
705        Pattern(
706            FileGlob {
707                dir: "cur*",
708                pattern: Glob {
709                    glob: "**",
710                    re: "(?-u)^.*$",
711                    opts: _,
712                    tokens: _,
713                },
714            },
715        )
716        "#);
717        insta::assert_debug_snapshot!(
718            parse(r#"glob:"../foo/b?r/baz""#).unwrap(), @r#"
719        Pattern(
720            FileGlob {
721                dir: "foo",
722                pattern: Glob {
723                    glob: "b?r/baz",
724                    re: "(?-u)^b[^/]r/baz$",
725                    opts: _,
726                    tokens: _,
727                },
728            },
729        )
730        "#);
731        assert!(parse(r#"glob:"../../*""#).is_err());
732        assert!(parse(r#"glob-i:"../../*""#).is_err());
733        assert!(parse(r#"glob:"/*""#).is_err());
734        assert!(parse(r#"glob-i:"/*""#).is_err());
735        // no support for relative path component after glob meta character
736        assert!(parse(r#"glob:"*/..""#).is_err());
737        assert!(parse(r#"glob-i:"*/..""#).is_err());
738
739        if cfg!(windows) {
740            // cwd-relative, with Windows path separators
741            insta::assert_debug_snapshot!(
742                parse(r#"glob:"..\\foo\\*\\bar""#).unwrap(), @r#"
743            Pattern(
744                FileGlob {
745                    dir: "foo",
746                    pattern: Glob {
747                        glob: "*/bar",
748                        re: "(?-u)^[^/]*/bar$",
749                        opts: _,
750                        tokens: _,
751                    },
752                },
753            )
754            "#);
755        } else {
756            // backslash is an escape character on Unix
757            insta::assert_debug_snapshot!(
758                parse(r#"glob:"..\\foo\\*\\bar""#).unwrap(), @r#"
759            Pattern(
760                FileGlob {
761                    dir: "cur*",
762                    pattern: Glob {
763                        glob: "..\\foo\\*\\bar",
764                        re: "(?-u)^\\.\\.foo\\*bar$",
765                        opts: _,
766                        tokens: _,
767                    },
768                },
769            )
770            "#);
771        }
772
773        // workspace-relative, without meta characters
774        insta::assert_debug_snapshot!(
775            parse(r#"root-glob:"foo""#).unwrap(),
776            @r#"Pattern(FilePath("foo"))"#);
777        insta::assert_debug_snapshot!(
778            parse(r#"root-glob:"""#).unwrap(),
779            @r#"Pattern(FilePath(""))"#);
780        insta::assert_debug_snapshot!(
781            parse(r#"root-glob:".""#).unwrap(),
782            @r#"Pattern(FilePath(""))"#);
783
784        // workspace-relative, with meta characters
785        insta::assert_debug_snapshot!(
786            parse(r#"root-glob:"*""#).unwrap(), @r#"
787        Pattern(
788            FileGlob {
789                dir: "",
790                pattern: Glob {
791                    glob: "*",
792                    re: "(?-u)^[^/]*$",
793                    opts: _,
794                    tokens: _,
795                },
796            },
797        )
798        "#);
799        insta::assert_debug_snapshot!(
800            parse(r#"root-glob:"foo/bar/b[az]""#).unwrap(), @r#"
801        Pattern(
802            FileGlob {
803                dir: "foo/bar",
804                pattern: Glob {
805                    glob: "b[az]",
806                    re: "(?-u)^b[az]$",
807                    opts: _,
808                    tokens: _,
809                },
810            },
811        )
812        "#);
813        insta::assert_debug_snapshot!(
814            parse(r#"root-glob:"foo/bar/b{ar,az}""#).unwrap(), @r#"
815        Pattern(
816            FileGlob {
817                dir: "foo/bar",
818                pattern: Glob {
819                    glob: "b{ar,az}",
820                    re: "(?-u)^b(?:az|ar)$",
821                    opts: _,
822                    tokens: _,
823                },
824            },
825        )
826        "#);
827        assert!(parse(r#"root-glob:"../*""#).is_err());
828        assert!(parse(r#"root-glob-i:"../*""#).is_err());
829        assert!(parse(r#"root-glob:"/*""#).is_err());
830        assert!(parse(r#"root-glob-i:"/*""#).is_err());
831
832        // workspace-relative, backslash escape without meta characters
833        if cfg!(not(windows)) {
834            insta::assert_debug_snapshot!(
835                parse(r#"root-glob:'foo/bar\baz'"#).unwrap(), @r#"
836            Pattern(
837                FileGlob {
838                    dir: "foo",
839                    pattern: Glob {
840                        glob: "bar\\baz",
841                        re: "(?-u)^barbaz$",
842                        opts: _,
843                        tokens: _,
844                    },
845                },
846            )
847            "#);
848        }
849    }
850
851    #[test]
852    fn test_parse_glob_pattern_case_insensitive() {
853        let settings = insta_settings();
854        let _guard = settings.bind_to_scope();
855        let path_converter = RepoPathUiConverter::Fs {
856            cwd: PathBuf::from("/ws/cur"),
857            base: PathBuf::from("/ws"),
858        };
859        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
860
861        // cwd-relative case-insensitive glob
862        insta::assert_debug_snapshot!(
863            parse(r#"glob-i:"*.TXT""#).unwrap(), @r#"
864        Pattern(
865            FileGlob {
866                dir: "cur",
867                pattern: Glob {
868                    glob: "*.TXT",
869                    re: "(?-u)(?i)^[^/]*\\.TXT$",
870                    opts: _,
871                    tokens: _,
872                },
873            },
874        )
875        "#);
876
877        // cwd-relative case-insensitive glob with more specific pattern
878        insta::assert_debug_snapshot!(
879            parse(r#"cwd-glob-i:"[Ff]oo""#).unwrap(), @r#"
880        Pattern(
881            FileGlob {
882                dir: "cur",
883                pattern: Glob {
884                    glob: "[Ff]oo",
885                    re: "(?-u)(?i)^[Ff]oo$",
886                    opts: _,
887                    tokens: _,
888                },
889            },
890        )
891        "#);
892
893        // workspace-relative case-insensitive glob
894        insta::assert_debug_snapshot!(
895            parse(r#"root-glob-i:"*.Rs""#).unwrap(), @r#"
896        Pattern(
897            FileGlob {
898                dir: "",
899                pattern: Glob {
900                    glob: "*.Rs",
901                    re: "(?-u)(?i)^[^/]*\\.Rs$",
902                    opts: _,
903                    tokens: _,
904                },
905            },
906        )
907        "#);
908
909        // case-insensitive pattern with directory component (should not split the path)
910        insta::assert_debug_snapshot!(
911            parse(r#"glob-i:"SubDir/*.rs""#).unwrap(), @r#"
912        Pattern(
913            FileGlob {
914                dir: "cur",
915                pattern: Glob {
916                    glob: "SubDir/*.rs",
917                    re: "(?-u)(?i)^SubDir/[^/]*\\.rs$",
918                    opts: _,
919                    tokens: _,
920                },
921            },
922        )
923        "#);
924
925        // case-sensitive pattern with directory component (should split the path)
926        insta::assert_debug_snapshot!(
927            parse(r#"glob:"SubDir/*.rs""#).unwrap(), @r#"
928        Pattern(
929            FileGlob {
930                dir: "cur/SubDir",
931                pattern: Glob {
932                    glob: "*.rs",
933                    re: "(?-u)^[^/]*\\.rs$",
934                    opts: _,
935                    tokens: _,
936                },
937            },
938        )
939        "#);
940
941        // case-insensitive pattern with leading dots (should split dots but not dirs)
942        insta::assert_debug_snapshot!(
943            parse(r#"glob-i:"../SomeDir/*.rs""#).unwrap(), @r#"
944        Pattern(
945            FileGlob {
946                dir: "",
947                pattern: Glob {
948                    glob: "SomeDir/*.rs",
949                    re: "(?-u)(?i)^SomeDir/[^/]*\\.rs$",
950                    opts: _,
951                    tokens: _,
952                },
953            },
954        )
955        "#);
956
957        // case-insensitive pattern with single leading dot
958        insta::assert_debug_snapshot!(
959            parse(r#"glob-i:"./SomeFile*.txt""#).unwrap(), @r#"
960        Pattern(
961            FileGlob {
962                dir: "cur",
963                pattern: Glob {
964                    glob: "SomeFile*.txt",
965                    re: "(?-u)(?i)^SomeFile[^/]*\\.txt$",
966                    opts: _,
967                    tokens: _,
968                },
969            },
970        )
971        "#);
972    }
973
974    #[test]
975    fn test_parse_function() {
976        let settings = insta_settings();
977        let _guard = settings.bind_to_scope();
978        let path_converter = RepoPathUiConverter::Fs {
979            cwd: PathBuf::from("/ws/cur"),
980            base: PathBuf::from("/ws"),
981        };
982        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
983
984        insta::assert_debug_snapshot!(parse("all()").unwrap(), @"All");
985        insta::assert_debug_snapshot!(parse("none()").unwrap(), @"None");
986        insta::assert_debug_snapshot!(parse("all(x)").unwrap_err().kind(), @r#"
987        InvalidArguments {
988            name: "all",
989            message: "Expected 0 arguments",
990        }
991        "#);
992        insta::assert_debug_snapshot!(parse("ale()").unwrap_err().kind(), @r#"
993        NoSuchFunction {
994            name: "ale",
995            candidates: [
996                "all",
997            ],
998        }
999        "#);
1000    }
1001
1002    #[test]
1003    fn test_parse_compound_expression() {
1004        let settings = insta_settings();
1005        let _guard = settings.bind_to_scope();
1006        let path_converter = RepoPathUiConverter::Fs {
1007            cwd: PathBuf::from("/ws/cur"),
1008            base: PathBuf::from("/ws"),
1009        };
1010        let parse = |text| parse_maybe_bare(&mut FilesetDiagnostics::new(), text, &path_converter);
1011
1012        insta::assert_debug_snapshot!(parse("~x").unwrap(), @r#"
1013        Difference(
1014            All,
1015            Pattern(PrefixPath("cur/x")),
1016        )
1017        "#);
1018        insta::assert_debug_snapshot!(parse("x|y|root:z").unwrap(), @r#"
1019        UnionAll(
1020            [
1021                Pattern(PrefixPath("cur/x")),
1022                Pattern(PrefixPath("cur/y")),
1023                Pattern(PrefixPath("z")),
1024            ],
1025        )
1026        "#);
1027        insta::assert_debug_snapshot!(parse("x|y&z").unwrap(), @r#"
1028        UnionAll(
1029            [
1030                Pattern(PrefixPath("cur/x")),
1031                Intersection(
1032                    Pattern(PrefixPath("cur/y")),
1033                    Pattern(PrefixPath("cur/z")),
1034                ),
1035            ],
1036        )
1037        "#);
1038    }
1039
1040    #[test]
1041    fn test_explicit_paths() {
1042        let collect = |expr: &FilesetExpression| -> Vec<RepoPathBuf> {
1043            expr.explicit_paths().map(|path| path.to_owned()).collect()
1044        };
1045        let file_expr = |path: &str| FilesetExpression::file_path(repo_path_buf(path));
1046        assert!(collect(&FilesetExpression::none()).is_empty());
1047        assert_eq!(collect(&file_expr("a")), ["a"].map(repo_path_buf));
1048        assert_eq!(
1049            collect(&FilesetExpression::union_all(vec![
1050                file_expr("a"),
1051                file_expr("b"),
1052                file_expr("c"),
1053            ])),
1054            ["a", "b", "c"].map(repo_path_buf)
1055        );
1056        assert_eq!(
1057            collect(&FilesetExpression::intersection(
1058                FilesetExpression::union_all(vec![
1059                    file_expr("a"),
1060                    FilesetExpression::none(),
1061                    file_expr("b"),
1062                    file_expr("c"),
1063                ]),
1064                FilesetExpression::difference(
1065                    file_expr("d"),
1066                    FilesetExpression::union_all(vec![file_expr("e"), file_expr("f")])
1067                )
1068            )),
1069            ["a", "b", "c", "d", "e", "f"].map(repo_path_buf)
1070        );
1071    }
1072
1073    #[test]
1074    fn test_build_matcher_simple() {
1075        let settings = insta_settings();
1076        let _guard = settings.bind_to_scope();
1077
1078        insta::assert_debug_snapshot!(FilesetExpression::none().to_matcher(), @"NothingMatcher");
1079        insta::assert_debug_snapshot!(FilesetExpression::all().to_matcher(), @"EverythingMatcher");
1080        insta::assert_debug_snapshot!(
1081            FilesetExpression::file_path(repo_path_buf("foo")).to_matcher(),
1082            @r#"
1083        FilesMatcher {
1084            tree: Dir {
1085                "foo": File {},
1086            },
1087        }
1088        "#);
1089        insta::assert_debug_snapshot!(
1090            FilesetExpression::prefix_path(repo_path_buf("foo")).to_matcher(),
1091            @r#"
1092        PrefixMatcher {
1093            tree: Dir {
1094                "foo": Prefix {},
1095            },
1096        }
1097        "#);
1098    }
1099
1100    #[test]
1101    fn test_build_matcher_glob_pattern() {
1102        let settings = insta_settings();
1103        let _guard = settings.bind_to_scope();
1104        let glob_expr = |dir: &str, pattern: &str| {
1105            FilesetExpression::pattern(FilePattern::FileGlob {
1106                dir: repo_path_buf(dir),
1107                pattern: Box::new(parse_file_glob(pattern, false).unwrap()),
1108            })
1109        };
1110
1111        insta::assert_debug_snapshot!(glob_expr("", "*").to_matcher(), @r#"
1112        FileGlobsMatcher {
1113            tree: [
1114                Regex("(?-u)^[^/]*$"),
1115            ] {},
1116        }
1117        "#);
1118
1119        let expr =
1120            FilesetExpression::union_all(vec![glob_expr("foo", "*"), glob_expr("foo/bar", "*")]);
1121        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1122        FileGlobsMatcher {
1123            tree: [] {
1124                "foo": [
1125                    Regex("(?-u)^[^/]*$"),
1126                ] {
1127                    "bar": [
1128                        Regex("(?-u)^[^/]*$"),
1129                    ] {},
1130                },
1131            },
1132        }
1133        "#);
1134    }
1135
1136    #[test]
1137    fn test_build_matcher_union_patterns_of_same_kind() {
1138        let settings = insta_settings();
1139        let _guard = settings.bind_to_scope();
1140
1141        let expr = FilesetExpression::union_all(vec![
1142            FilesetExpression::file_path(repo_path_buf("foo")),
1143            FilesetExpression::file_path(repo_path_buf("foo/bar")),
1144        ]);
1145        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1146        FilesMatcher {
1147            tree: Dir {
1148                "foo": File {
1149                    "bar": File {},
1150                },
1151            },
1152        }
1153        "#);
1154
1155        let expr = FilesetExpression::union_all(vec![
1156            FilesetExpression::prefix_path(repo_path_buf("bar")),
1157            FilesetExpression::prefix_path(repo_path_buf("bar/baz")),
1158        ]);
1159        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1160        PrefixMatcher {
1161            tree: Dir {
1162                "bar": Prefix {
1163                    "baz": Prefix {},
1164                },
1165            },
1166        }
1167        "#);
1168    }
1169
1170    #[test]
1171    fn test_build_matcher_union_patterns_of_different_kind() {
1172        let settings = insta_settings();
1173        let _guard = settings.bind_to_scope();
1174
1175        let expr = FilesetExpression::union_all(vec![
1176            FilesetExpression::file_path(repo_path_buf("foo")),
1177            FilesetExpression::prefix_path(repo_path_buf("bar")),
1178        ]);
1179        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1180        UnionMatcher {
1181            input1: FilesMatcher {
1182                tree: Dir {
1183                    "foo": File {},
1184                },
1185            },
1186            input2: PrefixMatcher {
1187                tree: Dir {
1188                    "bar": Prefix {},
1189                },
1190            },
1191        }
1192        "#);
1193    }
1194
1195    #[test]
1196    fn test_build_matcher_unnormalized_union() {
1197        let settings = insta_settings();
1198        let _guard = settings.bind_to_scope();
1199
1200        let expr = FilesetExpression::UnionAll(vec![]);
1201        insta::assert_debug_snapshot!(expr.to_matcher(), @"NothingMatcher");
1202
1203        let expr =
1204            FilesetExpression::UnionAll(vec![FilesetExpression::None, FilesetExpression::All]);
1205        insta::assert_debug_snapshot!(expr.to_matcher(), @r"
1206        UnionMatcher {
1207            input1: NothingMatcher,
1208            input2: EverythingMatcher,
1209        }
1210        ");
1211    }
1212
1213    #[test]
1214    fn test_build_matcher_combined() {
1215        let settings = insta_settings();
1216        let _guard = settings.bind_to_scope();
1217
1218        let expr = FilesetExpression::union_all(vec![
1219            FilesetExpression::intersection(FilesetExpression::all(), FilesetExpression::none()),
1220            FilesetExpression::difference(FilesetExpression::none(), FilesetExpression::all()),
1221            FilesetExpression::file_path(repo_path_buf("foo")),
1222            FilesetExpression::prefix_path(repo_path_buf("bar")),
1223        ]);
1224        insta::assert_debug_snapshot!(expr.to_matcher(), @r#"
1225        UnionMatcher {
1226            input1: UnionMatcher {
1227                input1: IntersectionMatcher {
1228                    input1: EverythingMatcher,
1229                    input2: NothingMatcher,
1230                },
1231                input2: DifferenceMatcher {
1232                    wanted: NothingMatcher,
1233                    unwanted: EverythingMatcher,
1234                },
1235            },
1236            input2: UnionMatcher {
1237                input1: FilesMatcher {
1238                    tree: Dir {
1239                        "foo": File {},
1240                    },
1241                },
1242                input2: PrefixMatcher {
1243                    tree: Dir {
1244                        "bar": Prefix {},
1245                    },
1246                },
1247            },
1248        }
1249        "#);
1250    }
1251}