Skip to main content

fallow_config/config/
glob_validation.rs

//! Validation of user-supplied glob patterns from the config file.
//!
//! Fallow accepts filesystem glob patterns in several config fields (`entry`,
//! `ignorePatterns`, `dynamicallyLoaded`, `duplicates.ignore`, `health.ignore`,
//! `boundaries.zones[].patterns`, `overrides[].files`, `ignoreExports[].file`,
//! `ignoreCatalogReferences[].consumer`). All of these are matched against
//! project-root-relative file paths. The matcher cannot reach outside the
//! project root by construction, but a malicious config can still slip in
//! absolute paths or `..` traversal segments that silently no-op today and
//! mask user intent.
//!
//! This module rejects such patterns at config-load time so users get a clear
//! error instead of a silent no-match. Invalid glob syntax also fails loud
//! here, replacing the historical `if let Ok(glob) = Glob::new(pattern)` drop
//! patterns scattered across the codebase.
//!
//! See issue #463 for the threat model.
18
19use std::fmt;
20use std::path::{Component, Path};
21
22use globset::Glob;
23
24/// Validation failure for a single user-supplied glob pattern.
25#[derive(Debug)]
26pub enum GlobValidationError {
27    /// Pattern is an absolute path (`/foo`, `\foo`, `C:\foo`, `\\share`).
28    AbsolutePath {
29        field: &'static str,
30        pattern: String,
31    },
32    /// Pattern contains a `..` path segment.
33    TraversalSegment {
34        field: &'static str,
35        pattern: String,
36    },
37    /// Pattern is not valid glob syntax.
38    InvalidSyntax {
39        field: &'static str,
40        pattern: String,
41        source: globset::Error,
42    },
43}
44
45impl fmt::Display for GlobValidationError {
46    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47        match self {
48            Self::AbsolutePath { field, pattern } => {
49                write!(
50                    f,
51                    "{field}: '{pattern}' is an absolute path; \
52                     use a pattern relative to the project root (e.g. 'src/**')"
53                )
54            }
55            Self::TraversalSegment { field, pattern } => {
56                write!(
57                    f,
58                    "{field}: '{pattern}' contains a '..' segment; \
59                     rewrite the pattern to stay inside the project root, \
60                     or run fallow with --root pointing at the directory you want to scan"
61                )
62            }
63            Self::InvalidSyntax {
64                field,
65                pattern,
66                source,
67            } => {
68                // `globset::Error`'s Display re-quotes the pattern, so strip
69                // the `error parsing glob '...': ` prefix to avoid showing
70                // the pattern twice. The kind tail (e.g. "unclosed character
71                // class; missing ']'") is the actionable bit.
72                let source_msg = source.to_string();
73                let tail = source_msg
74                    .find("': ")
75                    .map_or(source_msg.as_str(), |idx| &source_msg[idx + 3..]);
76                write!(
77                    f,
78                    "{field}: invalid glob '{pattern}': {tail}; \
79                     fix the syntax (see https://docs.rs/globset for the supported grammar)"
80                )
81            }
82        }
83    }
84}
85
86impl std::error::Error for GlobValidationError {
87    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
88        match self {
89            Self::InvalidSyntax { source, .. } => Some(source),
90            _ => None,
91        }
92    }
93}
94
/// Report whether `pattern` looks like an absolute path on ANY platform.
///
/// `Path::is_absolute` is deliberately avoided: it answers for the host OS
/// only, so on Unix it would treat `C:\foo` as relative.
///
/// Shapes that count as absolute:
/// - leading `/` (Unix root `/foo`, forward-slash UNC `//share/path`)
/// - leading `\` (Windows root `\foo`, UNC `\\share\path`)
/// - an ASCII letter followed by `:` (`C:\foo`, `c:/foo`, `D:foo`)
fn is_absolute_pattern(pattern: &str) -> bool {
    // Every character of interest is ASCII, so matching on the leading bytes
    // is equivalent to matching on the leading characters.
    match pattern.as_bytes() {
        [b'/' | b'\\', ..] => true,
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
113
/// Report whether `pattern` contains a `..` path segment.
///
/// Two checks are combined:
/// - a textual split on BOTH `/` and `\`, so a backslash-separated traversal
///   (`..\foo`) written on Windows is still caught when fallow runs on Unix,
///   where `Path::components` treats `\` as an ordinary character;
/// - the host platform's own `Path::components` view of the pattern.
///
/// Glob meta characters (`*`, `**`, `[abc]`, `{a,b}`) are untouched because
/// only separators are inspected; a name such as `foo..bar` is not a segment
/// equal to `..` and is not flagged.
fn has_traversal_segment(pattern: &str) -> bool {
    let is_separator = |ch: char| ch == '/' || ch == '\\';
    let found_by_split = pattern
        .split(is_separator)
        .any(|segment| segment == "..");

    found_by_split
        || Path::new(pattern)
            .components()
            .any(|part| part == Component::ParentDir)
}
129
130/// Validate that `pattern` is a relative, non-traversal, syntactically valid
131/// glob; return the compiled glob on success.
132///
133/// `field` is the dotted-path name of the config field the pattern came from
134/// (e.g. `"entry"`, `"ignorePatterns"`, `"duplicates.ignore"`); it appears
135/// verbatim in the error message so users can locate the bad value.
136///
137/// # Errors
138///
139/// Returns:
140/// - `AbsolutePath` if the pattern is rooted at `/`, `\`, `\\`, `//`, or a
141///   Windows drive letter
142/// - `TraversalSegment` if any path segment of the pattern is `..`
143/// - `InvalidSyntax` if `globset::Glob::new` rejects the pattern
144pub fn compile_user_glob(pattern: &str, field: &'static str) -> Result<Glob, GlobValidationError> {
145    if is_absolute_pattern(pattern) {
146        return Err(GlobValidationError::AbsolutePath {
147            field,
148            pattern: pattern.to_owned(),
149        });
150    }
151    if has_traversal_segment(pattern) {
152        return Err(GlobValidationError::TraversalSegment {
153            field,
154            pattern: pattern.to_owned(),
155        });
156    }
157    Glob::new(pattern).map_err(|source| GlobValidationError::InvalidSyntax {
158        field,
159        pattern: pattern.to_owned(),
160        source,
161    })
162}
163
164/// Validate a glob pattern that matches a raw import specifier, not a
165/// filesystem path.
166///
167/// Specifiers such as `../generated/foo` are valid import strings, so this
168/// intentionally skips the absolute-path and traversal-segment checks used for
169/// project-root-relative file globs.
170///
171/// # Errors
172///
173/// Returns `InvalidSyntax` if `globset::Glob::new` rejects the pattern.
174pub fn compile_user_specifier_glob(
175    pattern: &str,
176    field: &'static str,
177) -> Result<Glob, GlobValidationError> {
178    Glob::new(pattern).map_err(|source| GlobValidationError::InvalidSyntax {
179        field,
180        pattern: pattern.to_owned(),
181        source,
182    })
183}
184
185/// Validate a slice of import-specifier patterns, accumulating syntax errors.
186pub fn validate_user_specifier_globs(
187    patterns: &[String],
188    field: &'static str,
189    errors: &mut Vec<GlobValidationError>,
190) {
191    for pattern in patterns {
192        if let Err(e) = compile_user_specifier_glob(pattern, field) {
193            errors.push(e);
194        }
195    }
196}
197
198/// Validate a slice of patterns, accumulating ALL errors so the user sees
199/// every offending pattern in one run rather than fixing them one at a time.
200pub fn validate_user_globs(
201    patterns: &[String],
202    field: &'static str,
203    errors: &mut Vec<GlobValidationError>,
204) {
205    for pattern in patterns {
206        if let Err(e) = compile_user_glob(pattern, field) {
207            errors.push(e);
208        }
209    }
210}
211
212/// Validate a user-supplied DIRECTORY PATH (not a glob). Same absolute-path
213/// and traversal checks as `compile_user_glob`, but skips the glob-syntax
214/// check because the value is a literal path, not a pattern.
215///
216/// Used for fields like `boundaries.zones[].root` and
217/// `boundaries.zones[].autoDiscover` that name a directory subtree rather
218/// than a match pattern.
219///
220/// # Errors
221///
222/// Returns `AbsolutePath` or `TraversalSegment` for the same shapes
223/// `compile_user_glob` rejects. Never returns `InvalidSyntax`.
224pub fn validate_user_path(path: &str, field: &'static str) -> Result<(), GlobValidationError> {
225    if is_absolute_pattern(path) {
226        return Err(GlobValidationError::AbsolutePath {
227            field,
228            pattern: path.to_owned(),
229        });
230    }
231    if has_traversal_segment(path) {
232        return Err(GlobValidationError::TraversalSegment {
233            field,
234            pattern: path.to_owned(),
235        });
236    }
237    Ok(())
238}
239
240/// Same as `validate_user_path` but accumulates errors over a slice.
241pub fn validate_user_paths(
242    paths: &[String],
243    field: &'static str,
244    errors: &mut Vec<GlobValidationError>,
245) {
246    for path in paths {
247        if let Err(e) = validate_user_path(path, field) {
248            errors.push(e);
249        }
250    }
251}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile `pattern` for the `entry` field and return the rejection.
    /// Panics if the pattern is accepted.
    fn entry_rejection(pattern: &str) -> GlobValidationError {
        compile_user_glob(pattern, "entry").unwrap_err()
    }

    #[test]
    fn relative_glob_accepted() {
        let accepted = [
            "src/**/*.ts",
            "**/*.test.ts",
            "./src/main.ts",
            "packages/*/src/index.ts",
            "**/{a,b}.ts",
        ];
        for pattern in accepted {
            assert!(
                compile_user_glob(pattern, "entry").is_ok(),
                "unexpectedly rejected: {pattern}"
            );
        }
    }

    #[test]
    fn bracket_character_class_accepted() {
        // Bracket character classes are used for PascalCase component file
        // globs (`[A-Z]*.tsx`). A well-formed `[A-Z]` must not be mistaken
        // for an unclosed class. See user-panel review (Aisha's case).
        let cases = [
            ("[A-Z]*.tsx", "entry"),
            ("src/**/[A-Z]*.{ts,tsx}", "ignoreExports[].file"),
            ("**/[0-9][0-9]*.md", "entry"),
        ];
        for (pattern, field) in cases {
            assert!(
                compile_user_glob(pattern, field).is_ok(),
                "unexpectedly rejected: {pattern}"
            );
        }
    }

    #[test]
    fn validate_user_path_rejects_traversal_and_absolute() {
        let field = "boundaries.zones[].root";
        for rejected in ["../escape", "/abs/dir"] {
            assert!(validate_user_path(rejected, field).is_err());
        }
        // Literal paths get no syntax check, so `[abc]` is fine as a name.
        for accepted in ["packages/ui", "[brackets-literal]/dir"] {
            assert!(validate_user_path(accepted, field).is_ok());
        }
    }

    #[test]
    fn absolute_unix_path_rejected() {
        let err = entry_rejection("/etc/passwd");
        assert!(matches!(err, GlobValidationError::AbsolutePath { .. }));
        let msg = err.to_string();
        let expected_fragments = [
            "/etc/passwd",
            "entry",
            "absolute",
            "relative to the project root",
        ];
        for fragment in expected_fragments {
            assert!(msg.contains(fragment), "msg: {msg}");
        }
    }

    #[test]
    fn absolute_unix_glob_rejected() {
        let outcome = compile_user_glob("/root/.ssh/**", "ignorePatterns");
        assert!(matches!(
            outcome.unwrap_err(),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn absolute_windows_backslash_path_rejected() {
        assert!(matches!(
            entry_rejection("\\Windows\\System32"),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn unc_path_rejected() {
        assert!(matches!(
            entry_rejection("\\\\share\\secrets"),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn unc_forward_slash_rejected() {
        assert!(matches!(
            entry_rejection("//share/secrets"),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn windows_drive_letter_rejected() {
        for pat in ["C:\\Users", "c:/Users", "D:foo", "Z:\\"] {
            let err = entry_rejection(pat);
            assert!(
                matches!(err, GlobValidationError::AbsolutePath { .. }),
                "expected AbsolutePath for {pat}, got {err:?}"
            );
        }
    }

    #[test]
    fn traversal_segment_rejected() {
        let err = entry_rejection("../foo");
        assert!(matches!(err, GlobValidationError::TraversalSegment { .. }));
        assert!(err.to_string().contains("../foo"));
    }

    #[test]
    fn traversal_in_middle_rejected() {
        let outcome = compile_user_glob("src/../../../etc", "ignorePatterns");
        assert!(matches!(
            outcome.unwrap_err(),
            GlobValidationError::TraversalSegment { .. }
        ));
    }

    #[test]
    fn traversal_with_backslash_rejected() {
        assert!(matches!(
            entry_rejection("..\\foo"),
            GlobValidationError::TraversalSegment { .. }
        ));
    }

    #[test]
    fn traversal_in_glob_pattern_rejected() {
        assert!(matches!(
            entry_rejection("**/../secrets"),
            GlobValidationError::TraversalSegment { .. }
        ));
    }

    #[test]
    fn double_dot_filename_accepted() {
        // Only a whole segment equal to `..` is traversal; `foo..bar` is an
        // ordinary file name and must NOT be flagged.
        for pattern in ["foo..bar", "src/file.with..dots.ts"] {
            assert!(compile_user_glob(pattern, "entry").is_ok());
        }
    }

    #[test]
    fn current_dir_dot_accepted() {
        // A leading `./` is `Component::CurDir`, not `ParentDir`.
        let outcome = compile_user_glob("./src/**", "entry");
        assert!(outcome.is_ok());
    }

    #[test]
    fn invalid_glob_syntax_rejected() {
        let err = entry_rejection("[invalid");
        assert!(matches!(err, GlobValidationError::InvalidSyntax { .. }));
        let msg = err.to_string();
        assert!(msg.contains("entry"), "msg: {msg}");
        // The pattern must be shown once (inside `'[invalid'`), not twice.
        let occurrences = msg.matches("[invalid").count();
        assert_eq!(occurrences, 1, "msg: {msg}");
        assert!(msg.contains("unclosed character class"), "msg: {msg}");
    }

    #[test]
    fn empty_pattern_accepted_as_globset_handles_it() {
        // globset accepts the empty pattern (it matches the empty string), so
        // it is passed through rather than special-cased. The downstream
        // matcher never sees an empty relative path, so in practice this is a
        // no-op.
        let outcome = compile_user_glob("", "entry");
        assert!(outcome.is_ok());
    }

    #[test]
    fn validate_user_globs_collects_all_errors() {
        let patterns: Vec<String> = ["src/**", "../foo", "/abs", "[bad", "**/*.ts"]
            .iter()
            .map(|raw| (*raw).to_owned())
            .collect();
        let mut errors = Vec::new();
        validate_user_globs(&patterns, "ignorePatterns", &mut errors);

        assert_eq!(errors.len(), 3);
        // Failures are reported in input order, one per bad pattern.
        assert!(matches!(
            errors.as_slice(),
            [
                GlobValidationError::TraversalSegment { .. },
                GlobValidationError::AbsolutePath { .. },
                GlobValidationError::InvalidSyntax { .. },
            ]
        ));
    }

    #[test]
    fn field_name_in_error_message() {
        let rendered = compile_user_glob("../oops", "duplicates.ignore")
            .unwrap_err()
            .to_string();
        assert!(rendered.starts_with("duplicates.ignore:"));
    }
}