Skip to main content

fallow_config/config/
glob_validation.rs

1//! Validation of user-supplied glob patterns from the config file.
2//!
3//! Fallow accepts glob patterns in several config fields (`entry`,
4//! `ignorePatterns`, `dynamicallyLoaded`, `duplicates.ignore`, `health.ignore`,
5//! `boundaries.zones[].patterns`, `overrides[].files`, `ignoreExports[].file`,
6//! `ignoreCatalogReferences[].consumer`). All of these are matched against
7//! project-root-relative file paths. The matcher cannot reach outside the
8//! project root by construction, but a malicious config can still slip in
9//! absolute paths or `..` traversal segments that silently no-op today and
10//! mask user intent.
11//!
12//! This module rejects such patterns at config-load time so users get a clear
13//! error instead of a silent no-match. Invalid glob syntax also fails loud
14//! here, replacing the historical `if let Ok(glob) = Glob::new(pattern)` drop
15//! patterns scattered across the codebase.
16//!
17//! See issue #463 for the threat model.
18
19use std::fmt;
20use std::path::{Component, Path};
21
22use globset::Glob;
23
24/// Validation failure for a single user-supplied glob pattern.
25#[derive(Debug)]
26pub enum GlobValidationError {
27    /// Pattern is an absolute path (`/foo`, `\foo`, `C:\foo`, `\\share`).
28    AbsolutePath {
29        field: &'static str,
30        pattern: String,
31    },
32    /// Pattern contains a `..` path segment.
33    TraversalSegment {
34        field: &'static str,
35        pattern: String,
36    },
37    /// Pattern is not valid glob syntax.
38    InvalidSyntax {
39        field: &'static str,
40        pattern: String,
41        source: globset::Error,
42    },
43}
44
45impl fmt::Display for GlobValidationError {
46    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47        match self {
48            Self::AbsolutePath { field, pattern } => {
49                write!(
50                    f,
51                    "{field}: '{pattern}' is an absolute path; \
52                     use a pattern relative to the project root (e.g. 'src/**')"
53                )
54            }
55            Self::TraversalSegment { field, pattern } => {
56                write!(
57                    f,
58                    "{field}: '{pattern}' contains a '..' segment; \
59                     rewrite the pattern to stay inside the project root, \
60                     or run fallow with --root pointing at the directory you want to scan"
61                )
62            }
63            Self::InvalidSyntax {
64                field,
65                pattern,
66                source,
67            } => {
68                // `globset::Error`'s Display re-quotes the pattern, so strip
69                // the `error parsing glob '...': ` prefix to avoid showing
70                // the pattern twice. The kind tail (e.g. "unclosed character
71                // class; missing ']'") is the actionable bit.
72                let source_msg = source.to_string();
73                let tail = source_msg
74                    .find("': ")
75                    .map_or(source_msg.as_str(), |idx| &source_msg[idx + 3..]);
76                write!(
77                    f,
78                    "{field}: invalid glob '{pattern}': {tail}; \
79                     fix the syntax (see https://docs.rs/globset for the supported grammar)"
80                )
81            }
82        }
83    }
84}
85
86impl std::error::Error for GlobValidationError {
87    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
88        match self {
89            Self::InvalidSyntax { source, .. } => Some(source),
90            _ => None,
91        }
92    }
93}
94
/// Report whether `pattern` looks like an absolute path on ANY platform.
///
/// `Path::is_absolute` is deliberately avoided: it only knows the host
/// platform's rules, so on Unix a value such as `C:\foo` would be treated
/// as relative.
///
/// Shapes that count as absolute:
/// - a leading `/` (Unix root, or forward-slash UNC `//share/path`)
/// - a leading `\` (Windows root, or UNC `\\share\path`)
/// - an ASCII letter followed by `:` (drive prefix: `C:\foo`, `c:/foo`,
///   `D:foo`)
fn is_absolute_pattern(pattern: &str) -> bool {
    match pattern.as_bytes() {
        // Rooted at a separator; this also covers both UNC spellings.
        [b'/', ..] | [b'\\', ..] => true,
        // Drive prefix: what follows the colon does not matter.
        [drive, b':', ..] => drive.is_ascii_alphabetic(),
        _ => false,
    }
}
113
/// Report whether `pattern` contains a `..` path segment.
///
/// Segments are delimited by `/` AND `\`, so a traversal written with
/// Windows separators (`..\foo`) is caught even when fallow runs on Unix,
/// where `Path::components` treats `\` as an ordinary character. The
/// platform's own component parsing is consulted as a second opinion.
///
/// Only separators are inspected, so glob metacharacters (`*`, `**`,
/// `[abc]`, `{a,b}`) never trigger a match on their own, and names that
/// merely contain two dots (`foo..bar`) are not flagged.
fn has_traversal_segment(pattern: &str) -> bool {
    let is_separator = |c: char| c == '/' || c == '\\';
    if pattern.split(is_separator).any(|segment| segment == "..") {
        return true;
    }
    Path::new(pattern)
        .components()
        .any(|component| component == Component::ParentDir)
}
129
130/// Validate that `pattern` is a relative, non-traversal, syntactically valid
131/// glob; return the compiled glob on success.
132///
133/// `field` is the dotted-path name of the config field the pattern came from
134/// (e.g. `"entry"`, `"ignorePatterns"`, `"duplicates.ignore"`); it appears
135/// verbatim in the error message so users can locate the bad value.
136///
137/// # Errors
138///
139/// Returns:
140/// - `AbsolutePath` if the pattern is rooted at `/`, `\`, `\\`, `//`, or a
141///   Windows drive letter
142/// - `TraversalSegment` if any path segment of the pattern is `..`
143/// - `InvalidSyntax` if `globset::Glob::new` rejects the pattern
144pub fn compile_user_glob(pattern: &str, field: &'static str) -> Result<Glob, GlobValidationError> {
145    if is_absolute_pattern(pattern) {
146        return Err(GlobValidationError::AbsolutePath {
147            field,
148            pattern: pattern.to_owned(),
149        });
150    }
151    if has_traversal_segment(pattern) {
152        return Err(GlobValidationError::TraversalSegment {
153            field,
154            pattern: pattern.to_owned(),
155        });
156    }
157    Glob::new(pattern).map_err(|source| GlobValidationError::InvalidSyntax {
158        field,
159        pattern: pattern.to_owned(),
160        source,
161    })
162}
163
164/// Validate a slice of patterns, accumulating ALL errors so the user sees
165/// every offending pattern in one run rather than fixing them one at a time.
166pub fn validate_user_globs(
167    patterns: &[String],
168    field: &'static str,
169    errors: &mut Vec<GlobValidationError>,
170) {
171    for pattern in patterns {
172        if let Err(e) = compile_user_glob(pattern, field) {
173            errors.push(e);
174        }
175    }
176}
177
178/// Validate a user-supplied DIRECTORY PATH (not a glob). Same absolute-path
179/// and traversal checks as `compile_user_glob`, but skips the glob-syntax
180/// check because the value is a literal path, not a pattern.
181///
182/// Used for fields like `boundaries.zones[].root` and
183/// `boundaries.zones[].autoDiscover` that name a directory subtree rather
184/// than a match pattern.
185///
186/// # Errors
187///
188/// Returns `AbsolutePath` or `TraversalSegment` for the same shapes
189/// `compile_user_glob` rejects. Never returns `InvalidSyntax`.
190pub fn validate_user_path(path: &str, field: &'static str) -> Result<(), GlobValidationError> {
191    if is_absolute_pattern(path) {
192        return Err(GlobValidationError::AbsolutePath {
193            field,
194            pattern: path.to_owned(),
195        });
196    }
197    if has_traversal_segment(path) {
198        return Err(GlobValidationError::TraversalSegment {
199            field,
200            pattern: path.to_owned(),
201        });
202    }
203    Ok(())
204}
205
206/// Same as `validate_user_path` but accumulates errors over a slice.
207pub fn validate_user_paths(
208    paths: &[String],
209    field: &'static str,
210    errors: &mut Vec<GlobValidationError>,
211) {
212    for path in paths {
213        if let Err(e) = validate_user_path(path, field) {
214            errors.push(e);
215        }
216    }
217}
218
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile `pattern` for the `entry` field and return the rejection it
    /// is expected to produce.
    fn entry_error(pattern: &str) -> GlobValidationError {
        compile_user_glob(pattern, "entry").unwrap_err()
    }

    #[test]
    fn relative_glob_accepted() {
        for pattern in [
            "src/**/*.ts",
            "**/*.test.ts",
            "./src/main.ts",
            "packages/*/src/index.ts",
            "**/{a,b}.ts",
        ] {
            assert!(
                compile_user_glob(pattern, "entry").is_ok(),
                "pattern: {pattern}"
            );
        }
    }

    #[test]
    fn bracket_character_class_accepted() {
        // Bracket character classes are how library authors glob for
        // PascalCase component files (`[A-Z]*.tsx`). A well-formed `[A-Z]`
        // must not be mistaken for an unclosed class. See user-panel
        // review (Aisha's case).
        for (pattern, field) in [
            ("[A-Z]*.tsx", "entry"),
            ("src/**/[A-Z]*.{ts,tsx}", "ignoreExports[].file"),
            ("**/[0-9][0-9]*.md", "entry"),
        ] {
            assert!(
                compile_user_glob(pattern, field).is_ok(),
                "pattern: {pattern}"
            );
        }
    }

    #[test]
    fn validate_user_path_rejects_traversal_and_absolute() {
        let field = "boundaries.zones[].root";
        for rejected in ["../escape", "/abs/dir"] {
            assert!(
                validate_user_path(rejected, field).is_err(),
                "path: {rejected}"
            );
        }
        // Literal paths skip the glob-syntax check, so `[abc]` is just a
        // directory name here.
        for accepted in ["packages/ui", "[brackets-literal]/dir"] {
            assert!(
                validate_user_path(accepted, field).is_ok(),
                "path: {accepted}"
            );
        }
    }

    #[test]
    fn absolute_unix_path_rejected() {
        let err = entry_error("/etc/passwd");
        assert!(matches!(err, GlobValidationError::AbsolutePath { .. }));
        let msg = err.to_string();
        for needle in [
            "/etc/passwd",
            "entry",
            "absolute",
            "relative to the project root",
        ] {
            assert!(msg.contains(needle), "msg: {msg}");
        }
    }

    #[test]
    fn absolute_unix_glob_rejected() {
        let err = compile_user_glob("/root/.ssh/**", "ignorePatterns").unwrap_err();
        assert!(matches!(err, GlobValidationError::AbsolutePath { .. }));
    }

    #[test]
    fn absolute_windows_backslash_path_rejected() {
        assert!(matches!(
            entry_error("\\Windows\\System32"),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn unc_path_rejected() {
        assert!(matches!(
            entry_error("\\\\share\\secrets"),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn unc_forward_slash_rejected() {
        assert!(matches!(
            entry_error("//share/secrets"),
            GlobValidationError::AbsolutePath { .. }
        ));
    }

    #[test]
    fn windows_drive_letter_rejected() {
        for pat in ["C:\\Users", "c:/Users", "D:foo", "Z:\\"] {
            let err = entry_error(pat);
            assert!(
                matches!(err, GlobValidationError::AbsolutePath { .. }),
                "expected AbsolutePath for {pat}, got {err:?}"
            );
        }
    }

    #[test]
    fn traversal_segment_rejected() {
        let err = entry_error("../foo");
        assert!(matches!(err, GlobValidationError::TraversalSegment { .. }));
        assert!(err.to_string().contains("../foo"));
    }

    #[test]
    fn traversal_in_middle_rejected() {
        let err = compile_user_glob("src/../../../etc", "ignorePatterns").unwrap_err();
        assert!(matches!(err, GlobValidationError::TraversalSegment { .. }));
    }

    #[test]
    fn traversal_with_backslash_rejected() {
        assert!(matches!(
            entry_error("..\\foo"),
            GlobValidationError::TraversalSegment { .. }
        ));
    }

    #[test]
    fn traversal_in_glob_pattern_rejected() {
        assert!(matches!(
            entry_error("**/../secrets"),
            GlobValidationError::TraversalSegment { .. }
        ));
    }

    #[test]
    fn double_dot_filename_accepted() {
        // Only a whole `..` segment is a traversal marker. Two dots inside
        // a file name (`foo..bar`) are ordinary characters and must NOT be
        // flagged.
        for pattern in ["foo..bar", "src/file.with..dots.ts"] {
            assert!(
                compile_user_glob(pattern, "entry").is_ok(),
                "pattern: {pattern}"
            );
        }
    }

    #[test]
    fn current_dir_dot_accepted() {
        // A leading `./` parses as `Component::CurDir`, not `ParentDir`.
        assert!(compile_user_glob("./src/**", "entry").is_ok());
    }

    #[test]
    fn invalid_glob_syntax_rejected() {
        let err = entry_error("[invalid");
        assert!(matches!(err, GlobValidationError::InvalidSyntax { .. }));
        let msg = err.to_string();
        assert!(msg.contains("entry"), "msg: {msg}");
        // The pattern must be quoted exactly once (as `'[invalid'`).
        assert_eq!(msg.matches("[invalid").count(), 1, "msg: {msg}");
        assert!(msg.contains("unclosed character class"), "msg: {msg}");
    }

    #[test]
    fn empty_pattern_accepted_as_globset_handles_it() {
        // globset accepts the empty pattern (it matches the empty string),
        // so it is passed through rather than special-cased. The matcher
        // never sees an empty relative path, so in practice it is a no-op.
        assert!(compile_user_glob("", "entry").is_ok());
    }

    #[test]
    fn validate_user_globs_collects_all_errors() {
        let patterns: Vec<String> = ["src/**", "../foo", "/abs", "[bad", "**/*.ts"]
            .iter()
            .map(|pattern| (*pattern).to_owned())
            .collect();
        let mut errors = Vec::new();
        validate_user_globs(&patterns, "ignorePatterns", &mut errors);
        assert_eq!(errors.len(), 3);
        // One error per bad pattern, reported in input order.
        assert!(matches!(
            errors.as_slice(),
            [
                GlobValidationError::TraversalSegment { .. },
                GlobValidationError::AbsolutePath { .. },
                GlobValidationError::InvalidSyntax { .. },
            ]
        ));
    }

    #[test]
    fn field_name_in_error_message() {
        let rendered = compile_user_glob("../oops", "duplicates.ignore")
            .unwrap_err()
            .to_string();
        assert!(rendered.starts_with("duplicates.ignore:"));
    }
}