Skip to main content

snapdir_core/
excludes.rs

1//! Exclude-pattern expansion and matching, plus the follow/no-follow setting.
2//!
3//! The oracle (`snapdir-manifest`) applies excludes as an **extended regular
4//! expression** fed to `grep -E -v`: a path is excluded when the regex matches
5//! it. The user-supplied `--exclude` pattern may embed two macros that expand
6//! to built-in sets, lifted verbatim from `_snapdir_manifest_define_exclude_patterns`:
7//!
8//! - `%system%` expands to the system directory set and **forces `--no-follow`**.
9//! - `%common%` expands to the common directory set (`.git`, `.cache`,
10//!   `node_modules`, `.DS_Store`, Trash dirs, …).
11//!
12//! Per the library-purity principle, `snapdir-core` reads **no** environment.
13//! The oracle's `%system%` set interpolates two runtime paths — `${HOME}/.cache/`
14//! and the resolved cache directory `${_SNAPDIR_MANIFEST_CACHE_DIR}` — so those
15//! are passed in as parameters; the CLI lane resolves `$HOME` / `XDG_CACHE_HOME`
16//! and hands them to [`expand_excludes`]. The built-in literal sets themselves
17//! match the oracle's hard-coded defaults (when `SNAPDIR_SYSTEM_EXCLUDE_DIRS` /
18//! `SNAPDIR_COMMON_EXCLUDE_DIRS` are unset).
19//!
20//! The filesystem walk that actually consults [`ExcludeMatcher::is_excluded`]
21//! lands in a later gate; this module models the expansion + matcher + the
22//! follow/no-follow option semantics, validated against the Bash source.
23
24use regex::Regex;
25use thiserror::Error;
26
/// Default system-directory alternation used by the `%system%` macro.
///
/// This is the static part of the oracle's `SNAPDIR_SYSTEM_EXCLUDE_DIRS`
/// default, copied verbatim from `_snapdir_manifest_define_exclude_patterns`.
/// The two runtime entries (`${HOME}/.cache/` and the resolved cache
/// directory) are not part of this constant: [`expand_excludes`] appends them
/// and wraps the whole set in a start-of-path `^` anchor.
///
/// The value is spliced into a regular expression as-is, so metacharacters
/// keep their regex meaning — the `+` in `/lost+found/` acts as a quantifier,
/// not as a literal plus sign. NOTE(review): presumably kept for parity with
/// the oracle; confirm before changing it here.
pub const SYSTEM_EXCLUDE_DIRS: &str = concat!(
    "/vscode/|",
    "/dev/|",
    "/proc/|",
    "/sys/|",
    "/tmp/|",
    "/var/run/|",
    "/run/|",
    "/mnt/|",
    "/media/|",
    "/lost+found/|",
    "/var/snap/lxd/common/ns/shmounts/|",
    "/var/snap/lxd/common/ns/mntns/|",
    "/var/lib/lxcfs/"
);
34
/// Default common-directory alternation used by the `%common%` macro.
///
/// This is the oracle's `SNAPDIR_COMMON_EXCLUDE_DIRS` default, copied verbatim
/// from `_snapdir_manifest_define_exclude_patterns`. [`expand_excludes`] wraps
/// it as `(/(...)($|/))`, so each entry is matched as a path segment that
/// follows a `/` and either ends the path or is followed by another `/`.
///
/// The value is spliced into a regular expression as-is, so the leading `.` of
/// entries such as `.git` matches any single character rather than a literal
/// dot. NOTE(review): presumably kept for parity with the oracle; confirm
/// before changing it here.
pub const COMMON_EXCLUDE_DIRS: &str = concat!(
    ".cache|",
    ".git|",
    ".DS_Store|",
    ".vscode-server|",
    ".dbus|",
    ".gvfs|",
    ".local/share/gvfs-metadata|",
    ".local/share/Trash|",
    ".Trash|",
    "node_modules|",
    "Trash-1000"
);
40
/// Symlink-handling policy for the filesystem walk.
///
/// Models the oracle's `_snapdir_manifest_find_flags`. Links are followed by
/// default ([`FollowMode::Follow`], i.e. `find -L`); passing `--no-follow`, or
/// using the `%system%` exclude macro, selects [`FollowMode::NoFollow`]
/// (plain `find`, which drops symlinks).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FollowMode {
    /// Symlinks are followed — the default, equivalent to `find -L`.
    #[default]
    Follow,
    /// Symlinks are not followed — `--no-follow`, equivalent to plain `find`.
    NoFollow,
}

impl FollowMode {
    /// Reports whether this mode follows symbolic links.
    ///
    /// `true` for [`FollowMode::Follow`], `false` for [`FollowMode::NoFollow`].
    #[must_use]
    pub fn follows_symlinks(self) -> bool {
        // Exhaustive on purpose: a new variant must decide its answer here.
        match self {
            Self::Follow => true,
            Self::NoFollow => false,
        }
    }
}
63
/// Errors raised while expanding/compiling an exclude pattern.
///
/// In the code visible in this module the only producer is
/// [`ExcludeMatcher::new`]; [`expand_excludes`] itself is infallible because it
/// only performs string substitution and never compiles the result.
#[derive(Debug, Error)]
pub enum ExcludeError {
    /// The expanded pattern was not a valid extended regular expression.
    ///
    /// Wraps the underlying [`regex::Error`], whose text is appended to the
    /// message via `{0}`. The `#[from]` conversion is what allows a
    /// `regex::Error` to be propagated with `?` from code returning this type.
    #[error("invalid exclude regex: {0}")]
    InvalidRegex(#[from] regex::Error),
}
71
/// The result of expanding a `--exclude` pattern: the final extended-regex
/// string plus whether the expansion forced `--no-follow`.
///
/// Returned by [`expand_excludes`]. The pattern held here is plain text that
/// has not been compiled or validated; hand it to [`ExcludeMatcher::new`] to
/// obtain a usable matcher.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExpandedExclude {
    /// The final extended-regex pattern (with `%system%` / `%common%`
    /// substituted), or `None` when the input was empty (no exclusion).
    pub pattern: Option<String>,
    /// `true` when `%system%` appeared and forced no-follow.
    ///
    /// This is only a flag: nothing in this type changes the follow mode, so
    /// the caller is expected to switch to [`FollowMode::NoFollow`] itself.
    pub forces_no_follow: bool,
}
82
83/// Expands the `%system%` / `%common%` macros in a `--exclude` pattern.
84///
85/// Reproduces `_snapdir_manifest_define_exclude_patterns` exactly:
86///
87/// - every occurrence of the literal `%system%` is replaced with
88///   `(^(<SYSTEM_EXCLUDE_DIRS>|<home_cache>|<cache_dir>))` and `forces_no_follow`
89///   is set;
90/// - every occurrence of the literal `%common%` is replaced with
91///   `(/(<COMMON_EXCLUDE_DIRS>)($|/))`.
92///
93/// `home_cache` is `${HOME}/.cache/` and `cache_dir` is the resolved
94/// `_SNAPDIR_MANIFEST_CACHE_DIR`; both are runtime values the CLI lane resolves
95/// and passes in (core reads no environment). An empty `pattern` yields no
96/// exclusion (matching the oracle, which only filters when the pattern is
97/// non-empty).
98#[must_use]
99pub fn expand_excludes(pattern: &str, home_cache: &str, cache_dir: &str) -> ExpandedExclude {
100    if pattern.is_empty() {
101        return ExpandedExclude {
102            pattern: None,
103            forces_no_follow: false,
104        };
105    }
106
107    let mut expanded = pattern.to_owned();
108    let mut forces_no_follow = false;
109
110    if expanded.contains("%system%") {
111        let system_set = format!("(^({SYSTEM_EXCLUDE_DIRS}|{home_cache}|{cache_dir}))");
112        expanded = expanded.replace("%system%", &system_set);
113        forces_no_follow = true;
114    }
115    if expanded.contains("%common%") {
116        let common_set = format!("(/({COMMON_EXCLUDE_DIRS})($|/))");
117        expanded = expanded.replace("%common%", &common_set);
118    }
119
120    ExpandedExclude {
121        pattern: Some(expanded),
122        forces_no_follow,
123    }
124}
125
126/// A compiled exclude matcher: a path is excluded when the (extended) regex
127/// matches anywhere in it, mirroring `grep -E -v`.
128#[derive(Debug, Clone)]
129pub struct ExcludeMatcher {
130    regex: Regex,
131}
132
133impl ExcludeMatcher {
134    /// Compiles an already-expanded extended-regex exclude pattern.
135    ///
136    /// # Errors
137    ///
138    /// Returns [`ExcludeError::InvalidRegex`] if `pattern` is not a valid
139    /// extended regular expression.
140    pub fn new(pattern: &str) -> Result<Self, ExcludeError> {
141        Ok(Self {
142            regex: Regex::new(pattern)?,
143        })
144    }
145
146    /// Returns `true` when `path` is excluded (the regex matches anywhere in
147    /// it), matching `grep -E -v`'s "drop matching lines" semantics.
148    #[must_use]
149    pub fn is_excluded(&self, path: &str) -> bool {
150        self.regex.is_match(path)
151    }
152}
153
#[cfg(test)]
mod tests {
    use super::*;

    // Stand-ins for the runtime values the CLI lane would resolve.
    const HOME_CACHE: &str = "/home/user/.cache/";
    const CACHE_DIR: &str = "/home/user/.cache/snapdir";

    /// Expands `pattern` using the fixture runtime paths.
    fn expand(pattern: &str) -> ExpandedExclude {
        expand_excludes(pattern, HOME_CACHE, CACHE_DIR)
    }

    /// Expands `pattern` and compiles the outcome into a matcher.
    fn expanded_matcher(pattern: &str) -> ExcludeMatcher {
        ExcludeMatcher::new(&expand(pattern).pattern.unwrap()).expect("valid regex")
    }

    /// Asserts that every path in `paths` is excluded by `matcher`.
    fn assert_all_excluded(matcher: &ExcludeMatcher, paths: &[&str]) {
        for path in paths {
            assert!(matcher.is_excluded(path), "{path:?} should be excluded");
        }
    }

    /// Asserts that no path in `paths` is excluded by `matcher`.
    fn assert_none_excluded(matcher: &ExcludeMatcher, paths: &[&str]) {
        for path in paths {
            assert!(!matcher.is_excluded(path), "{path:?} should not be excluded");
        }
    }

    // --- macro expansion ---------------------------------------------------

    #[test]
    fn exclude_system_expands_to_oracle_set_and_forces_no_follow() {
        let ExpandedExclude {
            pattern,
            forces_no_follow,
        } = expand("%system%");
        let expected = format!("(^({SYSTEM_EXCLUDE_DIRS}|{HOME_CACHE}|{CACHE_DIR}))");
        assert_eq!(pattern.as_deref(), Some(expected.as_str()));
        assert!(forces_no_follow, "%system% must force no-follow");
    }

    #[test]
    fn exclude_common_expands_to_oracle_set_without_forcing_no_follow() {
        let ExpandedExclude {
            pattern,
            forces_no_follow,
        } = expand("%common%");
        let expected = format!("(/({COMMON_EXCLUDE_DIRS})($|/))");
        assert_eq!(pattern.as_deref(), Some(expected.as_str()));
        assert!(!forces_no_follow, "%common% alone must NOT force no-follow");
    }

    #[test]
    fn exclude_combines_user_pattern_with_both_macros() {
        // Substitution happens in place: the user's own literal stays where it
        // was, joined to the expanded sets by the alternation the user typed.
        let ExpandedExclude {
            pattern,
            forces_no_follow,
        } = expand(".ignore|%common%|%system%");
        let pattern = pattern.expect("non-empty");
        assert!(pattern.starts_with(".ignore|"));
        for needle in ["node_modules", "/proc/"] {
            assert!(
                pattern.contains(needle),
                "expected {needle:?} in {pattern:?}"
            );
        }
        assert!(forces_no_follow, "%system% present forces no-follow");
    }

    #[test]
    fn exclude_empty_pattern_yields_no_exclusion() {
        let ExpandedExclude {
            pattern,
            forces_no_follow,
        } = expand("");
        assert_eq!(pattern, None);
        assert!(!forces_no_follow);
    }

    #[test]
    fn exclude_user_pattern_passes_through_verbatim() {
        // Without macros the user's regex comes back unchanged.
        let ExpandedExclude {
            pattern,
            forces_no_follow,
        } = expand(".git|.DS_Store");
        assert_eq!(pattern.as_deref(), Some(".git|.DS_Store"));
        assert!(!forces_no_follow);
    }

    // --- matcher -----------------------------------------------------------

    #[test]
    fn exclude_matcher_matches_representative_common_paths() {
        let matcher = expanded_matcher("%common%");

        // The common set has the shape `(/(...)($|/))`: a listed segment
        // matches when it ends the path or is followed by `/`.
        assert_all_excluded(
            &matcher,
            &[
                "/project/.git/config",
                "/project/node_modules/pkg/index.js",
                "/home/user/.DS_Store",
                "/repo/.cache",
            ],
        );

        // No excluded segment at all, plus `.gitignore`, which is not the
        // `.git` segment and therefore must survive.
        assert_none_excluded(
            &matcher,
            &[
                "/project/src/main.rs",
                "/project/readme.md",
                "/project/.gitignore",
            ],
        );
    }

    #[test]
    fn exclude_matcher_matches_representative_system_paths() {
        let matcher = expanded_matcher("%system%");

        // The system set has the shape `(^(...))`: anchored at start-of-path.
        assert_all_excluded(
            &matcher,
            &[
                "/proc/cpuinfo",
                "/dev/null",
                "/sys/kernel",
                "/tmp/scratch",
                "/home/user/.cache/thing",
            ],
        );

        // Because of the anchor, `/proc/` in the middle of a path is ignored.
        assert_none_excluded(
            &matcher,
            &["/data/proc/file", "/home/user/project/main.rs"],
        );
    }

    #[test]
    fn exclude_matcher_user_regex_is_extended_regex() {
        // grep -E semantics: `|` alternates without needing a backslash.
        let matcher = ExcludeMatcher::new("foo|bar").expect("valid regex");
        assert_all_excluded(&matcher, &["/a/foo/b", "/x/bar"]);
        assert_none_excluded(&matcher, &["/x/baz"]);
    }

    // --- follow / no-follow option semantics ------------------------------

    #[test]
    fn no_follow_default_is_follow() {
        let default_mode = FollowMode::default();
        assert_eq!(default_mode, FollowMode::Follow);
        assert!(default_mode.follows_symlinks());
    }

    #[test]
    fn no_follow_drops_symlinks() {
        assert!(!FollowMode::NoFollow.follows_symlinks());
        assert!(FollowMode::Follow.follows_symlinks());
    }

    #[test]
    fn no_follow_forced_by_system_exclude() {
        // `%system%` forces no-follow, so a caller starting from the Follow
        // default must end up on NoFollow once it honours the flag.
        let ExpandedExclude {
            forces_no_follow, ..
        } = expand("%system%");
        let mode = if forces_no_follow {
            FollowMode::NoFollow
        } else {
            FollowMode::Follow
        };
        assert_eq!(mode, FollowMode::NoFollow);
        assert!(!mode.follows_symlinks());
    }

    #[test]
    fn no_follow_not_forced_by_common_or_plain_exclude() {
        // `%common%`, plain user patterns and the empty pattern all leave the
        // follow setting alone.
        for pat in ["%common%", ".git", ""] {
            let ExpandedExclude {
                forces_no_follow, ..
            } = expand(pat);
            assert!(
                !forces_no_follow,
                "pattern {pat:?} must not force no-follow"
            );
        }
    }
}