Skip to main content

openjd_snapshots/ops/
filter.rs

1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// Copyright by contributors to this project.
3// SPDX-License-Identifier: (Apache-2.0 OR MIT)
4
5use crate::manifest::{DirEntry, FileEntry, Manifest, ManifestEntry};
6
7/// Filters a manifest's entries using the provided predicate.
8///
9/// Returns a new manifest of the same type containing only entries
10/// for which the filter returns `true`. Total size is recomputed.
11pub fn filter_manifest<P: Clone, K: Clone>(
12    manifest: &Manifest<P, K>,
13    filter: &dyn Fn(&ManifestEntry) -> bool,
14) -> Manifest<P, K> {
15    let files: Vec<FileEntry> = manifest
16        .files
17        .iter()
18        .filter(|f| filter(&ManifestEntry::File(f)))
19        .cloned()
20        .collect();
21    let dirs: Vec<DirEntry> = manifest
22        .dirs
23        .iter()
24        .filter(|d| filter(&ManifestEntry::Dir(d)))
25        .cloned()
26        .collect();
27
28    let mut result = Manifest::new(manifest.hash_alg, manifest.file_chunk_size_bytes);
29    result.files = files;
30    result.dirs = dirs;
31    result.parent_manifest_hash = manifest.parent_manifest_hash.clone();
32    result.recompute_total_size();
33    result
34}
35
36pub struct IncludeExcludePathsFilter {
37    include: Vec<glob::Pattern>,
38    exclude: Vec<glob::Pattern>,
39}
40
41impl IncludeExcludePathsFilter {
42    pub fn new(include: &[&str], exclude: &[&str]) -> crate::Result<Self> {
43        fn compile(patterns: &[&str]) -> crate::Result<Vec<glob::Pattern>> {
44            patterns
45                .iter()
46                .map(|p| {
47                    glob::Pattern::new(p).map_err(|e| {
48                        crate::SnapshotError::Validation(format!("invalid glob pattern '{p}': {e}"))
49                    })
50                })
51                .collect()
52        }
53        Ok(Self {
54            include: compile(include)?,
55            exclude: compile(exclude)?,
56        })
57    }
58
59    pub fn matches(&self, entry: &ManifestEntry) -> bool {
60        self.matches_path(entry.path())
61    }
62
63    pub fn matches_path(&self, path: &str) -> bool {
64        let included = self.include.is_empty() || self.include.iter().any(|p| p.matches(path));
65        included && !self.exclude.iter().any(|p| p.matches(path))
66    }
67}
68
69impl std::fmt::Debug for IncludeExcludePathsFilter {
70    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71        write!(
72            f,
73            "IncludeExcludePathsFilter {{ include: {:?}, exclude: {:?} }}",
74            self.include.iter().map(|p| p.as_str()).collect::<Vec<_>>(),
75            self.exclude.iter().map(|p| p.as_str()).collect::<Vec<_>>(),
76        )
77    }
78}
79
80impl std::fmt::Display for IncludeExcludePathsFilter {
81    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82        std::fmt::Debug::fmt(self, f)
83    }
84}
85
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash::HashAlgorithm;
    use crate::{DirEntry, FileEntry, Manifest, Snapshot, DEFAULT_FILE_CHUNK_SIZE};

    /// Builds a snapshot from the given entries using `HashAlgorithm::Xxh128`
    /// and the default file chunk size.
    fn make_snapshot(files: Vec<FileEntry>, dirs: Vec<DirEntry>) -> Snapshot {
        Manifest::new(HashAlgorithm::Xxh128, DEFAULT_FILE_CHUNK_SIZE)
            .with_files(files)
            .with_dirs(dirs)
    }

    /// Builds a snapshot that contains files but no directories.
    fn files_only(files: Vec<FileEntry>) -> Snapshot {
        make_snapshot(files, Vec::new())
    }

    /// Filters `snapshot` through an include/exclude glob filter.
    fn apply_globs(snapshot: &Snapshot, include: &[&str], exclude: &[&str]) -> Snapshot {
        let globs = IncludeExcludePathsFilter::new(include, exclude).unwrap();
        filter_manifest(snapshot, &|entry| globs.matches(entry))
    }

    /// Filters `snapshot` down to entries whose path ends in `.txt`.
    fn keep_txt(snapshot: &Snapshot) -> Snapshot {
        filter_manifest(snapshot, &|entry| entry.path().ends_with(".txt"))
    }

    #[test]
    fn filter_keeps_matching_removes_nonmatching() {
        let snapshot = files_only(vec![
            FileEntry::file("a.txt", 10, 1),
            FileEntry::file("b.rs", 20, 2),
        ]);

        let filtered = keep_txt(&snapshot);

        assert_eq!(filtered.files.len(), 1);
        assert_eq!(filtered.files[0].path, "a.txt");
    }

    #[test]
    fn include_patterns() {
        let snapshot = files_only(vec![
            FileEntry::file("a.txt", 10, 1),
            FileEntry::file("b.rs", 20, 2),
            FileEntry::file("c.txt", 30, 3),
        ]);

        let filtered = apply_globs(&snapshot, &["*.txt"], &[]);

        assert_eq!(filtered.files.len(), 2);
        let has_non_txt = filtered
            .files
            .iter()
            .any(|file| !file.path.ends_with(".txt"));
        assert!(!has_non_txt);
    }

    #[test]
    fn exclude_patterns() {
        let snapshot = files_only(vec![
            FileEntry::file("a.txt", 10, 1),
            FileEntry::file("b.tmp", 20, 2),
            FileEntry::file("c.txt", 30, 3),
        ]);

        let filtered = apply_globs(&snapshot, &[], &["*.tmp"]);

        assert_eq!(filtered.files.len(), 2);
        let has_tmp = filtered
            .files
            .iter()
            .any(|file| file.path.ends_with(".tmp"));
        assert!(!has_tmp);
    }

    #[test]
    fn include_and_exclude_patterns() {
        let snapshot = files_only(vec![
            FileEntry::file("a.txt", 10, 1),
            FileEntry::file("backup.txt", 20, 2),
            FileEntry::file("c.rs", 30, 3),
        ]);

        let filtered = apply_globs(&snapshot, &["*.txt"], &["backup*"]);

        assert_eq!(filtered.files.len(), 1);
        assert_eq!(filtered.files[0].path, "a.txt");
    }

    #[test]
    fn empty_include_means_include_all() {
        let snapshot = files_only(vec![
            FileEntry::file("a.txt", 10, 1),
            FileEntry::file("b.rs", 20, 2),
        ]);

        let filtered = apply_globs(&snapshot, &[], &[]);

        assert_eq!(filtered.files.len(), 2);
    }

    #[test]
    fn total_size_recomputed() {
        let snapshot = files_only(vec![
            FileEntry::file("a.txt", 100, 1),
            FileEntry::file("b.txt", 200, 2),
            FileEntry::file("c.rs", 300, 3),
        ]);
        assert_eq!(snapshot.total_size, 600);

        // Dropping the 300-byte `.rs` file must be reflected in the total.
        let filtered = keep_txt(&snapshot);

        assert_eq!(filtered.total_size, 300);
    }

    #[test]
    fn filter_dirs() {
        let dirs = vec![DirEntry::new("src"), DirEntry::new("build")];
        let snapshot = make_snapshot(Vec::new(), dirs);

        let filtered = filter_manifest(&snapshot, &|entry| entry.path() == "src");

        assert_eq!(filtered.dirs.len(), 1);
        assert_eq!(filtered.dirs[0].path, "src");
    }

    #[test]
    fn preserves_parent_manifest_hash() {
        let snapshot = files_only(vec![FileEntry::file("a.txt", 10, 1)])
            .with_parent_hash(Some("abc123".into()));

        let filtered = filter_manifest(&snapshot, &|_| true);

        assert_eq!(filtered.parent_manifest_hash.as_deref(), Some("abc123"));
    }
}