Skip to main content

mago_database/
membership.rs

1//! Single-path membership test: would a given file be part of the database
2//! under a [`DatabaseConfiguration`]?
3//!
4//! The loader discovers files by walking the configured `paths` / `includes`
5//! roots and filtering by extension and `excludes`.
6//!
//! The decision mirrors [`crate::loader`] exactly: a path is tracked iff it lies
//! under a configured root (`paths`, `includes`, or `patches`), is not excluded,
//! and either carries a configured extension or is itself one of the configured
//! roots. Excludes apply to `includes` roots too, just as they do during a full scan.
10
11use std::borrow::Cow;
12use std::ffi::OsString;
13use std::path::Path;
14use std::path::PathBuf;
15
16use foldhash::HashSet;
17use globset::GlobSet;
18
19use crate::DatabaseConfiguration;
20use crate::error::DatabaseError;
21use crate::exclusion::Exclusion;
22use crate::file::FileType;
23use crate::loader::calculate_pattern_specificity;
24use crate::loader::resolve_file_type;
25use crate::matcher::build_glob_set;
26use crate::utils::bytes_to_os_str;
27use crate::utils::bytes_to_path;
28
/// Resolves whether a single path belongs to the workspace database and, if so,
/// as which [`FileType`]. Built once from a [`DatabaseConfiguration`]; cheap to
/// query per path.
///
/// All stored paths are canonicalized at construction time when the filesystem
/// allows it, so queries compare canonical paths against canonical roots.
#[derive(Debug, Clone)]
pub struct WorkspaceMatcher {
    /// Workspace root, canonicalized when possible; used to relativize a path
    /// before testing it against the exclude globs.
    workspace: PathBuf,
    /// File extensions that qualify a path for tracking (`php` alone when the
    /// configuration lists none).
    extensions: HashSet<OsString>,
    /// Compiled set of every `Exclusion::Pattern` from the configuration.
    glob_excludes: GlobSet,
    /// Every `Exclusion::Path` from the configuration, made absolute against
    /// the workspace and canonicalized when possible.
    path_excludes: Vec<PathBuf>,
    /// `(root, specificity)` pairs derived from `paths`, or the workspace
    /// itself when `paths` is empty.
    host_bases: Vec<(PathBuf, usize)>,
    /// `(root, specificity)` pairs derived from `includes`.
    include_bases: Vec<(PathBuf, usize)>,
    /// `(root, specificity)` pairs derived from `patches`.
    patch_bases: Vec<(PathBuf, usize)>,
}
42
43impl WorkspaceMatcher {
44    /// Builds a matcher from a database configuration.
45    ///
46    /// # Errors
47    ///
48    /// Returns a [`DatabaseError`] if an exclude glob pattern fails to compile.
49    pub fn from_configuration(configuration: &DatabaseConfiguration<'_>) -> Result<Self, DatabaseError> {
50        let workspace = configuration.workspace.as_ref();
51        let workspace = workspace.canonicalize().unwrap_or_else(|_| workspace.to_path_buf());
52
53        let extensions: HashSet<OsString> = if configuration.extensions.is_empty() {
54            std::iter::once(OsString::from("php")).collect()
55        } else {
56            configuration.extensions.iter().map(|s| bytes_to_os_str(s.as_ref()).into_owned()).collect()
57        };
58
59        let glob_patterns: Vec<&str> = configuration
60            .excludes
61            .iter()
62            .filter_map(|ex| match ex {
63                Exclusion::Pattern(pattern) => Some(pattern.as_ref()),
64                Exclusion::Path(_) => None,
65            })
66            .collect();
67
68        let glob_excludes = build_glob_set(glob_patterns.iter().copied(), configuration.glob)?;
69        let path_excludes: Vec<PathBuf> = configuration
70            .excludes
71            .iter()
72            .filter_map(|ex| match ex {
73                Exclusion::Path(path) => {
74                    let path =
75                        if path.is_absolute() { path.as_ref().to_path_buf() } else { workspace.join(path.as_ref()) };
76                    Some(path.canonicalize().unwrap_or(path))
77                }
78                Exclusion::Pattern(_) => None,
79            })
80            .collect();
81
82        let make_bases = |patterns: &[Cow<'_, [u8]>]| -> Vec<(PathBuf, usize)> {
83            patterns
84                .iter()
85                .map(|pattern| {
86                    let specificity = calculate_pattern_specificity(pattern.as_ref());
87                    let base = extract_base_path(pattern.as_ref());
88                    let absolute = if base.is_absolute() { base } else { workspace.join(base) };
89                    (absolute.canonicalize().unwrap_or(absolute), specificity)
90                })
91                .collect()
92        };
93
94        // An empty `paths` means "scan the whole workspace", matching the loader.
95        let host_bases = if configuration.paths.is_empty() {
96            vec![(workspace.clone(), calculate_pattern_specificity(workspace.to_string_lossy().as_bytes()))]
97        } else {
98            make_bases(&configuration.paths)
99        };
100
101        let include_bases = make_bases(&configuration.includes);
102        let patch_bases = make_bases(&configuration.patches);
103
104        Ok(Self { workspace, extensions, glob_excludes, path_excludes, host_bases, include_bases, patch_bases })
105    }
106
107    /// Returns the [`FileType`] for `file` if it would be part of the database,
108    /// or `None` if the path lies outside every configured root or is excluded.
109    #[must_use]
110    pub fn classify(&self, file: &Path) -> Option<FileType> {
111        let canonical = file.canonicalize().unwrap_or_else(|_| file.to_path_buf());
112
113        let host = max_specificity(&self.host_bases, &canonical);
114        let include = max_specificity(&self.include_bases, &canonical);
115        let patch = max_specificity(&self.patch_bases, &canonical);
116        if host.is_none() && include.is_none() && patch.is_none() {
117            return None;
118        }
119
120        if self.is_excluded(&canonical) {
121            return None;
122        }
123
124        let extension_ok = canonical.extension().is_some_and(|ext| self.extensions.contains(ext));
125        let exact_base = self
126            .host_bases
127            .iter()
128            .chain(self.include_bases.iter())
129            .chain(self.patch_bases.iter())
130            .any(|(base, _)| base == &canonical);
131        if !extension_ok && !exact_base {
132            return None;
133        }
134
135        Some(resolve_file_type(host, include, patch))
136    }
137
138    /// Returns `true` if `path` would be part of the database.
139    #[must_use]
140    pub fn contains(&self, path: &Path) -> bool {
141        self.classify(path).is_some()
142    }
143
144    fn is_excluded(&self, canonical: &Path) -> bool {
145        if !self.glob_excludes.is_empty() {
146            if self.glob_excludes.is_match(canonical) {
147                return true;
148            }
149
150            if let Ok(relative) = canonical.strip_prefix(&self.workspace)
151                && self.glob_excludes.is_match(relative)
152            {
153                return true;
154            }
155        }
156
157        self.path_excludes.iter().any(|excluded| canonical.starts_with(excluded))
158    }
159}
160
/// Returns the highest specificity among the `bases` whose root is a
/// component-wise prefix of `path`, or `None` when no base contains it.
fn max_specificity(bases: &[(PathBuf, usize)], path: &Path) -> Option<usize> {
    let mut best: Option<usize> = None;

    for (base, specificity) in bases {
        if !path.starts_with(base) {
            continue;
        }

        best = Some(match best {
            Some(current) => current.max(*specificity),
            None => *specificity,
        });
    }

    best
}
164
165/// Extracts the literal directory portion of a path pattern, dropping any
166/// trailing glob segment. Mirrors the loader's view of a configured root: a
167/// glob like `src/**/*.php` is rooted at `src`, while a plain `tests/fixtures`
168/// is returned unchanged.
169fn extract_base_path(pattern: &[u8]) -> PathBuf {
170    let is_glob = pattern.iter().any(|&b| matches!(b, b'*' | b'?' | b'[' | b'{'));
171    if !is_glob {
172        return bytes_to_path(pattern).into_owned();
173    }
174
175    let first_glob = pattern.iter().position(|&b| matches!(b, b'*' | b'?' | b'[' | b'{')).unwrap_or(pattern.len());
176    let mut end = first_glob;
177    while end > 0 && matches!(pattern[end - 1], b'/' | b'\\') {
178        end -= 1;
179    }
180
181    let base = &pattern[..end];
182    if base.is_empty() { PathBuf::from(".") } else { bytes_to_path(base).into_owned() }
183}
184
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use std::borrow::Cow;

    use tempfile::TempDir;

    use crate::GlobSettings;

    use super::*;

    /// Writes a minimal PHP file at `relative` inside `dir`, creating any
    /// missing parent directories first.
    fn touch(dir: &TempDir, relative: &str) {
        let target = dir.path().join(relative);
        if let Some(parent) = target.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }

        std::fs::write(target, "<?php\n").unwrap();
    }

    /// Converts string patterns into the owned byte patterns the configuration stores.
    fn byte_patterns(patterns: &[&str]) -> Vec<Cow<'static, [u8]>> {
        patterns.iter().map(|pattern| Cow::Owned(pattern.as_bytes().to_vec())).collect()
    }

    /// Builds a configuration rooted at `dir` that tracks `php` files, has no
    /// patches, and uses the default glob settings.
    fn config(
        dir: &TempDir,
        paths: &[&str],
        includes: &[&str],
        excludes: Vec<Exclusion<'static>>,
    ) -> DatabaseConfiguration<'static> {
        DatabaseConfiguration {
            workspace: Cow::Owned(dir.path().to_path_buf()),
            paths: byte_patterns(paths),
            includes: byte_patterns(includes),
            patches: Vec::new(),
            excludes,
            extensions: vec![Cow::Borrowed(b"php")],
            glob: GlobSettings::default(),
        }
    }

    /// Builds the matcher under test, panicking if the configuration is rejected.
    fn build_matcher(configuration: &DatabaseConfiguration<'_>) -> WorkspaceMatcher {
        WorkspaceMatcher::from_configuration(configuration).unwrap()
    }

    /// Classifies the file at `relative` inside `dir`.
    fn classify_in(dir: &TempDir, subject: &WorkspaceMatcher, relative: &str) -> Option<FileType> {
        subject.classify(&dir.path().join(relative))
    }

    #[test]
    fn file_under_a_configured_path_is_tracked_as_host() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/Foo.php");

        let subject = build_matcher(&config(&dir, &["src"], &[], vec![]));

        assert_eq!(classify_in(&dir, &subject, "src/Foo.php"), Some(FileType::Host));
    }

    #[test]
    fn file_outside_every_configured_path_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/Foo.php");
        touch(&dir, "scripts/build.php");

        let subject = build_matcher(&config(&dir, &["src"], &[], vec![]));

        assert_eq!(classify_in(&dir, &subject, "scripts/build.php"), None);
    }

    #[test]
    fn glob_excluded_file_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/Foo.php");
        touch(&dir, "src/generated/Bar.php");

        let excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/generated/**"))];
        let subject = build_matcher(&config(&dir, &["src"], &[], excludes));

        assert_eq!(classify_in(&dir, &subject, "src/Foo.php"), Some(FileType::Host));
        assert_eq!(classify_in(&dir, &subject, "src/generated/Bar.php"), None);
    }

    #[test]
    fn path_excluded_directory_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "app/Foo.php");
        touch(&dir, "app/cache/Bar.php");

        let excludes = vec![Exclusion::Path(Cow::Owned(dir.path().join("app/cache")))];
        let subject = build_matcher(&config(&dir, &["app"], &[], excludes));

        assert_eq!(classify_in(&dir, &subject, "app/cache/Bar.php"), None);
    }

    #[test]
    fn included_path_is_tracked_as_vendored() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "vendor/foo/Lib.php");

        let subject = build_matcher(&config(&dir, &["src"], &["vendor/foo"], vec![]));

        assert_eq!(classify_in(&dir, &subject, "vendor/foo/Lib.php"), Some(FileType::Vendored));
    }

    #[test]
    fn excludes_apply_to_included_paths_too() {
        // Matches the loader: an `includes` root does not override `excludes`.
        let dir = TempDir::new().unwrap();
        touch(&dir, "vendor/foo/Lib.php");

        let excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**"))];
        let subject = build_matcher(&config(&dir, &["src"], &["vendor/foo"], excludes));

        assert_eq!(classify_in(&dir, &subject, "vendor/foo/Lib.php"), None);
    }

    #[test]
    fn patch_path_is_tracked_as_patch() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "stubs/Override.php");

        let mut configuration = config(&dir, &["src"], &[], vec![]);
        configuration.patches = vec![Cow::Owned(b"stubs".to_vec())];
        let subject = build_matcher(&configuration);

        assert_eq!(classify_in(&dir, &subject, "stubs/Override.php"), Some(FileType::Patch));
    }

    #[test]
    fn patch_beats_vendored_at_equal_specificity() {
        // Mirrors the loader's USER_DEFINED > PATCH > BUILT_IN > VENDORED tier order: a file
        // covered by both `includes` and `patches` at the same specificity resolves to Patch.
        let dir = TempDir::new().unwrap();
        touch(&dir, "lib/Lib.php");

        let mut configuration = config(&dir, &["src"], &["lib"], vec![]);
        configuration.patches = vec![Cow::Owned(b"lib".to_vec())];
        let subject = build_matcher(&configuration);

        assert_eq!(classify_in(&dir, &subject, "lib/Lib.php"), Some(FileType::Patch));
    }

    #[test]
    fn wrong_extension_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/notes.txt");

        let subject = build_matcher(&config(&dir, &["src"], &[], vec![]));

        assert_eq!(classify_in(&dir, &subject, "src/notes.txt"), None);
    }

    #[test]
    fn empty_paths_track_the_whole_workspace() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "anywhere/Foo.php");

        let subject = build_matcher(&config(&dir, &[], &[], vec![]));

        assert_eq!(classify_in(&dir, &subject, "anywhere/Foo.php"), Some(FileType::Host));
    }
}