1use std::borrow::Cow;
12use std::ffi::OsString;
13use std::path::Path;
14use std::path::PathBuf;
15
16use foldhash::HashSet;
17use globset::GlobSet;
18
19use crate::DatabaseConfiguration;
20use crate::error::DatabaseError;
21use crate::exclusion::Exclusion;
22use crate::file::FileType;
23use crate::loader::calculate_pattern_specificity;
24use crate::loader::resolve_file_type;
25use crate::matcher::build_glob_set;
26use crate::utils::bytes_to_os_str;
27use crate::utils::bytes_to_path;
28
/// Decides whether a path belongs to the configured workspace and, if so, which [`FileType`]
/// it should be tracked as.
///
/// All paths stored here are resolved once, in [`WorkspaceMatcher::from_configuration`]; each
/// call to [`WorkspaceMatcher::classify`] then only canonicalizes the queried file.
#[derive(Debug, Clone)]
pub struct WorkspaceMatcher {
    /// Workspace root, canonicalized when possible (the configured path is kept otherwise).
    workspace: PathBuf,
    /// File extensions that are tracked; contains only `php` when the configuration lists none.
    extensions: HashSet<OsString>,
    /// Glob set compiled from the `Exclusion::Pattern` entries of the configuration.
    glob_excludes: GlobSet,
    /// `Exclusion::Path` entries, made absolute against the workspace and canonicalized when
    /// possible. Anything under one of these paths is excluded.
    path_excludes: Vec<PathBuf>,
    /// `(base path, pattern specificity)` pairs derived from the configured `paths`, or a single
    /// entry for the workspace itself when no paths are configured.
    host_bases: Vec<(PathBuf, usize)>,
    /// `(base path, pattern specificity)` pairs derived from the configured `includes`.
    include_bases: Vec<(PathBuf, usize)>,
    /// `(base path, pattern specificity)` pairs derived from the configured `patches`.
    patch_bases: Vec<(PathBuf, usize)>,
}
42
43impl WorkspaceMatcher {
44 pub fn from_configuration(configuration: &DatabaseConfiguration<'_>) -> Result<Self, DatabaseError> {
50 let workspace = configuration.workspace.as_ref();
51 let workspace = workspace.canonicalize().unwrap_or_else(|_| workspace.to_path_buf());
52
53 let extensions: HashSet<OsString> = if configuration.extensions.is_empty() {
54 std::iter::once(OsString::from("php")).collect()
55 } else {
56 configuration.extensions.iter().map(|s| bytes_to_os_str(s.as_ref()).into_owned()).collect()
57 };
58
59 let glob_patterns: Vec<&str> = configuration
60 .excludes
61 .iter()
62 .filter_map(|ex| match ex {
63 Exclusion::Pattern(pattern) => Some(pattern.as_ref()),
64 Exclusion::Path(_) => None,
65 })
66 .collect();
67
68 let glob_excludes = build_glob_set(glob_patterns.iter().copied(), configuration.glob)?;
69 let path_excludes: Vec<PathBuf> = configuration
70 .excludes
71 .iter()
72 .filter_map(|ex| match ex {
73 Exclusion::Path(path) => {
74 let path =
75 if path.is_absolute() { path.as_ref().to_path_buf() } else { workspace.join(path.as_ref()) };
76 Some(path.canonicalize().unwrap_or(path))
77 }
78 Exclusion::Pattern(_) => None,
79 })
80 .collect();
81
82 let make_bases = |patterns: &[Cow<'_, [u8]>]| -> Vec<(PathBuf, usize)> {
83 patterns
84 .iter()
85 .map(|pattern| {
86 let specificity = calculate_pattern_specificity(pattern.as_ref());
87 let base = extract_base_path(pattern.as_ref());
88 let absolute = if base.is_absolute() { base } else { workspace.join(base) };
89 (absolute.canonicalize().unwrap_or(absolute), specificity)
90 })
91 .collect()
92 };
93
94 let host_bases = if configuration.paths.is_empty() {
96 vec![(workspace.clone(), calculate_pattern_specificity(workspace.to_string_lossy().as_bytes()))]
97 } else {
98 make_bases(&configuration.paths)
99 };
100
101 let include_bases = make_bases(&configuration.includes);
102 let patch_bases = make_bases(&configuration.patches);
103
104 Ok(Self { workspace, extensions, glob_excludes, path_excludes, host_bases, include_bases, patch_bases })
105 }
106
107 #[must_use]
110 pub fn classify(&self, file: &Path) -> Option<FileType> {
111 let canonical = file.canonicalize().unwrap_or_else(|_| file.to_path_buf());
112
113 let host = max_specificity(&self.host_bases, &canonical);
114 let include = max_specificity(&self.include_bases, &canonical);
115 let patch = max_specificity(&self.patch_bases, &canonical);
116 if host.is_none() && include.is_none() && patch.is_none() {
117 return None;
118 }
119
120 if self.is_excluded(&canonical) {
121 return None;
122 }
123
124 let extension_ok = canonical.extension().is_some_and(|ext| self.extensions.contains(ext));
125 let exact_base = self
126 .host_bases
127 .iter()
128 .chain(self.include_bases.iter())
129 .chain(self.patch_bases.iter())
130 .any(|(base, _)| base == &canonical);
131 if !extension_ok && !exact_base {
132 return None;
133 }
134
135 Some(resolve_file_type(host, include, patch))
136 }
137
138 #[must_use]
140 pub fn contains(&self, path: &Path) -> bool {
141 self.classify(path).is_some()
142 }
143
144 fn is_excluded(&self, canonical: &Path) -> bool {
145 if !self.glob_excludes.is_empty() {
146 if self.glob_excludes.is_match(canonical) {
147 return true;
148 }
149
150 if let Ok(relative) = canonical.strip_prefix(&self.workspace)
151 && self.glob_excludes.is_match(relative)
152 {
153 return true;
154 }
155 }
156
157 self.path_excludes.iter().any(|excluded| canonical.starts_with(excluded))
158 }
159}
160
/// Returns the highest specificity among the `bases` that `path` lies under, or `None` when
/// `path` is under none of them.
fn max_specificity(bases: &[(PathBuf, usize)], path: &Path) -> Option<usize> {
    let mut best: Option<usize> = None;
    for (base, specificity) in bases {
        if !path.starts_with(base) {
            continue;
        }

        best = Some(best.map_or(*specificity, |current| current.max(*specificity)));
    }

    best
}
164
165fn extract_base_path(pattern: &[u8]) -> PathBuf {
170 let is_glob = pattern.iter().any(|&b| matches!(b, b'*' | b'?' | b'[' | b'{'));
171 if !is_glob {
172 return bytes_to_path(pattern).into_owned();
173 }
174
175 let first_glob = pattern.iter().position(|&b| matches!(b, b'*' | b'?' | b'[' | b'{')).unwrap_or(pattern.len());
176 let mut end = first_glob;
177 while end > 0 && matches!(pattern[end - 1], b'/' | b'\\') {
178 end -= 1;
179 }
180
181 let base = &pattern[..end];
182 if base.is_empty() { PathBuf::from(".") } else { bytes_to_path(base).into_owned() }
183}
184
// Tests exercise the matcher against real files in a temporary directory. Fixtures are created
// before the matcher is built because the matcher canonicalizes its bases at construction time.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use std::borrow::Cow;

    use tempfile::TempDir;

    use crate::GlobSettings;

    use super::*;

    /// Creates `relative` inside `dir` (including missing parent directories) as a small PHP file.
    fn touch(dir: &TempDir, relative: &str) {
        let path = dir.path().join(relative);
        if let Some(parent) = path.parent() {
            std::fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(path, "<?php\n").unwrap();
    }

    /// Builds a configuration rooted at `dir` with the given paths, includes and excludes, no
    /// patches, the `php` extension and default glob settings.
    fn config(
        dir: &TempDir,
        paths: &[&str],
        includes: &[&str],
        excludes: Vec<Exclusion<'static>>,
    ) -> DatabaseConfiguration<'static> {
        DatabaseConfiguration {
            workspace: Cow::Owned(dir.path().to_path_buf()),
            paths: paths.iter().map(|s| Cow::Owned(s.as_bytes().to_vec())).collect(),
            includes: includes.iter().map(|s| Cow::Owned(s.as_bytes().to_vec())).collect(),
            patches: Vec::new(),
            excludes,
            extensions: vec![Cow::Borrowed(b"php")],
            glob: GlobSettings::default(),
        }
    }

    /// Builds a matcher from `configuration`, panicking if construction fails.
    fn matcher(configuration: &DatabaseConfiguration<'_>) -> WorkspaceMatcher {
        WorkspaceMatcher::from_configuration(configuration).unwrap()
    }

    #[test]
    fn file_under_a_configured_path_is_tracked_as_host() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/Foo.php");
        let matcher = matcher(&config(&dir, &["src"], &[], vec![]));

        assert_eq!(matcher.classify(&dir.path().join("src/Foo.php")), Some(FileType::Host));
    }

    #[test]
    fn file_outside_every_configured_path_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/Foo.php");
        touch(&dir, "scripts/build.php");
        let matcher = matcher(&config(&dir, &["src"], &[], vec![]));

        assert_eq!(matcher.classify(&dir.path().join("scripts/build.php")), None);
    }

    #[test]
    fn glob_excluded_file_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/Foo.php");
        touch(&dir, "src/generated/Bar.php");
        // The pattern is workspace-relative.
        let excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/generated/**"))];
        let matcher = matcher(&config(&dir, &["src"], &[], excludes));

        assert_eq!(matcher.classify(&dir.path().join("src/Foo.php")), Some(FileType::Host));
        assert_eq!(matcher.classify(&dir.path().join("src/generated/Bar.php")), None);
    }

    #[test]
    fn path_excluded_directory_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "app/Foo.php");
        touch(&dir, "app/cache/Bar.php");
        // The exclusion is given as an absolute path.
        let excludes = vec![Exclusion::Path(Cow::Owned(dir.path().join("app/cache")))];
        let matcher = matcher(&config(&dir, &["app"], &[], excludes));

        assert_eq!(matcher.classify(&dir.path().join("app/cache/Bar.php")), None);
    }

    #[test]
    fn included_path_is_tracked_as_vendored() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "vendor/foo/Lib.php");
        let matcher = matcher(&config(&dir, &["src"], &["vendor/foo"], vec![]));

        assert_eq!(matcher.classify(&dir.path().join("vendor/foo/Lib.php")), Some(FileType::Vendored));
    }

    #[test]
    fn excludes_apply_to_included_paths_too() {
        let dir = TempDir::new().unwrap();
        // The file is under an include, yet the glob exclusion still wins.
        touch(&dir, "vendor/foo/Lib.php");
        let excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**"))];
        let matcher = matcher(&config(&dir, &["src"], &["vendor/foo"], excludes));

        assert_eq!(matcher.classify(&dir.path().join("vendor/foo/Lib.php")), None);
    }

    #[test]
    fn patch_path_is_tracked_as_patch() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "stubs/Override.php");
        let mut configuration = config(&dir, &["src"], &[], vec![]);
        configuration.patches = vec![Cow::Owned(b"stubs".to_vec())];
        let matcher = matcher(&configuration);

        assert_eq!(matcher.classify(&dir.path().join("stubs/Override.php")), Some(FileType::Patch));
    }

    #[test]
    fn patch_beats_vendored_at_equal_specificity() {
        let dir = TempDir::new().unwrap();
        // `lib` is configured both as an include and as a patch, using the same pattern.
        touch(&dir, "lib/Lib.php");
        let mut configuration = config(&dir, &["src"], &["lib"], vec![]);
        configuration.patches = vec![Cow::Owned(b"lib".to_vec())];
        let matcher = matcher(&configuration);

        assert_eq!(matcher.classify(&dir.path().join("lib/Lib.php")), Some(FileType::Patch));
    }

    #[test]
    fn wrong_extension_is_not_tracked() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "src/notes.txt");
        let matcher = matcher(&config(&dir, &["src"], &[], vec![]));

        assert_eq!(matcher.classify(&dir.path().join("src/notes.txt")), None);
    }

    #[test]
    fn empty_paths_track_the_whole_workspace() {
        let dir = TempDir::new().unwrap();
        touch(&dir, "anywhere/Foo.php");
        let matcher = matcher(&config(&dir, &[], &[], vec![]));

        assert_eq!(matcher.classify(&dir.path().join("anywhere/Foo.php")), Some(FileType::Host));
    }
}