1use std::borrow::Cow;
4use std::collections::hash_map::Entry;
5use std::ffi::OsString;
6use std::path::Path;
7use std::path::PathBuf;
8
9use foldhash::HashMap;
10use foldhash::HashSet;
11use globset::GlobSet;
12use rayon::prelude::*;
13use walkdir::WalkDir;
14
15use crate::Database;
16use crate::DatabaseConfiguration;
17use crate::error::DatabaseError;
18use crate::exclusion::Exclusion;
19use crate::file::File;
20use crate::file::FileId;
21use crate::file::FileType;
22use crate::matcher::build_glob_set;
23use crate::utils::bytes_to_os_str;
24use crate::utils::bytes_to_path;
25use crate::utils::bytes_to_string_lossy;
26use crate::utils::read_file;
27
28#[derive(Debug)]
34struct FileWithSpecificity {
35 file: File,
36 specificity: usize,
37}
38
39pub struct DatabaseLoader<'config> {
41 database: Option<Database<'config>>,
42 configuration: DatabaseConfiguration<'config>,
43 memory_sources: Vec<(&'static [u8], &'static [u8], FileType)>,
44 stdin_override: Option<(Cow<'config, [u8]>, Vec<u8>)>,
45}
46
47impl<'config> DatabaseLoader<'config> {
48 #[inline]
49 #[must_use]
50 pub fn new(configuration: DatabaseConfiguration<'config>) -> Self {
51 Self { configuration, memory_sources: vec![], database: None, stdin_override: None }
52 }
53
54 #[inline]
55 #[must_use]
56 pub fn with_database(mut self, database: Database<'config>) -> Self {
57 self.database = Some(database);
58 self
59 }
60
61 #[inline]
67 #[must_use]
68 pub fn with_stdin_override(mut self, logical_name: impl AsRef<[u8]>, content: Vec<u8>) -> Self {
69 self.stdin_override = Some((Cow::Owned(logical_name.as_ref().to_vec()), content));
70 self
71 }
72
73 #[inline]
74 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
75 self.memory_sources.push((name.as_bytes(), contents.as_bytes(), file_type));
76 }
77
78 #[inline]
87 pub fn load(mut self) -> Result<Database<'config>, DatabaseError> {
88 let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
89
90 db.configuration = self.configuration.clone();
93
94 let extensions_set: HashSet<OsString> =
95 self.configuration.extensions.iter().map(|s| bytes_to_os_str(s.as_ref()).into_owned()).collect();
96
97 let glob_exclude_patterns: Vec<&str> = self
98 .configuration
99 .excludes
100 .iter()
101 .filter_map(|ex| match ex {
102 Exclusion::Pattern(pat) => Some(pat.as_ref()),
103 Exclusion::Path(_) => None,
104 })
105 .collect();
106
107 let glob_excludes = build_glob_set(glob_exclude_patterns.iter().copied(), self.configuration.glob)?;
108 let dir_prune_patterns: Vec<&str> = glob_exclude_patterns
109 .iter()
110 .filter_map(|pat| {
111 let stripped =
112 pat.strip_suffix("/**/*").or_else(|| pat.strip_suffix("/**")).or_else(|| pat.strip_suffix("/*"))?;
113 if stripped.is_empty() || stripped == "*" || stripped == "**" {
114 return None;
115 }
116 Some(stripped)
117 })
118 .collect();
119
120 let dir_prune_globs = build_glob_set(dir_prune_patterns.iter().copied(), self.configuration.glob)?;
121
122 let path_excludes: HashSet<_> = self
123 .configuration
124 .excludes
125 .iter()
126 .filter_map(|ex| match ex {
127 Exclusion::Path(p) => Some(p),
128 Exclusion::Pattern(_) => None,
129 })
130 .collect();
131
132 let host_files_with_spec = self.load_paths(
133 &self.configuration.paths,
134 FileType::Host,
135 &extensions_set,
136 &glob_excludes,
137 &dir_prune_globs,
138 &path_excludes,
139 )?;
140
141 let vendored_files_with_spec = self.load_paths(
142 &self.configuration.includes,
143 FileType::Vendored,
144 &extensions_set,
145 &glob_excludes,
146 &dir_prune_globs,
147 &path_excludes,
148 )?;
149
150 let patch_files_with_spec = self.load_paths(
151 &self.configuration.patches,
152 FileType::Patch,
153 &extensions_set,
154 &glob_excludes,
155 &dir_prune_globs,
156 &path_excludes,
157 )?;
158
159 let mut all_files: HashMap<FileId, File> = HashMap::default();
160 type TierSpecs = (Option<usize>, Option<usize>, Option<usize>);
164 let mut tier_specs: HashMap<FileId, TierSpecs> = HashMap::default();
165
166 for file_with_spec in host_files_with_spec {
168 let file_id = file_with_spec.file.id;
169 let specificity = file_with_spec.specificity;
170
171 all_files.insert(file_id, file_with_spec.file);
172 bump_spec(&mut tier_specs.entry(file_id).or_insert((None, None, None)).0, specificity);
173 }
174
175 if let Some((name, content)) = &self.stdin_override {
180 let virtual_path = self.configuration.workspace.join(bytes_to_path(name.as_ref()).as_ref());
181 let virtual_path_canonical = virtual_path.canonicalize().unwrap_or_else(|_| virtual_path.clone());
182 let virtual_path_str = virtual_path_canonical.to_string_lossy();
183
184 let matched_glob = !glob_excludes.is_empty()
185 && (glob_excludes.is_match(virtual_path_canonical.as_path())
186 || glob_excludes.is_match(bytes_to_path(name.as_ref()).as_ref()));
187
188 let matched_path = path_excludes.iter().any(|excl| {
189 let canonical = if Path::new(excl.as_ref()).is_absolute() {
190 excl.as_ref().to_path_buf()
191 } else {
192 self.configuration.workspace.join(excl.as_ref())
193 };
194 let canonical = canonical.canonicalize().unwrap_or(canonical);
195 let canonical_str = canonical.to_string_lossy();
196
197 virtual_path_str.starts_with(canonical_str.as_ref())
198 && matches!(virtual_path_str.as_bytes().get(canonical_str.len()), None | Some(&b'/' | &b'\\'))
199 });
200
201 if !matched_glob && !matched_path {
202 let file = File::ephemeral(Cow::Owned(name.as_ref().to_vec()), Cow::Owned(content.clone()));
203 let file_id = file.id;
204 if let Entry::Vacant(e) = all_files.entry(file_id) {
205 e.insert(file);
206
207 bump_spec(&mut tier_specs.entry(file_id).or_insert((None, None, None)).0, usize::MAX);
208 }
209 }
210 }
211
212 for file_with_spec in vendored_files_with_spec {
213 let file_id = file_with_spec.file.id;
214 let vendored_specificity = file_with_spec.specificity;
215
216 all_files.entry(file_id).or_insert(file_with_spec.file);
217 bump_spec(&mut tier_specs.entry(file_id).or_insert((None, None, None)).1, vendored_specificity);
218 }
219
220 for file_with_spec in patch_files_with_spec {
221 let file_id = file_with_spec.file.id;
222 let specificity = file_with_spec.specificity;
223 all_files.entry(file_id).or_insert(file_with_spec.file);
224 bump_spec(&mut tier_specs.entry(file_id).or_insert((None, None, None)).2, specificity);
225 }
226
227 db.reserve(tier_specs.len() + self.memory_sources.len());
228
229 for (file_id, (host_spec, vendored_spec, patch_spec)) in tier_specs {
230 if let Some(mut file) = all_files.remove(&file_id) {
231 file.file_type = resolve_file_type(host_spec, vendored_spec, patch_spec);
232 db.add(file);
233 }
234 }
235
236 for (name, contents, file_type) in self.memory_sources {
237 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
238
239 db.add(file);
240 }
241
242 Ok(db)
243 }
244
245 fn load_paths(
253 &self,
254 roots: &[Cow<'config, [u8]>],
255 file_type: FileType,
256 extensions: &HashSet<OsString>,
257 glob_excludes: &GlobSet,
258 dir_prune_globs: &GlobSet,
259 path_excludes: &HashSet<&Cow<'config, Path>>,
260 ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
261 let canonical_workspace =
265 self.configuration.workspace.canonicalize().unwrap_or_else(|_| self.configuration.workspace.to_path_buf());
266
267 let canonical_excludes: Vec<String> = path_excludes
271 .iter()
272 .filter_map(|ex| {
273 let p = if Path::new(ex.as_ref()).is_absolute() {
274 ex.as_ref().to_path_buf()
275 } else {
276 self.configuration.workspace.join(ex.as_ref())
277 };
278
279 p.canonicalize().ok()?.into_os_string().into_string().ok()
280 })
281 .collect();
282
283 let workspace_relative_str = |path: &Path| -> String {
284 let rel = path.strip_prefix(canonical_workspace.as_path()).unwrap_or(path);
285 let s = rel.to_string_lossy();
286 #[cfg(windows)]
287 {
288 s.replace('\\', "/")
289 }
290 #[cfg(not(windows))]
291 {
292 s.into_owned()
293 }
294 };
295
296 let mut paths_to_process: Vec<(PathBuf, usize, bool)> = Vec::new();
299
300 for root in roots {
301 let root_path = bytes_to_path(root.as_ref());
305 let resolved_path = if root_path.is_absolute() {
306 root_path.as_ref().to_path_buf()
307 } else {
308 self.configuration.workspace.join(root_path.as_ref())
309 };
310
311 let is_glob_pattern = !resolved_path.exists()
312 && (root.contains(&b'*') || root.contains(&b'?') || root.contains(&b'[') || root.contains(&b'{'));
313
314 let specificity = calculate_pattern_specificity(root.as_ref());
315 if is_glob_pattern {
316 let pattern = if root_path.is_absolute() {
318 bytes_to_string_lossy(root.as_ref()).into_owned()
319 } else {
320 self.configuration.workspace.join(root_path.as_ref()).to_string_lossy().to_string()
322 };
323
324 match glob::glob(&pattern) {
325 Ok(entries) => {
326 for entry in entries {
327 match entry {
328 Ok(path) => {
329 if path.is_file() {
330 let canonical = path.canonicalize().unwrap_or(path);
335 paths_to_process.push((canonical, specificity, false));
336 }
337 }
338 Err(e) => {
339 tracing::warn!("Failed to read glob entry: {}", e);
340 }
341 }
342 }
343 }
344 Err(e) => {
345 return Err(DatabaseError::Glob(e.to_string()));
346 }
347 }
348 } else {
349 let canonical_root = resolved_path.canonicalize().unwrap_or(resolved_path);
350
351 if canonical_root.is_file() {
355 paths_to_process.push((canonical_root, specificity, true));
356 continue;
357 }
358
359 let has_dir_prunes = !dir_prune_globs.is_empty();
360 let has_path_prunes = !canonical_excludes.is_empty();
361 let walker = WalkDir::new(&canonical_root).follow_links(true).into_iter().filter_entry(|entry| {
362 if entry.depth() == 0 || !entry.file_type().is_dir() {
363 return true;
364 }
365
366 let path = entry.path();
367
368 if has_path_prunes
369 && let Some(p) = path.to_str()
370 && canonical_excludes.iter().any(|excl| {
371 p.starts_with(excl.as_str())
372 && matches!(p.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
373 })
374 {
375 return false;
376 }
377
378 if has_dir_prunes
379 && (dir_prune_globs.is_match(path) || dir_prune_globs.is_match(workspace_relative_str(path)))
380 {
381 return false;
382 }
383
384 true
385 });
386
387 for entry in walker {
388 match entry {
389 Ok(entry) => {
390 if !entry.file_type().is_dir() {
391 paths_to_process.push((entry.into_path(), specificity, false));
392 }
393 }
394 Err(err) => {
395 let path = err.path().unwrap_or(canonical_root.as_path()).display();
396 if let Some(ancestor) = err.loop_ancestor() {
397 tracing::warn!(
398 "Skipping symlink loop at `{path}`: link cycles back to `{}`.",
399 ancestor.display(),
400 );
401 } else {
402 tracing::warn!("Failed to walk `{path}`: {err}. Entry will be skipped.");
403 }
404 }
405 }
406 }
407 }
408 }
409
410 let has_path_excludes = !canonical_excludes.is_empty();
411 let has_glob_excludes = !glob_excludes.is_empty();
412 let files: Vec<FileWithSpecificity> = paths_to_process
413 .into_par_iter()
414 .filter_map(|(path, specificity, skip_ext_check)| {
415 if has_glob_excludes
416 && (glob_excludes.is_match(&path) || glob_excludes.is_match(workspace_relative_str(&path)))
417 {
418 return None;
419 }
420
421 if !skip_ext_check {
422 let ext = path.extension()?;
423 if !extensions.contains(ext) {
424 return None;
425 }
426 }
427
428 if has_path_excludes {
429 let excluded = path.to_str().is_some_and(|s| {
430 canonical_excludes.iter().any(|excl| {
431 s.starts_with(excl.as_str())
432 && matches!(s.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
433 })
434 });
435
436 if excluded {
437 return None;
438 }
439 }
440
441 let workspace = canonical_workspace.as_path();
442 #[cfg(windows)]
443 let logical_name =
444 path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().replace('\\', "/");
445 #[cfg(not(windows))]
446 let logical_name =
447 path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().into_owned();
448
449 if let Some((override_name, override_content)) = &self.stdin_override
450 && override_name.as_ref() == logical_name.as_bytes()
451 {
452 let file = File::new(
453 Cow::Owned(logical_name.into_bytes()),
454 file_type,
455 Some(path),
456 Cow::Owned(override_content.clone()),
457 );
458
459 return Some(Ok(FileWithSpecificity { file, specificity }));
460 }
461
462 match read_file(workspace, &path, file_type) {
463 Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
464 Err(e) => Some(Err(e)),
465 }
466 })
467 .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
468
469 Ok(files)
470 }
471}
472
/// Raises `slot` to `s` when the slot is empty or currently holds a smaller
/// value; a slot that already holds a larger value is left as it is.
fn bump_spec(slot: &mut Option<usize>, s: usize) {
    let raised = match *slot {
        Some(current) if current > s => current,
        _ => s,
    };

    *slot = Some(raised);
}
476
477pub(crate) fn resolve_file_type(
486 host_spec: Option<usize>,
487 vendored_spec: Option<usize>,
488 patch_spec: Option<usize>,
489) -> FileType {
490 let mut decision: Option<(FileType, usize)> = host_spec.map(|s| (FileType::Host, s));
491
492 if let Some(v) = vendored_spec {
493 decision = match decision {
494 Some((FileType::Host, h)) if v < h => decision,
495 _ => Some((FileType::Vendored, v)),
496 };
497 }
498
499 if let Some(p) = patch_spec {
500 decision = match decision {
501 Some((FileType::Host | FileType::Patch, e)) if p <= e => decision,
502 _ => Some((FileType::Patch, p)),
503 };
504 }
505
506 decision.map(|(ft, _)| ft).unwrap_or(FileType::Host)
507}
508
509pub(crate) fn calculate_pattern_specificity(pattern: &[u8]) -> usize {
517 let pattern_path = bytes_to_path(pattern);
518
519 let component_count = pattern_path.components().count();
520 let is_glob =
521 pattern.contains(&b'*') || pattern.contains(&b'?') || pattern.contains(&b'[') || pattern.contains(&b'{');
522
523 if is_glob {
524 let non_wildcard_components = pattern_path
525 .components()
526 .filter(|c| {
527 let s = c.as_os_str().to_string_lossy();
528 !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
529 })
530 .count();
531 non_wildcard_components * 10
532 } else if pattern_path.is_file()
533 || pattern_path.extension().is_some()
534 || pattern.rsplit(|&b| b == b'.').next().is_some_and(|ext| ext.eq_ignore_ascii_case(b"php"))
535 {
536 component_count * 1000
537 } else {
538 component_count * 100
539 }
540}
541
/// Unit tests for the pure decision helpers: `resolve_file_type` and
/// `calculate_pattern_specificity`.
#[cfg(test)]
mod resolution_tests {
    use super::*;

    #[test]
    fn defaults_to_host_when_nothing_matches() {
        assert_eq!(resolve_file_type(None, None, None), FileType::Host);
    }

    #[test]
    fn host_only_match_yields_host() {
        assert_eq!(resolve_file_type(Some(100), None, None), FileType::Host);
    }

    #[test]
    fn vendored_only_match_yields_vendored() {
        assert_eq!(resolve_file_type(None, Some(100), None), FileType::Vendored);
    }

    #[test]
    fn patch_only_match_yields_patch() {
        assert_eq!(resolve_file_type(None, None, Some(100)), FileType::Patch);
    }

    #[test]
    fn vendored_beats_host_at_equal_specificity() {
        assert_eq!(resolve_file_type(Some(100), Some(100), None), FileType::Vendored);
    }

    #[test]
    fn vendored_beats_host_when_more_specific() {
        assert_eq!(resolve_file_type(Some(100), Some(2000), None), FileType::Vendored);
    }

    #[test]
    fn host_beats_vendored_only_when_strictly_more_specific() {
        assert_eq!(resolve_file_type(Some(2000), Some(100), None), FileType::Host);
    }

    #[test]
    fn patch_beats_vendored_unconditionally() {
        assert_eq!(resolve_file_type(None, Some(2000), Some(100)), FileType::Patch);
    }

    #[test]
    fn host_beats_patch_at_equal_specificity() {
        assert_eq!(resolve_file_type(Some(100), None, Some(100)), FileType::Host);
    }

    #[test]
    fn patch_beats_host_when_strictly_more_specific() {
        assert_eq!(resolve_file_type(Some(100), None, Some(2000)), FileType::Patch);
    }

    #[test]
    fn patch_beats_host_that_won_over_vendored() {
        // Host (2000) first beats Vendored (100); Patch (3000) is strictly more specific
        // than that surviving Host and therefore wins.
        assert_eq!(resolve_file_type(Some(2000), Some(100), Some(3000)), FileType::Patch);
    }

    #[test]
    fn patch_beats_vendored_that_won_over_host() {
        // Vendored (2000) first beats Host (100); Patch then beats Vendored regardless of
        // its own, lower specificity.
        assert_eq!(resolve_file_type(Some(100), Some(2000), Some(50)), FileType::Patch);
    }

    #[test]
    fn exact_file_path_beats_directory_at_same_component_count() {
        assert!(calculate_pattern_specificity(b"src/foo.php") > calculate_pattern_specificity(b"src/foo"));
    }

    #[test]
    fn directory_beats_glob_at_same_non_wildcard_count() {
        assert!(calculate_pattern_specificity(b"src/") > calculate_pattern_specificity(b"src/**/*.php"));
    }

    #[test]
    fn deeper_path_beats_shallower_at_same_kind() {
        assert!(calculate_pattern_specificity(b"src/inner/") > calculate_pattern_specificity(b"src/"));
    }

    #[test]
    fn extensionless_phpish_pattern_treated_as_file() {
        assert_eq!(calculate_pattern_specificity(b"src/foo.PHP"), calculate_pattern_specificity(b"src/foo.php"),);

        // `Path::extension` reports no extension for a dotfile such as `.php`, so these
        // inputs exercise the suffix fallback rather than the extension check.
        assert_eq!(calculate_pattern_specificity(b"src/.php"), calculate_pattern_specificity(b"src/foo.php"));
        assert_eq!(calculate_pattern_specificity(b"src/.PHP"), calculate_pattern_specificity(b"src/foo.php"));
    }
}
621
/// End-to-end tests that build a temporary workspace on disk and run the full
/// `DatabaseLoader::load` pipeline against it.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::DatabaseReader;
    use crate::GlobSettings;
    use std::borrow::Cow;
    use tempfile::TempDir;

    /// Configuration rooted at `temp_dir` with host `paths`, vendored `includes` and no patches.
    fn create_test_config(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
        create_test_config_with_patches(temp_dir, paths, includes, Vec::new())
    }

    /// Configuration rooted at `temp_dir` with host `paths`, vendored `includes` and `patches`,
    /// no exclusions, and `php` as the only accepted extension.
    fn create_test_config_with_patches(
        temp_dir: &TempDir,
        paths: Vec<&str>,
        includes: Vec<&str>,
        patches: Vec<&str>,
    ) -> DatabaseConfiguration<'static> {
        // Roots are written with `/` in the tests and converted to the platform separator here.
        fn to_root(pattern: &str) -> Cow<'static, [u8]> {
            Cow::Owned(pattern.replace('/', std::path::MAIN_SEPARATOR_STR).into_bytes())
        }

        DatabaseConfiguration {
            workspace: Cow::Owned(temp_dir.path().to_path_buf()),
            paths: paths.into_iter().map(to_root).collect(),
            includes: includes.into_iter().map(to_root).collect(),
            patches: patches.into_iter().map(to_root).collect(),
            excludes: vec![],
            extensions: vec![Cow::Borrowed(b"php")],
            glob: GlobSettings::default(),
        }
    }

    /// Lossy textual view of a file name, for substring assertions.
    fn name_str(name: &[u8]) -> std::borrow::Cow<'_, str> {
        String::from_utf8_lossy(name)
    }

    /// Writes `content` to `relative_path` inside `temp_dir`, creating parent directories.
    fn create_test_file(temp_dir: &TempDir, relative_path: &str, content: &str) {
        let target = temp_dir.path().join(relative_path);
        if let Some(directory) = target.parent() {
            std::fs::create_dir_all(directory).unwrap();
        }
        std::fs::write(target, content).unwrap();
    }

    /// Runs a plain loader over `configuration`, panicking if the load fails.
    fn load_database(configuration: DatabaseConfiguration<'static>) -> Database<'static> {
        DatabaseLoader::new(configuration).load().unwrap()
    }

    /// Names of every file in `db`, decoded lossily.
    fn loaded_names(db: &Database<'_>) -> Vec<String> {
        db.files().map(|file| name_str(&file.name).into_owned()).collect()
    }

    #[test]
    fn test_exact_file_vs_directory() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/b.php", "<?php");
        create_test_file(&workspace, "src/a.php", "<?php");

        let db = load_database(create_test_config(&workspace, vec!["src/b.php"], vec!["src/"]));

        let b_file = db.files().find(|f| name_str(&f.name).contains("b.php")).unwrap();
        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host (exact file beats directory)");

        let a_file = db.files().find(|f| name_str(&f.name).contains("a.php")).unwrap();
        assert_eq!(a_file.file_type, FileType::Vendored, "src/a.php should be Vendored");
    }

    #[test]
    fn test_deeper_vs_shallower_directory() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/foo/bar.php", "<?php");

        let db = load_database(create_test_config(&workspace, vec!["src/foo/"], vec!["src/"]));

        let file = db.files().find(|f| name_str(&f.name).contains("bar.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "Deeper directory pattern should win");
    }

    #[test]
    fn test_exact_file_vs_glob() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/b.php", "<?php");

        let db = load_database(create_test_config(&workspace, vec!["src/b.php"], vec!["src/*.php"]));

        let file = db.files().find(|f| name_str(&f.name).contains("b.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "Exact file should beat glob pattern");
    }

    #[test]
    fn test_equal_specificity_includes_wins() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/a.php", "<?php");

        let db = load_database(create_test_config(&workspace, vec!["src/"], vec!["src/"]));

        let file = db.files().find(|f| name_str(&f.name).contains("a.php")).unwrap();
        assert_eq!(file.file_type, FileType::Vendored, "Equal specificity: includes should win");
    }

    #[test]
    fn test_complex_scenario_from_bug_report() {
        let workspace = TempDir::new().unwrap();

        let fixtures =
            ["src/a.php", "src/b.php", "src/c/d.php", "src/c/e.php", "vendor/lib1.php", "vendor/lib2.php"];
        for relative_path in fixtures {
            create_test_file(&workspace, relative_path, "<?php");
        }

        let db = load_database(create_test_config(&workspace, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]));

        let b_file = db
            .files()
            .find(|f| name_str(&f.name).contains("src/b.php") || name_str(&f.name).ends_with("b.php"))
            .unwrap();
        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host in bug scenario");

        let d_file = db.files().find(|f| name_str(&f.name).contains("d.php")).unwrap();
        assert_eq!(d_file.file_type, FileType::Vendored, "src/c/d.php should be Vendored");

        let lib_file = db.files().find(|f| name_str(&f.name).contains("lib1.php")).unwrap();
        assert_eq!(lib_file.file_type, FileType::Vendored, "vendor/lib1.php should be Vendored");
    }

    #[test]
    fn test_files_only_in_paths() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/a.php", "<?php");

        let db = load_database(create_test_config(&workspace, vec!["src/"], vec![]));

        let file = db.files().find(|f| name_str(&f.name).contains("a.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "File only in paths should be Host");
    }

    #[test]
    fn test_files_only_in_includes() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "vendor/lib.php", "<?php");

        let db = load_database(create_test_config(&workspace, vec![], vec!["vendor/"]));

        let file = db.files().find(|f| name_str(&f.name).contains("lib.php")).unwrap();
        assert_eq!(file.file_type, FileType::Vendored, "File only in includes should be Vendored");
    }

    #[test]
    fn test_stdin_override_replaces_file_content() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/foo.php", "<?php\n// on disk");

        let configuration = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(configuration)
            .with_stdin_override("src/foo.php", b"<?php\n// from stdin".to_vec())
            .load()
            .unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("foo.php")).unwrap();
        assert_eq!(
            file.contents.as_ref(),
            b"<?php\n// from stdin",
            "stdin override content should be used instead of disk"
        );
    }

    #[test]
    fn test_glob_excludes_match_workspace_relative_paths() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/Absences/Foo/Foo.php", "<?php");
        create_test_file(&workspace, "src/Absences/Test/Faker/Provider/AbsencesProvider.php", "<?php");
        create_test_file(&workspace, "src/Calendar/Test/Helper.php", "<?php");

        let mut configuration = create_test_config(&workspace, vec!["src"], vec![]);
        configuration.excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/*/Test/**"))];

        let db = load_database(configuration);

        let names = loaded_names(&db);
        assert!(names.iter().any(|n| n.ends_with("src/Absences/Foo/Foo.php")), "non-Test file should be loaded");
        assert!(
            !names.iter().any(|n| n.contains("src/Absences/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
        assert!(
            !names.iter().any(|n| n.contains("src/Calendar/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
    }

    #[test]
    fn test_glob_excludes_match_legacy_absolute_prefix_patterns() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "packages/foo/src/main.php", "<?php");
        create_test_file(&workspace, "packages/foo/vendor/lib.php", "<?php");

        let mut configuration = create_test_config(&workspace, vec!["packages"], vec![]);
        configuration.excludes = vec![Exclusion::Pattern(Cow::Borrowed("*/packages/**/vendor/*"))];

        let db = load_database(configuration);

        let names = loaded_names(&db);
        assert!(names.iter().any(|n| n.ends_with("packages/foo/src/main.php")));
        assert!(
            !names.iter().any(|n| n.contains("/vendor/")),
            "legacy `*/packages/**/vendor/*` style should still exclude vendor files, got {names:?}"
        );
    }

    #[test]
    fn test_glob_dir_prune_skips_relative_directories() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "vendor/slevomat/coding-standard/main.php", "<?php");
        create_test_file(&workspace, "vendor/slevomat/coding-standard/tests/Sniffs/Foo.php", "<?php");
        create_test_file(&workspace, "vendor/another/lib.php", "<?php");

        let mut configuration = create_test_config(&workspace, vec![], vec!["vendor"]);
        configuration.excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**/tests/**"))];

        let db = load_database(configuration);

        let names = loaded_names(&db);
        assert!(names.iter().any(|n| n.ends_with("vendor/slevomat/coding-standard/main.php")));
        assert!(names.iter().any(|n| n.ends_with("vendor/another/lib.php")));
        assert!(
            !names.iter().any(|n| n.contains("/tests/")),
            "files under vendor/**/tests/** should be pruned, got {names:?}"
        );
    }

    #[test]
    fn test_stdin_override_adds_file_when_not_on_disk() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/.gitkeep", "");

        let configuration = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(configuration)
            .with_stdin_override("src/unsaved.php", b"<?php\n// unsaved buffer".to_vec())
            .load()
            .unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("unsaved.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host);
        assert_eq!(file.contents.as_ref(), b"<?php\n// unsaved buffer");
    }

    #[test]
    fn test_stdin_override_accepts_non_utf8_content() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/.gitkeep", "");

        let configuration = create_test_config(&workspace, vec!["src/"], vec![]);
        let buffer = b"<?php\n\nfunction f\xC9\xFF(): void {}\n".to_vec();
        assert!(std::str::from_utf8(&buffer).is_err(), "test buffer must contain non-UTF-8 bytes");

        let db = DatabaseLoader::new(configuration).with_stdin_override("src/buffer.php", buffer.clone()).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("buffer.php")).unwrap();
        assert_eq!(file.contents.as_ref(), buffer.as_slice());
    }

    #[cfg(unix)]
    #[test]
    fn test_symlinked_file_under_include_is_loaded() {
        let workspace = TempDir::new().unwrap();
        let external = TempDir::new().unwrap();

        create_test_file(&external, "Bar.php", "<?php class Bar {}\n");
        std::fs::create_dir_all(workspace.path().join("vendor")).unwrap();
        std::os::unix::fs::symlink(external.path().join("Bar.php"), workspace.path().join("vendor/Bar.php")).unwrap();

        let db = load_database(create_test_config(&workspace, vec![], vec!["vendor/"]));

        let bar = db.files().find(|f| name_str(&f.name).contains("Bar.php"));
        assert!(bar.is_some(), "symlinked Bar.php should be loaded via include = ['vendor/']");
    }

    #[cfg(unix)]
    #[test]
    fn test_symlinked_directory_under_include_is_descended() {
        let workspace = TempDir::new().unwrap();
        let external = TempDir::new().unwrap();

        create_test_file(&external, "src/Foo.php", "<?php class Foo {}\n");
        create_test_file(&external, "src/Bar.php", "<?php class Bar {}\n");

        std::fs::create_dir_all(workspace.path().join("vendor")).unwrap();
        std::os::unix::fs::symlink(external.path(), workspace.path().join("vendor/example-package")).unwrap();

        let db = load_database(create_test_config(&workspace, vec![], vec!["vendor/"]));

        assert!(db.files().any(|f| name_str(&f.name).contains("Foo.php")), "Foo.php inside symlinked dir not found");
        assert!(db.files().any(|f| name_str(&f.name).contains("Bar.php")), "Bar.php inside symlinked dir not found");
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_cycle_is_warned_and_skipped() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/Real.php", "<?php class Real {}\n");
        std::os::unix::fs::symlink(workspace.path().join("src"), workspace.path().join("src/loop")).unwrap();

        let configuration = create_test_config(&workspace, vec![], vec!["src/"]);
        let db = DatabaseLoader::new(configuration).load().expect("symlink cycle should not abort the load");

        assert!(
            db.files().any(|f| name_str(&f.name).contains("Real.php")),
            "Real.php still reachable despite the loop"
        );
    }

    #[test]
    fn test_exact_extensionless_file_is_loaded() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "bin/console", "<?php\n// entrypoint");

        // Naming the file explicitly bypasses the extension filter.
        let db = load_database(create_test_config(&workspace, vec!["bin/console"], vec![]));

        let file = db.files().find(|f| name_str(&f.name).ends_with("bin/console")).unwrap();
        assert_eq!(file.file_type, FileType::Host);
        assert_eq!(file.contents.as_ref(), b"<?php\n// entrypoint");
    }

    #[test]
    fn test_extensionless_file_in_directory_is_skipped() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "bin/console", "<?php");
        create_test_file(&workspace, "bin/run.php", "<?php");

        // Reached through a directory root, the extension filter still applies.
        let db = load_database(create_test_config(&workspace, vec!["bin"], vec![]));

        let names = loaded_names(&db);
        assert!(names.iter().any(|n| n.ends_with("bin/run.php")), "run.php should be loaded, got {names:?}");
        assert!(!names.iter().any(|n| n.ends_with("bin/console")), "extensionless console should be skipped");
    }

    #[test]
    fn test_patch_beats_vendored_at_equal_specificity() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "lib/Foo.php", "<?php");

        let db = load_database(create_test_config_with_patches(&workspace, vec![], vec!["lib/"], vec!["lib/"]));

        let file = db.files().find(|f| name_str(&f.name).contains("Foo.php")).unwrap();
        assert_eq!(file.file_type, FileType::Patch, "patch should beat vendored at equal specificity");
    }

    #[test]
    fn test_host_beats_patch_at_equal_specificity() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/Foo.php", "<?php");

        let db = load_database(create_test_config_with_patches(&workspace, vec!["src/"], vec![], vec!["src/"]));

        let file = db.files().find(|f| name_str(&f.name).contains("Foo.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "host should beat patch at equal specificity");
    }

    #[test]
    fn test_patch_beats_host_when_strictly_more_specific() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/Foo.php", "<?php");
        create_test_file(&workspace, "src/Bar.php", "<?php");

        let db =
            load_database(create_test_config_with_patches(&workspace, vec!["src/"], vec![], vec!["src/Foo.php"]));

        let foo = db.files().find(|f| name_str(&f.name).contains("Foo.php")).unwrap();
        assert_eq!(foo.file_type, FileType::Patch, "exact-file patch should beat directory-level host pattern");

        let bar = db.files().find(|f| name_str(&f.name).contains("Bar.php")).unwrap();
        assert_eq!(bar.file_type, FileType::Host, "file not covered by patch should remain Host");
    }
}