1use std::borrow::Cow;
4use std::collections::hash_map::Entry;
5use std::ffi::OsString;
6use std::path::Path;
7use std::path::PathBuf;
8
9use foldhash::HashMap;
10use foldhash::HashSet;
11use globset::GlobSet;
12use rayon::prelude::*;
13use walkdir::WalkDir;
14
15use crate::Database;
16use crate::DatabaseConfiguration;
17use crate::error::DatabaseError;
18use crate::exclusion::Exclusion;
19use crate::file::File;
20use crate::file::FileId;
21use crate::file::FileType;
22use crate::matcher::build_glob_set;
23use crate::utils::bytes_to_os_str;
24use crate::utils::bytes_to_path;
25use crate::utils::bytes_to_string_lossy;
26use crate::utils::read_file;
27
/// A loaded [`File`] paired with the specificity score of the root pattern that produced it.
///
/// The score comes from `DatabaseLoader::calculate_pattern_specificity` and is used by
/// `DatabaseLoader::load` to decide whether a file reached through both `paths` and
/// `includes` ends up as host or vendored.
#[derive(Debug)]
struct FileWithSpecificity {
    /// The file that was read (or overridden by stdin content).
    file: File,
    /// Specificity of the root pattern that led to this file; higher means more specific.
    specificity: usize,
}
38
/// Builder that collects everything needed to populate a [`Database`] and then loads it.
pub struct DatabaseLoader<'config> {
    /// Optional pre-existing database to load into; when `None`, `load` creates a new one.
    database: Option<Database<'config>>,
    /// Configuration supplying the workspace, paths, includes, excludes, extensions and glob settings.
    configuration: DatabaseConfiguration<'config>,
    /// In-memory sources as `(name, contents, file type)` triples, added to the database at the end of `load`.
    memory_sources: Vec<(&'static [u8], &'static [u8], FileType)>,
    /// Optional `(logical name, content)` pair whose content replaces the on-disk content of the
    /// file with that logical name, or is added as a new file when no such file was loaded.
    stdin_override: Option<(Cow<'config, [u8]>, Vec<u8>)>,
}
46
47impl<'config> DatabaseLoader<'config> {
48 #[inline]
49 #[must_use]
50 pub fn new(configuration: DatabaseConfiguration<'config>) -> Self {
51 Self { configuration, memory_sources: vec![], database: None, stdin_override: None }
52 }
53
54 #[inline]
55 #[must_use]
56 pub fn with_database(mut self, database: Database<'config>) -> Self {
57 self.database = Some(database);
58 self
59 }
60
61 #[inline]
67 #[must_use]
68 pub fn with_stdin_override(mut self, logical_name: impl AsRef<[u8]>, content: Vec<u8>) -> Self {
69 self.stdin_override = Some((Cow::Owned(logical_name.as_ref().to_vec()), content));
70 self
71 }
72
73 #[inline]
74 pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
75 self.memory_sources.push((name.as_bytes(), contents.as_bytes(), file_type));
76 }
77
78 #[inline]
87 pub fn load(mut self) -> Result<Database<'config>, DatabaseError> {
88 let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
89
90 db.configuration = self.configuration.clone();
93
94 let extensions_set: HashSet<OsString> =
95 self.configuration.extensions.iter().map(|s| bytes_to_os_str(s.as_ref()).into_owned()).collect();
96
97 let glob_exclude_patterns: Vec<&str> = self
98 .configuration
99 .excludes
100 .iter()
101 .filter_map(|ex| match ex {
102 Exclusion::Pattern(pat) => Some(pat.as_ref()),
103 Exclusion::Path(_) => None,
104 })
105 .collect();
106
107 let glob_excludes = build_glob_set(glob_exclude_patterns.iter().copied(), self.configuration.glob)?;
108 let dir_prune_patterns: Vec<&str> = glob_exclude_patterns
109 .iter()
110 .filter_map(|pat| {
111 let stripped =
112 pat.strip_suffix("/**/*").or_else(|| pat.strip_suffix("/**")).or_else(|| pat.strip_suffix("/*"))?;
113 if stripped.is_empty() || stripped == "*" || stripped == "**" {
114 return None;
115 }
116 Some(stripped)
117 })
118 .collect();
119
120 let dir_prune_globs = build_glob_set(dir_prune_patterns.iter().copied(), self.configuration.glob)?;
121
122 let path_excludes: HashSet<_> = self
123 .configuration
124 .excludes
125 .iter()
126 .filter_map(|ex| match ex {
127 Exclusion::Path(p) => Some(p),
128 Exclusion::Pattern(_) => None,
129 })
130 .collect();
131
132 let host_files_with_spec = self.load_paths(
133 &self.configuration.paths,
134 FileType::Host,
135 &extensions_set,
136 &glob_excludes,
137 &dir_prune_globs,
138 &path_excludes,
139 )?;
140
141 let vendored_files_with_spec = self.load_paths(
142 &self.configuration.includes,
143 FileType::Vendored,
144 &extensions_set,
145 &glob_excludes,
146 &dir_prune_globs,
147 &path_excludes,
148 )?;
149
150 let mut all_files: HashMap<FileId, File> = HashMap::default();
151 let mut file_decisions: HashMap<FileId, (FileType, usize)> = HashMap::default();
152
153 for file_with_spec in host_files_with_spec {
155 let file_id = file_with_spec.file.id;
156 let specificity = file_with_spec.specificity;
157
158 all_files.insert(file_id, file_with_spec.file);
159 file_decisions.insert(file_id, (FileType::Host, specificity));
160 }
161
162 if let Some((name, content)) = &self.stdin_override {
167 let virtual_path = self.configuration.workspace.join(bytes_to_path(name.as_ref()).as_ref());
168 let virtual_path_canonical = virtual_path.canonicalize().unwrap_or_else(|_| virtual_path.clone());
169 let virtual_path_str = virtual_path_canonical.to_string_lossy();
170
171 let matched_glob = !glob_excludes.is_empty()
172 && (glob_excludes.is_match(virtual_path_canonical.as_path())
173 || glob_excludes.is_match(bytes_to_path(name.as_ref()).as_ref()));
174
175 let matched_path = path_excludes.iter().any(|excl| {
176 let canonical = if Path::new(excl.as_ref()).is_absolute() {
177 excl.as_ref().to_path_buf()
178 } else {
179 self.configuration.workspace.join(excl.as_ref())
180 };
181 let canonical = canonical.canonicalize().unwrap_or(canonical);
182 let canonical_str = canonical.to_string_lossy();
183
184 virtual_path_str.starts_with(canonical_str.as_ref())
185 && matches!(virtual_path_str.as_bytes().get(canonical_str.len()), None | Some(&b'/' | &b'\\'))
186 });
187
188 if !matched_glob && !matched_path {
189 let file = File::ephemeral(Cow::Owned(name.as_ref().to_vec()), Cow::Owned(content.clone()));
190 let file_id = file.id;
191 if let Entry::Vacant(e) = all_files.entry(file_id) {
192 e.insert(file);
193
194 file_decisions.insert(file_id, (FileType::Host, usize::MAX));
195 }
196 }
197 }
198
199 for file_with_spec in vendored_files_with_spec {
200 let file_id = file_with_spec.file.id;
201 let vendored_specificity = file_with_spec.specificity;
202
203 all_files.entry(file_id).or_insert(file_with_spec.file);
204
205 match file_decisions.get(&file_id) {
206 Some((FileType::Host, host_specificity)) if vendored_specificity < *host_specificity => {
207 }
209 _ => {
210 file_decisions.insert(file_id, (FileType::Vendored, vendored_specificity));
211 }
212 }
213 }
214
215 db.reserve(file_decisions.len() + self.memory_sources.len());
216
217 for (file_id, (final_type, _)) in file_decisions {
218 if let Some(mut file) = all_files.remove(&file_id) {
219 file.file_type = final_type;
220 db.add(file);
221 }
222 }
223
224 for (name, contents, file_type) in self.memory_sources {
225 let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
226
227 db.add(file);
228 }
229
230 Ok(db)
231 }
232
233 fn load_paths(
241 &self,
242 roots: &[Cow<'config, [u8]>],
243 file_type: FileType,
244 extensions: &HashSet<OsString>,
245 glob_excludes: &GlobSet,
246 dir_prune_globs: &GlobSet,
247 path_excludes: &HashSet<&Cow<'config, Path>>,
248 ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
249 let canonical_workspace =
253 self.configuration.workspace.canonicalize().unwrap_or_else(|_| self.configuration.workspace.to_path_buf());
254
255 let canonical_excludes: Vec<String> = path_excludes
259 .iter()
260 .filter_map(|ex| {
261 let p = if Path::new(ex.as_ref()).is_absolute() {
262 ex.as_ref().to_path_buf()
263 } else {
264 self.configuration.workspace.join(ex.as_ref())
265 };
266
267 p.canonicalize().ok()?.into_os_string().into_string().ok()
268 })
269 .collect();
270
271 let workspace_relative_str = |path: &Path| -> String {
272 let rel = path.strip_prefix(canonical_workspace.as_path()).unwrap_or(path);
273 let s = rel.to_string_lossy();
274 #[cfg(windows)]
275 {
276 s.replace('\\', "/")
277 }
278 #[cfg(not(windows))]
279 {
280 s.into_owned()
281 }
282 };
283
284 let mut paths_to_process: Vec<(PathBuf, usize)> = Vec::new();
285
286 for root in roots {
287 let root_path = bytes_to_path(root.as_ref());
291 let resolved_path = if root_path.is_absolute() {
292 root_path.as_ref().to_path_buf()
293 } else {
294 self.configuration.workspace.join(root_path.as_ref())
295 };
296
297 let is_glob_pattern = !resolved_path.exists()
298 && (root.contains(&b'*') || root.contains(&b'?') || root.contains(&b'[') || root.contains(&b'{'));
299
300 let specificity = Self::calculate_pattern_specificity(root.as_ref());
301 if is_glob_pattern {
302 let pattern = if root_path.is_absolute() {
304 bytes_to_string_lossy(root.as_ref()).into_owned()
305 } else {
306 self.configuration.workspace.join(root_path.as_ref()).to_string_lossy().to_string()
308 };
309
310 match glob::glob(&pattern) {
311 Ok(entries) => {
312 for entry in entries {
313 match entry {
314 Ok(path) => {
315 if path.is_file() {
316 let canonical = path.canonicalize().unwrap_or(path);
321 paths_to_process.push((canonical, specificity));
322 }
323 }
324 Err(e) => {
325 tracing::warn!("Failed to read glob entry: {}", e);
326 }
327 }
328 }
329 }
330 Err(e) => {
331 return Err(DatabaseError::Glob(e.to_string()));
332 }
333 }
334 } else {
335 let canonical_root = resolved_path.canonicalize().unwrap_or(resolved_path);
336 let has_dir_prunes = !dir_prune_globs.is_empty();
337 let has_path_prunes = !canonical_excludes.is_empty();
338 let walker = WalkDir::new(&canonical_root).follow_links(true).into_iter().filter_entry(|entry| {
339 if entry.depth() == 0 || !entry.file_type().is_dir() {
340 return true;
341 }
342
343 let path = entry.path();
344
345 if has_path_prunes
346 && let Some(p) = path.to_str()
347 && canonical_excludes.iter().any(|excl| {
348 p.starts_with(excl.as_str())
349 && matches!(p.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
350 })
351 {
352 return false;
353 }
354
355 if has_dir_prunes
356 && (dir_prune_globs.is_match(path) || dir_prune_globs.is_match(workspace_relative_str(path)))
357 {
358 return false;
359 }
360
361 true
362 });
363
364 for entry in walker {
365 match entry {
366 Ok(entry) => {
367 if !entry.file_type().is_dir() {
368 paths_to_process.push((entry.into_path(), specificity));
369 }
370 }
371 Err(err) => {
372 let path = err.path().unwrap_or(canonical_root.as_path()).display();
373 if let Some(ancestor) = err.loop_ancestor() {
374 tracing::warn!(
375 "Skipping symlink loop at `{path}`: link cycles back to `{}`.",
376 ancestor.display(),
377 );
378 } else {
379 tracing::warn!("Failed to walk `{path}`: {err}. Entry will be skipped.");
380 }
381 }
382 }
383 }
384 }
385 }
386
387 let has_path_excludes = !canonical_excludes.is_empty();
388 let has_glob_excludes = !glob_excludes.is_empty();
389 let files: Vec<FileWithSpecificity> = paths_to_process
390 .into_par_iter()
391 .filter_map(|(path, specificity)| {
392 if has_glob_excludes
393 && (glob_excludes.is_match(&path) || glob_excludes.is_match(workspace_relative_str(&path)))
394 {
395 return None;
396 }
397
398 let ext = path.extension()?;
399 if !extensions.contains(ext) {
400 return None;
401 }
402
403 if has_path_excludes {
404 let excluded = path.to_str().is_some_and(|s| {
405 canonical_excludes.iter().any(|excl| {
406 s.starts_with(excl.as_str())
407 && matches!(s.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
408 })
409 });
410
411 if excluded {
412 return None;
413 }
414 }
415
416 let workspace = canonical_workspace.as_path();
417 #[cfg(windows)]
418 let logical_name =
419 path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().replace('\\', "/");
420 #[cfg(not(windows))]
421 let logical_name =
422 path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().into_owned();
423
424 if let Some((override_name, override_content)) = &self.stdin_override
425 && override_name.as_ref() == logical_name.as_bytes()
426 {
427 let file = File::new(
428 Cow::Owned(logical_name.into_bytes()),
429 file_type,
430 Some(path.clone()),
431 Cow::Owned(override_content.clone()),
432 );
433
434 return Some(Ok(FileWithSpecificity { file, specificity }));
435 }
436
437 match read_file(workspace, &path, file_type) {
438 Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
439 Err(e) => Some(Err(e)),
440 }
441 })
442 .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
443
444 Ok(files)
445 }
446
447 fn calculate_pattern_specificity(pattern: &[u8]) -> usize {
455 let pattern_path = bytes_to_path(pattern);
456
457 let component_count = pattern_path.components().count();
458 let is_glob =
459 pattern.contains(&b'*') || pattern.contains(&b'?') || pattern.contains(&b'[') || pattern.contains(&b'{');
460
461 if is_glob {
462 let non_wildcard_components = pattern_path
463 .components()
464 .filter(|c| {
465 let s = c.as_os_str().to_string_lossy();
466 !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
467 })
468 .count();
469 non_wildcard_components * 10
470 } else if pattern_path.is_file()
471 || pattern_path.extension().is_some()
472 || pattern.rsplit(|&b| b == b'.').next().is_some_and(|ext| ext.eq_ignore_ascii_case(b"php"))
473 {
474 component_count * 1000
475 } else {
476 component_count * 100
477 }
478 }
479}
480
#[cfg(test)]
// Tests unwrap freely: a failed unwrap is a test failure.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::DatabaseReader;
    use crate::GlobSettings;
    use std::borrow::Cow;
    use tempfile::TempDir;

    /// Builds a configuration rooted at `workspace` that only accepts `php` files and has no excludes.
    ///
    /// Forward slashes in `paths` and `includes` are converted to the platform separator.
    fn create_test_config(workspace: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
        let to_native = |s: &str| s.replace('/', std::path::MAIN_SEPARATOR_STR);

        let host_roots = paths.into_iter().map(|s| Cow::Owned(to_native(s).into_bytes())).collect();
        let vendored_roots = includes.into_iter().map(|s| Cow::Owned(to_native(s).into_bytes())).collect();

        DatabaseConfiguration {
            workspace: Cow::Owned(workspace.path().to_path_buf()),
            paths: host_roots,
            includes: vendored_roots,
            excludes: vec![],
            extensions: vec![Cow::Borrowed(b"php")],
            glob: GlobSettings::default(),
        }
    }

    /// Renders a file name for substring assertions, replacing invalid UTF-8 lossily.
    fn name_str(name: &[u8]) -> Cow<'_, str> {
        String::from_utf8_lossy(name)
    }

    /// Writes `content` to `relative_path` inside `workspace`, creating parent directories as needed.
    fn create_test_file(workspace: &TempDir, relative_path: &str, content: &str) {
        let target = workspace.path().join(relative_path);
        if let Some(dir) = target.parent() {
            std::fs::create_dir_all(dir).unwrap();
        }
        std::fs::write(target, content).unwrap();
    }

    /// A path with a file extension scores 1000 per component.
    #[test]
    fn test_specificity_calculation_exact_file() {
        let spec = DatabaseLoader::calculate_pattern_specificity(b"src/b.php");
        assert!(spec >= 2000, "Exact file should have high specificity, got {spec}");
    }

    /// A plain directory scores 100 per component.
    #[test]
    fn test_specificity_calculation_directory() {
        let spec = DatabaseLoader::calculate_pattern_specificity(b"src/");
        assert!((100..1000).contains(&spec), "Directory should have moderate specificity, got {spec}");
    }

    /// A glob scores 10 per non-wildcard component.
    #[test]
    fn test_specificity_calculation_glob() {
        let spec = DatabaseLoader::calculate_pattern_specificity(b"src/*.php");
        assert!(spec < 100, "Glob pattern should have low specificity, got {spec}");
    }

    /// More components mean a higher score.
    #[test]
    fn test_specificity_calculation_deeper_path() {
        let shallow_spec = DatabaseLoader::calculate_pattern_specificity(b"src/");
        let deep_spec = DatabaseLoader::calculate_pattern_specificity(b"src/foo/bar/");
        assert!(deep_spec > shallow_spec, "Deeper path should have higher specificity");
    }

    /// An exact host file beats an including directory; its siblings are vendored.
    #[test]
    fn test_exact_file_vs_directory() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/b.php", "<?php");
        create_test_file(&workspace, "src/a.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/b.php"], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let b_file = db.files().find(|f| name_str(&f.name).contains("b.php")).unwrap();
        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host (exact file beats directory)");

        let a_file = db.files().find(|f| name_str(&f.name).contains("a.php")).unwrap();
        assert_eq!(a_file.file_type, FileType::Vendored, "src/a.php should be Vendored");
    }

    /// A deeper host directory beats a shallower including directory.
    #[test]
    fn test_deeper_vs_shallower_directory() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/foo/bar.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/foo/"], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("bar.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "Deeper directory pattern should win");
    }

    /// An exact host file beats an including glob.
    #[test]
    fn test_exact_file_vs_glob() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/b.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/b.php"], vec!["src/*.php"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("b.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "Exact file should beat glob pattern");
    }

    /// On a specificity tie, the include side wins.
    #[test]
    fn test_equal_specificity_includes_wins() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/a.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/"], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("a.php")).unwrap();
        assert_eq!(file.file_type, FileType::Vendored, "Equal specificity: includes should win");
    }

    /// One exact host file combined with several overlapping include directories.
    #[test]
    fn test_complex_scenario_from_bug_report() {
        let workspace = TempDir::new().unwrap();

        for relative in
            ["src/a.php", "src/b.php", "src/c/d.php", "src/c/e.php", "vendor/lib1.php", "vendor/lib2.php"]
        {
            create_test_file(&workspace, relative, "<?php");
        }

        let config = create_test_config(&workspace, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let b_file = db
            .files()
            .find(|f| {
                let name = name_str(&f.name);
                name.contains("src/b.php") || name.ends_with("b.php")
            })
            .unwrap();
        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host in bug scenario");

        let d_file = db.files().find(|f| name_str(&f.name).contains("d.php")).unwrap();
        assert_eq!(d_file.file_type, FileType::Vendored, "src/c/d.php should be Vendored");

        let lib_file = db.files().find(|f| name_str(&f.name).contains("lib1.php")).unwrap();
        assert_eq!(lib_file.file_type, FileType::Vendored, "vendor/lib1.php should be Vendored");
    }

    /// Files reachable only through `paths` are host files.
    #[test]
    fn test_files_only_in_paths() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/a.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("a.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host, "File only in paths should be Host");
    }

    /// Files reachable only through `includes` are vendored files.
    #[test]
    fn test_files_only_in_includes() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "vendor/lib.php", "<?php");

        let config = create_test_config(&workspace, vec![], vec!["vendor/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("lib.php")).unwrap();
        assert_eq!(file.file_type, FileType::Vendored, "File only in includes should be Vendored");
    }

    /// The stdin override replaces the content of a file that exists on disk.
    #[test]
    fn test_stdin_override_replaces_file_content() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/foo.php", "<?php\n// on disk");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config)
            .with_stdin_override("src/foo.php", b"<?php\n// from stdin".to_vec())
            .load()
            .unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("foo.php")).unwrap();
        assert_eq!(
            file.contents.as_ref(),
            b"<?php\n// from stdin",
            "stdin override content should be used instead of disk"
        );
    }

    /// Exclusion globs written relative to the workspace match files below it.
    #[test]
    fn test_glob_excludes_match_workspace_relative_paths() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "src/Absences/Foo/Foo.php", "<?php");
        create_test_file(&workspace, "src/Absences/Test/Faker/Provider/AbsencesProvider.php", "<?php");
        create_test_file(&workspace, "src/Calendar/Test/Helper.php", "<?php");

        let mut config = create_test_config(&workspace, vec!["src"], vec![]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/*/Test/**"))];

        let db = DatabaseLoader::new(config).load().unwrap();

        let names: Vec<String> = db.files().map(|f| name_str(&f.name).into_owned()).collect();
        assert!(names.iter().any(|n| n.ends_with("src/Absences/Foo/Foo.php")), "non-Test file should be loaded");
        assert!(
            !names.iter().any(|n| n.contains("src/Absences/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
        assert!(
            !names.iter().any(|n| n.contains("src/Calendar/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
    }

    /// Exclusion globs that start with `*/` to absorb an absolute prefix still match.
    #[test]
    fn test_glob_excludes_match_legacy_absolute_prefix_patterns() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "packages/foo/src/main.php", "<?php");
        create_test_file(&workspace, "packages/foo/vendor/lib.php", "<?php");

        let mut config = create_test_config(&workspace, vec!["packages"], vec![]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("*/packages/**/vendor/*"))];

        let db = DatabaseLoader::new(config).load().unwrap();

        let names: Vec<String> = db.files().map(|f| name_str(&f.name).into_owned()).collect();
        assert!(names.iter().any(|n| n.ends_with("packages/foo/src/main.php")));
        assert!(
            !names.iter().any(|n| n.contains("/vendor/")),
            "legacy `*/packages/**/vendor/*` style should still exclude vendor files, got {names:?}"
        );
    }

    /// Directory pruning derived from a workspace-relative glob skips the matching directories.
    #[test]
    fn test_glob_dir_prune_skips_relative_directories() {
        let workspace = TempDir::new().unwrap();

        create_test_file(&workspace, "vendor/slevomat/coding-standard/main.php", "<?php");
        create_test_file(&workspace, "vendor/slevomat/coding-standard/tests/Sniffs/Foo.php", "<?php");
        create_test_file(&workspace, "vendor/another/lib.php", "<?php");

        let mut config = create_test_config(&workspace, vec![], vec!["vendor"]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**/tests/**"))];

        let db = DatabaseLoader::new(config).load().unwrap();

        let names: Vec<String> = db.files().map(|f| name_str(&f.name).into_owned()).collect();
        assert!(names.iter().any(|n| n.ends_with("vendor/slevomat/coding-standard/main.php")));
        assert!(names.iter().any(|n| n.ends_with("vendor/another/lib.php")));
        assert!(
            !names.iter().any(|n| n.contains("/tests/")),
            "files under vendor/**/tests/** should be pruned, got {names:?}"
        );
    }

    /// A stdin override for a file that is not on disk is added as a host file.
    #[test]
    fn test_stdin_override_adds_file_when_not_on_disk() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/.gitkeep", "");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config)
            .with_stdin_override("src/unsaved.php", b"<?php\n// unsaved buffer".to_vec())
            .load()
            .unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("unsaved.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host);
        assert_eq!(file.contents.as_ref(), b"<?php\n// unsaved buffer");
    }

    /// Stdin override content is stored byte-for-byte, even when it is not valid UTF-8.
    #[test]
    fn test_stdin_override_accepts_non_utf8_content() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/.gitkeep", "");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let content = b"<?php\n\nfunction f\xC9\xFF(): void {}\n".to_vec();
        assert!(std::str::from_utf8(&content).is_err(), "test buffer must contain non-UTF-8 bytes");

        let db = DatabaseLoader::new(config).with_stdin_override("src/buffer.php", content.clone()).load().unwrap();

        let file = db.files().find(|f| name_str(&f.name).contains("buffer.php")).unwrap();
        assert_eq!(file.contents.as_ref(), content.as_slice());
    }

    /// A symlink to a file outside the workspace is loaded when it sits under an include.
    #[cfg(unix)]
    #[test]
    fn test_symlinked_file_under_include_is_loaded() {
        let workspace = TempDir::new().unwrap();
        let external = TempDir::new().unwrap();

        create_test_file(&external, "Bar.php", "<?php class Bar {}\n");
        std::fs::create_dir_all(workspace.path().join("vendor")).unwrap();
        std::os::unix::fs::symlink(external.path().join("Bar.php"), workspace.path().join("vendor/Bar.php")).unwrap();

        let config = create_test_config(&workspace, vec![], vec!["vendor/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let bar = db.files().find(|f| name_str(&f.name).contains("Bar.php"));
        assert!(bar.is_some(), "symlinked Bar.php should be loaded via include = ['vendor/']");
    }

    /// A symlink to a directory outside the workspace is descended into.
    #[cfg(unix)]
    #[test]
    fn test_symlinked_directory_under_include_is_descended() {
        let workspace = TempDir::new().unwrap();
        let external = TempDir::new().unwrap();

        create_test_file(&external, "src/Foo.php", "<?php class Foo {}\n");
        create_test_file(&external, "src/Bar.php", "<?php class Bar {}\n");

        std::fs::create_dir_all(workspace.path().join("vendor")).unwrap();
        std::os::unix::fs::symlink(external.path(), workspace.path().join("vendor/example-package")).unwrap();

        let config = create_test_config(&workspace, vec![], vec!["vendor/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        assert!(db.files().any(|f| name_str(&f.name).contains("Foo.php")), "Foo.php inside symlinked dir not found");
        assert!(db.files().any(|f| name_str(&f.name).contains("Bar.php")), "Bar.php inside symlinked dir not found");
    }

    /// A symlink cycle does not abort the load; files outside the cycle are still found.
    #[cfg(unix)]
    #[test]
    fn test_symlink_cycle_is_warned_and_skipped() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/Real.php", "<?php class Real {}\n");
        std::os::unix::fs::symlink(workspace.path().join("src"), workspace.path().join("src/loop")).unwrap();

        let config = create_test_config(&workspace, vec![], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().expect("symlink cycle should not abort the load");

        assert!(
            db.files().any(|f| name_str(&f.name).contains("Real.php")),
            "Real.php still reachable despite the loop"
        );
    }
}