Skip to main content

mago_database/
loader.rs

1//! Database loader for scanning and loading project files.
2
3use std::borrow::Cow;
4use std::collections::hash_map::Entry;
5use std::ffi::OsString;
6use std::path::Path;
7
8use foldhash::HashMap;
9use foldhash::HashSet;
10use globset::GlobSet;
11use rayon::prelude::*;
12use walkdir::WalkDir;
13
14use crate::Database;
15use crate::DatabaseConfiguration;
16use crate::error::DatabaseError;
17use crate::exclusion::Exclusion;
18use crate::file::File;
19use crate::file::FileId;
20use crate::file::FileType;
21use crate::matcher::build_glob_set;
22use crate::utils::read_file;
23
/// Holds a file along with the specificity of the pattern that matched it.
///
/// Specificity is used to resolve conflicts when a file matches both `paths` and `includes`.
/// Higher specificity values indicate more specific matches (e.g., exact file paths have higher
/// specificity than directory patterns).
#[derive(Debug)]
struct FileWithSpecificity {
    /// The file that was loaded (or overridden from stdin) for the matched path.
    file: File,
    /// Score of the configured root pattern this file was discovered through;
    /// larger values mean a more specific pattern.
    specificity: usize,
}
34
/// Builder for loading files into a Database from the filesystem and memory.
pub struct DatabaseLoader<'a> {
    /// Existing database to load into; when `None`, `load` creates a new one
    /// from `configuration`.
    database: Option<Database<'a>>,
    /// Configuration providing the workspace, `paths`, `includes`, `excludes`,
    /// `extensions` and glob settings used while loading.
    configuration: DatabaseConfiguration<'a>,
    /// In-memory sources stored as `(name, contents, file type)`; `load` adds
    /// them to the database after the files found on disk.
    memory_sources: Vec<(&'static str, &'static str, FileType)>,
    /// When set, content for this file (by logical name) is taken from here instead of disk.
    /// Used for editor integrations: read content from stdin but use the given path for baseline and reporting.
    /// Stored as `(logical name, content)`.
    stdin_override: Option<(Cow<'a, str>, String)>,
}
44
45impl<'a> DatabaseLoader<'a> {
46    #[must_use]
47    pub fn new(configuration: DatabaseConfiguration<'a>) -> Self {
48        Self { configuration, memory_sources: vec![], database: None, stdin_override: None }
49    }
50
51    #[must_use]
52    pub fn with_database(mut self, database: Database<'a>) -> Self {
53        self.database = Some(database);
54        self
55    }
56
57    /// When set, the file with this logical name (workspace-relative path) will use the given
58    /// content instead of being read from disk. The logical name is used for baseline and reporting.
59    #[must_use]
60    pub fn with_stdin_override(mut self, logical_name: impl Into<Cow<'a, str>>, content: String) -> Self {
61        self.stdin_override = Some((logical_name.into(), content));
62        self
63    }
64
65    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
66        self.memory_sources.push((name, contents, file_type));
67    }
68
69    /// Loads files from disk into the database.
70    ///
71    /// # Errors
72    ///
73    /// Returns a [`DatabaseError`] if:
74    /// - A glob pattern is invalid
75    /// - File system operations fail (reading directories, files)
76    /// - File content cannot be read as valid UTF-8
77    pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
78        let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
79
80        // Update database configuration to use the loader's configuration
81        // (fixes workspace path when merging with prelude database)
82        db.configuration = self.configuration.clone();
83
84        let extensions_set: HashSet<OsString> =
85            self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
86
87        let glob_exclude_patterns: Vec<&str> = self
88            .configuration
89            .excludes
90            .iter()
91            .filter_map(|ex| match ex {
92                Exclusion::Pattern(pat) => Some(pat.as_ref()),
93                Exclusion::Path(_) => None,
94            })
95            .collect();
96
97        let glob_excludes = build_glob_set(glob_exclude_patterns.iter().copied(), self.configuration.glob)?;
98        let dir_prune_patterns: Vec<&str> = glob_exclude_patterns
99            .iter()
100            .filter_map(|pat| {
101                let stripped =
102                    pat.strip_suffix("/**/*").or_else(|| pat.strip_suffix("/**")).or_else(|| pat.strip_suffix("/*"))?;
103                if stripped.is_empty() || stripped == "*" || stripped == "**" {
104                    return None;
105                }
106                Some(stripped)
107            })
108            .collect();
109
110        let dir_prune_globs = build_glob_set(dir_prune_patterns.iter().copied(), self.configuration.glob)?;
111
112        let path_excludes: HashSet<_> = self
113            .configuration
114            .excludes
115            .iter()
116            .filter_map(|ex| match ex {
117                Exclusion::Path(p) => Some(p),
118                _ => None,
119            })
120            .collect();
121
122        let host_files_with_spec = self.load_paths(
123            &self.configuration.paths,
124            FileType::Host,
125            &extensions_set,
126            &glob_excludes,
127            &dir_prune_globs,
128            &path_excludes,
129        )?;
130
131        let vendored_files_with_spec = self.load_paths(
132            &self.configuration.includes,
133            FileType::Vendored,
134            &extensions_set,
135            &glob_excludes,
136            &dir_prune_globs,
137            &path_excludes,
138        )?;
139
140        let mut all_files: HashMap<FileId, File> = HashMap::default();
141        let mut file_decisions: HashMap<FileId, (FileType, usize)> = HashMap::default();
142
143        // Process host files (from paths)
144        for file_with_spec in host_files_with_spec {
145            let file_id = file_with_spec.file.id;
146            let specificity = file_with_spec.specificity;
147
148            all_files.insert(file_id, file_with_spec.file);
149            file_decisions.insert(file_id, (FileType::Host, specificity));
150        }
151
152        // When stdin override is set, ensure that the file is in the database
153        // (covers new/unsaved files, not on disk)
154        if let Some((ref name, ref content)) = self.stdin_override {
155            let file = File::ephemeral(Cow::Owned(name.as_ref().to_string()), Cow::Owned(content.clone()));
156            let file_id = file.id;
157            if let Entry::Vacant(e) = all_files.entry(file_id) {
158                e.insert(file);
159
160                file_decisions.insert(file_id, (FileType::Host, usize::MAX));
161            }
162        }
163
164        for file_with_spec in vendored_files_with_spec {
165            let file_id = file_with_spec.file.id;
166            let vendored_specificity = file_with_spec.specificity;
167
168            all_files.entry(file_id).or_insert(file_with_spec.file);
169
170            match file_decisions.get(&file_id) {
171                Some((FileType::Host, host_specificity)) if vendored_specificity < *host_specificity => {
172                    // Keep Host
173                }
174                _ => {
175                    file_decisions.insert(file_id, (FileType::Vendored, vendored_specificity));
176                }
177            }
178        }
179
180        db.reserve(file_decisions.len() + self.memory_sources.len());
181
182        for (file_id, (final_type, _)) in file_decisions {
183            if let Some(mut file) = all_files.remove(&file_id) {
184                file.file_type = final_type;
185                db.add(file);
186            }
187        }
188
189        for (name, contents, file_type) in self.memory_sources {
190            let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
191
192            db.add(file);
193        }
194
195        Ok(db)
196    }
197
198    /// Discovers and reads all files from a set of root paths or glob patterns in parallel.
199    ///
200    /// Supports both:
201    /// - Directory paths (e.g., "src", "tests") - recursively walks all files
202    /// - Glob patterns (e.g., "src/**/*.php", "tests/Unit/*Test.php") - matches files using glob syntax
203    ///
204    /// Returns files along with their pattern specificity for conflict resolution.
205    fn load_paths(
206        &self,
207        roots: &[Cow<'a, str>],
208        file_type: FileType,
209        extensions: &HashSet<OsString>,
210        glob_excludes: &GlobSet,
211        dir_prune_globs: &GlobSet,
212        path_excludes: &HashSet<&Cow<'a, Path>>,
213    ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
214        // Canonicalize the workspace once.  All WalkDir roots are canonicalized
215        // before traversal so their paths inherit the canonical prefix without
216        // any per-file syscalls.
217        let canonical_workspace =
218            self.configuration.workspace.canonicalize().unwrap_or_else(|_| self.configuration.workspace.to_path_buf());
219
220        // Pre-canonicalize path excludes once as strings.  A plain byte-string
221        // prefix check is then sufficient in the parallel section, replacing the
222        // per-file canonicalize() + Path::starts_with (Components iteration).
223        let canonical_excludes: Vec<String> = path_excludes
224            .iter()
225            .filter_map(|ex| {
226                let p = if Path::new(ex.as_ref()).is_absolute() {
227                    ex.as_ref().to_path_buf()
228                } else {
229                    self.configuration.workspace.join(ex.as_ref())
230                };
231
232                p.canonicalize().ok()?.into_os_string().into_string().ok()
233            })
234            .collect();
235
236        let workspace_relative_str = |path: &Path| -> String {
237            let rel = path.strip_prefix(canonical_workspace.as_path()).unwrap_or(path);
238            let s = rel.to_string_lossy();
239            #[cfg(windows)]
240            {
241                s.replace('\\', "/")
242            }
243            #[cfg(not(windows))]
244            {
245                s.into_owned()
246            }
247        };
248
249        let mut paths_to_process: Vec<(std::path::PathBuf, usize)> = Vec::new();
250
251        for root in roots {
252            // Check if this is a glob pattern (contains glob metacharacters).
253            // First check if it's an actual file/directory on disk. if so, treat it
254            // as a literal path even if the name contains glob metacharacters like `[]`.
255            let resolved_path = if Path::new(root.as_ref()).is_absolute() {
256                Path::new(root.as_ref()).to_path_buf()
257            } else {
258                self.configuration.workspace.join(root.as_ref())
259            };
260
261            let is_glob_pattern = !resolved_path.exists()
262                && (root.contains('*') || root.contains('?') || root.contains('[') || root.contains('{'));
263
264            let specificity = Self::calculate_pattern_specificity(root.as_ref());
265            if is_glob_pattern {
266                // Handle as glob pattern
267                let pattern = if Path::new(root.as_ref()).is_absolute() {
268                    root.to_string()
269                } else {
270                    // Make relative patterns absolute by prepending workspace
271                    self.configuration.workspace.join(root.as_ref()).to_string_lossy().to_string()
272                };
273
274                match glob::glob(&pattern) {
275                    Ok(entries) => {
276                        for entry in entries {
277                            match entry {
278                                Ok(path) => {
279                                    if path.is_file() {
280                                        // Canonicalize so the path shares the same prefix as
281                                        // `canonical_workspace` (important on macOS where
282                                        // TempDir / glob return /var/… but canonicalize gives
283                                        // /private/var/…).  Fall back to the original on error.
284                                        let canonical = path.canonicalize().unwrap_or(path);
285                                        paths_to_process.push((canonical, specificity));
286                                    }
287                                }
288                                Err(e) => {
289                                    tracing::warn!("Failed to read glob entry: {}", e);
290                                }
291                            }
292                        }
293                    }
294                    Err(e) => {
295                        return Err(DatabaseError::Glob(e.to_string()));
296                    }
297                }
298            } else {
299                let canonical_root = resolved_path.canonicalize().unwrap_or(resolved_path);
300                let has_dir_prunes = !dir_prune_globs.is_empty();
301                let has_path_prunes = !canonical_excludes.is_empty();
302                let walker = WalkDir::new(&canonical_root).into_iter().filter_entry(|entry| {
303                    if entry.depth() == 0 || !entry.file_type().is_dir() {
304                        return true;
305                    }
306
307                    let path = entry.path();
308
309                    if has_path_prunes
310                        && let Some(p) = path.to_str()
311                        && canonical_excludes.iter().any(|excl| {
312                            p.starts_with(excl.as_str())
313                                && matches!(p.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
314                        })
315                    {
316                        return false;
317                    }
318
319                    if has_dir_prunes
320                        && (dir_prune_globs.is_match(path) || dir_prune_globs.is_match(workspace_relative_str(path)))
321                    {
322                        return false;
323                    }
324
325                    true
326                });
327
328                for entry in walker.filter_map(Result::ok) {
329                    if entry.file_type().is_file() {
330                        paths_to_process.push((entry.into_path(), specificity));
331                    }
332                }
333            }
334        }
335
336        let has_path_excludes = !canonical_excludes.is_empty();
337        let has_glob_excludes = !glob_excludes.is_empty();
338        let files: Vec<FileWithSpecificity> = paths_to_process
339            .into_par_iter()
340            .filter_map(|(path, specificity)| {
341                if has_glob_excludes
342                    && (glob_excludes.is_match(&path) || glob_excludes.is_match(workspace_relative_str(&path)))
343                {
344                    return None;
345                }
346
347                let ext = path.extension()?;
348                if !extensions.contains(ext) {
349                    return None;
350                }
351
352                if has_path_excludes {
353                    let excluded = path.to_str().is_some_and(|s| {
354                        canonical_excludes.iter().any(|excl| {
355                            s.starts_with(excl.as_str())
356                                && matches!(s.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
357                        })
358                    });
359
360                    if excluded {
361                        return None;
362                    }
363                }
364
365                let workspace = canonical_workspace.as_path();
366                #[cfg(windows)]
367                let logical_name = path
368                    .strip_prefix(workspace)
369                    .unwrap_or_else(|_| path.as_path())
370                    .to_string_lossy()
371                    .replace('\\', "/");
372                #[cfg(not(windows))]
373                let logical_name =
374                    path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().into_owned();
375
376                if let Some((ref override_name, ref override_content)) = self.stdin_override
377                    && override_name.as_ref() == logical_name
378                {
379                    let file = File::new(
380                        Cow::Owned(logical_name),
381                        file_type,
382                        Some(path.clone()),
383                        Cow::Owned(override_content.clone()),
384                    );
385
386                    return Some(Ok(FileWithSpecificity { file, specificity }));
387                }
388
389                match read_file(workspace, &path, file_type) {
390                    Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
391                    Err(e) => Some(Err(e)),
392                }
393            })
394            .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
395
396        Ok(files)
397    }
398
399    /// Calculates how specific a pattern is for a given file path.
400    ///
401    /// Examples:
402    ///
403    /// - "src/b.php" matching src/b.php: ~2000 (exact file, 2 components)
404    /// - "src/" matching src/b.php: ~100 (directory, 1 component)
405    /// - "src" matching src/b.php: ~100 (directory, 1 component)
406    fn calculate_pattern_specificity(pattern: &str) -> usize {
407        let pattern_path = Path::new(pattern);
408
409        let component_count = pattern_path.components().count();
410        let is_glob = pattern.contains('*') || pattern.contains('?') || pattern.contains('[') || pattern.contains('{');
411
412        if is_glob {
413            let non_wildcard_components = pattern_path
414                .components()
415                .filter(|c| {
416                    let s = c.as_os_str().to_string_lossy();
417                    !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
418                })
419                .count();
420            non_wildcard_components * 10
421        } else if pattern_path.is_file() || pattern_path.extension().is_some() || pattern.ends_with(".php") {
422            component_count * 1000
423        } else {
424            component_count * 100
425        }
426    }
427}
428
429#[cfg(test)]
430mod tests {
431    use super::*;
432    use crate::DatabaseReader;
433    use crate::GlobSettings;
434    use std::borrow::Cow;
435    use tempfile::TempDir;
436
437    fn create_test_config(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
438        // Normalize path separators to platform-specific separators
439        let normalize = |s: &str| s.replace('/', std::path::MAIN_SEPARATOR_STR);
440
441        DatabaseConfiguration {
442            workspace: Cow::Owned(temp_dir.path().to_path_buf()),
443            paths: paths.into_iter().map(|s| Cow::Owned(normalize(s))).collect(),
444            includes: includes.into_iter().map(|s| Cow::Owned(normalize(s))).collect(),
445            excludes: vec![],
446            extensions: vec![Cow::Borrowed("php")],
447            glob: GlobSettings::default(),
448        }
449    }
450
451    fn create_test_file(temp_dir: &TempDir, relative_path: &str, content: &str) {
452        let file_path = temp_dir.path().join(relative_path);
453        if let Some(parent) = file_path.parent() {
454            std::fs::create_dir_all(parent).unwrap();
455        }
456        std::fs::write(file_path, content).unwrap();
457    }
458
459    #[test]
460    fn test_specificity_calculation_exact_file() {
461        let spec = DatabaseLoader::calculate_pattern_specificity("src/b.php");
462        assert!(spec >= 2000, "Exact file should have high specificity, got {spec}");
463    }
464
465    #[test]
466    fn test_specificity_calculation_directory() {
467        let spec = DatabaseLoader::calculate_pattern_specificity("src/");
468        assert!((100..1000).contains(&spec), "Directory should have moderate specificity, got {spec}");
469    }
470
471    #[test]
472    fn test_specificity_calculation_glob() {
473        let spec = DatabaseLoader::calculate_pattern_specificity("src/*.php");
474        assert!(spec < 100, "Glob pattern should have low specificity, got {spec}");
475    }
476
477    #[test]
478    fn test_specificity_calculation_deeper_path() {
479        let shallow_spec = DatabaseLoader::calculate_pattern_specificity("src/");
480        let deep_spec = DatabaseLoader::calculate_pattern_specificity("src/foo/bar/");
481        assert!(deep_spec > shallow_spec, "Deeper path should have higher specificity");
482    }
483
484    #[test]
485    fn test_exact_file_vs_directory() {
486        let temp_dir = TempDir::new().unwrap();
487
488        create_test_file(&temp_dir, "src/b.php", "<?php");
489        create_test_file(&temp_dir, "src/a.php", "<?php");
490
491        let config = create_test_config(&temp_dir, vec!["src/b.php"], vec!["src/"]);
492        let loader = DatabaseLoader::new(config);
493        let db = loader.load().unwrap();
494
495        let b_file = db.files().find(|f| f.name.contains("b.php")).unwrap();
496        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host (exact file beats directory)");
497
498        let a_file = db.files().find(|f| f.name.contains("a.php")).unwrap();
499        assert_eq!(a_file.file_type, FileType::Vendored, "src/a.php should be Vendored");
500    }
501
502    #[test]
503    fn test_deeper_vs_shallower_directory() {
504        let temp_dir = TempDir::new().unwrap();
505
506        create_test_file(&temp_dir, "src/foo/bar.php", "<?php");
507
508        let config = create_test_config(&temp_dir, vec!["src/foo/"], vec!["src/"]);
509        let loader = DatabaseLoader::new(config);
510        let db = loader.load().unwrap();
511
512        let file = db.files().find(|f| f.name.contains("bar.php")).unwrap();
513        assert_eq!(file.file_type, FileType::Host, "Deeper directory pattern should win");
514    }
515
516    #[test]
517    fn test_exact_file_vs_glob() {
518        let temp_dir = TempDir::new().unwrap();
519
520        create_test_file(&temp_dir, "src/b.php", "<?php");
521
522        let config = create_test_config(&temp_dir, vec!["src/b.php"], vec!["src/*.php"]);
523        let loader = DatabaseLoader::new(config);
524        let db = loader.load().unwrap();
525
526        let file = db.files().find(|f| f.name.contains("b.php")).unwrap();
527        assert_eq!(file.file_type, FileType::Host, "Exact file should beat glob pattern");
528    }
529
530    #[test]
531    fn test_equal_specificity_includes_wins() {
532        let temp_dir = TempDir::new().unwrap();
533
534        create_test_file(&temp_dir, "src/a.php", "<?php");
535
536        let config = create_test_config(&temp_dir, vec!["src/"], vec!["src/"]);
537        let loader = DatabaseLoader::new(config);
538        let db = loader.load().unwrap();
539
540        let file = db.files().find(|f| f.name.contains("a.php")).unwrap();
541        assert_eq!(file.file_type, FileType::Vendored, "Equal specificity: includes should win");
542    }
543
544    #[test]
545    fn test_complex_scenario_from_bug_report() {
546        let temp_dir = TempDir::new().unwrap();
547
548        create_test_file(&temp_dir, "src/a.php", "<?php");
549        create_test_file(&temp_dir, "src/b.php", "<?php");
550        create_test_file(&temp_dir, "src/c/d.php", "<?php");
551        create_test_file(&temp_dir, "src/c/e.php", "<?php");
552        create_test_file(&temp_dir, "vendor/lib1.php", "<?php");
553        create_test_file(&temp_dir, "vendor/lib2.php", "<?php");
554
555        let config = create_test_config(&temp_dir, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]);
556        let loader = DatabaseLoader::new(config);
557        let db = loader.load().unwrap();
558
559        let b_file = db.files().find(|f| f.name.contains("src/b.php") || f.name.ends_with("b.php")).unwrap();
560        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host in bug scenario");
561
562        let d_file = db.files().find(|f| f.name.contains("d.php")).unwrap();
563        assert_eq!(d_file.file_type, FileType::Vendored, "src/c/d.php should be Vendored");
564
565        let lib_file = db.files().find(|f| f.name.contains("lib1.php")).unwrap();
566        assert_eq!(lib_file.file_type, FileType::Vendored, "vendor/lib1.php should be Vendored");
567    }
568
569    #[test]
570    fn test_files_only_in_paths() {
571        let temp_dir = TempDir::new().unwrap();
572
573        create_test_file(&temp_dir, "src/a.php", "<?php");
574
575        let config = create_test_config(&temp_dir, vec!["src/"], vec![]);
576        let loader = DatabaseLoader::new(config);
577        let db = loader.load().unwrap();
578
579        let file = db.files().find(|f| f.name.contains("a.php")).unwrap();
580        assert_eq!(file.file_type, FileType::Host, "File only in paths should be Host");
581    }
582
583    #[test]
584    fn test_files_only_in_includes() {
585        let temp_dir = TempDir::new().unwrap();
586
587        create_test_file(&temp_dir, "vendor/lib.php", "<?php");
588
589        let config = create_test_config(&temp_dir, vec![], vec!["vendor/"]);
590        let loader = DatabaseLoader::new(config);
591        let db = loader.load().unwrap();
592
593        let file = db.files().find(|f| f.name.contains("lib.php")).unwrap();
594        assert_eq!(file.file_type, FileType::Vendored, "File only in includes should be Vendored");
595    }
596
597    #[test]
598    fn test_stdin_override_replaces_file_content() {
599        let temp_dir = TempDir::new().unwrap();
600        create_test_file(&temp_dir, "src/foo.php", "<?php\n// on disk");
601
602        let config = create_test_config(&temp_dir, vec!["src/"], vec![]);
603        let loader = DatabaseLoader::new(config).with_stdin_override("src/foo.php", "<?php\n// from stdin".to_string());
604        let db = loader.load().unwrap();
605
606        let file = db.files().find(|f| f.name.contains("foo.php")).unwrap();
607        assert_eq!(
608            file.contents.as_ref(),
609            "<?php\n// from stdin",
610            "stdin override content should be used instead of disk"
611        );
612    }
613
614    #[test]
615    fn test_glob_excludes_match_workspace_relative_paths() {
616        let temp_dir = TempDir::new().unwrap();
617
618        create_test_file(&temp_dir, "src/Absences/Foo/Foo.php", "<?php");
619        create_test_file(&temp_dir, "src/Absences/Test/Faker/Provider/AbsencesProvider.php", "<?php");
620        create_test_file(&temp_dir, "src/Calendar/Test/Helper.php", "<?php");
621
622        let mut config = create_test_config(&temp_dir, vec!["src"], vec![]);
623        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/*/Test/**"))];
624
625        let loader = DatabaseLoader::new(config);
626        let db = loader.load().unwrap();
627
628        let names: Vec<String> = db.files().map(|f| f.name.to_string()).collect();
629        assert!(names.iter().any(|n| n.ends_with("src/Absences/Foo/Foo.php")), "non-Test file should be loaded");
630        assert!(
631            !names.iter().any(|n| n.contains("src/Absences/Test/")),
632            "files under src/*/Test/** should be excluded, got {names:?}"
633        );
634        assert!(
635            !names.iter().any(|n| n.contains("src/Calendar/Test/")),
636            "files under src/*/Test/** should be excluded, got {names:?}"
637        );
638    }
639
640    #[test]
641    fn test_glob_excludes_match_legacy_absolute_prefix_patterns() {
642        let temp_dir = TempDir::new().unwrap();
643
644        create_test_file(&temp_dir, "packages/foo/src/main.php", "<?php");
645        create_test_file(&temp_dir, "packages/foo/vendor/lib.php", "<?php");
646
647        let mut config = create_test_config(&temp_dir, vec!["packages"], vec![]);
648        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("*/packages/**/vendor/*"))];
649
650        let loader = DatabaseLoader::new(config);
651        let db = loader.load().unwrap();
652
653        let names: Vec<String> = db.files().map(|f| f.name.to_string()).collect();
654        assert!(names.iter().any(|n| n.ends_with("packages/foo/src/main.php")));
655        assert!(
656            !names.iter().any(|n| n.contains("/vendor/")),
657            "legacy `*/packages/**/vendor/*` style should still exclude vendor files, got {names:?}"
658        );
659    }
660
661    #[test]
662    fn test_glob_dir_prune_skips_relative_directories() {
663        let temp_dir = TempDir::new().unwrap();
664
665        create_test_file(&temp_dir, "vendor/slevomat/coding-standard/main.php", "<?php");
666        create_test_file(&temp_dir, "vendor/slevomat/coding-standard/tests/Sniffs/Foo.php", "<?php");
667        create_test_file(&temp_dir, "vendor/another/lib.php", "<?php");
668
669        let mut config = create_test_config(&temp_dir, vec![], vec!["vendor"]);
670        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**/tests/**"))];
671
672        let loader = DatabaseLoader::new(config);
673        let db = loader.load().unwrap();
674
675        let names: Vec<String> = db.files().map(|f| f.name.to_string()).collect();
676        assert!(names.iter().any(|n| n.ends_with("vendor/slevomat/coding-standard/main.php")));
677        assert!(names.iter().any(|n| n.ends_with("vendor/another/lib.php")));
678        assert!(
679            !names.iter().any(|n| n.contains("/tests/")),
680            "files under vendor/**/tests/** should be pruned, got {names:?}"
681        );
682    }
683
684    #[test]
685    fn test_stdin_override_adds_file_when_not_on_disk() {
686        let temp_dir = TempDir::new().unwrap();
687        create_test_file(&temp_dir, "src/.gitkeep", "");
688
689        let config = create_test_config(&temp_dir, vec!["src/"], vec![]);
690        let loader =
691            DatabaseLoader::new(config).with_stdin_override("src/unsaved.php", "<?php\n// unsaved buffer".to_string());
692        let db = loader.load().unwrap();
693
694        let file = db.files().find(|f| f.name.contains("unsaved.php")).unwrap();
695        assert_eq!(file.file_type, FileType::Host);
696        assert_eq!(file.contents.as_ref(), "<?php\n// unsaved buffer");
697    }
698}