Skip to main content

mago_database/
loader.rs

1//! Database loader for scanning and loading project files.
2
3use std::borrow::Cow;
4use std::collections::hash_map::Entry;
5use std::ffi::OsString;
6use std::path::Path;
7use std::path::PathBuf;
8
9use foldhash::HashMap;
10use foldhash::HashSet;
11use globset::GlobSet;
12use rayon::prelude::*;
13use walkdir::WalkDir;
14
15use crate::Database;
16use crate::DatabaseConfiguration;
17use crate::error::DatabaseError;
18use crate::exclusion::Exclusion;
19use crate::file::File;
20use crate::file::FileId;
21use crate::file::FileType;
22use crate::matcher::build_glob_set;
23use crate::utils::read_file;
24
/// Holds a file along with the specificity of the pattern that matched it.
///
/// Specificity is used to resolve conflicts when a file matches both `paths` and `includes`.
/// Higher specificity values indicate more specific matches (e.g., exact file paths have higher
/// specificity than directory patterns).
#[derive(Debug)]
struct FileWithSpecificity {
    /// The file that was discovered and read (or overridden from stdin).
    file: File,
    /// Score of the root entry that produced `file`, as returned by
    /// `DatabaseLoader::calculate_pattern_specificity`.
    specificity: usize,
}
35
/// Builder for loading files into a Database from the filesystem and memory.
pub struct DatabaseLoader<'config> {
    /// Database to load into; when `None`, `load` creates a new one from `configuration`.
    database: Option<Database<'config>>,
    /// Workspace, roots, excludes and extensions that drive the scan.
    configuration: DatabaseConfiguration<'config>,
    /// In-memory sources as `(name, contents, file type)`, added after the filesystem scan.
    memory_sources: Vec<(&'static str, &'static str, FileType)>,
    /// Optional `(logical name, content)` pair whose content replaces the on-disk file.
    stdin_override: Option<(Cow<'config, str>, String)>,
}
43
44impl<'config> DatabaseLoader<'config> {
45    #[inline]
46    #[must_use]
47    pub fn new(configuration: DatabaseConfiguration<'config>) -> Self {
48        Self { configuration, memory_sources: vec![], database: None, stdin_override: None }
49    }
50
51    #[inline]
52    #[must_use]
53    pub fn with_database(mut self, database: Database<'config>) -> Self {
54        self.database = Some(database);
55        self
56    }
57
58    /// When set, the file with this logical name (workspace-relative path) will use the given
59    /// content instead of being read from disk. The logical name is used for baseline and reporting.
60    #[inline]
61    #[must_use]
62    pub fn with_stdin_override(mut self, logical_name: impl Into<Cow<'config, str>>, content: String) -> Self {
63        self.stdin_override = Some((logical_name.into(), content));
64        self
65    }
66
67    #[inline]
68    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
69        self.memory_sources.push((name, contents, file_type));
70    }
71
72    /// Loads files from disk into the database.
73    ///
74    /// # Errors
75    ///
76    /// Returns a [`DatabaseError`] if:
77    /// - A glob pattern is invalid
78    /// - File system operations fail (reading directories, files)
79    /// - File content cannot be read as valid UTF-8
80    #[inline]
81    pub fn load(mut self) -> Result<Database<'config>, DatabaseError> {
82        let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
83
84        // Update database configuration to use the loader's configuration
85        // (fixes workspace path when merging with prelude database)
86        db.configuration = self.configuration.clone();
87
88        let extensions_set: HashSet<OsString> =
89            self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
90
91        let glob_exclude_patterns: Vec<&str> = self
92            .configuration
93            .excludes
94            .iter()
95            .filter_map(|ex| match ex {
96                Exclusion::Pattern(pat) => Some(pat.as_ref()),
97                Exclusion::Path(_) => None,
98            })
99            .collect();
100
101        let glob_excludes = build_glob_set(glob_exclude_patterns.iter().copied(), self.configuration.glob)?;
102        let dir_prune_patterns: Vec<&str> = glob_exclude_patterns
103            .iter()
104            .filter_map(|pat| {
105                let stripped =
106                    pat.strip_suffix("/**/*").or_else(|| pat.strip_suffix("/**")).or_else(|| pat.strip_suffix("/*"))?;
107                if stripped.is_empty() || stripped == "*" || stripped == "**" {
108                    return None;
109                }
110                Some(stripped)
111            })
112            .collect();
113
114        let dir_prune_globs = build_glob_set(dir_prune_patterns.iter().copied(), self.configuration.glob)?;
115
116        let path_excludes: HashSet<_> = self
117            .configuration
118            .excludes
119            .iter()
120            .filter_map(|ex| match ex {
121                Exclusion::Path(p) => Some(p),
122                Exclusion::Pattern(_) => None,
123            })
124            .collect();
125
126        let host_files_with_spec = self.load_paths(
127            &self.configuration.paths,
128            FileType::Host,
129            &extensions_set,
130            &glob_excludes,
131            &dir_prune_globs,
132            &path_excludes,
133        )?;
134
135        let vendored_files_with_spec = self.load_paths(
136            &self.configuration.includes,
137            FileType::Vendored,
138            &extensions_set,
139            &glob_excludes,
140            &dir_prune_globs,
141            &path_excludes,
142        )?;
143
144        let mut all_files: HashMap<FileId, File> = HashMap::default();
145        let mut file_decisions: HashMap<FileId, (FileType, usize)> = HashMap::default();
146
147        // Process host files (from paths)
148        for file_with_spec in host_files_with_spec {
149            let file_id = file_with_spec.file.id;
150            let specificity = file_with_spec.specificity;
151
152            all_files.insert(file_id, file_with_spec.file);
153            file_decisions.insert(file_id, (FileType::Host, specificity));
154        }
155
156        // When stdin override is set, ensure that the file is in the database
157        // (covers new/unsaved files, not on disk). Excluded paths are skipped
158        // so that editor integrations using `--stdin-input` honor the same
159        // exclude rules as a regular filesystem scan.
160        if let Some((name, content)) = &self.stdin_override {
161            let virtual_path = self.configuration.workspace.join(name.as_ref());
162            let virtual_path_canonical = virtual_path.canonicalize().unwrap_or_else(|_| virtual_path.clone());
163            let virtual_path_str = virtual_path_canonical.to_string_lossy();
164
165            let matched_glob = !glob_excludes.is_empty()
166                && (glob_excludes.is_match(virtual_path_canonical.as_path()) || glob_excludes.is_match(name.as_ref()));
167
168            let matched_path = path_excludes.iter().any(|excl| {
169                let canonical = if Path::new(excl.as_ref()).is_absolute() {
170                    excl.as_ref().to_path_buf()
171                } else {
172                    self.configuration.workspace.join(excl.as_ref())
173                };
174                let canonical = canonical.canonicalize().unwrap_or(canonical);
175                let canonical_str = canonical.to_string_lossy();
176
177                virtual_path_str.starts_with(canonical_str.as_ref())
178                    && matches!(virtual_path_str.as_bytes().get(canonical_str.len()), None | Some(&b'/' | &b'\\'))
179            });
180
181            if !matched_glob && !matched_path {
182                let file = File::ephemeral(Cow::Owned(name.as_ref().to_string()), Cow::Owned(content.clone()));
183                let file_id = file.id;
184                if let Entry::Vacant(e) = all_files.entry(file_id) {
185                    e.insert(file);
186
187                    file_decisions.insert(file_id, (FileType::Host, usize::MAX));
188                }
189            }
190        }
191
192        for file_with_spec in vendored_files_with_spec {
193            let file_id = file_with_spec.file.id;
194            let vendored_specificity = file_with_spec.specificity;
195
196            all_files.entry(file_id).or_insert(file_with_spec.file);
197
198            match file_decisions.get(&file_id) {
199                Some((FileType::Host, host_specificity)) if vendored_specificity < *host_specificity => {
200                    // Keep Host
201                }
202                _ => {
203                    file_decisions.insert(file_id, (FileType::Vendored, vendored_specificity));
204                }
205            }
206        }
207
208        db.reserve(file_decisions.len() + self.memory_sources.len());
209
210        for (file_id, (final_type, _)) in file_decisions {
211            if let Some(mut file) = all_files.remove(&file_id) {
212                file.file_type = final_type;
213                db.add(file);
214            }
215        }
216
217        for (name, contents, file_type) in self.memory_sources {
218            let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
219
220            db.add(file);
221        }
222
223        Ok(db)
224    }
225
226    /// Discovers and reads all files from a set of root paths or glob patterns in parallel.
227    ///
228    /// Supports both:
229    /// - Directory paths (e.g., "src", "tests") - recursively walks all files
230    /// - Glob patterns (e.g., "src/**/*.php", "tests/Unit/*Test.php") - matches files using glob syntax
231    ///
232    /// Returns files along with their pattern specificity for conflict resolution.
233    fn load_paths(
234        &self,
235        roots: &[Cow<'config, str>],
236        file_type: FileType,
237        extensions: &HashSet<OsString>,
238        glob_excludes: &GlobSet,
239        dir_prune_globs: &GlobSet,
240        path_excludes: &HashSet<&Cow<'config, Path>>,
241    ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
242        // Canonicalize the workspace once.  All WalkDir roots are canonicalized
243        // before traversal so their paths inherit the canonical prefix without
244        // any per-file syscalls.
245        let canonical_workspace =
246            self.configuration.workspace.canonicalize().unwrap_or_else(|_| self.configuration.workspace.to_path_buf());
247
248        // Pre-canonicalize path excludes once as strings.  A plain byte-string
249        // prefix check is then sufficient in the parallel section, replacing the
250        // per-file canonicalize() + Path::starts_with (Components iteration).
251        let canonical_excludes: Vec<String> = path_excludes
252            .iter()
253            .filter_map(|ex| {
254                let p = if Path::new(ex.as_ref()).is_absolute() {
255                    ex.as_ref().to_path_buf()
256                } else {
257                    self.configuration.workspace.join(ex.as_ref())
258                };
259
260                p.canonicalize().ok()?.into_os_string().into_string().ok()
261            })
262            .collect();
263
264        let workspace_relative_str = |path: &Path| -> String {
265            let rel = path.strip_prefix(canonical_workspace.as_path()).unwrap_or(path);
266            let s = rel.to_string_lossy();
267            #[cfg(windows)]
268            {
269                s.replace('\\', "/")
270            }
271            #[cfg(not(windows))]
272            {
273                s.into_owned()
274            }
275        };
276
277        let mut paths_to_process: Vec<(PathBuf, usize)> = Vec::new();
278
279        for root in roots {
280            // Check if this is a glob pattern (contains glob metacharacters).
281            // First check if it's an actual file/directory on disk. if so, treat it
282            // as a literal path even if the name contains glob metacharacters like `[]`.
283            let resolved_path = if Path::new(root.as_ref()).is_absolute() {
284                Path::new(root.as_ref()).to_path_buf()
285            } else {
286                self.configuration.workspace.join(root.as_ref())
287            };
288
289            let is_glob_pattern = !resolved_path.exists()
290                && (root.contains('*') || root.contains('?') || root.contains('[') || root.contains('{'));
291
292            let specificity = Self::calculate_pattern_specificity(root.as_ref());
293            if is_glob_pattern {
294                // Handle as glob pattern
295                let pattern = if Path::new(root.as_ref()).is_absolute() {
296                    root.to_string()
297                } else {
298                    // Make relative patterns absolute by prepending workspace
299                    self.configuration.workspace.join(root.as_ref()).to_string_lossy().to_string()
300                };
301
302                match glob::glob(&pattern) {
303                    Ok(entries) => {
304                        for entry in entries {
305                            match entry {
306                                Ok(path) => {
307                                    if path.is_file() {
308                                        // Canonicalize so the path shares the same prefix as
309                                        // `canonical_workspace` (important on macOS where
310                                        // TempDir / glob return /var/… but canonicalize gives
311                                        // /private/var/…).  Fall back to the original on error.
312                                        let canonical = path.canonicalize().unwrap_or(path);
313                                        paths_to_process.push((canonical, specificity));
314                                    }
315                                }
316                                Err(e) => {
317                                    tracing::warn!("Failed to read glob entry: {}", e);
318                                }
319                            }
320                        }
321                    }
322                    Err(e) => {
323                        return Err(DatabaseError::Glob(e.to_string()));
324                    }
325                }
326            } else {
327                let canonical_root = resolved_path.canonicalize().unwrap_or(resolved_path);
328                let has_dir_prunes = !dir_prune_globs.is_empty();
329                let has_path_prunes = !canonical_excludes.is_empty();
330                let walker = WalkDir::new(&canonical_root).into_iter().filter_entry(|entry| {
331                    if entry.depth() == 0 || !entry.file_type().is_dir() {
332                        return true;
333                    }
334
335                    let path = entry.path();
336
337                    if has_path_prunes
338                        && let Some(p) = path.to_str()
339                        && canonical_excludes.iter().any(|excl| {
340                            p.starts_with(excl.as_str())
341                                && matches!(p.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
342                        })
343                    {
344                        return false;
345                    }
346
347                    if has_dir_prunes
348                        && (dir_prune_globs.is_match(path) || dir_prune_globs.is_match(workspace_relative_str(path)))
349                    {
350                        return false;
351                    }
352
353                    true
354                });
355
356                for entry in walker.filter_map(Result::ok) {
357                    let file_type = entry.file_type();
358                    #[allow(clippy::filetype_is_file)]
359                    let include = file_type.is_file() || file_type.is_symlink();
360                    if include {
361                        paths_to_process.push((entry.into_path(), specificity));
362                    }
363                }
364            }
365        }
366
367        let has_path_excludes = !canonical_excludes.is_empty();
368        let has_glob_excludes = !glob_excludes.is_empty();
369        let files: Vec<FileWithSpecificity> = paths_to_process
370            .into_par_iter()
371            .filter_map(|(path, specificity)| {
372                if has_glob_excludes
373                    && (glob_excludes.is_match(&path) || glob_excludes.is_match(workspace_relative_str(&path)))
374                {
375                    return None;
376                }
377
378                let ext = path.extension()?;
379                if !extensions.contains(ext) {
380                    return None;
381                }
382
383                if has_path_excludes {
384                    let excluded = path.to_str().is_some_and(|s| {
385                        canonical_excludes.iter().any(|excl| {
386                            s.starts_with(excl.as_str())
387                                && matches!(s.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
388                        })
389                    });
390
391                    if excluded {
392                        return None;
393                    }
394                }
395
396                let workspace = canonical_workspace.as_path();
397                #[cfg(windows)]
398                let logical_name =
399                    path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().replace('\\', "/");
400                #[cfg(not(windows))]
401                let logical_name =
402                    path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().into_owned();
403
404                if let Some((override_name, override_content)) = &self.stdin_override
405                    && override_name.as_ref() == logical_name
406                {
407                    let file = File::new(
408                        Cow::Owned(logical_name),
409                        file_type,
410                        Some(path.clone()),
411                        Cow::Owned(override_content.clone()),
412                    );
413
414                    return Some(Ok(FileWithSpecificity { file, specificity }));
415                }
416
417                match read_file(workspace, &path, file_type) {
418                    Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
419                    Err(e) => Some(Err(e)),
420                }
421            })
422            .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
423
424        Ok(files)
425    }
426
427    /// Calculates how specific a pattern is for a given file path.
428    ///
429    /// Examples:
430    ///
431    /// - "src/b.php" matching src/b.php: ~2000 (exact file, 2 components)
432    /// - "src/" matching src/b.php: ~100 (directory, 1 component)
433    /// - "src" matching src/b.php: ~100 (directory, 1 component)
434    fn calculate_pattern_specificity(pattern: &str) -> usize {
435        let pattern_path = Path::new(pattern);
436
437        let component_count = pattern_path.components().count();
438        let is_glob = pattern.contains('*') || pattern.contains('?') || pattern.contains('[') || pattern.contains('{');
439
440        if is_glob {
441            let non_wildcard_components = pattern_path
442                .components()
443                .filter(|c| {
444                    let s = c.as_os_str().to_string_lossy();
445                    !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
446                })
447                .count();
448            non_wildcard_components * 10
449        } else if pattern_path.is_file()
450            || pattern_path.extension().is_some()
451            || pattern.rsplit('.').next().is_some_and(|ext| ext.eq_ignore_ascii_case("php"))
452        {
453            component_count * 1000
454        } else {
455            component_count * 100
456        }
457    }
458}
459
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::DatabaseReader;
    use crate::GlobSettings;
    use std::borrow::Cow;
    use tempfile::TempDir;

    /// Builds a configuration rooted at `temp_dir` that only accepts `php` files and has
    /// no excludes. `paths` and `includes` are given with `/` separators.
    fn create_test_config(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
        // Convert `/` to the platform-specific separator.
        let to_native =
            |entry: &str| -> Cow<'static, str> { Cow::Owned(entry.replace('/', std::path::MAIN_SEPARATOR_STR)) };

        DatabaseConfiguration {
            workspace: Cow::Owned(temp_dir.path().to_path_buf()),
            paths: paths.into_iter().map(to_native).collect(),
            includes: includes.into_iter().map(to_native).collect(),
            excludes: Vec::new(),
            extensions: vec![Cow::Borrowed("php")],
            glob: GlobSettings::default(),
        }
    }

    /// Writes `content` to `relative_path` below `temp_dir`, creating parent directories.
    fn create_test_file(temp_dir: &TempDir, relative_path: &str, content: &str) {
        let target = temp_dir.path().join(relative_path);
        if let Some(directory) = target.parent() {
            std::fs::create_dir_all(directory).unwrap();
        }

        std::fs::write(&target, content).unwrap();
    }

    #[test]
    fn test_specificity_calculation_exact_file() {
        let score = DatabaseLoader::calculate_pattern_specificity("src/b.php");

        assert!(score >= 2000, "Exact file should have high specificity, got {score}");
    }

    #[test]
    fn test_specificity_calculation_directory() {
        let score = DatabaseLoader::calculate_pattern_specificity("src/");

        assert!((100..1000).contains(&score), "Directory should have moderate specificity, got {score}");
    }

    #[test]
    fn test_specificity_calculation_glob() {
        let score = DatabaseLoader::calculate_pattern_specificity("src/*.php");

        assert!(score < 100, "Glob pattern should have low specificity, got {score}");
    }

    #[test]
    fn test_specificity_calculation_deeper_path() {
        let shallow = DatabaseLoader::calculate_pattern_specificity("src/");
        let deep = DatabaseLoader::calculate_pattern_specificity("src/foo/bar/");

        assert!(deep > shallow, "Deeper path should have higher specificity");
    }

    #[test]
    fn test_exact_file_vs_directory() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/b.php", "<?php");
        create_test_file(&workspace, "src/a.php", "<?php");

        // `paths` names one file exactly while `includes` names its directory.
        let config = create_test_config(&workspace, vec!["src/b.php"], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let exact = db.files().find(|f| f.name.contains("b.php")).unwrap();
        assert_eq!(exact.file_type, FileType::Host, "src/b.php should be Host (exact file beats directory)");

        let sibling = db.files().find(|f| f.name.contains("a.php")).unwrap();
        assert_eq!(sibling.file_type, FileType::Vendored, "src/a.php should be Vendored");
    }

    #[test]
    fn test_deeper_vs_shallower_directory() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/foo/bar.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/foo/"], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let nested = db.files().find(|f| f.name.contains("bar.php")).unwrap();
        assert_eq!(nested.file_type, FileType::Host, "Deeper directory pattern should win");
    }

    #[test]
    fn test_exact_file_vs_glob() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/b.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/b.php"], vec!["src/*.php"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let exact = db.files().find(|f| f.name.contains("b.php")).unwrap();
        assert_eq!(exact.file_type, FileType::Host, "Exact file should beat glob pattern");
    }

    #[test]
    fn test_equal_specificity_includes_wins() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/a.php", "<?php");

        // The same directory is listed on both sides, so the scores tie.
        let config = create_test_config(&workspace, vec!["src/"], vec!["src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let tied = db.files().find(|f| f.name.contains("a.php")).unwrap();
        assert_eq!(tied.file_type, FileType::Vendored, "Equal specificity: includes should win");
    }

    #[test]
    fn test_complex_scenario_from_bug_report() {
        let workspace = TempDir::new().unwrap();
        for relative in
            ["src/a.php", "src/b.php", "src/c/d.php", "src/c/e.php", "vendor/lib1.php", "vendor/lib2.php"]
        {
            create_test_file(&workspace, relative, "<?php");
        }

        let config = create_test_config(&workspace, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let host = db.files().find(|f| f.name.contains("src/b.php") || f.name.ends_with("b.php")).unwrap();
        assert_eq!(host.file_type, FileType::Host, "src/b.php should be Host in bug scenario");

        let nested = db.files().find(|f| f.name.contains("d.php")).unwrap();
        assert_eq!(nested.file_type, FileType::Vendored, "src/c/d.php should be Vendored");

        let library = db.files().find(|f| f.name.contains("lib1.php")).unwrap();
        assert_eq!(library.file_type, FileType::Vendored, "vendor/lib1.php should be Vendored");
    }

    #[test]
    fn test_files_only_in_paths() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/a.php", "<?php");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let only = db.files().find(|f| f.name.contains("a.php")).unwrap();
        assert_eq!(only.file_type, FileType::Host, "File only in paths should be Host");
    }

    #[test]
    fn test_files_only_in_includes() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "vendor/lib.php", "<?php");

        let config = create_test_config(&workspace, vec![], vec!["vendor/"]);
        let db = DatabaseLoader::new(config).load().unwrap();

        let only = db.files().find(|f| f.name.contains("lib.php")).unwrap();
        assert_eq!(only.file_type, FileType::Vendored, "File only in includes should be Vendored");
    }

    #[test]
    fn test_stdin_override_replaces_file_content() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/foo.php", "<?php\n// on disk");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config)
            .with_stdin_override("src/foo.php", String::from("<?php\n// from stdin"))
            .load()
            .unwrap();

        let overridden = db.files().find(|f| f.name.contains("foo.php")).unwrap();
        assert_eq!(
            overridden.contents.as_ref(),
            "<?php\n// from stdin",
            "stdin override content should be used instead of disk"
        );
    }

    #[test]
    fn test_glob_excludes_match_workspace_relative_paths() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "src/Absences/Foo/Foo.php", "<?php");
        create_test_file(&workspace, "src/Absences/Test/Faker/Provider/AbsencesProvider.php", "<?php");
        create_test_file(&workspace, "src/Calendar/Test/Helper.php", "<?php");

        // The exclude pattern is written relative to the workspace.
        let mut config = create_test_config(&workspace, vec!["src"], vec![]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/*/Test/**"))];

        let db = DatabaseLoader::new(config).load().unwrap();

        let names: Vec<String> = db.files().map(|f| f.name.to_string()).collect();
        assert!(names.iter().any(|n| n.ends_with("src/Absences/Foo/Foo.php")), "non-Test file should be loaded");
        assert!(
            !names.iter().any(|n| n.contains("src/Absences/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
        assert!(
            !names.iter().any(|n| n.contains("src/Calendar/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
    }

    #[test]
    fn test_glob_excludes_match_legacy_absolute_prefix_patterns() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "packages/foo/src/main.php", "<?php");
        create_test_file(&workspace, "packages/foo/vendor/lib.php", "<?php");

        // The leading `*/` is meant to match the absolute prefix of the file path.
        let mut config = create_test_config(&workspace, vec!["packages"], vec![]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("*/packages/**/vendor/*"))];

        let db = DatabaseLoader::new(config).load().unwrap();

        let names: Vec<String> = db.files().map(|f| f.name.to_string()).collect();
        assert!(names.iter().any(|n| n.ends_with("packages/foo/src/main.php")));
        assert!(
            !names.iter().any(|n| n.contains("/vendor/")),
            "legacy `*/packages/**/vendor/*` style should still exclude vendor files, got {names:?}"
        );
    }

    #[test]
    fn test_glob_dir_prune_skips_relative_directories() {
        let workspace = TempDir::new().unwrap();
        create_test_file(&workspace, "vendor/slevomat/coding-standard/main.php", "<?php");
        create_test_file(&workspace, "vendor/slevomat/coding-standard/tests/Sniffs/Foo.php", "<?php");
        create_test_file(&workspace, "vendor/another/lib.php", "<?php");

        let mut config = create_test_config(&workspace, vec![], vec!["vendor"]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**/tests/**"))];

        let db = DatabaseLoader::new(config).load().unwrap();

        let names: Vec<String> = db.files().map(|f| f.name.to_string()).collect();
        assert!(names.iter().any(|n| n.ends_with("vendor/slevomat/coding-standard/main.php")));
        assert!(names.iter().any(|n| n.ends_with("vendor/another/lib.php")));
        assert!(
            !names.iter().any(|n| n.contains("/tests/")),
            "files under vendor/**/tests/** should be pruned, got {names:?}"
        );
    }

    #[test]
    fn test_stdin_override_adds_file_when_not_on_disk() {
        let workspace = TempDir::new().unwrap();
        // Only a placeholder exists on disk; the overridden file itself does not.
        create_test_file(&workspace, "src/.gitkeep", "");

        let config = create_test_config(&workspace, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config)
            .with_stdin_override("src/unsaved.php", String::from("<?php\n// unsaved buffer"))
            .load()
            .unwrap();

        let unsaved = db.files().find(|f| f.name.contains("unsaved.php")).unwrap();
        assert_eq!(unsaved.file_type, FileType::Host);
        assert_eq!(unsaved.contents.as_ref(), "<?php\n// unsaved buffer");
    }
}