Skip to main content

mago_database/
loader.rs

1//! Database loader for scanning and loading project files.
2
3use std::borrow::Cow;
4use std::collections::hash_map::Entry;
5use std::ffi::OsString;
6use std::path::Path;
7
8use foldhash::HashMap;
9use foldhash::HashSet;
10use globset::GlobSet;
11use rayon::prelude::*;
12use walkdir::WalkDir;
13
14use crate::Database;
15use crate::DatabaseConfiguration;
16use crate::error::DatabaseError;
17use crate::exclusion::Exclusion;
18use crate::file::File;
19use crate::file::FileId;
20use crate::file::FileType;
21use crate::matcher::build_glob_set;
22use crate::utils::read_file;
23
24/// Holds a file along with the specificity of the pattern that matched it.
25///
26/// Specificity is used to resolve conflicts when a file matches both `paths` and `includes`.
27/// Higher specificity values indicate more specific matches (e.g., exact file paths have higher
28/// specificity than directory patterns).
29#[derive(Debug)]
30struct FileWithSpecificity {
31    file: File,
32    specificity: usize,
33}
34
35/// Builder for loading files into a Database from the filesystem and memory.
36pub struct DatabaseLoader<'a> {
37    database: Option<Database<'a>>,
38    configuration: DatabaseConfiguration<'a>,
39    memory_sources: Vec<(&'static str, &'static str, FileType)>,
40    /// When set, content for this file (by logical name) is taken from here instead of disk.
41    /// Used for editor integrations: read content from stdin but use the given path for baseline and reporting.
42    stdin_override: Option<(Cow<'a, str>, String)>,
43}
44
45impl<'a> DatabaseLoader<'a> {
46    #[must_use]
47    pub fn new(configuration: DatabaseConfiguration<'a>) -> Self {
48        Self { configuration, memory_sources: vec![], database: None, stdin_override: None }
49    }
50
51    #[must_use]
52    pub fn with_database(mut self, database: Database<'a>) -> Self {
53        self.database = Some(database);
54        self
55    }
56
57    /// When set, the file with this logical name (workspace-relative path) will use the given
58    /// content instead of being read from disk. The logical name is used for baseline and reporting.
59    #[must_use]
60    pub fn with_stdin_override(mut self, logical_name: impl Into<Cow<'a, str>>, content: String) -> Self {
61        self.stdin_override = Some((logical_name.into(), content));
62        self
63    }
64
65    pub fn add_memory_source(&mut self, name: &'static str, contents: &'static str, file_type: FileType) {
66        self.memory_sources.push((name, contents, file_type));
67    }
68
69    /// Loads files from disk into the database.
70    ///
71    /// # Errors
72    ///
73    /// Returns a [`DatabaseError`] if:
74    /// - A glob pattern is invalid
75    /// - File system operations fail (reading directories, files)
76    /// - File content cannot be read as valid UTF-8
77    pub fn load(mut self) -> Result<Database<'a>, DatabaseError> {
78        let mut db = self.database.take().unwrap_or_else(|| Database::new(self.configuration.clone()));
79
80        // Update database configuration to use the loader's configuration
81        // (fixes workspace path when merging with prelude database)
82        db.configuration = self.configuration.clone();
83
84        let extensions_set: HashSet<OsString> =
85            self.configuration.extensions.iter().map(|s| OsString::from(s.as_ref())).collect();
86
87        let glob_exclude_patterns: Vec<&str> = self
88            .configuration
89            .excludes
90            .iter()
91            .filter_map(|ex| match ex {
92                Exclusion::Pattern(pat) => Some(pat.as_ref()),
93                Exclusion::Path(_) => None,
94            })
95            .collect();
96
97        let glob_excludes = build_glob_set(glob_exclude_patterns.iter().copied(), self.configuration.glob)?;
98        let dir_prune_patterns: Vec<&str> = glob_exclude_patterns
99            .iter()
100            .filter_map(|pat| {
101                let stripped =
102                    pat.strip_suffix("/**/*").or_else(|| pat.strip_suffix("/**")).or_else(|| pat.strip_suffix("/*"))?;
103                if stripped.is_empty() || stripped == "*" || stripped == "**" {
104                    return None;
105                }
106                Some(stripped)
107            })
108            .collect();
109
110        let dir_prune_globs = build_glob_set(dir_prune_patterns.iter().copied(), self.configuration.glob)?;
111
112        let path_excludes: HashSet<_> = self
113            .configuration
114            .excludes
115            .iter()
116            .filter_map(|ex| match ex {
117                Exclusion::Path(p) => Some(p),
118                _ => None,
119            })
120            .collect();
121
122        let host_files_with_spec = self.load_paths(
123            &self.configuration.paths,
124            FileType::Host,
125            &extensions_set,
126            &glob_excludes,
127            &dir_prune_globs,
128            &path_excludes,
129        )?;
130
131        let vendored_files_with_spec = self.load_paths(
132            &self.configuration.includes,
133            FileType::Vendored,
134            &extensions_set,
135            &glob_excludes,
136            &dir_prune_globs,
137            &path_excludes,
138        )?;
139
140        let mut all_files: HashMap<FileId, File> = HashMap::default();
141        let mut file_decisions: HashMap<FileId, (FileType, usize)> = HashMap::default();
142
143        // Process host files (from paths)
144        for file_with_spec in host_files_with_spec {
145            let file_id = file_with_spec.file.id;
146            let specificity = file_with_spec.specificity;
147
148            all_files.insert(file_id, file_with_spec.file);
149            file_decisions.insert(file_id, (FileType::Host, specificity));
150        }
151
152        // When stdin override is set, ensure that the file is in the database
153        // (covers new/unsaved files, not on disk). Excluded paths are skipped
154        // so that editor integrations using `--stdin-input` honor the same
155        // exclude rules as a regular filesystem scan.
156        if let Some((ref name, ref content)) = self.stdin_override {
157            let virtual_path = self.configuration.workspace.join(name.as_ref());
158            let virtual_path_canonical = virtual_path.canonicalize().unwrap_or_else(|_| virtual_path.clone());
159            let virtual_path_str = virtual_path_canonical.to_string_lossy();
160
161            let glob_excluded = !glob_excludes.is_empty()
162                && (glob_excludes.is_match(virtual_path_canonical.as_path()) || glob_excludes.is_match(name.as_ref()));
163
164            let path_excluded = path_excludes.iter().any(|excl| {
165                let canonical = if Path::new(excl.as_ref()).is_absolute() {
166                    excl.as_ref().to_path_buf()
167                } else {
168                    self.configuration.workspace.join(excl.as_ref())
169                };
170                let canonical = canonical.canonicalize().unwrap_or(canonical);
171                let canonical_str = canonical.to_string_lossy();
172
173                virtual_path_str.starts_with(canonical_str.as_ref())
174                    && matches!(virtual_path_str.as_bytes().get(canonical_str.len()), None | Some(&b'/' | &b'\\'))
175            });
176
177            if !glob_excluded && !path_excluded {
178                let file = File::ephemeral(Cow::Owned(name.as_ref().to_string()), Cow::Owned(content.clone()));
179                let file_id = file.id;
180                if let Entry::Vacant(e) = all_files.entry(file_id) {
181                    e.insert(file);
182
183                    file_decisions.insert(file_id, (FileType::Host, usize::MAX));
184                }
185            }
186        }
187
188        for file_with_spec in vendored_files_with_spec {
189            let file_id = file_with_spec.file.id;
190            let vendored_specificity = file_with_spec.specificity;
191
192            all_files.entry(file_id).or_insert(file_with_spec.file);
193
194            match file_decisions.get(&file_id) {
195                Some((FileType::Host, host_specificity)) if vendored_specificity < *host_specificity => {
196                    // Keep Host
197                }
198                _ => {
199                    file_decisions.insert(file_id, (FileType::Vendored, vendored_specificity));
200                }
201            }
202        }
203
204        db.reserve(file_decisions.len() + self.memory_sources.len());
205
206        for (file_id, (final_type, _)) in file_decisions {
207            if let Some(mut file) = all_files.remove(&file_id) {
208                file.file_type = final_type;
209                db.add(file);
210            }
211        }
212
213        for (name, contents, file_type) in self.memory_sources {
214            let file = File::new(Cow::Borrowed(name), file_type, None, Cow::Borrowed(contents));
215
216            db.add(file);
217        }
218
219        Ok(db)
220    }
221
222    /// Discovers and reads all files from a set of root paths or glob patterns in parallel.
223    ///
224    /// Supports both:
225    /// - Directory paths (e.g., "src", "tests") - recursively walks all files
226    /// - Glob patterns (e.g., "src/**/*.php", "tests/Unit/*Test.php") - matches files using glob syntax
227    ///
228    /// Returns files along with their pattern specificity for conflict resolution.
229    fn load_paths(
230        &self,
231        roots: &[Cow<'a, str>],
232        file_type: FileType,
233        extensions: &HashSet<OsString>,
234        glob_excludes: &GlobSet,
235        dir_prune_globs: &GlobSet,
236        path_excludes: &HashSet<&Cow<'a, Path>>,
237    ) -> Result<Vec<FileWithSpecificity>, DatabaseError> {
238        // Canonicalize the workspace once.  All WalkDir roots are canonicalized
239        // before traversal so their paths inherit the canonical prefix without
240        // any per-file syscalls.
241        let canonical_workspace =
242            self.configuration.workspace.canonicalize().unwrap_or_else(|_| self.configuration.workspace.to_path_buf());
243
244        // Pre-canonicalize path excludes once as strings.  A plain byte-string
245        // prefix check is then sufficient in the parallel section, replacing the
246        // per-file canonicalize() + Path::starts_with (Components iteration).
247        let canonical_excludes: Vec<String> = path_excludes
248            .iter()
249            .filter_map(|ex| {
250                let p = if Path::new(ex.as_ref()).is_absolute() {
251                    ex.as_ref().to_path_buf()
252                } else {
253                    self.configuration.workspace.join(ex.as_ref())
254                };
255
256                p.canonicalize().ok()?.into_os_string().into_string().ok()
257            })
258            .collect();
259
260        let workspace_relative_str = |path: &Path| -> String {
261            let rel = path.strip_prefix(canonical_workspace.as_path()).unwrap_or(path);
262            let s = rel.to_string_lossy();
263            #[cfg(windows)]
264            {
265                s.replace('\\', "/")
266            }
267            #[cfg(not(windows))]
268            {
269                s.into_owned()
270            }
271        };
272
273        let mut paths_to_process: Vec<(std::path::PathBuf, usize)> = Vec::new();
274
275        for root in roots {
276            // Check if this is a glob pattern (contains glob metacharacters).
277            // First check if it's an actual file/directory on disk. if so, treat it
278            // as a literal path even if the name contains glob metacharacters like `[]`.
279            let resolved_path = if Path::new(root.as_ref()).is_absolute() {
280                Path::new(root.as_ref()).to_path_buf()
281            } else {
282                self.configuration.workspace.join(root.as_ref())
283            };
284
285            let is_glob_pattern = !resolved_path.exists()
286                && (root.contains('*') || root.contains('?') || root.contains('[') || root.contains('{'));
287
288            let specificity = Self::calculate_pattern_specificity(root.as_ref());
289            if is_glob_pattern {
290                // Handle as glob pattern
291                let pattern = if Path::new(root.as_ref()).is_absolute() {
292                    root.to_string()
293                } else {
294                    // Make relative patterns absolute by prepending workspace
295                    self.configuration.workspace.join(root.as_ref()).to_string_lossy().to_string()
296                };
297
298                match glob::glob(&pattern) {
299                    Ok(entries) => {
300                        for entry in entries {
301                            match entry {
302                                Ok(path) => {
303                                    if path.is_file() {
304                                        // Canonicalize so the path shares the same prefix as
305                                        // `canonical_workspace` (important on macOS where
306                                        // TempDir / glob return /var/… but canonicalize gives
307                                        // /private/var/…).  Fall back to the original on error.
308                                        let canonical = path.canonicalize().unwrap_or(path);
309                                        paths_to_process.push((canonical, specificity));
310                                    }
311                                }
312                                Err(e) => {
313                                    tracing::warn!("Failed to read glob entry: {}", e);
314                                }
315                            }
316                        }
317                    }
318                    Err(e) => {
319                        return Err(DatabaseError::Glob(e.to_string()));
320                    }
321                }
322            } else {
323                let canonical_root = resolved_path.canonicalize().unwrap_or(resolved_path);
324                let has_dir_prunes = !dir_prune_globs.is_empty();
325                let has_path_prunes = !canonical_excludes.is_empty();
326                let walker = WalkDir::new(&canonical_root).into_iter().filter_entry(|entry| {
327                    if entry.depth() == 0 || !entry.file_type().is_dir() {
328                        return true;
329                    }
330
331                    let path = entry.path();
332
333                    if has_path_prunes
334                        && let Some(p) = path.to_str()
335                        && canonical_excludes.iter().any(|excl| {
336                            p.starts_with(excl.as_str())
337                                && matches!(p.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
338                        })
339                    {
340                        return false;
341                    }
342
343                    if has_dir_prunes
344                        && (dir_prune_globs.is_match(path) || dir_prune_globs.is_match(workspace_relative_str(path)))
345                    {
346                        return false;
347                    }
348
349                    true
350                });
351
352                for entry in walker.filter_map(Result::ok) {
353                    if entry.file_type().is_file() {
354                        paths_to_process.push((entry.into_path(), specificity));
355                    }
356                }
357            }
358        }
359
360        let has_path_excludes = !canonical_excludes.is_empty();
361        let has_glob_excludes = !glob_excludes.is_empty();
362        let files: Vec<FileWithSpecificity> = paths_to_process
363            .into_par_iter()
364            .filter_map(|(path, specificity)| {
365                if has_glob_excludes
366                    && (glob_excludes.is_match(&path) || glob_excludes.is_match(workspace_relative_str(&path)))
367                {
368                    return None;
369                }
370
371                let ext = path.extension()?;
372                if !extensions.contains(ext) {
373                    return None;
374                }
375
376                if has_path_excludes {
377                    let excluded = path.to_str().is_some_and(|s| {
378                        canonical_excludes.iter().any(|excl| {
379                            s.starts_with(excl.as_str())
380                                && matches!(s.as_bytes().get(excl.len()), None | Some(&b'/' | &b'\\'))
381                        })
382                    });
383
384                    if excluded {
385                        return None;
386                    }
387                }
388
389                let workspace = canonical_workspace.as_path();
390                #[cfg(windows)]
391                let logical_name = path
392                    .strip_prefix(workspace)
393                    .unwrap_or_else(|_| path.as_path())
394                    .to_string_lossy()
395                    .replace('\\', "/");
396                #[cfg(not(windows))]
397                let logical_name =
398                    path.strip_prefix(workspace).unwrap_or(path.as_path()).to_string_lossy().into_owned();
399
400                if let Some((ref override_name, ref override_content)) = self.stdin_override
401                    && override_name.as_ref() == logical_name
402                {
403                    let file = File::new(
404                        Cow::Owned(logical_name),
405                        file_type,
406                        Some(path.clone()),
407                        Cow::Owned(override_content.clone()),
408                    );
409
410                    return Some(Ok(FileWithSpecificity { file, specificity }));
411                }
412
413                match read_file(workspace, &path, file_type) {
414                    Ok(file) => Some(Ok(FileWithSpecificity { file, specificity })),
415                    Err(e) => Some(Err(e)),
416                }
417            })
418            .collect::<Result<Vec<FileWithSpecificity>, _>>()?;
419
420        Ok(files)
421    }
422
423    /// Calculates how specific a pattern is for a given file path.
424    ///
425    /// Examples:
426    ///
427    /// - "src/b.php" matching src/b.php: ~2000 (exact file, 2 components)
428    /// - "src/" matching src/b.php: ~100 (directory, 1 component)
429    /// - "src" matching src/b.php: ~100 (directory, 1 component)
430    fn calculate_pattern_specificity(pattern: &str) -> usize {
431        let pattern_path = Path::new(pattern);
432
433        let component_count = pattern_path.components().count();
434        let is_glob = pattern.contains('*') || pattern.contains('?') || pattern.contains('[') || pattern.contains('{');
435
436        if is_glob {
437            let non_wildcard_components = pattern_path
438                .components()
439                .filter(|c| {
440                    let s = c.as_os_str().to_string_lossy();
441                    !s.contains('*') && !s.contains('?') && !s.contains('[') && !s.contains('{')
442                })
443                .count();
444            non_wildcard_components * 10
445        } else if pattern_path.is_file() || pattern_path.extension().is_some() || pattern.ends_with(".php") {
446            component_count * 1000
447        } else {
448            component_count * 100
449        }
450    }
451}
452
#[cfg(test)]
mod tests {
    use super::*;
    use crate::DatabaseReader;
    use crate::GlobSettings;
    use std::borrow::Cow;
    use tempfile::TempDir;

    /// Builds a configuration rooted at `temp_dir` that only considers `php` files and has no
    /// excludes. `paths` and `includes` are written with `/` and converted to the platform
    /// separator.
    fn create_test_config(temp_dir: &TempDir, paths: Vec<&str>, includes: Vec<&str>) -> DatabaseConfiguration<'static> {
        let to_native = |entry: &str| entry.replace('/', std::path::MAIN_SEPARATOR_STR);

        DatabaseConfiguration {
            workspace: Cow::Owned(temp_dir.path().to_path_buf()),
            paths: paths.into_iter().map(|entry| Cow::Owned(to_native(entry))).collect(),
            includes: includes.into_iter().map(|entry| Cow::Owned(to_native(entry))).collect(),
            excludes: vec![],
            extensions: vec![Cow::Borrowed("php")],
            glob: GlobSettings::default(),
        }
    }

    /// Writes `content` to `relative_path` inside `temp_dir`, creating parent directories.
    fn create_test_file(temp_dir: &TempDir, relative_path: &str, content: &str) {
        let target = temp_dir.path().join(relative_path);
        if let Some(directory) = target.parent() {
            std::fs::create_dir_all(directory).unwrap();
        }
        std::fs::write(target, content).unwrap();
    }

    /// Runs a loader over `config` and returns the resulting database.
    fn load_database(config: DatabaseConfiguration<'static>) -> Database<'static> {
        DatabaseLoader::new(config).load().unwrap()
    }

    /// Asserts that the first file whose name contains `needle` has type `expected`.
    fn assert_file_type(db: &Database<'static>, needle: &str, expected: FileType, message: &str) {
        let file = db.files().find(|f| f.name.contains(needle)).unwrap();
        assert_eq!(file.file_type, expected, "{message}");
    }

    /// Names of every file in `db`.
    fn loaded_names(db: &Database<'static>) -> Vec<String> {
        db.files().map(|f| f.name.to_string()).collect()
    }

    #[test]
    fn test_specificity_calculation_exact_file() {
        let spec = DatabaseLoader::calculate_pattern_specificity("src/b.php");
        assert!(spec >= 2000, "Exact file should have high specificity, got {spec}");
    }

    #[test]
    fn test_specificity_calculation_directory() {
        let spec = DatabaseLoader::calculate_pattern_specificity("src/");
        assert!((100..1000).contains(&spec), "Directory should have moderate specificity, got {spec}");
    }

    #[test]
    fn test_specificity_calculation_glob() {
        let spec = DatabaseLoader::calculate_pattern_specificity("src/*.php");
        assert!(spec < 100, "Glob pattern should have low specificity, got {spec}");
    }

    #[test]
    fn test_specificity_calculation_deeper_path() {
        let shallow_spec = DatabaseLoader::calculate_pattern_specificity("src/");
        let deep_spec = DatabaseLoader::calculate_pattern_specificity("src/foo/bar/");
        assert!(deep_spec > shallow_spec, "Deeper path should have higher specificity");
    }

    #[test]
    fn test_exact_file_vs_directory() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/b.php", "<?php");
        create_test_file(&temp_dir, "src/a.php", "<?php");

        let db = load_database(create_test_config(&temp_dir, vec!["src/b.php"], vec!["src/"]));

        assert_file_type(&db, "b.php", FileType::Host, "src/b.php should be Host (exact file beats directory)");
        assert_file_type(&db, "a.php", FileType::Vendored, "src/a.php should be Vendored");
    }

    #[test]
    fn test_deeper_vs_shallower_directory() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/foo/bar.php", "<?php");

        let db = load_database(create_test_config(&temp_dir, vec!["src/foo/"], vec!["src/"]));

        assert_file_type(&db, "bar.php", FileType::Host, "Deeper directory pattern should win");
    }

    #[test]
    fn test_exact_file_vs_glob() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/b.php", "<?php");

        let db = load_database(create_test_config(&temp_dir, vec!["src/b.php"], vec!["src/*.php"]));

        assert_file_type(&db, "b.php", FileType::Host, "Exact file should beat glob pattern");
    }

    #[test]
    fn test_equal_specificity_includes_wins() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/a.php", "<?php");

        let db = load_database(create_test_config(&temp_dir, vec!["src/"], vec!["src/"]));

        assert_file_type(&db, "a.php", FileType::Vendored, "Equal specificity: includes should win");
    }

    #[test]
    fn test_complex_scenario_from_bug_report() {
        let temp_dir = TempDir::new().unwrap();
        for relative_path in
            ["src/a.php", "src/b.php", "src/c/d.php", "src/c/e.php", "vendor/lib1.php", "vendor/lib2.php"]
        {
            create_test_file(&temp_dir, relative_path, "<?php");
        }

        let db = load_database(create_test_config(&temp_dir, vec!["src/b.php"], vec!["vendor", "src/c", "src/"]));

        let b_file = db.files().find(|f| f.name.contains("src/b.php") || f.name.ends_with("b.php")).unwrap();
        assert_eq!(b_file.file_type, FileType::Host, "src/b.php should be Host in bug scenario");

        assert_file_type(&db, "d.php", FileType::Vendored, "src/c/d.php should be Vendored");
        assert_file_type(&db, "lib1.php", FileType::Vendored, "vendor/lib1.php should be Vendored");
    }

    #[test]
    fn test_files_only_in_paths() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/a.php", "<?php");

        let db = load_database(create_test_config(&temp_dir, vec!["src/"], vec![]));

        assert_file_type(&db, "a.php", FileType::Host, "File only in paths should be Host");
    }

    #[test]
    fn test_files_only_in_includes() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "vendor/lib.php", "<?php");

        let db = load_database(create_test_config(&temp_dir, vec![], vec!["vendor/"]));

        assert_file_type(&db, "lib.php", FileType::Vendored, "File only in includes should be Vendored");
    }

    #[test]
    fn test_stdin_override_replaces_file_content() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/foo.php", "<?php\n// on disk");

        let config = create_test_config(&temp_dir, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config)
            .with_stdin_override("src/foo.php", "<?php\n// from stdin".to_string())
            .load()
            .unwrap();

        let file = db.files().find(|f| f.name.contains("foo.php")).unwrap();
        assert_eq!(
            file.contents.as_ref(),
            "<?php\n// from stdin",
            "stdin override content should be used instead of disk"
        );
    }

    #[test]
    fn test_glob_excludes_match_workspace_relative_paths() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/Absences/Foo/Foo.php", "<?php");
        create_test_file(&temp_dir, "src/Absences/Test/Faker/Provider/AbsencesProvider.php", "<?php");
        create_test_file(&temp_dir, "src/Calendar/Test/Helper.php", "<?php");

        let mut config = create_test_config(&temp_dir, vec!["src"], vec![]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("src/*/Test/**"))];

        let names = loaded_names(&load_database(config));

        assert!(names.iter().any(|n| n.ends_with("src/Absences/Foo/Foo.php")), "non-Test file should be loaded");
        assert!(
            !names.iter().any(|n| n.contains("src/Absences/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
        assert!(
            !names.iter().any(|n| n.contains("src/Calendar/Test/")),
            "files under src/*/Test/** should be excluded, got {names:?}"
        );
    }

    #[test]
    fn test_glob_excludes_match_legacy_absolute_prefix_patterns() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "packages/foo/src/main.php", "<?php");
        create_test_file(&temp_dir, "packages/foo/vendor/lib.php", "<?php");

        let mut config = create_test_config(&temp_dir, vec!["packages"], vec![]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("*/packages/**/vendor/*"))];

        let names = loaded_names(&load_database(config));

        assert!(names.iter().any(|n| n.ends_with("packages/foo/src/main.php")));
        assert!(
            !names.iter().any(|n| n.contains("/vendor/")),
            "legacy `*/packages/**/vendor/*` style should still exclude vendor files, got {names:?}"
        );
    }

    #[test]
    fn test_glob_dir_prune_skips_relative_directories() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "vendor/slevomat/coding-standard/main.php", "<?php");
        create_test_file(&temp_dir, "vendor/slevomat/coding-standard/tests/Sniffs/Foo.php", "<?php");
        create_test_file(&temp_dir, "vendor/another/lib.php", "<?php");

        let mut config = create_test_config(&temp_dir, vec![], vec!["vendor"]);
        config.excludes = vec![Exclusion::Pattern(Cow::Borrowed("vendor/**/tests/**"))];

        let names = loaded_names(&load_database(config));

        assert!(names.iter().any(|n| n.ends_with("vendor/slevomat/coding-standard/main.php")));
        assert!(names.iter().any(|n| n.ends_with("vendor/another/lib.php")));
        assert!(
            !names.iter().any(|n| n.contains("/tests/")),
            "files under vendor/**/tests/** should be pruned, got {names:?}"
        );
    }

    #[test]
    fn test_stdin_override_adds_file_when_not_on_disk() {
        let temp_dir = TempDir::new().unwrap();
        create_test_file(&temp_dir, "src/.gitkeep", "");

        let config = create_test_config(&temp_dir, vec!["src/"], vec![]);
        let db = DatabaseLoader::new(config)
            .with_stdin_override("src/unsaved.php", "<?php\n// unsaved buffer".to_string())
            .load()
            .unwrap();

        let file = db.files().find(|f| f.name.contains("unsaved.php")).unwrap();
        assert_eq!(file.file_type, FileType::Host);
        assert_eq!(file.contents.as_ref(), "<?php\n// unsaved buffer");
    }
}