Skip to main content

source_map_php/
scanner.rs

1use std::path::{Path, PathBuf};
2
3use anyhow::Result;
4use globset::{Glob, GlobMatcher};
5use walkdir::WalkDir;
6
7use crate::config::PathsConfig;
8
/// A single file selected by [`scan_repo`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannedFile {
    /// Path of the file as reached through the repository root handed to
    /// [`scan_repo`] (so it is absolute only when that root is absolute).
    pub absolute_path: PathBuf,
    /// The same path with the repository root prefix stripped.
    pub relative_path: PathBuf,
}
14
15pub fn scan_repo(repo: &Path, paths: &PathsConfig) -> Result<Vec<ScannedFile>> {
16    let deny_matchers = build_matchers(&paths.deny)?;
17    let vendor_matchers = build_matchers(&paths.allow_vendor_paths)?;
18    let mut results = Vec::new();
19
20    for allow in &paths.allow {
21        let root = repo.join(allow);
22        if !root.exists() {
23            continue;
24        }
25        if root.is_file() {
26            let rel = root.strip_prefix(repo).unwrap().to_path_buf();
27            if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
28                results.push(ScannedFile {
29                    absolute_path: root,
30                    relative_path: rel,
31                });
32            }
33            continue;
34        }
35
36        for entry in WalkDir::new(&root)
37            .into_iter()
38            .filter_map(Result::ok)
39            .filter(|entry| entry.file_type().is_file())
40        {
41            let rel = entry.path().strip_prefix(repo).unwrap().to_path_buf();
42            if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
43                results.push(ScannedFile {
44                    absolute_path: entry.path().to_path_buf(),
45                    relative_path: rel,
46                });
47            }
48        }
49    }
50
51    if paths.allow_vendor {
52        let vendor_root = repo.join("vendor");
53        if vendor_root.exists() {
54            for entry in WalkDir::new(&vendor_root)
55                .into_iter()
56                .filter_map(Result::ok)
57                .filter(|entry| entry.file_type().is_file())
58            {
59                let rel = entry.path().strip_prefix(repo).unwrap().to_path_buf();
60                if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
61                    results.push(ScannedFile {
62                        absolute_path: entry.path().to_path_buf(),
63                        relative_path: rel,
64                    });
65                }
66            }
67        }
68    }
69
70    results.sort_by(|left, right| left.relative_path.cmp(&right.relative_path));
71    results.dedup_by(|left, right| left.relative_path == right.relative_path);
72    Ok(results)
73}
74
75fn build_matchers(globs: &[String]) -> Result<Vec<GlobMatcher>> {
76    globs
77        .iter()
78        .map(|glob| Ok(Glob::new(glob)?.compile_matcher()))
79        .collect()
80}
81
82fn is_allowed_file(
83    relative: &Path,
84    deny_matchers: &[GlobMatcher],
85    paths: &PathsConfig,
86    vendor_matchers: &[GlobMatcher],
87) -> bool {
88    let rel = relative.to_string_lossy();
89    if deny_matchers.iter().any(|matcher| matcher.is_match(&*rel)) {
90        return false;
91    }
92
93    let is_vendor = rel.starts_with("vendor/");
94    if is_vendor {
95        if !paths.allow_vendor {
96            return false;
97        }
98        if !vendor_matchers.iter().any(|matcher| {
99            matcher.is_match(&*rel)
100                || relative
101                    .ancestors()
102                    .any(|ancestor| matcher.is_match(ancestor.to_string_lossy().as_ref()))
103        }) {
104            return false;
105        }
106    }
107
108    rel.ends_with(".php")
109        || matches!(
110            rel.as_ref(),
111            "composer.json" | "composer.lock" | "phpunit.xml" | "pest.php"
112        )
113}
114
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::Path;

    use tempfile::tempdir;

    use crate::config::IndexerConfig;

    use super::scan_repo;

    /// Writes `contents` to `relative` under `root`, creating any missing
    /// parent directories first.
    fn write_file(root: &Path, relative: &str, contents: &str) {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, contents).unwrap();
    }

    /// Scans `root` with the default configuration and returns the relative
    /// paths of the result as strings, in the order `scan_repo` produced them.
    fn scan_with_defaults(root: &Path) -> Vec<String> {
        let config = IndexerConfig::default();
        scan_repo(root, &config.paths)
            .unwrap()
            .into_iter()
            .map(|file| file.relative_path.to_string_lossy().into_owned())
            .collect()
    }

    #[test]
    fn scans_allowlisted_php_and_blocks_denied_files() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "app/Service.php", "<?php class Service {}");
        write_file(root, "routes/web.php", "<?php");
        write_file(root, "storage/secret.php", "<?php");
        write_file(root, ".env", "DB_PASSWORD=secret");
        write_file(root, "dump.csv", "bad");

        let files = scan_with_defaults(root);

        assert_eq!(files, vec!["app/Service.php", "routes/web.php"]);
    }

    #[test]
    fn vendor_paths_respect_flag_and_glob() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(
            root,
            "vendor/acme/package/src/Thing.php",
            "<?php class Thing {}",
        );
        write_file(root, "vendor/acme/package/tests/ThingTest.php", "<?php");

        let files = scan_with_defaults(root);

        assert_eq!(files, vec!["vendor/acme/package/src/Thing.php"]);
    }
}
171}