source_map_php/
scanner.rs1use std::path::{Path, PathBuf};
2
3use anyhow::Result;
4use globset::{Glob, GlobMatcher};
5use walkdir::WalkDir;
6
7use crate::config::PathsConfig;
8
/// A single file selected by [`scan_repo`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScannedFile {
    /// Location of the file as reached from the `repo` argument of
    /// [`scan_repo`] (so it is only absolute when `repo` itself is).
    pub absolute_path: PathBuf,
    /// The same location with the `repo` prefix stripped; results are sorted
    /// and de-duplicated on this field.
    pub relative_path: PathBuf,
}
14
15pub fn scan_repo(repo: &Path, paths: &PathsConfig) -> Result<Vec<ScannedFile>> {
16 let deny_matchers = build_matchers(&paths.deny)?;
17 let vendor_matchers = build_matchers(&paths.allow_vendor_paths)?;
18 let mut results = Vec::new();
19
20 for allow in &paths.allow {
21 let root = repo.join(allow);
22 if !root.exists() {
23 continue;
24 }
25 if root.is_file() {
26 let rel = root.strip_prefix(repo).unwrap().to_path_buf();
27 if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
28 results.push(ScannedFile {
29 absolute_path: root,
30 relative_path: rel,
31 });
32 }
33 continue;
34 }
35
36 for entry in WalkDir::new(&root)
37 .into_iter()
38 .filter_map(Result::ok)
39 .filter(|entry| entry.file_type().is_file())
40 {
41 let rel = entry.path().strip_prefix(repo).unwrap().to_path_buf();
42 if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
43 results.push(ScannedFile {
44 absolute_path: entry.path().to_path_buf(),
45 relative_path: rel,
46 });
47 }
48 }
49 }
50
51 if paths.allow_vendor {
52 let vendor_root = repo.join("vendor");
53 if vendor_root.exists() {
54 for entry in WalkDir::new(&vendor_root)
55 .into_iter()
56 .filter_map(Result::ok)
57 .filter(|entry| entry.file_type().is_file())
58 {
59 let rel = entry.path().strip_prefix(repo).unwrap().to_path_buf();
60 if is_allowed_file(&rel, &deny_matchers, paths, &vendor_matchers) {
61 results.push(ScannedFile {
62 absolute_path: entry.path().to_path_buf(),
63 relative_path: rel,
64 });
65 }
66 }
67 }
68 }
69
70 results.sort_by(|left, right| left.relative_path.cmp(&right.relative_path));
71 results.dedup_by(|left, right| left.relative_path == right.relative_path);
72 Ok(results)
73}
74
75fn build_matchers(globs: &[String]) -> Result<Vec<GlobMatcher>> {
76 globs
77 .iter()
78 .map(|glob| Ok(Glob::new(glob)?.compile_matcher()))
79 .collect()
80}
81
82fn is_allowed_file(
83 relative: &Path,
84 deny_matchers: &[GlobMatcher],
85 paths: &PathsConfig,
86 vendor_matchers: &[GlobMatcher],
87) -> bool {
88 let rel = relative.to_string_lossy();
89 if deny_matchers.iter().any(|matcher| matcher.is_match(&*rel)) {
90 return false;
91 }
92
93 let is_vendor = rel.starts_with("vendor/");
94 if is_vendor {
95 if !paths.allow_vendor {
96 return false;
97 }
98 if !vendor_matchers.iter().any(|matcher| {
99 matcher.is_match(&*rel)
100 || relative
101 .ancestors()
102 .any(|ancestor| matcher.is_match(ancestor.to_string_lossy().as_ref()))
103 }) {
104 return false;
105 }
106 }
107
108 rel.ends_with(".php")
109 || matches!(
110 rel.as_ref(),
111 "composer.json" | "composer.lock" | "phpunit.xml" | "pest.php"
112 )
113}
114
#[cfg(test)]
mod tests {
    use std::fs;
    use std::path::Path;

    use tempfile::tempdir;

    use crate::config::IndexerConfig;

    use super::scan_repo;

    /// Scans `repo` with the default indexer configuration and returns the
    /// relative paths of the hits as strings, in the order `scan_repo` produced them.
    fn scan_with_defaults(repo: &Path) -> Vec<String> {
        let config = IndexerConfig::default();
        scan_repo(repo, &config.paths)
            .unwrap()
            .into_iter()
            .map(|file| file.relative_path.to_string_lossy().into_owned())
            .collect()
    }

    #[test]
    fn scans_allowlisted_php_and_blocks_denied_files() {
        let dir = tempdir().unwrap();
        let root = dir.path();

        for sub_dir in ["app", "storage", "routes"] {
            fs::create_dir_all(root.join(sub_dir)).unwrap();
        }
        // A mix of files: only the first two are expected to survive the scan.
        let fixtures = [
            ("app/Service.php", "<?php class Service {}"),
            ("routes/web.php", "<?php"),
            ("storage/secret.php", "<?php"),
            (".env", "DB_PASSWORD=secret"),
            ("dump.csv", "bad"),
        ];
        for (name, contents) in fixtures {
            fs::write(root.join(name), contents).unwrap();
        }

        let files = scan_with_defaults(root);

        assert_eq!(files, vec!["app/Service.php", "routes/web.php"]);
    }

    #[test]
    fn vendor_paths_respect_flag_and_glob() {
        let dir = tempdir().unwrap();
        let root = dir.path();

        for sub_dir in ["vendor/acme/package/src", "vendor/acme/package/tests"] {
            fs::create_dir_all(root.join(sub_dir)).unwrap();
        }
        let fixtures = [
            ("vendor/acme/package/src/Thing.php", "<?php class Thing {}"),
            ("vendor/acme/package/tests/ThingTest.php", "<?php"),
        ];
        for (name, contents) in fixtures {
            fs::write(root.join(name), contents).unwrap();
        }

        let files = scan_with_defaults(root);

        assert_eq!(files, vec!["vendor/acme/package/src/Thing.php"]);
    }
}