1use std::path::{Path, PathBuf};
2use walkdir::WalkDir;
3
/// Options that decide which paths a source scan keeps.
#[derive(Clone, Debug, Default)]
pub struct ScanConfig {
    /// Patterns a path has to match to be kept; an empty list applies no
    /// include filtering.
    pub include_patterns: Vec<String>,
    /// Patterns that reject a path when any of them matches.
    pub exclude_patterns: Vec<String>,
    /// Whether rules read from the root `.gitignore` are applied.
    pub respect_gitignore: bool,
}

impl ScanConfig {
    /// Returns a configuration with empty pattern lists and `.gitignore`
    /// handling switched on.
    ///
    /// The derived `Default` differs only in leaving `respect_gitignore`
    /// set to `false`.
    pub fn default_enabled() -> Self {
        Self {
            respect_gitignore: true,
            ..Self::default()
        }
    }
}
20
/// Splits a comma-separated list into individual patterns.
///
/// Every entry is trimmed, blank entries are dropped, and backslashes are
/// rewritten to `/`. `None` produces an empty vector.
pub fn parse_csv_patterns(raw: Option<&str>) -> Vec<String> {
    let Some(list) = raw else {
        return Vec::new();
    };

    let mut patterns = Vec::new();
    for entry in list.split(',') {
        let entry = entry.trim();
        if !entry.is_empty() {
            patterns.push(entry.replace('\\', "/"));
        }
    }
    patterns
}
29
30pub fn path_matches_any(path: &str, patterns: &[String]) -> bool {
31 patterns.iter().any(|p| pattern_matches(path, p))
32}
33
34pub fn pattern_matches(path: &str, pattern: &str) -> bool {
35 let path = normalize(path);
36 let pattern = normalize(pattern);
37
38 if pattern.is_empty() {
39 return false;
40 }
41
42 if !pattern.contains('/') {
43 if wildcard_match(&path, &pattern) {
44 return true;
45 }
46 return path.split('/').any(|seg| wildcard_match(seg, &pattern));
47 }
48
49 if let Some(tail) = pattern.strip_prefix("**/") {
50 return path.split('/').enumerate().any(|(idx, _)| {
51 wildcard_match(
52 &path.split('/').skip(idx).collect::<Vec<_>>().join("/"),
53 tail,
54 )
55 });
56 }
57
58 if wildcard_match(&path, &pattern) {
59 return true;
60 }
61
62 if path.len() >= pattern.len() {
63 return path.ends_with(&pattern);
64 }
65
66 false
67}
68
/// Matches `text` against a wildcard `pattern`.
///
/// `*` stands for any run of characters (including none, and including `/`);
/// `?` stands for exactly one character. Every other character must match
/// itself. The whole text has to be consumed for the match to succeed.
///
/// Matching works on Unicode scalar values, so `?` consumes one character
/// even when that character is encoded as several UTF-8 bytes.
fn wildcard_match(text: &str, pattern: &str) -> bool {
    let text: Vec<char> = text.chars().collect();
    let pattern: Vec<char> = pattern.chars().collect();

    let (mut ti, mut pi) = (0usize, 0usize);
    // Most recent `*`: its index in the pattern and the text index up to
    // which it is currently assumed to have consumed input.
    let mut backtrack: Option<(usize, usize)> = None;

    while ti < text.len() {
        match pattern.get(pi).copied() {
            // Checked before the literal comparison so that a `*` in the
            // pattern is always a wildcard, even when the text happens to
            // contain a literal `*` at this position.
            Some('*') => {
                backtrack = Some((pi, ti));
                pi += 1;
            }
            Some(expected) if expected == '?' || expected == text[ti] => {
                ti += 1;
                pi += 1;
            }
            _ => match backtrack {
                // Mismatch after a `*`: let the star absorb one more
                // character and retry the rest of the pattern from there.
                Some((star_pi, star_ti)) => {
                    let resume = star_ti + 1;
                    backtrack = Some((star_pi, resume));
                    pi = star_pi + 1;
                    ti = resume;
                }
                None => return false,
            },
        }
    }

    // Text is exhausted; only trailing stars may remain in the pattern.
    pattern[pi..].iter().all(|&c| c == '*')
}
100
/// Brings a path or pattern into a canonical textual form.
///
/// Surrounding whitespace is removed, backslashes become `/`, every leading
/// `./` is stripped, and finally leading and trailing `/` are dropped.
fn normalize(input: &str) -> String {
    let unified = input.trim().replace('\\', "/");

    let mut rest = unified.as_str();
    while let Some(stripped) = rest.strip_prefix("./") {
        rest = stripped;
    }

    rest.trim_matches('/').to_owned()
}
109
/// A single rule taken from a `.gitignore` file.
#[derive(Clone, Debug)]
pub struct GitignoreRule {
    /// Pattern text of the rule. As produced by `load_root_gitignore_rules`
    /// it has no trailing `/` and uses `/` as its only separator.
    pub pattern: String,
    /// `true` when a match re-includes the path instead of ignoring it.
    pub negated: bool,
    /// `true` when the rule was written with a trailing `/`.
    pub directory_only: bool,
}
116
117pub fn load_root_gitignore_rules(root: &Path) -> Vec<GitignoreRule> {
118 let path = root.join(".gitignore");
119 let Ok(content) = std::fs::read_to_string(path) else {
120 return Vec::new();
121 };
122
123 content
124 .lines()
125 .map(str::trim)
126 .filter(|line| !line.is_empty() && !line.starts_with('#'))
127 .filter_map(|line| {
128 let negated = line.starts_with('!');
129 let mut body = if negated { &line[1..] } else { line };
130 body = body.trim();
131 if body.is_empty() {
132 return None;
133 }
134
135 let directory_only = body.ends_with('/');
136 let pattern = body.trim_end_matches('/').replace('\\', "/");
137 Some(GitignoreRule {
138 pattern,
139 negated,
140 directory_only,
141 })
142 })
143 .collect()
144}
145
146pub fn is_ignored_by_rules(rel_path: &str, _is_dir: bool, rules: &[GitignoreRule]) -> bool {
147 let path = normalize(rel_path);
148 if path.is_empty() {
149 return false;
150 }
151
152 let mut ignored = false;
153
154 for rule in rules {
155 let anchored = rule.pattern.starts_with('/');
156 let rule_pattern = rule.pattern.trim_start_matches('/');
157 let dir_prefix = format!("{rule_pattern}/");
158
159 let matches_candidate = |candidate: &str| {
160 if rule.directory_only {
161 candidate == rule_pattern || candidate.starts_with(&dir_prefix)
162 } else {
163 pattern_matches(candidate, rule_pattern)
164 }
165 };
166
167 let matched = if anchored {
168 matches_candidate(&path)
169 } else {
170 matches_candidate(&path)
171 || path.split('/').enumerate().any(|(idx, _)| {
172 matches_candidate(&path.split('/').skip(idx).collect::<Vec<_>>().join("/"))
173 })
174 };
175
176 if matched {
177 ignored = !rule.negated;
178 }
179 }
180
181 ignored
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a rule in the tests below.
    fn rule(pattern: &str, negated: bool, directory_only: bool) -> GitignoreRule {
        GitignoreRule {
            pattern: pattern.to_string(),
            negated,
            directory_only,
        }
    }

    /// Entries are trimmed and blank ones are dropped.
    #[test]
    fn test_parse_csv_patterns() {
        let expected: Vec<String> = ["src/**", "tests/*.ts", "node_modules/**"]
            .iter()
            .map(|entry| entry.to_string())
            .collect();

        let parsed = parse_csv_patterns(Some("src/**, tests/*.ts , ,node_modules/**"));

        assert_eq!(parsed, expected);
    }

    /// Globs, bare file-name patterns and literal suffixes all match.
    #[test]
    fn test_pattern_matches_globs_and_suffix() {
        let path = "src/a/b/file.ts";

        for pattern in ["src/**", "**/*.ts", "*.ts", "a/b/file.ts"].iter() {
            assert!(pattern_matches(path, pattern));
        }

        assert!(!pattern_matches(path, "*.tsx"));
    }

    /// A later negated rule re-includes a path ignored by an earlier rule.
    #[test]
    fn test_gitignore_rule_evaluation_with_negation() {
        let rules = vec![rule("dist", false, true), rule("dist/keep.ts", true, false)];

        assert!(is_ignored_by_rules("dist", true, &rules));
        assert!(is_ignored_by_rules("dist/a.ts", false, &rules));
        assert!(!is_ignored_by_rules("dist/keep.ts", false, &rules));
    }
}
230
231pub fn walk_source_files_with_config<F>(
232 root: &Path,
233 config: &ScanConfig,
234 is_supported: F,
235) -> Result<Vec<PathBuf>, std::io::Error>
236where
237 F: Fn(&Path) -> bool,
238{
239 let mut out = Vec::new();
240 let rules = if config.respect_gitignore {
241 load_root_gitignore_rules(root)
242 } else {
243 Vec::new()
244 };
245
246 for entry in WalkDir::new(root)
247 .into_iter()
248 .filter_entry(|e| should_descend(root, e.path(), config, &rules))
249 {
250 let entry = entry?;
251 let path = entry.path();
252 if !path.is_file() || !is_supported(path) {
253 continue;
254 }
255
256 if should_include_file(root, path, config, &rules) {
257 out.push(path.to_path_buf());
258 }
259 }
260
261 out.sort();
262 Ok(out)
263}
264
265pub fn should_include_relative_path(
266 relative_path: &str,
267 is_dir: bool,
268 config: &ScanConfig,
269 rules: &[GitignoreRule],
270) -> bool {
271 let rel = relative_path.replace('\\', "/");
272
273 if rel.is_empty() || rel == "." {
274 return true;
275 }
276
277 if rel.starts_with(".graphyn")
278 || rel.starts_with(".git")
279 || rel.contains("/node_modules/")
280 || rel.starts_with("node_modules")
281 || rel.contains("/target/")
282 || rel.starts_with("target")
283 {
284 return false;
285 }
286
287 if config.respect_gitignore && is_ignored_by_rules(&rel, is_dir, rules) {
288 return false;
289 }
290
291 if !config.exclude_patterns.is_empty() && path_matches_any(&rel, &config.exclude_patterns) {
292 return false;
293 }
294
295 if config.include_patterns.is_empty() {
296 return true;
297 }
298
299 path_matches_any(&rel, &config.include_patterns)
300}
301
302fn should_descend(root: &Path, path: &Path, config: &ScanConfig, rules: &[GitignoreRule]) -> bool {
303 if !path.is_dir() {
304 return true;
305 }
306
307 let Ok(relative) = path.strip_prefix(root) else {
308 return true;
309 };
310 let rel = relative.to_string_lossy().replace('\\', "/");
311
312 should_include_relative_path(&rel, true, config, rules)
313}
314
315fn should_include_file(
316 root: &Path,
317 path: &Path,
318 config: &ScanConfig,
319 rules: &[GitignoreRule],
320) -> bool {
321 let Ok(relative) = path.strip_prefix(root) else {
322 return false;
323 };
324
325 let rel = relative.to_string_lossy().replace('\\', "/");
326 should_include_relative_path(&rel, false, config, rules)
327}