Skip to main content

graphyn_core/
scan.rs

1use std::path::{Path, PathBuf};
2use walkdir::WalkDir;
3
/// Options that decide which paths a source scan keeps.
#[derive(Clone, Debug, Default)]
pub struct ScanConfig {
    /// Patterns a path must match to be kept. When empty, every path that is
    /// not otherwise rejected is kept.
    pub include_patterns: Vec<String>,
    /// Patterns that reject a path; these are evaluated before
    /// `include_patterns`.
    pub exclude_patterns: Vec<String>,
    /// Whether rules from the root `.gitignore` are applied. The derived
    /// `Default` leaves this `false`.
    pub respect_gitignore: bool,
}
10
11impl ScanConfig {
12    pub fn default_enabled() -> Self {
13        Self {
14            include_patterns: Vec::new(),
15            exclude_patterns: Vec::new(),
16            respect_gitignore: true,
17        }
18    }
19}
20
/// Splits a comma-separated pattern list into individual patterns.
///
/// Each piece is trimmed, empty pieces are dropped, and backslashes are
/// rewritten to forward slashes. `None` yields an empty list.
pub fn parse_csv_patterns(raw: Option<&str>) -> Vec<String> {
    let Some(list) = raw else {
        return Vec::new();
    };

    let mut patterns = Vec::new();
    for piece in list.split(',') {
        let piece = piece.trim();
        if piece.is_empty() {
            continue;
        }
        patterns.push(piece.replace('\\', "/"));
    }
    patterns
}
29
30pub fn path_matches_any(path: &str, patterns: &[String]) -> bool {
31    patterns.iter().any(|p| pattern_matches(path, p))
32}
33
34pub fn pattern_matches(path: &str, pattern: &str) -> bool {
35    let path = normalize(path);
36    let pattern = normalize(pattern);
37
38    if pattern.is_empty() {
39        return false;
40    }
41
42    if !pattern.contains('/') {
43        if wildcard_match(&path, &pattern) {
44            return true;
45        }
46        return path.split('/').any(|seg| wildcard_match(seg, &pattern));
47    }
48
49    if let Some(tail) = pattern.strip_prefix("**/") {
50        return path.split('/').enumerate().any(|(idx, _)| {
51            wildcard_match(
52                &path.split('/').skip(idx).collect::<Vec<_>>().join("/"),
53                tail,
54            )
55        });
56    }
57
58    if wildcard_match(&path, &pattern) {
59        return true;
60    }
61
62    if path.len() >= pattern.len() {
63        return path.ends_with(&pattern);
64    }
65
66    false
67}
68
/// Matches `text` against a glob-style `pattern`.
///
/// `*` matches any run of characters (including none, and including `/`),
/// `?` matches exactly one character, and every other character matches
/// itself. The whole of `text` must be consumed for a match.
///
/// Matching works on `char`s so that `?` consumes a full character rather
/// than a single UTF-8 byte, and `*` in the pattern is always treated as a
/// wildcard even when the text happens to contain a literal `*` at that spot.
fn wildcard_match(text: &str, pattern: &str) -> bool {
    let t: Vec<char> = text.chars().collect();
    let p: Vec<char> = pattern.chars().collect();

    let (mut ti, mut pi) = (0usize, 0usize);
    // For the most recent `*`: the pattern index just after it, and the text
    // index from which the remainder of the pattern was last attempted.
    let mut backtrack: Option<(usize, usize)> = None;

    while ti < t.len() {
        match p.get(pi).copied() {
            // Checked before the literal comparison so that a `*` in the
            // text cannot make the pattern's `*` behave like a literal.
            Some('*') => {
                backtrack = Some((pi + 1, ti));
                pi += 1;
            }
            Some(c) if c == '?' || c == t[ti] => {
                ti += 1;
                pi += 1;
            }
            _ => {
                let Some((resume_pi, resume_ti)) = backtrack else {
                    return false;
                };
                // Let the last `*` absorb one more character and retry.
                backtrack = Some((resume_pi, resume_ti + 1));
                pi = resume_pi;
                ti = resume_ti + 1;
            }
        }
    }

    // Text is exhausted; only trailing `*`s may remain in the pattern.
    p[pi..].iter().all(|&c| c == '*')
}
100
/// Canonicalizes a path or pattern for comparison: trims surrounding
/// whitespace, turns backslashes into `/`, removes every leading `./`, and
/// finally strips leading and trailing `/`.
fn normalize(input: &str) -> String {
    let unified = input.trim().replace('\\', "/");

    let mut rest = unified.as_str();
    while let Some(stripped) = rest.strip_prefix("./") {
        rest = stripped;
    }

    rest.trim_matches('/').to_owned()
}
109
/// One rule taken from a `.gitignore` file.
#[derive(Clone, Debug)]
pub struct GitignoreRule {
    /// Pattern text. When produced by `load_root_gitignore_rules` the leading
    /// `!` and any trailing `/` have already been removed; a leading `/` is
    /// kept and marks the rule as anchored during evaluation.
    pub pattern: String,
    /// `true` for a `!` rule: a match clears the ignored state instead of
    /// setting it.
    pub negated: bool,
    /// `true` when the original line ended in `/`.
    pub directory_only: bool,
}
116
117pub fn load_root_gitignore_rules(root: &Path) -> Vec<GitignoreRule> {
118    let path = root.join(".gitignore");
119    let Ok(content) = std::fs::read_to_string(path) else {
120        return Vec::new();
121    };
122
123    content
124        .lines()
125        .map(str::trim)
126        .filter(|line| !line.is_empty() && !line.starts_with('#'))
127        .filter_map(|line| {
128            let negated = line.starts_with('!');
129            let mut body = if negated { &line[1..] } else { line };
130            body = body.trim();
131            if body.is_empty() {
132                return None;
133            }
134
135            let directory_only = body.ends_with('/');
136            let pattern = body.trim_end_matches('/').replace('\\', "/");
137            Some(GitignoreRule {
138                pattern,
139                negated,
140                directory_only,
141            })
142        })
143        .collect()
144}
145
146pub fn is_ignored_by_rules(rel_path: &str, _is_dir: bool, rules: &[GitignoreRule]) -> bool {
147    let path = normalize(rel_path);
148    if path.is_empty() {
149        return false;
150    }
151
152    let mut ignored = false;
153
154    for rule in rules {
155        let anchored = rule.pattern.starts_with('/');
156        let rule_pattern = rule.pattern.trim_start_matches('/');
157        let dir_prefix = format!("{rule_pattern}/");
158
159        let matches_candidate = |candidate: &str| {
160            if rule.directory_only {
161                candidate == rule_pattern || candidate.starts_with(&dir_prefix)
162            } else {
163                pattern_matches(candidate, rule_pattern)
164            }
165        };
166
167        let matched = if anchored {
168            matches_candidate(&path)
169        } else {
170            matches_candidate(&path)
171                || path.split('/').enumerate().any(|(idx, _)| {
172                    matches_candidate(&path.split('/').skip(idx).collect::<Vec<_>>().join("/"))
173                })
174        };
175
176        if matched {
177            ignored = !rule.negated;
178        }
179    }
180
181    ignored
182}
183
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a rule in the tests below.
    fn rule(pattern: &str, negated: bool, directory_only: bool) -> GitignoreRule {
        GitignoreRule {
            pattern: pattern.to_string(),
            negated,
            directory_only,
        }
    }

    // Pieces are trimmed and the empty piece between the commas is dropped.
    #[test]
    fn test_parse_csv_patterns() {
        let parsed = parse_csv_patterns(Some("src/**,  tests/*.ts , ,node_modules/**"));
        let expected = ["src/**", "tests/*.ts", "node_modules/**"];
        assert_eq!(parsed, expected);
    }

    // Covers a directory glob, a `**/` glob, a bare extension glob, a literal
    // path suffix, and one pattern that must not match.
    #[test]
    fn test_pattern_matches_globs_and_suffix() {
        let path = "src/a/b/file.ts";
        for pattern in ["src/**", "**/*.ts", "*.ts", "a/b/file.ts"] {
            assert!(pattern_matches(path, pattern));
        }
        assert!(!pattern_matches(path, "*.tsx"));
    }

    // A later negated rule re-includes a file beneath an ignored directory.
    #[test]
    fn test_gitignore_rule_evaluation_with_negation() {
        let rules = [rule("dist", false, true), rule("dist/keep.ts", true, false)];

        assert!(is_ignored_by_rules("dist", true, &rules));
        assert!(is_ignored_by_rules("dist/a.ts", false, &rules));
        assert!(!is_ignored_by_rules("dist/keep.ts", false, &rules));
    }
}
230
231pub fn walk_source_files_with_config<F>(
232    root: &Path,
233    config: &ScanConfig,
234    is_supported: F,
235) -> Result<Vec<PathBuf>, std::io::Error>
236where
237    F: Fn(&Path) -> bool,
238{
239    let mut out = Vec::new();
240    let rules = if config.respect_gitignore {
241        load_root_gitignore_rules(root)
242    } else {
243        Vec::new()
244    };
245
246    for entry in WalkDir::new(root)
247        .into_iter()
248        .filter_entry(|e| should_descend(root, e.path(), config, &rules))
249    {
250        let entry = entry?;
251        let path = entry.path();
252        if !path.is_file() || !is_supported(path) {
253            continue;
254        }
255
256        if should_include_file(root, path, config, &rules) {
257            out.push(path.to_path_buf());
258        }
259    }
260
261    out.sort();
262    Ok(out)
263}
264
265pub fn should_include_relative_path(
266    relative_path: &str,
267    is_dir: bool,
268    config: &ScanConfig,
269    rules: &[GitignoreRule],
270) -> bool {
271    let rel = relative_path.replace('\\', "/");
272
273    if rel.is_empty() || rel == "." {
274        return true;
275    }
276
277    if rel.starts_with(".graphyn")
278        || rel.starts_with(".git")
279        || rel.contains("/node_modules/")
280        || rel.starts_with("node_modules")
281        || rel.contains("/target/")
282        || rel.starts_with("target")
283    {
284        return false;
285    }
286
287    if config.respect_gitignore && is_ignored_by_rules(&rel, is_dir, rules) {
288        return false;
289    }
290
291    if !config.exclude_patterns.is_empty() && path_matches_any(&rel, &config.exclude_patterns) {
292        return false;
293    }
294
295    if config.include_patterns.is_empty() {
296        return true;
297    }
298
299    path_matches_any(&rel, &config.include_patterns)
300}
301
302fn should_descend(root: &Path, path: &Path, config: &ScanConfig, rules: &[GitignoreRule]) -> bool {
303    if !path.is_dir() {
304        return true;
305    }
306
307    let Ok(relative) = path.strip_prefix(root) else {
308        return true;
309    };
310    let rel = relative.to_string_lossy().replace('\\', "/");
311
312    should_include_relative_path(&rel, true, config, rules)
313}
314
315fn should_include_file(
316    root: &Path,
317    path: &Path,
318    config: &ScanConfig,
319    rules: &[GitignoreRule],
320) -> bool {
321    let Ok(relative) = path.strip_prefix(root) else {
322        return false;
323    };
324
325    let rel = relative.to_string_lossy().replace('\\', "/");
326    should_include_relative_path(&rel, false, config, rules)
327}