Skip to main content

dupes_core/
scanner.rs

1use std::path::{Path, PathBuf};
2use walkdir::WalkDir;
3
/// Configuration for scanning the filesystem for source files.
///
/// Create one with [`ScanConfig::new`] and refine it with the chained
/// `with_*` builder methods.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanConfig {
    /// Root directory to scan.
    pub root: PathBuf,
    /// Patterns to exclude. Matching is currently a plain substring test
    /// against the path, not real glob expansion.
    pub exclude_patterns: Vec<String>,
    /// File extensions to include (without the leading dot). Defaults to `["rs"]`.
    pub extensions: Vec<String>,
}

impl ScanConfig {
    /// Create a configuration rooted at `root` with no exclusion patterns and
    /// the default extension list `["rs"]`.
    #[must_use]
    pub fn new(root: PathBuf) -> Self {
        Self {
            root,
            exclude_patterns: Vec::new(),
            extensions: vec!["rs".to_string()],
        }
    }

    /// Replace the exclusion patterns with `patterns` and return the updated
    /// configuration. Any previously set patterns are discarded.
    #[must_use]
    pub fn with_excludes(mut self, patterns: Vec<String>) -> Self {
        self.exclude_patterns = patterns;
        self
    }

    /// Replace the included extensions with `extensions` (given without the
    /// leading dot) and return the updated configuration. The default
    /// `["rs"]` list is discarded, not extended.
    #[must_use]
    pub fn with_extensions(mut self, extensions: Vec<String>) -> Self {
        self.extensions = extensions;
        self
    }
}
36
37/// Scan for source files under the given config.
38/// Always skips `target/` directories.
39#[must_use]
40pub fn scan_files(config: &ScanConfig) -> Vec<PathBuf> {
41    let mut files = Vec::new();
42
43    for entry in WalkDir::new(&config.root)
44        .into_iter()
45        .filter_entry(|e| {
46            let path = e.path();
47            // Only filter directories (not the root itself for hidden check)
48            if path.is_dir()
49                && let Some(name) = path.file_name().and_then(|n| n.to_str())
50            {
51                if name == "target" {
52                    return false;
53                }
54                // Skip hidden directories, but not the root
55                if name.starts_with('.') && path != config.root.as_path() {
56                    return false;
57                }
58            }
59            true
60        })
61        .flatten()
62    {
63        let path = entry.path();
64        if path.is_file()
65            && path
66                .extension()
67                .and_then(|ext| ext.to_str())
68                .is_some_and(|ext| {
69                    config
70                        .extensions
71                        .iter()
72                        .any(|e| e.eq_ignore_ascii_case(ext))
73                })
74            && !is_excluded(path, &config.exclude_patterns)
75        {
76            files.push(path.to_path_buf());
77        }
78    }
79
80    files
81}
82
/// Check if a path should be excluded based on exclusion patterns.
///
/// Returns `true` when the path, converted lossily to UTF-8, contains at
/// least one of `patterns` as a plain substring.
///
/// Empty patterns are ignored: every string contains `""`, so a stray empty
/// entry in the list would otherwise exclude every path.
#[must_use]
pub fn is_excluded(path: &Path, patterns: &[String]) -> bool {
    let path_str = path.to_string_lossy();
    patterns
        .iter()
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| path_str.contains(pattern.as_str()))
}
91
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Build a small project tree under `dir`: three `.rs` files in `src/`
    /// (one nested in `src/utils/`), one `.rs` file each inside `target/` and
    /// `.hidden/`, and a non-Rust `readme.md`.
    fn create_test_tree(dir: &Path) {
        fs::create_dir_all(dir.join("src")).unwrap();
        fs::create_dir_all(dir.join("src/utils")).unwrap();
        fs::create_dir_all(dir.join("target/debug")).unwrap();
        fs::create_dir_all(dir.join(".hidden")).unwrap();
        fs::write(dir.join("src/main.rs"), "fn main() {}").unwrap();
        fs::write(dir.join("src/lib.rs"), "pub mod utils;").unwrap();
        fs::write(dir.join("src/utils/helper.rs"), "pub fn help() {}").unwrap();
        fs::write(dir.join("target/debug/build.rs"), "fn build() {}").unwrap();
        fs::write(dir.join(".hidden/secret.rs"), "fn secret() {}").unwrap();
        fs::write(dir.join("src/readme.md"), "# README").unwrap();
    }

    /// Run the scan and return the results relative to the scan root, so that
    /// assertions cannot be affected by whatever the temp-dir prefix contains.
    fn scan_relative(config: &ScanConfig) -> Vec<PathBuf> {
        scan_files(config)
            .into_iter()
            .map(|file| {
                file.strip_prefix(&config.root)
                    .expect("scanned paths lie under the scan root")
                    .to_path_buf()
            })
            .collect()
    }

    /// Whether any component of `path` is exactly `name`.
    fn has_component(path: &Path, name: &str) -> bool {
        path.components().any(|part| part.as_os_str() == name)
    }

    #[test]
    fn scan_finds_rust_files() {
        let tmp = TempDir::new().unwrap();
        create_test_tree(tmp.path());
        let config = ScanConfig::new(tmp.path().to_path_buf());
        let files = scan_files(&config);
        assert_eq!(files.len(), 3);
        assert!(files.iter().all(|f| f.extension().unwrap() == "rs"));
    }

    #[test]
    fn scan_skips_target_directory() {
        let tmp = TempDir::new().unwrap();
        create_test_tree(tmp.path());
        let config = ScanConfig::new(tmp.path().to_path_buf());
        let files = scan_relative(&config);
        assert!(!files.iter().any(|f| has_component(f, "target")));
    }

    #[test]
    fn scan_skips_hidden_directories() {
        let tmp = TempDir::new().unwrap();
        create_test_tree(tmp.path());
        let config = ScanConfig::new(tmp.path().to_path_buf());
        let files = scan_relative(&config);
        assert!(!files.iter().any(|f| has_component(f, ".hidden")));
    }

    #[test]
    fn scan_respects_exclude_patterns() {
        let tmp = TempDir::new().unwrap();
        create_test_tree(tmp.path());
        let config =
            ScanConfig::new(tmp.path().to_path_buf()).with_excludes(vec!["utils".to_string()]);
        let files = scan_relative(&config);
        assert!(!files.iter().any(|f| has_component(f, "utils")));
        assert_eq!(files.len(), 2);
    }

    #[test]
    fn scan_empty_directory() {
        let tmp = TempDir::new().unwrap();
        let config = ScanConfig::new(tmp.path().to_path_buf());
        let files = scan_files(&config);
        assert!(files.is_empty());
    }

    #[test]
    fn is_excluded_works() {
        let path = Path::new("/foo/bar/tests/test.rs");
        assert!(is_excluded(path, &["tests".to_string()]));
        assert!(!is_excluded(path, &["benches".to_string()]));
    }
}