// codemod_core/scanner/walker.rs
use std::fs;
7use std::path::{Path, PathBuf};
8
9use globset::{Glob, GlobSet, GlobSetBuilder};
10use walkdir::WalkDir;
11
12use crate::error::CodemodError;
13use crate::language::LanguageAdapter;
14use crate::scanner::ScanConfig;
15
16pub struct FileWalker {
18 target_dir: PathBuf,
19 include_set: Option<GlobSet>,
20 exclude_set: Option<GlobSet>,
21 respect_gitignore: bool,
22 max_file_size: usize,
23 gitignore_patterns: Vec<GlobSet>,
24}
25
26impl FileWalker {
27 pub fn new(config: &ScanConfig) -> crate::Result<Self> {
34 let target_dir = config.target_dir.clone();
35
36 if !target_dir.is_dir() {
37 return Err(CodemodError::Scan(format!(
38 "Target directory does not exist or is not a directory: {}",
39 target_dir.display()
40 )));
41 }
42
43 let include_set = Self::build_globset(&config.include_patterns)?;
44 let exclude_set = Self::build_globset(&config.exclude_patterns)?;
45
46 let gitignore_patterns = if config.respect_gitignore {
47 Self::load_gitignore(&target_dir)
48 } else {
49 Vec::new()
50 };
51
52 Ok(Self {
53 target_dir,
54 include_set,
55 exclude_set,
56 respect_gitignore: config.respect_gitignore,
57 max_file_size: config.max_file_size,
58 gitignore_patterns,
59 })
60 }
61
62 pub fn collect_files(&self, language: &dyn LanguageAdapter) -> crate::Result<Vec<PathBuf>> {
69 let mut files = Vec::new();
70
71 for entry in WalkDir::new(&self.target_dir)
72 .follow_links(false)
73 .into_iter()
74 .filter_entry(|e| !self.is_hidden(e.path()))
75 {
76 let entry =
77 entry.map_err(|e| CodemodError::Scan(format!("Error walking directory: {e}")))?;
78
79 let path = entry.path();
80
81 if !entry.file_type().is_file() {
83 continue;
84 }
85
86 if !language.supports_file(path) {
88 continue;
89 }
90
91 if let Some(ref inc) = self.include_set {
93 if !inc.is_match(path) {
94 continue;
95 }
96 }
97
98 if let Some(ref exc) = self.exclude_set {
100 if exc.is_match(path) {
101 continue;
102 }
103 }
104
105 if self.respect_gitignore && self.is_gitignored(path) {
107 continue;
108 }
109
110 if let Ok(meta) = fs::metadata(path) {
112 if meta.len() as usize > self.max_file_size {
113 log::debug!(
114 "Skipping large file ({} bytes): {}",
115 meta.len(),
116 path.display()
117 );
118 continue;
119 }
120 }
121
122 files.push(path.to_path_buf());
123 }
124
125 Ok(files)
126 }
127
128 fn build_globset(patterns: &[String]) -> crate::Result<Option<GlobSet>> {
134 if patterns.is_empty() {
135 return Ok(None);
136 }
137
138 let mut builder = GlobSetBuilder::new();
139 for pat in patterns {
140 let glob = Glob::new(pat)
141 .map_err(|e| CodemodError::Scan(format!("Invalid glob pattern '{pat}': {e}")))?;
142 builder.add(glob);
143 }
144
145 let set = builder
146 .build()
147 .map_err(|e| CodemodError::Scan(format!("Failed to build glob set: {e}")))?;
148
149 Ok(Some(set))
150 }
151
152 fn load_gitignore(target_dir: &Path) -> Vec<GlobSet> {
154 let gitignore_path = target_dir.join(".gitignore");
155 if !gitignore_path.is_file() {
156 return Vec::new();
157 }
158
159 let content = match fs::read_to_string(&gitignore_path) {
160 Ok(c) => c,
161 Err(_) => return Vec::new(),
162 };
163
164 let mut sets = Vec::new();
165 let mut builder = GlobSetBuilder::new();
166 let mut has_patterns = false;
167
168 for line in content.lines() {
169 let trimmed = line.trim();
170 if trimmed.is_empty() || trimmed.starts_with('#') {
171 continue;
172 }
173
174 let pattern = if trimmed.ends_with('/') {
176 format!("**/{}", trimmed.trim_end_matches('/'))
177 } else {
178 format!("**/{trimmed}")
179 };
180
181 if let Ok(glob) = Glob::new(&pattern) {
182 builder.add(glob);
183 has_patterns = true;
184 }
185 }
186
187 if has_patterns {
188 if let Ok(set) = builder.build() {
189 sets.push(set);
190 }
191 }
192
193 sets
194 }
195
196 fn is_gitignored(&self, path: &Path) -> bool {
198 for set in &self.gitignore_patterns {
199 if set.is_match(path) {
200 return true;
201 }
202 }
203 false
204 }
205
206 fn is_hidden(&self, path: &Path) -> bool {
208 path.file_name()
209 .and_then(|name| name.to_str())
210 .map(|name| name.starts_with('.'))
211 .unwrap_or(false)
212 }
213}
214
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty pattern list means "no filter", i.e. no glob set at all.
    #[test]
    fn test_build_globset_empty() {
        let built = FileWalker::build_globset(&[]).unwrap();
        assert!(built.is_none());
    }

    /// Valid patterns compile into a set that matches exactly the expected paths.
    #[test]
    fn test_build_globset_valid() {
        let patterns = vec![String::from("**/*.rs"), String::from("**/*.toml")];
        let built = FileWalker::build_globset(&patterns).unwrap();
        assert!(built.is_some());

        let set = built.unwrap();
        let expectations = [
            ("src/main.rs", true),
            ("Cargo.toml", true),
            ("README.md", false),
        ];
        for &(candidate, should_match) in &expectations {
            assert_eq!(set.is_match(candidate), should_match);
        }
    }

    /// A malformed glob is reported as an error instead of being ignored.
    #[test]
    fn test_build_globset_invalid() {
        let patterns = vec![String::from("[invalid")];
        assert!(FileWalker::build_globset(&patterns).is_err());
    }
}