codemod_core/scanner/
mod.rs1pub mod parallel;
10pub mod walker;
11
12pub use walker::FileWalker;
13
14use serde::{Deserialize, Serialize};
15use std::path::PathBuf;
16use std::time::Instant;
17
18use crate::language::LanguageAdapter;
19use crate::pattern::matcher::PatternMatcher;
20use crate::pattern::Pattern;
21
/// Settings for a scan.
///
/// A reference to this value is handed to `FileWalker::new` by
/// `Scanner::scan`; how each field is interpreted is decided by the walker.
#[derive(Debug, Clone)]
pub struct ScanConfig {
    /// Target directory. Defaults to `"."`.
    pub target_dir: PathBuf,
    /// Include patterns. Empty by default.
    /// NOTE(review): pattern syntax (presumably globs) is defined by the walker — confirm.
    pub include_patterns: Vec<String>,
    /// Exclude patterns. Empty by default.
    /// NOTE(review): same pattern syntax as `include_patterns`, presumably — confirm.
    pub exclude_patterns: Vec<String>,
    /// Flag for honouring `.gitignore` rules (presumably during file
    /// discovery — confirm against the walker). Defaults to `true`.
    pub respect_gitignore: bool,
    /// Maximum file size. Defaults to `1_000_000`.
    /// NOTE(review): unit is presumably bytes — confirm against the walker.
    pub max_file_size: usize,
}

impl Default for ScanConfig {
    /// Builds the default configuration: target `"."`, no include or exclude
    /// patterns, `.gitignore` respected, and a size limit of `1_000_000`.
    fn default() -> Self {
        let target_dir = PathBuf::from(".");
        ScanConfig {
            target_dir,
            include_patterns: Vec::new(),
            exclude_patterns: Vec::new(),
            respect_gitignore: true,
            max_file_size: 1_000_000,
        }
    }
}
52
53#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct ScanResult {
60 pub total_files_scanned: usize,
62 pub total_matches: usize,
64 pub matches: Vec<ScanMatch>,
66 pub duration_ms: u64,
68}
69
70#[derive(Debug, Clone, Serialize, Deserialize)]
72pub struct ScanMatch {
73 pub file_path: PathBuf,
75 pub line: usize,
77 pub column: usize,
79 pub matched_text: String,
81 pub context_before: String,
83 pub context_after: String,
85}
86
87pub struct Scanner {
93 config: ScanConfig,
94 language: Box<dyn LanguageAdapter>,
95}
96
97impl Scanner {
98 pub fn new(config: ScanConfig, language: Box<dyn LanguageAdapter>) -> Self {
100 Self { config, language }
101 }
102
103 pub fn scan(&self, pattern: &Pattern) -> crate::Result<ScanResult> {
114 let start = Instant::now();
115
116 let walker = FileWalker::new(&self.config)?;
118 let files = walker.collect_files(&*self.language)?;
119
120 log::info!("Found {} eligible files to scan", files.len());
121
122 let matcher = PatternMatcher::new(self.make_language_clone());
124 let mut scan_matches = Vec::new();
125 let mut total_files_scanned: usize = 0;
126
127 for file_path in &files {
128 let source = match std::fs::read_to_string(file_path) {
129 Ok(s) => s,
130 Err(e) => {
131 log::warn!("Skipping {}: {e}", file_path.display());
132 continue;
133 }
134 };
135
136 total_files_scanned += 1;
137
138 match matcher.find_matches(&source, pattern) {
139 Ok(matches) => {
140 for m in matches {
141 let (ctx_before, ctx_after) =
142 Self::extract_context(&source, m.start_position.line, 3);
143 scan_matches.push(ScanMatch {
144 file_path: file_path.clone(),
145 line: m.start_position.line + 1, column: m.start_position.column,
147 matched_text: m.matched_text.clone(),
148 context_before: ctx_before,
149 context_after: ctx_after,
150 });
151 }
152 }
153 Err(e) => {
154 log::warn!("Error matching in {}: {e}", file_path.display());
155 }
156 }
157 }
158
159 let duration_ms = start.elapsed().as_millis() as u64;
160 let total_matches = scan_matches.len();
161
162 Ok(ScanResult {
163 total_files_scanned,
164 total_matches,
165 matches: scan_matches,
166 duration_ms,
167 })
168 }
169
170 fn extract_context(source: &str, line: usize, radius: usize) -> (String, String) {
176 let lines: Vec<&str> = source.lines().collect();
177 let start_before = line.saturating_sub(radius);
178 let end_after = (line + radius + 1).min(lines.len());
179
180 let before = lines[start_before..line].join("\n");
181 let after = if line + 1 < lines.len() {
182 lines[(line + 1)..end_after].join("\n")
183 } else {
184 String::new()
185 };
186
187 (before, after)
188 }
189
190 fn make_language_clone(&self) -> Box<dyn LanguageAdapter> {
195 Box::new(StaticLanguageInfo::snapshot(&*self.language))
196 }
197}
198
199pub(crate) struct StaticLanguageInfo {
210 name: String,
211 lang: tree_sitter::Language,
212 extensions: Vec<String>,
213 statements: Vec<String>,
214 expressions: Vec<String>,
215 identifiers: Vec<String>,
216}
217
218impl StaticLanguageInfo {
219 pub(crate) fn snapshot(adapter: &dyn LanguageAdapter) -> Self {
220 Self {
221 name: adapter.name().to_string(),
222 lang: adapter.language(),
223 extensions: adapter
224 .file_extensions()
225 .iter()
226 .map(|s| s.to_string())
227 .collect(),
228 statements: adapter
229 .statement_node_types()
230 .iter()
231 .map(|s| s.to_string())
232 .collect(),
233 expressions: adapter
234 .expression_node_types()
235 .iter()
236 .map(|s| s.to_string())
237 .collect(),
238 identifiers: adapter
239 .identifier_node_types()
240 .iter()
241 .map(|s| s.to_string())
242 .collect(),
243 }
244 }
245}
246
247impl LanguageAdapter for StaticLanguageInfo {
248 fn name(&self) -> &str {
249 &self.name
250 }
251
252 fn language(&self) -> tree_sitter::Language {
253 self.lang.clone()
254 }
255
256 fn file_extensions(&self) -> &[&str] {
257 let refs: Vec<&str> = self.extensions.iter().map(|s| s.as_str()).collect();
261 Box::leak(refs.into_boxed_slice())
262 }
263
264 fn statement_node_types(&self) -> &[&str] {
265 let refs: Vec<&str> = self.statements.iter().map(|s| s.as_str()).collect();
266 Box::leak(refs.into_boxed_slice())
267 }
268
269 fn expression_node_types(&self) -> &[&str] {
270 let refs: Vec<&str> = self.expressions.iter().map(|s| s.as_str()).collect();
271 Box::leak(refs.into_boxed_slice())
272 }
273
274 fn identifier_node_types(&self) -> &[&str] {
275 let refs: Vec<&str> = self.identifiers.iter().map(|s| s.as_str()).collect();
276 Box::leak(refs.into_boxed_slice())
277 }
278}