Skip to main content

codemod_core/scanner/
parallel.rs

//! Parallel file scanning using Rayon.
//!
//! When scanning large codebases the I/O and parsing work can be distributed
//! across multiple threads. This module provides a thin wrapper around
//! [`rayon`] that parallelizes the per-file matching step.
6
7use std::path::PathBuf;
8use std::sync::{Arc, Mutex};
9
10use rayon::prelude::*;
11
12use crate::language::LanguageAdapter;
13use crate::pattern::matcher::PatternMatcher;
14use crate::pattern::Pattern;
15use crate::scanner::{ScanMatch, StaticLanguageInfo};
16
17/// Result of scanning a single file in parallel.
18#[derive(Debug)]
19pub struct FileResult {
20    /// The file that was scanned.
21    pub file_path: PathBuf,
22    /// Matches found in this file.
23    pub matches: Vec<ScanMatch>,
24    /// Error message if scanning failed for this file.
25    pub error: Option<String>,
26}
27
28/// Scan a batch of files in parallel using Rayon's thread pool.
29///
30/// Each file is read, parsed, and matched independently. Results are
31/// collected into a `Vec<FileResult>`.
32///
33/// # Arguments
34///
35/// - `files`: Paths to the files to scan.
36/// - `pattern`: The transformation pattern to match against.
37/// - `language`: The language adapter (must be `Send + Sync`).
38///
39/// # Returns
40///
41/// A vector of [`FileResult`]s, one per input file (in arbitrary order).
42pub fn scan_files_parallel(
43    files: &[PathBuf],
44    pattern: &Pattern,
45    language: &dyn LanguageAdapter,
46) -> Vec<FileResult> {
47    // Capture a snapshot of the language adapter data so each Rayon task can
48    // build its own PatternMatcher without needing Send + Sync on the
49    // original adapter.
50    let snapshot = StaticLanguageInfo::snapshot(language);
51    let lang_name = snapshot.name().to_string();
52    let lang_obj = snapshot.language();
53    let lang_exts: Vec<String> = snapshot
54        .file_extensions()
55        .iter()
56        .map(|s| s.to_string())
57        .collect();
58    let lang_stmts: Vec<String> = snapshot
59        .statement_node_types()
60        .iter()
61        .map(|s| s.to_string())
62        .collect();
63    let lang_exprs: Vec<String> = snapshot
64        .expression_node_types()
65        .iter()
66        .map(|s| s.to_string())
67        .collect();
68    let lang_ids: Vec<String> = snapshot
69        .identifier_node_types()
70        .iter()
71        .map(|s| s.to_string())
72        .collect();
73
74    let pattern = pattern.clone();
75    let results: Arc<Mutex<Vec<FileResult>>> =
76        Arc::new(Mutex::new(Vec::with_capacity(files.len())));
77
78    files.par_iter().for_each(|file_path| {
79        let file_result = scan_single_file(
80            file_path,
81            &pattern,
82            &lang_name,
83            &lang_obj,
84            &lang_exts,
85            &lang_stmts,
86            &lang_exprs,
87            &lang_ids,
88        );
89        results.lock().unwrap().push(file_result);
90    });
91
92    Arc::try_unwrap(results).unwrap().into_inner().unwrap()
93}
94
95/// Scan a single file (called from within a Rayon task).
96#[allow(clippy::too_many_arguments)]
97fn scan_single_file(
98    file_path: &PathBuf,
99    pattern: &Pattern,
100    lang_name: &str,
101    lang_obj: &tree_sitter::Language,
102    lang_exts: &[String],
103    lang_stmts: &[String],
104    lang_exprs: &[String],
105    lang_ids: &[String],
106) -> FileResult {
107    // Read file.
108    let source = match std::fs::read_to_string(file_path) {
109        Ok(s) => s,
110        Err(e) => {
111            return FileResult {
112                file_path: file_path.clone(),
113                matches: Vec::new(),
114                error: Some(format!("Failed to read file: {e}")),
115            };
116        }
117    };
118
119    // Build a per-thread language adapter.
120    let adapter = InlineLanguageAdapter {
121        name: lang_name.to_string(),
122        lang: lang_obj.clone(),
123        extensions: lang_exts.to_vec(),
124        statements: lang_stmts.to_vec(),
125        expressions: lang_exprs.to_vec(),
126        identifiers: lang_ids.to_vec(),
127    };
128
129    let matcher = PatternMatcher::new(Box::new(adapter));
130
131    match matcher.find_matches(&source, pattern) {
132        Ok(matches) => {
133            let scan_matches: Vec<ScanMatch> = matches
134                .into_iter()
135                .map(|m| {
136                    let (ctx_before, ctx_after) =
137                        extract_context(&source, m.start_position.line, 3);
138                    ScanMatch {
139                        file_path: file_path.clone(),
140                        line: m.start_position.line + 1,
141                        column: m.start_position.column,
142                        matched_text: m.matched_text,
143                        context_before: ctx_before,
144                        context_after: ctx_after,
145                    }
146                })
147                .collect();
148            FileResult {
149                file_path: file_path.clone(),
150                matches: scan_matches,
151                error: None,
152            }
153        }
154        Err(e) => FileResult {
155            file_path: file_path.clone(),
156            matches: Vec::new(),
157            error: Some(format!("Matching error: {e}")),
158        },
159    }
160}
161
/// Extract context lines around a given 0-indexed line number.
///
/// Returns `(before, after)`:
///
/// - `before`: up to `radius` lines immediately preceding `line`.
/// - `after`: up to `radius` lines immediately following `line`.
///
/// The line itself is not included in either string, and the lines in each
/// string are joined with `\n`. Requested lines that fall outside `source`
/// are simply skipped, so a `line` past the end of the text yields whatever
/// real lines fall inside the window (possibly none) instead of panicking.
fn extract_context(source: &str, line: usize, radius: usize) -> (String, String) {
    let first = line.saturating_sub(radius);

    // `skip`/`take` stop at the end of the text on their own, so no explicit
    // bounds checks (and no slice indexing that could panic) are needed.
    let before: Vec<&str> = source.lines().skip(first).take(line - first).collect();
    let after: Vec<&str> = source
        .lines()
        .skip(line.saturating_add(1))
        .take(radius)
        .collect();

    (before.join("\n"), after.join("\n"))
}
177
// ---------------------------------------------------------------------------
// Inline language adapter for parallel tasks
// ---------------------------------------------------------------------------
181
182/// A small owned language adapter used inside Rayon tasks.
183struct InlineLanguageAdapter {
184    name: String,
185    lang: tree_sitter::Language,
186    extensions: Vec<String>,
187    statements: Vec<String>,
188    expressions: Vec<String>,
189    identifiers: Vec<String>,
190}
191
192unsafe impl Send for InlineLanguageAdapter {}
193unsafe impl Sync for InlineLanguageAdapter {}
194
195impl LanguageAdapter for InlineLanguageAdapter {
196    fn name(&self) -> &str {
197        &self.name
198    }
199    fn language(&self) -> tree_sitter::Language {
200        self.lang.clone()
201    }
202    fn file_extensions(&self) -> &[&str] {
203        let refs: Vec<&str> = self.extensions.iter().map(|s| s.as_str()).collect();
204        Box::leak(refs.into_boxed_slice())
205    }
206    fn statement_node_types(&self) -> &[&str] {
207        let refs: Vec<&str> = self.statements.iter().map(|s| s.as_str()).collect();
208        Box::leak(refs.into_boxed_slice())
209    }
210    fn expression_node_types(&self) -> &[&str] {
211        let refs: Vec<&str> = self.expressions.iter().map(|s| s.as_str()).collect();
212        Box::leak(refs.into_boxed_slice())
213    }
214    fn identifier_node_types(&self) -> &[&str] {
215        let refs: Vec<&str> = self.identifiers.iter().map(|s| s.as_str()).collect();
216        Box::leak(refs.into_boxed_slice())
217    }
218}