Skip to main content

impactsense_parser/
scanner_incremental.rs

1use std::fs;
2use std::path::{Path, PathBuf};
3
4use rayon::prelude::*;
5
6use crate::graph::{append_csharp_structural_ir, append_java_class_ir, derive_project_name};
7use crate::ir::{FileIr, ProjectIr};
8use crate::scanner::{FileScanConfig, ParsedFile, ScannerError};
9use crate::{parse_once, LanguageId};
10
11/// Map a file extension to a supported language.
12fn language_from_extension(path: &Path) -> Option<LanguageId> {
13    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
14    match ext.as_str() {
15        "java" => Some(LanguageId::Java),
16        "js" => Some(LanguageId::JavaScript),
17        "ts" => Some(LanguageId::TypeScript),
18        "tsx" => Some(LanguageId::Tsx),
19        "py" => Some(LanguageId::Python),
20        "rs" => Some(LanguageId::Rust),
21        "go" => Some(LanguageId::Go),
22        "erl" | "hrl" => Some(LanguageId::Erlang),
23        "cs" => Some(LanguageId::CSharp),
24        _ => None,
25    }
26}
27
/// Detect whether a file path indicates a test file based on common patterns.
///
/// A path is considered a test file when either:
///
/// * any directory it lives in is named like a test directory (`test`, `tests`,
///   `spec`, `__tests__`, `t`, ...), compared case-insensitively, or
/// * its file stem carries a test-like prefix, suffix, or infix (`test_foo`,
///   `FooTest`, `foo.spec`, `foo_eunit`, ...), compared case-insensitively.
///
/// Directory names are matched per path component rather than by searching the
/// path string for `/name/`. This makes detection work for relative paths that
/// start with a test directory (`tests/foo.rs`) and for platforms whose path
/// separator is not `/`. The final component is always treated as the file name
/// and is never matched as a directory.
fn is_test_file(path: &Path) -> bool {
    const TEST_DIR_NAMES: [&str; 9] = [
        "test",
        "tests",
        "spec",
        "specs",
        "__tests__",
        "__test__",
        "testing",
        "testcases",
        "t",
    ];
    const STEM_PREFIXES: [&str; 2] = ["test_", "test-"];
    // The bare `test` suffix currently subsumes `_test`, `-test` and `.test`;
    // they stay listed so tightening the bare suffix later does not silently
    // drop the separator-delimited forms.
    // NOTE(review): the bare `test`/`tests` suffixes also match stems such as
    // `latest` or `contests`; presumably they exist for CamelCase names like
    // `FooTest` — confirm before narrowing.
    const STEM_SUFFIXES: [&str; 8] = [
        "_test", "-test", "test", "_spec", ".spec", ".test", "tests", "_eunit",
    ];
    const STEM_INFIXES: [&str; 2] = ["_test_", "-test-"];

    // Walk the components of the containing directory; root and prefix
    // components never equal one of the names, so they are harmless here.
    let in_test_dir = path
        .parent()
        .into_iter()
        .flat_map(|dir| dir.iter())
        .any(|component| {
            let name = component.to_string_lossy().to_lowercase();
            TEST_DIR_NAMES.contains(&name.as_str())
        });
    if in_test_dir {
        return true;
    }

    // Stems that are not valid UTF-8 are never classified by name.
    let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
        return false;
    };
    let stem = stem.to_lowercase();

    STEM_PREFIXES.iter().any(|&prefix| stem.starts_with(prefix))
        || STEM_SUFFIXES.iter().any(|&suffix| stem.ends_with(suffix))
        || STEM_INFIXES.iter().any(|&infix| stem.contains(infix))
}
72
/// Selects how incremental parse targets are processed and what is returned.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IncrementalPlan {
    /// Produce a full `Vec<ParsedFile>`, keeping each AST and source in memory.
    VectorParsedFiles,
    /// Process targets one at a time and build compact IR, without retaining
    /// the AST or source of each target.
    StreamingIr,
}
81
/// Result wrapper for incremental execution plans.
///
/// Each variant carries the output of the `IncrementalPlan` variant of the
/// same name, as produced by `scan_incremental`.
#[derive(Debug)]
pub enum IncrementalScanResult {
    /// Parsed files returned by the vector plan.
    VectorParsedFiles(Vec<ParsedFile>),
    /// Project IR returned by the streaming plan.
    StreamingIr(ProjectIr),
}
88
89/// Normalize targets so all paths are absolute under `config.root`.
90pub fn normalize_targets(config: &FileScanConfig, parse_targets: &[PathBuf]) -> Vec<PathBuf> {
91    parse_targets
92        .iter()
93        .map(|p| {
94            if p.is_absolute() {
95                p.clone()
96            } else {
97                config.root.join(p)
98            }
99        })
100        .collect()
101}
102
103/// Keep only targets with supported language extensions.
104pub fn iter_supported_targets(normalized_targets: &[PathBuf]) -> Vec<(PathBuf, LanguageId)> {
105    normalized_targets
106        .iter()
107        .filter_map(|path| {
108            let language = language_from_extension(path)?;
109            Some((path.clone(), language))
110        })
111        .collect()
112}
113
114/// Parse only caller-provided targets and return `Vec<ParsedFile>`.
115pub fn scan_and_parse_incremental_vector(
116    config: &FileScanConfig,
117    parse_targets: &[PathBuf],
118) -> Result<Vec<ParsedFile>, ScannerError> {
119    let normalized_targets = normalize_targets(config, parse_targets);
120    let supported_targets = iter_supported_targets(&normalized_targets);
121
122    let results: Result<Vec<_>, ScannerError> = supported_targets
123        .into_par_iter()
124        .map(|(path, language)| {
125            let metadata = fs::metadata(&path).map_err(|source| ScannerError::ReadFile {
126                path: path.clone(),
127                source,
128            })?;
129
130            if !metadata.is_file() {
131                return Err(ScannerError::ReadFile {
132                    path: path.clone(),
133                    source: std::io::Error::new(
134                        std::io::ErrorKind::InvalidInput,
135                        "incremental target is not a file",
136                    ),
137                });
138            }
139
140            if let Some(max) = config.max_file_size {
141                if metadata.len() > max {
142                    return Err(ScannerError::ReadFile {
143                        path: path.clone(),
144                        source: std::io::Error::new(
145                            std::io::ErrorKind::InvalidData,
146                            "incremental target exceeds max_file_size",
147                        ),
148                    });
149                }
150            }
151
152            let source = fs::read_to_string(&path).map_err(|source| ScannerError::ReadFile {
153                path: path.clone(),
154                source,
155            })?;
156
157            let tree = parse_once(language, &source).map_err(|source| ScannerError::Parse {
158                path: path.clone(),
159                source,
160            })?;
161            let is_test = is_test_file(&path);
162
163            Ok(ParsedFile {
164                path,
165                language,
166                tree,
167                source,
168                is_test,
169            })
170        })
171        .collect();
172
173    results
174}
175
176/// Stream parse targets and return compact project IR.
177///
178/// This retains much less memory than `scan_and_parse_incremental_vector`
179/// because AST/source are not kept after each target is converted to IR.
180pub fn scan_and_stream_incremental_ir(
181    config: &FileScanConfig,
182    parse_targets: &[PathBuf],
183) -> Result<ProjectIr, ScannerError> {
184    let normalized_targets = normalize_targets(config, parse_targets);
185    let supported_targets = iter_supported_targets(&normalized_targets);
186
187    let mut ir = ProjectIr::empty();
188
189    for (path, language) in supported_targets {
190        let metadata = fs::metadata(&path).map_err(|source| ScannerError::ReadFile {
191            path: path.clone(),
192            source,
193        })?;
194
195        if !metadata.is_file() {
196            return Err(ScannerError::ReadFile {
197                path: path.clone(),
198                source: std::io::Error::new(
199                    std::io::ErrorKind::InvalidInput,
200                    "incremental target is not a file",
201                ),
202            });
203        }
204
205        if let Some(max) = config.max_file_size {
206            if metadata.len() > max {
207                return Err(ScannerError::ReadFile {
208                    path: path.clone(),
209                    source: std::io::Error::new(
210                        std::io::ErrorKind::InvalidData,
211                        "incremental target exceeds max_file_size",
212                    ),
213                });
214            }
215        }
216
217        let source = fs::read_to_string(&path).map_err(|source| ScannerError::ReadFile {
218            path: path.clone(),
219            source,
220        })?;
221
222        // Parse for parity with vector mode. AST and source are dropped after this loop body.
223        let tree = parse_once(language, &source).map_err(|source| ScannerError::Parse {
224            path: path.clone(),
225            source,
226        })?;
227
228        let file_path = path.display().to_string();
229        let project_name = derive_project_name(&path, &config.root);
230
231        ir.files.push(FileIr {
232            path: file_path.clone(),
233            language: language.to_string(),
234            framework: None,
235            project_name: project_name.clone(),
236        });
237
238        match language {
239            LanguageId::CSharp => {
240                append_csharp_structural_ir(&mut ir, &file_path, project_name, &tree, &source);
241            }
242            LanguageId::Java => {
243                append_java_class_ir(&mut ir, &file_path, project_name, &tree, &source);
244            }
245            _ => {}
246        }
247    }
248
249    Ok(ir)
250}
251
252/// Dispatcher to select incremental execution plan by flag/config.
253pub fn scan_incremental(
254    plan: IncrementalPlan,
255    config: &FileScanConfig,
256    parse_targets: &[PathBuf],
257) -> Result<IncrementalScanResult, ScannerError> {
258    match plan {
259        IncrementalPlan::VectorParsedFiles => {
260            let files = scan_and_parse_incremental_vector(config, parse_targets)?;
261            Ok(IncrementalScanResult::VectorParsedFiles(files))
262        }
263        IncrementalPlan::StreamingIr => {
264            let ir = scan_and_stream_incremental_ir(config, parse_targets)?;
265            Ok(IncrementalScanResult::StreamingIr(ir))
266        }
267    }
268}
269
/// Backward-compatible alias to current incremental vector behavior.
///
/// Forwards both arguments to `scan_and_parse_incremental_vector` and returns
/// its result unchanged.
pub fn scan_and_parse_incremental(
    config: &FileScanConfig,
    parse_targets: &[PathBuf],
) -> Result<Vec<ParsedFile>, ScannerError> {
    scan_and_parse_incremental_vector(config, parse_targets)
}