use std::fs;
use std::path::{Path, PathBuf};
use rayon::prelude::*;
use crate::graph::{append_csharp_structural_ir, append_java_class_ir, derive_project_name};
use crate::ir::{FileIr, ProjectIr};
use crate::scanner::{FileScanConfig, ParsedFile, ScannerError};
use crate::{parse_once, LanguageId};
/// Maps a path's file extension to the language used to parse it.
///
/// The extension is compared ASCII-case-insensitively. Returns `None` when the
/// path has no extension, the extension is not valid UTF-8, or the extension
/// is not one of the recognised ones.
fn language_from_extension(path: &Path) -> Option<LanguageId> {
    let extension = path
        .extension()
        .and_then(|raw| raw.to_str())?
        .to_ascii_lowercase();

    let language = match extension.as_str() {
        "java" => LanguageId::Java,
        "js" => LanguageId::JavaScript,
        "ts" => LanguageId::TypeScript,
        "tsx" => LanguageId::Tsx,
        "py" => LanguageId::Python,
        "rs" => LanguageId::Rust,
        "go" => LanguageId::Go,
        // Erlang source files and header files share one grammar.
        "erl" | "hrl" => LanguageId::Erlang,
        "cs" => LanguageId::CSharp,
        _ => return None,
    };
    Some(language)
}
/// Heuristically decides whether `path` looks like a test source file.
///
/// A path is treated as a test file when either:
///
/// * one of its parent directories is named like a test directory
///   (`test`, `tests`, `spec`, `specs`, `__tests__`, `__test__`, `testing`,
///   `testcases` or `t`), or
/// * its file stem (the file name without its final extension) follows a test
///   naming convention, e.g. `test_foo`, `foo_test`, `FooTest`, `FooTests`,
///   `foo.spec`, `foo_eunit`.
///
/// All comparisons are case-insensitive. Directory names are compared
/// component by component instead of searching the rendered path for
/// `"/name/"`, so a relative path that *starts* with a test directory
/// (`tests/foo.rs`) is recognised, and the check does not depend on `/` being
/// the platform's path separator.
///
/// The check is purely lexical; the file system is never consulted. A file
/// stem that is not valid UTF-8 never matches the naming conventions.
fn is_test_file(path: &Path) -> bool {
    // `src/test` from the historical pattern list is implied by `test`.
    const TEST_DIR_NAMES: &[&str] = &[
        "test",
        "tests",
        "spec",
        "specs",
        "__tests__",
        "__test__",
        "testing",
        "testcases",
        "t",
    ];
    const STEM_PREFIXES: &[&str] = &["test_", "test-"];
    // The bare `test` suffix (Java-style `FooTest`) already covers `_test`,
    // `-test` and `.test`; they stay listed to document the conventions.
    const STEM_SUFFIXES: &[&str] = &[
        "_test", "-test", "test", "_spec", ".spec", ".test", "tests", "_eunit",
    ];
    const STEM_INFIXES: &[&str] = &["_test_", "-test-"];

    // Only directories count here, so the final component (the file name) is
    // excluded by walking the parent path.
    let in_test_dir = path
        .parent()
        .into_iter()
        .flat_map(|dir| dir.iter())
        .any(|component| {
            let name = component.to_string_lossy().to_lowercase();
            TEST_DIR_NAMES.iter().any(|dir_name| name == *dir_name)
        });
    if in_test_dir {
        return true;
    }

    path.file_stem()
        .and_then(|stem| stem.to_str())
        .map_or(false, |stem| {
            let stem = stem.to_lowercase();
            STEM_PREFIXES.iter().any(|&prefix| stem.starts_with(prefix))
                || STEM_SUFFIXES.iter().any(|&suffix| stem.ends_with(suffix))
                || STEM_INFIXES.iter().any(|&infix| stem.contains(infix))
        })
}
/// Selects which kind of output `scan_incremental` produces for a set of targets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IncrementalPlan {
/// Parse every supported target and return the parsed files as a `Vec<ParsedFile>`
/// (handled by `scan_and_parse_incremental_vector`).
VectorParsedFiles,
/// Process supported targets one at a time and accumulate the results into a
/// single `ProjectIr` (handled by `scan_and_stream_incremental_ir`).
StreamingIr,
}
/// Output of `scan_incremental`; each variant carries the payload produced for
/// the `IncrementalPlan` variant of the same name.
#[derive(Debug)]
pub enum IncrementalScanResult {
/// Parsed files returned by `scan_and_parse_incremental_vector`.
VectorParsedFiles(Vec<ParsedFile>),
/// Project IR returned by `scan_and_stream_incremental_ir`.
StreamingIr(ProjectIr),
}
/// Resolves every incremental target against the scan root.
///
/// Absolute targets are copied unchanged; any other target is joined onto
/// `config.root`. The output has one entry per input, in the same order.
pub fn normalize_targets(config: &FileScanConfig, parse_targets: &[PathBuf]) -> Vec<PathBuf> {
    let mut resolved = Vec::with_capacity(parse_targets.len());
    for target in parse_targets {
        let anchored = if target.is_absolute() {
            target.clone()
        } else {
            config.root.join(target)
        };
        resolved.push(anchored);
    }
    resolved
}
/// Pairs each target with the language inferred from its file extension.
///
/// Targets for which `language_from_extension` yields no language are left
/// out; the relative order of the remaining targets is preserved.
pub fn iter_supported_targets(normalized_targets: &[PathBuf]) -> Vec<(PathBuf, LanguageId)> {
    let mut supported = Vec::new();
    for target in normalized_targets {
        if let Some(language) = language_from_extension(target) {
            supported.push((target.clone(), language));
        }
    }
    supported
}
pub fn scan_and_parse_incremental_vector(
config: &FileScanConfig,
parse_targets: &[PathBuf],
) -> Result<Vec<ParsedFile>, ScannerError> {
let normalized_targets = normalize_targets(config, parse_targets);
let supported_targets = iter_supported_targets(&normalized_targets);
let results: Result<Vec<_>, ScannerError> = supported_targets
.into_par_iter()
.map(|(path, language)| {
let metadata = fs::metadata(&path).map_err(|source| ScannerError::ReadFile {
path: path.clone(),
source,
})?;
if !metadata.is_file() {
return Err(ScannerError::ReadFile {
path: path.clone(),
source: std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"incremental target is not a file",
),
});
}
if let Some(max) = config.max_file_size {
if metadata.len() > max {
return Err(ScannerError::ReadFile {
path: path.clone(),
source: std::io::Error::new(
std::io::ErrorKind::InvalidData,
"incremental target exceeds max_file_size",
),
});
}
}
let source = fs::read_to_string(&path).map_err(|source| ScannerError::ReadFile {
path: path.clone(),
source,
})?;
let tree = parse_once(language, &source).map_err(|source| ScannerError::Parse {
path: path.clone(),
source,
})?;
let is_test = is_test_file(&path);
Ok(ParsedFile {
path,
language,
tree,
source,
is_test,
})
})
.collect();
results
}
pub fn scan_and_stream_incremental_ir(
config: &FileScanConfig,
parse_targets: &[PathBuf],
) -> Result<ProjectIr, ScannerError> {
let normalized_targets = normalize_targets(config, parse_targets);
let supported_targets = iter_supported_targets(&normalized_targets);
let mut ir = ProjectIr::empty();
for (path, language) in supported_targets {
let metadata = fs::metadata(&path).map_err(|source| ScannerError::ReadFile {
path: path.clone(),
source,
})?;
if !metadata.is_file() {
return Err(ScannerError::ReadFile {
path: path.clone(),
source: std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"incremental target is not a file",
),
});
}
if let Some(max) = config.max_file_size {
if metadata.len() > max {
return Err(ScannerError::ReadFile {
path: path.clone(),
source: std::io::Error::new(
std::io::ErrorKind::InvalidData,
"incremental target exceeds max_file_size",
),
});
}
}
let source = fs::read_to_string(&path).map_err(|source| ScannerError::ReadFile {
path: path.clone(),
source,
})?;
let tree = parse_once(language, &source).map_err(|source| ScannerError::Parse {
path: path.clone(),
source,
})?;
let file_path = path.display().to_string();
let project_name = derive_project_name(&path, &config.root);
ir.files.push(FileIr {
path: file_path.clone(),
language: language.to_string(),
framework: None,
project_name: project_name.clone(),
});
match language {
LanguageId::CSharp => {
append_csharp_structural_ir(&mut ir, &file_path, project_name, &tree, &source);
}
LanguageId::Java => {
append_java_class_ir(&mut ir, &file_path, project_name, &tree, &source);
}
_ => {}
}
}
Ok(ir)
}
/// Runs an incremental scan using the strategy selected by `plan`.
///
/// Dispatches to `scan_and_parse_incremental_vector` or
/// `scan_and_stream_incremental_ir` and wraps the successful result in the
/// matching [`IncrementalScanResult`] variant.
///
/// # Errors
///
/// Propagates the [`ScannerError`] of the selected scan function unchanged.
pub fn scan_incremental(
    plan: IncrementalPlan,
    config: &FileScanConfig,
    parse_targets: &[PathBuf],
) -> Result<IncrementalScanResult, ScannerError> {
    match plan {
        IncrementalPlan::VectorParsedFiles => {
            scan_and_parse_incremental_vector(config, parse_targets)
                .map(IncrementalScanResult::VectorParsedFiles)
        }
        IncrementalPlan::StreamingIr => scan_and_stream_incremental_ir(config, parse_targets)
            .map(IncrementalScanResult::StreamingIr),
    }
}
/// Parses the given incremental targets into `ParsedFile`s.
///
/// Thin alias: forwards both arguments to `scan_and_parse_incremental_vector`
/// and returns its result unchanged, including any `ScannerError`.
pub fn scan_and_parse_incremental(
config: &FileScanConfig,
parse_targets: &[PathBuf],
) -> Result<Vec<ParsedFile>, ScannerError> {
scan_and_parse_incremental_vector(config, parse_targets)
}