use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::Instant;
use crate::types::{AcbError, AcbResult, Language};
use super::cpp::CppParser;
use super::csharp::CSharpParser;
use super::go::GoParser;
use super::java::JavaParser;
use super::python::PythonParser;
use super::rust::RustParser;
use super::treesitter::parse_with_language;
use super::typescript::TypeScriptParser;
use super::{LanguageParser, ParseFileError, RawCodeUnit, Severity};
/// Knobs that decide which files a directory parse picks up.
#[derive(Debug, Clone)]
pub struct ParseOptions {
    /// Languages to keep; an empty list disables the language filter.
    pub languages: Vec<Language>,
    /// Exclusion patterns; files whose path matches one are skipped.
    pub exclude: Vec<String>,
    /// When `false`, files the language parser identifies as tests are skipped.
    pub include_tests: bool,
    /// Largest file, in bytes, that will be parsed.
    pub max_file_size: usize,
}

impl Default for ParseOptions {
    /// No language filter, common dependency/build directories excluded,
    /// tests included, and a 10 MiB per-file size cap.
    fn default() -> Self {
        const DEFAULT_EXCLUDES: [&str; 8] = [
            "**/node_modules/**",
            "**/target/**",
            "**/.git/**",
            "**/__pycache__/**",
            "**/venv/**",
            "**/.venv/**",
            "**/dist/**",
            "**/build/**",
        ];
        const DEFAULT_MAX_FILE_SIZE: usize = 10 * 1024 * 1024;

        ParseOptions {
            languages: Vec::new(),
            exclude: DEFAULT_EXCLUDES.iter().copied().map(String::from).collect(),
            include_tests: true,
            max_file_size: DEFAULT_MAX_FILE_SIZE,
        }
    }
}
/// Aggregated outcome of parsing a directory tree.
#[derive(Debug)]
pub struct ParseResult {
/// Code units extracted from every file that parsed successfully.
pub units: Vec<RawCodeUnit>,
/// One entry per file that could not be read or could not be parsed.
pub errors: Vec<ParseFileError>,
/// Counters and timing collected during the run.
pub stats: ParseStats,
}
/// Summary counters produced by a directory parse.
#[derive(Debug, Clone)]
pub struct ParseStats {
/// Number of files that parsed successfully.
pub files_parsed: usize,
/// Total number of skipped files, taken from `ParseCoverageStats::total_skipped`.
pub files_skipped: usize,
/// Number of files that failed while being read or parsed.
pub files_errored: usize,
/// Line count summed over the successfully parsed files.
pub total_lines: usize,
/// Elapsed time of the whole directory parse, in milliseconds.
pub parse_time_ms: u64,
/// Number of successfully parsed files per language.
pub by_language: HashMap<Language, usize>,
/// Detailed breakdown of why files were skipped or failed.
pub coverage: ParseCoverageStats,
}
/// Fine-grained accounting of what happened to every file seen during a
/// directory parse.
#[derive(Debug, Clone, Default)]
pub struct ParseCoverageStats {
    /// Regular files yielded by the directory walk.
    pub files_seen: usize,
    /// Files that survived the collection filters and were handed to parsing.
    pub files_candidate: usize,
    /// Files whose language could not be determined from their path.
    pub skipped_unknown_language: usize,
    /// Files dropped because their language is not in the requested set.
    pub skipped_language_filter: usize,
    /// Files dropped because they matched an exclusion pattern.
    pub skipped_excluded_pattern: usize,
    /// Files dropped because they exceed the configured size limit.
    pub skipped_too_large: usize,
    /// Files dropped because they were identified as test files.
    pub skipped_test_file: usize,
    /// Files that could not be read.
    pub read_errors: usize,
    /// Files that were read but failed to parse.
    pub parse_errors: usize,
    /// Lower-cased extension of each unknown-language file, with its count.
    pub unsupported_extensions: HashMap<String, usize>,
}

impl ParseCoverageStats {
    /// Sum of every `skipped_*` counter.
    ///
    /// Read and parse errors are not skips and are therefore not included.
    pub fn total_skipped(&self) -> usize {
        let skip_counters = [
            self.skipped_unknown_language,
            self.skipped_language_filter,
            self.skipped_excluded_pattern,
            self.skipped_too_large,
            self.skipped_test_file,
        ];
        skip_counters.iter().sum()
    }
}
/// Output of the file-collection pass: the files to parse plus the skip
/// counters gathered while walking the tree.
struct CollectFilesResult {
/// Paths that passed every collection filter.
files: Vec<PathBuf>,
/// Coverage counters accumulated during the walk.
coverage: ParseCoverageStats,
}
/// Multi-language front end that hands each file to the extractor registered
/// for its language.
pub struct Parser {
/// Registered extractors, keyed by the language they handle.
parsers: HashMap<Language, Box<dyn LanguageParser>>,
}
impl Parser {
    /// Builds a parser with an extractor registered for every supported language.
    ///
    /// JavaScript is handled by its own `TypeScriptParser` instance.
    pub fn new() -> Self {
        let mut parsers: HashMap<Language, Box<dyn LanguageParser>> = HashMap::new();
        parsers.insert(Language::Python, Box::new(PythonParser::new()));
        parsers.insert(Language::Rust, Box::new(RustParser::new()));
        parsers.insert(Language::TypeScript, Box::new(TypeScriptParser::new()));
        parsers.insert(Language::JavaScript, Box::new(TypeScriptParser::new()));
        parsers.insert(Language::Go, Box::new(GoParser::new()));
        parsers.insert(Language::Cpp, Box::new(CppParser::new()));
        parsers.insert(Language::Java, Box::new(JavaParser::new()));
        parsers.insert(Language::CSharp, Box::new(CSharpParser::new()));
        Self { parsers }
    }

    /// Parses `content` as the language implied by `path` and extracts its code units.
    ///
    /// Files with a `tsx` or `jsx` extension are parsed with the TSX grammar;
    /// every other file uses the grammar returned by
    /// `Language::tree_sitter_language`.
    ///
    /// # Errors
    ///
    /// Returns [`AcbError::ParseError`] when the language is unknown, when no
    /// extractor is registered for it, or when it has no tree-sitter grammar.
    /// Errors from `parse_with_language` and from the extractor are propagated.
    pub fn parse_file(&self, path: &Path, content: &str) -> AcbResult<Vec<RawCodeUnit>> {
        let lang = Language::from_path(path);
        if lang == Language::Unknown {
            return Err(AcbError::ParseError {
                path: path.to_path_buf(),
                message: "Unknown language".into(),
            });
        }

        let parser = self
            .parsers
            .get(&lang)
            .ok_or_else(|| AcbError::ParseError {
                path: path.to_path_buf(),
                message: format!("No parser for language: {}", lang),
            })?;

        let ts_lang = if matches!(
            path.extension().and_then(|e| e.to_str()),
            Some("tsx") | Some("jsx")
        ) {
            tree_sitter_typescript::language_tsx()
        } else {
            lang.tree_sitter_language()
                .ok_or_else(|| AcbError::ParseError {
                    path: path.to_path_buf(),
                    message: format!("No tree-sitter grammar for: {}", lang),
                })?
        };

        let tree = parse_with_language(content, ts_lang)?;
        parser.extract_units(&tree, content, path)
    }

    /// Parses every eligible file under `root` and aggregates the results.
    ///
    /// Files are first collected according to `options` (language filter and
    /// exclusion patterns), then size-checked, optionally filtered for tests,
    /// and parsed. A failure on a single file is recorded in
    /// `ParseResult::errors` and does not abort the run.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the file-collection pass.
    pub fn parse_directory(&self, root: &Path, options: &ParseOptions) -> AcbResult<ParseResult> {
        let start = Instant::now();
        let CollectFilesResult {
            files,
            mut coverage,
        } = self.collect_files(root, options)?;

        let mut all_units = Vec::new();
        let mut all_errors = Vec::new();
        let mut files_parsed = 0usize;
        let mut files_errored = 0usize;
        let mut total_lines = 0usize;
        let mut by_language: HashMap<Language, usize> = HashMap::new();

        // `usize` is never wider than 64 bits on supported targets, so this
        // widening cast cannot truncate.
        let size_limit = options.max_file_size as u64;

        for file_path in &files {
            // Check the on-disk size first so oversized files are never
            // loaded into memory. If the metadata is unavailable, fall through
            // and let the read below report the problem.
            if let Ok(meta) = std::fs::metadata(file_path) {
                if meta.len() > size_limit {
                    coverage.skipped_too_large += 1;
                    continue;
                }
            }

            let content = match std::fs::read_to_string(file_path) {
                Ok(c) => c,
                Err(e) => {
                    all_errors.push(ParseFileError {
                        path: file_path.clone(),
                        span: None,
                        message: format!("Could not read file: {}", e),
                        severity: Severity::Error,
                    });
                    files_errored += 1;
                    coverage.read_errors += 1;
                    continue;
                }
            };

            // Fallback for files whose metadata was unavailable or that grew
            // between the size check and the read.
            if content.len() > options.max_file_size {
                coverage.skipped_too_large += 1;
                continue;
            }

            let lang = Language::from_path(file_path);
            if lang == Language::Unknown {
                coverage.skipped_unknown_language += 1;
                continue;
            }

            if !options.include_tests {
                if let Some(parser) = self.parsers.get(&lang) {
                    if parser.is_test_file(file_path, &content) {
                        coverage.skipped_test_file += 1;
                        continue;
                    }
                }
            }

            match self.parse_file(file_path, &content) {
                Ok(units) => {
                    total_lines += content.lines().count();
                    *by_language.entry(lang).or_insert(0) += 1;
                    all_units.extend(units);
                    files_parsed += 1;
                }
                Err(e) => {
                    all_errors.push(ParseFileError {
                        path: file_path.clone(),
                        span: None,
                        message: e.to_string(),
                        severity: Severity::Error,
                    });
                    files_errored += 1;
                    coverage.parse_errors += 1;
                }
            }
        }

        let elapsed = start.elapsed();
        let files_skipped = coverage.total_skipped();
        Ok(ParseResult {
            units: all_units,
            errors: all_errors,
            stats: ParseStats {
                files_parsed,
                files_skipped,
                files_errored,
                total_lines,
                // A run would have to last hundreds of millions of years to
                // exceed `u64` milliseconds, so the narrowing cast is safe.
                parse_time_ms: elapsed.as_millis() as u64,
                by_language,
                coverage,
            },
        })
    }

    /// Returns `true` when `path` maps to a known language that has a
    /// registered extractor.
    pub fn should_parse(&self, path: &Path) -> bool {
        let lang = Language::from_path(path);
        lang != Language::Unknown && self.parsers.contains_key(&lang)
    }

    /// Walks `root` and returns the files that should be parsed, together with
    /// counters describing everything that was filtered out.
    ///
    /// Exclusion patterns are matched against each path *relative to `root`*,
    /// so the name of a directory above the project (for example a checkout
    /// living under `/work/build/`) can never exclude the whole tree.
    fn collect_files(&self, root: &Path, options: &ParseOptions) -> AcbResult<CollectFilesResult> {
        use ignore::WalkBuilder;

        let mut files = Vec::new();
        let mut coverage = ParseCoverageStats::default();
        let walker = WalkBuilder::new(root).hidden(true).git_ignore(true).build();

        for entry in walker {
            // Deliberately best-effort: entries the walker cannot access are
            // ignored rather than failing the whole collection.
            let entry = match entry {
                Ok(e) => e,
                Err(_) => continue,
            };
            let path = entry.path();
            if !path.is_file() {
                continue;
            }
            coverage.files_seen += 1;

            let lang = Language::from_path(path);
            if lang == Language::Unknown {
                coverage.skipped_unknown_language += 1;
                if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
                    *coverage
                        .unsupported_extensions
                        .entry(ext.to_lowercase())
                        .or_insert(0) += 1;
                }
                continue;
            }

            if !options.languages.is_empty() && !options.languages.contains(&lang) {
                coverage.skipped_language_filter += 1;
                continue;
            }

            let relative = path.strip_prefix(root).unwrap_or(path);
            if self.is_excluded(relative, &options.exclude) {
                coverage.skipped_excluded_pattern += 1;
                continue;
            }

            files.push(path.to_path_buf());
        }

        coverage.files_candidate = files.len();
        Ok(CollectFilesResult { files, coverage })
    }

    /// Returns `true` when `path` matches at least one pattern in `excludes`.
    ///
    /// Patterns are `/`-separated globs compared against whole path
    /// components, so `**/build/**` excludes `src/build/x.rs` but not
    /// `src/builder.rs`. Within a segment `*` matches any run of characters
    /// and `?` matches a single character; a `**` segment matches any number
    /// of components. A pattern may match anywhere inside the path: leading
    /// and trailing `**` segments are implied. Patterns that contain no
    /// concrete segment (such as `""` or `"**"`) never match.
    fn is_excluded(&self, path: &Path, excludes: &[String]) -> bool {
        // Only real names take part in matching; roots, prefixes, `.` and `..`
        // are ignored.
        let components: Vec<String> = path
            .components()
            .filter_map(|component| match component {
                std::path::Component::Normal(name) => Some(name.to_string_lossy().into_owned()),
                _ => None,
            })
            .collect();

        excludes.iter().any(|pattern| {
            let all_segments: Vec<&str> = pattern
                .split('/')
                .filter(|segment| !segment.is_empty())
                .collect();

            // Matching is unanchored, so outer `**` segments carry no information.
            let first = all_segments.iter().position(|segment| *segment != "**");
            let last = all_segments.iter().rposition(|segment| *segment != "**");
            let segments = match (first, last) {
                (Some(first), Some(last)) => &all_segments[first..=last],
                _ => return false,
            };

            (0..components.len())
                .any(|start| Self::segments_match(segments, &components[start..]))
        })
    }

    /// Checks whether `segments` matches a prefix of `components`, with `**`
    /// standing for zero or more whole components.
    fn segments_match(segments: &[&str], components: &[String]) -> bool {
        match segments.split_first() {
            None => true,
            Some((segment, rest)) if *segment == "**" => (0..=components.len())
                .any(|skipped| Self::segments_match(rest, &components[skipped..])),
            Some((segment, rest)) => match components.split_first() {
                Some((component, tail)) => {
                    Self::wildcard_match(segment, component) && Self::segments_match(rest, tail)
                }
                None => false,
            },
        }
    }

    /// Matches one path component against one glob segment (`*` and `?` only).
    fn wildcard_match(pattern: &str, text: &str) -> bool {
        let pattern: Vec<char> = pattern.chars().collect();
        let text: Vec<char> = text.chars().collect();

        let mut p = 0usize;
        let mut t = 0usize;
        // Position to resume from when a later literal fails:
        // (pattern index just after the last `*`, text index that `*` has consumed up to).
        let mut resume: Option<(usize, usize)> = None;

        while t < text.len() {
            if p < pattern.len() && pattern[p] == '*' {
                resume = Some((p + 1, t));
                p += 1;
            } else if p < pattern.len() && (pattern[p] == '?' || pattern[p] == text[t]) {
                p += 1;
                t += 1;
            } else if let Some((after_star, consumed)) = resume {
                // Let the last `*` swallow one more character and retry.
                p = after_star;
                t = consumed + 1;
                resume = Some((after_star, consumed + 1));
            } else {
                return false;
            }
        }

        // Anything left in the pattern must be able to match the empty string.
        pattern[p..].iter().all(|&c| c == '*')
    }
}
impl Default for Parser {
fn default() -> Self {
Self::new()
}
}