use crate::config::ParserConfig;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use tracing::{debug, info, instrument, warn};
/// Summary of what was recorded for a single parsed source file.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file this summary belongs to.
    pub file_path: PathBuf,
    /// Function names recorded for the file.
    pub functions: Vec<String>,
    /// Class names recorded for the file.
    pub classes: Vec<String>,
    /// Module names recorded for the file.
    pub modules: Vec<String>,
    /// Trait names recorded for the file.
    pub traits: Vec<String>,
    /// Line count recorded for the file.
    pub lines: usize,
    /// Time spent producing this summary.
    pub parse_time: Duration,
}

impl FileInfo {
    /// Creates an empty summary for `file_path`: no entities, zero lines,
    /// and a zero parse time.
    pub fn new(file_path: PathBuf) -> Self {
        FileInfo {
            file_path,
            functions: Vec::default(),
            classes: Vec::default(),
            modules: Vec::default(),
            traits: Vec::default(),
            lines: usize::default(),
            parse_time: Duration::ZERO,
        }
    }

    /// Returns the combined number of functions, classes, modules and traits.
    pub fn entity_count(&self) -> usize {
        [&self.functions, &self.classes, &self.modules, &self.traits]
            .iter()
            .map(|names| names.len())
            .sum()
    }
}
/// Aggregated results of parsing a set of files.
#[derive(Debug, Clone)]
pub struct ProjectInfo {
    /// Summaries of the files added through [`ProjectInfo::add_file`], in insertion order.
    pub files: Vec<FileInfo>,
    /// Files recorded through [`ProjectInfo::add_failure`], mapped to their error text.
    pub failed_files: HashMap<PathBuf, String>,
    /// Sum of `functions.len()` over all added files.
    pub total_functions: usize,
    /// Sum of `classes.len()` over all added files.
    pub total_classes: usize,
    /// Sum of `traits.len()` over all added files.
    pub total_traits: usize,
    /// Sum of `lines` over all added files.
    pub total_lines: usize,
    /// Accumulated `parse_time` of all added files (the field is public, so
    /// callers may overwrite it with a different measurement).
    pub total_time: Duration,
}

impl ProjectInfo {
    /// Creates an empty aggregate with all counters at zero.
    pub fn new() -> Self {
        Self {
            files: Vec::new(),
            failed_files: HashMap::new(),
            total_functions: 0,
            total_classes: 0,
            total_traits: 0,
            total_lines: 0,
            total_time: Duration::from_secs(0),
        }
    }

    /// Returns the percentage (0.0–100.0) of recorded files that parsed successfully.
    ///
    /// Reports `100.0` when nothing has been recorded yet.
    pub fn success_rate(&self) -> f64 {
        let total = self.files.len() + self.failed_files.len();
        if total == 0 {
            return 100.0;
        }
        (self.files.len() as f64 / total as f64) * 100.0
    }

    /// Returns `total_time` divided by the number of successfully parsed files,
    /// or a zero duration when there are none.
    pub fn avg_parse_time(&self) -> Duration {
        if self.files.is_empty() {
            return Duration::from_secs(0);
        }
        // `Duration` can only be divided by a `u32`. A plain `as u32` cast would
        // truncate, and a file count that is a multiple of 2^32 would become 0
        // and panic on division, so saturate at `u32::MAX` instead.
        let divisor = self.files.len().min(u32::MAX as usize) as u32;
        self.total_time / divisor
    }

    /// Adds a successfully parsed file and folds its counts and parse time
    /// into the running totals.
    pub fn add_file(&mut self, file_info: FileInfo) {
        self.total_functions += file_info.functions.len();
        self.total_classes += file_info.classes.len();
        self.total_traits += file_info.traits.len();
        self.total_lines += file_info.lines;
        self.total_time += file_info.parse_time;
        self.files.push(file_info);
    }

    /// Records that `path` failed with `error`.
    ///
    /// Failures are keyed by path, so a later failure for the same path
    /// replaces the earlier message.
    pub fn add_failure(&mut self, path: PathBuf, error: String) {
        self.failed_files.insert(path, error);
    }
}

impl Default for ProjectInfo {
    /// Equivalent to [`ProjectInfo::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Parser front end; every parse call on it consults the stored [`ParserConfig`].
pub struct Parser {
/// Settings read by the parse methods (extension filter, file-size limit,
/// directory exclusion, parallelism).
config: ParserConfig,
}
impl Parser {
pub fn new() -> Self {
Self {
config: ParserConfig::default(),
}
}
pub fn with_config(config: ParserConfig) -> Self {
Self { config }
}
pub fn config(&self) -> &ParserConfig {
&self.config
}
/// Extracts entities from `source`, adds them to `graph`, and returns a
/// [`FileInfo`] summary for `file_path`.
///
/// Functions that have a parent class are listed as `Class.name`; the module
/// name and line count are filled in only when the extractor reports a module.
/// `parse_time` covers extraction, graph building and summary construction.
///
/// # Errors
///
/// Returns `ParseError::SyntaxError` (with `line` and `column` set to 0) when
/// extraction fails, and propagates any error from `crate::builder::build_graph`.
pub fn parse_source(
    &self,
    source: &str,
    file_path: &std::path::Path,
    graph: &mut codegraph::CodeGraph,
) -> crate::error::Result<FileInfo> {
    use std::time::Instant;

    let start = Instant::now();
    let ir = crate::extractor::extract(source, file_path, &self.config).map_err(|e| {
        crate::error::ParseError::SyntaxError {
            file: file_path.display().to_string(),
            line: 0,
            column: 0,
            message: e,
        }
    })?;

    // A non-UTF-8 path used to be passed on as "", which would give every such
    // file the same empty name. The lossy form keeps a recognisable path and
    // matches the lossy `display()` used for the error above.
    let graph_path = file_path.to_string_lossy();
    // The returned id is not needed for the summary; bind it only so the
    // result is visibly consumed.
    let _file_id = crate::builder::build_graph(graph, &ir, &*graph_path)?;

    let mut file_info = FileInfo::new(file_path.to_path_buf());
    file_info.functions = ir
        .functions
        .iter()
        .map(|f| match f.parent_class {
            Some(ref class_name) => format!("{}.{}", class_name, f.name),
            None => f.name.clone(),
        })
        .collect();
    file_info.classes = ir.classes.iter().map(|c| c.name.clone()).collect();
    file_info.traits = ir.traits.iter().map(|t| t.name.clone()).collect();
    if let Some(ref module) = ir.module {
        file_info.modules.push(module.name.clone());
        file_info.lines = module.line_count;
    }
    file_info.parse_time = start.elapsed();
    Ok(file_info)
}
#[instrument(skip(self, graph), fields(file = %file_path.display()))]
pub fn parse_file(
&self,
file_path: &std::path::Path,
graph: &mut codegraph::CodeGraph,
) -> crate::error::Result<FileInfo> {
use std::fs;
debug!("Starting file parse");
if let Some(ext) = file_path.extension() {
if let Some(ext_str) = ext.to_str() {
if !self.config.should_parse_extension(ext_str) {
warn!("Invalid file extension: {}", ext_str);
return Err(crate::error::ParseError::InvalidConfig(format!(
"File extension not allowed: {file_path:?}"
)));
}
}
}
let metadata = fs::metadata(file_path).map_err(|e| crate::error::ParseError::IoError {
path: file_path.to_path_buf(),
source: e,
})?;
if metadata.len() > self.config.max_file_size as u64 {
warn!("File too large: {} bytes", metadata.len());
return Err(crate::error::ParseError::FileTooLarge {
path: file_path.to_path_buf(),
max_size: self.config.max_file_size,
actual_size: metadata.len() as usize,
});
}
let source =
fs::read_to_string(file_path).map_err(|e| crate::error::ParseError::IoError {
path: file_path.to_path_buf(),
source: e,
})?;
let result = self.parse_source(&source, file_path, graph)?;
info!(
functions = result.functions.len(),
classes = result.classes.len(),
lines = result.lines,
time_ms = result.parse_time.as_millis(),
"File parsed successfully"
);
Ok(result)
}
#[instrument(skip(self, graph), fields(dir = %dir_path.display()))]
pub fn parse_directory(
&self,
dir_path: &std::path::Path,
graph: &mut codegraph::CodeGraph,
) -> crate::error::Result<ProjectInfo> {
use std::time::Instant;
use walkdir::WalkDir;
let start = Instant::now();
let mut project_info = ProjectInfo::new();
info!("Starting directory parse");
let mut files_to_parse = Vec::new();
for entry in WalkDir::new(dir_path)
.follow_links(false)
.into_iter()
.filter_entry(|e| {
if e.file_type().is_dir() {
if let Some(name) = e.file_name().to_str() {
return !self.config.should_exclude_dir(name);
}
}
true
})
{
match entry {
Ok(entry) => {
if entry.file_type().is_file() {
if let Some(ext) = entry.path().extension() {
if let Some(ext_str) = ext.to_str() {
if self.config.should_parse_extension(ext_str) {
files_to_parse.push(entry.path().to_path_buf());
}
}
}
}
}
Err(e) => {
if let Some(path) = e.path() {
project_info.add_failure(path.to_path_buf(), e.to_string());
}
}
}
}
if self.config.parallel {
self.parse_files_parallel(&files_to_parse, graph, &mut project_info)?;
} else {
self.parse_files_sequential(&files_to_parse, graph, &mut project_info);
}
project_info.total_time = start.elapsed();
info!(
files_parsed = project_info.files.len(),
files_failed = project_info.failed_files.len(),
total_functions = project_info.total_functions,
total_classes = project_info.total_classes,
total_lines = project_info.total_lines,
total_time_ms = project_info.total_time.as_millis(),
success_rate = project_info.success_rate(),
"Directory parse completed"
);
Ok(project_info)
}
/// Parses `files` one at a time, in slice order, on the calling thread.
///
/// Each success is added to `project_info`; each failure is recorded there
/// under the file's path with the error's display text.
fn parse_files_sequential(
    &self,
    files: &[PathBuf],
    graph: &mut codegraph::CodeGraph,
    project_info: &mut ProjectInfo,
) {
    files.iter().for_each(|path| {
        let outcome = self.parse_file(path, graph);
        match outcome {
            Ok(parsed) => project_info.add_file(parsed),
            Err(failure) => project_info.add_failure(path.clone(), failure.to_string()),
        }
    });
}
fn parse_files_parallel(
&self,
files: &[PathBuf],
graph: &mut codegraph::CodeGraph,
project_info: &mut ProjectInfo,
) -> crate::error::Result<()> {
use rayon::prelude::*;
use std::sync::Mutex;
let graph_mutex = Mutex::new(graph);
let project_info_mutex = Mutex::new(project_info);
let pool = if let Some(num_threads) = self.config.num_threads {
rayon::ThreadPoolBuilder::new()
.num_threads(num_threads)
.build()
.map_err(|e| {
crate::error::ParseError::InvalidConfig(format!(
"Failed to create thread pool: {e}"
))
})?
} else {
rayon::ThreadPoolBuilder::new().build().map_err(|e| {
crate::error::ParseError::InvalidConfig(format!(
"Failed to create thread pool: {e}"
))
})?
};
pool.install(|| {
files.par_iter().for_each(|file_path| {
let parse_result = {
let mut graph = graph_mutex.lock().unwrap();
self.parse_file(file_path, &mut graph)
};
let mut project_info = project_info_mutex.lock().unwrap();
match parse_result {
Ok(file_info) => {
project_info.add_file(file_info);
}
Err(e) => {
project_info.add_failure(file_path.clone(), e.to_string());
}
}
});
});
Ok(())
}
}
impl Default for Parser {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created `FileInfo` keeps its path and holds no entities.
    #[test]
    fn test_file_info_new() {
        let info = FileInfo::new(PathBuf::from("test.py"));
        assert_eq!(info.file_path, PathBuf::from("test.py"));
        assert_eq!(info.entity_count(), 0);
    }

    /// The success rate is 100% with nothing recorded and 2/3 after two
    /// successes and one failure.
    #[test]
    fn test_project_info_success_rate() {
        let mut info = ProjectInfo::new();
        // Exact comparison is fine here: the empty case returns the literal 100.0.
        assert_eq!(info.success_rate(), 100.0);

        info.add_file(FileInfo::new(PathBuf::from("file1.py")));
        info.add_file(FileInfo::new(PathBuf::from("file2.py")));
        info.add_failure(PathBuf::from("file3.py"), "error".to_string());

        // Compare the computed value with a tolerance rather than against a
        // hand-typed decimal expansion, which depends on the exact rounding.
        let expected = 200.0 / 3.0;
        let actual = info.success_rate();
        assert!(
            (actual - expected).abs() < 1e-9,
            "expected about {expected}, got {actual}"
        );
    }

    /// `Parser::new` uses the default configuration, which includes private items.
    #[test]
    fn test_parser_new() {
        let parser = Parser::new();
        assert!(parser.config().include_private);
    }
}