1use crate::config::ParserConfig;
2use std::collections::HashMap;
3use std::path::PathBuf;
4use std::time::Duration;
5use tracing::{debug, info, instrument, warn};
6
/// Summary of the entities found in a single parsed source file.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// Path of the file this summary describes.
    pub file_path: PathBuf,

    /// Names of the functions found in the file.
    pub functions: Vec<String>,

    /// Names of the classes found in the file.
    pub classes: Vec<String>,

    /// Names of the modules found in the file.
    pub modules: Vec<String>,

    /// Names of the traits found in the file.
    pub traits: Vec<String>,

    /// Line count recorded for the file.
    pub lines: usize,

    /// Time spent parsing the file.
    pub parse_time: Duration,
}

impl FileInfo {
    /// Creates an empty summary for `file_path`: no entities, zero lines and a
    /// zero parse time.
    pub fn new(file_path: PathBuf) -> Self {
        Self {
            parse_time: Duration::ZERO,
            lines: 0,
            traits: vec![],
            modules: vec![],
            classes: vec![],
            functions: vec![],
            file_path,
        }
    }

    /// Returns the total number of recorded functions, classes, modules and traits.
    pub fn entity_count(&self) -> usize {
        [&self.functions, &self.classes, &self.modules, &self.traits]
            .iter()
            .map(|names| names.len())
            .sum()
    }
}
51
52#[derive(Debug, Clone)]
54pub struct ProjectInfo {
55 pub files: Vec<FileInfo>,
57
58 pub failed_files: HashMap<PathBuf, String>,
60
61 pub total_functions: usize,
63
64 pub total_classes: usize,
66
67 pub total_traits: usize,
69
70 pub total_lines: usize,
72
73 pub total_time: Duration,
75}
76
77impl ProjectInfo {
78 pub fn new() -> Self {
80 Self {
81 files: Vec::new(),
82 failed_files: HashMap::new(),
83 total_functions: 0,
84 total_classes: 0,
85 total_traits: 0,
86 total_lines: 0,
87 total_time: Duration::from_secs(0),
88 }
89 }
90
91 pub fn success_rate(&self) -> f64 {
93 let total = self.files.len() + self.failed_files.len();
94 if total == 0 {
95 return 100.0;
96 }
97 (self.files.len() as f64 / total as f64) * 100.0
98 }
99
100 pub fn avg_parse_time(&self) -> Duration {
102 if self.files.is_empty() {
103 return Duration::from_secs(0);
104 }
105 self.total_time / self.files.len() as u32
106 }
107
108 pub fn add_file(&mut self, file_info: FileInfo) {
110 self.total_functions += file_info.functions.len();
111 self.total_classes += file_info.classes.len();
112 self.total_traits += file_info.traits.len();
113 self.total_lines += file_info.lines;
114 self.total_time += file_info.parse_time;
115 self.files.push(file_info);
116 }
117
118 pub fn add_failure(&mut self, path: PathBuf, error: String) {
120 self.failed_files.insert(path, error);
121 }
122}
123
124impl Default for ProjectInfo {
125 fn default() -> Self {
126 Self::new()
127 }
128}
129
130pub struct Parser {
132 config: ParserConfig,
133}
134
135impl Parser {
136 pub fn new() -> Self {
138 Self {
139 config: ParserConfig::default(),
140 }
141 }
142
143 pub fn with_config(config: ParserConfig) -> Self {
145 Self { config }
146 }
147
148 pub fn config(&self) -> &ParserConfig {
150 &self.config
151 }
152
153 pub fn parse_source(
165 &self,
166 source: &str,
167 file_path: &std::path::Path,
168 graph: &mut codegraph::CodeGraph,
169 ) -> crate::error::Result<FileInfo> {
170 use std::time::Instant;
171
172 let start = Instant::now();
173
174 let ir = crate::extractor::extract(source, file_path, &self.config).map_err(|e| {
176 crate::error::ParseError::SyntaxError {
177 file: file_path.display().to_string(),
178 line: 0,
179 column: 0,
180 message: e,
181 }
182 })?;
183
184 let file_id = crate::builder::build_graph(graph, &ir, file_path.to_str().unwrap_or(""))?;
186
187 let mut file_info = FileInfo::new(file_path.to_path_buf());
189
190 file_info.functions = ir.functions.iter().map(|f| f.name.clone()).collect();
192
193 for class in &ir.classes {
195 for method in &class.methods {
196 file_info
197 .functions
198 .push(format!("{}.{}", class.name, method.name));
199 }
200 }
201
202 file_info.classes = ir.classes.iter().map(|c| c.name.clone()).collect();
203 file_info.traits = ir.traits.iter().map(|t| t.name.clone()).collect();
204
205 if let Some(ref module) = ir.module {
206 file_info.modules.push(module.name.clone());
207 file_info.lines = module.line_count();
208 }
209
210 file_info.parse_time = start.elapsed();
211
212 let _ = file_id;
214
215 Ok(file_info)
216 }
217
218 #[instrument(skip(self, graph), fields(file = %file_path.display()))]
229 pub fn parse_file(
230 &self,
231 file_path: &std::path::Path,
232 graph: &mut codegraph::CodeGraph,
233 ) -> crate::error::Result<FileInfo> {
234 use std::fs;
235
236 debug!("Starting file parse");
237
238 if let Some(ext) = file_path.extension() {
240 if let Some(ext_str) = ext.to_str() {
241 if !self.config.should_parse_extension(ext_str) {
242 warn!("Invalid file extension: {}", ext_str);
243 return Err(crate::error::ParseError::InvalidConfig(format!(
244 "File extension not allowed: {file_path:?}"
245 )));
246 }
247 }
248 }
249
250 let metadata = fs::metadata(file_path).map_err(|e| crate::error::ParseError::IoError {
252 path: file_path.to_path_buf(),
253 source: e,
254 })?;
255
256 if metadata.len() > self.config.max_file_size as u64 {
257 warn!("File too large: {} bytes", metadata.len());
258 return Err(crate::error::ParseError::FileTooLarge {
259 path: file_path.to_path_buf(),
260 max_size: self.config.max_file_size,
261 actual_size: metadata.len() as usize,
262 });
263 }
264
265 let source =
267 fs::read_to_string(file_path).map_err(|e| crate::error::ParseError::IoError {
268 path: file_path.to_path_buf(),
269 source: e,
270 })?;
271
272 let result = self.parse_source(&source, file_path, graph)?;
274
275 info!(
276 functions = result.functions.len(),
277 classes = result.classes.len(),
278 lines = result.lines,
279 time_ms = result.parse_time.as_millis(),
280 "File parsed successfully"
281 );
282
283 Ok(result)
284 }
285
286 #[instrument(skip(self, graph), fields(dir = %dir_path.display()))]
297 pub fn parse_directory(
298 &self,
299 dir_path: &std::path::Path,
300 graph: &mut codegraph::CodeGraph,
301 ) -> crate::error::Result<ProjectInfo> {
302 use std::time::Instant;
303 use walkdir::WalkDir;
304
305 let start = Instant::now();
306 let mut project_info = ProjectInfo::new();
307
308 info!("Starting directory parse");
309
310 let mut files_to_parse = Vec::new();
312
313 for entry in WalkDir::new(dir_path)
314 .follow_links(false)
315 .into_iter()
316 .filter_entry(|e| {
317 if e.file_type().is_dir() {
319 if let Some(name) = e.file_name().to_str() {
320 return !self.config.should_exclude_dir(name);
321 }
322 }
323 true
324 })
325 {
326 match entry {
327 Ok(entry) => {
328 if entry.file_type().is_file() {
329 if let Some(ext) = entry.path().extension() {
330 if let Some(ext_str) = ext.to_str() {
331 if self.config.should_parse_extension(ext_str) {
332 files_to_parse.push(entry.path().to_path_buf());
333 }
334 }
335 }
336 }
337 }
338 Err(e) => {
339 if let Some(path) = e.path() {
341 project_info.add_failure(path.to_path_buf(), e.to_string());
342 }
343 }
344 }
345 }
346
347 if self.config.parallel {
349 self.parse_files_parallel(&files_to_parse, graph, &mut project_info)?;
350 } else {
351 self.parse_files_sequential(&files_to_parse, graph, &mut project_info);
352 }
353
354 project_info.total_time = start.elapsed();
355
356 info!(
357 files_parsed = project_info.files.len(),
358 files_failed = project_info.failed_files.len(),
359 total_functions = project_info.total_functions,
360 total_classes = project_info.total_classes,
361 total_lines = project_info.total_lines,
362 total_time_ms = project_info.total_time.as_millis(),
363 success_rate = project_info.success_rate(),
364 "Directory parse completed"
365 );
366
367 Ok(project_info)
368 }
369
370 fn parse_files_sequential(
372 &self,
373 files: &[PathBuf],
374 graph: &mut codegraph::CodeGraph,
375 project_info: &mut ProjectInfo,
376 ) {
377 for file_path in files {
378 match self.parse_file(file_path, graph) {
379 Ok(file_info) => {
380 project_info.add_file(file_info);
381 }
382 Err(e) => {
383 project_info.add_failure(file_path.clone(), e.to_string());
384 }
385 }
386 }
387 }
388
389 fn parse_files_parallel(
391 &self,
392 files: &[PathBuf],
393 graph: &mut codegraph::CodeGraph,
394 project_info: &mut ProjectInfo,
395 ) -> crate::error::Result<()> {
396 use rayon::prelude::*;
397 use std::sync::Mutex;
398
399 let graph_mutex = Mutex::new(graph);
400 let project_info_mutex = Mutex::new(project_info);
401
402 let pool = if let Some(num_threads) = self.config.num_threads {
404 rayon::ThreadPoolBuilder::new()
405 .num_threads(num_threads)
406 .build()
407 .map_err(|e| {
408 crate::error::ParseError::InvalidConfig(format!(
409 "Failed to create thread pool: {e}"
410 ))
411 })?
412 } else {
413 rayon::ThreadPoolBuilder::new().build().map_err(|e| {
414 crate::error::ParseError::InvalidConfig(format!(
415 "Failed to create thread pool: {e}"
416 ))
417 })?
418 };
419
420 pool.install(|| {
421 files.par_iter().for_each(|file_path| {
422 let parse_result = {
425 let mut graph = graph_mutex.lock().unwrap();
426 self.parse_file(file_path, &mut graph)
427 };
428
429 let mut project_info = project_info_mutex.lock().unwrap();
430 match parse_result {
431 Ok(file_info) => {
432 project_info.add_file(file_info);
433 }
434 Err(e) => {
435 project_info.add_failure(file_path.clone(), e.to_string());
436 }
437 }
438 });
439 });
440
441 Ok(())
442 }
443}
444
445impl Default for Parser {
446 fn default() -> Self {
447 Self::new()
448 }
449}
450
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for comparing computed percentages.
    const EPSILON: f64 = 1e-9;

    /// A fresh `FileInfo` keeps its path and reports no entities.
    #[test]
    fn test_file_info_new() {
        let info = FileInfo::new(PathBuf::from("test.py"));
        assert_eq!(info.file_path, PathBuf::from("test.py"));
        assert_eq!(info.entity_count(), 0);
    }

    /// The success rate is 100% for an empty project and the share of parsed
    /// files otherwise.
    #[test]
    fn test_project_info_success_rate() {
        let mut info = ProjectInfo::new();
        assert!((info.success_rate() - 100.0).abs() < EPSILON);

        info.add_file(FileInfo::new(PathBuf::from("file1.py")));
        info.add_file(FileInfo::new(PathBuf::from("file2.py")));
        info.add_failure(PathBuf::from("file3.py"), "error".to_string());

        // Compare with a tolerance: the exact bits of a computed f64 depend on
        // the order of operations and must not be pinned by the test.
        let expected = 200.0 / 3.0;
        assert!(
            (info.success_rate() - expected).abs() < EPSILON,
            "unexpected success rate: {}",
            info.success_rate()
        );
    }

    /// With no parsed files the average parse time is zero.
    #[test]
    fn test_project_info_avg_parse_time_empty() {
        let info = ProjectInfo::new();
        assert_eq!(info.avg_parse_time(), Duration::from_secs(0));
    }

    /// The default parser uses the default configuration.
    #[test]
    fn test_parser_new() {
        let parser = Parser::new();
        assert!(parser.config().include_private);
    }
}