1use crate::{
4 languages::*, CodeConstruct, ConstructMetadata, Error, ErrorType, FileError, Language,
5 LanguageDetection, ParseOptions, ParsedFile, ParsedProject,
6};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9
10use tokio::fs;
11use tree_sitter::{Node, Parser, Tree};
12use walkdir::WalkDir;
13
14pub async fn parse_file(file_path: &str, language: Language) -> Result<ParsedFile, Error> {
49 let content = fs::read_to_string(file_path)
51 .await
52 .map_err(|e| Error::Io(e.to_string()))?;
53
54 let file_size_bytes = content.len();
55
56 let ts_language = get_tree_sitter_language(&language)?;
58
59 let mut parser = Parser::new();
61 parser
62 .set_language(&ts_language)
63 .map_err(|e| Error::Parse(e.to_string()))?;
64
65 let tree = parser
67 .parse(&content, None)
68 .ok_or_else(|| Error::Parse("Failed to parse file".to_string()))?;
69
70 let constructs = extract_constructs(&tree, &content, &language);
72
73 let path = Path::new(file_path);
74 let relative_path = path
75 .file_name()
76 .unwrap_or_default()
77 .to_string_lossy()
78 .to_string();
79
80 Ok(ParsedFile {
81 file_path: file_path.to_string(),
82 relative_path,
83 language,
84 constructs,
85 syntax_tree: Some(tree),
86 file_size_bytes,
87
88 })
89}
90
91pub async fn parse_directory(
129 dir_path: &str,
130 options: ParseOptions,
131) -> Result<ParsedProject, Error> {
132 let root_path = PathBuf::from(dir_path);
133
134 if !root_path.exists() {
135 return Err(Error::Io(format!("Directory does not exist: {}", dir_path)));
136 }
137
138 let files_to_parse = collect_files(&root_path, &options)?;
140
141 let (parsed_files, error_files) = parse_files_parallel(files_to_parse, &options).await;
143
144 let total_files_processed = parsed_files.len();
146 let mut language_distribution = HashMap::new();
147 for file in &parsed_files {
148 *language_distribution.entry(file.language.clone()).or_insert(0) += 1;
149 }
150
151 Ok(ParsedProject {
152 root_path: dir_path.to_string(),
153 files: parsed_files,
154 total_files_processed,
155 language_distribution,
156 error_files,
157 })
158}
159
160pub async fn parse_directory_with_filter(
203 dir_path: &str,
204 file_filter: &crate::FileFilter,
205 options: ParseOptions,
206) -> Result<ParsedProject, Error> {
207 let root_path = PathBuf::from(dir_path);
208
209 if !root_path.exists() {
210 return Err(Error::Io(format!("Directory does not exist: {}", dir_path)));
211 }
212
213 let files_to_parse = collect_files_with_filter(&root_path, &options, file_filter)?;
215
216 let (parsed_files, error_files) = parse_files_parallel(files_to_parse, &options).await;
218
219 let total_files_processed = parsed_files.len();
221 let mut language_distribution = HashMap::new();
222 for file in &parsed_files {
223 *language_distribution.entry(file.language.clone()).or_insert(0) += 1;
224 }
225
226 Ok(ParsedProject {
227 root_path: dir_path.to_string(),
228 files: parsed_files,
229 total_files_processed,
230
231 language_distribution,
232 error_files,
233 })
234}
235
236fn collect_files(root_path: &Path, options: &ParseOptions) -> Result<Vec<PathBuf>, Error> {
251 let mut files = Vec::new();
252
253 let walker = if options.recursive {
254 WalkDir::new(root_path)
255 } else {
256 WalkDir::new(root_path).max_depth(1)
257 };
258
259 for entry in walker {
260 let entry = entry.map_err(|e| Error::Io(e.to_string()))?;
261 let path = entry.path();
262
263 if path.is_dir() {
265 continue;
266 }
267
268 if !options.include_hidden_files && is_hidden_file(path) {
270 continue;
271 }
272
273 if should_ignore_file(path, &options.ignore_patterns) {
275 continue;
276 }
277
278 if let Ok(metadata) = path.metadata() {
280 let size_mb = metadata.len() as usize / (1024 * 1024);
281 if size_mb > options.max_file_size_mb {
282 continue;
283 }
284 }
285
286 if detect_language_by_extension(&path.to_string_lossy()).is_some() {
288 files.push(path.to_path_buf());
289 }
290 }
291
292 Ok(files)
293}
294
295fn collect_files_with_filter(
311 root_path: &Path,
312 options: &ParseOptions,
313 filter: &crate::FileFilter,
314) -> Result<Vec<PathBuf>, Error> {
315 let mut files = collect_files(root_path, options)?;
316
317 files.retain(|path| {
319 if let Some(ref extensions) = filter.extensions {
321 if let Some(ext) = path.extension() {
322 if !extensions.contains(&ext.to_string_lossy().to_lowercase()) {
323 return false;
324 }
325 } else {
326 return false;
327 }
328 }
329
330 if let Some(ref languages) = filter.languages {
332 if let Some(detected_lang) = detect_language_by_extension(&path.to_string_lossy()) {
333 if !languages.contains(&detected_lang) {
334 return false;
335 }
336 } else {
337 return false;
338 }
339 }
340
341 if let Ok(metadata) = path.metadata() {
343 let size = metadata.len() as usize;
344
345 if let Some(min_size) = filter.min_size_bytes {
346 if size < min_size {
347 return false;
348 }
349 }
350
351 if let Some(max_size) = filter.max_size_bytes {
352 if size > max_size {
353 return false;
354 }
355 }
356 }
357
358 if let Some(ref predicate) = filter.custom_predicate {
360 if !predicate(path) {
361 return false;
362 }
363 }
364
365 true
366 });
367
368 Ok(files)
369}
370
371async fn parse_files_parallel(
373 files: Vec<PathBuf>,
374 options: &ParseOptions,
375) -> (Vec<ParsedFile>, Vec<FileError>) {
376 let chunk_size = std::cmp::max(1, files.len() / options.max_concurrent_files);
377 let mut parsed_files = Vec::new();
378 let mut error_files = Vec::new();
379
380 for chunk in files.chunks(chunk_size) {
381 let chunk_results: Vec<_> = chunk
382 .iter()
383 .map(|path| async move {
384 let path_str = path.to_string_lossy().to_string();
385
386 let language = match options.language_detection {
388 LanguageDetection::ByExtension => detect_language_by_extension(&path_str),
389 LanguageDetection::Combined => {
390 if let Ok(content) = tokio::fs::read_to_string(path).await {
392 detect_language(&path_str, Some(&content))
393 } else {
394 detect_language_by_extension(&path_str)
395 }
396 }
397 _ => detect_language_by_extension(&path_str), };
399
400 if let Some(lang) = language {
401 match parse_file(&path_str, lang).await {
402 Ok(parsed) => Ok(parsed),
403 Err(e) => Err(FileError {
404 file_path: path_str,
405 error_type: ErrorType::ParseError,
406 message: e.to_string(),
407 }),
408 }
409 } else {
410 Err(FileError {
411 file_path: path_str,
412 error_type: ErrorType::UnsupportedLanguage,
413 message: "Could not detect language".to_string(),
414 })
415 }
416 })
417 .collect();
418
419 for result in futures::future::join_all(chunk_results).await {
421 match result {
422 Ok(parsed_file) => parsed_files.push(parsed_file),
423 Err(error) => error_files.push(error),
424 }
425 }
426 }
427
428 (parsed_files, error_files)
429}
430
431fn extract_constructs(tree: &Tree, source: &str, language: &Language) -> Vec<CodeConstruct> {
433 let root_node = tree.root_node();
434 let mut root_constructs = Vec::new();
435
436 extract_constructs_hierarchical(root_node, source, language, &mut root_constructs, None);
438
439 let mut all_constructs = Vec::new();
441 flatten_constructs(&root_constructs, &mut all_constructs);
442
443 all_constructs
444}
445
446fn extract_constructs_hierarchical(
448 node: Node,
449 source: &str,
450 language: &Language,
451 constructs: &mut Vec<CodeConstruct>,
452 parent_construct: Option<&CodeConstruct>,
453) {
454 let node_type = node.kind();
455 let supported_types = get_supported_node_types(language);
456
457 if supported_types.contains(&node_type.to_string()) {
458 let mut construct = create_code_construct_with_parent(node, source, language, parent_construct);
459
460 let mut child_constructs = Vec::new();
462 for i in 0..node.child_count() {
463 if let Some(child) = node.child(i) {
464 extract_constructs_hierarchical(child, source, language, &mut child_constructs, Some(&construct));
465 }
466 }
467
468 construct.children = child_constructs;
469 constructs.push(construct);
470 } else {
471 for i in 0..node.child_count() {
473 if let Some(child) = node.child(i) {
474 extract_constructs_hierarchical(child, source, language, constructs, parent_construct);
475 }
476 }
477 }
478}
479
480fn flatten_constructs(constructs: &[CodeConstruct], flattened: &mut Vec<CodeConstruct>) {
482 for construct in constructs {
483 flattened.push(construct.clone());
484 flatten_constructs(&construct.children, flattened);
485 }
486}
487
488fn create_code_construct_with_parent(
490 node: Node,
491 source: &str,
492 language: &Language,
493 parent_construct: Option<&CodeConstruct>
494) -> CodeConstruct {
495 let start_byte = node.start_byte();
496 let end_byte = node.end_byte();
497 let source_code = source[start_byte..end_byte].to_string();
498
499 let start_point = node.start_position();
500 let end_point = node.end_position();
501
502 let name = extract_construct_name(node, source);
504
505 let metadata = extract_metadata(node, source, language);
507
508 let parent = parent_construct.map(|p| Box::new(p.clone()));
510
511 CodeConstruct {
512 node_type: node.kind().to_string(),
513 name,
514 source_code,
515 start_line: start_point.row + 1, end_line: end_point.row + 1,
517 start_byte,
518 end_byte,
519 parent,
520 children: Vec::new(), metadata,
522 }
523}
524
525fn extract_construct_name(node: Node, source: &str) -> Option<String> {
527 for i in 0..node.child_count() {
529 if let Some(child) = node.child(i) {
530 if child.kind() == "identifier" || child.kind() == "name" {
531 let start = child.start_byte();
532 let end = child.end_byte();
533 return Some(source[start..end].to_string());
534 }
535 }
536 }
537 None
538}
539
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Language;

    /// Parses a small Python class and checks that the flattened construct
    /// list keeps both directions of the class/method relationship: the
    /// method points at the class as its parent, and the class lists the
    /// method among its children.
    #[test]
    fn test_parent_child_relationships() {
        let source = "class TestClass:\n def test_method(self):\n pass";

        let grammar = crate::languages::get_tree_sitter_language(&Language::Python).unwrap();
        let mut parser = Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(source, None).unwrap();

        let constructs = extract_constructs(&tree, source, &Language::Python);

        let found_class = constructs
            .iter()
            .find(|c| c.node_type == "class_definition");
        let found_method = constructs
            .iter()
            .find(|c| c.node_type == "function_definition");

        assert!(found_class.is_some(), "Should find class construct");
        assert!(found_method.is_some(), "Should find method construct");

        // Child -> parent link.
        let method = found_method.unwrap();
        assert!(method.parent.is_some(), "Method should have a parent");
        if let Some(parent) = method.parent.as_ref() {
            assert_eq!(
                parent.node_type, "class_definition",
                "Method's parent should be the class"
            );
        }

        // Parent -> child link.
        let class = found_class.unwrap();
        assert!(!class.children.is_empty(), "Class should have children");
        assert!(
            class
                .children
                .iter()
                .any(|c| c.node_type == "function_definition"),
            "Class should contain the method as a child"
        );
    }
}
582
583fn extract_metadata(_node: Node, _source: &str, _language: &Language) -> ConstructMetadata {
585 ConstructMetadata {
586 visibility: None,
587 modifiers: Vec::new(),
588 parameters: Vec::new(),
589 return_type: None,
590 inheritance: Vec::new(),
591 annotations: Vec::new(),
592 documentation: None,
593 }
594}
595
/// Reports whether the final component of `path` starts with a dot.
///
/// Only the file name is examined, so a visible file inside a dot-directory
/// is not considered hidden. Paths without a file name, or whose file name is
/// not valid UTF-8, yield `false`.
fn is_hidden_file(path: &Path) -> bool {
    match path.file_name().and_then(|os_name| os_name.to_str()) {
        Some(file_name) => file_name.starts_with('.'),
        None => false,
    }
}
603
/// Reports whether `path` should be skipped because its (lossily converted)
/// string form contains any of `ignore_patterns` as a plain substring.
///
/// Patterns are not globs or regular expressions. An empty pattern list never
/// matches.
fn should_ignore_file(path: &Path, ignore_patterns: &[String]) -> bool {
    let rendered = path.to_string_lossy();
    ignore_patterns
        .iter()
        .any(|needle| rendered.contains(needle.as_str()))
}