1use crate::{
4 languages::*, CodeConstruct, ConstructMetadata, Error, ErrorType, FileError, Language,
5 LanguageDetection, ParseOptions, ParsedFile, ParsedProject,
6};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::time::Instant;
10use tokio::fs;
11use tree_sitter::{Node, Parser, Tree};
12use walkdir::WalkDir;
13
14pub async fn parse_file(file_path: &str, language: Language) -> Result<ParsedFile, Error> {
49 let start_time = Instant::now();
50
51 let content = fs::read_to_string(file_path)
53 .await
54 .map_err(|e| Error::Io(e.to_string()))?;
55
56 let file_size_bytes = content.len();
57
58 let ts_language = get_tree_sitter_language(&language)?;
60
61 let mut parser = Parser::new();
63 parser
64 .set_language(&ts_language)
65 .map_err(|e| Error::Parse(e.to_string()))?;
66
67 let tree = parser
69 .parse(&content, None)
70 .ok_or_else(|| Error::Parse("Failed to parse file".to_string()))?;
71
72 let constructs = extract_constructs(&tree, &content, &language);
74
75 let parse_time_ms = start_time.elapsed().as_millis() as u64;
76
77 let path = Path::new(file_path);
78 let relative_path = path
79 .file_name()
80 .unwrap_or_default()
81 .to_string_lossy()
82 .to_string();
83
84 Ok(ParsedFile {
85 file_path: file_path.to_string(),
86 relative_path,
87 language,
88 constructs,
89 syntax_tree: Some(tree),
90 file_size_bytes,
91 parse_time_ms,
92 })
93}
94
95pub async fn parse_directory(
133 dir_path: &str,
134 options: ParseOptions,
135) -> Result<ParsedProject, Error> {
136 let start_time = Instant::now();
137 let root_path = PathBuf::from(dir_path);
138
139 if !root_path.exists() {
140 return Err(Error::Io(format!("Directory does not exist: {}", dir_path)));
141 }
142
143 let files_to_parse = collect_files(&root_path, &options)?;
145
146 let (parsed_files, error_files) = parse_files_parallel(files_to_parse, &options).await;
148
149 let total_files_processed = parsed_files.len();
151 let processing_time_ms = start_time.elapsed().as_millis() as u64;
152
153 let mut language_distribution = HashMap::new();
154 for file in &parsed_files {
155 *language_distribution.entry(file.language.clone()).or_insert(0) += 1;
156 }
157
158 Ok(ParsedProject {
159 root_path: dir_path.to_string(),
160 files: parsed_files,
161 total_files_processed,
162 processing_time_ms,
163 language_distribution,
164 error_files,
165 })
166}
167
168pub async fn parse_directory_with_filter(
211 dir_path: &str,
212 file_filter: &crate::FileFilter,
213 options: ParseOptions,
214) -> Result<ParsedProject, Error> {
215 let start_time = Instant::now();
216 let root_path = PathBuf::from(dir_path);
217
218 if !root_path.exists() {
219 return Err(Error::Io(format!("Directory does not exist: {}", dir_path)));
220 }
221
222 let files_to_parse = collect_files_with_filter(&root_path, &options, file_filter)?;
224
225 let (parsed_files, error_files) = parse_files_parallel(files_to_parse, &options).await;
227
228 let total_files_processed = parsed_files.len();
230 let processing_time_ms = start_time.elapsed().as_millis() as u64;
231
232 let mut language_distribution = HashMap::new();
233 for file in &parsed_files {
234 *language_distribution.entry(file.language.clone()).or_insert(0) += 1;
235 }
236
237 Ok(ParsedProject {
238 root_path: dir_path.to_string(),
239 files: parsed_files,
240 total_files_processed,
241 processing_time_ms,
242 language_distribution,
243 error_files,
244 })
245}
246
247fn collect_files(root_path: &Path, options: &ParseOptions) -> Result<Vec<PathBuf>, Error> {
262 let mut files = Vec::new();
263
264 let walker = if options.recursive {
265 WalkDir::new(root_path)
266 } else {
267 WalkDir::new(root_path).max_depth(1)
268 };
269
270 for entry in walker {
271 let entry = entry.map_err(|e| Error::Io(e.to_string()))?;
272 let path = entry.path();
273
274 if path.is_dir() {
276 continue;
277 }
278
279 if !options.include_hidden_files && is_hidden_file(path) {
281 continue;
282 }
283
284 if should_ignore_file(path, &options.ignore_patterns) {
286 continue;
287 }
288
289 if let Ok(metadata) = path.metadata() {
291 let size_mb = metadata.len() as usize / (1024 * 1024);
292 if size_mb > options.max_file_size_mb {
293 continue;
294 }
295 }
296
297 if detect_language_by_extension(&path.to_string_lossy()).is_some() {
299 files.push(path.to_path_buf());
300 }
301 }
302
303 Ok(files)
304}
305
306fn collect_files_with_filter(
322 root_path: &Path,
323 options: &ParseOptions,
324 filter: &crate::FileFilter,
325) -> Result<Vec<PathBuf>, Error> {
326 let mut files = collect_files(root_path, options)?;
327
328 files.retain(|path| {
330 if let Some(ref extensions) = filter.extensions {
332 if let Some(ext) = path.extension() {
333 if !extensions.contains(&ext.to_string_lossy().to_lowercase()) {
334 return false;
335 }
336 } else {
337 return false;
338 }
339 }
340
341 if let Some(ref languages) = filter.languages {
343 if let Some(detected_lang) = detect_language_by_extension(&path.to_string_lossy()) {
344 if !languages.contains(&detected_lang) {
345 return false;
346 }
347 } else {
348 return false;
349 }
350 }
351
352 if let Ok(metadata) = path.metadata() {
354 let size = metadata.len() as usize;
355
356 if let Some(min_size) = filter.min_size_bytes {
357 if size < min_size {
358 return false;
359 }
360 }
361
362 if let Some(max_size) = filter.max_size_bytes {
363 if size > max_size {
364 return false;
365 }
366 }
367 }
368
369 if let Some(ref predicate) = filter.custom_predicate {
371 if !predicate(path) {
372 return false;
373 }
374 }
375
376 true
377 });
378
379 Ok(files)
380}
381
382async fn parse_files_parallel(
384 files: Vec<PathBuf>,
385 options: &ParseOptions,
386) -> (Vec<ParsedFile>, Vec<FileError>) {
387 let chunk_size = std::cmp::max(1, files.len() / options.max_concurrent_files);
388 let mut parsed_files = Vec::new();
389 let mut error_files = Vec::new();
390
391 for chunk in files.chunks(chunk_size) {
392 let chunk_results: Vec<_> = chunk
393 .iter()
394 .map(|path| async move {
395 let path_str = path.to_string_lossy().to_string();
396
397 let language = match options.language_detection {
399 LanguageDetection::ByExtension => detect_language_by_extension(&path_str),
400 LanguageDetection::Combined => {
401 if let Ok(content) = tokio::fs::read_to_string(path).await {
403 detect_language(&path_str, Some(&content))
404 } else {
405 detect_language_by_extension(&path_str)
406 }
407 }
408 _ => detect_language_by_extension(&path_str), };
410
411 if let Some(lang) = language {
412 match parse_file(&path_str, lang).await {
413 Ok(parsed) => Ok(parsed),
414 Err(e) => Err(FileError {
415 file_path: path_str,
416 error_type: ErrorType::ParseError,
417 message: e.to_string(),
418 }),
419 }
420 } else {
421 Err(FileError {
422 file_path: path_str,
423 error_type: ErrorType::UnsupportedLanguage,
424 message: "Could not detect language".to_string(),
425 })
426 }
427 })
428 .collect();
429
430 for result in futures::future::join_all(chunk_results).await {
432 match result {
433 Ok(parsed_file) => parsed_files.push(parsed_file),
434 Err(error) => error_files.push(error),
435 }
436 }
437 }
438
439 (parsed_files, error_files)
440}
441
442fn extract_constructs(tree: &Tree, source: &str, language: &Language) -> Vec<CodeConstruct> {
444 let mut constructs = Vec::new();
445 let root_node = tree.root_node();
446
447 extract_constructs_recursive(root_node, source, language, &mut constructs, None);
448
449 constructs
450}
451
452fn extract_constructs_recursive(
454 node: Node,
455 source: &str,
456 language: &Language,
457 constructs: &mut Vec<CodeConstruct>,
458 parent: Option<&CodeConstruct>,
459) {
460 let node_type = node.kind();
461 let supported_types = get_supported_node_types(language);
462
463 if supported_types.contains(&node_type.to_string()) {
464 let construct = create_code_construct(node, source, language);
465 constructs.push(construct);
466 }
467
468 for i in 0..node.child_count() {
470 if let Some(child) = node.child(i) {
471 extract_constructs_recursive(child, source, language, constructs, parent);
472 }
473 }
474}
475
476fn create_code_construct(node: Node, source: &str, language: &Language) -> CodeConstruct {
478 let start_byte = node.start_byte();
479 let end_byte = node.end_byte();
480 let source_code = source[start_byte..end_byte].to_string();
481
482 let start_point = node.start_position();
483 let end_point = node.end_position();
484
485 let name = extract_construct_name(node, source);
487
488 let metadata = extract_metadata(node, source, language);
490
491 CodeConstruct {
492 node_type: node.kind().to_string(),
493 name,
494 source_code,
495 start_line: start_point.row + 1, end_line: end_point.row + 1,
497 start_byte,
498 end_byte,
499 parent: None, children: Vec::new(), metadata,
502 }
503}
504
505fn extract_construct_name(node: Node, source: &str) -> Option<String> {
507 for i in 0..node.child_count() {
509 if let Some(child) = node.child(i) {
510 if child.kind() == "identifier" || child.kind() == "name" {
511 let start = child.start_byte();
512 let end = child.end_byte();
513 return Some(source[start..end].to_string());
514 }
515 }
516 }
517 None
518}
519
520fn extract_metadata(_node: Node, _source: &str, _language: &Language) -> ConstructMetadata {
522 ConstructMetadata {
523 visibility: None,
524 modifiers: Vec::new(),
525 parameters: Vec::new(),
526 return_type: None,
527 inheritance: Vec::new(),
528 annotations: Vec::new(),
529 documentation: None,
530 }
531}
532
/// A file is "hidden" (Unix dot-file convention) when its final path
/// component begins with `.`. Paths without a file name are never hidden.
fn is_hidden_file(path: &Path) -> bool {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.starts_with('.'),
        None => false,
    }
}
540
/// True when the path's string form contains any of `ignore_patterns`
/// (plain substring match — not glob or regex).
fn should_ignore_file(path: &Path, ignore_patterns: &[String]) -> bool {
    let path_str = path.to_string_lossy();
    ignore_patterns
        .iter()
        .any(|pattern| path_str.contains(pattern.as_str()))
}