1use crate::{
4 languages::*, CodeConstruct, ConstructMetadata, Error, ErrorType, FileError, Language,
5 LanguageDetection, ParseOptions, ParsedFile, ParsedProject,
6};
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9
10use tokio::fs;
11use tree_sitter::{Node, Parser, Tree};
12use walkdir::WalkDir;
13
14pub async fn parse_file(file_path: &str, language: Language) -> Result<ParsedFile, Error> {
49 let content = fs::read_to_string(file_path)
51 .await
52 .map_err(|e| Error::Io(e.to_string()))?;
53
54 let file_size_bytes = content.len();
55
56 let ts_language = get_tree_sitter_language(&language)?;
58
59 let mut parser = Parser::new();
61 parser
62 .set_language(&ts_language)
63 .map_err(|e| Error::Parse(e.to_string()))?;
64
65 let tree = parser
67 .parse(&content, None)
68 .ok_or_else(|| Error::Parse("Failed to parse file".to_string()))?;
69
70 let constructs = extract_constructs(&tree, &content, &language);
72
73 let path = Path::new(file_path);
74 let relative_path = path
75 .file_name()
76 .unwrap_or_default()
77 .to_string_lossy()
78 .to_string();
79
80 Ok(ParsedFile {
81 file_path: file_path.to_string(),
82 relative_path,
83 language,
84 constructs,
85 syntax_tree: Some(tree),
86 file_size_bytes,
87
88 })
89}
90
91pub async fn parse_directory(
129 dir_path: &str,
130 options: ParseOptions,
131) -> Result<ParsedProject, Error> {
132 let root_path = PathBuf::from(dir_path);
133
134 if !root_path.exists() {
135 return Err(Error::Io(format!("Directory does not exist: {}", dir_path)));
136 }
137
138 let files_to_parse = collect_files(&root_path, &options)?;
140
141 let (parsed_files, error_files) = parse_files_parallel(files_to_parse, &options).await;
143
144 let total_files_processed = parsed_files.len();
146 let mut language_distribution = HashMap::new();
147 for file in &parsed_files {
148 *language_distribution.entry(file.language.clone()).or_insert(0) += 1;
149 }
150
151 Ok(ParsedProject {
152 root_path: dir_path.to_string(),
153 files: parsed_files,
154 total_files_processed,
155
156 language_distribution,
157 error_files,
158 })
159}
160
161pub async fn parse_directory_with_filter(
204 dir_path: &str,
205 file_filter: &crate::FileFilter,
206 options: ParseOptions,
207) -> Result<ParsedProject, Error> {
208 let root_path = PathBuf::from(dir_path);
209
210 if !root_path.exists() {
211 return Err(Error::Io(format!("Directory does not exist: {}", dir_path)));
212 }
213
214 let files_to_parse = collect_files_with_filter(&root_path, &options, file_filter)?;
216
217 let (parsed_files, error_files) = parse_files_parallel(files_to_parse, &options).await;
219
220 let total_files_processed = parsed_files.len();
222 let mut language_distribution = HashMap::new();
223 for file in &parsed_files {
224 *language_distribution.entry(file.language.clone()).or_insert(0) += 1;
225 }
226
227 Ok(ParsedProject {
228 root_path: dir_path.to_string(),
229 files: parsed_files,
230 total_files_processed,
231
232 language_distribution,
233 error_files,
234 })
235}
236
237fn collect_files(root_path: &Path, options: &ParseOptions) -> Result<Vec<PathBuf>, Error> {
252 let mut files = Vec::new();
253
254 let walker = if options.recursive {
255 WalkDir::new(root_path)
256 } else {
257 WalkDir::new(root_path).max_depth(1)
258 };
259
260 for entry in walker {
261 let entry = entry.map_err(|e| Error::Io(e.to_string()))?;
262 let path = entry.path();
263
264 if path.is_dir() {
266 continue;
267 }
268
269 if !options.include_hidden_files && is_hidden_file(path) {
271 continue;
272 }
273
274 if should_ignore_file(path, &options.ignore_patterns) {
276 continue;
277 }
278
279 if let Ok(metadata) = path.metadata() {
281 let size_mb = metadata.len() as usize / (1024 * 1024);
282 if size_mb > options.max_file_size_mb {
283 continue;
284 }
285 }
286
287 if detect_language_by_extension(&path.to_string_lossy()).is_some() {
289 files.push(path.to_path_buf());
290 }
291 }
292
293 Ok(files)
294}
295
296fn collect_files_with_filter(
312 root_path: &Path,
313 options: &ParseOptions,
314 filter: &crate::FileFilter,
315) -> Result<Vec<PathBuf>, Error> {
316 let mut files = collect_files(root_path, options)?;
317
318 files.retain(|path| {
320 if let Some(ref extensions) = filter.extensions {
322 if let Some(ext) = path.extension() {
323 if !extensions.contains(&ext.to_string_lossy().to_lowercase()) {
324 return false;
325 }
326 } else {
327 return false;
328 }
329 }
330
331 if let Some(ref languages) = filter.languages {
333 if let Some(detected_lang) = detect_language_by_extension(&path.to_string_lossy()) {
334 if !languages.contains(&detected_lang) {
335 return false;
336 }
337 } else {
338 return false;
339 }
340 }
341
342 if let Ok(metadata) = path.metadata() {
344 let size = metadata.len() as usize;
345
346 if let Some(min_size) = filter.min_size_bytes {
347 if size < min_size {
348 return false;
349 }
350 }
351
352 if let Some(max_size) = filter.max_size_bytes {
353 if size > max_size {
354 return false;
355 }
356 }
357 }
358
359 if let Some(ref predicate) = filter.custom_predicate {
361 if !predicate(path) {
362 return false;
363 }
364 }
365
366 true
367 });
368
369 Ok(files)
370}
371
372async fn parse_files_parallel(
374 files: Vec<PathBuf>,
375 options: &ParseOptions,
376) -> (Vec<ParsedFile>, Vec<FileError>) {
377 let chunk_size = std::cmp::max(1, files.len() / options.max_concurrent_files);
378 let mut parsed_files = Vec::new();
379 let mut error_files = Vec::new();
380
381 for chunk in files.chunks(chunk_size) {
382 let chunk_results: Vec<_> = chunk
383 .iter()
384 .map(|path| async move {
385 let path_str = path.to_string_lossy().to_string();
386
387 let language = match options.language_detection {
389 LanguageDetection::ByExtension => detect_language_by_extension(&path_str),
390 LanguageDetection::Combined => {
391 if let Ok(content) = tokio::fs::read_to_string(path).await {
393 detect_language(&path_str, Some(&content))
394 } else {
395 detect_language_by_extension(&path_str)
396 }
397 }
398 _ => detect_language_by_extension(&path_str), };
400
401 if let Some(lang) = language {
402 match parse_file(&path_str, lang).await {
403 Ok(parsed) => Ok(parsed),
404 Err(e) => Err(FileError {
405 file_path: path_str,
406 error_type: ErrorType::ParseError,
407 message: e.to_string(),
408 }),
409 }
410 } else {
411 Err(FileError {
412 file_path: path_str,
413 error_type: ErrorType::UnsupportedLanguage,
414 message: "Could not detect language".to_string(),
415 })
416 }
417 })
418 .collect();
419
420 for result in futures::future::join_all(chunk_results).await {
422 match result {
423 Ok(parsed_file) => parsed_files.push(parsed_file),
424 Err(error) => error_files.push(error),
425 }
426 }
427 }
428
429 (parsed_files, error_files)
430}
431
432fn extract_constructs(tree: &Tree, source: &str, language: &Language) -> Vec<CodeConstruct> {
434 let mut constructs = Vec::new();
435 let root_node = tree.root_node();
436
437 extract_constructs_recursive(root_node, source, language, &mut constructs, None);
438
439 constructs
440}
441
442fn extract_constructs_recursive(
444 node: Node,
445 source: &str,
446 language: &Language,
447 constructs: &mut Vec<CodeConstruct>,
448 parent: Option<&CodeConstruct>,
449) {
450 let node_type = node.kind();
451 let supported_types = get_supported_node_types(language);
452
453 if supported_types.contains(&node_type.to_string()) {
454 let construct = create_code_construct(node, source, language);
455 constructs.push(construct);
456 }
457
458 for i in 0..node.child_count() {
460 if let Some(child) = node.child(i) {
461 extract_constructs_recursive(child, source, language, constructs, parent);
462 }
463 }
464}
465
466fn create_code_construct(node: Node, source: &str, language: &Language) -> CodeConstruct {
468 let start_byte = node.start_byte();
469 let end_byte = node.end_byte();
470 let source_code = source[start_byte..end_byte].to_string();
471
472 let start_point = node.start_position();
473 let end_point = node.end_position();
474
475 let name = extract_construct_name(node, source);
477
478 let metadata = extract_metadata(node, source, language);
480
481 CodeConstruct {
482 node_type: node.kind().to_string(),
483 name,
484 source_code,
485 start_line: start_point.row + 1, end_line: end_point.row + 1,
487 start_byte,
488 end_byte,
489 parent: None, children: Vec::new(), metadata,
492 }
493}
494
495fn extract_construct_name(node: Node, source: &str) -> Option<String> {
497 for i in 0..node.child_count() {
499 if let Some(child) = node.child(i) {
500 if child.kind() == "identifier" || child.kind() == "name" {
501 let start = child.start_byte();
502 let end = child.end_byte();
503 return Some(source[start..end].to_string());
504 }
505 }
506 }
507 None
508}
509
/// Build construct metadata for a node.
///
/// Currently a stub: every field is returned empty/`None` regardless of the
/// node, source, or language (hence the underscore-prefixed parameters),
/// giving callers a stable shape until per-language extraction is implemented.
fn extract_metadata(_node: Node, _source: &str, _language: &Language) -> ConstructMetadata {
    ConstructMetadata {
        visibility: None,
        modifiers: Vec::new(),
        parameters: Vec::new(),
        return_type: None,
        inheritance: Vec::new(),
        annotations: Vec::new(),
        documentation: None,
    }
}
522
/// True when the file's name starts with a dot (Unix hidden-file convention).
/// Paths without a UTF-8 file name (e.g. "..", or non-UTF-8 names) are not
/// considered hidden.
fn is_hidden_file(path: &Path) -> bool {
    matches!(
        path.file_name().and_then(|name| name.to_str()),
        Some(name) if name.starts_with('.')
    )
}
530
/// True when any ignore pattern occurs as a substring of the path's lossy
/// string form (simple substring matching, not glob matching).
fn should_ignore_file(path: &Path, ignore_patterns: &[String]) -> bool {
    let haystack = path.to_string_lossy();
    ignore_patterns
        .iter()
        .any(|pattern| haystack.contains(pattern.as_str()))
}