use anyhow::{Context, Result};
use regex::Regex;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

use crate::git;

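/// Aggregate results of a repository scan: file, line, and language counts,
/// a git history summary, complexity metrics, and detected duplicate code.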
#[derive(Debug)]
pub struct RepositoryAnalysis {
    pub repo_path: PathBuf,
    pub file_count: usize,
    pub language_stats: HashMap<String, usize>,
    pub total_lines: usize,
    pub code_lines: usize,
    pub comment_lines: usize,
    pub blank_lines: usize,
    pub commit_count: usize,
    pub contributors: Vec<git::Contributor>,
    pub last_activity: String,
    pub file_extensions: HashMap<String, usize>,
    pub avg_file_size: f64,
    pub largest_files: Vec<(PathBuf, usize)>,
    pub complexity_stats: ComplexityStats,
    pub file_age_stats: FileAgeStats,
    pub duplicate_code: Vec<DuplicateCode>,
    /// (path, commits, lines added, lines removed, change frequency,
    /// top contributor, last commit date, avg changes per commit)
    pub most_changed_files: Vec<(PathBuf, usize, usize, usize, f64, String, String, f64)>,
}

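/// Heuristic complexity metrics: per-file cyclomatic complexity and function lengths.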
#[derive(Debug)]
pub struct ComplexityStats {
    pub avg_complexity: f64,
    pub max_complexity: usize,
    pub complex_files: Vec<(PathBuf, usize)>,
    pub avg_function_length: f64,
    pub max_function_length: usize,
    pub long_functions: Vec<(PathBuf, String, usize)>,
}

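/// File age and churn rankings derived from git history.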
#[derive(Debug)]
pub struct FileAgeStats {
    pub newest_files: Vec<(PathBuf, String)>,
    pub oldest_files: Vec<(PathBuf, String)>,
    pub most_modified_files: Vec<(PathBuf, usize)>,
}

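/// A block of identical lines shared between files, with its length and a similarity score.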
#[derive(Debug)]
pub struct DuplicateCode {
    pub files: Vec<PathBuf>,
    pub line_count: usize,
    pub similarity: f64,
}

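/// Runs the full analysis pipeline over the repository at `repo_path`: file statistics,
/// git history (using `history_depth`), code complexity, and duplicate-code detection.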
pub fn analyze_repository(repo_path: &Path, history_depth: usize) -> Result<RepositoryAnalysis> {
    println!("Starting repository analysis...");
    println!("Repository path: {}", repo_path.display());

    let mut analysis = RepositoryAnalysis {
        repo_path: repo_path.to_path_buf(),
        file_count: 0,
        language_stats: HashMap::new(),
        total_lines: 0,
        code_lines: 0,
        comment_lines: 0,
        blank_lines: 0,
        commit_count: 0,
        contributors: Vec::new(),
        last_activity: String::new(),
        file_extensions: HashMap::new(),
        avg_file_size: 0.0,
        largest_files: Vec::new(),
        complexity_stats: ComplexityStats {
            avg_complexity: 0.0,
            max_complexity: 0,
            complex_files: Vec::new(),
            avg_function_length: 0.0,
            max_function_length: 0,
            long_functions: Vec::new(),
        },
        file_age_stats: FileAgeStats {
            newest_files: Vec::new(),
            oldest_files: Vec::new(),
            most_modified_files: Vec::new(),
        },
        duplicate_code: Vec::new(),
        most_changed_files: Vec::new(),
    };

    // File, line, and language statistics
    analyze_files(repo_path, &mut analysis)?;

    // Git history: commits, contributors, per-file churn
    analyze_git_history(repo_path, &mut analysis, history_depth)?;

    // Cyclomatic complexity and function lengths
    analyze_code_complexity(repo_path, &mut analysis)?;

    // Exact-match duplicate code blocks
    find_duplicate_code(repo_path, &mut analysis)?;

    println!("Analysis complete!");
    Ok(analysis)
}

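/// Walks the working tree, skipping ignored directories, and records file counts, sizes,
/// extensions, detected languages, line-type totals, and the largest files.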
fn analyze_files(repo_path: &Path, analysis: &mut RepositoryAnalysis) -> Result<()> {
    println!("Analyzing files...");

    let ignore_patterns = ignore_patterns();

    for entry in WalkDir::new(repo_path)
        .into_iter()
        .filter_entry(|e| !is_ignored(e.path(), &ignore_patterns))
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
    {
        analysis.file_count += 1;

        if let Ok(metadata) = entry.metadata() {
            let file_size = metadata.len() as usize;
            analysis
                .largest_files
                .push((entry.path().to_path_buf(), file_size));
        }

        if let Some(extension) = entry.path().extension() {
            if let Some(ext_str) = extension.to_str() {
                let ext = ext_str.to_lowercase();
                *analysis.file_extensions.entry(ext.clone()).or_insert(0) += 1;

                let language = match ext.as_str() {
                    "rs" => "Rust",
                    "js" => "JavaScript",
                    "ts" => "TypeScript",
                    "jsx" | "tsx" => "React",
                    "py" => "Python",
                    "java" => "Java",
                    "c" | "h" => "C",
                    "cpp" | "hpp" => "C++",
                    "go" => "Go",
                    "rb" => "Ruby",
                    "php" => "PHP",
                    "html" => "HTML",
                    "css" => "CSS",
                    "scss" | "sass" => "SASS",
                    "md" => "Markdown",
                    "json" => "JSON",
                    "yml" | "yaml" => "YAML",
                    "toml" => "TOML",
                    "sh" | "bash" => "Shell",
                    "sql" => "SQL",
                    "swift" => "Swift",
                    "kt" | "kts" => "Kotlin",
                    "dart" => "Dart",
                    "ex" | "exs" => "Elixir",
                    "hs" => "Haskell",
                    "clj" => "Clojure",
                    "fs" => "F#",
                    "vue" => "Vue",
                    "svelte" => "Svelte",
                    "xml" => "XML",
                    "gradle" => "Gradle",
                    "tf" | "tfvars" => "Terraform",
                    "proto" => "Protocol Buffers",
                    "graphql" | "gql" => "GraphQL",
                    "r" => "R",
                    "lua" => "Lua",
                    "pl" | "pm" => "Perl",
                    "cs" => "C#",
                    "vb" => "Visual Basic",
                    "scala" => "Scala",
                    "groovy" => "Groovy",
                    "m" => "Objective-C",
                    "mm" => "Objective-C++",
                    _ => "Other",
                };

                *analysis
                    .language_stats
                    .entry(language.to_string())
                    .or_insert(0) += 1;
            }
        }

        if let Ok(content) = std::fs::read_to_string(entry.path()) {
            let (total, code, comment, blank) = count_line_types(&content, entry.path());
            analysis.total_lines += total;
            analysis.code_lines += code;
            analysis.comment_lines += comment;
            analysis.blank_lines += blank;
        }
    }

    // Derive the average file size and keep only the ten largest files.
    if analysis.file_count > 0 {
        let total_size: usize = analysis.largest_files.iter().map(|(_, size)| size).sum();
        analysis.avg_file_size = total_size as f64 / analysis.file_count as f64;
    }
    analysis.largest_files.sort_by(|(_, a), (_, b)| b.cmp(a));
    analysis.largest_files.truncate(10);

    Ok(())
}

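/// Classifies a file's lines as code, comments, or blanks based on the comment syntax
/// implied by its extension. Returns `(total, code, comment, blank)`.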
fn count_line_types(content: &str, path: &Path) -> (usize, usize, usize, usize) {
    let mut total_lines = 0;
    let mut code_lines = 0;
    let mut comment_lines = 0;
    let mut blank_lines = 0;

    let is_comment = |line: &str, in_block_comment: &mut bool| {
        if let Some(ext) = path.extension() {
            match ext.to_str().unwrap_or("").to_lowercase().as_str() {
                "rs" | "js" | "ts" | "jsx" | "tsx" | "java" | "c" | "cpp" | "cs" | "go"
                | "swift" | "kt" => {
                    if line.trim().starts_with("//") {
                        return true;
                    }
                    if line.trim().starts_with("/*") && !line.trim().contains("*/") {
                        *in_block_comment = true;
                        return true;
                    }
                    if *in_block_comment {
                        if line.trim().contains("*/") {
                            *in_block_comment = false;
                        }
                        return true;
                    }
                }
                "py" | "rb" | "sh" => {
                    if line.trim().starts_with('#') {
                        return true;
                    }
                }
                "html" | "xml" => {
                    if line.trim().starts_with("<!--") && !line.trim().contains("-->") {
                        *in_block_comment = true;
                        return true;
                    }
                    if *in_block_comment {
                        if line.trim().contains("-->") {
                            *in_block_comment = false;
                        }
                        return true;
                    }
                }
                _ => {}
            }
        }
        false
    };

    let mut in_block_comment = false;

    for line in content.lines() {
        total_lines += 1;

        if line.trim().is_empty() {
            blank_lines += 1;
        } else if is_comment(line, &mut in_block_comment) {
            comment_lines += 1;
        } else {
            code_lines += 1;
        }
    }

    (total_lines, code_lines, comment_lines, blank_lines)
}

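/// Collects commit count, contributors, and per-file change statistics from git,
/// then fills in the file-age rankings and the most-changed-files list.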
fn analyze_git_history(
    repo_path: &Path,
    analysis: &mut RepositoryAnalysis,
    history_depth: usize,
) -> Result<()> {
    println!("Analyzing git history...");

    let (commit_count, contributors, last_activity, file_stats) =
        git::analyze_git_repo_extended(repo_path, history_depth)
            .context("Failed to analyze git repository")?;

    analysis.commit_count = commit_count;
    analysis.contributors = contributors;
    analysis.last_activity = last_activity;

    // Newest files, by first commit date (most recent first)
    let mut newest_files: Vec<(PathBuf, String)> = file_stats
        .iter()
        .map(|(path, stats)| (path.clone(), stats.first_commit_date.clone()))
        .collect();
    newest_files.sort_by(|(_, a), (_, b)| b.cmp(a));
    analysis.file_age_stats.newest_files = newest_files.into_iter().take(10).collect();

    // Oldest files, by first commit date (earliest first)
    let mut oldest_files: Vec<(PathBuf, String)> = file_stats
        .iter()
        .map(|(path, stats)| (path.clone(), stats.first_commit_date.clone()))
        .collect();
    oldest_files.sort_by(|(_, a), (_, b)| a.cmp(b));
    analysis.file_age_stats.oldest_files = oldest_files.into_iter().take(10).collect();

    // Files touched by the most commits
    let mut most_modified_files: Vec<(PathBuf, usize)> = file_stats
        .iter()
        .map(|(path, stats)| (path.clone(), stats.commit_count))
        .collect();
    most_modified_files.sort_by(|(_, a), (_, b)| b.cmp(a));
    analysis.file_age_stats.most_modified_files =
        most_modified_files.into_iter().take(10).collect();

    // Most-changed files, ranked by change frequency
    let mut most_changed_files = Vec::new();
    for (path, stats) in file_stats.iter() {
        // Find the author with the most commits to this file
        let mut top_contributor = String::from("Unknown");
        let mut max_commits = 0;

        for (author, commit_count) in &stats.author_contributions {
            if *commit_count > max_commits {
                max_commits = *commit_count;
                top_contributor = author.clone();
            }
        }

        most_changed_files.push((
            path.clone(),
            stats.commit_count,
            stats.lines_added,
            stats.lines_removed,
            stats.change_frequency,
            top_contributor,
            stats.last_commit_date.clone(),
            stats.avg_changes_per_commit,
        ));
    }

    most_changed_files.sort_by(|(_, _, _, _, a, _, _, _), (_, _, _, _, b, _, _, _)| {
        b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)
    });

    analysis.most_changed_files = most_changed_files.into_iter().take(10).collect();

    Ok(())
}

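/// Estimates per-file cyclomatic complexity and measures function lengths for the
/// languages listed in `function_patterns`.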
fn analyze_code_complexity(repo_path: &Path, analysis: &mut RepositoryAnalysis) -> Result<()> {
    println!("Analyzing code complexity...");

    let mut total_complexity = 0;
    let mut file_count = 0;
    let mut complex_files = Vec::new();

    let mut total_function_length = 0;
    let mut function_count = 0;
    let mut long_functions = Vec::new();

    // Per-language (function declaration, block open, block close) regexes
    let function_patterns = HashMap::from([
        ("rs", (Regex::new(r"fn\s+(\w+)\s*\(").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
        ("js", (Regex::new(r"function\s+(\w+)\s*\(|(\w+)\s*=\s*function\s*\(|(\w+)\s*:\s*function\s*\(|(\w+)\s*\([^)]*\)\s*\{").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
        ("ts", (Regex::new(r"function\s+(\w+)\s*\(|(\w+)\s*=\s*function\s*\(|(\w+)\s*:\s*function\s*\(|(\w+)\s*\([^)]*\)\s*\{").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
        ("py", (Regex::new(r"def\s+(\w+)\s*\(").unwrap(), Regex::new(r":").unwrap(), Regex::new(r"^\s*$|^\s*\w").unwrap())),
        ("java", (Regex::new(r"(public|private|protected|static|\s) +[\w<>\[\]]+\s+(\w+) *\([^)]*\) *\{?").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
        ("go", (Regex::new(r"func\s+(\w+)\s*\(").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
    ]);

    for entry in WalkDir::new(repo_path)
        .into_iter()
        .filter_entry(|e| !is_ignored(e.path(), &ignore_patterns()))
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
    {
        if let Some(ext) = entry.path().extension() {
            let ext_str = ext.to_str().unwrap_or("").to_lowercase();

            if let Some((func_pattern, open_pattern, close_pattern)) =
                function_patterns.get(ext_str.as_str())
            {
                if let Ok(content) = std::fs::read_to_string(entry.path()) {
                    let complexity = calculate_cyclomatic_complexity(&content, &ext_str);
                    total_complexity += complexity;
                    file_count += 1;

                    if complexity > 10 {
                        complex_files.push((entry.path().to_path_buf(), complexity));
                    }

                    let functions = find_functions(
                        &content,
                        func_pattern,
                        open_pattern,
                        close_pattern,
                        &ext_str,
                    );
                    for (name, length) in functions {
                        total_function_length += length;
                        function_count += 1;

                        if length > 30 {
                            long_functions.push((entry.path().to_path_buf(), name, length));
                        }
                    }
                }
            }
        }
    }

    if file_count > 0 {
        analysis.complexity_stats.avg_complexity = total_complexity as f64 / file_count as f64;
    }

    if function_count > 0 {
        analysis.complexity_stats.avg_function_length =
            total_function_length as f64 / function_count as f64;
    }

    complex_files.sort_by(|(_, a), (_, b)| b.cmp(a));
    analysis.complexity_stats.complex_files = complex_files.into_iter().take(10).collect();

    if let Some((_, complexity)) = analysis.complexity_stats.complex_files.first() {
        analysis.complexity_stats.max_complexity = *complexity;
    }

    long_functions.sort_by(|(_, _, a), (_, _, b)| b.cmp(a));
    analysis.complexity_stats.long_functions = long_functions.into_iter().take(10).collect();

    if let Some((_, _, length)) = analysis.complexity_stats.long_functions.first() {
        analysis.complexity_stats.max_function_length = *length;
    }

    Ok(())
}

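/// Rough cyclomatic complexity estimate: starts at 1 and adds 1 for each branching
/// keyword or boolean operator, using per-language keyword lists.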
fn calculate_cyclomatic_complexity(content: &str, ext: &str) -> usize {
    // Every file starts at complexity 1; each branch point adds 1.
    let mut complexity = 1;

    match ext {
        "rs" | "js" | "ts" | "java" | "c" | "cpp" | "cs" | "go" | "swift" | "kt" | "scala" => {
            for line in content.lines() {
                let line = line.trim();

                if line.starts_with("//") || line.starts_with("/*") || line.starts_with("*") {
                    continue;
                }

                if line.contains("if ")
                    || line.contains("else if")
                    || line.contains(" ? ")
                    || line.contains("for ")
                    || line.contains("while ")
                    || line.contains("case ")
                    || line.contains("catch ")
                    || line.contains("switch ")
                    || (ext == "rs" && line.contains("match "))
                    || (ext == "go" && line.contains("select "))
                    || (ext == "swift" && line.contains("guard "))
                {
                    complexity += 1;
                }

                complexity += line.matches("&&").count();
                complexity += line.matches("||").count();
            }
        }
        "py" => {
            for line in content.lines() {
                let line = line.trim();

                if line.starts_with("#") {
                    continue;
                }

                // Comprehensions are counted via their embedded `for `/`if ` keywords.
                if line.contains("if ")
                    || line.contains("elif ")
                    || line.contains("for ")
                    || line.contains("while ")
                    || line.contains("except ")
                    || line.contains("with ")
                {
                    complexity += 1;
                }

                complexity += line.matches(" and ").count();
                complexity += line.matches(" or ").count();
            }
        }
        "rb" => {
            for line in content.lines() {
                let line = line.trim();

                if line.starts_with("#") {
                    continue;
                }

                if line.contains("if ")
                    || line.contains("elsif ")
                    || line.contains("unless ")
                    || line.contains("case ")
                    || line.contains("when ")
                    || line.contains("for ")
                    || line.contains("while ")
                    || line.contains("until ")
                    || line.contains("rescue ")
                {
                    complexity += 1;
                }

                complexity += line.matches("&&").count();
                complexity += line.matches("||").count();
            }
        }
        "php" => {
            for line in content.lines() {
                let line = line.trim();

                if line.starts_with("//") || line.starts_with("/*") || line.starts_with("*") {
                    continue;
                }

                if line.contains("if ")
                    || line.contains("elseif ")
                    || line.contains("for ")
                    || line.contains("foreach ")
                    || line.contains("while ")
                    || line.contains("case ")
                    || line.contains("catch ")
                {
                    complexity += 1;
                }

                complexity += line.matches("&&").count();
                complexity += line.matches("||").count();
                complexity += line.matches(" and ").count();
                complexity += line.matches(" or ").count();
            }
        }
        _ => {}
    }

    complexity
}

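/// Finds function definitions matching `func_pattern` and measures their length in lines,
/// using brace counting for brace-delimited languages and indentation for Python.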
fn find_functions(
    content: &str,
    func_pattern: &Regex,
    open_pattern: &Regex,
    _close_pattern: &Regex,
    ext: &str,
) -> Vec<(String, usize)> {
    let mut functions = Vec::new();
    let lines: Vec<&str> = content.lines().collect();

    let mut i = 0;
    while i < lines.len() {
        if let Some(captures) = func_pattern.captures(lines[i]) {
            // Take the first non-empty capture group as the function name.
            let mut func_name = String::new();
            for j in 1..captures.len() {
                if let Some(m) = captures.get(j) {
                    if !m.as_str().is_empty() {
                        func_name = m.as_str().to_string();
                        break;
                    }
                }
            }

            if func_name.is_empty() {
                func_name = "anonymous".to_string();
            }

            // Advance to the line that opens the function body.
            let mut start_line = i;
            while start_line < lines.len() && !open_pattern.is_match(lines[start_line]) {
                start_line += 1;
            }
            if start_line >= lines.len() {
                // No body opener found in the remaining lines; stop scanning.
                break;
            }

            let mut end_line;
            if ext == "py" {
                // Python: the body ends at the first non-blank line indented no deeper
                // than the `def` line itself.
                let base_indent = lines[start_line]
                    .chars()
                    .take_while(|c| c.is_whitespace())
                    .count();
                end_line = start_line + 1;

                while end_line < lines.len() {
                    let indent = lines[end_line]
                        .chars()
                        .take_while(|c| c.is_whitespace())
                        .count();
                    if !lines[end_line].trim().is_empty() && indent <= base_indent {
                        break;
                    }
                    end_line += 1;
                }
            } else {
                // Brace-delimited languages: track nesting until the opening brace closes.
                let mut brace_count = 1;
                end_line = start_line + 1;

                while end_line < lines.len() && brace_count > 0 {
                    if lines[end_line].contains('{') {
                        brace_count += lines[end_line].matches('{').count();
                    }
                    if lines[end_line].contains('}') {
                        // saturating_sub avoids an underflow panic when stray braces
                        // (e.g. inside string literals) close more levels than were opened.
                        brace_count =
                            brace_count.saturating_sub(lines[end_line].matches('}').count());
                    }
                    if brace_count == 0 {
                        break;
                    }
                    end_line += 1;
                }
            }

            let function_length = end_line - start_line;
            functions.push((func_name, function_length));

            i = end_line;
        } else {
            i += 1;
        }
    }

    functions
}

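/// Compares source files pairwise and records runs of at least `min_block_size`
/// identical (trimmed, non-comment) lines as duplicate code.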
fn find_duplicate_code(repo_path: &Path, analysis: &mut RepositoryAnalysis) -> Result<()> {
    println!("Finding duplicate code...");

    // Collect the trimmed, non-comment lines of every source file.
    let mut file_contents: HashMap<PathBuf, Vec<String>> = HashMap::new();

    for entry in WalkDir::new(repo_path)
        .into_iter()
        .filter_entry(|e| !is_ignored(e.path(), &ignore_patterns()))
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
    {
        if let Some(ext) = entry.path().extension() {
            let ext_str = ext.to_str().unwrap_or("").to_lowercase();

            if ["rs", "js", "ts", "py", "java", "c", "cpp", "go", "cs"].contains(&ext_str.as_str())
            {
                if let Ok(content) = std::fs::read_to_string(entry.path()) {
                    let lines: Vec<String> = content
                        .lines()
                        .map(|l| l.trim().to_string())
                        .filter(|l| !l.is_empty() && !l.starts_with("//") && !l.starts_with("#"))
                        .collect();

                    file_contents.insert(entry.path().to_path_buf(), lines);
                }
            }
        }
    }

    // Minimum number of consecutive identical lines to count as a duplicate block.
    let min_block_size = 6;
    let mut duplicates = Vec::new();

    let files: Vec<PathBuf> = file_contents.keys().cloned().collect();

    for i in 0..files.len() {
        for j in (i + 1)..files.len() {
            let file1 = &files[i];
            let file2 = &files[j];

            let lines1 = file_contents.get(file1).unwrap();
            let lines2 = file_contents.get(file2).unwrap();

            let mut duplicate_blocks = Vec::new();

            for start1 in 0..(lines1.len().saturating_sub(min_block_size)) {
                'outer: for start2 in 0..(lines2.len().saturating_sub(min_block_size)) {
                    let mut block_size = 0;

                    while start1 + block_size < lines1.len()
                        && start2 + block_size < lines2.len()
                        && lines1[start1 + block_size] == lines2[start2 + block_size]
                    {
                        block_size += 1;
                    }

                    if block_size >= min_block_size {
                        // Skip blocks that overlap one we have already recorded.
                        for (s1, s2, size) in &duplicate_blocks {
                            if (start1 >= *s1 && start1 < s1 + size)
                                || (start2 >= *s2 && start2 < s2 + size)
                            {
                                continue 'outer;
                            }
                        }

                        duplicate_blocks.push((start1, start2, block_size));
                    }
                }
            }

            for (_, _, size) in duplicate_blocks {
                if size >= min_block_size {
                    duplicates.push(DuplicateCode {
                        files: vec![file1.clone(), file2.clone()],
                        line_count: size,
                        similarity: 1.0, // exact line-for-line match
                    });
                }
            }
        }
    }

    duplicates.sort_by(|a, b| b.line_count.cmp(&a.line_count));
    analysis.duplicate_code = duplicates.into_iter().take(10).collect();

    Ok(())
}

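/// Path patterns (VCS metadata, dependency, build, cache, and editor directories)
/// excluded from every directory walk.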
fn ignore_patterns() -> Vec<Regex> {
    vec![
        Regex::new(r"\.git/").unwrap(),
        Regex::new(r"node_modules/").unwrap(),
        Regex::new(r"target/").unwrap(),
        Regex::new(r"\.DS_Store").unwrap(),
        Regex::new(r"\.idea/").unwrap(),
        Regex::new(r"\.vscode/").unwrap(),
        Regex::new(r"dist/").unwrap(),
        Regex::new(r"build/").unwrap(),
        Regex::new(r"\.cache/").unwrap(),
    ]
}

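/// Returns true if `path` matches any of the given ignore patterns.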
fn is_ignored(path: &Path, patterns: &[Regex]) -> bool {
    let path_str = path.to_string_lossy();
    patterns.iter().any(|pattern| pattern.is_match(&path_str))
}