repo_analyzer/
analyzer.rs

1use anyhow::{Context, Result};
2use regex::Regex;
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5use walkdir::WalkDir;
6
7use crate::git;
8
/// Aggregated result of one repository analysis run, as returned by
/// `analyze_repository`.
#[derive(Debug)]
pub struct RepositoryAnalysis {
    /// Root directory that was analyzed.
    pub repo_path: PathBuf,
    /// Number of regular files visited by the file pass (ignored paths excluded).
    pub file_count: usize,
    /// Number of files per language name, keyed by the display name derived
    /// from the file extension (unmapped extensions are counted as "Other").
    pub language_stats: HashMap<String, usize>,
    /// Total number of lines across all files readable as UTF-8 text.
    pub total_lines: usize,
    /// Lines classified as code by `count_line_types`.
    pub code_lines: usize,
    /// Lines classified as comments by `count_line_types`.
    pub comment_lines: usize,
    /// Lines that are empty or whitespace-only.
    pub blank_lines: usize,
    /// Commit count reported by `git::analyze_git_repo_extended`.
    pub commit_count: usize,
    /// Contributors reported by `git::analyze_git_repo_extended`.
    pub contributors: Vec<git::Contributor>,
    /// Last-activity string reported by `git::analyze_git_repo_extended`.
    pub last_activity: String,
    /// Number of files per lowercased file extension.
    pub file_extensions: HashMap<String, usize>,
    /// Average file size; initialised to `0.0` by `analyze_repository`.
    /// NOTE(review): presumably bytes, matching `largest_files` - confirm.
    pub avg_file_size: f64,
    /// `(path, size in bytes)` pairs recorded by the file pass.
    pub largest_files: Vec<(PathBuf, usize)>,
    /// Cyclomatic-complexity and function-length statistics.
    pub complexity_stats: ComplexityStats,
    /// File age and modification statistics derived from git history.
    pub file_age_stats: FileAgeStats,
    /// Largest exact duplicate blocks found between pairs of source files.
    pub duplicate_code: Vec<DuplicateCode>,
    /// Up to ten entries ordered by change frequency (highest first). Tuple
    /// layout: `(path, commit count, lines added, lines removed, change
    /// frequency, top contributor, last commit date, average changes per
    /// commit)`.
    pub most_changed_files: Vec<(PathBuf, usize, usize, usize, f64, String, String, f64)>,
}
29
/// Complexity and function-length statistics gathered by the complexity pass.
///
/// `Default` yields the "nothing analyzed yet" value (all counters zero, all
/// lists empty), so callers do not have to spell out every field.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct ComplexityStats {
    /// Mean per-file cyclomatic complexity over the analyzed files.
    pub avg_complexity: f64,
    /// Highest per-file complexity score recorded by the complexity pass.
    pub max_complexity: usize,
    /// Up to ten `(path, complexity)` pairs for files whose score exceeds the
    /// "complex file" threshold, most complex first.
    pub complex_files: Vec<(PathBuf, usize)>,
    /// Mean length, in lines, of the detected functions.
    pub avg_function_length: f64,
    /// Greatest function length recorded by the complexity pass.
    pub max_function_length: usize,
    /// Up to ten `(path, function name, length)` entries for functions longer
    /// than the "long function" threshold, longest first.
    pub long_functions: Vec<(PathBuf, String, usize)>,
}
39
/// File age and churn statistics derived from git history.
///
/// `Default` yields empty lists, i.e. "no history analyzed yet".
#[derive(Debug, Clone, Default, PartialEq)]
pub struct FileAgeStats {
    /// Up to ten `(path, first commit date)` pairs, latest first-commit date first.
    pub newest_files: Vec<(PathBuf, String)>,
    /// Up to ten `(path, first commit date)` pairs, earliest first-commit date first.
    pub oldest_files: Vec<(PathBuf, String)>,
    /// Up to ten `(path, commit count)` pairs, most frequently committed first.
    pub most_modified_files: Vec<(PathBuf, usize)>,
}
46
/// One block of duplicated code shared between files.
#[derive(Debug, Clone, PartialEq)]
pub struct DuplicateCode {
    /// Files that contain the duplicated block.
    pub files: Vec<PathBuf>,
    /// Number of consecutive matching lines in the block.
    pub line_count: usize,
    /// Similarity score of the match; `1.0` denotes an exact match.
    pub similarity: f64,
}
53
54pub fn analyze_repository(repo_path: &Path, history_depth: usize) -> Result<RepositoryAnalysis> {
55    println!("Starting repository analysis...");
56    println!("Repository path: {}", repo_path.display());
57
58    // Create analysis structure
59    let mut analysis = RepositoryAnalysis {
60        repo_path: repo_path.to_path_buf(),
61        file_count: 0,
62        language_stats: HashMap::new(),
63        total_lines: 0,
64        code_lines: 0,
65        comment_lines: 0,
66        blank_lines: 0,
67        commit_count: 0,
68        contributors: Vec::new(),
69        last_activity: String::new(),
70        file_extensions: HashMap::new(),
71        avg_file_size: 0.0,
72        largest_files: Vec::new(),
73        complexity_stats: ComplexityStats {
74            avg_complexity: 0.0,
75            max_complexity: 0,
76            complex_files: Vec::new(),
77            avg_function_length: 0.0,
78            max_function_length: 0,
79            long_functions: Vec::new(),
80        },
81        file_age_stats: FileAgeStats {
82            newest_files: Vec::new(),
83            oldest_files: Vec::new(),
84            most_modified_files: Vec::new(),
85        },
86        duplicate_code: Vec::new(),
87        most_changed_files: Vec::new(),
88    };
89
90    // Analyze files
91    analyze_files(repo_path, &mut analysis)?;
92
93    // Analyze git history
94    analyze_git_history(repo_path, &mut analysis, history_depth)?;
95
96    // Analyze code complexity
97    analyze_code_complexity(repo_path, &mut analysis)?;
98
99    // Find duplicate code
100    find_duplicate_code(repo_path, &mut analysis)?;
101
102    println!("Analysis complete!");
103    Ok(analysis)
104}
105
106fn analyze_files(repo_path: &Path, analysis: &mut RepositoryAnalysis) -> Result<()> {
107    println!("Analyzing files...");
108
109    let ignore_patterns = vec![
110        Regex::new(r"\.git/").unwrap(),
111        Regex::new(r"node_modules/").unwrap(),
112        Regex::new(r"target/").unwrap(),
113        Regex::new(r"\.DS_Store").unwrap(),
114        Regex::new(r"\.idea/").unwrap(),
115        Regex::new(r"\.vscode/").unwrap(),
116        Regex::new(r"dist/").unwrap(),
117        Regex::new(r"build/").unwrap(),
118        Regex::new(r"\.cache/").unwrap(),
119    ];
120
121    for entry in WalkDir::new(repo_path)
122        .into_iter()
123        .filter_entry(|e| !is_ignored(e.path(), &ignore_patterns))
124        .filter_map(|e| e.ok())
125        .filter(|e| e.file_type().is_file())
126    {
127        analysis.file_count += 1;
128
129        // Get file size
130        if let Ok(metadata) = entry.metadata() {
131            let file_size = metadata.len() as usize;
132            analysis
133                .largest_files
134                .push((entry.path().to_path_buf(), file_size));
135        }
136
137        // Get file extension
138        if let Some(extension) = entry.path().extension() {
139            if let Some(ext_str) = extension.to_str() {
140                let ext = ext_str.to_lowercase();
141                *analysis.file_extensions.entry(ext.clone()).or_insert(0) += 1;
142
143                // Map extensions to languages
144                let language = match ext.as_str() {
145                    "rs" => "Rust",
146                    "js" => "JavaScript",
147                    "ts" => "TypeScript",
148                    "jsx" => "React",
149                    "tsx" => "React",
150                    "py" => "Python",
151                    "java" => "Java",
152                    "c" | "h" => "C",
153                    "cpp" | "hpp" => "C++",
154                    "go" => "Go",
155                    "rb" => "Ruby",
156                    "php" => "PHP",
157                    "html" => "HTML",
158                    "css" => "CSS",
159                    "scss" | "sass" => "SASS",
160                    "md" => "Markdown",
161                    "json" => "JSON",
162                    "yml" | "yaml" => "YAML",
163                    "toml" => "TOML",
164                    "sh" | "bash" => "Shell",
165                    "sql" => "SQL",
166                    "swift" => "Swift",
167                    "kt" | "kts" => "Kotlin",
168                    "dart" => "Dart",
169                    "ex" | "exs" => "Elixir",
170                    "hs" => "Haskell",
171                    "clj" => "Clojure",
172                    "fs" => "F#",
173                    "vue" => "Vue",
174                    "svelte" => "Svelte",
175                    "xml" => "XML",
176                    "gradle" => "Gradle",
177                    "tf" | "tfvars" => "Terraform",
178                    "proto" => "Protocol Buffers",
179                    "graphql" | "gql" => "GraphQL",
180                    "r" => "R",
181                    "lua" => "Lua",
182                    "pl" | "pm" => "Perl",
183                    "cs" => "C#",
184                    "vb" => "Visual Basic",
185                    "scala" => "Scala",
186                    "groovy" => "Groovy",
187                    "m" => "Objective-C",
188                    "mm" => "Objective-C++",
189                    _ => "Other",
190                };
191
192                *analysis
193                    .language_stats
194                    .entry(language.to_string())
195                    .or_insert(0) += 1;
196            }
197        }
198
199        // Count lines and analyze code
200        if let Ok(content) = std::fs::read_to_string(entry.path()) {
201            let (total, code, comment, blank) = count_line_types(&content, entry.path());
202            analysis.total_lines += total;
203            analysis.code_lines += code;
204            analysis.comment_lines += comment;
205            analysis.blank_lines += blank;
206        }
207    }
208
209    Ok(())
210}
211
/// Classifies every line of `content` as blank, comment or code.
///
/// The comment syntax is chosen from the (case-insensitive) extension of
/// `path`:
///
/// * C-family and Rust sources: `//` line comments and `/* ... */` blocks,
/// * Python, Ruby and shell: `#` line comments,
/// * HTML and XML: `<!-- ... -->` blocks.
///
/// Files with any other (or no) extension have no comment syntax, so every
/// non-blank line counts as code.
///
/// Returns `(total_lines, code_lines, comment_lines, blank_lines)`.
///
/// Classification is line based: a line counts as a comment only when it
/// starts with a comment marker or lies inside an open block comment. A block
/// comment that opens and closes on the same line counts as a comment unless
/// code follows the closing marker. Whitespace-only lines always count as
/// blank, even inside a block comment.
fn count_line_types(content: &str, path: &Path) -> (usize, usize, usize, usize) {
    // Resolve the comment syntax once instead of once per line.
    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    let (line_marker, block_markers): (Option<&str>, Option<(&str, &str)>) = match ext.as_str() {
        "rs" | "js" | "ts" | "jsx" | "tsx" | "java" | "c" | "h" | "cpp" | "hpp" | "cs" | "go"
        | "swift" | "kt" | "kts" => (Some("//"), Some(("/*", "*/"))),
        "py" | "rb" | "sh" | "bash" => (Some("#"), None),
        "html" | "xml" => (None, Some(("<!--", "-->"))),
        _ => (None, None),
    };

    let mut total_lines = 0;
    let mut code_lines = 0;
    let mut comment_lines = 0;
    let mut blank_lines = 0;
    let mut in_block_comment = false;

    for line in content.lines() {
        total_lines += 1;

        let text = line.trim();
        if text.is_empty() {
            blank_lines += 1;
            continue;
        }

        let is_comment = if in_block_comment {
            // An open block takes precedence over everything else, so a
            // closing marker is recognised even on a line that starts with a
            // line-comment marker.
            if let Some((_, close)) = block_markers {
                if text.contains(close) {
                    in_block_comment = false;
                }
            }
            true
        } else if line_marker.map_or(false, |marker| text.starts_with(marker)) {
            true
        } else if let Some((open, close)) = block_markers {
            match text.strip_prefix(open) {
                Some(rest) => match rest.find(close) {
                    // Block opens here and continues on the following lines.
                    None => {
                        in_block_comment = true;
                        true
                    }
                    // Block opens and closes on this line: a comment unless
                    // code follows the closing marker.
                    Some(idx) => rest[idx + close.len()..].trim().is_empty(),
                },
                None => false,
            }
        } else {
            false
        };

        if is_comment {
            comment_lines += 1;
        } else {
            code_lines += 1;
        }
    }

    (total_lines, code_lines, comment_lines, blank_lines)
}
295
296fn analyze_git_history(
297    repo_path: &Path,
298    analysis: &mut RepositoryAnalysis,
299    history_depth: usize,
300) -> Result<()> {
301    println!("Analyzing git history...");
302
303    let (commit_count, contributors, last_activity, file_stats) =
304        git::analyze_git_repo_extended(repo_path, history_depth)
305            .context("Failed to analyze git repository")?;
306
307    analysis.commit_count = commit_count;
308    analysis.contributors = contributors;
309    analysis.last_activity = last_activity;
310
311    // Process file age stats
312    let mut newest_files: Vec<(PathBuf, String)> = file_stats
313        .iter()
314        .map(|(path, stats)| (path.clone(), stats.first_commit_date.clone()))
315        .collect();
316    newest_files.sort_by(|(_, a), (_, b)| b.cmp(a));
317    analysis.file_age_stats.newest_files = newest_files.into_iter().take(10).collect();
318
319    let mut oldest_files: Vec<(PathBuf, String)> = file_stats
320        .iter()
321        .map(|(path, stats)| (path.clone(), stats.first_commit_date.clone()))
322        .collect();
323    oldest_files.sort_by(|(_, a), (_, b)| a.cmp(b));
324    analysis.file_age_stats.oldest_files = oldest_files.into_iter().take(10).collect();
325
326    let mut most_modified_files: Vec<(PathBuf, usize)> = file_stats
327        .iter()
328        .map(|(path, stats)| (path.clone(), stats.commit_count))
329        .collect();
330    most_modified_files.sort_by(|(_, a), (_, b)| b.cmp(a));
331    analysis.file_age_stats.most_modified_files =
332        most_modified_files.into_iter().take(10).collect();
333
334    // Create most changed files info for the report
335    let mut most_changed_files = Vec::new();
336    for (path, stats) in file_stats.iter() {
337        // Find top contributor for this file
338        let mut top_contributor = String::from("Unknown");
339        let mut max_commits = 0;
340
341        for (author, commit_count) in &stats.author_contributions {
342            if *commit_count > max_commits {
343                max_commits = *commit_count;
344                top_contributor = author.clone();
345            }
346        }
347
348        most_changed_files.push((
349            path.clone(),
350            stats.commit_count,
351            stats.lines_added,
352            stats.lines_removed,
353            stats.change_frequency,
354            top_contributor,
355            stats.last_commit_date.clone(),
356            stats.avg_changes_per_commit,
357        ));
358    }
359
360    // Sort by change frequency
361    most_changed_files.sort_by(|(_, _, _, _, a, _, _, _), (_, _, _, _, b, _, _, _)| {
362        b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)
363    });
364
365    // Store the top 10 most changed files
366    analysis.most_changed_files = most_changed_files.into_iter().take(10).collect();
367
368    Ok(())
369}
370
371fn analyze_code_complexity(repo_path: &Path, analysis: &mut RepositoryAnalysis) -> Result<()> {
372    println!("Analyzing code complexity...");
373
374    let mut total_complexity = 0;
375    let mut file_count = 0;
376    let mut complex_files = Vec::new();
377
378    let mut total_function_length = 0;
379    let mut function_count = 0;
380    let mut long_functions = Vec::new();
381
382    // Patterns to identify functions in different languages
383    let function_patterns = HashMap::from([
384        ("rs", (Regex::new(r"fn\s+(\w+)\s*\(").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
385        ("js", (Regex::new(r"function\s+(\w+)\s*\(|(\w+)\s*=\s*function\s*\(|(\w+)\s*:\s*function\s*\(|(\w+)\s*\([^)]*\)\s*\{").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
386        ("ts", (Regex::new(r"function\s+(\w+)\s*\(|(\w+)\s*=\s*function\s*\(|(\w+)\s*:\s*function\s*\(|(\w+)\s*\([^)]*\)\s*\{").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
387        ("py", (Regex::new(r"def\s+(\w+)\s*\(").unwrap(), Regex::new(r":").unwrap(), Regex::new(r"^\s*$|^\s*\w").unwrap())),
388        ("java", (Regex::new(r"(public|private|protected|static|\s) +[\w<>\[\]]+\s+(\w+) *\([^)]*\) *\{?").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
389        ("go", (Regex::new(r"func\s+(\w+)\s*\(").unwrap(), Regex::new(r"\{").unwrap(), Regex::new(r"\}").unwrap())),
390    ]);
391
392    for entry in WalkDir::new(repo_path)
393        .into_iter()
394        .filter_entry(|e| !is_ignored(e.path(), &ignore_patterns()))
395        .filter_map(|e| e.ok())
396        .filter(|e| e.file_type().is_file())
397    {
398        if let Some(ext) = entry.path().extension() {
399            let ext_str = ext.to_str().unwrap_or("").to_lowercase();
400
401            if let Some((func_pattern, open_pattern, _close_pattern)) =
402                function_patterns.get(ext_str.as_str())
403            {
404                if let Ok(content) = std::fs::read_to_string(entry.path()) {
405                    let complexity = calculate_cyclomatic_complexity(&content, &ext_str);
406                    total_complexity += complexity;
407                    file_count += 1;
408
409                    if complexity > 10 {
410                        complex_files.push((entry.path().to_path_buf(), complexity));
411                    }
412
413                    // Analyze function lengths
414                    let functions = find_functions(
415                        &content,
416                        func_pattern,
417                        open_pattern,
418                        _close_pattern,
419                        &ext_str,
420                    );
421                    for (name, length) in functions {
422                        total_function_length += length;
423                        function_count += 1;
424
425                        if length > 30 {
426                            long_functions.push((entry.path().to_path_buf(), name, length));
427                        }
428                    }
429                }
430            }
431        }
432    }
433
434    // Calculate averages
435    if file_count > 0 {
436        analysis.complexity_stats.avg_complexity = total_complexity as f64 / file_count as f64;
437    }
438
439    if function_count > 0 {
440        analysis.complexity_stats.avg_function_length =
441            total_function_length as f64 / function_count as f64;
442    }
443
444    // Sort and store results
445    complex_files.sort_by(|(_, a), (_, b)| b.cmp(a));
446    analysis.complexity_stats.complex_files = complex_files.into_iter().take(10).collect();
447
448    if let Some((_, complexity)) = analysis.complexity_stats.complex_files.first() {
449        analysis.complexity_stats.max_complexity = *complexity;
450    }
451
452    long_functions.sort_by(|(_, _, a), (_, _, b)| b.cmp(a));
453    analysis.complexity_stats.long_functions = long_functions.into_iter().take(10).collect();
454
455    if let Some((_, _, length)) = analysis.complexity_stats.long_functions.first() {
456        analysis.complexity_stats.max_function_length = *length;
457    }
458
459    Ok(())
460}
461
/// Estimates the cyclomatic complexity of a whole source file.
///
/// The estimate starts at 1 and, for every line that is not a comment line,
/// adds
///
/// * 1 if the line contains at least one branching keyword of the language
///   (matched as a whole word, so `if(x)` counts while `diff` does not), or -
///   for C-like languages - a ternary ` ? `, and
/// * 1 for every logical operator on the line.
///
/// `ext` is the lowercased file extension and selects the rule set. Unknown
/// extensions yield the base complexity of 1.
///
/// This is a line-based heuristic: keywords inside string literals or trailing
/// comments are still counted.
fn calculate_cyclomatic_complexity(content: &str, ext: &str) -> usize {
    /// Per-language rule set used by the line scanner.
    struct BranchRules {
        /// A trimmed line starting with any of these is skipped as a comment.
        comment_prefixes: &'static [&'static str],
        /// Keywords that introduce a decision point.
        keywords: &'static [&'static str],
        /// Additional keyword specific to one member of a language family.
        extra_keyword: Option<&'static str>,
        /// Logical operators; every occurrence adds one.
        operators: &'static [&'static str],
        /// Whether ` ? ` counts as a decision point.
        counts_ternary: bool,
    }

    let rules = match ext {
        "rs" | "js" | "ts" | "java" | "c" | "cpp" | "cs" | "go" | "swift" | "kt" | "scala" => {
            BranchRules {
                comment_prefixes: &["//", "/*", "*"],
                keywords: &["if", "for", "while", "case", "catch", "switch"],
                extra_keyword: match ext {
                    "rs" => Some("match"),
                    "go" => Some("select"),
                    "swift" => Some("guard"),
                    _ => None,
                },
                operators: &["&&", "||"],
                counts_ternary: true,
            }
        }
        "py" => BranchRules {
            comment_prefixes: &["#"],
            // Comprehensions are covered by their `for` keyword.
            keywords: &["if", "elif", "for", "while", "except", "with"],
            extra_keyword: None,
            operators: &[" and ", " or "],
            counts_ternary: false,
        },
        "rb" => BranchRules {
            comment_prefixes: &["#"],
            keywords: &[
                "if", "elsif", "unless", "case", "when", "for", "while", "until", "rescue",
            ],
            extra_keyword: None,
            operators: &["&&", "||"],
            counts_ternary: false,
        },
        "php" => BranchRules {
            comment_prefixes: &["//", "/*", "*"],
            keywords: &["if", "elseif", "for", "foreach", "while", "case", "catch"],
            extra_keyword: None,
            operators: &["&&", "||", " and ", " or "],
            counts_ternary: false,
        },
        // No rules for this language: only the base path is counted.
        _ => return 1,
    };

    let decision_points: usize = content
        .lines()
        .map(str::trim)
        .filter(|line| {
            !rules
                .comment_prefixes
                .iter()
                .any(|prefix| line.starts_with(*prefix))
        })
        .map(|line| {
            let has_branch = rules
                .keywords
                .iter()
                .chain(rules.extra_keyword.iter())
                .any(|keyword| contains_keyword(line, keyword))
                || (rules.counts_ternary && line.contains(" ? "));
            let operators: usize = rules
                .operators
                .iter()
                .map(|op| line.matches(*op).count())
                .sum();
            usize::from(has_branch) + operators
        })
        .sum();

    // Base complexity is 1.
    1 + decision_points
}

/// Returns `true` when `keyword` occurs in `line` as a whole word, i.e. not
/// directly preceded or followed by an identifier character.
fn contains_keyword(line: &str, keyword: &str) -> bool {
    let is_ident = |c: char| c.is_alphanumeric() || c == '_';
    line.match_indices(keyword).any(|(start, _)| {
        let before = line[..start].chars().next_back();
        let after = line[start + keyword.len()..].chars().next();
        before.map_or(true, |c| !is_ident(c)) && after.map_or(true, |c| !is_ident(c))
    })
}
585
586fn find_functions(
587    content: &str,
588    func_pattern: &Regex,
589    open_pattern: &Regex,
590    _close_pattern: &Regex,
591    ext: &str,
592) -> Vec<(String, usize)> {
593    let mut functions = Vec::new();
594    let lines: Vec<&str> = content.lines().collect();
595
596    let mut i = 0;
597    while i < lines.len() {
598        if let Some(captures) = func_pattern.captures(lines[i]) {
599            let mut func_name = String::new();
600            for j in 1..captures.len() {
601                if let Some(m) = captures.get(j) {
602                    if !m.as_str().is_empty() {
603                        func_name = m.as_str().to_string();
604                        break;
605                    }
606                }
607            }
608
609            if func_name.is_empty() {
610                func_name = "anonymous".to_string();
611            }
612
613            // Find function body
614            let mut start_line = i;
615
616            // Find opening brace
617            while start_line < lines.len() && !open_pattern.is_match(lines[start_line]) {
618                start_line += 1;
619            }
620
621            let mut end_line;
622            if ext == "py" {
623                // Python functions are indentation-based
624                let base_indent = lines[start_line]
625                    .chars()
626                    .take_while(|c| c.is_whitespace())
627                    .count();
628                end_line = start_line + 1;
629
630                while end_line < lines.len() {
631                    let indent = lines[end_line]
632                        .chars()
633                        .take_while(|c| c.is_whitespace())
634                        .count();
635                    if !lines[end_line].trim().is_empty() && indent <= base_indent {
636                        break;
637                    }
638                    end_line += 1;
639                }
640            } else {
641                // Brace-based languages
642                let mut brace_count = 1;
643                end_line = start_line + 1;
644
645                while end_line < lines.len() && brace_count > 0 {
646                    if lines[end_line].contains('{') {
647                        brace_count += lines[end_line].matches('{').count();
648                    }
649                    if lines[end_line].contains('}') {
650                        brace_count -= lines[end_line].matches('}').count();
651                    }
652                    if brace_count == 0 {
653                        break;
654                    }
655                    end_line += 1;
656                }
657            }
658
659            let function_length = end_line - start_line;
660            functions.push((func_name, function_length));
661
662            i = end_line;
663        } else {
664            i += 1;
665        }
666    }
667
668    functions
669}
670
671fn find_duplicate_code(repo_path: &Path, analysis: &mut RepositoryAnalysis) -> Result<()> {
672    println!("Finding duplicate code...");
673
674    // Simple duplicate code detection using line hashing
675    let mut file_contents: HashMap<PathBuf, Vec<String>> = HashMap::new();
676
677    // Read file contents
678    for entry in WalkDir::new(repo_path)
679        .into_iter()
680        .filter_entry(|e| !is_ignored(e.path(), &ignore_patterns()))
681        .filter_map(|e| e.ok())
682        .filter(|e| e.file_type().is_file())
683    {
684        if let Some(ext) = entry.path().extension() {
685            let ext_str = ext.to_str().unwrap_or("").to_lowercase();
686
687            // Only analyze source code files
688            if ["rs", "js", "ts", "py", "java", "c", "cpp", "go", "cs"].contains(&ext_str.as_str())
689            {
690                if let Ok(content) = std::fs::read_to_string(entry.path()) {
691                    let lines: Vec<String> = content
692                        .lines()
693                        .map(|l| l.trim().to_string())
694                        .filter(|l| !l.is_empty() && !l.starts_with("//") && !l.starts_with("#"))
695                        .collect();
696
697                    file_contents.insert(entry.path().to_path_buf(), lines);
698                }
699            }
700        }
701    }
702
703    // Find duplicate blocks (simple approach)
704    let min_block_size = 6; // Minimum number of lines to consider a duplicate
705    let mut duplicates = Vec::new();
706
707    let files: Vec<PathBuf> = file_contents.keys().cloned().collect();
708
709    for i in 0..files.len() {
710        for j in (i + 1)..files.len() {
711            let file1 = &files[i];
712            let file2 = &files[j];
713
714            let lines1 = file_contents.get(file1).unwrap();
715            let lines2 = file_contents.get(file2).unwrap();
716
717            let mut duplicate_blocks = Vec::new();
718
719            for start1 in 0..(lines1.len().saturating_sub(min_block_size)) {
720                'outer: for start2 in 0..(lines2.len().saturating_sub(min_block_size)) {
721                    let mut block_size = 0;
722
723                    while start1 + block_size < lines1.len()
724                        && start2 + block_size < lines2.len()
725                        && lines1[start1 + block_size] == lines2[start2 + block_size]
726                    {
727                        block_size += 1;
728                    }
729
730                    if block_size >= min_block_size {
731                        // Check if this block overlaps with any existing block
732                        for (s1, s2, size) in &duplicate_blocks {
733                            if (start1 >= *s1 && start1 < s1 + size)
734                                || (start2 >= *s2 && start2 < s2 + size)
735                            {
736                                continue 'outer;
737                            }
738                        }
739
740                        duplicate_blocks.push((start1, start2, block_size));
741                    }
742                }
743            }
744
745            for (_, _, size) in duplicate_blocks {
746                if size >= min_block_size {
747                    let mut files_vec = Vec::new();
748                    files_vec.push(file1.clone());
749                    files_vec.push(file2.clone());
750
751                    duplicates.push(DuplicateCode {
752                        files: files_vec,
753                        line_count: size,
754                        similarity: 1.0, // Perfect match
755                    });
756                }
757            }
758        }
759    }
760
761    // Sort by line count and take top 10
762    duplicates.sort_by(|a, b| b.line_count.cmp(&a.line_count));
763    analysis.duplicate_code = duplicates.into_iter().take(10).collect();
764
765    Ok(())
766}
767
768fn ignore_patterns() -> Vec<Regex> {
769    vec![
770        Regex::new(r"\.git/").unwrap(),
771        Regex::new(r"node_modules/").unwrap(),
772        Regex::new(r"target/").unwrap(),
773        Regex::new(r"\.DS_Store").unwrap(),
774        Regex::new(r"\.idea/").unwrap(),
775        Regex::new(r"\.vscode/").unwrap(),
776        Regex::new(r"dist/").unwrap(),
777        Regex::new(r"build/").unwrap(),
778        Regex::new(r"\.cache/").unwrap(),
779    ]
780}
781
782fn is_ignored(path: &Path, patterns: &[Regex]) -> bool {
783    let path_str = path.to_string_lossy();
784    patterns.iter().any(|pattern| pattern.is_match(&path_str))
785}