// garbage_code_hunter/rules/duplication.rs

use std::collections::HashMap;
use std::path::Path;
use syn::{visit::Visit, Block, File};

use crate::analyzer::{CodeIssue, RoastLevel, Severity};
use crate::rules::Rule;
7
/// Code duplication detection rule.
///
/// Flags source lines that repeat three or more times and code blocks whose
/// signatures collide within a single file.
pub struct CodeDuplicationRule;
10
11impl Rule for CodeDuplicationRule {
12    fn name(&self) -> &'static str {
13        "code-duplication"
14    }
15
16    fn check(
17        &self,
18        file_path: &Path,
19        syntax_tree: &File,
20        content: &str,
21        _lang: &str,
22    ) -> Vec<CodeIssue> {
23        let mut visitor = DuplicationVisitor::new(file_path.to_path_buf(), content);
24        visitor.visit_file(syntax_tree);
25        visitor.find_duplications()
26    }
27}
28
/// Working state for one duplication scan over a single file.
struct DuplicationVisitor {
    /// Path copied onto every issue this scan reports.
    file_path: std::path::PathBuf,
    /// Owned copy of the file's source text, compared line by line.
    content: String,
    /// Debug renderings of the blocks collected while walking the syntax tree.
    code_blocks: Vec<String>,
    /// Normalized line text mapped to the 1-based line numbers where it occurs.
    line_hashes: HashMap<String, Vec<usize>>,
}
35
36impl DuplicationVisitor {
37    fn new(file_path: std::path::PathBuf, content: &str) -> Self {
38        Self {
39            file_path,
40            content: content.to_string(),
41            code_blocks: Vec::new(),
42            line_hashes: HashMap::new(),
43        }
44    }
45
46    fn find_duplications(&mut self) -> Vec<CodeIssue> {
47        let mut issues = Vec::new();
48
49        // detect line duplications
50        self.detect_line_duplications(&mut issues);
51
52        // detect block duplications
53        self.detect_block_duplications(&mut issues);
54
55        issues
56    }
57
58    fn detect_line_duplications(&mut self, issues: &mut Vec<CodeIssue>) {
59        let lines: Vec<&str> = self.content.lines().collect();
60
61        for (line_num, line) in lines.iter().enumerate() {
62            let trimmed = line.trim();
63
64            // ignore empty lines, comments, and simple statements
65            if trimmed.is_empty()
66                || trimmed.starts_with("//")
67                || trimmed.starts_with("/*")
68                || trimmed.len() < 10
69                || is_simple_statement(trimmed)
70            {
71                continue;
72            }
73
74            let normalized = normalize_line(trimmed);
75            self.line_hashes
76                .entry(normalized)
77                .or_default()
78                .push(line_num + 1);
79        }
80
81        // find duplicate lines
82        for line_numbers in self.line_hashes.values() {
83            if line_numbers.len() >= 3 {
84                // 3 times or more duplicate
85                let messages = [
86                    format!(
87                        "检测到 {} 次重复代码!你是复制粘贴大师吗?",
88                        line_numbers.len()
89                    ),
90                    format!("这行代码重复了 {} 次,建议提取成函数", line_numbers.len()),
91                    format!("重复代码警报!{} 次重复让维护变成噩梦", line_numbers.len()),
92                    format!(
93                        "Copy-paste ninja detected! {} identical lines found",
94                        line_numbers.len()
95                    ),
96                    format!(
97                        "DRY principle violation: {} duplicated lines",
98                        line_numbers.len()
99                    ),
100                ];
101
102                let severity = if line_numbers.len() >= 5 {
103                    Severity::Nuclear
104                } else if line_numbers.len() >= 4 {
105                    Severity::Spicy
106                } else {
107                    Severity::Mild
108                };
109
110                issues.push(CodeIssue {
111                    file_path: self.file_path.clone(),
112                    line: line_numbers[0],
113                    column: 1,
114                    rule_name: "code-duplication".to_string(),
115                    message: messages[issues.len() % messages.len()].clone(),
116                    severity,
117                    roast_level: RoastLevel::Sarcastic,
118                });
119            }
120        }
121    }
122
123    fn detect_block_duplications(&self, issues: &mut Vec<CodeIssue>) {
124        // simple block duplication detection
125        let mut block_signatures = HashMap::new();
126
127        for (i, block) in self.code_blocks.iter().enumerate() {
128            if block.len() > 50 {
129                // only detect larger code blocks
130                let signature = generate_block_signature(block);
131                block_signatures
132                    .entry(signature)
133                    .or_insert_with(Vec::new)
134                    .push(i);
135            }
136        }
137
138        for (_, block_indices) in block_signatures {
139            if block_indices.len() >= 2 {
140                let messages = [
141                    format!("发现 {} 个相似代码块,考虑重构成函数", block_indices.len()),
142                    "代码块重复度过高,DRY原则哭了".to_string(),
143                    format!(
144                        "Similar code blocks detected: {} instances",
145                        block_indices.len()
146                    ),
147                    format!(
148                        "Refactoring opportunity: {} similar blocks found",
149                        block_indices.len()
150                    ),
151                ];
152
153                issues.push(CodeIssue {
154                    file_path: self.file_path.clone(),
155                    line: 1,
156                    column: 1,
157                    rule_name: "code-duplication".to_string(),
158                    message: messages[issues.len() % messages.len()].clone(),
159                    severity: Severity::Spicy,
160                    roast_level: RoastLevel::Sarcastic,
161                });
162            }
163        }
164    }
165}
166
167impl<'ast> Visit<'ast> for DuplicationVisitor {
168    fn visit_block(&mut self, block: &'ast Block) {
169        // collect code blocks for duplication detection
170        let block_str = format!("{block:?}");
171        if block_str.len() > 20 {
172            self.code_blocks.push(block_str);
173        }
174        syn::visit::visit_block(self, block);
175    }
176}
177
/// Normalizes a source line into the key used to group duplicated lines.
///
/// All whitespace is dropped, the text is lowercased, the keyword `let`
/// becomes `var` and the keyword `mut` is removed. Keywords are only matched
/// as whole words (runs of alphanumerics and `_`), so identifiers that merely
/// contain them, such as `mutex` or `delete`, are left intact.
fn normalize_line(line: &str) -> String {
    /// Appends the pending word to `out`, applying the keyword rules, and
    /// clears it.
    fn flush_word(word: &mut String, out: &mut String) {
        match word.as_str() {
            "let" => out.push_str("var"),
            "mut" => {}
            other => out.push_str(&other.to_lowercase()),
        }
        word.clear();
    }

    let mut normalized = String::with_capacity(line.len());
    let mut word = String::new();

    for ch in line.chars() {
        if ch.is_alphanumeric() || ch == '_' {
            word.push(ch);
        } else {
            // Any other character ends the current word.
            flush_word(&mut word, &mut normalized);
            if !ch.is_whitespace() {
                normalized.extend(ch.to_lowercase());
            }
        }
    }
    flush_word(&mut word, &mut normalized);

    normalized
}
186
/// Returns `true` when the line, ignoring surrounding whitespace, is a single
/// delimiter or terminator: `{`, `}`, `;`, `(`, `)`, `[` or `]`.
fn is_simple_statement(line: &str) -> bool {
    matches!(line.trim(), "{" | "}" | ";" | "(" | ")" | "[" | "]")
}
191
/// Builds the signature used to group similar code blocks.
///
/// The signature is the first 100 non-whitespace characters of `block`,
/// lowercased. Only that prefix takes part, so two blocks that differ solely
/// after it produce the same signature.
fn generate_block_signature(block: &str) -> String {
    block
        .chars()
        .filter(|c| !c.is_whitespace())
        .take(100)
        .collect::<String>()
        .to_lowercase()
}