Skip to main content

garbage_code_hunter/
scoring.rs

1use crate::analyzer::{CodeIssue, Severity};
2use std::collections::HashMap;
3
4/// Code quality rating system
5/// Score range: 0-100, the higher the score, the worse the code quality
6/// 0-20: Excellent
7/// 21-40: Good
8/// 41-60: Average
9/// 61-80: Poor
10/// 81-100: Terrible
11#[derive(Debug, Clone)]
12pub struct CodeQualityScore {
13    pub total_score: f64,
14    pub category_scores: HashMap<String, f64>,
15    pub file_count: usize,
16    pub total_lines: usize,
17    pub issue_density: f64,
18    pub severity_distribution: SeverityDistribution,
19    pub quality_level: QualityLevel,
20}
21
/// Breakdown of issues by severity level.
///
/// `Default` yields an all-zero distribution.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SeverityDistribution {
    /// Number of issues with the most severe (`Nuclear`) level.
    pub nuclear: usize,
    /// Number of issues with the intermediate (`Spicy`) level.
    pub spicy: usize,
    /// Number of issues with the least severe (`Mild`) level.
    pub mild: usize,
}
29
/// Overall code quality rating derived from the score.
///
/// Lower scores are better; each variant covers one 20-point band.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QualityLevel {
    /// Score band 0-20.
    Excellent,
    /// Score band 21-40.
    Good,
    /// Score band 41-60.
    Average,
    /// Score band 61-80.
    Poor,
    /// Score band 81-100 (and anything above).
    Terrible,
}

impl QualityLevel {
    /// Maps a numeric score onto its rating band.
    ///
    /// The score is converted with a float-to-integer `as` cast before banding.
    /// That cast truncates toward zero and saturates, so:
    ///
    /// * fractional scores land in the band of their integer part
    ///   (`20.9` is still `Excellent`, `21.0` is `Good`);
    /// * negative scores and NaN are treated as `0` (`Excellent`);
    /// * everything above 80, including `+inf`, is `Terrible`.
    pub fn from_score(score: f64) -> Self {
        // Intentional truncating/saturating cast; see the doc comment above.
        let truncated = score as u32;
        match truncated {
            0..=20 => QualityLevel::Excellent,
            21..=40 => QualityLevel::Good,
            41..=60 => QualityLevel::Average,
            61..=80 => QualityLevel::Poor,
            _ => QualityLevel::Terrible,
        }
    }

    /// Returns a human-readable label for this level.
    ///
    /// `lang` selects the language: exactly `"zh-CN"` yields the Chinese label,
    /// any other value falls back to English.
    pub fn description(&self, lang: &str) -> &'static str {
        if lang == "zh-CN" {
            match self {
                QualityLevel::Excellent => "优秀",
                QualityLevel::Good => "良好",
                QualityLevel::Average => "一般",
                QualityLevel::Poor => "较差",
                QualityLevel::Terrible => "糟糕",
            }
        } else {
            match self {
                QualityLevel::Excellent => "Excellent",
                QualityLevel::Good => "Good",
                QualityLevel::Average => "Average",
                QualityLevel::Poor => "Poor",
                QualityLevel::Terrible => "Terrible",
            }
        }
    }

    /// Returns the emoji used to visualise this level.
    pub fn emoji(&self) -> &'static str {
        match self {
            QualityLevel::Excellent => "🏆",
            QualityLevel::Good => "👍",
            QualityLevel::Average => "😐",
            QualityLevel::Poor => "😞",
            QualityLevel::Terrible => "💀",
        }
    }
}
76
/// Calculates severity-weighted, category-based code quality scores.
///
/// The scorer is a stateless unit struct; all inputs are passed to its methods.
#[derive(Debug, Clone, Copy)]
pub struct CodeScorer;
79
80impl CodeScorer {
81    pub fn new() -> Self {
82        Self
83    }
84
85    /// calculate code quality score using normalized category-based approach
86    pub fn calculate_score(
87        &self,
88        issues: &[CodeIssue],
89        file_count: usize,
90        total_lines: usize,
91    ) -> CodeQualityScore {
92        if issues.is_empty() {
93            return CodeQualityScore {
94                total_score: 0.0, // Perfect score when no issues (0 = best)
95                category_scores: HashMap::new(),
96                file_count,
97                total_lines,
98                issue_density: 0.0,
99                severity_distribution: SeverityDistribution {
100                    nuclear: 0,
101                    spicy: 0,
102                    mild: 0,
103                },
104                quality_level: QualityLevel::Excellent,
105            };
106        }
107
108        // calculate severity distribution
109        let severity_distribution = self.calculate_severity_distribution(issues);
110
111        // calculate category scores (0-100 for each category)
112        let category_scores = self.calculate_normalized_category_scores(issues, total_lines);
113
114        // calculate weighted final score
115        let total_score = self.calculate_weighted_final_score(&category_scores);
116
117        let issue_density = if total_lines > 0 {
118            issues.len() as f64 / total_lines as f64 * 1000.0 // issues per 1000 lines
119        } else {
120            0.0
121        };
122
123        CodeQualityScore {
124            total_score,
125            category_scores,
126            file_count,
127            total_lines,
128            issue_density,
129            severity_distribution,
130            quality_level: QualityLevel::from_score(total_score),
131        }
132    }
133
134    fn calculate_severity_distribution(&self, issues: &[CodeIssue]) -> SeverityDistribution {
135        let mut nuclear = 0;
136        let mut spicy = 0;
137        let mut mild = 0;
138
139        for issue in issues {
140            match issue.severity {
141                Severity::Nuclear => nuclear += 1,
142                Severity::Spicy => spicy += 1,
143                Severity::Mild => mild += 1,
144            }
145        }
146
147        SeverityDistribution {
148            nuclear,
149            spicy,
150            mild,
151        }
152    }
153
154    /// Calculate normalized category scores (0-100 for each category)
155    fn calculate_normalized_category_scores(
156        &self,
157        issues: &[CodeIssue],
158        total_lines: usize,
159    ) -> HashMap<String, f64> {
160        let mut category_scores = HashMap::new();
161        let mut category_weighted_counts: HashMap<String, f64> = HashMap::new();
162
163        // Define categories with their rule mappings
164        let categories = [
165            (
166                "naming",
167                vec![
168                    "terrible-naming",
169                    "single-letter-variable",
170                    "meaningless-naming",
171                    "hungarian-notation",
172                    "abbreviation-abuse",
173                    "c-naming",
174                ],
175            ),
176            (
177                "complexity",
178                vec![
179                    "deep-nesting",
180                    "long-function",
181                    "cyclomatic-complexity",
182                    "c-nesting",
183                    "c-long-function",
184                ],
185            ),
186            ("duplication", vec!["code-duplication"]),
187            (
188                "rust-basics",
189                vec![
190                    "unwrap-abuse",
191                    "unnecessary-clone",
192                    "string-abuse",
193                    "vec-abuse",
194                ],
195            ),
196            (
197                "advanced-rust",
198                vec![
199                    "complex-closure",
200                    "lifetime-abuse",
201                    "trait-complexity",
202                    "generic-abuse",
203                ],
204            ),
205            (
206                "rust-features",
207                vec![
208                    "channel-abuse",
209                    "async-abuse",
210                    "dyn-trait-abuse",
211                    "unsafe-abuse",
212                    "ffi-abuse",
213                    "macro-abuse",
214                ],
215            ),
216            (
217                "structure",
218                vec![
219                    "module-complexity",
220                    "pattern-matching-abuse",
221                    "reference-abuse",
222                    "box-abuse",
223                    "slice-abuse",
224                    "file-too-long",
225                    "duplicate-imports",
226                    "deep-module-nesting",
227                    "c-include-chaos",
228                ],
229            ),
230            (
231                "code-smells",
232                vec![
233                    "magic-number",
234                    "god-function",
235                    "commented-code",
236                    "dead-code",
237                    "c-magic-number",
238                    "c-god-function",
239                    "c-commented-code",
240                    "c-dead-code",
241                ],
242            ),
243            (
244                "student-code",
245                vec!["println-debugging", "panic-abuse", "todo-comment"],
246            ),
247            ("c-safety", vec!["c-goto-abuse", "c-malloc-leak"]),
248        ];
249
250        // Severity weights: Nuclear issues count 6x as much as Mild
251        let severity_weight = |severity: &Severity| -> f64 {
252            match severity {
253                Severity::Nuclear => 3.0,
254                Severity::Spicy => 1.5,
255                Severity::Mild => 0.5,
256            }
257        };
258
259        // Accumulate severity-weighted counts per category
260        for issue in issues {
261            let weight = severity_weight(&issue.severity);
262            for (category_name, rules) in &categories {
263                if rules.contains(&issue.rule_name.as_str()) {
264                    *category_weighted_counts
265                        .entry(category_name.to_string())
266                        .or_insert(0.0) += weight;
267                }
268            }
269        }
270
271        // Calculate normalized scores for each category (0-100)
272        for (category_name, _) in &categories {
273            let weighted_count = category_weighted_counts.get(*category_name).unwrap_or(&0.0);
274            let score = self.calculate_category_score(*weighted_count, total_lines, category_name);
275            category_scores.insert(category_name.to_string(), score);
276        }
277
278        category_scores
279    }
280
281    /// Calculate score for a specific category (0-100, where 0 is perfect, 100 is terrible, maximum 90)
282    fn calculate_category_score(
283        &self,
284        weighted_count: f64,
285        total_lines: usize,
286        category: &str,
287    ) -> f64 {
288        if total_lines == 0 {
289            return 0.0; // Perfect score when no code
290        }
291
292        // Calculate weighted issues per 1000 lines for this category
293        let issues_per_1k_lines = (weighted_count / total_lines as f64) * 1000.0;
294
295        // Different thresholds for different categories
296        let (excellent_threshold, good_threshold, average_threshold, poor_threshold) =
297            match category {
298                "naming" => (0.0, 2.0, 5.0, 10.0), // Naming should be very clean
299                "complexity" => (0.0, 1.0, 3.0, 6.0), // Complexity should be low
300                "duplication" => (0.0, 0.5, 2.0, 4.0), // Duplication should be minimal
301                "rust-basics" => (0.0, 1.0, 3.0, 6.0), // Basic Rust issues
302                "advanced-rust" => (0.0, 0.5, 2.0, 4.0), // Advanced features should be used carefully
303                "rust-features" => (0.0, 0.5, 1.5, 3.0), // Special features should be rare
304                "structure" => (0.0, 1.0, 3.0, 6.0),     // Structure issues
305                "code-smells" => (0.0, 1.5, 4.0, 8.0),   // Code smells are common
306                "student-code" => (0.0, 1.0, 3.0, 6.0),  // Student patterns
307                "c-safety" => (0.0, 0.5, 2.0, 4.0),      // C safety issues are serious
308                _ => (0.0, 1.0, 3.0, 6.0),               // Default thresholds
309            };
310
311        // Calculate score based on thresholds (0 = excellent, 100 = terrible)
312
313        if issues_per_1k_lines <= excellent_threshold {
314            0.0 // Perfect score
315        } else if issues_per_1k_lines <= good_threshold {
316            (issues_per_1k_lines - excellent_threshold) / (good_threshold - excellent_threshold)
317                * 20.0
318        } else if issues_per_1k_lines <= average_threshold {
319            20.0 + (issues_per_1k_lines - good_threshold) / (average_threshold - good_threshold)
320                * 20.0
321        } else if issues_per_1k_lines <= poor_threshold {
322            40.0 + (issues_per_1k_lines - average_threshold) / (poor_threshold - average_threshold)
323                * 20.0
324        } else {
325            // Beyond poor threshold, score increases rapidly but caps at 90
326            let excess = issues_per_1k_lines - poor_threshold;
327            (60.0 + excess * 2.0).min(90.0) // Cap at 90 to avoid perfect 100
328        }
329    }
330
331    /// Calculate weighted final score from category scores
332    fn calculate_weighted_final_score(&self, category_scores: &HashMap<String, f64>) -> f64 {
333        // Category weights (sum to ~0.95, normalized by total_weight)
334        let weights = [
335            ("naming", 0.15),        // 15% - Very important (includes garbage-naming + c-naming)
336            ("complexity", 0.15),    // 15% - Very important
337            ("duplication", 0.10),   // 10% - Important
338            ("rust-basics", 0.10),   // 10% - Important (includes string/vec abuse)
339            ("advanced-rust", 0.08), // 8% - Moderate
340            ("rust-features", 0.05), // 5% - Moderate
341            ("structure", 0.07),     // 7% - Structure issues (includes include-chaos)
342            ("code-smells", 0.15),   // 15% - Common issues (shared by Rust + C/C++)
343            ("student-code", 0.05),  // 5% - Beginner patterns
344            ("c-safety", 0.10),      // 10% - C/C++ safety (goto, malloc leaks)
345        ];
346
347        let mut weighted_sum = 0.0;
348        let mut total_weight = 0.0;
349
350        for (category, weight) in &weights {
351            if let Some(score) = category_scores.get(*category) {
352                weighted_sum += score * weight;
353                total_weight += weight;
354            }
355        }
356
357        if total_weight > 0.0 {
358            weighted_sum / total_weight
359        } else {
360            100.0 // Default to perfect score if no categories found
361        }
362    }
363}
364
365impl Default for CodeScorer {
366    fn default() -> Self {
367        Self::new()
368    }
369}