//! garbage_code_hunter — scoring.rs: code quality scoring.
1#[allow(dead_code)]
2use crate::analyzer::{CodeIssue, Severity};
3use std::collections::HashMap;
4
/// Code quality rating system
/// Score range: 0-100, the higher the score, the worse the code quality
/// 0-20: Excellent
/// 21-40: Good
/// 41-60: Average
/// 61-80: Poor
/// 81-100: Terrible
#[derive(Debug, Clone)]
pub struct CodeQualityScore {
    /// Overall weighted score on the 0-100 scale described above.
    pub total_score: f64,
    /// Per-category score (0-100 each), keyed by category name
    /// ("naming", "complexity", "duplication", ...).
    pub category_scores: HashMap<String, f64>,
    /// Number of files that were analyzed.
    pub file_count: usize,
    /// Total number of source lines that were analyzed.
    pub total_lines: usize,
    /// Issues per 1000 lines of code.
    pub issue_density: f64,
    /// Issue counts broken down by severity tier.
    pub severity_distribution: SeverityDistribution,
    /// Discrete quality band derived from `total_score`.
    pub quality_level: QualityLevel,
}
22
/// Counts of detected issues grouped by severity tier
/// (`Nuclear` is the most severe tier, `Mild` the least —
/// see the severity weights in `CodeScorer::new`).
#[derive(Debug, Clone)]
pub struct SeverityDistribution {
    /// Number of `Severity::Nuclear` issues.
    pub nuclear: usize,
    /// Number of `Severity::Spicy` issues.
    pub spicy: usize,
    /// Number of `Severity::Mild` issues.
    pub mild: usize,
}
29
/// Discrete quality band for a 0-100 score (lower is better).
#[derive(Debug, Clone, PartialEq)]
pub enum QualityLevel {
    Excellent, // 0-20
    Good,      // 21-40
    Average,   // 41-60
    Poor,      // 61-80
    Terrible,  // 81-100
}

impl QualityLevel {
    /// Map a 0-100 score onto its quality band.
    ///
    /// The score is truncated toward zero first; the `as` cast also
    /// saturates negative or NaN inputs to 0, so they land in `Excellent`.
    pub fn from_score(score: f64) -> Self {
        let bucket = score as u32;
        if bucket <= 20 {
            QualityLevel::Excellent
        } else if bucket <= 40 {
            QualityLevel::Good
        } else if bucket <= 60 {
            QualityLevel::Average
        } else if bucket <= 80 {
            QualityLevel::Poor
        } else {
            QualityLevel::Terrible
        }
    }

    /// Human-readable label for this band. `"zh-CN"` selects the
    /// Simplified Chinese wording; any other language falls back to English.
    pub fn description(&self, lang: &str) -> &'static str {
        let chinese = lang == "zh-CN";
        match self {
            QualityLevel::Excellent => {
                if chinese {
                    "优秀"
                } else {
                    "Excellent"
                }
            }
            QualityLevel::Good => {
                if chinese {
                    "良好"
                } else {
                    "Good"
                }
            }
            QualityLevel::Average => {
                if chinese {
                    "一般"
                } else {
                    "Average"
                }
            }
            QualityLevel::Poor => {
                if chinese {
                    "较差"
                } else {
                    "Poor"
                }
            }
            QualityLevel::Terrible => {
                if chinese {
                    "糟糕"
                } else {
                    "Terrible"
                }
            }
        }
    }

    /// Emoji used when rendering this band in reports.
    pub fn emoji(&self) -> &'static str {
        match self {
            QualityLevel::Excellent => "🏆",
            QualityLevel::Good => "👍",
            QualityLevel::Average => "😐",
            QualityLevel::Poor => "😞",
            QualityLevel::Terrible => "💀",
        }
    }
}
75
/// Computes `CodeQualityScore`s from analyzer findings.
#[allow(dead_code)]
pub struct CodeScorer {
    /// Per-rule weights, keyed by rule name (e.g. "unwrap-abuse");
    /// unknown rules fall back to 1.0 at scoring time.
    pub rule_weights: HashMap<String, f64>,
    /// Per-severity weights (Nuclear > Spicy > Mild).
    pub severity_weights: HashMap<Severity, f64>,
}
83
84#[allow(dead_code)]
85impl CodeScorer {
86    pub fn new() -> Self {
87        let mut rule_weights = HashMap::new();
88
89        // Basic code quality issues
90        rule_weights.insert("terrible-naming".to_string(), 0.2);
91        rule_weights.insert("single-letter-variable".to_string(), 1.5);
92
93        // Complexity issues
94        rule_weights.insert("deep-nesting".to_string(), 0.3);
95        rule_weights.insert("long-function".to_string(), 2.5);
96        rule_weights.insert("cyclomatic-complexity".to_string(), 3.5);
97        rule_weights.insert("code-duplication".to_string(), 0.4);
98
99        // Rust specific issues
100        rule_weights.insert("unwrap-abuse".to_string(), 0.4); // high weight, because it may cause panic
101        rule_weights.insert("unnecessary-clone".to_string(), 0.3);
102
103        // Advanced Rust features abuse
104        rule_weights.insert("complex-closure".to_string(), 0.3);
105        rule_weights.insert("lifetime-abuse".to_string(), 0.35);
106        rule_weights.insert("trait-complexity".to_string(), 0.35);
107        rule_weights.insert("generic-abuse".to_string(), 0.35);
108
109        // Rust features abuse
110        rule_weights.insert("channel-abuse".to_string(), 0.4);
111        rule_weights.insert("async-abuse".to_string(), 0.4);
112        rule_weights.insert("dyn-trait-abuse".to_string(), 0.4);
113        rule_weights.insert("unsafe-abuse".to_string(), 0.5); // highest weight, because it's a safety issue
114        rule_weights.insert("ffi-abuse".to_string(), 0.6); // high weight, because it's a safety issue
115        rule_weights.insert("macro-abuse".to_string(), 0.6);
116        rule_weights.insert("module-complexity".to_string(), 0.3);
117        rule_weights.insert("pattern-matching-abuse".to_string(), 0.3);
118        rule_weights.insert("reference-abuse".to_string(), 0.3);
119        rule_weights.insert("box-abuse".to_string(), 0.3);
120        rule_weights.insert("slice-abuse".to_string(), 0.4);
121
122        let mut severity_weights = HashMap::new();
123        severity_weights.insert(Severity::Nuclear, 10.0); // nuclear penalty: first nuclear +20, each subsequent +5
124        severity_weights.insert(Severity::Spicy, 5.0); // spicy penalty: first 5 spicy +2, each subsequent +2
125        severity_weights.insert(Severity::Mild, 2.0); // mild penalty: first 20 mild +0.5, each subsequent +0.5
126
127        Self {
128            rule_weights,
129            severity_weights,
130        }
131    }
132
133    /// calculate code quality score using normalized category-based approach
134    pub fn calculate_score(
135        &self,
136        issues: &[CodeIssue],
137        file_count: usize,
138        total_lines: usize,
139    ) -> CodeQualityScore {
140        if issues.is_empty() {
141            return CodeQualityScore {
142                total_score: 100.0, // Perfect score when no issues
143                category_scores: HashMap::new(),
144                file_count,
145                total_lines,
146                issue_density: 0.0,
147                severity_distribution: SeverityDistribution {
148                    nuclear: 0,
149                    spicy: 0,
150                    mild: 0,
151                },
152                quality_level: QualityLevel::Excellent,
153            };
154        }
155
156        // calculate severity distribution
157        let severity_distribution = self.calculate_severity_distribution(issues);
158
159        // calculate category scores (0-100 for each category)
160        let category_scores = self.calculate_normalized_category_scores(issues, total_lines);
161
162        // calculate weighted final score
163        let total_score = self.calculate_weighted_final_score(&category_scores);
164
165        let issue_density = if total_lines > 0 {
166            issues.len() as f64 / total_lines as f64 * 1000.0 // issues per 1000 lines
167        } else {
168            0.0
169        };
170
171        CodeQualityScore {
172            total_score,
173            category_scores,
174            file_count,
175            total_lines,
176            issue_density,
177            severity_distribution,
178            quality_level: QualityLevel::from_score(total_score),
179        }
180    }
181
182    fn calculate_severity_distribution(&self, issues: &[CodeIssue]) -> SeverityDistribution {
183        let mut nuclear = 0;
184        let mut spicy = 0;
185        let mut mild = 0;
186
187        for issue in issues {
188            match issue.severity {
189                Severity::Nuclear => nuclear += 1,
190                Severity::Spicy => spicy += 1,
191                Severity::Mild => mild += 1,
192            }
193        }
194
195        SeverityDistribution {
196            nuclear,
197            spicy,
198            mild,
199        }
200    }
201
202    fn calculate_base_score(&self, issues: &[CodeIssue]) -> f64 {
203        let mut score = 0.0;
204
205        for issue in issues {
206            let rule_weight = self.rule_weights.get(&issue.rule_name).unwrap_or(&1.0);
207            let severity_weight = self.severity_weights.get(&issue.severity).unwrap_or(&1.0);
208
209            // calculate base score
210            score += rule_weight * severity_weight;
211        }
212
213        score
214    }
215
216    fn calculate_density_penalty(
217        &self,
218        issue_count: usize,
219        file_count: usize,
220        total_lines: usize,
221    ) -> f64 {
222        if total_lines == 0 || file_count == 0 {
223            return 0.0;
224        }
225
226        // calculate issues density (issues per 1000 lines)
227        let issues_per_1000_lines = (issue_count as f64 / total_lines as f64) * 1000.0;
228
229        // calculate average issues per file
230        let issues_per_file = issue_count as f64 / file_count as f64;
231
232        // calculate density penalty
233        let density_penalty = match issues_per_1000_lines {
234            x if x > 50.0 => 25.0, // high density
235            x if x > 30.0 => 15.0, // medium density
236            x if x > 20.0 => 10.0, // low density
237            x if x > 10.0 => 5.0,  // very low density
238            _ => 0.0,              // very low density
239        };
240
241        // calculate file penalty
242        let file_penalty = match issues_per_file {
243            x if x > 20.0 => 15.0,
244            x if x > 10.0 => 10.0,
245            x if x > 5.0 => 5.0,
246            _ => 0.0,
247        };
248
249        density_penalty + file_penalty
250    }
251
252    fn calculate_severity_penalty(&self, distribution: &SeverityDistribution) -> f64 {
253        let mut penalty = 0.0;
254
255        // calculate nuclear penalty
256        if distribution.nuclear > 0 {
257            penalty += 20.0 + (distribution.nuclear as f64 - 1.0) * 5.0; // nuclear penalty: first nuclear +20, each subsequent +5
258        }
259
260        // calculate spicy penalty
261        if distribution.spicy > 5 {
262            penalty += (distribution.spicy as f64 - 5.0) * 2.0; // spicy penalty: first 5 spicy +2, each subsequent +2
263        }
264
265        // calculate mild penalty
266        if distribution.mild > 20 {
267            penalty += (distribution.mild as f64 - 20.0) * 0.5; // mild penalty: first 20 mild +0.5, each subsequent +0.5
268        }
269
270        penalty
271    }
272
273    fn calculate_category_scores(&self, issues: &[CodeIssue]) -> HashMap<String, f64> {
274        let mut category_scores = HashMap::new();
275        let mut category_counts: HashMap<String, usize> = HashMap::new();
276
277        // define issue categories
278        let categories = [
279            ("naming", vec!["terrible-naming", "single-letter-variable"]),
280            (
281                "complexity",
282                vec!["deep-nesting", "long-function", "cyclomatic-complexity"],
283            ),
284            ("duplication", vec!["code-duplication"]),
285            ("rust-basics", vec!["unwrap-abuse", "unnecessary-clone"]),
286            (
287                "advanced-rust",
288                vec![
289                    "complex-closure",
290                    "lifetime-abuse",
291                    "trait-complexity",
292                    "generic-abuse",
293                ],
294            ),
295            (
296                "rust-features",
297                vec![
298                    "channel-abuse",
299                    "async-abuse",
300                    "dyn-trait-abuse",
301                    "unsafe-abuse",
302                    "ffi-abuse",
303                    "macro-abuse",
304                ],
305            ),
306            (
307                "structure",
308                vec![
309                    "module-complexity",
310                    "pattern-matching-abuse",
311                    "reference-abuse",
312                    "box-abuse",
313                    "slice-abuse",
314                ],
315            ),
316        ];
317
318        // calculate category scores
319        for issue in issues {
320            for (category_name, rules) in &categories {
321                if rules.contains(&issue.rule_name.as_str()) {
322                    *category_counts
323                        .entry(category_name.to_string())
324                        .or_insert(0) += 1;
325
326                    let rule_weight = self.rule_weights.get(&issue.rule_name).unwrap_or(&1.0);
327                    let severity_weight =
328                        self.severity_weights.get(&issue.severity).unwrap_or(&1.0);
329
330                    *category_scores
331                        .entry(category_name.to_string())
332                        .or_insert(0.0) += rule_weight * severity_weight;
333                }
334            }
335        }
336
337        category_scores
338    }
339
340    /// Calculate normalized category scores (0-100 for each category)
341    fn calculate_normalized_category_scores(
342        &self,
343        issues: &[CodeIssue],
344        total_lines: usize,
345    ) -> HashMap<String, f64> {
346        let mut category_scores = HashMap::new();
347        let mut category_counts: HashMap<String, usize> = HashMap::new();
348
349        // Define categories with weights and thresholds
350        let categories = [
351            ("naming", vec!["terrible-naming", "single-letter-variable"]),
352            (
353                "complexity",
354                vec!["deep-nesting", "long-function", "cyclomatic-complexity"],
355            ),
356            ("duplication", vec!["code-duplication"]),
357            ("rust-basics", vec!["unwrap-abuse", "unnecessary-clone"]),
358            (
359                "advanced-rust",
360                vec![
361                    "complex-closure",
362                    "lifetime-abuse",
363                    "trait-complexity",
364                    "generic-abuse",
365                ],
366            ),
367            (
368                "rust-features",
369                vec![
370                    "channel-abuse",
371                    "async-abuse",
372                    "dyn-trait-abuse",
373                    "unsafe-abuse",
374                    "ffi-abuse",
375                    "macro-abuse",
376                ],
377            ),
378            (
379                "structure",
380                vec![
381                    "module-complexity",
382                    "pattern-matching-abuse",
383                    "reference-abuse",
384                    "box-abuse",
385                    "slice-abuse",
386                ],
387            ),
388        ];
389
390        // Count issues per category
391        for issue in issues {
392            for (category_name, rules) in &categories {
393                if rules.contains(&issue.rule_name.as_str()) {
394                    *category_counts
395                        .entry(category_name.to_string())
396                        .or_insert(0) += 1;
397                }
398            }
399        }
400
401        // Calculate normalized scores for each category (0-100)
402        for (category_name, _) in &categories {
403            let count = category_counts.get(*category_name).unwrap_or(&0);
404            let score = self.calculate_category_score(*count, total_lines, category_name);
405            category_scores.insert(category_name.to_string(), score);
406        }
407
408        category_scores
409    }
410
411    /// Calculate score for a specific category (0-100, where 0 is perfect, 100 is terrible, maximum 90)
412    fn calculate_category_score(
413        &self,
414        issue_count: usize,
415        total_lines: usize,
416        category: &str,
417    ) -> f64 {
418        if total_lines == 0 {
419            return 0.0; // Perfect score when no code
420        }
421
422        // Calculate issues per 1000 lines for this category
423        let issues_per_1k_lines = (issue_count as f64 / total_lines as f64) * 1000.0;
424
425        // Different thresholds for different categories
426        let (excellent_threshold, good_threshold, average_threshold, poor_threshold) =
427            match category {
428                "naming" => (0.0, 2.0, 5.0, 10.0), // Naming should be very clean
429                "complexity" => (0.0, 1.0, 3.0, 6.0), // Complexity should be low
430                "duplication" => (0.0, 0.5, 2.0, 4.0), // Duplication should be minimal
431                "rust-basics" => (0.0, 1.0, 3.0, 6.0), // Basic Rust issues
432                "advanced-rust" => (0.0, 0.5, 2.0, 4.0), // Advanced features should be used carefully
433                "rust-features" => (0.0, 0.5, 1.5, 3.0), // Special features should be rare
434                "structure" => (0.0, 1.0, 3.0, 6.0),     // Structure issues
435                _ => (0.0, 1.0, 3.0, 6.0),               // Default thresholds
436            };
437
438        // Calculate score based on thresholds (0 = excellent, 100 = terrible)
439
440        if issues_per_1k_lines <= excellent_threshold {
441            0.0 // Perfect score
442        } else if issues_per_1k_lines <= good_threshold {
443            (issues_per_1k_lines - excellent_threshold) / (good_threshold - excellent_threshold)
444                * 20.0
445        } else if issues_per_1k_lines <= average_threshold {
446            20.0 + (issues_per_1k_lines - good_threshold) / (average_threshold - good_threshold)
447                * 20.0
448        } else if issues_per_1k_lines <= poor_threshold {
449            40.0 + (issues_per_1k_lines - average_threshold) / (poor_threshold - average_threshold)
450                * 20.0
451        } else {
452            // Beyond poor threshold, score increases rapidly but caps at 90
453            let excess = issues_per_1k_lines - poor_threshold;
454            (60.0 + excess * 2.0).min(90.0) // Cap at 90 to avoid perfect 100
455        }
456    }
457
458    /// Calculate weighted final score from category scores
459    fn calculate_weighted_final_score(&self, category_scores: &HashMap<String, f64>) -> f64 {
460        // Category weights (should sum to 1.0)
461        let weights = [
462            ("naming", 0.25),        // 25% - Very important
463            ("complexity", 0.20),    // 20% - Very important
464            ("duplication", 0.15),   // 15% - Important
465            ("rust-basics", 0.15),   // 15% - Important
466            ("advanced-rust", 0.10), // 10% - Moderate
467            ("rust-features", 0.10), // 10% - Moderate
468            ("structure", 0.05),     // 5% - Less critical
469        ];
470
471        let mut weighted_sum = 0.0;
472        let mut total_weight = 0.0;
473
474        for (category, weight) in &weights {
475            if let Some(score) = category_scores.get(*category) {
476                weighted_sum += score * weight;
477                total_weight += weight;
478            }
479        }
480
481        if total_weight > 0.0 {
482            weighted_sum / total_weight
483        } else {
484            100.0 // Default to perfect score if no categories found
485        }
486    }
487}
488
489impl Default for CodeScorer {
490    fn default() -> Self {
491        Self::new()
492    }
493}