ricecoder_generation/
scoring_system.rs

1//! Scoring system for code quality and spec compliance
2//!
3//! Provides detailed scoring mechanisms for evaluating code quality metrics,
4//! spec compliance, and generating actionable feedback based on scores.
5
6use crate::error::GenerationError;
7use crate::models::GeneratedFile;
8use ricecoder_specs::models::Spec;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
12/// Detailed score breakdown
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct ScoreBreakdown {
15    /// Documentation score component
16    pub documentation: ScoreComponent,
17    /// Error handling score component
18    pub error_handling: ScoreComponent,
19    /// Code style score component
20    pub style: ScoreComponent,
21    /// Test coverage score component
22    pub coverage: ScoreComponent,
23    /// Complexity score component
24    pub complexity: ScoreComponent,
25    /// Naming conventions score component
26    pub naming: ScoreComponent,
27}
28
29/// A single score component
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct ScoreComponent {
32    /// Component name
33    pub name: String,
34    /// Score value (0.0 to 1.0)
35    pub score: f32,
36    /// Weight in overall calculation (0.0 to 1.0)
37    pub weight: f32,
38    /// Feedback message
39    pub feedback: String,
40    /// Specific issues found
41    pub issues: Vec<String>,
42}
43
44impl ScoreComponent {
45    /// Creates a new score component
46    pub fn new(name: &str, score: f32, weight: f32) -> Self {
47        Self {
48            name: name.to_string(),
49            score: score.clamp(0.0, 1.0),
50            weight: weight.clamp(0.0, 1.0),
51            feedback: String::new(),
52            issues: Vec::new(),
53        }
54    }
55
56    /// Adds feedback to the component
57    pub fn with_feedback(mut self, feedback: &str) -> Self {
58        self.feedback = feedback.to_string();
59        self
60    }
61
62    /// Adds an issue to the component
63    pub fn add_issue(&mut self, issue: String) {
64        self.issues.push(issue);
65    }
66}
67
68/// Spec compliance score breakdown
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct ComplianceScore {
71    /// Overall compliance score (0.0 to 1.0)
72    pub overall: f32,
73    /// Requirement coverage score
74    pub requirement_coverage: f32,
75    /// Acceptance criteria coverage score
76    pub criteria_coverage: f32,
77    /// Constraint adherence score
78    pub constraint_adherence: f32,
79    /// Detailed breakdown by requirement
80    pub requirement_scores: HashMap<String, f32>,
81}
82
83impl Default for ComplianceScore {
84    fn default() -> Self {
85        Self {
86            overall: 0.0,
87            requirement_coverage: 0.0,
88            criteria_coverage: 0.0,
89            constraint_adherence: 0.0,
90            requirement_scores: HashMap::new(),
91        }
92    }
93}
94
95/// Actionable feedback based on scores
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct ScoringFeedback {
98    /// Overall assessment
99    pub assessment: String,
100    /// Strengths identified
101    pub strengths: Vec<String>,
102    /// Areas for improvement
103    pub improvements: Vec<String>,
104    /// Critical issues that must be addressed
105    pub critical_issues: Vec<String>,
106    /// Recommended next steps
107    pub next_steps: Vec<String>,
108}
109
110/// Scoring configuration
111#[derive(Debug, Clone, Serialize, Deserialize)]
112pub struct ScoringConfig {
113    /// Weight for documentation score
114    pub documentation_weight: f32,
115    /// Weight for error handling score
116    pub error_handling_weight: f32,
117    /// Weight for style score
118    pub style_weight: f32,
119    /// Weight for coverage score
120    pub coverage_weight: f32,
121    /// Weight for complexity score
122    pub complexity_weight: f32,
123    /// Weight for naming conventions score
124    pub naming_weight: f32,
125    /// Threshold for critical issues
126    pub critical_threshold: f32,
127    /// Threshold for warnings
128    pub warning_threshold: f32,
129}
130
131impl Default for ScoringConfig {
132    fn default() -> Self {
133        Self {
134            documentation_weight: 0.20,
135            error_handling_weight: 0.20,
136            style_weight: 0.15,
137            coverage_weight: 0.20,
138            complexity_weight: 0.15,
139            naming_weight: 0.10,
140            critical_threshold: 0.3,
141            warning_threshold: 0.6,
142        }
143    }
144}
145
146/// Scoring system for code quality and compliance
147#[derive(Debug, Clone)]
148pub struct ScoringSystem {
149    config: ScoringConfig,
150}
151
152impl ScoringSystem {
153    /// Creates a new scoring system with default configuration
154    pub fn new() -> Self {
155        Self {
156            config: ScoringConfig::default(),
157        }
158    }
159
160    /// Creates a new scoring system with custom configuration
161    pub fn with_config(config: ScoringConfig) -> Self {
162        Self { config }
163    }
164
165    /// Scores code quality with detailed breakdown
166    pub fn score_quality(
167        &self,
168        files: &[GeneratedFile],
169    ) -> Result<ScoreBreakdown, GenerationError> {
170        let documentation = self.score_documentation(files)?;
171        let error_handling = self.score_error_handling(files)?;
172        let style = self.score_style(files)?;
173        let coverage = self.score_coverage(files)?;
174        let complexity = self.score_complexity(files)?;
175        let naming = self.score_naming(files)?;
176
177        Ok(ScoreBreakdown {
178            documentation,
179            error_handling,
180            style,
181            coverage,
182            complexity,
183            naming,
184        })
185    }
186
187    /// Scores spec compliance
188    pub fn score_compliance(
189        &self,
190        files: &[GeneratedFile],
191        spec: &Spec,
192    ) -> Result<ComplianceScore, GenerationError> {
193        let combined_content = files
194            .iter()
195            .map(|f| f.content.as_str())
196            .collect::<Vec<_>>()
197            .join("\n");
198
199        let mut requirement_scores = HashMap::new();
200        let mut total_coverage = 0.0;
201        let mut criteria_met = 0;
202        let mut total_criteria = 0;
203
204        for requirement in &spec.requirements {
205            let mut req_coverage = 0.0;
206
207            // Check if requirement is addressed
208            if combined_content.contains(&requirement.id)
209                || combined_content.contains(&requirement.user_story)
210            {
211                req_coverage = 0.5;
212            }
213
214            // Check acceptance criteria
215            let mut criteria_count = 0;
216            for criterion in &requirement.acceptance_criteria {
217                total_criteria += 1;
218                let criterion_text = format!("{} {}", criterion.when, criterion.then);
219                if combined_content.contains(&criterion_text) {
220                    criteria_met += 1;
221                    criteria_count += 1;
222                }
223            }
224
225            if !requirement.acceptance_criteria.is_empty() {
226                let criteria_ratio =
227                    criteria_count as f32 / requirement.acceptance_criteria.len() as f32;
228                req_coverage = (req_coverage + criteria_ratio) / 2.0;
229            }
230
231            requirement_scores.insert(requirement.id.clone(), req_coverage);
232            total_coverage += req_coverage;
233        }
234
235        let requirement_coverage = if !spec.requirements.is_empty() {
236            total_coverage / spec.requirements.len() as f32
237        } else {
238            0.0
239        };
240
241        let criteria_coverage = if total_criteria > 0 {
242            criteria_met as f32 / total_criteria as f32
243        } else {
244            0.0
245        };
246
247        let constraint_adherence = self.score_constraint_adherence(files)?;
248
249        let overall =
250            (requirement_coverage * 0.4) + (criteria_coverage * 0.4) + (constraint_adherence * 0.2);
251
252        Ok(ComplianceScore {
253            overall,
254            requirement_coverage,
255            criteria_coverage,
256            constraint_adherence,
257            requirement_scores,
258        })
259    }
260
261    /// Generates actionable feedback based on scores
262    pub fn generate_feedback(
263        &self,
264        quality_breakdown: &ScoreBreakdown,
265        compliance_score: &ComplianceScore,
266    ) -> ScoringFeedback {
267        let mut strengths = Vec::new();
268        let mut improvements = Vec::new();
269        let mut critical_issues = Vec::new();
270        let mut next_steps = Vec::new();
271
272        // Analyze quality components
273        if quality_breakdown.documentation.score > 0.8 {
274            strengths.push("Excellent documentation coverage".to_string());
275        } else if quality_breakdown.documentation.score < self.config.critical_threshold {
276            critical_issues.push("Documentation is severely lacking".to_string());
277        } else if quality_breakdown.documentation.score < self.config.warning_threshold {
278            improvements.push("Improve documentation coverage".to_string());
279        }
280
281        if quality_breakdown.error_handling.score > 0.8 {
282            strengths.push("Strong error handling implementation".to_string());
283        } else if quality_breakdown.error_handling.score < self.config.critical_threshold {
284            critical_issues.push("Error handling is insufficient".to_string());
285        } else if quality_breakdown.error_handling.score < self.config.warning_threshold {
286            improvements.push("Enhance error handling".to_string());
287        }
288
289        if quality_breakdown.style.score > 0.8 {
290            strengths.push("Code style is consistent and clean".to_string());
291        } else if quality_breakdown.style.score < self.config.warning_threshold {
292            improvements.push("Improve code style consistency".to_string());
293        }
294
295        if quality_breakdown.coverage.score > 0.7 {
296            strengths.push("Good test coverage".to_string());
297        } else if quality_breakdown.coverage.score < self.config.warning_threshold {
298            improvements.push("Increase test coverage".to_string());
299        }
300
301        if quality_breakdown.complexity.score > 0.7 {
302            strengths.push("Functions have reasonable complexity".to_string());
303        } else if quality_breakdown.complexity.score < self.config.warning_threshold {
304            improvements.push("Reduce function complexity".to_string());
305        }
306
307        if quality_breakdown.naming.score > 0.8 {
308            strengths.push("Naming conventions are well-followed".to_string());
309        } else if quality_breakdown.naming.score < self.config.warning_threshold {
310            improvements.push("Improve naming consistency".to_string());
311        }
312
313        // Analyze compliance
314        if compliance_score.overall > 0.9 {
315            strengths.push("Excellent spec compliance".to_string());
316        } else if compliance_score.overall < self.config.critical_threshold {
317            critical_issues.push("Spec compliance is critically low".to_string());
318        } else if compliance_score.overall < self.config.warning_threshold {
319            improvements.push("Improve spec compliance".to_string());
320        }
321
322        // Generate assessment
323        let assessment = if critical_issues.is_empty() {
324            if improvements.is_empty() {
325                "Code quality and compliance are excellent".to_string()
326            } else {
327                "Code quality is good with some areas for improvement".to_string()
328            }
329        } else {
330            "Code quality and compliance need significant improvement".to_string()
331        };
332
333        // Generate next steps
334        if !critical_issues.is_empty() {
335            next_steps.push("Address critical issues immediately".to_string());
336        }
337        if !improvements.is_empty() {
338            next_steps.push("Implement suggested improvements".to_string());
339        }
340        if compliance_score.overall < 0.8 {
341            next_steps.push("Review spec requirements and ensure all are addressed".to_string());
342        }
343        if quality_breakdown.coverage.score < 0.6 {
344            next_steps.push("Add more comprehensive tests".to_string());
345        }
346
347        ScoringFeedback {
348            assessment,
349            strengths,
350            improvements,
351            critical_issues,
352            next_steps,
353        }
354    }
355
356    /// Scores documentation
357    fn score_documentation(
358        &self,
359        files: &[GeneratedFile],
360    ) -> Result<ScoreComponent, GenerationError> {
361        let mut component =
362            ScoreComponent::new("Documentation", 0.0, self.config.documentation_weight);
363
364        let mut total_lines = 0;
365        let mut doc_lines = 0;
366        let mut public_items = 0;
367        let mut documented_items = 0;
368
369        for file in files {
370            let lines: Vec<&str> = file.content.lines().collect();
371            total_lines += lines.len();
372
373            for (idx, line) in lines.iter().enumerate() {
374                let trimmed = line.trim();
375
376                // Count documentation lines
377                if trimmed.starts_with("///")
378                    || trimmed.starts_with("//!")
379                    || trimmed.starts_with("/**")
380                {
381                    doc_lines += 1;
382                }
383
384                // Count public items
385                if trimmed.starts_with("pub ") {
386                    public_items += 1;
387                    // Check if documented
388                    if idx > 0
389                        && (lines[idx - 1].trim().starts_with("///")
390                            || lines[idx - 1].trim().starts_with("//!"))
391                    {
392                        documented_items += 1;
393                    }
394                }
395            }
396        }
397
398        let doc_ratio = if total_lines > 0 {
399            doc_lines as f32 / total_lines as f32
400        } else {
401            0.0
402        };
403
404        let public_doc_ratio = if public_items > 0 {
405            documented_items as f32 / public_items as f32
406        } else {
407            1.0
408        };
409
410        component.score = (doc_ratio * 0.5 + public_doc_ratio * 0.5).clamp(0.0, 1.0);
411
412        if component.score > 0.8 {
413            component.feedback = "Documentation is comprehensive and well-maintained".to_string();
414        } else if component.score > 0.6 {
415            component.feedback = "Documentation is adequate but could be improved".to_string();
416            component.add_issue("Some public items lack documentation".to_string());
417        } else {
418            component.feedback = "Documentation coverage is insufficient".to_string();
419            component.add_issue(format!(
420                "Only {:.0}% of public items are documented",
421                public_doc_ratio * 100.0
422            ));
423        }
424
425        Ok(component)
426    }
427
428    /// Scores error handling
429    fn score_error_handling(
430        &self,
431        files: &[GeneratedFile],
432    ) -> Result<ScoreComponent, GenerationError> {
433        let mut component =
434            ScoreComponent::new("Error Handling", 0.0, self.config.error_handling_weight);
435
436        let mut total_lines = 0;
437        let mut error_lines = 0;
438
439        for file in files {
440            let content = &file.content;
441            let language = &file.language;
442
443            total_lines += content.lines().count();
444
445            let error_patterns = match language.to_lowercase().as_str() {
446                "rust" => vec!["Result<", "?", "unwrap", "expect", "match"],
447                "typescript" | "javascript" => vec!["try", "catch", "throw", "Error"],
448                "python" => vec!["try", "except", "raise"],
449                "go" => vec!["if err != nil", "error"],
450                "java" => vec!["try", "catch", "throw", "Exception"],
451                _ => vec![],
452            };
453
454            for pattern in error_patterns {
455                error_lines += content.matches(pattern).count();
456            }
457        }
458
459        component.score = if total_lines > 0 {
460            (error_lines as f32 / total_lines as f32).clamp(0.0, 1.0)
461        } else {
462            0.0
463        };
464
465        if component.score > 0.8 {
466            component.feedback = "Error handling is comprehensive".to_string();
467        } else if component.score > 0.5 {
468            component.feedback = "Error handling is present but could be more thorough".to_string();
469            component.add_issue("Some operations may lack proper error handling".to_string());
470        } else {
471            component.feedback = "Error handling is minimal or missing".to_string();
472            component.add_issue("Add error handling for fallible operations".to_string());
473        }
474
475        Ok(component)
476    }
477
478    /// Scores code style
479    fn score_style(&self, files: &[GeneratedFile]) -> Result<ScoreComponent, GenerationError> {
480        let mut component = ScoreComponent::new("Style", 1.0, self.config.style_weight);
481
482        for file in files {
483            let lines: Vec<&str> = file.content.lines().collect();
484
485            // Check for trailing whitespace
486            let trailing_ws = lines
487                .iter()
488                .filter(|l| l.ends_with(' ') || l.ends_with('\t'))
489                .count();
490            if trailing_ws > 0 {
491                component.score -= 0.05;
492                component.add_issue(format!("{} lines have trailing whitespace", trailing_ws));
493            }
494
495            // Check for inconsistent indentation
496            let mut indent_styles = std::collections::HashMap::new();
497            for line in &lines {
498                if line.starts_with(' ') {
499                    let spaces = line.len() - line.trim_start().len();
500                    *indent_styles.entry(spaces % 4).or_insert(0) += 1;
501                }
502            }
503
504            if indent_styles.len() > 2 {
505                component.score -= 0.1;
506                component.add_issue("Inconsistent indentation detected".to_string());
507            }
508
509            // Check for long lines
510            let long_lines = lines.iter().filter(|l| l.len() > 100).count();
511            if long_lines > 0 {
512                component.score -= 0.05;
513                component.add_issue(format!("{} lines exceed 100 characters", long_lines));
514            }
515        }
516
517        component.score = component.score.clamp(0.0, 1.0);
518
519        if component.score > 0.9 {
520            component.feedback = "Code style is excellent".to_string();
521        } else if component.score > 0.7 {
522            component.feedback = "Code style is generally good".to_string();
523        } else {
524            component.feedback = "Code style needs improvement".to_string();
525        }
526
527        Ok(component)
528    }
529
530    /// Scores test coverage
531    fn score_coverage(&self, files: &[GeneratedFile]) -> Result<ScoreComponent, GenerationError> {
532        let mut component = ScoreComponent::new("Coverage", 0.0, self.config.coverage_weight);
533
534        let has_tests = files.iter().any(|f| {
535            f.path.contains("test") || f.path.contains("spec") || f.path.ends_with("_test.rs")
536        });
537
538        if has_tests {
539            component.score = 0.6;
540            component.feedback = "Test files are present".to_string();
541        } else {
542            component.score = 0.2;
543            component.feedback = "No test files detected".to_string();
544            component.add_issue("Add unit tests for public functions".to_string());
545        }
546
547        Ok(component)
548    }
549
550    /// Scores complexity
551    fn score_complexity(&self, files: &[GeneratedFile]) -> Result<ScoreComponent, GenerationError> {
552        let mut component = ScoreComponent::new("Complexity", 1.0, self.config.complexity_weight);
553
554        for file in files {
555            let content = &file.content;
556
557            // Estimate complexity by counting nested structures
558            let mut max_nesting: i32 = 0;
559            let mut current_nesting: i32 = 0;
560
561            for ch in content.chars() {
562                match ch {
563                    '{' | '[' | '(' => {
564                        current_nesting += 1;
565                        max_nesting = max_nesting.max(current_nesting);
566                    }
567                    '}' | ']' | ')' => {
568                        current_nesting = current_nesting.saturating_sub(1);
569                    }
570                    _ => {}
571                }
572            }
573
574            if max_nesting > 5 {
575                component.score -= 0.1;
576                component.add_issue(format!("High nesting depth detected: {}", max_nesting));
577            }
578        }
579
580        component.score = component.score.clamp(0.0, 1.0);
581
582        if component.score > 0.8 {
583            component.feedback = "Functions have reasonable complexity".to_string();
584        } else {
585            component.feedback = "Some functions may be too complex".to_string();
586        }
587
588        Ok(component)
589    }
590
591    /// Scores naming conventions
592    fn score_naming(&self, files: &[GeneratedFile]) -> Result<ScoreComponent, GenerationError> {
593        let mut component = ScoreComponent::new("Naming", 1.0, self.config.naming_weight);
594
595        for file in files {
596            let language = &file.language;
597            let content = &file.content;
598
599            // Check naming conventions based on language
600            match language.to_lowercase().as_str() {
601                "rust" => {
602                    // Check for snake_case functions
603                    let snake_case_violations = content.matches("fn [A-Z]").count();
604                    if snake_case_violations > 0 {
605                        component.score -= 0.1;
606                        component.add_issue("Rust functions should use snake_case".to_string());
607                    }
608                }
609                "typescript" | "javascript" => {
610                    // Check for camelCase variables
611                    let violations = content.matches("const [a-z]_[a-z]").count();
612                    if violations > 0 {
613                        component.score -= 0.1;
614                        component
615                            .add_issue("TypeScript variables should use camelCase".to_string());
616                    }
617                }
618                "python" => {
619                    // Check for snake_case functions
620                    let violations = content.matches("def [A-Z]").count();
621                    if violations > 0 {
622                        component.score -= 0.1;
623                        component.add_issue("Python functions should use snake_case".to_string());
624                    }
625                }
626                _ => {}
627            }
628        }
629
630        component.score = component.score.clamp(0.0, 1.0);
631
632        if component.score > 0.9 {
633            component.feedback = "Naming conventions are well-followed".to_string();
634        } else if component.score > 0.7 {
635            component.feedback = "Naming conventions are mostly followed".to_string();
636        } else {
637            component.feedback = "Naming conventions need improvement".to_string();
638        }
639
640        Ok(component)
641    }
642
643    /// Scores constraint adherence
644    fn score_constraint_adherence(&self, _files: &[GeneratedFile]) -> Result<f32, GenerationError> {
645        // Simplified constraint adherence scoring
646        // In a full implementation, this would check against specific constraints
647        Ok(0.8)
648    }
649}
650
651impl Default for ScoringSystem {
652    fn default() -> Self {
653        Self::new()
654    }
655}
656
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created system carries the default configuration.
    #[test]
    fn test_scoring_system_creation() {
        let ScoringSystem { config } = ScoringSystem::new();
        assert_eq!(config.documentation_weight, 0.20);
    }

    /// In-range constructor arguments are stored unchanged.
    #[test]
    fn test_score_component_creation() {
        let ScoreComponent {
            name,
            score,
            weight,
            ..
        } = ScoreComponent::new("Test", 0.8, 0.5);
        assert_eq!(name, "Test");
        assert_eq!(score, 0.8);
        assert_eq!(weight, 0.5);
    }

    /// Out-of-range constructor arguments are limited to 1.0.
    #[test]
    fn test_score_component_clamping() {
        let ScoreComponent { score, weight, .. } = ScoreComponent::new("Test", 1.5, 1.5);
        assert_eq!(score, 1.0);
        assert_eq!(weight, 1.0);
    }

    /// The default compliance score starts at zero.
    #[test]
    fn test_compliance_score_default() {
        let ComplianceScore {
            overall,
            requirement_coverage,
            ..
        } = ComplianceScore::default();
        assert_eq!(overall, 0.0);
        assert_eq!(requirement_coverage, 0.0);
    }
}