Skip to main content

alimentar/quality/
scoring.rs

1//! 100-Point Quality Scoring System (GH-6)
2//!
3//! Based on the Toyota Way principles of Jidoka (built-in quality) and
4//! the Doctest Corpus QA Checklist for Publication.
5
6use std::{collections::HashMap, fmt};
7
/// How serious a quality-checklist finding is, per the QA checklist.
///
/// Every level maps to a multiplier ([`Severity::weight`]) and to a point
/// value ([`Severity::base_points`]); both tables currently hold the same
/// numbers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Severity {
    /// Critical issues block publication (2.0x weight)
    Critical,
    /// High priority issues (1.5x weight)
    High,
    /// Medium priority issues (1.0x weight)
    Medium,
    /// Low priority issues (0.5x weight)
    Low,
}

impl Severity {
    /// Returns the weight multiplier attached to this severity level.
    #[must_use]
    pub fn weight(&self) -> f64 {
        match *self {
            Severity::Low => 0.5,
            Severity::Medium => 1.0,
            Severity::High => 1.5,
            Severity::Critical => 2.0,
        }
    }

    /// Returns how many points a single check of this severity is worth.
    #[must_use]
    pub fn base_points(&self) -> f64 {
        match *self {
            Severity::Low => 0.5,
            Severity::Medium => 1.0,
            Severity::High => 1.5,
            Severity::Critical => 2.0,
        }
    }
}

impl fmt::Display for Severity {
    /// Writes the variant name ("Critical", "High", "Medium" or "Low").
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Severity::Critical => "Critical",
            Severity::High => "High",
            Severity::Medium => "Medium",
            Severity::Low => "Low",
        };
        // `write_str` emits the label verbatim, without applying width or
        // alignment flags from the format spec.
        f.write_str(label)
    }
}
55
/// Letter grade summarising dataset quality.
///
/// Variants are declared from best to worst, so the derived ordering ranks
/// `A` as the smallest value and `F` as the largest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LetterGrade {
    /// A (95-100): Publish immediately
    A,
    /// B (85-94): Publish with documented caveats
    B,
    /// C (70-84): Remediation required before publication
    C,
    /// D (50-69): Major rework needed
    D,
    /// F (<50): Do not publish
    F,
}

impl LetterGrade {
    /// Maps a numeric score (nominally 0-100) onto a letter grade.
    ///
    /// Thresholds are inclusive lower bounds: 95 -> A, 85 -> B, 70 -> C,
    /// 50 -> D. Anything that satisfies none of them (including NaN, for
    /// which every comparison is false) is graded `F`.
    #[must_use]
    pub fn from_score(score: f64) -> Self {
        if score >= 95.0 {
            return Self::A;
        }
        if score >= 85.0 {
            return Self::B;
        }
        if score >= 70.0 {
            return Self::C;
        }
        if score >= 50.0 {
            return Self::D;
        }
        Self::F
    }

    /// Returns the human-readable publication decision for this grade.
    #[must_use]
    pub fn publication_decision(&self) -> &'static str {
        match *self {
            LetterGrade::A => "Publish immediately",
            LetterGrade::B => "Publish with documented caveats",
            LetterGrade::C => "Remediation required before publication",
            LetterGrade::D => "Major rework needed",
            LetterGrade::F => "Do not publish",
        }
    }

    /// Returns `true` only for grades `A` and `B`.
    #[must_use]
    pub fn is_publishable(&self) -> bool {
        match *self {
            LetterGrade::A | LetterGrade::B => true,
            LetterGrade::C | LetterGrade::D | LetterGrade::F => false,
        }
    }
}

impl fmt::Display for LetterGrade {
    /// Writes the single-letter name of the grade.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let letter = match *self {
            LetterGrade::A => "A",
            LetterGrade::B => "B",
            LetterGrade::C => "C",
            LetterGrade::D => "D",
            LetterGrade::F => "F",
        };
        f.write_str(letter)
    }
}
114
115/// A scored quality check item from the 100-point checklist
116#[derive(Debug, Clone)]
117pub struct ChecklistItem {
118    /// Unique identifier (e.g., "1", "25", "53")
119    pub id: u8,
120    /// Check description
121    pub description: String,
122    /// Pass/fail status
123    pub passed: bool,
124    /// Severity level
125    pub severity: Severity,
126    /// Suggestion for improvement if failed
127    pub suggestion: Option<String>,
128}
129
130impl ChecklistItem {
131    /// Create a new checklist item
132    #[must_use]
133    pub fn new(id: u8, description: impl Into<String>, severity: Severity, passed: bool) -> Self {
134        Self {
135            id,
136            description: description.into(),
137            passed,
138            severity,
139            suggestion: None,
140        }
141    }
142
143    /// Add a suggestion for improvement
144    #[must_use]
145    pub fn with_suggestion(mut self, suggestion: impl Into<String>) -> Self {
146        self.suggestion = Some(suggestion.into());
147        self
148    }
149
150    /// Get the points earned (0 if failed, severity points if passed)
151    #[must_use]
152    pub fn points_earned(&self) -> f64 {
153        if self.passed {
154            self.severity.base_points()
155        } else {
156            0.0
157        }
158    }
159
160    /// Get the maximum possible points for this item
161    #[must_use]
162    pub fn max_points(&self) -> f64 {
163        self.severity.base_points()
164    }
165}
166
167/// Complete quality score with breakdown
168#[derive(Debug, Clone)]
169pub struct QualityScore {
170    /// Numeric score (0-100)
171    pub score: f64,
172    /// Letter grade
173    pub grade: LetterGrade,
174    /// Total points earned
175    pub points_earned: f64,
176    /// Maximum possible points
177    pub max_points: f64,
178    /// Individual checklist items
179    pub checklist: Vec<ChecklistItem>,
180    /// Summary statistics by severity
181    pub severity_breakdown: HashMap<Severity, SeverityStats>,
182}
183
184/// Statistics for a severity level
185#[derive(Debug, Clone, Default)]
186pub struct SeverityStats {
187    /// Number of checks at this severity
188    pub total: usize,
189    /// Number of passed checks
190    pub passed: usize,
191    /// Number of failed checks
192    pub failed: usize,
193    /// Points earned at this severity
194    pub points_earned: f64,
195    /// Maximum possible points at this severity
196    pub max_points: f64,
197}
198
199impl QualityScore {
200    /// Create a quality score from checklist items
201    #[must_use]
202    pub fn from_checklist(checklist: Vec<ChecklistItem>) -> Self {
203        let mut severity_breakdown: HashMap<Severity, SeverityStats> = HashMap::new();
204
205        let mut points_earned = 0.0;
206        let mut max_points = 0.0;
207
208        for item in &checklist {
209            let stats = severity_breakdown.entry(item.severity).or_default();
210
211            stats.total += 1;
212            stats.max_points += item.max_points();
213
214            if item.passed {
215                stats.passed += 1;
216                stats.points_earned += item.points_earned();
217                points_earned += item.points_earned();
218            } else {
219                stats.failed += 1;
220            }
221
222            max_points += item.max_points();
223        }
224
225        let score = if max_points > 0.0 {
226            (points_earned / max_points * 100.0).clamp(0.0, 100.0)
227        } else {
228            100.0
229        };
230
231        let grade = LetterGrade::from_score(score);
232
233        Self {
234            score,
235            grade,
236            points_earned,
237            max_points,
238            checklist,
239            severity_breakdown,
240        }
241    }
242
243    /// Get failed items for actionable suggestions
244    #[must_use]
245    pub fn failed_items(&self) -> Vec<&ChecklistItem> {
246        self.checklist.iter().filter(|item| !item.passed).collect()
247    }
248
249    /// Get critical failures (blocks publication)
250    #[must_use]
251    pub fn critical_failures(&self) -> Vec<&ChecklistItem> {
252        self.checklist
253            .iter()
254            .filter(|item| !item.passed && item.severity == Severity::Critical)
255            .collect()
256    }
257
258    /// Check if there are any critical failures
259    #[must_use]
260    pub fn has_critical_failures(&self) -> bool {
261        self.checklist
262            .iter()
263            .any(|item| !item.passed && item.severity == Severity::Critical)
264    }
265
266    /// Generate a badge URL for shields.io
267    #[must_use]
268    pub fn badge_url(&self) -> String {
269        let color = match self.grade {
270            LetterGrade::A => "brightgreen",
271            LetterGrade::B => "green",
272            LetterGrade::C => "yellow",
273            LetterGrade::D => "orange",
274            LetterGrade::F => "red",
275        };
276        format!(
277            "https://img.shields.io/badge/data_quality-{}_({:.0}%25)-{}",
278            self.grade, self.score, color
279        )
280    }
281
282    /// Generate JSON output for CI/CD integration
283    #[must_use]
284    pub fn to_json(&self) -> String {
285        let failed_items: Vec<_> = self
286            .failed_items()
287            .iter()
288            .map(|item| {
289                format!(
290                    r#"    {{"id": {}, "description": "{}", "severity": "{}", "suggestion": {}}}"#,
291                    item.id,
292                    item.description.replace('"', "\\\""),
293                    item.severity,
294                    item.suggestion
295                        .as_ref()
296                        .map(|s| format!("\"{}\"", s.replace('"', "\\\"")))
297                        .unwrap_or_else(|| "null".to_string())
298                )
299            })
300            .collect();
301
302        format!(
303            r#"{{
304  "score": {:.2},
305  "grade": "{}",
306  "is_publishable": {},
307  "decision": "{}",
308  "points_earned": {:.2},
309  "max_points": {:.2},
310  "critical_failures": {},
311  "failed_items": [
312{}
313  ],
314  "badge_url": "{}"
315}}"#,
316            self.score,
317            self.grade,
318            self.grade.is_publishable(),
319            self.grade.publication_decision(),
320            self.points_earned,
321            self.max_points,
322            self.has_critical_failures(),
323            failed_items.join(",\n"),
324            self.badge_url()
325        )
326    }
327}