Skip to main content

datasynth_eval/quality/
mod.rs

1//! Data quality evaluation module.
2//!
3//! Validates data quality metrics including uniqueness, completeness,
4//! format consistency, and cross-field consistency.
5
6mod completeness;
7mod consistency;
8mod format;
9mod uniqueness;
10
11pub use completeness::{
12    CompletenessAnalysis, CompletenessAnalyzer, FieldCompleteness, FieldDefinition, FieldValue,
13};
14pub use consistency::{ConsistencyAnalysis, ConsistencyAnalyzer, ConsistencyRule};
15pub use format::{FormatAnalysis, FormatAnalyzer, FormatVariation};
16pub use uniqueness::{DuplicateInfo, UniqueRecord, UniquenessAnalysis, UniquenessAnalyzer};
17
18use serde::{Deserialize, Serialize};
19
20/// Combined data quality evaluation results.
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct QualityEvaluation {
23    /// Uniqueness analysis results.
24    pub uniqueness: Option<UniquenessAnalysis>,
25    /// Completeness analysis results.
26    pub completeness: Option<CompletenessAnalysis>,
27    /// Format consistency results.
28    pub format: Option<FormatAnalysis>,
29    /// Cross-field consistency results.
30    pub consistency: Option<ConsistencyAnalysis>,
31    /// Overall quality score (0.0-1.0).
32    pub overall_score: f64,
33    /// Whether quality meets thresholds.
34    pub passes: bool,
35    /// Quality issues found.
36    pub issues: Vec<String>,
37    /// Quality failures (alias for issues, used by report module).
38    pub failures: Vec<String>,
39}
40
41impl QualityEvaluation {
42    /// Create a new empty evaluation.
43    pub fn new() -> Self {
44        Self {
45            uniqueness: None,
46            completeness: None,
47            format: None,
48            consistency: None,
49            overall_score: 1.0,
50            passes: true,
51            issues: Vec::new(),
52            failures: Vec::new(),
53        }
54    }
55
56    /// Check all results against thresholds.
57    pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
58        self.issues.clear();
59        self.failures.clear();
60        let mut scores = Vec::new();
61
62        if let Some(ref uniqueness) = self.uniqueness {
63            if uniqueness.duplicate_rate > thresholds.duplicate_rate_max {
64                self.issues.push(format!(
65                    "Duplicate rate {} > {} (threshold)",
66                    uniqueness.duplicate_rate, thresholds.duplicate_rate_max
67                ));
68            }
69            scores.push(1.0 - uniqueness.duplicate_rate);
70        }
71
72        if let Some(ref completeness) = self.completeness {
73            if completeness.overall_completeness < thresholds.completeness_rate_min {
74                self.issues.push(format!(
75                    "Completeness {} < {} (threshold)",
76                    completeness.overall_completeness, thresholds.completeness_rate_min
77                ));
78            }
79            scores.push(completeness.overall_completeness);
80        }
81
82        if let Some(ref format) = self.format {
83            if format.consistency_score < thresholds.format_consistency_min {
84                self.issues.push(format!(
85                    "Format consistency {} < {} (threshold)",
86                    format.consistency_score, thresholds.format_consistency_min
87                ));
88            }
89            scores.push(format.consistency_score);
90        }
91
92        if let Some(ref consistency) = self.consistency {
93            // Use format consistency threshold for cross-field as they're related
94            if consistency.pass_rate < thresholds.format_consistency_min {
95                self.issues.push(format!(
96                    "Cross-field consistency {} < {} (threshold)",
97                    consistency.pass_rate, thresholds.format_consistency_min
98                ));
99            }
100            scores.push(consistency.pass_rate);
101        }
102
103        self.overall_score = if scores.is_empty() {
104            1.0
105        } else {
106            scores.iter().sum::<f64>() / scores.len() as f64
107        };
108
109        // Sync failures with issues
110        self.failures = self.issues.clone();
111        self.passes = self.issues.is_empty();
112    }
113}
114
115impl Default for QualityEvaluation {
116    fn default() -> Self {
117        Self::new()
118    }
119}