Skip to main content

datasynth_eval/quality/
mod.rs

1//! Data quality evaluation module.
2//!
3//! Validates data quality metrics including uniqueness, completeness,
4//! format consistency, and cross-field consistency.
5
6mod completeness;
7mod consistency;
8mod format;
9mod uniqueness;
10
11pub use completeness::{CompletenessAnalysis, CompletenessAnalyzer, FieldCompleteness};
12pub use consistency::{ConsistencyAnalysis, ConsistencyAnalyzer, ConsistencyRule};
13pub use format::{FormatAnalysis, FormatAnalyzer, FormatVariation};
14pub use uniqueness::{DuplicateInfo, UniquenessAnalysis, UniquenessAnalyzer};
15
16use serde::{Deserialize, Serialize};
17
18/// Combined data quality evaluation results.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct QualityEvaluation {
21    /// Uniqueness analysis results.
22    pub uniqueness: Option<UniquenessAnalysis>,
23    /// Completeness analysis results.
24    pub completeness: Option<CompletenessAnalysis>,
25    /// Format consistency results.
26    pub format: Option<FormatAnalysis>,
27    /// Cross-field consistency results.
28    pub consistency: Option<ConsistencyAnalysis>,
29    /// Overall quality score (0.0-1.0).
30    pub overall_score: f64,
31    /// Whether quality meets thresholds.
32    pub passes: bool,
33    /// Quality issues found.
34    pub issues: Vec<String>,
35    /// Quality failures (alias for issues, used by report module).
36    pub failures: Vec<String>,
37}
38
39impl QualityEvaluation {
40    /// Create a new empty evaluation.
41    pub fn new() -> Self {
42        Self {
43            uniqueness: None,
44            completeness: None,
45            format: None,
46            consistency: None,
47            overall_score: 1.0,
48            passes: true,
49            issues: Vec::new(),
50            failures: Vec::new(),
51        }
52    }
53
54    /// Check all results against thresholds.
55    pub fn check_thresholds(&mut self, thresholds: &crate::config::EvaluationThresholds) {
56        self.issues.clear();
57        self.failures.clear();
58        let mut scores = Vec::new();
59
60        if let Some(ref uniqueness) = self.uniqueness {
61            if uniqueness.duplicate_rate > thresholds.duplicate_rate_max {
62                self.issues.push(format!(
63                    "Duplicate rate {} > {} (threshold)",
64                    uniqueness.duplicate_rate, thresholds.duplicate_rate_max
65                ));
66            }
67            scores.push(1.0 - uniqueness.duplicate_rate);
68        }
69
70        if let Some(ref completeness) = self.completeness {
71            if completeness.overall_completeness < thresholds.completeness_rate_min {
72                self.issues.push(format!(
73                    "Completeness {} < {} (threshold)",
74                    completeness.overall_completeness, thresholds.completeness_rate_min
75                ));
76            }
77            scores.push(completeness.overall_completeness);
78        }
79
80        if let Some(ref format) = self.format {
81            if format.consistency_score < thresholds.format_consistency_min {
82                self.issues.push(format!(
83                    "Format consistency {} < {} (threshold)",
84                    format.consistency_score, thresholds.format_consistency_min
85                ));
86            }
87            scores.push(format.consistency_score);
88        }
89
90        if let Some(ref consistency) = self.consistency {
91            // Use format consistency threshold for cross-field as they're related
92            if consistency.pass_rate < thresholds.format_consistency_min {
93                self.issues.push(format!(
94                    "Cross-field consistency {} < {} (threshold)",
95                    consistency.pass_rate, thresholds.format_consistency_min
96                ));
97            }
98            scores.push(consistency.pass_rate);
99        }
100
101        self.overall_score = if scores.is_empty() {
102            1.0
103        } else {
104            scores.iter().sum::<f64>() / scores.len() as f64
105        };
106
107        // Sync failures with issues
108        self.failures = self.issues.clone();
109        self.passes = self.issues.is_empty();
110    }
111}
112
113impl Default for QualityEvaluation {
114    fn default() -> Self {
115        Self::new()
116    }
117}