Skip to main content

dataprof_core/
quality.rs

1/// ISO 25012 quality dimensions that can be selectively requested.
2#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
3#[serde(rename_all = "lowercase")]
4pub enum QualityDimension {
5    Completeness,
6    Consistency,
7    Uniqueness,
8    Accuracy,
9    Timeliness,
10}
11
12impl QualityDimension {
13    /// All currently implemented dimensions.
14    pub fn all() -> Vec<Self> {
15        vec![
16            Self::Completeness,
17            Self::Consistency,
18            Self::Uniqueness,
19            Self::Accuracy,
20            Self::Timeliness,
21        ]
22    }
23}
24
25impl std::str::FromStr for QualityDimension {
26    type Err = String;
27
28    fn from_str(s: &str) -> Result<Self, Self::Err> {
29        match s.to_lowercase().as_str() {
30            "completeness" => Ok(Self::Completeness),
31            "consistency" => Ok(Self::Consistency),
32            "uniqueness" => Ok(Self::Uniqueness),
33            "accuracy" => Ok(Self::Accuracy),
34            "timeliness" => Ok(Self::Timeliness),
35            _ => Err(format!("Unknown quality dimension: {s}")),
36        }
37    }
38}
39
40impl std::fmt::Display for QualityDimension {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            Self::Completeness => write!(f, "completeness"),
44            Self::Consistency => write!(f, "consistency"),
45            Self::Uniqueness => write!(f, "uniqueness"),
46            Self::Accuracy => write!(f, "accuracy"),
47            Self::Timeliness => write!(f, "timeliness"),
48        }
49    }
50}
51
52/// High-level categories of analysis that can be selectively enabled.
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
54#[serde(rename_all = "lowercase")]
55pub enum MetricPack {
56    /// Column names, data types, null counts — always included.
57    Schema,
58    /// Numeric stats (min/max/mean/median/std_dev/quartiles), text lengths.
59    Statistics,
60    /// Regex pattern detection (email, phone, UUID, etc.).
61    Patterns,
62    /// ISO 25012 quality dimensions (completeness, consistency, etc.).
63    Quality,
64}
65
66impl MetricPack {
67    /// All available metric packs.
68    pub fn all() -> Vec<Self> {
69        vec![
70            Self::Schema,
71            Self::Statistics,
72            Self::Patterns,
73            Self::Quality,
74        ]
75    }
76
77    /// Whether statistics should be computed given the selected packs.
78    pub fn include_statistics(packs: Option<&[Self]>) -> bool {
79        match packs {
80            None => true,
81            Some(p) => p.contains(&Self::Statistics),
82        }
83    }
84
85    /// Whether pattern detection should run given the selected packs.
86    pub fn include_patterns(packs: Option<&[Self]>) -> bool {
87        match packs {
88            None => true,
89            Some(p) => p.contains(&Self::Patterns),
90        }
91    }
92
93    /// Whether quality metrics should be computed given the selected packs.
94    pub fn include_quality(packs: Option<&[Self]>) -> bool {
95        match packs {
96            None => true,
97            Some(p) => p.contains(&Self::Quality),
98        }
99    }
100}
101
102impl std::str::FromStr for MetricPack {
103    type Err = String;
104
105    fn from_str(s: &str) -> Result<Self, Self::Err> {
106        match s.to_lowercase().as_str() {
107            "schema" => Ok(Self::Schema),
108            "statistics" => Ok(Self::Statistics),
109            "patterns" => Ok(Self::Patterns),
110            "quality" => Ok(Self::Quality),
111            _ => Err(format!(
112                "Unknown metric pack: {s}. Valid packs: schema, statistics, patterns, quality"
113            )),
114        }
115    }
116}
117
118impl std::fmt::Display for MetricPack {
119    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
120        match self {
121            Self::Schema => write!(f, "schema"),
122            Self::Statistics => write!(f, "statistics"),
123            Self::Patterns => write!(f, "patterns"),
124            Self::Quality => write!(f, "quality"),
125        }
126    }
127}
128
#[cfg(test)]
mod tests {
    use super::*;

    /// With no explicit selection, every optional pack is enabled.
    #[test]
    fn test_metric_pack_include_helpers_none_means_all() {
        let enabled = [
            MetricPack::include_statistics(None),
            MetricPack::include_patterns(None),
            MetricPack::include_quality(None),
        ];
        assert_eq!(enabled, [true, true, true]);
    }

    /// With an explicit selection, only the listed packs are enabled.
    #[test]
    fn test_metric_pack_include_helpers_selective() {
        let selected = [MetricPack::Schema, MetricPack::Quality];
        let packs = Some(&selected[..]);

        assert!(!MetricPack::include_statistics(packs));
        assert!(!MetricPack::include_patterns(packs));
        assert!(MetricPack::include_quality(packs));
    }

    /// Parsing accepts any letter case and rejects unknown names.
    #[test]
    fn test_metric_pack_from_str() {
        let lower = "statistics".parse::<MetricPack>();
        assert_eq!(lower, Ok(MetricPack::Statistics));

        let upper = "QUALITY".parse::<MetricPack>();
        assert_eq!(upper, Ok(MetricPack::Quality));

        let unknown = "invalid".parse::<MetricPack>();
        assert!(unknown.is_err());
    }
}