use crate::constraints::{
Assertion, FormatOptions, FormatType, StatisticType, UniquenessOptions, UniquenessType,
};
use crate::core::{CheckBuilder, ConstraintOptions, LogicalOperator};
use crate::prelude::*;
/// Options for a completeness check, convertible into the generic
/// [`ConstraintOptions`] via [`CompletenessOptions::into_constraint_options`].
#[derive(Debug, Clone)]
pub struct CompletenessOptions {
    /// Threshold forwarded to `ConstraintOptions::with_threshold`; when `None`,
    /// no threshold is set on the resulting options.
    threshold: Option<f64>,
    /// Operator forwarded to `ConstraintOptions::with_operator`.
    operator: LogicalOperator,
    /// Value stored under the `"null_is_failure"` flag.
    null_is_failure: bool,
}

impl CompletenessOptions {
    /// Shared constructor: every public constructor starts with
    /// `null_is_failure` set to `true`.
    fn from_parts(threshold: Option<f64>, operator: LogicalOperator) -> Self {
        Self {
            threshold,
            operator,
            null_is_failure: true,
        }
    }

    /// Options with a threshold of `1.0` and `LogicalOperator::All`.
    pub fn full() -> Self {
        Self::from_parts(Some(1.0), LogicalOperator::All)
    }

    /// Options with the given `threshold` and `LogicalOperator::All`.
    ///
    /// The value is stored as-is; no range validation happens here.
    pub fn threshold(threshold: f64) -> Self {
        Self::from_parts(Some(threshold), LogicalOperator::All)
    }

    /// Options using `LogicalOperator::AtLeast(n)` and no threshold.
    pub fn at_least(n: usize) -> Self {
        Self::from_parts(None, LogicalOperator::AtLeast(n))
    }

    /// Options using `LogicalOperator::Any` and no threshold.
    pub fn any() -> Self {
        Self::from_parts(None, LogicalOperator::Any)
    }

    /// Replaces the logical operator, keeping every other setting.
    pub fn with_operator(self, operator: LogicalOperator) -> Self {
        Self { operator, ..self }
    }

    /// Sets the value that will be stored under the `"null_is_failure"` flag.
    pub fn null_handling(self, null_is_failure: bool) -> Self {
        Self {
            null_is_failure,
            ..self
        }
    }

    /// Converts these options into a [`ConstraintOptions`].
    ///
    /// The operator and the `"null_is_failure"` flag are always set; the
    /// threshold is only set when one was configured.
    pub fn into_constraint_options(self) -> ConstraintOptions {
        let base = ConstraintOptions::new()
            .with_operator(self.operator)
            .with_flag("null_is_failure", self.null_is_failure);
        match self.threshold {
            Some(value) => base.with_threshold(value),
            None => base,
        }
    }
}
/// An ordered list of `(StatisticType, Assertion)` pairs to check on a column.
///
/// Entries are kept in the order the builder methods were called.
#[derive(Debug, Clone, Default)]
pub struct StatisticalOptions {
    /// Requested statistics, in insertion order.
    statistics: Vec<(StatisticType, Assertion)>,
}

impl StatisticalOptions {
    /// Creates an empty set of statistical options.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends one `(kind, assertion)` entry and hands the builder back.
    fn record(mut self, kind: StatisticType, assertion: Assertion) -> Self {
        self.statistics.push((kind, assertion));
        self
    }

    /// Adds an assertion on `StatisticType::Min`.
    pub fn min(self, assertion: Assertion) -> Self {
        self.record(StatisticType::Min, assertion)
    }

    /// Adds an assertion on `StatisticType::Max`.
    pub fn max(self, assertion: Assertion) -> Self {
        self.record(StatisticType::Max, assertion)
    }

    /// Adds an assertion on `StatisticType::Mean`.
    pub fn mean(self, assertion: Assertion) -> Self {
        self.record(StatisticType::Mean, assertion)
    }

    /// Adds an assertion on `StatisticType::Sum`.
    pub fn sum(self, assertion: Assertion) -> Self {
        self.record(StatisticType::Sum, assertion)
    }

    /// Adds an assertion on `StatisticType::StandardDeviation`.
    pub fn standard_deviation(self, assertion: Assertion) -> Self {
        self.record(StatisticType::StandardDeviation, assertion)
    }

    /// Adds an assertion on `StatisticType::Variance`.
    pub fn variance(self, assertion: Assertion) -> Self {
        self.record(StatisticType::Variance, assertion)
    }

    /// Adds an assertion on `StatisticType::Median`.
    pub fn median(self, assertion: Assertion) -> Self {
        self.record(StatisticType::Median, assertion)
    }

    /// Adds an assertion on `StatisticType::Percentile(percentile)`.
    ///
    /// `percentile` is stored as given; it is not validated here.
    pub fn percentile(self, percentile: f64, assertion: Assertion) -> Self {
        self.record(StatisticType::Percentile(percentile), assertion)
    }

    /// Returns `true` when two or more statistics have been added.
    pub fn is_multi(&self) -> bool {
        self.statistics.len() >= 2
    }

    /// Consumes the options and returns the recorded entries in insertion order.
    pub fn into_statistics(self) -> Vec<(StatisticType, Assertion)> {
        self.statistics
    }
}
impl CheckBuilder {
    /// Adds one statistic constraint on `column` for every entry in `options`.
    ///
    /// Entries are applied in the order they were added to `options`, each via
    /// `CheckBuilder::statistic`.
    ///
    /// # Errors
    ///
    /// The body shown here never constructs an error; it always returns `Ok`.
    pub fn statistics(
        self,
        column: impl Into<String>,
        options: StatisticalOptions,
    ) -> Result<Self> {
        let column_name: String = column.into();
        let builder = options
            .into_statistics()
            .into_iter()
            .fold(self, |builder, (kind, assertion)| {
                // Each call takes an owned column name, so hand it a fresh copy.
                builder.statistic(column_name.clone(), kind, assertion)
            });
        Ok(builder)
    }

    /// Passes the builder through `build_fn` and returns whatever it produces.
    ///
    /// Lets a group of constraints be added from a closure while staying in a
    /// method chain.
    pub fn with_constraints<F>(self, build_fn: F) -> Self
    where
        F: FnOnce(Self) -> Self,
    {
        build_fn(self)
    }
}
impl CheckBuilder {
    /// Adds the pair of constraints used for a primary key over `columns`:
    /// a completeness constraint built from `CompletenessOptions::full()` and a
    /// `UniquenessType::FullUniqueness` constraint with a threshold of `1.0`.
    ///
    /// `columns` is consumed directly; the input collection does not need to be
    /// `Clone`.
    pub fn primary_key<I, S>(self, columns: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        let key_columns: Vec<String> = columns.into_iter().map(Into::into).collect();
        // Both constraints take ownership of the column list, so exactly one
        // copy of the collected names is required.
        self.completeness(
            key_columns.clone(),
            CompletenessOptions::full().into_constraint_options(),
        )
        .uniqueness(
            key_columns,
            UniquenessType::FullUniqueness { threshold: 1.0 },
            UniquenessOptions::default(),
        )
    }

    /// Adds a `FormatType::Email` format constraint on `column`.
    ///
    /// `threshold` is forwarded unchanged to `has_format`. Values are trimmed
    /// before checking and nulls are explicitly marked as not valid.
    pub fn email(self, column: impl Into<String>, threshold: f64) -> Self {
        self.has_format(
            column,
            FormatType::Email,
            threshold,
            FormatOptions::new()
                .trim_before_check(true)
                .null_is_valid(false),
        )
    }

    /// Adds a `FormatType::Url` format constraint on `column` with
    /// `allow_localhost` set to `false`.
    ///
    /// `threshold` is forwarded unchanged to `has_format`. Values are trimmed
    /// before checking. Unlike [`CheckBuilder::email`], `null_is_valid` is not
    /// set here, so null handling is whatever `FormatOptions::new()` provides.
    pub fn url(self, column: impl Into<String>, threshold: f64) -> Self {
        self.has_format(
            column,
            FormatType::Url {
                allow_localhost: false,
            },
            threshold,
            FormatOptions::new().trim_before_check(true),
        )
    }

    /// Adds a `FormatType::Phone` format constraint on `column`.
    ///
    /// `country_code`, when present, is copied into the `country` field of the
    /// format type. `threshold` is forwarded unchanged to `has_format`, and
    /// values are trimmed before checking.
    pub fn phone(
        self,
        column: impl Into<String>,
        threshold: f64,
        country_code: Option<&str>,
    ) -> Self {
        let format_type = FormatType::Phone {
            country: country_code.map(str::to_owned),
        };
        self.has_format(
            column,
            format_type,
            threshold,
            FormatOptions::new().trim_before_check(true),
        )
    }

    /// Adds a `FormatType::SocialSecurityNumber` format constraint on `column`.
    ///
    /// `threshold` is forwarded unchanged to `has_format`, and values are
    /// trimmed before checking.
    pub fn contains_ssn(self, column: impl Into<String>, threshold: f64) -> Self {
        self.has_format(
            column,
            FormatType::SocialSecurityNumber,
            threshold,
            FormatOptions::new().trim_before_check(true),
        )
    }

    /// Constrains the values of `column` to the inclusive range `[min, max]`.
    ///
    /// Implemented as two statistic constraints: the column minimum must be
    /// `>= min` and the column maximum must be `<= max`. The bounds are not
    /// checked against each other here.
    ///
    /// # Errors
    ///
    /// Returns whatever [`CheckBuilder::statistics`] returns.
    pub fn value_range(self, column: impl Into<String>, min: f64, max: f64) -> Result<Self> {
        self.statistics(
            column,
            StatisticalOptions::new()
                .min(Assertion::GreaterThanOrEqual(min))
                .max(Assertion::LessThanOrEqual(max)),
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::Check;

    /// Each `CompletenessOptions` constructor carries its settings through
    /// `into_constraint_options`.
    #[test]
    fn test_completeness_options() {
        let full_opts = CompletenessOptions::full().into_constraint_options();
        assert_eq!(full_opts.threshold_or(0.0), 1.0);
        assert!(full_opts.flag("null_is_failure"));

        let partial_opts = CompletenessOptions::threshold(0.95).into_constraint_options();
        assert_eq!(partial_opts.threshold_or(0.0), 0.95);

        let at_least_opts = CompletenessOptions::at_least(2).into_constraint_options();
        assert!(matches!(
            at_least_opts.operator_or(LogicalOperator::All),
            LogicalOperator::AtLeast(2)
        ));
    }

    /// Three chained statistics count as "multi" and are all retained.
    #[test]
    fn test_statistical_options() {
        let three_stats = StatisticalOptions::new()
            .min(Assertion::GreaterThan(0.0))
            .max(Assertion::LessThan(100.0))
            .mean(Assertion::Between(25.0, 75.0));
        assert!(three_stats.is_multi());

        let recorded = three_stats.into_statistics();
        assert_eq!(recorded.len(), 3);
    }

    /// Every builder entry point exercised here yields exactly one constraint.
    #[test]
    fn test_new_builder_api() {
        // Single-column completeness.
        let single_column = Check::builder("test")
            .completeness(
                "user_id",
                CompletenessOptions::full().into_constraint_options(),
            )
            .build();
        assert_eq!(single_column.constraints().len(), 1);

        // Multi-column completeness with an "at least" operator.
        let multi_column = Check::builder("test")
            .completeness(
                vec!["email", "phone"],
                CompletenessOptions::at_least(1).into_constraint_options(),
            )
            .build();
        assert_eq!(multi_column.constraints().len(), 1);

        // Explicit format constraint.
        let explicit_format = Check::builder("test")
            .has_format("email", FormatType::Email, 0.95, FormatOptions::default())
            .build();
        assert_eq!(explicit_format.constraints().len(), 1);

        // Convenience wrapper around the same format constraint.
        let email_shortcut = Check::builder("test").email("email_field", 0.95).build();
        assert_eq!(email_shortcut.constraints().len(), 1);
    }
}