use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// Action named by the `on_failure` field that every record and batch check carries.
///
/// Serialized in snake_case: `"quarantine"`, `"quarantine_batch"` or `"abort"`.
// NOTE(review): this file only declares the options. What the pipeline does for
// each one when a check fails is implemented elsewhere; the variant names suggest
// "set aside the record", "set aside the whole batch" and "stop the run" —
// confirm against the code that consumes this enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum OnFailure {
/// Serialized as `"quarantine"`.
Quarantine,
/// Serialized as `"quarantine_batch"`.
QuarantineBatch,
/// Serialized as `"abort"`.
Abort,
}
/// Comparison operator carried in the `op` field of `RecordCheck::Compare`.
///
/// Serialized in snake_case, which for these variants is simply the lowercase
/// name (`"gt"`, `"gte"`, `"lt"`, `"lte"`, `"eq"`, `"ne"`).
// NOTE(review): operand order (presumably `field <op> value`) and how values of
// different JSON types compare are decided by the evaluator, not by this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum CompareOp {
/// Greater than; serialized as `"gt"`.
Gt,
/// Greater than or equal; serialized as `"gte"`.
Gte,
/// Less than; serialized as `"lt"`.
Lt,
/// Less than or equal; serialized as `"lte"`.
Lte,
/// Equal; serialized as `"eq"`.
Eq,
/// Not equal; serialized as `"ne"`.
Ne,
}
impl std::fmt::Display for CompareOp {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
CompareOp::Gt => "gt",
CompareOp::Gte => "gte",
CompareOp::Lt => "lt",
CompareOp::Lte => "lte",
CompareOp::Eq => "eq",
CompareOp::Ne => "ne",
})
}
}
/// Name of a JSON value kind, used as the `expected` field of `RecordCheck::TypeIs`.
///
/// Serialized in snake_case, which for these variants is the lowercase name
/// (`"boolean"`, `"number"`, `"string"`, `"array"`, `"object"`, `"null"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum JsonType {
/// Serialized as `"boolean"`.
Boolean,
/// Serialized as `"number"`.
Number,
/// Serialized as `"string"`.
String,
/// Serialized as `"array"`.
Array,
/// Serialized as `"object"`.
Object,
/// Serialized as `"null"`.
Null,
}
impl std::fmt::Display for JsonType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match self {
JsonType::Boolean => "boolean",
JsonType::Number => "number",
JsonType::String => "string",
JsonType::Array => "array",
JsonType::Object => "object",
JsonType::Null => "null",
})
}
}
/// Serde default provider that always yields `true`.
///
/// Referenced by path from `#[serde(default = "default_true")]`, so a boolean
/// field omitted from the input deserializes as `true` instead of
/// `bool::default()` (`false`).
fn default_true() -> bool {
    true
}
/// A quality block: one list of record checks and one list of batch checks.
///
/// Both keys are optional when deserializing; a missing key yields an empty
/// list, so `{}` parses to a spec with no checks at all.
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct QualitySpec {
/// Record checks, in the order they were declared. Empty when the key is absent.
#[serde(default)]
pub record: Vec<RecordCheck>,
/// Batch checks, in the order they were declared. Empty when the key is absent.
#[serde(default)]
pub batch: Vec<BatchCheck>,
}
/// A check listed under `QualitySpec::record`.
///
/// Internally tagged: the `type` key selects the variant by its snake_case name
/// (for example `"not_null"` or `"string_length"`) and the remaining keys are
/// that variant's fields. Every variant carries an `on_failure` action.
// NOTE(review): only the configuration shape is defined here. What each check
// actually tests (null handling, regex flavor, length units, comparison rules)
// is implemented by the evaluator elsewhere — the variant docs below therefore
// describe the shape only; confirm semantics against that code.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RecordCheck {
/// Tag `"not_null"`.
NotNull {
field: String,
/// Defaults to `true` when omitted from the input.
// NOTE(review): by its name this makes an absent field count as null — confirm.
#[serde(default = "default_true")]
treat_missing_as_null: bool,
on_failure: OnFailure,
},
/// Tag `"not_empty"`.
NotEmpty {
field: String,
on_failure: OnFailure,
},
/// Tag `"regex_match"`.
RegexMatch {
field: String,
/// Pattern text, stored as given; it is not compiled or validated here.
pattern: String,
on_failure: OnFailure,
},
/// Tag `"value_in_set"`.
ValueInSet {
field: String,
/// Arbitrary JSON values.
values: Vec<Value>,
on_failure: OnFailure,
},
/// Tag `"not_in_set"`.
NotInSet {
field: String,
/// Arbitrary JSON values.
values: Vec<Value>,
on_failure: OnFailure,
},
/// Tag `"compare"`.
Compare {
field: String,
op: CompareOp,
/// Arbitrary JSON value used as the comparison operand.
value: Value,
on_failure: OnFailure,
},
/// Tag `"type_is"`.
TypeIs {
field: String,
expected: JsonType,
on_failure: OnFailure,
},
/// Tag `"string_length"`. Both bounds are optional.
// NOTE(review): inclusivity of the bounds and the unit (bytes vs. chars) are
// decided by the evaluator — confirm.
StringLength {
field: String,
/// `None` when omitted.
#[serde(default)]
min: Option<usize>,
/// `None` when omitted.
#[serde(default)]
max: Option<usize>,
on_failure: OnFailure,
},
/// Tag `"json_schema"`. Only compiled in when the `quality-jsonschema` feature
/// is enabled.
#[cfg(feature = "quality-jsonschema")]
JsonSchema {
/// Schema document as raw JSON; it is not validated here.
schema: Value,
on_failure: OnFailure,
},
}
/// A check listed under `QualitySpec::batch`.
///
/// Internally tagged: the `type` key selects the variant by its snake_case name
/// (`"row_count"`, `"null_rate"`, `"unique"`, `"distinct_count"`) and the
/// remaining keys are that variant's fields. Every variant carries an
/// `on_failure` action.
// NOTE(review): only the configuration shape is defined here; how each check is
// computed over a batch lives elsewhere — confirm semantics against that code.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum BatchCheck {
/// Tag `"row_count"`. Both bounds are optional.
RowCount {
/// `None` when omitted.
#[serde(default)]
min: Option<usize>,
/// `None` when omitted.
#[serde(default)]
max: Option<usize>,
on_failure: OnFailure,
},
/// Tag `"null_rate"`.
NullRate {
field: String,
/// Required upper bound; no range validation happens at parse time.
// NOTE(review): presumably a fraction in 0.0..=1.0 rather than a percentage — confirm.
max: f64,
on_failure: OnFailure,
},
/// Tag `"unique"`.
Unique {
/// One or more field names.
// NOTE(review): presumably the combination of these fields forms the key — confirm.
fields: Vec<String>,
on_failure: OnFailure,
},
/// Tag `"distinct_count"`. Both bounds are optional.
DistinctCount {
field: String,
/// `None` when omitted.
#[serde(default)]
min: Option<usize>,
/// `None` when omitted.
#[serde(default)]
max: Option<usize>,
on_failure: OnFailure,
},
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Multi-word variants must come out in snake_case, not as the Rust identifier.
    #[test]
    fn on_failure_serializes_snake_case() {
        assert_eq!(
            serde_json::to_string(&OnFailure::QuarantineBatch).unwrap(),
            "\"quarantine_batch\""
        );
    }

    /// Every operator survives deserialize -> serialize, and `Display` prints the
    /// same name serde uses.
    #[test]
    fn compare_op_round_trips() {
        let cases = [
            (CompareOp::Gt, "gt"),
            (CompareOp::Gte, "gte"),
            (CompareOp::Lt, "lt"),
            (CompareOp::Lte, "lte"),
            (CompareOp::Eq, "eq"),
            (CompareOp::Ne, "ne"),
        ];
        for &(op, name) in cases.iter() {
            let json = format!("\"{}\"", name);
            let parsed: CompareOp = serde_json::from_str(&json).unwrap();
            assert_eq!(parsed, op);
            assert_eq!(serde_json::to_string(&op).unwrap(), json);
            assert_eq!(op.to_string(), name);
        }
    }

    /// Every JSON type name survives deserialize -> serialize, and `Display`
    /// prints the same name serde uses.
    #[test]
    fn json_type_round_trips() {
        let cases = [
            (JsonType::Boolean, "boolean"),
            (JsonType::Number, "number"),
            (JsonType::String, "string"),
            (JsonType::Array, "array"),
            (JsonType::Object, "object"),
            (JsonType::Null, "null"),
        ];
        for &(ty, name) in cases.iter() {
            let json = format!("\"{}\"", name);
            let parsed: JsonType = serde_json::from_str(&json).unwrap();
            assert_eq!(parsed, ty);
            assert_eq!(serde_json::to_string(&ty).unwrap(), json);
            assert_eq!(ty.to_string(), name);
        }
    }

    /// A representative block with record and batch checks parses into the
    /// expected variants, and omitted optional fields pick up their defaults.
    #[test]
    fn parses_full_quality_block() {
        let spec: QualitySpec = serde_json::from_value(serde_json::json!({
            "record": [
                { "type": "not_null", "field": "user_id", "on_failure": "quarantine" },
                { "type": "compare", "field": "age", "op": "gte", "value": 0, "on_failure": "abort" },
                { "type": "string_length", "field": "name", "min": 1, "max": 256, "on_failure": "quarantine" }
            ],
            "batch": [
                { "type": "row_count", "min": 1, "max": 100000, "on_failure": "abort" },
                { "type": "unique", "fields": ["id"], "on_failure": "quarantine" }
            ]
        }))
        .unwrap();

        assert_eq!(spec.record.len(), 3);
        assert_eq!(spec.batch.len(), 2);
        assert!(matches!(spec.batch[1], BatchCheck::Unique { .. }));

        // The input above leaves `treat_missing_as_null` out on purpose.
        match &spec.record[0] {
            RecordCheck::NotNull {
                treat_missing_as_null,
                ..
            } => assert!(
                *treat_missing_as_null,
                "treat_missing_as_null defaults to true"
            ),
            other => panic!(
                "expected first record check to be NotNull, got {:?}",
                other
            ),
        }
    }

    /// Both lists are optional, so an empty object is a valid spec with no checks.
    #[test]
    fn empty_quality_block_defaults_to_no_checks() {
        let spec: QualitySpec = serde_json::from_str("{}").unwrap();
        assert!(spec.record.is_empty());
        assert!(spec.batch.is_empty());
    }
}