use crate::error::{QcIssue, QcResult, Severity};
use oxigdal_core::vector::{FeatureCollection, FeatureId, FieldValue};
use std::collections::HashSet;
/// Renders a feature identifier as an owned `String`.
///
/// String identifiers are copied as-is; integer identifiers are converted to
/// their decimal text form.
fn feature_id_to_string(id: &FeatureId) -> String {
    match id {
        FeatureId::String(text) => text.to_owned(),
        FeatureId::Integer(number) => number.to_string(),
    }
}
/// Aggregated outcome of an attribute validation run over a feature collection.
///
/// Produced by `AttributionChecker::validate`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AttributionResult {
/// Number of features in the validated collection.
pub feature_count: usize,
/// Number of features for which no required field was missing.
pub complete_features: usize,
/// Number of features missing at least one required field.
pub incomplete_features: usize,
/// One entry per required field that is absent from a feature's properties.
pub required_field_violations: Vec<FieldViolation>,
/// Values that fall outside the domain configured for their field.
pub domain_violations: Vec<DomainViolation>,
/// Non-null values whose type does not match the field's expected type.
pub type_violations: Vec<TypeViolation>,
/// Null values found in fields that are configured as non-nullable.
pub null_violations: Vec<NullViolation>,
/// Values that are not part of the valid set of a referential constraint.
pub referential_violations: Vec<ReferentialViolation>,
/// Issue records derived from all violations above, grouped in the order:
/// required-field, domain, type, null, referential.
pub issues: Vec<QcIssue>,
}
/// Records that a required field is missing from a feature.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FieldViolation {
/// String form of the feature's id, or `None` when the feature has no id.
pub feature_id: Option<String>,
/// Name of the required field that was not found.
pub field_name: String,
/// Severity assigned to the violation.
pub severity: Severity,
}
/// Records a field value that lies outside its configured value domain.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DomainViolation {
/// String form of the feature's id, or `None` when the feature has no id.
pub feature_id: Option<String>,
/// Name of the field holding the offending value.
pub field_name: String,
/// The offending value rendered as text.
pub value: String,
/// Human-readable description of the domain the value was checked against.
pub expected_domain: String,
/// Severity assigned to the violation.
pub severity: Severity,
}
/// Records a field value whose type differs from the field's expected type.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TypeViolation {
/// String form of the feature's id, or `None` when the feature has no id.
pub feature_id: Option<String>,
/// Name of the field holding the mistyped value.
pub field_name: String,
/// Name of the type the value actually has (for example `"String"`).
pub actual_type: String,
/// Name of the type the field definition expects.
pub expected_type: String,
/// Severity assigned to the violation.
pub severity: Severity,
}
/// Records a null value found in a field that does not allow nulls.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct NullViolation {
/// String form of the feature's id, or `None` when the feature has no id.
pub feature_id: Option<String>,
/// Name of the non-nullable field that holds a null.
pub field_name: String,
/// Severity assigned to the violation.
pub severity: Severity,
}
/// Records a foreign-key value that is not in the referenced set of valid values.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ReferentialViolation {
/// String form of the feature's id, or `None` when the feature has no id.
pub feature_id: Option<String>,
/// Name of the field holding the foreign key.
pub field_name: String,
/// The foreign-key value that was not found.
pub foreign_key: String,
/// Name of the table the key was expected to reference.
pub referenced_table: String,
/// Severity assigned to the violation.
pub severity: Severity,
}
/// Describes the validation rules for one attribute field.
#[derive(Debug, Clone)]
pub struct FieldDefinition {
/// Property key looked up on each feature.
pub name: String,
/// Type that non-null values of this field must have.
pub expected_type: FieldType,
/// When `true`, a feature lacking this field yields a `FieldViolation`.
pub required: bool,
/// When `false`, a null value in this field yields a `NullViolation`.
pub nullable: bool,
/// Optional domain that present values are checked against.
pub domain: Option<ValueDomain>,
/// Default value for the field.
/// NOTE(review): not read by any code visible in this file — confirm where
/// (or whether) it is applied.
pub default_value: Option<FieldValue>,
}
/// Expected type of an attribute field.
///
/// Which value representations satisfy each variant is decided by
/// `AttributionChecker::check_type`. The `Debug` name of the variant is used
/// as the expected-type text in `TypeViolation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FieldType {
/// Boolean values.
Boolean,
/// Signed or unsigned integer values.
Integer,
/// Floating-point values; integer values are accepted as well.
Float,
/// String values.
String,
/// Date values.
Date,
/// Date-time values.
DateTime,
/// Any value; the type check always passes.
Any,
}
/// Set of admissible values for a field.
///
/// Values whose type a domain does not apply to (for example a string checked
/// against `NumericRange`) are not reported as violations.
#[derive(Debug, Clone)]
pub enum ValueDomain {
/// Inclusive numeric interval, applied to integer and float values.
NumericRange {
/// Smallest accepted value.
min: f64,
/// Largest accepted value.
max: f64,
},
/// Closed list of accepted values, applied to string values.
Enumeration(HashSet<String>),
/// Pattern the value should match.
/// NOTE(review): `check_domain` in this file performs no check for this
/// variant, so it currently never produces a violation.
Pattern(String),
/// Inclusive bounds on string length, measured with `str::len`
/// (i.e. in UTF-8 bytes, not characters).
StringLength {
/// Smallest accepted length.
min: usize,
/// Largest accepted length.
max: usize,
},
}
/// Configuration for `AttributionChecker`.
///
/// The derived `Default` yields a configuration with no field definitions and
/// no referential constraints, so validation reports nothing.
#[derive(Debug, Clone, Default)]
pub struct AttributionConfig {
/// Per-field rules evaluated against every feature.
pub field_definitions: Vec<FieldDefinition>,
/// Foreign-key style constraints evaluated against every feature.
pub referential_constraints: Vec<ReferentialConstraint>,
/// Whether date formats should be validated.
/// NOTE(review): not read by any code visible in this file — confirm
/// whether date-format validation is implemented elsewhere.
pub validate_date_formats: bool,
/// Expected date format string.
/// NOTE(review): not read by any code visible in this file; the format
/// syntax is not established here — confirm against the consumer.
pub date_format: Option<String>,
}
/// Constrains a field to values drawn from a referenced set of keys.
#[derive(Debug, Clone)]
pub struct ReferentialConstraint {
/// Property key of the field that holds the foreign key.
pub field_name: String,
/// Name of the referenced table, used when reporting violations.
pub referenced_table: String,
/// Key values that are considered valid for the field.
pub valid_values: HashSet<String>,
}
/// Validates feature attributes against an `AttributionConfig`.
pub struct AttributionChecker {
// Rules applied by `validate`; fixed at construction time.
config: AttributionConfig,
}
impl AttributionChecker {
/// Creates a checker that uses the default (empty) configuration.
///
/// With no field definitions or referential constraints configured, such a
/// checker reports no violations.
#[must_use]
pub fn new() -> Self {
    let config = AttributionConfig::default();
    Self { config }
}
#[must_use]
pub fn with_config(config: AttributionConfig) -> Self {
Self { config }
}
pub fn validate(&self, features: &FeatureCollection) -> QcResult<AttributionResult> {
let mut issues = Vec::new();
let mut required_field_violations = Vec::new();
let mut domain_violations = Vec::new();
let mut type_violations = Vec::new();
let mut null_violations = Vec::new();
let mut referential_violations = Vec::new();
let mut complete_features = 0;
let mut incomplete_features = 0;
for feature in &features.features {
let feature_id = feature.id.as_ref().map(feature_id_to_string);
let mut is_complete = true;
for field_def in &self.config.field_definitions {
if field_def.required && !feature.properties.contains_key(&field_def.name) {
is_complete = false;
required_field_violations.push(FieldViolation {
feature_id: feature_id.clone(),
field_name: field_def.name.clone(),
severity: Severity::Major,
});
}
if let Some(value) = feature.properties.get(&field_def.name) {
if value.is_null() && !field_def.nullable {
null_violations.push(NullViolation {
feature_id: feature_id.clone(),
field_name: field_def.name.clone(),
severity: Severity::Major,
});
}
if !value.is_null() {
if let Some(type_violation) =
self.check_type(value, field_def, &feature_id)?
{
type_violations.push(type_violation);
}
}
if let Some(ref domain) = field_def.domain {
if let Some(domain_violation) =
self.check_domain(value, domain, &field_def.name, &feature_id)?
{
domain_violations.push(domain_violation);
}
}
}
}
for constraint in &self.config.referential_constraints {
if let Some(value) = feature.properties.get(&constraint.field_name) {
if let Some(ref_violation) =
self.check_referential_integrity(value, constraint, &feature_id)?
{
referential_violations.push(ref_violation);
}
}
}
if is_complete {
complete_features += 1;
} else {
incomplete_features += 1;
}
}
for violation in &required_field_violations {
issues.push(
QcIssue::new(
violation.severity,
"attribution",
"Missing required field",
format!("Field '{}' is required but missing", violation.field_name),
)
.with_location(violation.feature_id.clone().unwrap_or_default())
.with_suggestion("Add the required field"),
);
}
for violation in &domain_violations {
issues.push(
QcIssue::new(
violation.severity,
"attribution",
"Domain violation",
format!(
"Field '{}' has invalid value '{}', expected: {}",
violation.field_name, violation.value, violation.expected_domain
),
)
.with_location(violation.feature_id.clone().unwrap_or_default())
.with_suggestion("Use a valid value from the domain"),
);
}
for violation in &type_violations {
issues.push(
QcIssue::new(
violation.severity,
"attribution",
"Data type mismatch",
format!(
"Field '{}' has type '{}', expected '{}'",
violation.field_name, violation.actual_type, violation.expected_type
),
)
.with_location(violation.feature_id.clone().unwrap_or_default())
.with_suggestion("Convert value to expected type"),
);
}
for violation in &null_violations {
issues.push(
QcIssue::new(
violation.severity,
"attribution",
"Null value not allowed",
format!(
"Field '{}' does not allow null values",
violation.field_name
),
)
.with_location(violation.feature_id.clone().unwrap_or_default())
.with_suggestion("Provide a non-null value"),
);
}
for violation in &referential_violations {
issues.push(
QcIssue::new(
violation.severity,
"attribution",
"Referential integrity violation",
format!(
"Foreign key '{}' in field '{}' does not exist in '{}'",
violation.foreign_key, violation.field_name, violation.referenced_table
),
)
.with_location(violation.feature_id.clone().unwrap_or_default())
.with_suggestion("Use a valid foreign key value"),
);
}
Ok(AttributionResult {
feature_count: features.features.len(),
complete_features,
incomplete_features,
required_field_violations,
domain_violations,
type_violations,
null_violations,
referential_violations,
issues,
})
}
/// Checks that `value` has a representation accepted by the field's
/// expected type.
///
/// Accepted representations:
/// * `Boolean` — boolean values;
/// * `Integer` — signed or unsigned integers;
/// * `Float` — floats, and signed or unsigned integers;
/// * `String` — strings;
/// * `Date` / `DateTime` — native date values or strings (the string
///   content is not parsed here);
/// * `Any` — every value.
///
/// Returns `Ok(Some(_))` describing the mismatch, or `Ok(None)` when the
/// value conforms. This function never returns `Err`.
fn check_type(
    &self,
    value: &FieldValue,
    field_def: &FieldDefinition,
    feature_id: &Option<String>,
) -> QcResult<Option<TypeViolation>> {
    let type_matches = match field_def.expected_type {
        FieldType::Any => true,
        FieldType::Boolean => matches!(value, FieldValue::Bool(_)),
        FieldType::Integer => {
            matches!(value, FieldValue::Integer(_) | FieldValue::UInteger(_))
        }
        FieldType::Float => matches!(
            value,
            FieldValue::Float(_) | FieldValue::Integer(_) | FieldValue::UInteger(_)
        ),
        FieldType::String => matches!(value, FieldValue::String(_)),
        // A native date value must satisfy a date field; rejecting it would
        // report the contradictory "has type 'Date', expected 'Date'".
        FieldType::Date | FieldType::DateTime => {
            matches!(value, FieldValue::String(_) | FieldValue::Date(_))
        }
    };
    if type_matches {
        return Ok(None);
    }

    // Only computed on the mismatch path, where it is reported.
    let actual_type = match value {
        FieldValue::Null => "Null",
        FieldValue::Bool(_) => "Boolean",
        FieldValue::Integer(_) => "Integer",
        FieldValue::UInteger(_) => "Integer",
        FieldValue::Float(_) => "Float",
        FieldValue::Date(_) => "Date",
        FieldValue::Blob(_) => "Blob",
        FieldValue::String(_) => "String",
        FieldValue::Array(_) => "Array",
        FieldValue::Object(_) => "Object",
    };

    Ok(Some(TypeViolation {
        feature_id: feature_id.clone(),
        field_name: field_def.name.clone(),
        actual_type: actual_type.to_string(),
        expected_type: format!("{:?}", field_def.expected_type),
        severity: Severity::Major,
    }))
}
/// Checks `value` against `domain` and reports a violation when it falls
/// outside.
///
/// * `NumericRange` applies to integer and float values; bounds are
///   inclusive. A NaN float is reported as a violation because it lies in
///   no interval.
/// * `Enumeration` applies to string values. The list of allowed values in
///   the report is sorted so that output is stable between runs.
/// * `StringLength` applies to string values; bounds are inclusive and the
///   length is `str::len`, i.e. UTF-8 bytes.
///   NOTE(review): confirm whether a character count was intended.
/// * `Pattern` is not enforced: no regular-expression engine is available
///   to this file, so the variant never yields a violation.
///
/// Values of a type the domain does not apply to (including nulls) are not
/// reported. Returns `Ok(None)` when there is nothing to report; this
/// function never returns `Err`.
fn check_domain(
    &self,
    value: &FieldValue,
    domain: &ValueDomain,
    field_name: &str,
    feature_id: &Option<String>,
) -> QcResult<Option<DomainViolation>> {
    // All domain violations share everything but the value/domain texts.
    let violation = |value: String, expected_domain: String| DomainViolation {
        feature_id: feature_id.clone(),
        field_name: field_name.to_string(),
        value,
        expected_domain,
        severity: Severity::Minor,
    };

    let found = match domain {
        ValueDomain::NumericRange { min, max } => {
            let number = match value {
                FieldValue::Integer(i) => Some(*i as f64),
                FieldValue::UInteger(u) => Some(*u as f64),
                FieldValue::Float(f) => Some(*f),
                _ => None,
            };
            number
                // Comparisons with NaN are always false, so test it explicitly.
                .filter(|n| n.is_nan() || *n < *min || *n > *max)
                .map(|n| violation(n.to_string(), format!("[{}, {}]", min, max)))
        }
        ValueDomain::Enumeration(allowed_values) => match value {
            FieldValue::String(s) if !allowed_values.contains(s) => {
                // `HashSet` iteration order is unspecified; sort for stable reports.
                let sorted: std::collections::BTreeSet<&String> =
                    allowed_values.iter().collect();
                Some(violation(s.clone(), format!("One of: {:?}", sorted)))
            }
            _ => None,
        },
        ValueDomain::StringLength { min, max } => match value {
            FieldValue::String(s) if s.len() < *min || s.len() > *max => Some(violation(
                s.clone(),
                format!("Length: [{}, {}]", min, max),
            )),
            _ => None,
        },
        // Not enforced; see the function documentation.
        ValueDomain::Pattern(_pattern) => None,
    };

    Ok(found)
}
/// Checks that the foreign key held in `value` is one of the constraint's
/// valid values.
///
/// String keys are compared directly. Signed and unsigned integer keys are
/// compared by their decimal text form, so integer foreign-key fields are
/// validated rather than silently accepted. Values of any other type
/// (including nulls) are not checked.
///
/// Returns `Ok(Some(_))` when the key is not in `constraint.valid_values`,
/// `Ok(None)` otherwise. This function never returns `Err`.
fn check_referential_integrity(
    &self,
    value: &FieldValue,
    constraint: &ReferentialConstraint,
    feature_id: &Option<String>,
) -> QcResult<Option<ReferentialViolation>> {
    // Borrow string keys; only integer keys need a freshly built string.
    let key: std::borrow::Cow<'_, str> = match value {
        FieldValue::String(s) => std::borrow::Cow::Borrowed(s.as_str()),
        FieldValue::Integer(i) => std::borrow::Cow::Owned(i.to_string()),
        FieldValue::UInteger(u) => std::borrow::Cow::Owned(u.to_string()),
        _ => return Ok(None),
    };

    if constraint.valid_values.contains(&*key) {
        return Ok(None);
    }

    Ok(Some(ReferentialViolation {
        feature_id: feature_id.clone(),
        field_name: constraint.field_name.clone(),
        foreign_key: key.into_owned(),
        referenced_table: constraint.referenced_table.clone(),
        severity: Severity::Major,
    }))
}
}
impl Default for AttributionChecker {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use oxigdal_core::vector::{Feature, Geometry, Point};

    /// A checker built with `new` starts without any field definitions.
    #[test]
    fn test_attribution_checker_creation() {
        let checker = AttributionChecker::new();
        assert!(checker.config.field_definitions.is_empty());
    }

    /// A feature lacking a required field is counted as incomplete and
    /// produces exactly one required-field violation and one issue.
    #[test]
    fn test_required_field_validation() {
        let mut config = AttributionConfig::default();
        config.field_definitions.push(FieldDefinition {
            name: "name".to_string(),
            expected_type: FieldType::String,
            required: true,
            nullable: false,
            domain: None,
            default_value: None,
        });
        let checker = AttributionChecker::with_config(config);

        // A freshly created feature carries no "name" property.
        let feature = Feature::new(Geometry::Point(Point::new(0.0, 0.0)));
        let collection = FeatureCollection {
            features: vec![feature],
            metadata: None,
        };

        // `expect` is what is called here, so `expect_used` is the lint to
        // silence; a failure in a test should abort with this message.
        #[allow(clippy::expect_used)]
        let result = checker
            .validate(&collection)
            .expect("validation should succeed for required field test");

        assert_eq!(result.feature_count, 1);
        assert_eq!(result.complete_features, 0);
        assert_eq!(result.incomplete_features, 1);
        assert_eq!(result.required_field_violations.len(), 1);
        assert_eq!(result.required_field_violations[0].field_name, "name");
        assert_eq!(result.issues.len(), 1);
    }

    /// The `Debug` names of `FieldType` are used in violation reports.
    #[test]
    fn test_field_type() {
        assert_eq!(format!("{:?}", FieldType::String), "String");
        assert_eq!(format!("{:?}", FieldType::Integer), "Integer");
    }
}