use crate::model::{StarGraph, StarTerm, StarTriple};
use crate::shacl_star::ShaclStarValidator;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use thiserror::Error;
use tracing::{debug, info};
/// Errors that the validation API can return.
///
/// Each variant carries a human-readable description that is interpolated
/// into the `Display` output produced by `thiserror`.
///
/// NOTE(review): none of these variants is constructed in the code visible in
/// this file; the validator methods currently always return `Ok`.
#[derive(Error, Debug)]
pub enum ValidationError {
/// Displayed as `Validation failed: <description>`.
#[error("Validation failed: {0}")]
ValidationFailed(String),
/// Displayed as `Invalid configuration: <description>`.
#[error("Invalid configuration: {0}")]
InvalidConfig(String),
/// Displayed as `Constraint violation: <description>`.
#[error("Constraint violation: {0}")]
ConstraintViolation(String),
}
/// How thorough a validation run is.
///
/// Variants are declared from least to most thorough, so the derived `Ord`
/// orders them the same way. `RdfStarValidator::validate` selects its passes
/// from this value; custom rules are applied at every level.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum ValidationLevel {
/// Syntax pass only (nesting depth and, if enabled, IRI checks).
Syntax,
/// Syntax pass plus the basic-semantics pass (graph size limit).
Basic,
/// Everything in `Basic` plus the semantic pass (predicates must be IRIs).
Semantic,
/// Everything in `Semantic` plus the constraint and performance passes.
Complete,
/// Everything in `Complete` plus the best-practices pass.
Strict,
}
/// Severity attached to a [`ValidationIssue`].
///
/// Variants are declared from least to most severe, so the derived `Ord`
/// orders them the same way. A validation result is considered invalid as
/// soon as it contains at least one `Error` or `Critical` issue.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum ValidationSeverity {
/// Informational note; does not affect validity.
Info,
/// Potential problem; does not affect validity.
Warning,
/// Problem that makes the validated graph invalid.
Error,
/// Also makes the validated graph invalid; ordered above `Error`.
Critical,
}
/// A single finding produced during validation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationIssue {
/// How serious the finding is.
pub severity: ValidationSeverity,
/// Broad area the finding belongs to.
pub category: ValidationCategory,
/// Human-readable description of the finding.
pub message: String,
/// Where the finding occurred; the validator in this file uses the form
/// `"triple <index>"`, or `None` for graph-wide findings.
pub location: Option<String>,
/// Optional hint on how to resolve the finding.
pub suggestion: Option<String>,
/// Identifier of the rule that produced the finding (for example
/// `"MAX_NESTING_DEPTH"` or the `id` of a custom rule).
pub rule_id: Option<String>,
}
/// Broad classification of a [`ValidationIssue`].
///
/// The validator in this file emits `Syntax`, `Semantics`, `Performance`,
/// `BestPractices` and `Compliance` (for custom rules); the remaining
/// variants are not produced by the code visible here.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ValidationCategory {
/// Structural problems such as excessive nesting or malformed IRIs.
Syntax,
/// Violations of term-position rules (e.g. a non-IRI predicate).
Semantics,
/// Characteristics that may slow processing (graph size, blank-node usage).
Performance,
/// Not emitted by the code visible in this file.
DataQuality,
/// Not emitted by the code visible in this file.
Consistency,
/// Advisory findings such as low predicate reuse.
BestPractices,
/// Not emitted by the code visible in this file.
Security,
/// Findings raised by user-supplied custom rules.
Compliance,
}
/// Settings that control what [`RdfStarValidator`] checks and when it stops.
#[derive(Debug, Clone)]
pub struct ValidationConfig {
/// Which validation passes are run.
pub level: ValidationLevel,
/// Nesting limit for quoted triples; a triple whose depth is greater than
/// *or equal to* this value is reported as an error.
pub max_nesting_depth: usize,
/// When set, a graph with more triples than this yields a warning.
pub max_graph_size: Option<usize>,
/// Whether IRIs (including those inside quoted triples) are checked
/// during the syntax pass.
pub validate_iris: bool,
/// NOTE(review): not read by any code visible in this file.
pub validate_datatypes: bool,
/// Whether a `ShaclStarValidator` is created by `RdfStarValidator::new`.
pub enable_shacl: bool,
/// User-supplied rules applied to every triple after the built-in passes.
pub custom_rules: Vec<CustomValidationRule>,
/// Stop a pass early once it has recorded an issue. Honoured by the
/// syntax pass (nesting-depth errors) and by custom-rule application.
pub fail_fast: bool,
/// When set, the syntax pass stops iterating once at least this many
/// issues have been collected. Other passes do not consult it.
pub max_issues: Option<usize>,
}
impl Default for ValidationConfig {
    /// Returns the default configuration: `Complete` level, all checks
    /// enabled, no graph-size limit, no custom rules, no fail-fast, and at
    /// most 100 issues collected by the syntax pass.
    fn default() -> Self {
        Self {
            // Scope of the run.
            level: ValidationLevel::Complete,
            enable_shacl: true,
            custom_rules: vec![],

            // Individual checks.
            validate_iris: true,
            validate_datatypes: true,

            // Limits and early-exit behaviour.
            max_nesting_depth: 10,
            max_graph_size: None,
            max_issues: Some(100),
            fail_fast: false,
        }
    }
}
/// A user-defined check that is run against every triple of the graph.
#[derive(Debug, Clone)]
pub struct CustomValidationRule {
/// Identifier copied into `ValidationIssue::rule_id` for each violation.
pub id: String,
/// Human-readable explanation of the rule.
/// NOTE(review): not read by any code visible in this file.
pub description: String,
/// The check itself: returns `Some(message)` when the triple violates
/// the rule and `None` when it passes.
pub validator: fn(&StarTriple) -> Option<String>,
/// Severity assigned to issues raised by this rule.
pub severity: ValidationSeverity,
}
/// Outcome of one call to `RdfStarValidator::validate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
/// `true` when no issue has severity `Error` or `Critical`.
pub is_valid: bool,
/// The validation level the run was configured with.
pub level: ValidationLevel,
/// All issues collected, in the order they were recorded.
pub issues: Vec<ValidationIssue>,
/// Summary counts computed over the validated graph.
pub statistics: ValidationStatistics,
/// UTC timestamp taken when the result was assembled.
pub validated_at: DateTime<Utc>,
/// Wall-clock time spent validating, in milliseconds.
pub duration_ms: u64,
}
impl ValidationResult {
pub fn get_issues_by_severity(&self, severity: ValidationSeverity) -> Vec<&ValidationIssue> {
self.issues
.iter()
.filter(|i| i.severity == severity)
.collect()
}
pub fn get_issues_by_category(&self, category: ValidationCategory) -> Vec<&ValidationIssue> {
self.issues
.iter()
.filter(|i| i.category == category)
.collect()
}
pub fn get_critical_issues(&self) -> Vec<&ValidationIssue> {
self.get_issues_by_severity(ValidationSeverity::Critical)
}
pub fn get_errors(&self) -> Vec<&ValidationIssue> {
self.get_issues_by_severity(ValidationSeverity::Error)
}
pub fn get_warnings(&self) -> Vec<&ValidationIssue> {
self.get_issues_by_severity(ValidationSeverity::Warning)
}
pub fn has_critical_issues(&self) -> bool {
self.issues
.iter()
.any(|i| i.severity == ValidationSeverity::Critical)
}
pub fn has_errors(&self) -> bool {
self.issues
.iter()
.any(|i| i.severity == ValidationSeverity::Error)
}
}
/// Summary counts gathered over the top-level triples of a validated graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationStatistics {
/// Number of triples in the graph (`graph.len()`).
pub total_triples: usize,
/// Number of top-level subject or object positions holding a quoted triple.
pub quoted_triples: usize,
/// Deepest quoted-triple nesting found in any triple (0 = no quoting).
pub max_nesting_depth_found: usize,
/// Number of distinct IRI or blank-node subjects.
pub unique_subjects: usize,
/// Number of distinct IRI predicates.
pub unique_predicates: usize,
/// Number of distinct IRI, literal or blank-node objects.
pub unique_objects: usize,
/// Occurrences (not distinct count) of blank nodes in subject or object
/// position.
pub blank_nodes: usize,
/// Occurrences (not distinct count) of literals in object position.
pub literals: usize,
/// Occurrences (not distinct count) of IRIs in subject, predicate or
/// object position.
pub iris: usize,
}
/// Validator that runs configurable checks over a `StarGraph`.
pub struct RdfStarValidator {
// Settings chosen by the caller at construction time.
config: ValidationConfig,
// Present only when `config.enable_shacl` was set at construction time.
shacl_validator: Option<ShaclStarValidator>,
}
impl RdfStarValidator {
/// Creates a validator driven by `config`.
///
/// A `ShaclStarValidator` is instantiated only when `config.enable_shacl`
/// is `true`; otherwise the validator holds `None`.
pub fn new(config: ValidationConfig) -> Self {
    let shacl_validator = config.enable_shacl.then(ShaclStarValidator::new);
    Self {
        config,
        shacl_validator,
    }
}
pub fn validate(&self, graph: &StarGraph) -> Result<ValidationResult, ValidationError> {
let start = std::time::Instant::now();
info!("Starting validation of graph with {} triples", graph.len());
let mut issues = Vec::new();
match self.config.level {
ValidationLevel::Syntax => {
self.validate_syntax(graph, &mut issues)?;
}
ValidationLevel::Basic => {
self.validate_syntax(graph, &mut issues)?;
self.validate_basic_semantics(graph, &mut issues)?;
}
ValidationLevel::Semantic => {
self.validate_syntax(graph, &mut issues)?;
self.validate_basic_semantics(graph, &mut issues)?;
self.validate_semantics(graph, &mut issues)?;
}
ValidationLevel::Complete | ValidationLevel::Strict => {
self.validate_syntax(graph, &mut issues)?;
self.validate_basic_semantics(graph, &mut issues)?;
self.validate_semantics(graph, &mut issues)?;
self.validate_constraints(graph, &mut issues)?;
self.validate_performance(graph, &mut issues)?;
if self.config.level == ValidationLevel::Strict {
self.validate_best_practices(graph, &mut issues)?;
}
}
}
self.apply_custom_rules(graph, &mut issues)?;
let statistics = self.compute_statistics(graph);
let duration_ms = start.elapsed().as_millis() as u64;
let is_valid = !issues.iter().any(|i| {
matches!(
i.severity,
ValidationSeverity::Error | ValidationSeverity::Critical
)
});
let result = ValidationResult {
is_valid,
level: self.config.level.clone(),
issues,
statistics,
validated_at: Utc::now(),
duration_ms,
};
info!(
"Validation complete: {} issues found in {}ms",
result.issues.len(),
duration_ms
);
Ok(result)
}
/// Syntax pass: checks quoted-triple nesting depth and, when
/// `config.validate_iris` is set, the IRIs of every term (recursing into
/// quoted triples).
///
/// A triple whose nesting depth is greater than *or equal to*
/// `config.max_nesting_depth` is reported as an error.
///
/// Early exit:
/// * with `config.fail_fast`, iteration stops at the first triple that
///   produced any issue — nesting-depth *or* invalid-IRI (previously only
///   nesting-depth errors stopped the pass);
/// * with `config.max_issues`, iteration stops once at least that many
///   issues have been collected (checked once per triple, so the final
///   count may slightly exceed the limit).
///
/// Always returns `Ok(())`; findings are appended to `issues`.
fn validate_syntax(
    &self,
    graph: &StarGraph,
    issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    debug!("Validating syntax");
    for (idx, triple) in graph.iter().enumerate() {
        // Remember how many issues existed before this triple so that
        // fail-fast can react to any issue the triple adds.
        let issues_before = issues.len();

        let depth = self.get_nesting_depth(triple);
        if depth >= self.config.max_nesting_depth {
            issues.push(ValidationIssue {
                severity: ValidationSeverity::Error,
                category: ValidationCategory::Syntax,
                message: format!(
                    "Nesting depth {} exceeds or equals maximum {}",
                    depth, self.config.max_nesting_depth
                ),
                location: Some(format!("triple {}", idx)),
                suggestion: Some(
                    "Reduce nesting depth or increase max_nesting_depth".to_string(),
                ),
                rule_id: Some("MAX_NESTING_DEPTH".to_string()),
            });
            if self.config.fail_fast {
                break;
            }
        }

        if self.config.validate_iris {
            self.validate_term_iris(&triple.subject, "subject", idx, issues);
            self.validate_term_iris(&triple.predicate, "predicate", idx, issues);
            self.validate_term_iris(&triple.object, "object", idx, issues);
        }

        // Invalid-IRI errors must honour fail-fast as well.
        if self.config.fail_fast && issues.len() > issues_before {
            break;
        }

        if let Some(max_issues) = self.config.max_issues {
            if issues.len() >= max_issues {
                break;
            }
        }
    }
    Ok(())
}
/// Basic-semantics pass: emits a `Performance` warning when the graph
/// holds more triples than `config.max_graph_size` (if a limit is set).
///
/// Always returns `Ok(())`; findings are appended to `issues`.
fn validate_basic_semantics(
    &self,
    graph: &StarGraph,
    issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    debug!("Validating basic semantics");
    match self.config.max_graph_size {
        Some(max_size) if graph.len() > max_size => {
            let message = format!(
                "Graph size {} exceeds recommended maximum {}",
                graph.len(),
                max_size
            );
            issues.push(ValidationIssue {
                severity: ValidationSeverity::Warning,
                category: ValidationCategory::Performance,
                message,
                location: None,
                suggestion: Some(
                    "Consider splitting the graph or increasing max_graph_size".to_string(),
                ),
                rule_id: Some("MAX_GRAPH_SIZE".to_string()),
            });
        }
        // No limit configured, or the graph is within it.
        _ => {}
    }
    Ok(())
}
/// Semantic pass: reports an error for every top-level triple whose
/// predicate is not a named node (IRI).
///
/// Predicates of quoted triples nested inside subjects or objects are not
/// inspected by this pass.
///
/// Always returns `Ok(())`; findings are appended to `issues`.
fn validate_semantics(
    &self,
    graph: &StarGraph,
    issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    debug!("Validating semantics");
    for (idx, triple) in graph.iter().enumerate() {
        if matches!(triple.predicate, StarTerm::NamedNode(_)) {
            continue;
        }
        issues.push(ValidationIssue {
            severity: ValidationSeverity::Error,
            category: ValidationCategory::Semantics,
            message: "Predicate must be an IRI".to_string(),
            location: Some(format!("triple {}", idx)),
            suggestion: Some("Use an IRI for the predicate".to_string()),
            rule_id: Some("PREDICATE_IRI".to_string()),
        });
    }
    Ok(())
}
/// Constraint pass.
///
/// Currently a placeholder: it only emits debug log lines and records no
/// issues, regardless of whether a SHACL-star validator is present.
fn validate_constraints(
    &self,
    _graph: &StarGraph,
    _issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    debug!("Validating constraints");
    if self.shacl_validator.is_some() {
        debug!("SHACL-star validation enabled");
    }
    Ok(())
}
/// Performance pass: warns when more than half of the triples have a blank
/// node as their subject.
///
/// Always returns `Ok(())`; findings are appended to `issues`.
fn validate_performance(
    &self,
    graph: &StarGraph,
    issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    debug!("Validating performance characteristics");

    let mut blank_node_count = 0usize;
    for triple in graph.iter() {
        if let StarTerm::BlankNode(_) = triple.subject {
            blank_node_count += 1;
        }
    }

    // At or below half of the triples: nothing to report.
    if blank_node_count <= graph.len() / 2 {
        return Ok(());
    }

    let message = format!(
        "High blank node usage ({}/{} triples) may impact performance",
        blank_node_count,
        graph.len()
    );
    issues.push(ValidationIssue {
        severity: ValidationSeverity::Warning,
        category: ValidationCategory::Performance,
        message,
        location: None,
        suggestion: Some(
            "Consider using IRIs instead of blank nodes where possible".to_string(),
        ),
        rule_id: Some("HIGH_BLANK_NODE_USAGE".to_string()),
    });
    Ok(())
}
/// Best-practices pass: emits an informational issue when more than half
/// of the distinct IRI predicates occur in exactly one triple.
///
/// Only top-level predicates that are named nodes are counted.
/// Always returns `Ok(())`; findings are appended to `issues`.
fn validate_best_practices(
    &self,
    graph: &StarGraph,
    issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    debug!("Validating best practices");

    // Occurrence count per predicate IRI.
    let mut predicate_usage: HashMap<String, usize> = HashMap::new();
    for triple in graph.iter() {
        if let StarTerm::NamedNode(nn) = &triple.predicate {
            *predicate_usage.entry(nn.iri.clone()).or_default() += 1;
        }
    }

    let distinct_predicates = predicate_usage.len();
    let used_once = predicate_usage
        .values()
        .filter(|&&occurrences| occurrences == 1)
        .count();

    if used_once <= distinct_predicates / 2 {
        return Ok(());
    }

    issues.push(ValidationIssue {
        severity: ValidationSeverity::Info,
        category: ValidationCategory::BestPractices,
        message: format!(
            "Many predicates ({}/{}) are used only once",
            used_once, distinct_predicates
        ),
        location: None,
        suggestion: Some("Consider reusing predicates where appropriate".to_string()),
        rule_id: Some("LOW_PREDICATE_REUSE".to_string()),
    });
    Ok(())
}
/// Runs every configured custom rule against every top-level triple.
///
/// Each violation becomes a `Compliance` issue carrying the rule's
/// severity and id. With `config.fail_fast`, the method returns right
/// after the first violation is recorded.
///
/// Always returns `Ok(())`; findings are appended to `issues`.
fn apply_custom_rules(
    &self,
    graph: &StarGraph,
    issues: &mut Vec<ValidationIssue>,
) -> Result<(), ValidationError> {
    let rules = &self.config.custom_rules;
    if rules.is_empty() {
        return Ok(());
    }
    debug!("Applying {} custom rules", rules.len());

    for (idx, triple) in graph.iter().enumerate() {
        for rule in rules {
            // `None` means the triple satisfies this rule.
            let violation_msg = match (rule.validator)(triple) {
                Some(text) => text,
                None => continue,
            };
            issues.push(ValidationIssue {
                severity: rule.severity.clone(),
                category: ValidationCategory::Compliance,
                message: violation_msg,
                location: Some(format!("triple {}", idx)),
                suggestion: None,
                rule_id: Some(rule.id.clone()),
            });
            if self.config.fail_fast {
                return Ok(());
            }
        }
    }
    Ok(())
}
/// Computes summary counts over the top-level triples of `graph`.
///
/// Occurrence counters (`iris`, `blank_nodes`, `literals`,
/// `quoted_triples`) are incremented once per matching term position.
/// Uniqueness sets are keyed by a kind-tagged rendering of the term
/// (`<iri>`, `_:id`, `"value"`), so that terms of different kinds can never
/// be merged just because their text coincides — for example an IRI object
/// and a literal object whose lexical value is that same IRI string.
///
/// Terms inside quoted triples are not counted individually; a quoted
/// triple in subject or object position only increments `quoted_triples`.
fn compute_statistics(&self, graph: &StarGraph) -> ValidationStatistics {
    let mut unique_subjects = HashSet::new();
    let mut unique_predicates = HashSet::new();
    let mut unique_objects = HashSet::new();
    let mut blank_nodes = 0;
    let mut literals = 0;
    let mut iris = 0;
    let mut quoted_triples = 0;
    let mut max_depth = 0;

    for triple in graph.iter() {
        match &triple.subject {
            StarTerm::NamedNode(nn) => {
                unique_subjects.insert(format!("<{}>", nn.iri));
                iris += 1;
            }
            StarTerm::BlankNode(bn) => {
                unique_subjects.insert(format!("_:{}", bn.id));
                blank_nodes += 1;
            }
            StarTerm::QuotedTriple(_) => {
                quoted_triples += 1;
            }
            _ => {}
        }

        // Predicates are only ever keyed as IRIs, so no tagging is needed.
        if let StarTerm::NamedNode(nn) = &triple.predicate {
            unique_predicates.insert(nn.iri.clone());
            iris += 1;
        }

        match &triple.object {
            StarTerm::NamedNode(nn) => {
                unique_objects.insert(format!("<{}>", nn.iri));
                iris += 1;
            }
            StarTerm::Literal(lit) => {
                // NOTE(review): keyed on the lexical value only; whether
                // literals carry a datatype or language tag that should
                // also distinguish them is not visible from this file.
                unique_objects.insert(format!("\"{}\"", lit.value));
                literals += 1;
            }
            StarTerm::BlankNode(bn) => {
                unique_objects.insert(format!("_:{}", bn.id));
                blank_nodes += 1;
            }
            StarTerm::QuotedTriple(_) => {
                quoted_triples += 1;
            }
            _ => {}
        }

        max_depth = max_depth.max(self.get_nesting_depth(triple));
    }

    ValidationStatistics {
        total_triples: graph.len(),
        quoted_triples,
        max_nesting_depth_found: max_depth,
        unique_subjects: unique_subjects.len(),
        unique_predicates: unique_predicates.len(),
        unique_objects: unique_objects.len(),
        blank_nodes,
        literals,
        iris,
    }
}
/// Returns the quoted-triple nesting depth of `triple`: the larger of the
/// depths of its subject and object (the predicate is not inspected).
/// A triple without quoted triples has depth 0.
fn get_nesting_depth(&self, triple: &StarTriple) -> usize {
    usize::max(
        self.get_term_depth(&triple.subject),
        self.get_term_depth(&triple.object),
    )
}
/// Returns the nesting depth contributed by a single term: one more than
/// the depth of the triple it quotes, or 0 for any non-quoted term.
fn get_term_depth(&self, term: &StarTerm) -> usize {
    if let StarTerm::QuotedTriple(quoted) = term {
        self.get_nesting_depth(quoted) + 1
    } else {
        0
    }
}
/// Checks the IRI of `term`, recursing into quoted triples.
///
/// * A named node whose IRI fails `is_valid_iri` yields a `Syntax` error
///   located at `triple <triple_idx>`.
/// * A quoted triple has its subject, predicate and object checked in that
///   order, with `position` extended to `"<position>-><part>"`.
/// * Any other term is ignored.
fn validate_term_iris(
    &self,
    term: &StarTerm,
    position: &str,
    triple_idx: usize,
    issues: &mut Vec<ValidationIssue>,
) {
    match term {
        StarTerm::NamedNode(nn) if !self.is_valid_iri(&nn.iri) => {
            issues.push(ValidationIssue {
                severity: ValidationSeverity::Error,
                category: ValidationCategory::Syntax,
                message: format!("Invalid IRI in {}: {}", position, nn.iri),
                location: Some(format!("triple {}", triple_idx)),
                suggestion: Some("Ensure IRI is properly formatted".to_string()),
                rule_id: Some("INVALID_IRI".to_string()),
            });
        }
        StarTerm::QuotedTriple(qt) => {
            let parts = [
                (&qt.subject, "subject"),
                (&qt.predicate, "predicate"),
                (&qt.object, "object"),
            ];
            for &(inner, role) in parts.iter() {
                let nested_position = format!("{}->{}", position, role);
                self.validate_term_iris(inner, &nested_position, triple_idx, issues);
            }
        }
        // Valid named nodes and all other term kinds need no IRI check.
        _ => {}
    }
}
/// Performs a lightweight well-formedness check of an absolute IRI.
///
/// The string is accepted when
/// 1. it starts with a scheme — an ASCII letter followed by ASCII letters,
///    digits, `+`, `-` or `.` — terminated by `:` (RFC 3986 §3.1), and
/// 2. the remainder contains none of the characters that the RDF `IRIREF`
///    grammar forbids: code points up to U+0020 and `<>"{}|^` plus the
///    backtick and backslash.
///
/// This replaces a hard-coded `http://` / `https://` / `urn:` prefix
/// whitelist, which rejected valid IRIs such as `mailto:a@example.org`,
/// `file:///tmp/x` or `tag:example.org,2024:x`, and accepted strings
/// containing spaces or angle brackets. Every well-formed IRI accepted
/// before is still accepted. This is not a full RFC 3987 parser.
fn is_valid_iri(&self, iri: &str) -> bool {
    // An absolute IRI must have a scheme; split at the first ':'.
    let (scheme, rest) = match iri.split_once(':') {
        Some(parts) => parts,
        None => return false,
    };

    let mut scheme_chars = scheme.chars();
    let scheme_is_valid = scheme_chars
        .next()
        .map_or(false, |first| first.is_ascii_alphabetic())
        && scheme_chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));

    let is_forbidden = |c: char| {
        c <= ' ' || matches!(c, '<' | '>' | '"' | '{' | '}' | '|' | '^' | '`' | '\\')
    };

    scheme_is_valid && !rest.chars().any(is_forbidden)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::StarTerm;

    /// The default configuration enables SHACL, so a SHACL validator must
    /// be present after construction.
    #[test]
    fn test_validator_creation() {
        let validator = RdfStarValidator::new(ValidationConfig::default());
        assert!(validator.shacl_validator.is_some());
    }

    /// A single well-formed triple passes `Basic` validation and is
    /// reflected in the statistics.
    #[test]
    fn test_basic_validation() -> Result<(), Box<dyn std::error::Error>> {
        let validator = RdfStarValidator::new(ValidationConfig {
            level: ValidationLevel::Basic,
            ..Default::default()
        });

        let mut graph = StarGraph::new();
        let triple = StarTriple::new(
            StarTerm::iri("http://example.org/s")?,
            StarTerm::iri("http://example.org/p")?,
            StarTerm::literal("object")?,
        );
        graph.insert(triple)?;

        let report = validator.validate(&graph)?;
        assert!(report.is_valid);
        assert_eq!(report.statistics.total_triples, 1);
        Ok(())
    }

    /// A triple nested two levels deep is rejected when the configured
    /// maximum nesting depth is 2 (the limit is inclusive).
    #[test]
    fn test_nesting_depth_validation() -> Result<(), Box<dyn std::error::Error>> {
        let validator = RdfStarValidator::new(ValidationConfig {
            level: ValidationLevel::Syntax,
            max_nesting_depth: 2,
            ..Default::default()
        });
        let mut graph = StarGraph::new();

        // Build the nesting from the innermost triple outwards.
        let level_0 = StarTriple::new(
            StarTerm::iri("http://example.org/s1")?,
            StarTerm::iri("http://example.org/p1")?,
            StarTerm::literal("obj1")?,
        );
        let level_1 = StarTriple::new(
            StarTerm::quoted_triple(level_0),
            StarTerm::iri("http://example.org/p2")?,
            StarTerm::literal("obj2")?,
        );
        let level_2 = StarTriple::new(
            StarTerm::quoted_triple(level_1),
            StarTerm::iri("http://example.org/p3")?,
            StarTerm::literal("obj3")?,
        );
        graph.insert(level_2)?;

        let report = validator.validate(&graph)?;
        assert!(!report.is_valid);
        assert!(report.has_errors());
        Ok(())
    }

    /// A custom rule that flags a specific predicate produces exactly one
    /// error and makes the result invalid.
    #[test]
    fn test_custom_validation_rule() -> Result<(), Box<dyn std::error::Error>> {
        fn reject_forbidden_predicate(triple: &StarTriple) -> Option<String> {
            match &triple.predicate {
                StarTerm::NamedNode(nn) if nn.iri == "http://example.org/forbidden" => {
                    Some("Forbidden predicate used".to_string())
                }
                _ => None,
            }
        }

        let rule = CustomValidationRule {
            id: "NO_FORBIDDEN_PREDICATE".to_string(),
            description: "Predicate must not be forbidden".to_string(),
            validator: reject_forbidden_predicate,
            severity: ValidationSeverity::Error,
        };
        let validator = RdfStarValidator::new(ValidationConfig {
            custom_rules: vec![rule],
            ..Default::default()
        });

        let mut graph = StarGraph::new();
        graph.insert(StarTriple::new(
            StarTerm::iri("http://example.org/s")?,
            StarTerm::iri("http://example.org/forbidden")?,
            StarTerm::literal("obj")?,
        ))?;

        let report = validator.validate(&graph)?;
        assert!(!report.is_valid);
        assert_eq!(report.get_errors().len(), 1);
        Ok(())
    }
}