use oxirs_core::model::{BlankNode, Literal, NamedNode, Quad, Subject, Triple};
use oxirs_core::RdfTerm;
use std::collections::{HashMap, HashSet};
/// Outcome of a validation run: an overall verdict plus the issues that were recorded.
#[derive(Debug, Clone)]
pub struct ValidationResult {
/// Overall verdict. `valid()` sets it to `true`; `invalid()` and `add_issue()` set it to `false`.
pub is_valid: bool,
/// Issues recorded so far, in the order they were added.
pub issues: Vec<ValidationIssue>,
}
impl ValidationResult {
pub fn valid() -> Self {
Self {
is_valid: true,
issues: Vec::new(),
}
}
pub fn invalid(issues: Vec<ValidationIssue>) -> Self {
Self {
is_valid: false,
issues,
}
}
pub fn add_issue(&mut self, issue: ValidationIssue) {
self.is_valid = false;
self.issues.push(issue);
}
pub fn has_warnings(&self) -> bool {
self.issues.iter().any(|i| i.severity == Severity::Warning)
}
pub fn has_errors(&self) -> bool {
self.issues.iter().any(|i| i.severity == Severity::Error)
}
}
/// Severity level attached to a [`ValidationIssue`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
/// Level assigned by `ValidationIssue::info`.
Info,
/// Level assigned by `ValidationIssue::warning`; detected by `ValidationResult::has_warnings`.
Warning,
/// Level assigned by `ValidationIssue::error`; detected by `ValidationResult::has_errors`.
Error,
}
/// A single finding produced during validation.
#[derive(Debug, Clone)]
pub struct ValidationIssue {
/// How serious the finding is.
pub severity: Severity,
/// Human-readable description of the finding.
pub message: String,
/// Optional hint on how to resolve the finding; `None` unless set via `with_suggestion`.
pub suggestion: Option<String>,
}
impl ValidationIssue {
pub fn error(message: impl Into<String>) -> Self {
Self {
severity: Severity::Error,
message: message.into(),
suggestion: None,
}
}
pub fn warning(message: impl Into<String>) -> Self {
Self {
severity: Severity::Warning,
message: message.into(),
suggestion: None,
}
}
pub fn info(message: impl Into<String>) -> Self {
Self {
severity: Severity::Info,
message: message.into(),
suggestion: None,
}
}
pub fn with_suggestion(mut self, suggestion: impl Into<String>) -> Self {
self.suggestion = Some(suggestion.into());
self
}
}
/// Validates the subject, predicate, and object of `triple`.
///
/// Each position contributes at most one issue. Issues are recorded in
/// subject, predicate, object order, and the result is valid only when no
/// position reports anything.
pub fn validate_triple(triple: &Triple) -> ValidationResult {
    // Subject kinds other than named and blank nodes are not inspected.
    let subject_issue = match triple.subject() {
        Subject::NamedNode(node) => validate_named_node(node, "subject"),
        Subject::BlankNode(bnode) => validate_blank_node(bnode, "subject"),
        _ => None,
    };

    // NOTE(review): the predicate is re-parsed from its `Display` output. If
    // that output is not something `NamedNode::new` accepts, the predicate is
    // silently left unchecked; confirm against the oxirs_core API.
    let predicate_issue = if triple.predicate().is_named_node() {
        let rendered = triple.predicate().to_string();
        match NamedNode::new(&rendered) {
            Ok(node) => validate_named_node(&node, "predicate"),
            Err(_) => None,
        }
    } else {
        None
    };

    // Object kinds other than named nodes, blank nodes, and literals are not inspected.
    let object_issue = match triple.object() {
        oxirs_core::model::Object::NamedNode(node) => validate_named_node(node, "object"),
        oxirs_core::model::Object::BlankNode(bnode) => validate_blank_node(bnode, "object"),
        oxirs_core::model::Object::Literal(lit) => validate_literal(lit),
        _ => None,
    };

    let mut outcome = ValidationResult::valid();
    for issue in subject_issue
        .into_iter()
        .chain(predicate_issue)
        .chain(object_issue)
    {
        outcome.add_issue(issue);
    }
    outcome
}
/// Validates `quad` by checking its subject, predicate, and object as a
/// triple and then, when the graph name is a named node, checking that IRI.
///
/// Graph names of any other kind are not inspected.
pub fn validate_quad(quad: &Quad) -> ValidationResult {
    // `validate_triple` needs a `Triple`, so the three terms are cloned into one.
    let as_triple = Triple::new(
        quad.subject().clone(),
        quad.predicate().clone(),
        quad.object().clone(),
    );
    let mut outcome = validate_triple(&as_triple);

    let graph_issue = match quad.graph_name() {
        oxirs_core::model::GraphName::NamedNode(node) => validate_named_node(node, "graph name"),
        _ => None,
    };
    if let Some(issue) = graph_issue {
        outcome.add_issue(issue);
    }
    outcome
}
/// Checks the IRI of `node` and returns the first problem found, if any.
///
/// `context` names the position being checked (for example `"subject"`) and
/// is interpolated into the issue message.
///
/// Checks, in order:
/// 1. an empty IRI is an error;
/// 2. an IRI containing whitespace is an error;
/// 3. an IRI that does not start with `http://`, `https://`, `urn:` or
///    `file://` is a warning. The prefix comparison ignores ASCII case
///    because URI schemes are case-insensitive (RFC 3986, section 3.1).
fn validate_named_node(node: &NamedNode, context: &str) -> Option<ValidationIssue> {
    const COMMON_SCHEME_PREFIXES: [&str; 4] = ["http://", "https://", "urn:", "file://"];

    let iri = node.as_str();
    if iri.is_empty() {
        return Some(ValidationIssue::error(format!("Empty IRI in {}", context)));
    }
    if iri.contains(char::is_whitespace) {
        return Some(
            ValidationIssue::error(format!("IRI contains whitespace in {}", context))
                .with_suggestion("Remove whitespace from IRI"),
        );
    }

    // `str::get` yields `None` when the IRI is shorter than the prefix or the
    // cut would split a multi-byte character, so this never panics.
    let has_common_scheme = COMMON_SCHEME_PREFIXES.iter().any(|prefix| {
        iri.get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    });
    if !has_common_scheme {
        return Some(
            ValidationIssue::warning(format!("Unusual IRI scheme in {}: {}", context, iri))
                .with_suggestion("Consider using http://, https://, or urn: schemes"),
        );
    }
    None
}
fn validate_blank_node(bnode: &BlankNode, context: &str) -> Option<ValidationIssue> {
let id = bnode.as_str();
if id.is_empty() {
return Some(ValidationIssue::error(format!(
"Empty blank node ID in {}",
context
)));
}
if id.contains(|c: char| c.is_whitespace()) {
return Some(
ValidationIssue::error(format!("Blank node ID contains whitespace in {}", context))
.with_suggestion("Remove whitespace from blank node ID"),
);
}
None
}
/// Checks `literal` and returns the most severe problem found, if any.
///
/// Only one issue can be returned, so the error-level language-tag checks
/// run before the warning-level size check. An oversized literal with a bad
/// language tag is therefore reported as an error, not just a warning.
///
/// Checks, in order:
/// 1. an empty language tag is an error;
/// 2. a language tag with characters other than ASCII alphanumerics and `-`
///    is an error;
/// 3. a value whose `len()` exceeds 1,000,000 is a warning.
fn validate_literal(literal: &Literal) -> Option<ValidationIssue> {
    const LARGE_LITERAL_LEN: usize = 1_000_000;

    if let Some(lang) = literal.language() {
        if lang.is_empty() {
            return Some(ValidationIssue::error("Empty language tag"));
        }
        if !lang.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
            return Some(
                ValidationIssue::error(format!("Invalid language tag: {}", lang)).with_suggestion(
                    "Language tags should contain only alphanumeric characters and hyphens",
                ),
            );
        }
    }

    if literal.value().len() > LARGE_LITERAL_LEN {
        return Some(
            ValidationIssue::warning("Literal value is very large (>1MB)")
                .with_suggestion("Consider storing large values externally"),
        );
    }
    None
}
/// Returns a clone of every triple in `triples` that repeats an earlier one.
///
/// Two triples are treated as equal when the string renderings of their
/// subject, predicate, and object all match. The first occurrence is never
/// reported, so a triple that appears `n` times contributes `n - 1` entries,
/// in input order.
pub fn check_duplicates(triples: &[Triple]) -> Vec<Triple> {
    let mut seen = HashSet::new();
    triples
        .iter()
        .filter(|triple| {
            let key = (
                triple.subject().to_string(),
                triple.predicate().to_string(),
                triple.object().to_string(),
            );
            // `insert` returns `false` when the key was already present.
            !seen.insert(key)
        })
        .cloned()
        .collect()
}
/// Returns the identifiers of blank nodes that occur exactly once in `triples`.
///
/// Occurrences are counted across subject and object positions; a blank node
/// used as both subject and object of the same triple counts twice.
///
/// The identifiers are returned in ascending order so the output is stable
/// between runs (hash-map iteration order is not).
pub fn check_orphaned_blank_nodes(triples: &[Triple]) -> Vec<String> {
    // Keys borrow from `triples`; only the surviving identifiers are copied.
    let mut occurrences: HashMap<&str, usize> = HashMap::new();
    for triple in triples {
        if let Subject::BlankNode(bnode) = triple.subject() {
            *occurrences.entry(bnode.as_str()).or_default() += 1;
        }
        if let oxirs_core::model::Object::BlankNode(bnode) = triple.object() {
            *occurrences.entry(bnode.as_str()).or_default() += 1;
        }
    }

    let mut orphaned: Vec<String> = occurrences
        .into_iter()
        .filter(|&(_, count)| count == 1)
        .map(|(id, _)| id.to_owned())
        .collect();
    orphaned.sort_unstable();
    orphaned
}
/// Summary counters for a collection of triples, as filled in by `compute_stats`.
#[derive(Debug, Clone, Default)]
pub struct DatasetStats {
/// Total number of triples in the input.
pub triple_count: usize,
/// Number of distinct subjects, compared by their string rendering.
pub unique_subjects: usize,
/// Number of distinct predicates, compared by their string rendering.
pub unique_predicates: usize,
/// Number of distinct objects, compared by their string rendering.
pub unique_objects: usize,
/// Number of triples whose subject is a blank node (counted per triple, not per distinct node).
pub blank_node_count: usize,
/// Number of triples whose object is a literal.
pub literal_count: usize,
/// Number of literal objects that carry a language tag.
pub language_tagged_count: usize,
/// Number of literal objects counted as having a datatype other than `xsd:string`; see `compute_stats` for the exact rule.
pub typed_literal_count: usize,
}
/// Computes summary statistics for `triples`.
///
/// * Distinct subjects, predicates, and objects are compared by their string
///   rendering.
/// * `blank_node_count` counts triples whose subject is a blank node.
/// * Each literal object falls into at most one of two buckets: it is
///   language-tagged when it carries a language tag; otherwise it is typed
///   when its datatype is not `xsd:string`. Language-tagged literals are
///   therefore never also counted as typed.
pub fn compute_stats(triples: &[Triple]) -> DatasetStats {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";

    let mut stats = DatasetStats {
        triple_count: triples.len(),
        ..Default::default()
    };
    let mut subjects = HashSet::new();
    let mut predicates = HashSet::new();
    let mut objects = HashSet::new();

    for triple in triples {
        subjects.insert(triple.subject().to_string());
        predicates.insert(triple.predicate().to_string());
        objects.insert(triple.object().to_string());

        if matches!(triple.subject(), Subject::BlankNode(_)) {
            stats.blank_node_count += 1;
        }

        if let oxirs_core::model::Object::Literal(lit) = triple.object() {
            stats.literal_count += 1;
            if lit.language().is_some() {
                stats.language_tagged_count += 1;
            } else if lit.datatype().as_str() != XSD_STRING {
                // Reached only for literals without a language tag, so a
                // language-tagged literal cannot be double-counted here.
                stats.typed_literal_count += 1;
            }
        }
    }

    stats.unique_subjects = subjects.len();
    stats.unique_predicates = predicates.len();
    stats.unique_objects = objects.len();
    stats
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a triple with the given subject IRI, the fixed predicate
    /// `http://example.org/p`, and a plain literal object.
    fn sample_triple(subject_iri: &str, value: &str) -> Triple {
        Triple::new(
            NamedNode::new(subject_iri).expect("valid IRI"),
            NamedNode::new("http://example.org/p").expect("valid IRI"),
            Literal::new(value),
        )
    }

    /// A well-formed triple produces a valid result with no issues.
    #[test]
    fn test_validate_triple_valid() {
        let outcome = validate_triple(&sample_triple("http://example.org/s", "value"));
        assert!(outcome.is_valid);
        assert!(outcome.issues.is_empty());
    }

    /// A literal with a simple language tag raises no issue.
    #[test]
    fn test_validate_literal() {
        let tagged =
            Literal::new_language_tagged_literal("Hello", "en").expect("validation should succeed");
        assert!(validate_literal(&tagged).is_none());
    }

    /// A triple followed by its clone yields exactly one duplicate.
    #[test]
    fn test_check_duplicates() {
        let first = sample_triple("http://example.org/s", "value");
        let batch = [first.clone(), first];
        assert_eq!(check_duplicates(&batch).len(), 1);
    }

    /// Two triples sharing a predicate are counted per category.
    #[test]
    fn test_compute_stats() {
        let triples = [
            sample_triple("http://example.org/s1", "value1"),
            sample_triple("http://example.org/s2", "value2"),
        ];
        let stats = compute_stats(&triples);
        assert_eq!(stats.triple_count, 2);
        assert_eq!(stats.unique_subjects, 2);
        assert_eq!(stats.unique_predicates, 1);
        assert_eq!(stats.literal_count, 2);
    }

    /// Adding issues flips validity and is reflected by the severity queries.
    #[test]
    fn test_validation_result() {
        let mut outcome = ValidationResult::valid();
        assert!(outcome.is_valid);

        outcome.add_issue(ValidationIssue::warning("Test warning"));
        assert!(!outcome.is_valid);
        assert!(outcome.has_warnings());
        assert!(!outcome.has_errors());

        outcome.add_issue(ValidationIssue::error("Test error"));
        assert!(outcome.has_errors());
    }
}