use super::types::*;
use crate::model::{BlankNode, Literal, NamedNode, Triple};
use anyhow::Result;
use sha2::{Digest, Sha256};
use std::collections::HashSet;
/// In-memory state machine holding a set of RDF triples.
///
/// Mutations and queries go through `execute`; the state can be summarised
/// as a SHA-256 digest over the stored triples and the operation counter.
pub struct RdfStateMachine {
/// Triples currently stored; duplicates collapse because this is a set.
triples: HashSet<Triple>,
/// Incremented at the start of every `execute` call; zeroed by `reset` and
/// restored to its previous value when `apply_batch` fails.
operation_count: u64,
/// Last digest computed by `calculate_digest`, paired with the
/// `operation_count` it was computed at. `None` when nothing is cached.
digest_cache: Option<(u64, Vec<u8>)>,
}
impl RdfStateMachine {
/// Creates an empty state machine: no triples, an operation count of zero
/// and no cached digest.
pub fn new() -> Self {
    let triples = HashSet::new();
    let digest_cache = None;
    Self {
        triples,
        operation_count: 0,
        digest_cache,
    }
}
/// Applies one operation to the state machine and returns its outcome.
///
/// The operation counter is incremented and the cached digest is dropped at
/// the start of every call, including `Query` operations and calls that end
/// in an error, so the state digest changes after each call.
///
/// `BatchInsert` and `BatchRemove` are all-or-nothing: every triple of the
/// batch is converted first and the stored set is only modified once all
/// conversions have succeeded.
///
/// `Query` currently ignores the query text and returns at most 10 stored
/// triples.
///
/// # Errors
///
/// Returns an error if any triple carried by the operation is rejected by
/// `deserialize_triple`. The stored triples are left untouched in that case
/// (the operation counter has still been incremented).
pub fn execute(&mut self, operation: RdfOperation) -> Result<OperationResult> {
    // Upper bound on the number of triples returned by a `Query` operation.
    const QUERY_RESULT_LIMIT: usize = 10;

    self.operation_count += 1;
    self.digest_cache = None;

    match operation {
        RdfOperation::Insert(triple) => {
            let t = self.deserialize_triple(triple)?;
            self.triples.insert(t);
            Ok(OperationResult::Success)
        }
        RdfOperation::Remove(triple) => {
            let t = self.deserialize_triple(triple)?;
            self.triples.remove(&t);
            Ok(OperationResult::Success)
        }
        RdfOperation::BatchInsert(triples) => {
            // Convert everything up front so that a bad triple in the middle
            // of the batch cannot leave the set half-updated.
            let parsed = triples
                .into_iter()
                .map(|triple| self.deserialize_triple(triple))
                .collect::<Result<Vec<_>>>()?;
            self.triples.extend(parsed);
            Ok(OperationResult::Success)
        }
        RdfOperation::BatchRemove(triples) => {
            // Same two-phase approach as `BatchInsert`: validate, then apply.
            let parsed = triples
                .into_iter()
                .map(|triple| self.deserialize_triple(triple))
                .collect::<Result<Vec<_>>>()?;
            for t in &parsed {
                self.triples.remove(t);
            }
            Ok(OperationResult::Success)
        }
        RdfOperation::Query(_query) => {
            // NOTE(review): the query string is not evaluated, and `HashSet`
            // iteration order is unspecified, so which triples are returned
            // may differ between instances — confirm this is acceptable.
            let results: Vec<SerializableTriple> = self
                .triples
                .iter()
                .take(QUERY_RESULT_LIMIT)
                .map(|t| self.serialize_triple(t))
                .collect();
            Ok(OperationResult::QueryResult(results))
        }
    }
}
pub fn get_state_digest(&self) -> Vec<u8> {
self.calculate_digest_readonly()
}
/// Computes the SHA-256 digest of the current state.
///
/// Each triple is rendered as its subject, predicate and object strings. The
/// rendered triples are sorted so the result does not depend on the hash
/// set's iteration order, then hashed in that order, followed by the
/// operation count as little-endian bytes.
///
/// Every term is stringified exactly once: the rendered strings serve both
/// as sort keys and as hash input.
fn calculate_digest_readonly(&self) -> Vec<u8> {
    let mut rendered: Vec<(String, String, String)> = self
        .triples
        .iter()
        .map(|t| {
            (
                t.subject().to_string(),
                t.predicate().to_string(),
                t.object().to_string(),
            )
        })
        .collect();
    // Entries that compare equal are byte-identical, so an unstable sort
    // feeds the hasher exactly the same bytes as a stable one would.
    rendered.sort_unstable();

    let mut hasher = Sha256::new();
    // NOTE(review): the three fields are hashed back to back with no
    // separator or length prefix; confirm the rendered forms are
    // self-delimiting so distinct states cannot produce the same byte stream.
    for (subject, predicate, object) in &rendered {
        hasher.update(subject.as_bytes());
        hasher.update(predicate.as_bytes());
        hasher.update(object.as_bytes());
    }
    hasher.update(self.operation_count.to_le_bytes());
    hasher.finalize().to_vec()
}
/// Returns the digest of the current state, reusing the cached value when it
/// was computed at the current operation count.
///
/// On a cache miss the digest is recomputed and stored together with the
/// current operation count.
pub fn calculate_digest(&mut self) -> Vec<u8> {
    let cached_hit = self
        .digest_cache
        .as_ref()
        .filter(|(stamp, _)| *stamp == self.operation_count)
        .map(|(_, bytes)| bytes.clone());
    if let Some(hit) = cached_hit {
        return hit;
    }

    let fresh = self.calculate_digest_readonly();
    self.digest_cache = Some((self.operation_count, fresh.clone()));
    fresh
}
pub fn triple_count(&self) -> usize {
self.triples.len()
}
pub fn operation_count(&self) -> u64 {
self.operation_count
}
/// Reports whether the given triple is currently stored.
///
/// # Errors
///
/// Returns an error if the triple cannot be converted by
/// `deserialize_triple`.
pub fn contains_triple(&self, triple: &SerializableTriple) -> Result<bool> {
    // `deserialize_triple` consumes its argument, hence the clone.
    self.deserialize_triple(triple.clone())
        .map(|needle| self.triples.contains(&needle))
}
/// Returns every stored triple in serializable form, in the set's iteration
/// order.
pub fn get_all_triples(&self) -> Vec<SerializableTriple> {
    let mut serialized = Vec::with_capacity(self.triples.len());
    for stored in &self.triples {
        serialized.push(self.serialize_triple(stored));
    }
    serialized
}
fn deserialize_triple(&self, st: SerializableTriple) -> Result<Triple> {
let subject = NamedNode::new(&st.subject)?;
let predicate = NamedNode::new(&st.predicate)?;
let object = match st.object_type {
ObjectType::NamedNode => crate::model::Object::NamedNode(NamedNode::new(&st.object)?),
ObjectType::BlankNode => crate::model::Object::BlankNode(BlankNode::new(&st.object)?),
ObjectType::Literal { datatype, language } => {
if let Some(lang) = language {
crate::model::Object::Literal(Literal::new_language_tagged_literal(
&st.object, &lang,
)?)
} else if let Some(dt) = datatype {
crate::model::Object::Literal(Literal::new_typed(
&st.object,
NamedNode::new(&dt)?,
))
} else {
crate::model::Object::Literal(Literal::new(&st.object))
}
}
};
Ok(Triple::new(subject, predicate, object))
}
fn serialize_triple(&self, triple: &Triple) -> SerializableTriple {
let object_type = match triple.object() {
crate::model::Object::NamedNode(_) => ObjectType::NamedNode,
crate::model::Object::BlankNode(_) => ObjectType::BlankNode,
crate::model::Object::Literal(lit) => ObjectType::Literal {
datatype: if lit.datatype().as_str() != "http://www.w3.org/2001/XMLSchema#string" {
Some(lit.datatype().as_str().to_string())
} else {
None
},
language: lit.language().map(|l| l.to_string()),
},
_ => ObjectType::NamedNode, };
SerializableTriple {
subject: triple.subject().to_string(),
predicate: triple.predicate().to_string(),
object: triple.object().to_string(),
object_type,
}
}
pub fn reset(&mut self) {
self.triples.clear();
self.operation_count = 0;
self.digest_cache = None;
}
/// Executes a sequence of operations as one all-or-nothing unit.
///
/// On success the results of the individual operations are returned in
/// order. If any operation fails, the state machine is restored to exactly
/// the state it had before the call: stored triples, operation counter and
/// digest cache are all rolled back together, so the restored cache can
/// never describe a different triple set.
///
/// The rollback relies on a snapshot of the triple set taken before the
/// first operation runs, which costs one clone of the set per non-empty
/// batch.
///
/// # Errors
///
/// Returns the error of the first failing operation.
pub fn apply_batch(&mut self, operations: Vec<RdfOperation>) -> Result<Vec<OperationResult>> {
    // Nothing to run means nothing to roll back; skip the snapshot.
    if operations.is_empty() {
        return Ok(Vec::new());
    }

    let saved_triples = self.triples.clone();
    let saved_count = self.operation_count;
    let saved_cache = self.digest_cache.clone();

    let mut results = Vec::with_capacity(operations.len());
    for operation in operations {
        match self.execute(operation) {
            Ok(result) => results.push(result),
            Err(e) => {
                // Earlier operations of this batch may already have changed
                // the set, so all three pieces of state are restored together.
                self.triples = saved_triples;
                self.operation_count = saved_count;
                self.digest_cache = saved_cache;
                return Err(e);
            }
        }
    }
    Ok(results)
}
}
impl Default for RdfStateMachine {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a triple whose object is a literal with neither a datatype nor
    /// a language tag.
    fn plain_literal(subject: &str, predicate: &str, object: &str) -> SerializableTriple {
        SerializableTriple {
            subject: subject.to_string(),
            predicate: predicate.to_string(),
            object: object.to_string(),
            object_type: ObjectType::Literal {
                datatype: None,
                language: None,
            },
        }
    }

    /// A single insert followed by a remove updates counts and membership.
    #[test]
    fn test_state_machine_basic_operations() {
        let mut sm = RdfStateMachine::new();
        let triple = plain_literal("http://example.org/s", "http://example.org/p", "value");

        let inserted = sm
            .execute(RdfOperation::Insert(triple.clone()))
            .expect("operation should succeed");
        assert!(matches!(inserted, OperationResult::Success));
        assert_eq!(sm.triple_count(), 1);
        assert_eq!(sm.operation_count(), 1);
        let present = sm
            .contains_triple(&triple)
            .expect("contains_triple should succeed");
        assert!(present);

        let removed = sm
            .execute(RdfOperation::Remove(triple.clone()))
            .expect("operation should succeed");
        assert!(matches!(removed, OperationResult::Success));
        assert_eq!(sm.triple_count(), 0);
        let still_present = sm
            .contains_triple(&triple)
            .expect("contains_triple should succeed");
        assert!(!still_present);
    }

    /// Batch insert and batch remove act on every triple of the batch.
    #[test]
    fn test_state_machine_batch_operations() {
        let mut sm = RdfStateMachine::new();
        let batch = vec![
            plain_literal("http://example.org/s1", "http://example.org/p", "value1"),
            plain_literal("http://example.org/s2", "http://example.org/p", "value2"),
        ];

        let inserted = sm
            .execute(RdfOperation::BatchInsert(batch.clone()))
            .expect("operation should succeed");
        assert!(matches!(inserted, OperationResult::Success));
        assert_eq!(sm.triple_count(), 2);

        let removed = sm
            .execute(RdfOperation::BatchRemove(batch))
            .expect("operation should succeed");
        assert!(matches!(removed, OperationResult::Success));
        assert_eq!(sm.triple_count(), 0);
    }

    /// The digest changes after a mutation, is stable across repeated calls,
    /// and agrees between the cached and the read-only entry points.
    #[test]
    fn test_state_machine_digest_calculation() {
        let mut sm = RdfStateMachine::new();
        let triple = plain_literal("http://example.org/s", "http://example.org/p", "value");

        let before = sm.calculate_digest();
        sm.execute(RdfOperation::Insert(triple.clone()))
            .expect("operation should succeed");
        let after = sm.calculate_digest();
        assert_ne!(before, after);

        let repeated = sm.calculate_digest();
        assert_eq!(after, repeated);

        let readonly = sm.get_state_digest();
        assert_eq!(after, readonly);
    }

    /// A query over 15 stored triples returns exactly 10 results.
    #[test]
    fn test_query_operation() {
        let mut sm = RdfStateMachine::new();
        for i in 0..15 {
            let triple = plain_literal(
                &format!("http://example.org/s{i}"),
                "http://example.org/p",
                &format!("value{i}"),
            );
            sm.execute(RdfOperation::Insert(triple))
                .expect("state machine execution should succeed");
        }

        let outcome = sm
            .execute(RdfOperation::Query(
                "SELECT * WHERE { ?s ?p ?o }".to_string(),
            ))
            .expect("operation should succeed");
        match outcome {
            OperationResult::QueryResult(rows) => assert_eq!(rows.len(), 10),
            _ => panic!("Expected QueryResult"),
        }
    }

    /// Named-node, blank-node, typed-literal and language-tagged objects are
    /// all accepted and stored as distinct triples.
    #[test]
    fn test_different_object_types() {
        let mut sm = RdfStateMachine::new();
        let subject = "http://example.org/s";

        let named = SerializableTriple {
            subject: subject.to_string(),
            predicate: "http://example.org/p".to_string(),
            object: "http://example.org/o".to_string(),
            object_type: ObjectType::NamedNode,
        };
        let blank = SerializableTriple {
            subject: subject.to_string(),
            predicate: "http://example.org/p2".to_string(),
            object: "_:blank1".to_string(),
            object_type: ObjectType::BlankNode,
        };
        let typed = SerializableTriple {
            subject: subject.to_string(),
            predicate: "http://example.org/p3".to_string(),
            object: "42".to_string(),
            object_type: ObjectType::Literal {
                datatype: Some("http://www.w3.org/2001/XMLSchema#integer".to_string()),
                language: None,
            },
        };
        let tagged = SerializableTriple {
            subject: subject.to_string(),
            predicate: "http://example.org/p4".to_string(),
            object: "hello".to_string(),
            object_type: ObjectType::Literal {
                datatype: None,
                language: Some("en".to_string()),
            },
        };

        for triple in vec![named, blank, typed, tagged] {
            sm.execute(RdfOperation::Insert(triple))
                .expect("operation should succeed");
        }
        assert_eq!(sm.triple_count(), 4);
    }
}