use crate::parser::{NTriplesLiteParser, ParseError as NtParseError};
use crate::writer::RdfTerm;
use std::collections::HashSet;
/// A single RDF statement stored as a `(subject, predicate, object)` tuple of terms.
pub type NTriple = (RdfTerm, RdfTerm, RdfTerm);
/// Error describing why a textual RDF patch could not be parsed.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare errors by value
/// (for example `assert_eq!(result, Err(expected))`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PatchParseError {
    /// Line the failure is attributed to; 1-based when produced by `parse_patch`.
    pub line: usize,
    /// Human-readable description of the failure.
    pub message: String,
}
impl PatchParseError {
    /// Builds an error for `line`; `message` may be anything convertible into a `String`.
    fn new(line: usize, message: impl Into<String>) -> Self {
        let message: String = message.into();
        Self { line, message }
    }
}
impl std::fmt::Display for PatchParseError {
    /// Renders the error as `patch parse error at line <line>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { line, message } = self;
        f.write_fmt(format_args!(
            "patch parse error at line {}: {}",
            line, message
        ))
    }
}
// Marker impl so `PatchParseError` can be used wherever a `std::error::Error` is expected
// (it relies on the type's `Debug` and `Display` implementations).
impl std::error::Error for PatchParseError {}
impl From<NtParseError> for PatchParseError {
fn from(e: NtParseError) -> Self {
Self {
line: e.line,
message: e.message,
}
}
}
/// A set-style difference between two RDF graphs.
///
/// `Default` yields the empty diff; `Eq` complements the existing `PartialEq`
/// (triples are already required to be `Eq` wherever they are hashed).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RdfDiff {
    /// Triples to be inserted when the diff is applied.
    pub added: Vec<NTriple>,
    /// Triples to be deleted when the diff is applied.
    pub removed: Vec<NTriple>,
}
impl RdfDiff {
    /// Creates a diff from explicit lists of added and removed triples.
    pub fn new(added: Vec<NTriple>, removed: Vec<NTriple>) -> Self {
        Self { added, removed }
    }

    /// Returns `true` when the diff neither adds nor removes any triple.
    pub fn is_empty(&self) -> bool {
        self.added.is_empty() && self.removed.is_empty()
    }

    /// Total number of entries in the diff (additions plus removals).
    pub fn triple_count(&self) -> usize {
        self.added.len() + self.removed.len()
    }

    /// Applies the diff to `triples` in place.
    ///
    /// Removals run first: every triple equal to an entry of `removed` is dropped.
    /// Afterwards each entry of `added` that is not yet present is appended, in the
    /// order it appears in `added`. A triple listed several times in `added` is
    /// appended only once, so applying a diff never introduces new duplicates.
    pub fn apply(&self, triples: &mut Vec<NTriple>) {
        if !self.removed.is_empty() {
            let doomed: HashSet<&NTriple> = self.removed.iter().collect();
            triples.retain(|t| !doomed.contains(t));
        }

        // Collect the additions in an inner scope so the borrowed "seen" set is gone
        // before `triples` is mutated again.
        let fresh: Vec<NTriple> = {
            let mut seen: HashSet<&NTriple> = triples.iter().collect();
            let mut fresh = Vec::new();
            for candidate in &self.added {
                // `insert` returns `false` for triples already in the graph and for
                // repeats earlier in `added`.
                if seen.insert(candidate) {
                    fresh.push(candidate.clone());
                }
            }
            fresh
        };
        triples.extend(fresh);
    }

    /// Returns the reverse diff: what was added becomes removed and vice versa.
    pub fn invert(&self) -> Self {
        Self {
            added: self.removed.clone(),
            removed: self.added.clone(),
        }
    }

    /// Serialises the diff as line-oriented patch text.
    ///
    /// The output starts with a `# RDF diff: +<added> -<removed>` header line,
    /// followed by one `- <triple>` line per removal and then one `+ <triple>`
    /// line per addition. Every line ends with `\n`.
    pub fn to_patch_format(&self) -> String {
        let mut out = format!(
            "# RDF diff: +{} -{}\n",
            self.added.len(),
            self.removed.len()
        );
        for triple in &self.removed {
            out.push_str("- ");
            out.push_str(&triple_to_ntriples(triple));
            out.push('\n');
        }
        for triple in &self.added {
            out.push_str("+ ");
            out.push_str(&triple_to_ntriples(triple));
            out.push('\n');
        }
        out
    }
}
/// Computes the set difference between two graphs.
///
/// `added` holds the distinct triples found in `after` but not in `before`;
/// `removed` holds the distinct triples found in `before` but not in `after`.
/// Both lists are returned sorted. Repeated triples in either input count once.
pub fn compute_diff(before: &[NTriple], after: &[NTriple]) -> RdfDiff {
    /// Sorted, owned copy of the triples present in `left` but absent from `right`.
    fn only_in<'a>(
        left: &HashSet<&'a NTriple>,
        right: &HashSet<&'a NTriple>,
    ) -> Vec<NTriple> {
        let mut exclusive: Vec<NTriple> = left.difference(right).map(|&t| t.clone()).collect();
        exclusive.sort();
        exclusive
    }

    let old: HashSet<&NTriple> = before.iter().collect();
    let new: HashSet<&NTriple> = after.iter().collect();

    RdfDiff {
        added: only_in(&new, &old),
        removed: only_in(&old, &new),
    }
}
/// Parses patch text into an [`RdfDiff`].
///
/// Each line is trimmed first. Blank lines and lines starting with `#` are skipped.
/// A line starting with `+ ` contributes an added triple and a line starting with
/// `- ` contributes a removed triple; the remainder of the line is handed to
/// `parse_single_triple`.
///
/// # Errors
///
/// Returns a [`PatchParseError`] carrying the 1-based line number when a line has
/// neither marker or when its triple fails to parse. Parsing stops at the first error.
pub fn parse_patch(patch: &str) -> Result<RdfDiff, PatchParseError> {
    let mut added: Vec<NTriple> = Vec::new();
    let mut removed: Vec<NTriple> = Vec::new();
    let mut nt_parser = NTriplesLiteParser::new();

    for (line_no, raw) in (1..).zip(patch.lines()) {
        let trimmed = raw.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Pick the destination list from the marker; `+ ` is checked before `- `.
        let (bucket, body) = match (trimmed.strip_prefix("+ "), trimmed.strip_prefix("- ")) {
            (Some(body), _) => (&mut added, body),
            (None, Some(body)) => (&mut removed, body),
            (None, None) => {
                return Err(PatchParseError::new(
                    line_no,
                    format!("line must start with '+ ' or '- ', found: {trimmed}"),
                ));
            }
        };

        let parsed = parse_single_triple(body, line_no, &mut nt_parser)?;
        bucket.push(parsed);
    }

    Ok(RdfDiff { added, removed })
}
/// Formats a triple as one statement: the three terms separated by spaces, followed by ` .`.
fn triple_to_ntriples(triple: &NTriple) -> String {
    let (subject, predicate, object) = triple;
    format!("{} {} {} .", subject, predicate, object)
}
/// Parses `line` as exactly one triple using `parser`.
///
/// The parser is reset before use. Any failure is reported against `line_no`:
/// a parser error keeps the parser's message, and a line yielding zero or more
/// than one triple is rejected with a dedicated message.
fn parse_single_triple(
    line: &str,
    line_no: usize,
    parser: &mut NTriplesLiteParser,
) -> Result<NTriple, PatchParseError> {
    let fail = |message: &str| PatchParseError::new(line_no, message);

    parser.reset();
    let mut parsed = match parser.parse_str(line) {
        Ok(parsed) => parsed,
        Err(e) => return Err(PatchParseError::new(line_no, e.message)),
    };

    if parsed.len() > 1 {
        return Err(fail("more than one triple on a patch line"));
    }
    // At most one element remains, so `pop` yields it or signals the empty case.
    parsed
        .pop()
        .ok_or_else(|| fail("expected a triple but line was empty"))
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::writer::RdfTerm;

    // ---- fixtures -------------------------------------------------------

    fn s() -> RdfTerm {
        RdfTerm::iri("http://example.org/s")
    }

    fn p() -> RdfTerm {
        RdfTerm::iri("http://example.org/p")
    }

    fn o1() -> RdfTerm {
        RdfTerm::iri("http://example.org/o1")
    }

    fn o2() -> RdfTerm {
        RdfTerm::iri("http://example.org/o2")
    }

    /// Assembles a triple from its three terms.
    fn triple(s: RdfTerm, p: RdfTerm, o: RdfTerm) -> NTriple {
        (s, p, o)
    }

    /// A graph holding the single triple `s p <object>`.
    fn graph_with(object: RdfTerm) -> Vec<NTriple> {
        vec![triple(s(), p(), object)]
    }

    /// The "replace `o1` with `o2`" scenario as `(before, after, diff)`.
    fn replacement() -> (Vec<NTriple>, Vec<NTriple>, RdfDiff) {
        let before = graph_with(o1());
        let after = graph_with(o2());
        let diff = compute_diff(&before, &after);
        (before, after, diff)
    }

    /// Asserts that two graphs hold the same triples, ignoring order.
    fn assert_same_graph(actual: &[NTriple], expected: &[NTriple]) {
        let mut actual = actual.to_vec();
        let mut expected = expected.to_vec();
        actual.sort();
        expected.sort();
        assert_eq!(actual, expected);
    }

    // ---- compute_diff ---------------------------------------------------

    #[test]
    fn test_diff_identical_graphs() {
        let before = graph_with(o1());
        let after = before.clone();
        let diff = compute_diff(&before, &after);
        assert!(diff.is_empty());
        assert_eq!(diff.triple_count(), 0);
    }

    #[test]
    fn test_diff_addition() {
        let before: Vec<NTriple> = Vec::new();
        let after = graph_with(o1());
        let diff = compute_diff(&before, &after);
        assert_eq!(diff.added.len(), 1);
        assert!(diff.removed.is_empty());
    }

    #[test]
    fn test_diff_removal() {
        let before = graph_with(o1());
        let after: Vec<NTriple> = Vec::new();
        let diff = compute_diff(&before, &after);
        assert!(diff.added.is_empty());
        assert_eq!(diff.removed.len(), 1);
    }

    #[test]
    fn test_diff_replacement() {
        let (_, _, diff) = replacement();
        assert_eq!(diff.added.len(), 1);
        assert_eq!(diff.removed.len(), 1);
        assert_eq!(diff.added[0].2.value, "http://example.org/o2");
        assert_eq!(diff.removed[0].2.value, "http://example.org/o1");
    }

    #[test]
    fn test_diff_duplicates_treated_as_set() {
        let before = vec![triple(s(), p(), o1()), triple(s(), p(), o1())];
        let after = graph_with(o2());
        let diff = compute_diff(&before, &after);
        assert_eq!(diff.added.len(), 1);
        assert_eq!(diff.removed.len(), 1);
    }

    // ---- apply / invert -------------------------------------------------

    #[test]
    fn test_apply_roundtrip() {
        let (before, after, diff) = replacement();
        let mut graph = before.clone();
        diff.apply(&mut graph);
        assert_same_graph(&graph, &after);
    }

    #[test]
    fn test_apply_idempotent() {
        let (before, after, diff) = replacement();
        let mut graph = before.clone();
        for _ in 0..2 {
            diff.apply(&mut graph);
        }
        assert_same_graph(&graph, &after);
    }

    #[test]
    fn test_apply_empty_diff() {
        let before = graph_with(o1());
        let diff = RdfDiff::new(vec![], vec![]);
        let mut graph = before.clone();
        diff.apply(&mut graph);
        assert_eq!(graph, before);
    }

    #[test]
    fn test_invert_roundtrip() {
        let (before, after, diff) = replacement();
        let inv = diff.invert();
        let mut graph = after.clone();
        inv.apply(&mut graph);
        assert_same_graph(&graph, &before);
    }

    // ---- patch text -----------------------------------------------------

    #[test]
    fn test_patch_format_roundtrip() {
        let (_, _, diff) = replacement();
        let patch = diff.to_patch_format();
        assert!(patch.contains("+ "), "missing '+' marker");
        assert!(patch.contains("- "), "missing '-' marker");
        assert!(patch.contains("# RDF diff:"), "missing header");
        let parsed = parse_patch(&patch).expect("patch must parse successfully");
        assert_eq!(parsed.added.len(), diff.added.len());
        assert_eq!(parsed.removed.len(), diff.removed.len());
    }

    #[test]
    fn test_patch_format_empty_diff() {
        let patch = RdfDiff::new(vec![], vec![]).to_patch_format();
        let parsed = parse_patch(&patch).expect("empty patch parses");
        assert!(parsed.is_empty());
    }

    #[test]
    fn test_patch_format_only_additions() {
        let before: Vec<NTriple> = Vec::new();
        let after = graph_with(o1());
        let patch = compute_diff(&before, &after).to_patch_format();
        assert!(patch.contains("+ "));
        assert!(!patch.contains("- "));
        let parsed = parse_patch(&patch).expect("parse should succeed");
        assert_eq!(parsed.added.len(), 1);
        assert!(parsed.removed.is_empty());
    }

    #[test]
    fn test_patch_format_only_removals() {
        let before = graph_with(o1());
        let after: Vec<NTriple> = Vec::new();
        let patch = compute_diff(&before, &after).to_patch_format();
        assert!(!patch.contains("+ "));
        assert!(patch.contains("- "));
        let parsed = parse_patch(&patch).expect("parse should succeed");
        assert!(parsed.added.is_empty());
        assert_eq!(parsed.removed.len(), 1);
    }

    #[test]
    fn test_patch_invalid_prefix() {
        let bad_patch = "? <http://s> <http://p> <http://o> .\n";
        let result = parse_patch(bad_patch);
        assert!(result.is_err(), "invalid prefix should fail");
    }

    #[test]
    fn test_patch_with_literal() {
        let before = graph_with(RdfTerm::simple_literal("old"));
        let after = graph_with(RdfTerm::simple_literal("new"));
        let patch = compute_diff(&before, &after).to_patch_format();
        let parsed = parse_patch(&patch).expect("literal patch parses");
        assert_eq!(parsed.added.len(), 1);
        assert_eq!(parsed.removed.len(), 1);
        assert_eq!(parsed.added[0].2.value, "new");
        assert_eq!(parsed.removed[0].2.value, "old");
    }

    #[test]
    fn test_patch_apply_after_parse() {
        let (before, after, diff) = replacement();
        let patch_text = diff.to_patch_format();
        let parsed_diff = parse_patch(&patch_text).expect("parse should succeed");
        let mut graph = before.clone();
        parsed_diff.apply(&mut graph);
        assert_same_graph(&graph, &after);
    }
}