Skip to main content

oxirs_ttl/diff/
rdf_diff.rs

1//! RDF graph diff and patch utilities
2//!
3//! Computes the symmetric difference between two RDF graphs and supports
4//! serialising the result to a simple patch format (lines prefixed with `+`
5//! or `-`) as well as parsing such patches back into [`RdfDiff`] values.
6//!
7//! The diff and patch operations use the lightweight [`RdfTerm`] / [`NTriple`]
8//! types from this crate, so no dependency on `oxirs-core` is required here.
9//!
10//! # Examples
11//!
12//! ## Computing a diff
13//!
14//! ```rust
15//! use oxirs_ttl::writer::RdfTerm;
16//! use oxirs_ttl::diff::{compute_diff, RdfDiff};
17//!
18//! let before = vec![
19//!     (RdfTerm::iri("http://s"), RdfTerm::iri("http://p"), RdfTerm::iri("http://o1")),
20//! ];
21//! let after = vec![
22//!     (RdfTerm::iri("http://s"), RdfTerm::iri("http://p"), RdfTerm::iri("http://o2")),
23//! ];
24//!
25//! let diff = compute_diff(&before, &after);
26//! assert_eq!(diff.added.len(), 1);
27//! assert_eq!(diff.removed.len(), 1);
28//! ```
29//!
30//! ## Applying a diff
31//!
32//! ```rust
33//! use oxirs_ttl::writer::RdfTerm;
34//! use oxirs_ttl::diff::compute_diff;
35//!
36//! let before = vec![
37//!     (RdfTerm::iri("http://s"), RdfTerm::iri("http://p"), RdfTerm::iri("http://o1")),
38//! ];
39//! let after = vec![
40//!     (RdfTerm::iri("http://s"), RdfTerm::iri("http://p"), RdfTerm::iri("http://o2")),
41//! ];
42//!
43//! let diff = compute_diff(&before, &after);
44//! let mut graph = before.clone();
45//! diff.apply(&mut graph);
46//! assert_eq!(graph, after);
47//! ```
48//!
49//! ## Round-trip via patch format
50//!
51//! ```rust
52//! use oxirs_ttl::writer::RdfTerm;
53//! use oxirs_ttl::diff::{compute_diff, parse_patch};
54//!
55//! let before = vec![
56//!     (RdfTerm::iri("http://s"), RdfTerm::iri("http://p"), RdfTerm::iri("http://o1")),
57//! ];
58//! let after = vec![
59//!     (RdfTerm::iri("http://s"), RdfTerm::iri("http://p"), RdfTerm::iri("http://o2")),
60//! ];
61//!
62//! let diff = compute_diff(&before, &after);
63//! let patch_text = diff.to_patch_format();
64//! let parsed = parse_patch(&patch_text).expect("valid patch");
65//! assert_eq!(parsed.added.len(), 1);
66//! assert_eq!(parsed.removed.len(), 1);
67//! ```
68
69use crate::parser::{NTriplesLiteParser, ParseError as NtParseError};
70use crate::writer::RdfTerm;
71use std::collections::HashSet;
72
73/// An N-Triple as used throughout this module
74pub type NTriple = (RdfTerm, RdfTerm, RdfTerm);
75
76// ─── Error type ─────────────────────────────────────────────────────────────
77
78/// Errors produced when parsing a patch document
79#[derive(Debug, Clone)]
80pub struct PatchParseError {
81    /// 1-based line number
82    pub line: usize,
83    /// Human-readable description
84    pub message: String,
85}
86
87impl PatchParseError {
88    fn new(line: usize, message: impl Into<String>) -> Self {
89        Self {
90            line,
91            message: message.into(),
92        }
93    }
94}
95
96impl std::fmt::Display for PatchParseError {
97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98        write!(
99            f,
100            "patch parse error at line {}: {}",
101            self.line, self.message
102        )
103    }
104}
105
106impl std::error::Error for PatchParseError {}
107
108impl From<NtParseError> for PatchParseError {
109    fn from(e: NtParseError) -> Self {
110        Self {
111            line: e.line,
112            message: e.message,
113        }
114    }
115}
116
117// ─── RdfDiff ─────────────────────────────────────────────────────────────────
118
119/// The set difference between two RDF graphs
120///
121/// `added` contains triples present in `after` but not `before`;
122/// `removed` contains triples present in `before` but not `after`.
123#[derive(Debug, Clone, PartialEq)]
124pub struct RdfDiff {
125    /// Triples that were added (present in `after` graph, absent from `before`)
126    pub added: Vec<NTriple>,
127    /// Triples that were removed (present in `before` graph, absent from `after`)
128    pub removed: Vec<NTriple>,
129}
130
131impl RdfDiff {
132    /// Construct a diff with given additions and removals
133    pub fn new(added: Vec<NTriple>, removed: Vec<NTriple>) -> Self {
134        Self { added, removed }
135    }
136
137    /// Return `true` when no triples were added or removed
138    pub fn is_empty(&self) -> bool {
139        self.added.is_empty() && self.removed.is_empty()
140    }
141
142    /// Total number of changed triples (additions + removals)
143    pub fn triple_count(&self) -> usize {
144        self.added.len() + self.removed.len()
145    }
146
147    /// Apply this diff to a mutable graph in place.
148    ///
149    /// Removed triples are deleted, then added triples are appended.  The
150    /// operation is idempotent: applying the same diff twice leaves the graph
151    /// in the same state as applying it once.
152    pub fn apply(&self, triples: &mut Vec<NTriple>) {
153        // Remove triples that should be deleted
154        let remove_set: HashSet<&NTriple> = self.removed.iter().collect();
155        triples.retain(|t| !remove_set.contains(t));
156
157        // Add new triples (skip duplicates) – collect additions first to avoid
158        // a simultaneous mutable/immutable borrow of `triples`.
159        let existing: HashSet<NTriple> = triples.iter().cloned().collect();
160        let to_add: Vec<NTriple> = self
161            .added
162            .iter()
163            .filter(|t| !existing.contains(t))
164            .cloned()
165            .collect();
166        triples.extend(to_add);
167    }
168
169    /// Return the inverse diff (swap `added` and `removed`).
170    ///
171    /// Applying the inverse of a diff to the "after" graph yields the
172    /// "before" graph.
173    pub fn invert(&self) -> Self {
174        Self {
175            added: self.removed.clone(),
176            removed: self.added.clone(),
177        }
178    }
179
180    /// Serialise the diff to the simple patch format.
181    ///
182    /// Each removed triple is emitted as a line starting with `- ` followed
183    /// by an N-Triples representation; each added triple starts with `+ `.
184    /// A comment header records the diff statistics.
185    pub fn to_patch_format(&self) -> String {
186        let mut out = String::new();
187
188        out.push_str(&format!(
189            "# RDF diff: +{} -{}\n",
190            self.added.len(),
191            self.removed.len()
192        ));
193
194        for triple in &self.removed {
195            out.push_str("- ");
196            out.push_str(&triple_to_ntriples(triple));
197            out.push('\n');
198        }
199
200        for triple in &self.added {
201            out.push_str("+ ");
202            out.push_str(&triple_to_ntriples(triple));
203            out.push('\n');
204        }
205
206        out
207    }
208}
209
210// ─── Public functions ────────────────────────────────────────────────────────
211
212/// Compute the diff between two RDF graphs
213///
214/// Both slices are treated as sets: duplicate triples within a single graph
215/// are ignored.
216pub fn compute_diff(before: &[NTriple], after: &[NTriple]) -> RdfDiff {
217    let set_before: HashSet<&NTriple> = before.iter().collect();
218    let set_after: HashSet<&NTriple> = after.iter().collect();
219
220    let mut added: Vec<NTriple> = set_after
221        .difference(&set_before)
222        .map(|t| (*t).clone())
223        .collect();
224    let mut removed: Vec<NTriple> = set_before
225        .difference(&set_after)
226        .map(|t| (*t).clone())
227        .collect();
228
229    // Sort for deterministic output
230    added.sort();
231    removed.sort();
232
233    RdfDiff { added, removed }
234}
235
236/// Parse a patch document produced by [`RdfDiff::to_patch_format`]
237///
238/// Lines starting with `+` or `- ` are interpreted as N-Triples additions or
239/// removals respectively.  Lines starting with `#` and blank lines are skipped.
240pub fn parse_patch(patch: &str) -> Result<RdfDiff, PatchParseError> {
241    let mut added: Vec<NTriple> = Vec::new();
242    let mut removed: Vec<NTriple> = Vec::new();
243    let mut nt_parser = NTriplesLiteParser::new();
244
245    for (line_idx, line) in patch.lines().enumerate() {
246        let line_no = line_idx + 1;
247        let trimmed = line.trim();
248
249        if trimmed.is_empty() || trimmed.starts_with('#') {
250            continue;
251        }
252
253        if let Some(rest) = trimmed.strip_prefix("+ ") {
254            let triple = parse_single_triple(rest, line_no, &mut nt_parser)?;
255            added.push(triple);
256        } else if let Some(rest) = trimmed.strip_prefix("- ") {
257            let triple = parse_single_triple(rest, line_no, &mut nt_parser)?;
258            removed.push(triple);
259        } else {
260            return Err(PatchParseError::new(
261                line_no,
262                format!("line must start with '+ ' or '- ', found: {trimmed}"),
263            ));
264        }
265    }
266
267    Ok(RdfDiff { added, removed })
268}
269
270// ─── Helpers ─────────────────────────────────────────────────────────────────
271
272/// Serialize one triple as a single N-Triples line (without trailing newline)
273fn triple_to_ntriples(triple: &NTriple) -> String {
274    format!("{} {} {} .", triple.0, triple.1, triple.2)
275}
276
277/// Parse a single triple from an N-Triples line (used for patch parsing)
278fn parse_single_triple(
279    line: &str,
280    line_no: usize,
281    parser: &mut NTriplesLiteParser,
282) -> Result<NTriple, PatchParseError> {
283    parser.reset();
284    let mut triples = parser
285        .parse_str(line)
286        .map_err(|e| PatchParseError::new(line_no, e.message))?;
287
288    match triples.len() {
289        0 => Err(PatchParseError::new(
290            line_no,
291            "expected a triple but line was empty",
292        )),
293        1 => Ok(triples.remove(0)),
294        _ => Err(PatchParseError::new(
295            line_no,
296            "more than one triple on a patch line",
297        )),
298    }
299}
300
301// ─── Tests ───────────────────────────────────────────────────────────────────
302
303#[cfg(test)]
304mod tests {
305    use super::*;
306    use crate::writer::RdfTerm;
307
308    fn s() -> RdfTerm {
309        RdfTerm::iri("http://example.org/s")
310    }
311    fn p() -> RdfTerm {
312        RdfTerm::iri("http://example.org/p")
313    }
314    fn o1() -> RdfTerm {
315        RdfTerm::iri("http://example.org/o1")
316    }
317    fn o2() -> RdfTerm {
318        RdfTerm::iri("http://example.org/o2")
319    }
320
321    fn triple(s: RdfTerm, p: RdfTerm, o: RdfTerm) -> NTriple {
322        (s, p, o)
323    }
324
325    // ── compute_diff ────────────────────────────────────────────────────────
326
327    #[test]
328    fn test_diff_identical_graphs() {
329        let before = vec![triple(s(), p(), o1())];
330        let after = before.clone();
331        let diff = compute_diff(&before, &after);
332        assert!(diff.is_empty());
333        assert_eq!(diff.triple_count(), 0);
334    }
335
336    #[test]
337    fn test_diff_addition() {
338        let before: Vec<NTriple> = vec![];
339        let after = vec![triple(s(), p(), o1())];
340        let diff = compute_diff(&before, &after);
341        assert_eq!(diff.added.len(), 1);
342        assert!(diff.removed.is_empty());
343    }
344
345    #[test]
346    fn test_diff_removal() {
347        let before = vec![triple(s(), p(), o1())];
348        let after: Vec<NTriple> = vec![];
349        let diff = compute_diff(&before, &after);
350        assert!(diff.added.is_empty());
351        assert_eq!(diff.removed.len(), 1);
352    }
353
354    #[test]
355    fn test_diff_replacement() {
356        let before = vec![triple(s(), p(), o1())];
357        let after = vec![triple(s(), p(), o2())];
358        let diff = compute_diff(&before, &after);
359        assert_eq!(diff.added.len(), 1);
360        assert_eq!(diff.removed.len(), 1);
361        assert_eq!(diff.added[0].2.value, "http://example.org/o2");
362        assert_eq!(diff.removed[0].2.value, "http://example.org/o1");
363    }
364
365    #[test]
366    fn test_diff_duplicates_treated_as_set() {
367        // Providing the same triple twice in 'before' should be equivalent to
368        // providing it once.
369        let before = vec![triple(s(), p(), o1()), triple(s(), p(), o1())];
370        let after = vec![triple(s(), p(), o2())];
371        let diff = compute_diff(&before, &after);
372        assert_eq!(diff.added.len(), 1);
373        assert_eq!(diff.removed.len(), 1);
374    }
375
376    // ── apply ───────────────────────────────────────────────────────────────
377
378    #[test]
379    fn test_apply_roundtrip() {
380        let before = vec![triple(s(), p(), o1())];
381        let after = vec![triple(s(), p(), o2())];
382        let diff = compute_diff(&before, &after);
383
384        let mut graph = before.clone();
385        diff.apply(&mut graph);
386
387        // Sort both for comparison
388        let mut graph_sorted = graph.clone();
389        let mut after_sorted = after.clone();
390        graph_sorted.sort();
391        after_sorted.sort();
392
393        assert_eq!(graph_sorted, after_sorted);
394    }
395
396    #[test]
397    fn test_apply_idempotent() {
398        let before = vec![triple(s(), p(), o1())];
399        let after = vec![triple(s(), p(), o2())];
400        let diff = compute_diff(&before, &after);
401
402        let mut graph = before.clone();
403        diff.apply(&mut graph);
404        diff.apply(&mut graph); // second application must be a no-op
405
406        let mut graph_sorted = graph.clone();
407        let mut after_sorted = after.clone();
408        graph_sorted.sort();
409        after_sorted.sort();
410
411        assert_eq!(graph_sorted, after_sorted);
412    }
413
414    #[test]
415    fn test_apply_empty_diff() {
416        let before = vec![triple(s(), p(), o1())];
417        let diff = RdfDiff::new(vec![], vec![]);
418        let mut graph = before.clone();
419        diff.apply(&mut graph);
420        assert_eq!(graph, before);
421    }
422
423    // ── invert ──────────────────────────────────────────────────────────────
424
425    #[test]
426    fn test_invert_roundtrip() {
427        let before = vec![triple(s(), p(), o1())];
428        let after = vec![triple(s(), p(), o2())];
429        let diff = compute_diff(&before, &after);
430        let inv = diff.invert();
431
432        // Applying the inverse to 'after' should restore 'before'
433        let mut graph = after.clone();
434        inv.apply(&mut graph);
435
436        let mut graph_sorted = graph.clone();
437        let mut before_sorted = before.clone();
438        graph_sorted.sort();
439        before_sorted.sort();
440
441        assert_eq!(graph_sorted, before_sorted);
442    }
443
444    // ── patch format ────────────────────────────────────────────────────────
445
446    #[test]
447    fn test_patch_format_roundtrip() {
448        let before = vec![triple(s(), p(), o1())];
449        let after = vec![triple(s(), p(), o2())];
450        let diff = compute_diff(&before, &after);
451
452        let patch = diff.to_patch_format();
453
454        // Patch must contain addition and removal markers
455        assert!(patch.contains("+ "), "missing '+' marker");
456        assert!(patch.contains("- "), "missing '-' marker");
457        assert!(patch.contains("# RDF diff:"), "missing header");
458
459        let parsed = parse_patch(&patch).expect("patch must parse successfully");
460        assert_eq!(parsed.added.len(), diff.added.len());
461        assert_eq!(parsed.removed.len(), diff.removed.len());
462    }
463
464    #[test]
465    fn test_patch_format_empty_diff() {
466        let diff = RdfDiff::new(vec![], vec![]);
467        let patch = diff.to_patch_format();
468        let parsed = parse_patch(&patch).expect("empty patch parses");
469        assert!(parsed.is_empty());
470    }
471
472    #[test]
473    fn test_patch_format_only_additions() {
474        let before: Vec<NTriple> = vec![];
475        let after = vec![triple(s(), p(), o1())];
476        let diff = compute_diff(&before, &after);
477        let patch = diff.to_patch_format();
478        assert!(patch.contains("+ "));
479        assert!(!patch.contains("- "));
480
481        let parsed = parse_patch(&patch).expect("parse should succeed");
482        assert_eq!(parsed.added.len(), 1);
483        assert!(parsed.removed.is_empty());
484    }
485
486    #[test]
487    fn test_patch_format_only_removals() {
488        let before = vec![triple(s(), p(), o1())];
489        let after: Vec<NTriple> = vec![];
490        let diff = compute_diff(&before, &after);
491        let patch = diff.to_patch_format();
492        assert!(!patch.contains("+ "));
493        assert!(patch.contains("- "));
494
495        let parsed = parse_patch(&patch).expect("parse should succeed");
496        assert!(parsed.added.is_empty());
497        assert_eq!(parsed.removed.len(), 1);
498    }
499
500    #[test]
501    fn test_patch_invalid_prefix() {
502        let bad_patch = "? <http://s> <http://p> <http://o> .\n";
503        let result = parse_patch(bad_patch);
504        assert!(result.is_err(), "invalid prefix should fail");
505    }
506
507    #[test]
508    fn test_patch_with_literal() {
509        let before = vec![triple(s(), p(), RdfTerm::simple_literal("old"))];
510        let after = vec![triple(s(), p(), RdfTerm::simple_literal("new"))];
511        let diff = compute_diff(&before, &after);
512        let patch = diff.to_patch_format();
513        let parsed = parse_patch(&patch).expect("literal patch parses");
514        assert_eq!(parsed.added.len(), 1);
515        assert_eq!(parsed.removed.len(), 1);
516        assert_eq!(parsed.added[0].2.value, "new");
517        assert_eq!(parsed.removed[0].2.value, "old");
518    }
519
520    #[test]
521    fn test_patch_apply_after_parse() {
522        let before = vec![triple(s(), p(), o1())];
523        let after = vec![triple(s(), p(), o2())];
524        let diff = compute_diff(&before, &after);
525        let patch_text = diff.to_patch_format();
526        let parsed_diff = parse_patch(&patch_text).expect("parse should succeed");
527
528        let mut graph = before.clone();
529        parsed_diff.apply(&mut graph);
530
531        let mut graph_sorted = graph.clone();
532        let mut after_sorted = after.clone();
533        graph_sorted.sort();
534        after_sorted.sort();
535
536        assert_eq!(graph_sorted, after_sorted);
537    }
538}