Skip to main content

oxirs_core/rdf/
diff.rs

1//! RDF dataset diff computation and patch application.
2//!
3//! Provides tools for computing the difference between two RDF datasets,
4//! applying patches, inverting diffs, and composing multiple diffs.
5
6use std::collections::HashSet;
7
/// One RDF statement, held as three owned strings: subject, predicate, object.
///
/// Equality and hashing are derived, so two triples compare equal exactly when
/// all three string components are equal.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Triple {
    /// Subject: an IRI or a blank node identifier.
    pub subject: String,
    /// Predicate: an IRI.
    pub predicate: String,
    /// Object: an IRI, a blank node, or a literal value.
    pub object: String,
}

impl Triple {
    /// Build a triple, converting each argument into an owned `String`.
    ///
    /// Accepts anything implementing `Into<String>` (for example `&str` or
    /// `String`) for each of the three positions.
    pub fn new(
        subject: impl Into<String>,
        predicate: impl Into<String>,
        object: impl Into<String>,
    ) -> Self {
        // Convert in subject → predicate → object order, then use field shorthand.
        let subject: String = subject.into();
        let predicate: String = predicate.into();
        let object: String = object.into();
        Self {
            subject,
            predicate,
            object,
        }
    }
}
33
/// Summary counters describing the outcome of a diff operation.
///
/// The derived `Default` yields all counters set to zero.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct DiffStats {
    /// How many triples are in `after` but missing from `before`.
    pub added_count: usize,
    /// How many triples are in `before` but missing from `after`.
    pub removed_count: usize,
    /// How many triples appear in both datasets, i.e. were left unchanged.
    pub unchanged_count: usize,
}
44
45/// The difference between two RDF datasets.
46#[derive(Debug, Clone)]
47pub struct DatasetDiff {
48    /// Triples added (in `after` but not `before`).
49    pub added: Vec<Triple>,
50    /// Triples removed (in `before` but not `after`).
51    pub removed: Vec<Triple>,
52    /// Summary statistics.
53    pub stats: DiffStats,
54}
55
56/// Engine for computing and manipulating RDF dataset diffs.
57pub struct RdfDiffEngine;
58
59impl RdfDiffEngine {
60    /// Compute the diff between `before` and `after` triple sets.
61    ///
62    /// - `added`   = triples in `after` that are not in `before`
63    /// - `removed` = triples in `before` that are not in `after`
64    pub fn compute(before: &[Triple], after: &[Triple]) -> DatasetDiff {
65        let before_set: HashSet<&Triple> = before.iter().collect();
66        let after_set: HashSet<&Triple> = after.iter().collect();
67
68        let added: Vec<Triple> = after
69            .iter()
70            .filter(|t| !before_set.contains(t))
71            .cloned()
72            .collect();
73
74        let removed: Vec<Triple> = before
75            .iter()
76            .filter(|t| !after_set.contains(t))
77            .cloned()
78            .collect();
79
80        let unchanged_count = before.iter().filter(|t| after_set.contains(t)).count();
81
82        let stats = DiffStats {
83            added_count: added.len(),
84            removed_count: removed.len(),
85            unchanged_count,
86        };
87
88        DatasetDiff {
89            added,
90            removed,
91            stats,
92        }
93    }
94
95    /// Apply `diff` to `base`, returning the resulting dataset.
96    ///
97    /// Removes all triples listed in `diff.removed` from `base`, then appends
98    /// all triples in `diff.added`, de-duplicating the result.
99    pub fn apply_diff(mut base: Vec<Triple>, diff: &DatasetDiff) -> Vec<Triple> {
100        let removed_set: HashSet<&Triple> = diff.removed.iter().collect();
101        base.retain(|t| !removed_set.contains(t));
102
103        for triple in &diff.added {
104            if !base.contains(triple) {
105                base.push(triple.clone());
106            }
107        }
108
109        base
110    }
111
112    /// Invert a diff — swap `added` and `removed` so applying the result
113    /// undoes the original diff.
114    pub fn invert(diff: DatasetDiff) -> DatasetDiff {
115        let stats = DiffStats {
116            added_count: diff.removed.len(),
117            removed_count: diff.added.len(),
118            unchanged_count: diff.stats.unchanged_count,
119        };
120        DatasetDiff {
121            added: diff.removed,
122            removed: diff.added,
123            stats,
124        }
125    }
126
127    /// Compose two diffs into a single diff representing the net effect of
128    /// applying `d1` followed by `d2`.
129    ///
130    /// Net effect:
131    /// - Net added   = (d1.added ∪ d2.added) \ d2.removed
132    /// - Net removed = (d1.removed ∪ d2.removed) \ d2.added
133    pub fn compose(d1: DatasetDiff, d2: DatasetDiff) -> DatasetDiff {
134        let d2_removed_set: HashSet<&Triple> = d2.removed.iter().collect();
135        let d2_added_set: HashSet<&Triple> = d2.added.iter().collect();
136
137        // Net added: items added in d1 not removed in d2, union items added in d2.
138        let mut net_added: Vec<Triple> = d1
139            .added
140            .iter()
141            .filter(|t| !d2_removed_set.contains(t))
142            .cloned()
143            .collect();
144        for t in &d2.added {
145            if !net_added.contains(t) {
146                net_added.push(t.clone());
147            }
148        }
149
150        // Net removed: items removed in d1 not re-added in d2, union items removed in d2.
151        let mut net_removed: Vec<Triple> = d1
152            .removed
153            .iter()
154            .filter(|t| !d2_added_set.contains(t))
155            .cloned()
156            .collect();
157        for t in &d2.removed {
158            if !net_removed.contains(t) {
159                net_removed.push(t.clone());
160            }
161        }
162
163        let stats = DiffStats {
164            added_count: net_added.len(),
165            removed_count: net_removed.len(),
166            unchanged_count: 0, // unknown after composition
167        };
168
169        DatasetDiff {
170            added: net_added,
171            removed: net_removed,
172            stats,
173        }
174    }
175
176    /// Return `true` if `diff` represents no change (no added and no removed triples).
177    pub fn is_empty(diff: &DatasetDiff) -> bool {
178        diff.added.is_empty() && diff.removed.is_empty()
179    }
180}
181
182/// A sequence of diffs that can be applied as a single patch.
183#[derive(Debug, Clone, Default)]
184pub struct DatasetPatch {
185    /// Ordered list of diffs in this patch.
186    pub patches: Vec<DatasetDiff>,
187}
188
189impl DatasetPatch {
190    /// Create a new, empty patch.
191    pub fn new() -> Self {
192        Self {
193            patches: Vec::new(),
194        }
195    }
196
197    /// Append a diff to the end of this patch.
198    pub fn add_diff(&mut self, diff: DatasetDiff) {
199        self.patches.push(diff);
200    }
201
202    /// Apply all diffs in order against `base` and return the result.
203    pub fn apply_all(&self, mut base: Vec<Triple>) -> Vec<Triple> {
204        for diff in &self.patches {
205            base = RdfDiffEngine::apply_diff(base, diff);
206        }
207        base
208    }
209
210    /// Total number of diffs in this patch.
211    pub fn len(&self) -> usize {
212        self.patches.len()
213    }
214
215    /// Return `true` if the patch contains no diffs.
216    pub fn is_empty(&self) -> bool {
217        self.patches.is_empty()
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a triple from explicit subject, predicate and object.
    fn spo(s: &str, p: &str, o: &str) -> Triple {
        Triple::new(s, p, o)
    }

    /// Build a triple with the fixed subject `"s"` and predicate `"p"`.
    fn with_object(o: &str) -> Triple {
        spo("s", "p", o)
    }

    // --- Triple tests ---

    #[test]
    fn test_triple_equality() {
        assert_eq!(spo("s", "p", "o"), spo("s", "p", "o"));
    }

    #[test]
    fn test_triple_inequality() {
        assert_ne!(with_object("o1"), with_object("o2"));
    }

    // --- RdfDiffEngine::compute tests ---

    #[test]
    fn test_diff_empty_datasets() {
        let DatasetDiff {
            added,
            removed,
            stats,
        } = RdfDiffEngine::compute(&[], &[]);
        assert!(added.is_empty());
        assert!(removed.is_empty());
        assert_eq!(stats.unchanged_count, 0);
    }

    #[test]
    fn test_diff_all_added() {
        let after = [with_object("o")];
        let diff = RdfDiffEngine::compute(&[], &after);
        assert_eq!(diff.added.len(), 1);
        assert!(diff.removed.is_empty());
        assert_eq!(diff.stats.added_count, 1);
    }

    #[test]
    fn test_diff_all_removed() {
        let before = [with_object("o")];
        let diff = RdfDiffEngine::compute(&before, &[]);
        assert!(diff.added.is_empty());
        assert_eq!(diff.removed.len(), 1);
        assert_eq!(diff.stats.removed_count, 1);
    }

    #[test]
    fn test_diff_no_change() {
        let dataset = vec![with_object("o")];
        let diff = RdfDiffEngine::compute(&dataset, &dataset);
        assert!(RdfDiffEngine::is_empty(&diff));
        assert_eq!(diff.stats.unchanged_count, 1);
    }

    #[test]
    fn test_diff_mixed() {
        let before = vec![with_object("o1"), with_object("o2")];
        let after = vec![with_object("o2"), with_object("o3")];
        let DatasetDiff {
            added,
            removed,
            stats,
        } = RdfDiffEngine::compute(&before, &after);
        assert_eq!(added, vec![with_object("o3")]);
        assert_eq!(removed, vec![with_object("o1")]);
        assert_eq!(stats.unchanged_count, 1);
    }

    // --- RdfDiffEngine::apply_diff tests ---

    #[test]
    fn test_apply_diff_add_triples() {
        let base = vec![with_object("o1")];
        let target = [with_object("o1"), with_object("o2")];
        let diff = RdfDiffEngine::compute(&base, &target);
        let result = RdfDiffEngine::apply_diff(base, &diff);
        for expected in &target {
            assert!(result.contains(expected));
        }
    }

    #[test]
    fn test_apply_diff_remove_triples() {
        let base = vec![with_object("o1"), with_object("o2")];
        let diff = RdfDiffEngine::compute(&base, &[with_object("o1")]);
        let result = RdfDiffEngine::apply_diff(base, &diff);
        assert_eq!(result.len(), 1);
        assert!(result.contains(&with_object("o1")));
    }

    #[test]
    fn test_apply_diff_no_change() {
        let base = vec![with_object("o")];
        let same = base.clone();
        let diff = RdfDiffEngine::compute(&base, &same);
        let result = RdfDiffEngine::apply_diff(base.clone(), &diff);
        assert_eq!(result, base);
    }

    // --- RdfDiffEngine::invert tests ---

    #[test]
    fn test_invert_diff() {
        let forward = RdfDiffEngine::compute(&[with_object("o1")], &[with_object("o2")]);
        let inv = RdfDiffEngine::invert(forward);
        assert_eq!(inv.added, vec![with_object("o1")]);
        assert_eq!(inv.removed, vec![with_object("o2")]);
        assert_eq!(inv.stats.added_count, 1);
        assert_eq!(inv.stats.removed_count, 1);
    }

    #[test]
    fn test_invert_roundtrip() {
        let base = vec![spo("a", "b", "c"), spo("d", "e", "f")];
        let modified = vec![spo("a", "b", "c"), spo("x", "y", "z")];
        let inv = RdfDiffEngine::invert(RdfDiffEngine::compute(&base, &modified));

        // Applying the inverse to `modified` must bring back the base triples
        // and drop the one that only `modified` had.
        let restored = RdfDiffEngine::apply_diff(modified.clone(), &inv);
        assert!(restored.contains(&spo("a", "b", "c")));
        assert!(restored.contains(&spo("d", "e", "f")));
        assert!(!restored.contains(&spo("x", "y", "z")));
    }

    // --- RdfDiffEngine::compose tests ---

    #[test]
    fn test_compose_empty_diffs() {
        let composed = RdfDiffEngine::compose(
            RdfDiffEngine::compute(&[], &[]),
            RdfDiffEngine::compute(&[], &[]),
        );
        assert!(RdfDiffEngine::is_empty(&composed));
    }

    #[test]
    fn test_compose_two_diffs() {
        // First diff adds o1, second diff adds o2 on top of it.
        let first = RdfDiffEngine::compute(&[], &[with_object("o1")]);
        let second = RdfDiffEngine::compute(
            &[with_object("o1")],
            &[with_object("o1"), with_object("o2")],
        );
        let composed = RdfDiffEngine::compose(first, second);
        // Both triples must show up as net additions.
        assert!(composed.added.contains(&with_object("o1")));
        assert!(composed.added.contains(&with_object("o2")));
    }

    #[test]
    fn test_compose_add_then_remove() {
        // First diff adds o1, second diff removes it again.
        let first = RdfDiffEngine::compute(&[], &[with_object("o1")]);
        let second = RdfDiffEngine::compute(&[with_object("o1")], &[]);
        let composed = RdfDiffEngine::compose(first, second);
        // The composed diff must not add o1 ...
        assert!(!composed.added.contains(&with_object("o1")));
        // ... and must still list it as removed.
        assert!(composed.removed.contains(&with_object("o1")));
    }

    // --- RdfDiffEngine::is_empty tests ---

    #[test]
    fn test_is_empty_true() {
        let same = [with_object("o")];
        let diff = RdfDiffEngine::compute(&same, &[with_object("o")]);
        assert!(RdfDiffEngine::is_empty(&diff));
    }

    #[test]
    fn test_is_empty_false() {
        let diff = RdfDiffEngine::compute(&[], &[with_object("o")]);
        assert!(!RdfDiffEngine::is_empty(&diff));
    }

    // --- DatasetPatch tests ---

    #[test]
    fn test_patch_new_is_empty() {
        let patch = DatasetPatch::new();
        assert!(patch.is_empty());
        assert_eq!(patch.len(), 0);
    }

    #[test]
    fn test_patch_apply_single_diff() {
        let mut patch = DatasetPatch::new();
        patch.add_diff(RdfDiffEngine::compute(&[], &[with_object("o")]));

        let result = patch.apply_all(Vec::new());
        assert_eq!(result, vec![with_object("o")]);
    }

    #[test]
    fn test_patch_apply_multiple_diffs() {
        let mut patch = DatasetPatch::new();
        // Step 1 adds o1 to an empty dataset.
        patch.add_diff(RdfDiffEngine::compute(&[], &[with_object("o1")]));
        // Step 2 adds o2 to the dataset that already holds o1.
        patch.add_diff(RdfDiffEngine::compute(
            &[with_object("o1")],
            &[with_object("o1"), with_object("o2")],
        ));

        let result = patch.apply_all(Vec::new());
        assert!(result.contains(&with_object("o1")));
        assert!(result.contains(&with_object("o2")));
    }

    #[test]
    fn test_patch_apply_all_on_empty_patch() {
        let base = vec![with_object("o")];
        let result = DatasetPatch::new().apply_all(base.clone());
        assert_eq!(result, base);
    }

    #[test]
    fn test_patch_len() {
        let mut patch = DatasetPatch::new();
        assert_eq!(patch.len(), 0);
        for expected_len in 1..=2 {
            patch.add_diff(RdfDiffEngine::compute(&[], &[]));
            assert_eq!(patch.len(), expected_len);
        }
    }
}