Skip to main content

triblespace_core/trible/
tribleset.rs

1mod triblesetconstraint;
2pub mod triblesetidrangeconstraint;
3pub mod triblesetrangeconstraint;
4
5use triblesetconstraint::*;
6
7use crate::query::TriblePattern;
8use crate::inline::Inline;
9
10use crate::id::Id;
11use crate::patch::Entry;
12use crate::patch::PATCH;
13use crate::query::Variable;
14use crate::trible::AEVOrder;
15use crate::trible::AVEOrder;
16use crate::trible::EAVOrder;
17use crate::trible::EVAOrder;
18use crate::trible::Trible;
19use crate::trible::VAEOrder;
20use crate::trible::VEAOrder;
21use crate::trible::TRIBLE_LEN;
22use crate::inline::encodings::genid::GenId;
23use crate::inline::InlineEncoding;
24
25use std::iter::FromIterator;
26use std::iter::Map;
27use std::ops::Add;
28use std::ops::AddAssign;
29
30/// A collection of [`Trible`]s.
31///
32/// A [`TribleSet`] is a collection of [`Trible`]s that can be queried and manipulated.
33/// It supports efficient set operations like union, intersection, and difference.
34///
35/// The stored [`Trible`]s are indexed by the six possible orderings of their fields
36/// in corresponding [`PATCH`]es.
37///
38/// Clone is extremely cheap and can be used to create a snapshot of the current state of the [`TribleSet`].
39///
40/// Note that the [`TribleSet`] does not support an explicit `delete`/`remove` operation,
41/// as this would conflict with the CRDT semantics of the [`TribleSet`] and CALM principles as a whole.
42/// It does allow for set subtraction, but that operation is meant to compute the difference between two sets
43/// and not to remove elements from the set. A subtle but important distinction.
44#[derive(Debug, Clone)]
45pub struct TribleSet {
46    /// Entity → Attribute → Inline index.
47    pub eav: PATCH<TRIBLE_LEN, EAVOrder, ()>,
48    /// Inline → Entity → Attribute index.
49    pub vea: PATCH<TRIBLE_LEN, VEAOrder, ()>,
50    /// Attribute → Inline → Entity index.
51    pub ave: PATCH<TRIBLE_LEN, AVEOrder, ()>,
52    /// Inline → Attribute → Entity index.
53    pub vae: PATCH<TRIBLE_LEN, VAEOrder, ()>,
54    /// Entity → Inline → Attribute index.
55    pub eva: PATCH<TRIBLE_LEN, EVAOrder, ()>,
56    /// Attribute → Entity → Inline index.
57    pub aev: PATCH<TRIBLE_LEN, AEVOrder, ()>,
58}
59
60/// O(1) fingerprint for a [`TribleSet`], derived from the PATCH root hash.
61///
62/// This matches the equality semantics of [`TribleSet`], but it is not stable
63/// across process boundaries because [`PATCH`] uses a per-process hash key.
64#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
65pub struct TribleSetFingerprint(Option<u128>);
66
67impl TribleSetFingerprint {
68    /// Fingerprint of an empty set.
69    pub const EMPTY: Self = Self(None);
70
71    /// Returns `true` for the empty-set fingerprint.
72    pub fn is_empty(self) -> bool {
73        self.0.is_none()
74    }
75
76    /// Returns the raw 128-bit hash, or `None` for an empty set.
77    pub fn as_u128(self) -> Option<u128> {
78        self.0
79    }
80}
81
82type TribleSetInner<'a> =
83    Map<crate::patch::PATCHIterator<'a, 64, EAVOrder, ()>, fn(&[u8; 64]) -> &Trible>;
84
85/// Iterator over the tribles in a [`TribleSet`], yielded in EAV order.
86pub struct TribleSetIterator<'a> {
87    inner: TribleSetInner<'a>,
88}
89
90/// Minimum `other.len()` at which [`TribleSet::union`] fans out across
91/// rayon. Below this, the nested-join overhead dominates the saved
92/// per-index work. Tuned for the `entities/union*/5M` bench family.
93#[cfg(feature = "parallel")]
94pub const PARALLEL_UNION_THRESHOLD: usize = 4096;
95
96impl TribleSet {
97    /// Union of two [`TribleSet`]s.
98    ///
99    /// The other [`TribleSet`] is consumed, and this [`TribleSet`] is updated
100    /// in place.
101    ///
102    /// With the `parallel` feature enabled and `other` above
103    /// [`PARALLEL_UNION_THRESHOLD`] tribles, the six index unions
104    /// (`eav`/`eva`/`aev`/`ave`/`vea`/`vae`) fan out via nested
105    /// [`rayon::join`] — they touch disjoint memory so there's no
106    /// contention. The threshold gates on `other.len()` because PATCH
107    /// union work is bounded by the smaller side (each key from `other`
108    /// is inserted into `self`); when `other` is tiny (e.g. the per-
109    /// `entity!{}` `+=` in a serial fold) the rayon overhead would
110    /// dominate even at large `self`.
111    pub fn union(&mut self, other: Self) {
112        #[cfg(feature = "parallel")]
113        {
114            if other.len() >= PARALLEL_UNION_THRESHOLD {
115                let Self {
116                    eav,
117                    eva,
118                    aev,
119                    ave,
120                    vea,
121                    vae,
122                } = self;
123                let Self {
124                    eav: oeav,
125                    eva: oeva,
126                    aev: oaev,
127                    ave: oave,
128                    vea: ovea,
129                    vae: ovae,
130                } = other;
131                // Nested join trees the six tasks across rayon workers
132                // with much lower per-call overhead than `scope`.
133                rayon::join(
134                    || rayon::join(
135                        || eav.union(oeav),
136                        || eva.union(oeva),
137                    ),
138                    || rayon::join(
139                        || rayon::join(
140                            || aev.union(oaev),
141                            || ave.union(oave),
142                        ),
143                        || rayon::join(
144                            || vea.union(ovea),
145                            || vae.union(ovae),
146                        ),
147                    ),
148                );
149                return;
150            }
151        }
152
153        self.eav.union(other.eav);
154        self.eva.union(other.eva);
155        self.aev.union(other.aev);
156        self.ave.union(other.ave);
157        self.vea.union(other.vea);
158        self.vae.union(other.vae);
159    }
160
161    /// Returns a new set containing only tribles present in both sets.
162    pub fn intersect(&self, other: &Self) -> Self {
163        Self {
164            eav: self.eav.intersect(&other.eav),
165            eva: self.eva.intersect(&other.eva),
166            aev: self.aev.intersect(&other.aev),
167            ave: self.ave.intersect(&other.ave),
168            vea: self.vea.intersect(&other.vea),
169            vae: self.vae.intersect(&other.vae),
170        }
171    }
172
173    /// Returns a new set containing tribles in `self` but not in `other`.
174    pub fn difference(&self, other: &Self) -> Self {
175        Self {
176            eav: self.eav.difference(&other.eav),
177            eva: self.eva.difference(&other.eva),
178            aev: self.aev.difference(&other.aev),
179            ave: self.ave.difference(&other.ave),
180            vea: self.vea.difference(&other.vea),
181            vae: self.vae.difference(&other.vae),
182        }
183    }
184
185    /// Creates an empty set.
186    pub fn new() -> TribleSet {
187        TribleSet {
188            eav: PATCH::<TRIBLE_LEN, EAVOrder, ()>::new(),
189            eva: PATCH::<TRIBLE_LEN, EVAOrder, ()>::new(),
190            aev: PATCH::<TRIBLE_LEN, AEVOrder, ()>::new(),
191            ave: PATCH::<TRIBLE_LEN, AVEOrder, ()>::new(),
192            vea: PATCH::<TRIBLE_LEN, VEAOrder, ()>::new(),
193            vae: PATCH::<TRIBLE_LEN, VAEOrder, ()>::new(),
194        }
195    }
196
197    /// Returns the number of tribles in the set.
198    pub fn len(&self) -> usize {
199        self.eav.len() as usize
200    }
201
202    /// Returns `true` when the set contains no tribles.
203    pub fn is_empty(&self) -> bool {
204        self.len() == 0
205    }
206
207    /// Returns a fast fingerprint suitable for in-memory caching.
208    ///
209    /// The fingerprint matches [`TribleSet`] equality, but it is not stable
210    /// across process boundaries because [`PATCH`] uses a per-process hash key.
211    pub fn fingerprint(&self) -> TribleSetFingerprint {
212        TribleSetFingerprint(self.eav.root_hash())
213    }
214
215    /// Inserts a trible into all six covering indexes.
216    pub fn insert(&mut self, trible: &Trible) {
217        let key = Entry::new(&trible.data);
218        self.eav.insert(&key);
219        self.eva.insert(&key);
220        self.aev.insert(&key);
221        self.ave.insert(&key);
222        self.vea.insert(&key);
223        self.vae.insert(&key);
224    }
225
226    /// Returns `true` when the exact trible is present in the set.
227    pub fn contains(&self, trible: &Trible) -> bool {
228        self.eav.has_prefix(&trible.data)
229    }
230
231    /// Creates a constraint that proposes only values in the byte range
232    /// `[min, max]` (inclusive) using the VEA index with `infixes_range`.
233    ///
234    /// Use with `and!` alongside a `pattern!` for efficient range queries:
235    ///
236    /// ```rust,ignore
237    /// find!(ts: Inline<NsTAIInterval>,
238    ///     and!(
239    ///         pattern!(&data, [{ ?id @ attr: ?ts }]),
240    ///         data.value_in_range(ts, min_ts, max_ts),
241    ///     )
242    /// )
243    /// ```
244    pub fn value_in_range<V: InlineEncoding>(
245        &self,
246        variable: Variable<V>,
247        min: Inline<V>,
248        max: Inline<V>,
249    ) -> triblesetrangeconstraint::TribleSetRangeConstraint {
250        triblesetrangeconstraint::TribleSetRangeConstraint::new(variable, min, max, self.clone())
251    }
252
253    /// Creates a constraint that proposes only entity IDs in the byte range
254    /// `[min, max]` (inclusive) using the EAV index with `infixes_range`.
255    ///
256    /// ```rust,ignore
257    /// find!(id: Id,
258    ///     and!(
259    ///         pattern!(&data, [{ ?id @ attr: value }]),
260    ///         data.entity_in_range(id, min_id, max_id),
261    ///     )
262    /// )
263    /// ```
264    pub fn entity_in_range(
265        &self,
266        variable: Variable<GenId>,
267        min: Id,
268        max: Id,
269    ) -> triblesetidrangeconstraint::EntityRangeConstraint {
270        triblesetidrangeconstraint::EntityRangeConstraint::new(variable, min, max, self.clone())
271    }
272
273    /// Creates a constraint that proposes only attribute IDs in the byte range
274    /// `[min, max]` (inclusive) using the AEV index with `infixes_range`.
275    ///
276    /// ```rust,ignore
277    /// find!(attr: Id,
278    ///     and!(
279    ///         pattern!(&data, [{ entity @ ?attr: _ }]),
280    ///         data.attribute_in_range(attr, min_attr, max_attr),
281    ///     )
282    /// )
283    /// ```
284    pub fn attribute_in_range(
285        &self,
286        variable: Variable<GenId>,
287        min: Id,
288        max: Id,
289    ) -> triblesetidrangeconstraint::AttributeRangeConstraint {
290        triblesetidrangeconstraint::AttributeRangeConstraint::new(variable, min, max, self.clone())
291    }
292
293    /// Iterates over all tribles in EAV order.
294    pub fn iter(&self) -> TribleSetIterator<'_> {
295        TribleSetIterator {
296            inner: self
297                .eav
298                .iter()
299                .map(|data| Trible::as_transmute_raw_unchecked(data)),
300        }
301    }
302}
303
304impl PartialEq for TribleSet {
305    fn eq(&self, other: &Self) -> bool {
306        self.eav == other.eav
307    }
308}
309
310impl Eq for TribleSet {}
311
312impl Default for TribleSetFingerprint {
313    fn default() -> Self {
314        Self::EMPTY
315    }
316}
317
318impl From<&TribleSet> for TribleSetFingerprint {
319    fn from(set: &TribleSet) -> Self {
320        set.fingerprint()
321    }
322}
323
324impl AddAssign for TribleSet {
325    fn add_assign(&mut self, rhs: Self) {
326        self.union(rhs);
327    }
328}
329
330impl Add for TribleSet {
331    type Output = Self;
332
333    fn add(mut self, rhs: Self) -> Self::Output {
334        self.union(rhs);
335        self
336    }
337}
338
339impl FromIterator<Trible> for TribleSet {
340    fn from_iter<I: IntoIterator<Item = Trible>>(iter: I) -> Self {
341        let mut set = TribleSet::new();
342
343        for t in iter {
344            set.insert(&t);
345        }
346
347        set
348    }
349}
350
351impl TriblePattern for TribleSet {
352    type PatternConstraint<'a> = TribleSetConstraint;
353
354    fn pattern<V: InlineEncoding>(
355        &self,
356        e: Variable<GenId>,
357        a: Variable<GenId>,
358        v: Variable<V>,
359    ) -> Self::PatternConstraint<'static> {
360        TribleSetConstraint::new(e, a, v, self.clone())
361    }
362}
363
364impl<'a> Iterator for TribleSetIterator<'a> {
365    type Item = &'a Trible;
366
367    fn next(&mut self) -> Option<Self::Item> {
368        self.inner.next()
369    }
370}
371
372impl<'a> IntoIterator for &'a TribleSet {
373    type Item = &'a Trible;
374    type IntoIter = TribleSetIterator<'a>;
375
376    fn into_iter(self) -> Self::IntoIter {
377        self.iter()
378    }
379}
380
381impl Default for TribleSet {
382    fn default() -> Self {
383        Self::new()
384    }
385}
386
387#[cfg(test)]
388mod tests {
389    use crate::examples::literature;
390    use crate::prelude::*;
391
392    use super::*;
393    use fake::faker::lorem::en::Words;
394    use fake::faker::name::raw::FirstName;
395    use fake::faker::name::raw::LastName;
396    use fake::locales::EN;
397    use fake::Fake;
398
399    use rayon::iter::IntoParallelIterator;
400    use rayon::iter::ParallelIterator;
401
402    #[test]
403    fn union() {
404        let mut kb = TribleSet::new();
405        for _i in 0..100 {
406            let author = ufoid();
407            let book = ufoid();
408            kb += entity! { &author @
409               literature::firstname: FirstName(EN).fake::<String>(),
410               literature::lastname: LastName(EN).fake::<String>(),
411            };
412            kb += entity! { &book @
413               literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
414               literature::author: &author
415            };
416        }
417        assert_eq!(kb.len(), 400);
418    }
419
420    #[test]
421    fn union_parallel() {
422        let kb = (0..1000)
423            .into_par_iter()
424            .flat_map(|_| {
425                let author = ufoid();
426                let book = ufoid();
427                [
428                    entity! { &author @
429                       literature::firstname: FirstName(EN).fake::<String>(),
430                       literature::lastname: LastName(EN).fake::<String>(),
431                    },
432                    entity! { &book @
433                       literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
434                       literature::author: &author
435                    },
436                ]
437            })
438            .reduce(Fragment::default, |a, b| a + b);
439        assert_eq!(kb.len(), 4000);
440    }
441
442    #[test]
443    fn intersection() {
444        let mut kb1 = TribleSet::new();
445        let mut kb2 = TribleSet::new();
446        for _i in 0..100 {
447            let author = ufoid();
448            let book = ufoid();
449            kb1 += entity! { &author @
450               literature::firstname: FirstName(EN).fake::<String>(),
451               literature::lastname: LastName(EN).fake::<String>(),
452            };
453            kb1 += entity! { &book @
454               literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
455               literature::author: &author
456            };
457            kb2 += entity! { &author @
458               literature::firstname: FirstName(EN).fake::<String>(),
459               literature::lastname: LastName(EN).fake::<String>(),
460            };
461            kb2 += entity! { &book @
462               literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
463               literature::author: &author
464            };
465        }
466        let intersection = kb1.intersect(&kb2);
467        // Verify that the intersection contains only elements present in both kb1 and kb2
468        for trible in &intersection {
469            assert!(kb1.contains(trible));
470            assert!(kb2.contains(trible));
471        }
472    }
473
474    #[test]
475    fn difference() {
476        let mut kb1 = TribleSet::new();
477        let mut kb2 = TribleSet::new();
478        for _i in 0..100 {
479            let author = ufoid();
480            let book = ufoid();
481            kb1 += entity! { &author @
482               literature::firstname: FirstName(EN).fake::<String>(),
483               literature::lastname: LastName(EN).fake::<String>(),
484            };
485            kb1 += entity! { &book @
486               literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
487               literature::author: &author
488            };
489            if _i % 2 == 0 {
490                kb2 += entity! { &author @
491                   literature::firstname: FirstName(EN).fake::<String>(),
492                   literature::lastname: LastName(EN).fake::<String>(),
493                };
494                kb2 += entity! { &book @
495                   literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
496                   literature::author: &author
497                };
498            }
499        }
500        let difference = kb1.difference(&kb2);
501        // Verify that the difference contains only elements present in kb1 but not in kb2
502        for trible in &difference {
503            assert!(kb1.contains(trible));
504            assert!(!kb2.contains(trible));
505        }
506    }
507
508    #[test]
509    fn test_contains() {
510        let mut kb = TribleSet::new();
511        let author = ufoid();
512        let book = ufoid();
513        let author_tribles = entity! { &author @
514           literature::firstname: FirstName(EN).fake::<String>(),
515           literature::lastname: LastName(EN).fake::<String>(),
516        };
517        let book_tribles = entity! { &book @
518           literature::title: Words(1..3).fake::<Vec<String>>().join(" "),
519           literature::author: &author
520        };
521
522        kb += author_tribles.clone();
523        kb += book_tribles.clone();
524
525        for trible in &author_tribles {
526            assert!(kb.contains(trible));
527        }
528        for trible in &book_tribles {
529            assert!(kb.contains(trible));
530        }
531
532        let non_existent_trible = entity! { &ufoid() @
533           literature::firstname: FirstName(EN).fake::<String>(),
534           literature::lastname: LastName(EN).fake::<String>(),
535        };
536
537        for trible in &non_existent_trible {
538            assert!(!kb.contains(trible));
539        }
540    }
541}