Skip to main content

triblespace_core/blob/schemas/
succinctarchive.rs

1mod succinctarchiveconstraint;
2mod universe;
3
4use crate::blob::Blob;
5use crate::blob::BlobSchema;
6use crate::blob::ToBlob;
7use crate::blob::TryFromBlob;
8use crate::id::id_from_value;
9use crate::id::id_into_value;
10use crate::id::ExclusiveId;
11use crate::id::Id;
12use crate::id_hex;
13use crate::macros::entity;
14use crate::metadata;
15use crate::metadata::{ConstDescribe, ConstId};
16use crate::query::TriblePattern;
17use crate::repo::BlobStore;
18use crate::trible::Fragment;
19use crate::trible::Trible;
20use crate::trible::TribleSet;
21use crate::value::schemas::genid::GenId;
22use crate::value::schemas::hash::Blake3;
23use crate::value::schemas::UnknownValue;
24use crate::value::RawValue;
25use crate::value::Value;
26use crate::value::ValueSchema;
27use succinctarchiveconstraint::*;
28
29/// Re-export all universe types and traits.
30pub use universe::*;
31
32use std::convert::TryInto;
33use std::iter;
34
35use itertools::Itertools;
36
37use anybytes::area::{ByteArea, SectionWriter};
38use anybytes::Bytes;
39use jerky::bit_vector::rank9sel::Rank9SelIndex;
40use jerky::bit_vector::BitVector;
41use jerky::bit_vector::BitVectorBuilder;
42use jerky::bit_vector::BitVectorDataMeta;
43use jerky::bit_vector::NumBits;
44use jerky::bit_vector::Rank;
45use jerky::bit_vector::Select;
46use jerky::char_sequences::wavelet_matrix::WaveletMatrixMeta;
47use jerky::char_sequences::{WaveletMatrix, WaveletMatrixBuilder};
48use jerky::serialization::{Metadata, Serializable};
49
50/// Blob schema for a succinct archive based on *The Ring* (Arroyuelo
51/// et al., 2024) — a compact index that supports worst-case optimal
52/// joins over triples in almost no extra space.
53///
54/// The Ring treats each triple (E, A, V) as a bidirectional cyclic
55/// string of length 3. Two rings — forward (E→A→V) and reverse
56/// (E→V→A) — cover all six attribute orderings using only two sorted
57/// rotations each. The last column of each rotation is stored as a
58/// wavelet matrix, enabling rank/select navigation between columns
59/// without materialising six separate indexes.
60///
61/// Build from a [`TribleSet`] via [`ToBlob`], then query through
62/// [`SuccinctArchive`] and its [`Constraint`](crate::query::Constraint)
63/// implementation. Suitable for large, read-heavy, mostly-static
64/// datasets where compact storage matters more than incremental updates.
65pub struct SuccinctArchiveBlob;
66
67impl BlobSchema for SuccinctArchiveBlob {}
68
69impl ConstId for SuccinctArchiveBlob {
70    const ID: Id = id_hex!("8FAD1D4C7F884B51BAA5D6C56B873E41");
71}
72
73impl ConstDescribe for SuccinctArchiveBlob {
74    fn describe<B>(blobs: &mut B) -> Result<Fragment, B::PutError>
75    where
76        B: BlobStore<Blake3>,
77    {
78        let id = Self::ID;
79        let description = blobs.put(
80            "Succinct archive index for fast offline trible queries. The bytes store a compressed, query-friendly layout derived from a canonical trible set.\n\nUse for large, read-heavy, mostly immutable datasets where fast scans or joins matter more than incremental updates. Build it from a TribleSet or SimpleArchive, and keep a canonical source if you need to regenerate or validate the index.",
81        )?;
82        Ok(entity! {
83            ExclusiveId::force_ref(&id) @
84                metadata::name: blobs.put("succinctarchive")?,
85                metadata::description: description,
86                metadata::tag: metadata::KIND_BLOB_SCHEMA,
87        })
88    }
89}
90
91#[derive(Debug, Clone, Copy, zerocopy::FromBytes, zerocopy::KnownLayout, zerocopy::Immutable)]
92#[repr(C)]
93/// Serialisation metadata header for a [`SuccinctArchive`].
94///
95/// Stored at the start of the blob; the `D` parameter captures the
96/// domain (universe) metadata layout.
97pub struct SuccinctArchiveMeta<D: Metadata> {
98    /// Number of distinct entities in the archive.
99    pub entity_count: usize,
100    /// Number of distinct attributes in the archive.
101    pub attribute_count: usize,
102    /// Number of distinct values in the archive.
103    pub value_count: usize,
104    /// Domain (universe) metadata — maps integer codes to raw values.
105    pub domain: D,
106    /// Entity-axis prefix bit vector metadata.
107    pub e_a: BitVectorDataMeta,
108    /// Attribute-axis prefix bit vector metadata.
109    pub a_a: BitVectorDataMeta,
110    /// Value-axis prefix bit vector metadata.
111    pub v_a: BitVectorDataMeta,
112    /// First-occurrence markers for (entity, attribute) pairs.
113    pub changed_e_a: BitVectorDataMeta,
114    /// First-occurrence markers for (entity, value) pairs.
115    pub changed_e_v: BitVectorDataMeta,
116    /// First-occurrence markers for (attribute, entity) pairs.
117    pub changed_a_e: BitVectorDataMeta,
118    /// First-occurrence markers for (attribute, value) pairs.
119    pub changed_a_v: BitVectorDataMeta,
120    /// First-occurrence markers for (value, entity) pairs.
121    pub changed_v_e: BitVectorDataMeta,
122    /// First-occurrence markers for (value, attribute) pairs.
123    pub changed_v_a: BitVectorDataMeta,
124    /// Forward ring: EAV last-column wavelet matrix metadata.
125    pub eav_c: WaveletMatrixMeta,
126    /// Forward ring: VEA last-column wavelet matrix metadata.
127    pub vea_c: WaveletMatrixMeta,
128    /// Forward ring: AVE last-column wavelet matrix metadata.
129    pub ave_c: WaveletMatrixMeta,
130    /// Reverse ring: VAE last-column wavelet matrix metadata.
131    pub vae_c: WaveletMatrixMeta,
132    /// Reverse ring: EVA last-column wavelet matrix metadata.
133    pub eva_c: WaveletMatrixMeta,
134    /// Reverse ring: AEV last-column wavelet matrix metadata.
135    pub aev_c: WaveletMatrixMeta,
136}
137
138fn build_prefix_bv<I>(
139    domain_len: usize,
140    triple_count: usize,
141    iter: I,
142    writer: &mut SectionWriter,
143) -> BitVector<Rank9SelIndex>
144where
145    I: IntoIterator<Item = (usize, usize)>,
146{
147    let mut builder =
148        BitVectorBuilder::from_bit(false, triple_count + domain_len + 1, writer).unwrap();
149
150    let mut seen = 0usize;
151    let mut last = 0usize;
152    for (val, count) in iter {
153        for c in last..=val {
154            builder.set_bit(seen + c, true).unwrap();
155        }
156        seen += count;
157        last = val + 1;
158    }
159    for c in last..=domain_len {
160        builder.set_bit(seen + c, true).unwrap();
161    }
162    builder.freeze::<Rank9SelIndex>()
163}
164
165/// Deserialized Ring index — two rings of wavelet matrices with prefix
166/// bit vectors and pair-change markers, backed by a shared `Bytes`
167/// buffer.
168///
169/// The forward ring (E→A→V) stores three sorted rotations whose last
170/// columns are `eav_c`, `vea_c`, and `ave_c`. The reverse ring
171/// (E→V→A) stores `eva_c`, `aev_c`, and `vae_c`. Together they cover
172/// all six attribute orderings needed for WCO joins without
173/// materialising six separate indexes.
174///
175/// Implements [`Constraint`](crate::query::Constraint) via
176/// `TriblePattern::pattern`, so it can be used directly in `find!` and
177/// `pattern!` queries alongside regular [`TribleSet`]s.
178#[derive(Debug, Clone)]
179pub struct SuccinctArchive<U> {
180    /// The underlying blob bytes (shared, zero-copy).
181    pub bytes: Bytes,
182    /// The universe — maps integer codes to raw 32-byte values (the
183    /// domain of all distinct values appearing in E, A, or V positions).
184    pub domain: U,
185
186    /// Number of distinct entities in the universe.
187    pub entity_count: usize,
188    /// Number of distinct attributes in the universe.
189    pub attribute_count: usize,
190    /// Number of distinct values in the universe.
191    pub value_count: usize,
192
193    /// Entity-axis prefix bit vector: unary encoding of group sizes for
194    /// the entity column, enabling rank/select navigation.
195    pub e_a: BitVector<Rank9SelIndex>,
196    /// Attribute-axis prefix bit vector.
197    pub a_a: BitVector<Rank9SelIndex>,
198    /// Value-axis prefix bit vector.
199    pub v_a: BitVector<Rank9SelIndex>,
200
201    /// Bit vector marking the first occurrence of each `(entity, attribute)` pair
202    /// in `eav_c`.
203    pub changed_e_a: BitVector<Rank9SelIndex>,
204    /// Bit vector marking the first occurrence of each `(entity, value)` pair in
205    /// `eva_c`.
206    pub changed_e_v: BitVector<Rank9SelIndex>,
207    /// Bit vector marking the first occurrence of each `(attribute, entity)` pair
208    /// in `aev_c`.
209    pub changed_a_e: BitVector<Rank9SelIndex>,
210    /// Bit vector marking the first occurrence of each `(attribute, value)` pair
211    /// in `ave_c`.
212    pub changed_a_v: BitVector<Rank9SelIndex>,
213    /// Bit vector marking the first occurrence of each `(value, entity)` pair in
214    /// `vea_c`.
215    pub changed_v_e: BitVector<Rank9SelIndex>,
216    /// Bit vector marking the first occurrence of each `(value, attribute)` pair
217    /// in `vae_c`.
218    pub changed_v_a: BitVector<Rank9SelIndex>,
219
220    /// Forward ring: last column of EAV-sorted rotation (values).
221    pub eav_c: WaveletMatrix<Rank9SelIndex>,
222    /// Forward ring: last column of VEA-sorted rotation (attributes).
223    pub vea_c: WaveletMatrix<Rank9SelIndex>,
224    /// Forward ring: last column of AVE-sorted rotation (entities).
225    pub ave_c: WaveletMatrix<Rank9SelIndex>,
226    /// Reverse ring: last column of VAE-sorted rotation (entities).
227    pub vae_c: WaveletMatrix<Rank9SelIndex>,
228    /// Reverse ring: last column of EVA-sorted rotation (attributes).
229    pub eva_c: WaveletMatrix<Rank9SelIndex>,
230    /// Reverse ring: last column of AEV-sorted rotation (values).
231    pub aev_c: WaveletMatrix<Rank9SelIndex>,
232}
233
234impl<U> SuccinctArchive<U>
235where
236    U: Universe,
237{
238    /// Iterates over all tribles by walking the EAV wavelet matrix and
239    /// resolving each triple through the domain mapping.
240    pub fn iter<'a>(&'a self) -> impl Iterator<Item = Trible> + 'a {
241        (0..self.eav_c.len()).map(move |v_i| {
242            let v = self.eav_c.access(v_i).unwrap();
243            let a_i = self.v_a.select1(v).unwrap() - v + self.eav_c.rank(v_i, v).unwrap();
244            let a = self.vea_c.access(a_i).unwrap();
245            let e_i = self.a_a.select1(a).unwrap() - a + self.vea_c.rank(a_i, a).unwrap();
246            let e = self.ave_c.access(e_i).unwrap();
247
248            let e = self.domain.access(e);
249            let a = self.domain.access(a);
250            let v = self.domain.access(v);
251
252            let e: Id = Id::new(id_from_value(&e).unwrap()).unwrap();
253            let a: Id = Id::new(id_from_value(&a).unwrap()).unwrap();
254            let v: Value<UnknownValue> = Value::new(v);
255
256            Trible::force(&e, &a, &v)
257        })
258    }
259
260    /// Count the number of set bits in `bv` within `range`.
261    ///
262    /// The bit vectors in this archive encode the first occurrence of each
263    /// component pair.  By counting the set bits between two offsets we can
264    /// quickly determine how many distinct pairs appear in that slice of the
265    /// index.
266    pub fn distinct_in(
267        &self,
268        bv: &BitVector<Rank9SelIndex>,
269        range: &std::ops::Range<usize>,
270    ) -> usize {
271        bv.rank1(range.end).unwrap() - bv.rank1(range.start).unwrap()
272    }
273
274    /// Enumerate the rotated offsets of set bits in `bv` within `range`.
275    ///
276    /// `bv` marks the first occurrence of component pairs in the ordering that
277    /// produced `col`.  For each selected bit this function reads the component
278    /// value from `col` and uses `prefix` to translate the index to the adjacent
279    /// orientation.  The iterator therefore yields indices positioned to access
280    /// the middle component of each pair.
281    pub fn enumerate_in<'a>(
282        &'a self,
283        bv: &'a BitVector<Rank9SelIndex>,
284        range: &std::ops::Range<usize>,
285        col: &'a WaveletMatrix<Rank9SelIndex>,
286        prefix: &'a BitVector<Rank9SelIndex>,
287    ) -> impl Iterator<Item = usize> + 'a {
288        let start = bv.rank1(range.start).unwrap();
289        let end = bv.rank1(range.end).unwrap();
290        (start..end).map(move |r| {
291            let idx = bv.select1(r).unwrap();
292            let val = col.access(idx).unwrap();
293            prefix.select1(val).unwrap() - val + col.rank(idx, val).unwrap()
294        })
295    }
296
297    /// Enumerate the identifiers present in `prefix` using `rank`/`select` to
298    /// jump directly to the next distinct prefix sum.
299    pub fn enumerate_domain<'a>(
300        &'a self,
301        prefix: &'a BitVector<Rank9SelIndex>,
302    ) -> impl Iterator<Item = RawValue> + 'a {
303        let zero_count = prefix.num_bits() - (self.domain.len() + 1);
304        let mut z = 0usize;
305        std::iter::from_fn(move || {
306            if z >= zero_count {
307                return None;
308            }
309            let pos = prefix.select0(z).unwrap();
310            let id = prefix.rank1(pos).unwrap() - 1;
311            z = prefix.rank0(prefix.select1(id + 1).unwrap()).unwrap();
312            Some(self.domain.access(id))
313        })
314    }
315
316    /// Returns the serialization metadata header for this archive.
317    pub fn meta(&self) -> SuccinctArchiveMeta<U::Meta>
318    where
319        U: Serializable,
320    {
321        SuccinctArchiveMeta {
322            entity_count: self.entity_count,
323            attribute_count: self.attribute_count,
324            value_count: self.value_count,
325            domain: self.domain.metadata(),
326            e_a: self.e_a.metadata(),
327            a_a: self.a_a.metadata(),
328            v_a: self.v_a.metadata(),
329            changed_e_a: self.changed_e_a.metadata(),
330            changed_e_v: self.changed_e_v.metadata(),
331            changed_a_e: self.changed_a_e.metadata(),
332            changed_a_v: self.changed_a_v.metadata(),
333            changed_v_e: self.changed_v_e.metadata(),
334            changed_v_a: self.changed_v_a.metadata(),
335            eav_c: self.eav_c.metadata(),
336            vea_c: self.vea_c.metadata(),
337            ave_c: self.ave_c.metadata(),
338            vae_c: self.vae_c.metadata(),
339            eva_c: self.eva_c.metadata(),
340            aev_c: self.aev_c.metadata(),
341        }
342    }
343}
344
345impl<U> From<&TribleSet> for SuccinctArchive<U>
346where
347    U: Universe + Serializable<Error = jerky::error::Error>,
348    <U as Serializable>::Meta: Clone,
349{
350    fn from(set: &TribleSet) -> Self {
351        let triple_count = set.eav.len() as usize;
352
353        let entity_count = set.eav.segmented_len(&[0; 0]) as usize;
354        let attribute_count = set.ave.segmented_len(&[0; 0]) as usize;
355        let value_count = set.vea.segmented_len(&[0; 0]) as usize;
356
357        let e_iter = set
358            .eav
359            .iter_prefix_count::<16>()
360            .map(|(e, _)| id_into_value(&e));
361        let a_iter = set
362            .ave
363            .iter_prefix_count::<16>()
364            .map(|(a, _)| id_into_value(&a));
365        let v_iter = set.vea.iter_prefix_count::<32>().map(|(v, _)| v);
366
367        let mut area = ByteArea::new().unwrap();
368        let mut sections = area.sections();
369
370        let domain_iter = e_iter.merge(a_iter).merge(v_iter).dedup();
371        let domain = U::with_sorted_dedup(domain_iter, &mut sections);
372
373        let e_a = build_prefix_bv(
374            domain.len(),
375            triple_count,
376            set.eav.iter_prefix_count::<16>().map(|(e, c)| {
377                (
378                    domain.search(&id_into_value(&e)).expect("e in domain"),
379                    c as usize,
380                )
381            }),
382            &mut sections,
383        );
384
385        let a_a = build_prefix_bv(
386            domain.len(),
387            triple_count,
388            set.ave.iter_prefix_count::<16>().map(|(a, c)| {
389                (
390                    domain.search(&id_into_value(&a)).expect("a in domain"),
391                    c as usize,
392                )
393            }),
394            &mut sections,
395        );
396
397        let v_a = build_prefix_bv(
398            domain.len(),
399            triple_count,
400            set.vea
401                .iter_prefix_count::<32>()
402                .map(|(v, c)| (domain.search(&v).expect("v in domain"), c as usize)),
403            &mut sections,
404        );
405
406        let eav_c = {
407            let mut builder =
408                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
409                    .unwrap();
410            let mut iter = set
411                .eav
412                .iter_prefix_count::<64>()
413                .map(|(t, _)| t[32..64].try_into().unwrap())
414                .map(|v| domain.search(&v).expect("v in domain"));
415            builder.set_ints_from_iter(0, &mut iter).unwrap();
416            builder.freeze::<Rank9SelIndex>().unwrap()
417        };
418
419        let vea_c = {
420            let mut builder =
421                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
422                    .unwrap();
423            let mut iter = set
424                .vea
425                .iter_prefix_count::<64>()
426                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
427                .map(|a| domain.search(&a).expect("a in domain"));
428            builder.set_ints_from_iter(0, &mut iter).unwrap();
429            builder.freeze::<Rank9SelIndex>().unwrap()
430        };
431
432        let ave_c = {
433            let mut builder =
434                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
435                    .unwrap();
436            let mut iter = set
437                .ave
438                .iter_prefix_count::<64>()
439                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
440                .map(|e| domain.search(&e).expect("e in domain"));
441            builder.set_ints_from_iter(0, &mut iter).unwrap();
442            builder.freeze::<Rank9SelIndex>().unwrap()
443        };
444
445        let vae_c = {
446            let mut builder =
447                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
448                    .unwrap();
449            let mut iter = set
450                .vae
451                .iter_prefix_count::<64>()
452                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
453                .map(|e| domain.search(&e).expect("e in domain"));
454            builder.set_ints_from_iter(0, &mut iter).unwrap();
455            builder.freeze::<Rank9SelIndex>().unwrap()
456        };
457
458        let eva_c = {
459            let mut builder =
460                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
461                    .unwrap();
462            let mut iter = set
463                .eva
464                .iter_prefix_count::<64>()
465                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
466                .map(|a| domain.search(&a).expect("a in domain"));
467            builder.set_ints_from_iter(0, &mut iter).unwrap();
468            builder.freeze::<Rank9SelIndex>().unwrap()
469        };
470
471        let aev_c = {
472            let mut builder =
473                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
474                    .unwrap();
475            let mut iter = set
476                .aev
477                .iter_prefix_count::<64>()
478                .map(|(t, _)| t[32..64].try_into().unwrap())
479                .map(|v| domain.search(&v).expect("v in domain"));
480            builder.set_ints_from_iter(0, &mut iter).unwrap();
481            builder.freeze::<Rank9SelIndex>().unwrap()
482        };
483
484        let changed_e_a = {
485            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
486            let mut bits = set.eav.iter_prefix_count::<32>().flat_map(|(_, c)| {
487                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
488            });
489            b.set_bits_from_iter(0, &mut bits).unwrap();
490            b.freeze::<Rank9SelIndex>()
491        };
492
493        let changed_e_v = {
494            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
495            let mut bits = set.eva.iter_prefix_count::<48>().flat_map(|(_, c)| {
496                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
497            });
498            b.set_bits_from_iter(0, &mut bits).unwrap();
499            b.freeze::<Rank9SelIndex>()
500        };
501
502        let changed_a_e = {
503            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
504            let mut bits = set.aev.iter_prefix_count::<32>().flat_map(|(_, c)| {
505                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
506            });
507            b.set_bits_from_iter(0, &mut bits).unwrap();
508            b.freeze::<Rank9SelIndex>()
509        };
510
511        let changed_a_v = {
512            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
513            let mut bits = set.ave.iter_prefix_count::<48>().flat_map(|(_, c)| {
514                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
515            });
516            b.set_bits_from_iter(0, &mut bits).unwrap();
517            b.freeze::<Rank9SelIndex>()
518        };
519
520        let changed_v_e = {
521            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
522            let mut bits = set.vea.iter_prefix_count::<48>().flat_map(|(_, c)| {
523                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
524            });
525            b.set_bits_from_iter(0, &mut bits).unwrap();
526            b.freeze::<Rank9SelIndex>()
527        };
528
529        let changed_v_a = {
530            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
531            let mut bits = set.vae.iter_prefix_count::<48>().flat_map(|(_, c)| {
532                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
533            });
534            b.set_bits_from_iter(0, &mut bits).unwrap();
535            b.freeze::<Rank9SelIndex>()
536        };
537
538        let meta = SuccinctArchiveMeta {
539            entity_count,
540            attribute_count,
541            value_count,
542            domain: domain.metadata(),
543            e_a: e_a.metadata(),
544            a_a: a_a.metadata(),
545            v_a: v_a.metadata(),
546            changed_e_a: changed_e_a.metadata(),
547            changed_e_v: changed_e_v.metadata(),
548            changed_a_e: changed_a_e.metadata(),
549            changed_a_v: changed_a_v.metadata(),
550            changed_v_e: changed_v_e.metadata(),
551            changed_v_a: changed_v_a.metadata(),
552            eav_c: eav_c.metadata(),
553            vea_c: vea_c.metadata(),
554            ave_c: ave_c.metadata(),
555            vae_c: vae_c.metadata(),
556            eva_c: eva_c.metadata(),
557            aev_c: aev_c.metadata(),
558        };
559
560        let mut meta_sec = sections.reserve::<SuccinctArchiveMeta<U::Meta>>(1).unwrap();
561        meta_sec.as_mut_slice()[0] = meta.clone();
562        meta_sec.freeze().unwrap();
563
564        let bytes = area.freeze().unwrap();
565
566        SuccinctArchive::from_bytes(meta, bytes).unwrap()
567    }
568}
569
570impl<U> From<&SuccinctArchive<U>> for TribleSet
571where
572    U: Universe,
573{
574    fn from(archive: &SuccinctArchive<U>) -> Self {
575        archive.iter().collect()
576    }
577}
578
579impl<U> TriblePattern for SuccinctArchive<U>
580where
581    U: Universe,
582{
583    type PatternConstraint<'a>
584        = SuccinctArchiveConstraint<'a, U>
585    where
586        U: 'a;
587
588    fn pattern<'a, V: ValueSchema>(
589        &'a self,
590        e: crate::query::Variable<GenId>,
591        a: crate::query::Variable<GenId>,
592        v: crate::query::Variable<V>,
593    ) -> Self::PatternConstraint<'a> {
594        SuccinctArchiveConstraint::new(e, a, v, self)
595    }
596}
597
598impl<U> Serializable for SuccinctArchive<U>
599where
600    U: Universe + Serializable<Error = jerky::error::Error>,
601{
602    type Meta = SuccinctArchiveMeta<U::Meta>;
603    type Error = jerky::error::Error;
604
605    fn metadata(&self) -> Self::Meta {
606        self.meta()
607    }
608
609    fn from_bytes(meta: Self::Meta, bytes: Bytes) -> Result<Self, Self::Error> {
610        let domain = U::from_bytes(meta.domain, bytes.clone())?;
611
612        let e_a = BitVector::from_bytes(meta.e_a, bytes.clone())?;
613        let a_a = BitVector::from_bytes(meta.a_a, bytes.clone())?;
614        let v_a = BitVector::from_bytes(meta.v_a, bytes.clone())?;
615        let changed_e_a = BitVector::from_bytes(meta.changed_e_a, bytes.clone())?;
616        let changed_e_v = BitVector::from_bytes(meta.changed_e_v, bytes.clone())?;
617        let changed_a_e = BitVector::from_bytes(meta.changed_a_e, bytes.clone())?;
618        let changed_a_v = BitVector::from_bytes(meta.changed_a_v, bytes.clone())?;
619        let changed_v_e = BitVector::from_bytes(meta.changed_v_e, bytes.clone())?;
620        let changed_v_a = BitVector::from_bytes(meta.changed_v_a, bytes.clone())?;
621
622        let eav_c = WaveletMatrix::from_bytes(meta.eav_c, bytes.clone())?;
623        let vea_c = WaveletMatrix::from_bytes(meta.vea_c, bytes.clone())?;
624        let ave_c = WaveletMatrix::from_bytes(meta.ave_c, bytes.clone())?;
625        let vae_c = WaveletMatrix::from_bytes(meta.vae_c, bytes.clone())?;
626        let eva_c = WaveletMatrix::from_bytes(meta.eva_c, bytes.clone())?;
627        let aev_c = WaveletMatrix::from_bytes(meta.aev_c, bytes.clone())?;
628
629        Ok(SuccinctArchive {
630            bytes,
631            domain,
632            entity_count: meta.entity_count,
633            attribute_count: meta.attribute_count,
634            value_count: meta.value_count,
635            e_a,
636            a_a,
637            v_a,
638            changed_e_a,
639            changed_e_v,
640            changed_a_e,
641            changed_a_v,
642            changed_v_e,
643            changed_v_a,
644            eav_c,
645            vea_c,
646            ave_c,
647            vae_c,
648            eva_c,
649            aev_c,
650        })
651    }
652}
653
654impl<U> ToBlob<SuccinctArchiveBlob> for &SuccinctArchive<U>
655where
656    U: Universe + Serializable,
657{
658    fn to_blob(self) -> Blob<SuccinctArchiveBlob> {
659        Blob::new(self.bytes.clone())
660    }
661}
662
663impl<U> ToBlob<SuccinctArchiveBlob> for SuccinctArchive<U>
664where
665    U: Universe + Serializable,
666{
667    fn to_blob(self) -> Blob<SuccinctArchiveBlob> {
668        Blob::new(self.bytes)
669    }
670}
671
672/// Error returned when deserializing a [`SuccinctArchiveBlob`] into a [`SuccinctArchive`].
673pub struct SuccinctArchiveError;
674
675impl std::error::Error for SuccinctArchiveError {}
676
677impl std::fmt::Display for SuccinctArchiveError {
678    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
679        write!(f, "SuccinctArchiveError")
680    }
681}
682
683impl std::fmt::Debug for SuccinctArchiveError {
684    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
685        write!(f, "SuccinctArchiveError")
686    }
687}
688
689impl<U> TryFromBlob<SuccinctArchiveBlob> for SuccinctArchive<U>
690where
691    U: Universe + Serializable<Error = jerky::error::Error>,
692    <U as Serializable>::Meta: Copy + 'static,
693{
694    type Error = SuccinctArchiveError;
695
696    fn try_from_blob(blob: Blob<SuccinctArchiveBlob>) -> Result<Self, Self::Error> {
697        let bytes = blob.bytes;
698        let mut tail = bytes.clone();
699        let meta = *tail
700            .view_suffix::<SuccinctArchiveMeta<U::Meta>>()
701            .map_err(|_| SuccinctArchiveError)?;
702        SuccinctArchive::from_bytes(meta, bytes).map_err(|_| SuccinctArchiveError)
703    }
704}
705
706#[cfg(test)]
707mod tests {
708    use std::convert::TryInto;
709
710    use crate::blob::ToBlob;
711    use crate::id::fucid;
712    use crate::prelude::*;
713    use crate::query::find;
714    use crate::trible::Trible;
715    use crate::value::ToValue;
716    use crate::value::TryToValue;
717
718    use super::*;
719    use anybytes::area::ByteArea;
720    use itertools::Itertools;
721    use proptest::prelude::*;
722
723    pub mod knights {
724        use crate::prelude::*;
725
726        attributes! {
727            "328edd7583de04e2bedd6bd4fd50e651" as loves: valueschemas::GenId;
728            "328147856cc1984f0806dbb824d2b4cb" as name: valueschemas::ShortString;
729            "328f2c33d2fdd675e733388770b2d6c4" as title: valueschemas::ShortString;
730        }
731    }
732
733    proptest! {
734        #[test]
735        fn create(entries in prop::collection::vec(prop::collection::vec(0u8..255, 64), 1..128)) {
736            let mut set = TribleSet::new();
737            for entry in entries {
738                let mut key = [0; 64];
739                key.iter_mut().set_from(entry.iter().cloned());
740                set.insert(&Trible{ data: key});
741            }
742
743            let _archive: SuccinctArchive<CompressedUniverse> = (&set).into();
744        }
745
746        #[test]
747        fn roundtrip(entries in prop::collection::vec(prop::collection::vec(0u8..255, 64), 1..128)) {
748            let mut set = TribleSet::new();
749            for entry in entries {
750                let mut key = [0; 64];
751                key.iter_mut().set_from(entry.iter().cloned());
752                set.insert(&Trible{ data: key});
753            }
754
755            let archive: SuccinctArchive<CompressedUniverse> = (&set).into();
756            let set_: TribleSet = (&archive).into();
757
758            assert_eq!(set, set_);
759        }
760
761        #[test]
762        fn ordered_universe(values in prop::collection::vec(prop::collection::vec(0u8..255, 32), 1..128)) {
763            let mut values: Vec<RawValue> = values.into_iter().map(|v| v.try_into().unwrap()).collect();
764            values.sort();
765            let mut area = ByteArea::new().unwrap();
766            let mut sections = area.sections();
767            let u = OrderedUniverse::with(values.iter().copied(), &mut sections);
768            drop(sections);
769            let _bytes = area.freeze().unwrap();
770            for i in 0..u.len() {
771                let original = values[i];
772                let reconstructed = u.access(i);
773                assert_eq!(original, reconstructed);
774            }
775            for i in 0..u.len() {
776                let original = Some(i);
777                let found = u.search(&values[i]);
778                assert_eq!(original, found);
779            }
780        }
781
782        #[test]
783        fn compressed_universe(values in prop::collection::vec(prop::collection::vec(0u8..255, 32), 1..128)) {
784            let mut values: Vec<RawValue> = values.into_iter().map(|v| v.try_into().unwrap()).collect();
785            values.sort();
786            let mut area = ByteArea::new().unwrap();
787            let mut sections = area.sections();
788            let u = CompressedUniverse::with(values.iter().copied(), &mut sections);
789            drop(sections);
790            let _bytes = area.freeze().unwrap();
791            for i in 0..u.len() {
792                let original = values[i];
793                let reconstructed = u.access(i);
794                assert_eq!(original, reconstructed);
795            }
796            for i in 0..u.len() {
797                let original = Some(i);
798                let found = u.search(&values[i]);
799                assert_eq!(original, found);
800            }
801        }
802    }
803
804    #[test]
805    fn archive_pattern() {
806        let juliet = fucid();
807        let romeo = fucid();
808
809        let mut kb = TribleSet::new();
810
811        kb += entity! { &juliet @
812           knights::name: "Juliet",
813           knights::loves: &romeo,
814           knights::title: "Maiden"
815        };
816        kb += entity! { &romeo @
817           knights::name: "Romeo",
818           knights::loves: &juliet,
819           knights::title: "Prince"
820        };
821        kb += entity! {
822           knights::name: "Angelica",
823           knights::title: "Nurse"
824        };
825
826        let archive: SuccinctArchive<OrderedUniverse> = (&kb).into();
827
828        let r: Vec<_> = find!(
829            (juliet, name),
830            pattern!(&archive, [
831            {knights::name: "Romeo",
832             knights::loves: ?juliet},
833            {?juliet @
834                knights::name: ?name
835            }])
836        )
837        .collect();
838        assert_eq!(
839            vec![((&juliet).to_value(), "Juliet".try_to_value().unwrap(),)],
840            r
841        );
842    }
843
844    #[test]
845    fn blob_roundtrip() {
846        let juliet = fucid();
847        let romeo = fucid();
848
849        let mut kb = TribleSet::new();
850
851        kb += entity! {&juliet @
852            knights::name: "Juliet",
853            knights::loves: &romeo,
854            knights::title: "Maiden"
855        };
856        kb += entity! {&romeo @
857            knights::name: "Romeo",
858            knights::loves: &juliet,
859            knights::title: "Prince"
860        };
861
862        let archive: SuccinctArchive<OrderedUniverse> = (&kb).into();
863        let blob = (&archive).to_blob();
864        let rebuilt: SuccinctArchive<OrderedUniverse> = blob.try_from_blob().unwrap();
865        let kb2: TribleSet = (&rebuilt).into();
866        assert_eq!(kb, kb2);
867    }
868}