Skip to main content

triblespace_core/blob/encodings/
succinctarchive.rs

1mod succinctarchiveconstraint;
2mod succinctarchiverangeconstraint;
3mod universe;
4
5use crate::inline::Encodes;
6use crate::blob::Blob;
7use crate::blob::BlobEncoding;
8use crate::blob::TryFromBlob;
9use crate::id::id_from_value;
10use crate::id::id_into_value;
11use crate::id::ExclusiveId;
12use crate::id::Id;
13use crate::id_hex;
14use crate::macros::entity;
15use crate::metadata;
16use crate::metadata::MetaDescribe;
17use crate::query::TriblePattern;
18use crate::trible::Fragment;
19use crate::trible::Trible;
20use crate::trible::TribleSet;
21use crate::inline::encodings::genid::GenId;
22use crate::inline::encodings::UnknownInline;
23use crate::inline::RawInline;
24use crate::inline::Inline;
25use crate::inline::InlineEncoding;
26use succinctarchiveconstraint::*;
27
28/// Re-export all universe types and traits.
29pub use universe::*;
30
31use std::convert::TryInto;
32use std::iter;
33
34use itertools::Itertools;
35
36use anybytes::area::{ByteArea, SectionWriter};
37use anybytes::Bytes;
38use jerky::bit_vector::rank9sel::Rank9SelIndex;
39use jerky::bit_vector::BitVector;
40use jerky::bit_vector::BitVectorBuilder;
41use jerky::bit_vector::BitVectorDataMeta;
42use jerky::bit_vector::NumBits;
43use jerky::bit_vector::Rank;
44use jerky::bit_vector::Select;
45use jerky::char_sequences::wavelet_matrix::WaveletMatrixMeta;
46use jerky::char_sequences::{WaveletMatrix, WaveletMatrixBuilder};
47use jerky::serialization::{Metadata, Serializable};
48
49/// Blob encoding for a succinct archive based on *The Ring* (Arroyuelo
50/// et al., 2024) — a compact index that supports worst-case optimal
51/// joins over triples in almost no extra space.
52///
53/// The Ring treats each triple (E, A, V) as a bidirectional cyclic
54/// string of length 3. Two rings — forward (E→A→V) and reverse
55/// (E→V→A) — cover all six attribute orderings using only two sorted
56/// rotations each. The last column of each rotation is stored as a
57/// wavelet matrix, enabling rank/select navigation between columns
58/// without materialising six separate indexes.
59///
60/// Build from a [`TribleSet`] via [`IntoBlob`], then query through
61/// [`SuccinctArchive`] and its [`Constraint`](crate::query::Constraint)
62/// implementation. Suitable for large, read-heavy, mostly-static
63/// datasets where compact storage matters more than incremental updates.
64pub struct SuccinctArchiveBlob;
65
66impl BlobEncoding for SuccinctArchiveBlob {}
67
68impl MetaDescribe for SuccinctArchiveBlob {
69    fn describe() -> Fragment {
70        let id: Id = id_hex!("8FAD1D4C7F884B51BAA5D6C56B873E41");
71        entity! {
72            ExclusiveId::force_ref(&id) @
73                metadata::name: "succinctarchive",
74                metadata::description: "Succinct archive index for fast offline trible queries. The bytes store a compressed, query-friendly layout derived from a canonical trible set.\n\nUse for large, read-heavy, mostly immutable datasets where fast scans or joins matter more than incremental updates. Build it from a TribleSet or SimpleArchive, and keep a canonical source if you need to regenerate or validate the index.",
75                metadata::tag: metadata::KIND_BLOB_ENCODING,
76        }
77    }
78}
79
80#[derive(Debug, Clone, Copy, zerocopy::FromBytes, zerocopy::KnownLayout, zerocopy::Immutable)]
81#[repr(C)]
82/// Serialisation metadata header for a [`SuccinctArchive`].
83///
84/// Stored at the start of the blob; the `D` parameter captures the
85/// domain (universe) metadata layout.
86pub struct SuccinctArchiveMeta<D: Metadata> {
87    /// Number of distinct entities in the archive.
88    pub entity_count: usize,
89    /// Number of distinct attributes in the archive.
90    pub attribute_count: usize,
91    /// Number of distinct values in the archive.
92    pub value_count: usize,
93    /// Domain (universe) metadata — maps integer codes to raw values.
94    pub domain: D,
95    /// Entity-axis prefix bit vector metadata.
96    pub e_a: BitVectorDataMeta,
97    /// Attribute-axis prefix bit vector metadata.
98    pub a_a: BitVectorDataMeta,
99    /// Inline-axis prefix bit vector metadata.
100    pub v_a: BitVectorDataMeta,
101    /// First-occurrence markers for (entity, attribute) pairs.
102    pub changed_e_a: BitVectorDataMeta,
103    /// First-occurrence markers for (entity, value) pairs.
104    pub changed_e_v: BitVectorDataMeta,
105    /// First-occurrence markers for (attribute, entity) pairs.
106    pub changed_a_e: BitVectorDataMeta,
107    /// First-occurrence markers for (attribute, value) pairs.
108    pub changed_a_v: BitVectorDataMeta,
109    /// First-occurrence markers for (value, entity) pairs.
110    pub changed_v_e: BitVectorDataMeta,
111    /// First-occurrence markers for (value, attribute) pairs.
112    pub changed_v_a: BitVectorDataMeta,
113    /// Forward ring: EAV last-column wavelet matrix metadata.
114    pub eav_c: WaveletMatrixMeta,
115    /// Forward ring: VEA last-column wavelet matrix metadata.
116    pub vea_c: WaveletMatrixMeta,
117    /// Forward ring: AVE last-column wavelet matrix metadata.
118    pub ave_c: WaveletMatrixMeta,
119    /// Reverse ring: VAE last-column wavelet matrix metadata.
120    pub vae_c: WaveletMatrixMeta,
121    /// Reverse ring: EVA last-column wavelet matrix metadata.
122    pub eva_c: WaveletMatrixMeta,
123    /// Reverse ring: AEV last-column wavelet matrix metadata.
124    pub aev_c: WaveletMatrixMeta,
125}
126
127fn build_prefix_bv<I>(
128    domain_len: usize,
129    triple_count: usize,
130    iter: I,
131    writer: &mut SectionWriter,
132) -> BitVector<Rank9SelIndex>
133where
134    I: IntoIterator<Item = (usize, usize)>,
135{
136    let mut builder =
137        BitVectorBuilder::from_bit(false, triple_count + domain_len + 1, writer).unwrap();
138
139    let mut seen = 0usize;
140    let mut last = 0usize;
141    for (val, count) in iter {
142        for c in last..=val {
143            builder.set_bit(seen + c, true).unwrap();
144        }
145        seen += count;
146        last = val + 1;
147    }
148    for c in last..=domain_len {
149        builder.set_bit(seen + c, true).unwrap();
150    }
151    builder.freeze::<Rank9SelIndex>()
152}
153
154/// Deserialized Ring index — two rings of wavelet matrices with prefix
155/// bit vectors and pair-change markers, backed by a shared `Bytes`
156/// buffer.
157///
158/// The forward ring (E→A→V) stores three sorted rotations whose last
159/// columns are `eav_c`, `vea_c`, and `ave_c`. The reverse ring
160/// (E→V→A) stores `eva_c`, `aev_c`, and `vae_c`. Together they cover
161/// all six attribute orderings needed for WCO joins without
162/// materialising six separate indexes.
163///
164/// Implements [`Constraint`](crate::query::Constraint) via
165/// `TriblePattern::pattern`, so it can be used directly in `find!` and
166/// `pattern!` queries alongside regular [`TribleSet`]s.
167#[derive(Debug, Clone)]
168pub struct SuccinctArchive<U> {
169    /// The underlying blob bytes (shared, zero-copy).
170    pub bytes: Bytes,
171    /// The universe — maps integer codes to raw 32-byte values (the
172    /// domain of all distinct values appearing in E, A, or V positions).
173    pub domain: U,
174
175    /// Number of distinct entities in the universe.
176    pub entity_count: usize,
177    /// Number of distinct attributes in the universe.
178    pub attribute_count: usize,
179    /// Number of distinct values in the universe.
180    pub value_count: usize,
181
182    /// Entity-axis prefix bit vector: unary encoding of group sizes for
183    /// the entity column, enabling rank/select navigation.
184    pub e_a: BitVector<Rank9SelIndex>,
185    /// Attribute-axis prefix bit vector.
186    pub a_a: BitVector<Rank9SelIndex>,
187    /// Inline-axis prefix bit vector.
188    pub v_a: BitVector<Rank9SelIndex>,
189
190    /// Bit vector marking the first occurrence of each `(entity, attribute)` pair
191    /// in `eav_c`.
192    pub changed_e_a: BitVector<Rank9SelIndex>,
193    /// Bit vector marking the first occurrence of each `(entity, value)` pair in
194    /// `eva_c`.
195    pub changed_e_v: BitVector<Rank9SelIndex>,
196    /// Bit vector marking the first occurrence of each `(attribute, entity)` pair
197    /// in `aev_c`.
198    pub changed_a_e: BitVector<Rank9SelIndex>,
199    /// Bit vector marking the first occurrence of each `(attribute, value)` pair
200    /// in `ave_c`.
201    pub changed_a_v: BitVector<Rank9SelIndex>,
202    /// Bit vector marking the first occurrence of each `(value, entity)` pair in
203    /// `vea_c`.
204    pub changed_v_e: BitVector<Rank9SelIndex>,
205    /// Bit vector marking the first occurrence of each `(value, attribute)` pair
206    /// in `vae_c`.
207    pub changed_v_a: BitVector<Rank9SelIndex>,
208
209    /// Forward ring: last column of EAV-sorted rotation (values).
210    pub eav_c: WaveletMatrix<Rank9SelIndex>,
211    /// Forward ring: last column of VEA-sorted rotation (attributes).
212    pub vea_c: WaveletMatrix<Rank9SelIndex>,
213    /// Forward ring: last column of AVE-sorted rotation (entities).
214    pub ave_c: WaveletMatrix<Rank9SelIndex>,
215    /// Reverse ring: last column of VAE-sorted rotation (entities).
216    pub vae_c: WaveletMatrix<Rank9SelIndex>,
217    /// Reverse ring: last column of EVA-sorted rotation (attributes).
218    pub eva_c: WaveletMatrix<Rank9SelIndex>,
219    /// Reverse ring: last column of AEV-sorted rotation (values).
220    pub aev_c: WaveletMatrix<Rank9SelIndex>,
221}
222
223impl<U> SuccinctArchive<U>
224where
225    U: Universe,
226{
227    /// A value-range constraint that proposes only V-position values
228    /// in the inclusive byte-lexicographic range `[min, max]`.
229    ///
230    /// Mirrors [`TribleSet::value_in_range`](crate::trible::TribleSet::value_in_range).
231    /// The cost is O(log n + k) for an [`OrderedUniverse`] (n = universe
232    /// size, k = matching values that appear in V position) — closes
233    /// the date-window query collapse documented in the SPB case study's
234    /// storage-axis appendix.
235    ///
236    /// # Example
237    ///
238    /// ```rust,ignore
239    /// find!(ts: Inline<NsTAIInterval>,
240    ///     and!(
241    ///         pattern!(&archive, [{ ?id @ attr: ?ts }]),
242    ///         archive.value_in_range(ts, min_ts, max_ts),
243    ///     )
244    /// )
245    /// ```
246    pub fn value_in_range<V: InlineEncoding>(
247        &self,
248        variable: crate::query::Variable<V>,
249        min: Inline<V>,
250        max: Inline<V>,
251    ) -> succinctarchiverangeconstraint::SuccinctArchiveRangeConstraint<'_, U> {
252        succinctarchiverangeconstraint::SuccinctArchiveRangeConstraint::new(
253            variable, min, max, self,
254        )
255    }
256
257    /// Iterates over all tribles by walking the EAV wavelet matrix and
258    /// resolving each triple through the domain mapping.
259    pub fn iter<'a>(&'a self) -> impl Iterator<Item = Trible> + 'a {
260        (0..self.eav_c.len()).map(move |v_i| {
261            let v = self.eav_c.access(v_i).unwrap();
262            let a_i = self.v_a.select1(v).unwrap() - v + self.eav_c.rank(v_i, v).unwrap();
263            let a = self.vea_c.access(a_i).unwrap();
264            let e_i = self.a_a.select1(a).unwrap() - a + self.vea_c.rank(a_i, a).unwrap();
265            let e = self.ave_c.access(e_i).unwrap();
266
267            let e = self.domain.access(e);
268            let a = self.domain.access(a);
269            let v = self.domain.access(v);
270
271            let e: Id = Id::new(id_from_value(&e).unwrap()).unwrap();
272            let a: Id = Id::new(id_from_value(&a).unwrap()).unwrap();
273            let v: Inline<UnknownInline> = Inline::new(v);
274
275            Trible::force(&e, &a, &v)
276        })
277    }
278
279    /// Count the number of set bits in `bv` within `range`.
280    ///
281    /// The bit vectors in this archive encode the first occurrence of each
282    /// component pair.  By counting the set bits between two offsets we can
283    /// quickly determine how many distinct pairs appear in that slice of the
284    /// index.
285    pub fn distinct_in(
286        &self,
287        bv: &BitVector<Rank9SelIndex>,
288        range: &std::ops::Range<usize>,
289    ) -> usize {
290        bv.rank1(range.end).unwrap() - bv.rank1(range.start).unwrap()
291    }
292
293    /// Enumerate the rotated offsets of set bits in `bv` within `range`.
294    ///
295    /// `bv` marks the first occurrence of component pairs in the ordering that
296    /// produced `col`.  For each selected bit this function reads the component
297    /// value from `col` and uses `prefix` to translate the index to the adjacent
298    /// orientation.  The iterator therefore yields indices positioned to access
299    /// the middle component of each pair.
300    pub fn enumerate_in<'a>(
301        &'a self,
302        bv: &'a BitVector<Rank9SelIndex>,
303        range: &std::ops::Range<usize>,
304        col: &'a WaveletMatrix<Rank9SelIndex>,
305        prefix: &'a BitVector<Rank9SelIndex>,
306    ) -> impl Iterator<Item = usize> + 'a {
307        let start = bv.rank1(range.start).unwrap();
308        let end = bv.rank1(range.end).unwrap();
309        (start..end).map(move |r| {
310            let idx = bv.select1(r).unwrap();
311            let val = col.access(idx).unwrap();
312            prefix.select1(val).unwrap() - val + col.rank(idx, val).unwrap()
313        })
314    }
315
316    /// Enumerate the identifiers present in `prefix` using `rank`/`select` to
317    /// jump directly to the next distinct prefix sum.
318    pub fn enumerate_domain<'a>(
319        &'a self,
320        prefix: &'a BitVector<Rank9SelIndex>,
321    ) -> impl Iterator<Item = RawInline> + 'a {
322        let zero_count = prefix.num_bits() - (self.domain.len() + 1);
323        let mut z = 0usize;
324        std::iter::from_fn(move || {
325            if z >= zero_count {
326                return None;
327            }
328            let pos = prefix.select0(z).unwrap();
329            let id = prefix.rank1(pos).unwrap() - 1;
330            z = prefix.rank0(prefix.select1(id + 1).unwrap()).unwrap();
331            Some(self.domain.access(id))
332        })
333    }
334
335    /// Like [`Self::enumerate_domain`], but bounded to the half-open code range
336    /// `[code_range.start, code_range.end)`. Output-sensitive: iterates
337    /// only over codes that actually appear in `prefix` *and* fall within
338    /// the range. Empty groups are skipped via the `select1`-based stride.
339    ///
340    /// Combined with [`Universe::search_range`], this gives O(K) range
341    /// proposals where K = distinct codes-in-range that have at least one
342    /// occurrence on the indexed axis.
343    pub fn enumerate_domain_in_range<'a>(
344        &'a self,
345        prefix: &'a BitVector<Rank9SelIndex>,
346        code_range: std::ops::Range<usize>,
347    ) -> impl Iterator<Item = RawInline> + 'a {
348        let zero_count_total = prefix.num_bits() - (self.domain.len() + 1);
349        let end_code = code_range.end;
350        // Seek to the first 0-bit (first trible) at or after `code_range.start`'s
351        // group boundary. select1(start) is the position of the start-code's
352        // group's leading 1-bit; rank0 of that position is the trible-index of
353        // the first trible whose code >= start.
354        let mut z = if code_range.start >= self.domain.len() + 1 {
355            zero_count_total
356        } else {
357            let start_pos = prefix.select1(code_range.start).unwrap();
358            prefix.rank0(start_pos).unwrap()
359        };
360        std::iter::from_fn(move || {
361            if z >= zero_count_total {
362                return None;
363            }
364            let pos = prefix.select0(z).unwrap();
365            let id = prefix.rank1(pos).unwrap() - 1;
366            if id >= end_code {
367                return None;
368            }
369            z = prefix.rank0(prefix.select1(id + 1).unwrap()).unwrap();
370            Some(self.domain.access(id))
371        })
372    }
373
374    /// Returns the serialization metadata header for this archive.
375    pub fn meta(&self) -> SuccinctArchiveMeta<U::Meta>
376    where
377        U: Serializable,
378    {
379        SuccinctArchiveMeta {
380            entity_count: self.entity_count,
381            attribute_count: self.attribute_count,
382            value_count: self.value_count,
383            domain: self.domain.metadata(),
384            e_a: self.e_a.metadata(),
385            a_a: self.a_a.metadata(),
386            v_a: self.v_a.metadata(),
387            changed_e_a: self.changed_e_a.metadata(),
388            changed_e_v: self.changed_e_v.metadata(),
389            changed_a_e: self.changed_a_e.metadata(),
390            changed_a_v: self.changed_a_v.metadata(),
391            changed_v_e: self.changed_v_e.metadata(),
392            changed_v_a: self.changed_v_a.metadata(),
393            eav_c: self.eav_c.metadata(),
394            vea_c: self.vea_c.metadata(),
395            ave_c: self.ave_c.metadata(),
396            vae_c: self.vae_c.metadata(),
397            eva_c: self.eva_c.metadata(),
398            aev_c: self.aev_c.metadata(),
399        }
400    }
401}
402
403impl<U> From<&TribleSet> for SuccinctArchive<U>
404where
405    U: Universe + Serializable<Error = jerky::error::Error>,
406    <U as Serializable>::Meta: Clone,
407{
408    fn from(set: &TribleSet) -> Self {
409        let triple_count = set.eav.len() as usize;
410
411        let entity_count = set.eav.segmented_len(&[0; 0]) as usize;
412        let attribute_count = set.ave.segmented_len(&[0; 0]) as usize;
413        let value_count = set.vea.segmented_len(&[0; 0]) as usize;
414
415        let e_iter = set
416            .eav
417            .iter_prefix_count::<16>()
418            .map(|(e, _)| id_into_value(&e));
419        let a_iter = set
420            .ave
421            .iter_prefix_count::<16>()
422            .map(|(a, _)| id_into_value(&a));
423        let v_iter = set.vea.iter_prefix_count::<32>().map(|(v, _)| v);
424
425        let mut area = ByteArea::new().unwrap();
426        let mut sections = area.sections();
427
428        let domain_iter = e_iter.merge(a_iter).merge(v_iter).dedup();
429        let domain = U::with_sorted_dedup(domain_iter, &mut sections);
430
431        let e_a = build_prefix_bv(
432            domain.len(),
433            triple_count,
434            set.eav.iter_prefix_count::<16>().map(|(e, c)| {
435                (
436                    domain.search(&id_into_value(&e)).expect("e in domain"),
437                    c as usize,
438                )
439            }),
440            &mut sections,
441        );
442
443        let a_a = build_prefix_bv(
444            domain.len(),
445            triple_count,
446            set.ave.iter_prefix_count::<16>().map(|(a, c)| {
447                (
448                    domain.search(&id_into_value(&a)).expect("a in domain"),
449                    c as usize,
450                )
451            }),
452            &mut sections,
453        );
454
455        let v_a = build_prefix_bv(
456            domain.len(),
457            triple_count,
458            set.vea
459                .iter_prefix_count::<32>()
460                .map(|(v, c)| (domain.search(&v).expect("v in domain"), c as usize)),
461            &mut sections,
462        );
463
464        let eav_c = {
465            let mut builder =
466                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
467                    .unwrap();
468            let mut iter = set
469                .eav
470                .iter_prefix_count::<64>()
471                .map(|(t, _)| t[32..64].try_into().unwrap())
472                .map(|v| domain.search(&v).expect("v in domain"));
473            builder.set_ints_from_iter(0, &mut iter).unwrap();
474            builder.freeze::<Rank9SelIndex>().unwrap()
475        };
476
477        let vea_c = {
478            let mut builder =
479                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
480                    .unwrap();
481            let mut iter = set
482                .vea
483                .iter_prefix_count::<64>()
484                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
485                .map(|a| domain.search(&a).expect("a in domain"));
486            builder.set_ints_from_iter(0, &mut iter).unwrap();
487            builder.freeze::<Rank9SelIndex>().unwrap()
488        };
489
490        let ave_c = {
491            let mut builder =
492                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
493                    .unwrap();
494            let mut iter = set
495                .ave
496                .iter_prefix_count::<64>()
497                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
498                .map(|e| domain.search(&e).expect("e in domain"));
499            builder.set_ints_from_iter(0, &mut iter).unwrap();
500            builder.freeze::<Rank9SelIndex>().unwrap()
501        };
502
503        let vae_c = {
504            let mut builder =
505                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
506                    .unwrap();
507            let mut iter = set
508                .vae
509                .iter_prefix_count::<64>()
510                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
511                .map(|e| domain.search(&e).expect("e in domain"));
512            builder.set_ints_from_iter(0, &mut iter).unwrap();
513            builder.freeze::<Rank9SelIndex>().unwrap()
514        };
515
516        let eva_c = {
517            let mut builder =
518                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
519                    .unwrap();
520            let mut iter = set
521                .eva
522                .iter_prefix_count::<64>()
523                .map(|(t, _)| id_into_value(t[48..64].try_into().unwrap()))
524                .map(|a| domain.search(&a).expect("a in domain"));
525            builder.set_ints_from_iter(0, &mut iter).unwrap();
526            builder.freeze::<Rank9SelIndex>().unwrap()
527        };
528
529        let aev_c = {
530            let mut builder =
531                WaveletMatrixBuilder::with_capacity(domain.len(), triple_count, &mut sections)
532                    .unwrap();
533            let mut iter = set
534                .aev
535                .iter_prefix_count::<64>()
536                .map(|(t, _)| t[32..64].try_into().unwrap())
537                .map(|v| domain.search(&v).expect("v in domain"));
538            builder.set_ints_from_iter(0, &mut iter).unwrap();
539            builder.freeze::<Rank9SelIndex>().unwrap()
540        };
541
542        let changed_e_a = {
543            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
544            let mut bits = set.eav.iter_prefix_count::<32>().flat_map(|(_, c)| {
545                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
546            });
547            b.set_bits_from_iter(0, &mut bits).unwrap();
548            b.freeze::<Rank9SelIndex>()
549        };
550
551        let changed_e_v = {
552            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
553            let mut bits = set.eva.iter_prefix_count::<48>().flat_map(|(_, c)| {
554                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
555            });
556            b.set_bits_from_iter(0, &mut bits).unwrap();
557            b.freeze::<Rank9SelIndex>()
558        };
559
560        let changed_a_e = {
561            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
562            let mut bits = set.aev.iter_prefix_count::<32>().flat_map(|(_, c)| {
563                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
564            });
565            b.set_bits_from_iter(0, &mut bits).unwrap();
566            b.freeze::<Rank9SelIndex>()
567        };
568
569        let changed_a_v = {
570            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
571            let mut bits = set.ave.iter_prefix_count::<48>().flat_map(|(_, c)| {
572                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
573            });
574            b.set_bits_from_iter(0, &mut bits).unwrap();
575            b.freeze::<Rank9SelIndex>()
576        };
577
578        let changed_v_e = {
579            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
580            let mut bits = set.vea.iter_prefix_count::<48>().flat_map(|(_, c)| {
581                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
582            });
583            b.set_bits_from_iter(0, &mut bits).unwrap();
584            b.freeze::<Rank9SelIndex>()
585        };
586
587        let changed_v_a = {
588            let mut b = BitVectorBuilder::with_capacity(triple_count, &mut sections).unwrap();
589            let mut bits = set.vae.iter_prefix_count::<48>().flat_map(|(_, c)| {
590                iter::once(true).chain(std::iter::repeat_n(false, c as usize - 1))
591            });
592            b.set_bits_from_iter(0, &mut bits).unwrap();
593            b.freeze::<Rank9SelIndex>()
594        };
595
596        let meta = SuccinctArchiveMeta {
597            entity_count,
598            attribute_count,
599            value_count,
600            domain: domain.metadata(),
601            e_a: e_a.metadata(),
602            a_a: a_a.metadata(),
603            v_a: v_a.metadata(),
604            changed_e_a: changed_e_a.metadata(),
605            changed_e_v: changed_e_v.metadata(),
606            changed_a_e: changed_a_e.metadata(),
607            changed_a_v: changed_a_v.metadata(),
608            changed_v_e: changed_v_e.metadata(),
609            changed_v_a: changed_v_a.metadata(),
610            eav_c: eav_c.metadata(),
611            vea_c: vea_c.metadata(),
612            ave_c: ave_c.metadata(),
613            vae_c: vae_c.metadata(),
614            eva_c: eva_c.metadata(),
615            aev_c: aev_c.metadata(),
616        };
617
618        let mut meta_sec = sections.reserve::<SuccinctArchiveMeta<U::Meta>>(1).unwrap();
619        meta_sec.as_mut_slice()[0] = meta.clone();
620        meta_sec.freeze().unwrap();
621
622        let bytes = area.freeze().unwrap();
623
624        SuccinctArchive::from_bytes(meta, bytes).unwrap()
625    }
626}
627
628impl<U> From<&SuccinctArchive<U>> for TribleSet
629where
630    U: Universe,
631{
632    fn from(archive: &SuccinctArchive<U>) -> Self {
633        archive.iter().collect()
634    }
635}
636
637impl<U> TriblePattern for SuccinctArchive<U>
638where
639    U: Universe + Send + Sync,
640{
641    type PatternConstraint<'a>
642        = SuccinctArchiveConstraint<'a, U>
643    where
644        U: 'a;
645
646    fn pattern<'a, V: InlineEncoding>(
647        &'a self,
648        e: crate::query::Variable<GenId>,
649        a: crate::query::Variable<GenId>,
650        v: crate::query::Variable<V>,
651    ) -> Self::PatternConstraint<'a> {
652        SuccinctArchiveConstraint::new(e, a, v, self)
653    }
654}
655
656impl<U> Serializable for SuccinctArchive<U>
657where
658    U: Universe + Serializable<Error = jerky::error::Error>,
659{
660    type Meta = SuccinctArchiveMeta<U::Meta>;
661    type Error = jerky::error::Error;
662
663    fn metadata(&self) -> Self::Meta {
664        self.meta()
665    }
666
667    fn from_bytes(meta: Self::Meta, bytes: Bytes) -> Result<Self, Self::Error> {
668        let domain = U::from_bytes(meta.domain, bytes.clone())?;
669
670        let e_a = BitVector::from_bytes(meta.e_a, bytes.clone())?;
671        let a_a = BitVector::from_bytes(meta.a_a, bytes.clone())?;
672        let v_a = BitVector::from_bytes(meta.v_a, bytes.clone())?;
673        let changed_e_a = BitVector::from_bytes(meta.changed_e_a, bytes.clone())?;
674        let changed_e_v = BitVector::from_bytes(meta.changed_e_v, bytes.clone())?;
675        let changed_a_e = BitVector::from_bytes(meta.changed_a_e, bytes.clone())?;
676        let changed_a_v = BitVector::from_bytes(meta.changed_a_v, bytes.clone())?;
677        let changed_v_e = BitVector::from_bytes(meta.changed_v_e, bytes.clone())?;
678        let changed_v_a = BitVector::from_bytes(meta.changed_v_a, bytes.clone())?;
679
680        let eav_c = WaveletMatrix::from_bytes(meta.eav_c, bytes.clone())?;
681        let vea_c = WaveletMatrix::from_bytes(meta.vea_c, bytes.clone())?;
682        let ave_c = WaveletMatrix::from_bytes(meta.ave_c, bytes.clone())?;
683        let vae_c = WaveletMatrix::from_bytes(meta.vae_c, bytes.clone())?;
684        let eva_c = WaveletMatrix::from_bytes(meta.eva_c, bytes.clone())?;
685        let aev_c = WaveletMatrix::from_bytes(meta.aev_c, bytes.clone())?;
686
687        Ok(SuccinctArchive {
688            bytes,
689            domain,
690            entity_count: meta.entity_count,
691            attribute_count: meta.attribute_count,
692            value_count: meta.value_count,
693            e_a,
694            a_a,
695            v_a,
696            changed_e_a,
697            changed_e_v,
698            changed_a_e,
699            changed_a_v,
700            changed_v_e,
701            changed_v_a,
702            eav_c,
703            vea_c,
704            ave_c,
705            vae_c,
706            eva_c,
707            aev_c,
708        })
709    }
710}
711
712impl<U> Encodes<&SuccinctArchive<U>> for SuccinctArchiveBlob
713where
714    U: Universe + Serializable,
715    crate::inline::encodings::hash::Handle<SuccinctArchiveBlob>: crate::inline::InlineEncoding,
716{
717    type Output = Blob<SuccinctArchiveBlob>;
718    fn encode(source: &SuccinctArchive<U>) -> Blob<SuccinctArchiveBlob> {
719        Blob::new(source.bytes.clone())
720    }
721}
722
723impl<U> Encodes<SuccinctArchive<U>> for SuccinctArchiveBlob
724where
725    U: Universe + Serializable,
726    crate::inline::encodings::hash::Handle<SuccinctArchiveBlob>: crate::inline::InlineEncoding,
727{
728    type Output = Blob<SuccinctArchiveBlob>;
729    fn encode(source: SuccinctArchive<U>) -> Blob<SuccinctArchiveBlob> {
730        Blob::new(source.bytes)
731    }
732}
733
734/// Error returned when deserializing a [`SuccinctArchiveBlob`] into a [`SuccinctArchive`].
735pub struct SuccinctArchiveError;
736
737impl std::error::Error for SuccinctArchiveError {}
738
739impl std::fmt::Display for SuccinctArchiveError {
740    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
741        write!(f, "SuccinctArchiveError")
742    }
743}
744
745impl std::fmt::Debug for SuccinctArchiveError {
746    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
747        write!(f, "SuccinctArchiveError")
748    }
749}
750
751impl<U> TryFromBlob<SuccinctArchiveBlob> for SuccinctArchive<U>
752where
753    U: Universe + Serializable<Error = jerky::error::Error>,
754    <U as Serializable>::Meta: Copy + 'static,
755{
756    type Error = SuccinctArchiveError;
757
758    fn try_from_blob(blob: Blob<SuccinctArchiveBlob>) -> Result<Self, Self::Error> {
759        let bytes = blob.bytes;
760        let mut tail = bytes.clone();
761        let meta = *tail
762            .view_suffix::<SuccinctArchiveMeta<U::Meta>>()
763            .map_err(|_| SuccinctArchiveError)?;
764        SuccinctArchive::from_bytes(meta, bytes).map_err(|_| SuccinctArchiveError)
765    }
766}
767
768#[cfg(test)]
769mod tests {
770    use std::convert::TryInto;
771
772    use crate::blob::IntoBlob;
773    use crate::id::fucid;
774    use crate::prelude::*;
775    use crate::query::find;
776    use crate::trible::Trible;
777    use crate::inline::IntoInline;
778    use crate::inline::TryToInline;
779
780    use super::*;
781    use anybytes::area::ByteArea;
782    use itertools::Itertools;
783    use proptest::prelude::*;
784
785    pub mod knights {
786        use crate::prelude::*;
787
788        attributes! {
789            "328edd7583de04e2bedd6bd4fd50e651" as loves: inlineencodings::GenId;
790            "328147856cc1984f0806dbb824d2b4cb" as name: inlineencodings::ShortString;
791            "328f2c33d2fdd675e733388770b2d6c4" as title: inlineencodings::ShortString;
792        }
793    }
794
795    proptest! {
796        #[test]
797        fn create(entries in prop::collection::vec(prop::collection::vec(0u8..255, 64), 1..128)) {
798            let mut set = TribleSet::new();
799            for entry in entries {
800                let mut key = [0; 64];
801                key.iter_mut().set_from(entry.iter().cloned());
802                set.insert(&Trible{ data: key});
803            }
804
805            let _archive: SuccinctArchive<CompressedUniverse> = (&set).into();
806        }
807
808        #[test]
809        fn roundtrip(entries in prop::collection::vec(prop::collection::vec(0u8..255, 64), 1..128)) {
810            let mut set = TribleSet::new();
811            for entry in entries {
812                let mut key = [0; 64];
813                key.iter_mut().set_from(entry.iter().cloned());
814                set.insert(&Trible{ data: key});
815            }
816
817            let archive: SuccinctArchive<CompressedUniverse> = (&set).into();
818            let set_: TribleSet = (&archive).into();
819
820            assert_eq!(set, set_);
821        }
822
823        #[test]
824        fn ordered_universe(values in prop::collection::vec(prop::collection::vec(0u8..255, 32), 1..128)) {
825            let mut values: Vec<RawInline> = values.into_iter().map(|v| v.try_into().unwrap()).collect();
826            values.sort();
827            let mut area = ByteArea::new().unwrap();
828            let mut sections = area.sections();
829            let u = OrderedUniverse::with(values.iter().copied(), &mut sections);
830            drop(sections);
831            let _bytes = area.freeze().unwrap();
832            for i in 0..u.len() {
833                let original = values[i];
834                let reconstructed = u.access(i);
835                assert_eq!(original, reconstructed);
836            }
837            for i in 0..u.len() {
838                let original = Some(i);
839                let found = u.search(&values[i]);
840                assert_eq!(original, found);
841            }
842        }
843
844        #[test]
845        fn compressed_universe(values in prop::collection::vec(prop::collection::vec(0u8..255, 32), 1..128)) {
846            let mut values: Vec<RawInline> = values.into_iter().map(|v| v.try_into().unwrap()).collect();
847            values.sort();
848            let mut area = ByteArea::new().unwrap();
849            let mut sections = area.sections();
850            let u = CompressedUniverse::with(values.iter().copied(), &mut sections);
851            drop(sections);
852            let _bytes = area.freeze().unwrap();
853            for i in 0..u.len() {
854                let original = values[i];
855                let reconstructed = u.access(i);
856                assert_eq!(original, reconstructed);
857            }
858            for i in 0..u.len() {
859                let original = Some(i);
860                let found = u.search(&values[i]);
861                assert_eq!(original, found);
862            }
863        }
864    }
865
866    #[test]
867    fn archive_pattern() {
868        let juliet = fucid();
869        let romeo = fucid();
870
871        let mut kb = TribleSet::new();
872
873        kb += entity! { &juliet @
874           knights::name: "Juliet",
875           knights::loves: &romeo,
876           knights::title: "Maiden"
877        };
878        kb += entity! { &romeo @
879           knights::name: "Romeo",
880           knights::loves: &juliet,
881           knights::title: "Prince"
882        };
883        kb += entity! {
884           knights::name: "Angelica",
885           knights::title: "Nurse"
886        };
887
888        let archive: SuccinctArchive<OrderedUniverse> = (&kb).into();
889
890        let r: Vec<_> = find!(
891            (juliet, name),
892            pattern!(&archive, [
893            {knights::name: "Romeo",
894             knights::loves: ?juliet},
895            {?juliet @
896                knights::name: ?name
897            }])
898        )
899        .collect();
900        assert_eq!(
901            vec![((&juliet).to_inline(), "Juliet".try_to_inline().unwrap(),)],
902            r
903        );
904    }
905
906    #[test]
907    fn blob_roundtrip() {
908        let juliet = fucid();
909        let romeo = fucid();
910
911        let mut kb = TribleSet::new();
912
913        kb += entity! {&juliet @
914            knights::name: "Juliet",
915            knights::loves: &romeo,
916            knights::title: "Maiden"
917        };
918        kb += entity! {&romeo @
919            knights::name: "Romeo",
920            knights::loves: &juliet,
921            knights::title: "Prince"
922        };
923
924        let archive: SuccinctArchive<OrderedUniverse> = (&kb).into();
925        let blob = (&archive).to_blob();
926        let rebuilt: SuccinctArchive<OrderedUniverse> = blob.try_from_blob().unwrap();
927        let kb2: TribleSet = (&rebuilt).into();
928        assert_eq!(kb, kb2);
929    }
930}