columnar/
lib.rs

1//! Types supporting flat / "columnar" layout for complex types.
2//!
3//! The intent is to re-layout `Vec<T>` types into vectors of reduced
4//! complexity, repeatedly. One should be able to push and pop easily,
5//! but indexing will be more complicated because we likely won't have
6//! a real `T` lying around to return as a reference. Instead, we will
7//! use Generic Associated Types (GATs) to provide alternate references.
8
9// Re-export derive crate.
10extern crate columnar_derive;
11pub use columnar_derive::Columnar;
12
13pub mod adts;
14
15/// A type that can be represented in columnar form.
16///
17/// For a running example, a type like `(A, Vec<B>)`.
18pub trait Columnar : 'static {
19    /// Repopulates `self` from a reference.
20    ///
21    /// By default this just calls `into_owned()`, but it can be overridden.
22    fn copy_from<'a>(&mut self, other: Ref<'a, Self>) where Self: Sized {
23        *self = Self::into_owned(other);
24    }
25    /// Produce an instance of `Self` from `Self::Ref<'a>`.
26    fn into_owned<'a>(other: Ref<'a, Self>) -> Self;
27
28    /// The type that stores the columnar representation.
29    ///
30    /// The container must support pushing both `&Self` and `Self::Ref<'_>`.
31    /// In our running example this might be `(Vec<A>, Vecs<Vec<B>>)`.
32    type Container: Container + for<'a> Push<&'a Self>;
33
34    /// Converts a sequence of the references to the type into columnar form.
35    fn as_columns<'a, I>(selves: I) -> Self::Container where I: IntoIterator<Item=&'a Self>, Self: 'a {
36        let mut columns: Self::Container = Default::default();
37        for item in selves {
38            columns.push(item);
39        }
40        columns
41    }
42    /// Converts a sequence of the type into columnar form.
43    ///
44    /// This consumes the owned `Self` types but uses them only by reference.
45    /// Consider `as_columns()` instead if it is equally ergonomic.
46    fn into_columns<I>(selves: I) -> Self::Container where I: IntoIterator<Item = Self>, Self: Sized {
47        let mut columns: Self::Container = Default::default();
48        for item in selves {
49            columns.push(&item);
50        }
51        columns
52    }
53    /// Reborrows the reference type to a shorter lifetime.
54    ///
55    /// Implementations must not change the contents of the reference, and should mark
56    /// the function as `#[inline(always)]`. It is no-op to overcome limitations
57    /// of the borrow checker. In many cases, it is enough to return `thing` as-is.
58    ///
59    /// For example, when comparing two references `Ref<'a>` and `Ref<'b>`, we can
60    /// reborrow both to a local lifetime to compare them. This allows us to keep the
61    /// lifetimes `'a` and `'b` separate, while otherwise Rust would unify them.
62    #[inline(always)] fn reborrow<'b, 'a: 'b>(thing: Ref<'a, Self>) -> Ref<'b, Self> {
63        Self::Container::reborrow_ref(thing)
64    }
65}
66
67/// The container type of columnar type `T`.
68///
69/// Equivalent to `<T as Columnar>::Container`.
70pub type ContainerOf<T> = <T as Columnar>::Container;
71
72/// For a lifetime, the reference type of columnar type `T`.
73///
74/// Equivalent to `<ContainerOf<T> as Container>::Ref<'a>`.
75pub type Ref<'a, T> = <ContainerOf<T> as Container>::Ref<'a>;
76
77/// A container that can hold `C`, and provide its preferred references.
78///
79/// As an example, `(Vec<A>, Vecs<Vec<B>>)`.
80pub trait Container : Len + Clear + for<'a> Push<Self::Ref<'a>> + Clone + Default + Send + 'static {
81    /// For each lifetime, a reference with that lifetime.
82    ///
83    /// As an example, `(&'a A, &'a [B])`.
84    type Ref<'a> : Copy;
85    /// The type of a borrowed container.
86    ///
87    /// Corresponding to our example, `(&'a [A], Vecs<&'a [B], &'a [u64]>)`.
88    type Borrowed<'a>: Copy + Len + AsBytes<'a> + FromBytes<'a> + Index<Ref = Self::Ref<'a>> where Self: 'a;
89    /// Converts a reference to the type to a borrowed variant.
90    fn borrow<'a>(&'a self) -> Self::Borrowed<'a>;
91    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
92    fn reborrow<'b, 'a: 'b>(item: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a;
93    /// Reborrows the borrowed type to a shorter lifetime. See [`Columnar::reborrow`] for details.
94    fn reborrow_ref<'b, 'a: 'b>(item: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a;
95
96    /// Extends `self` by a range in `other`.
97    ///
98    /// This method has a default implementation, but can and should be specialized when ranges can be copied.
99    /// As an example, lists of lists are often backed by contiguous elements, all of which can be memcopied,
100    /// with only the offsets into them (the bounds) to push either before or after (rather than during).
101    #[inline(always)]
102    fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
103        self.extend(range.map(|i| other.get(i)))
104    }
105}
106
107pub use common::{Clear, Len, Push, IndexMut, Index, IndexAs, HeapSize, Slice, AsBytes, FromBytes};
108/// Common traits and types that are re-used throughout the module.
109pub mod common {
110
111    /// A type with a length.
112    pub trait Len {
113        /// The number of contained elements.
114        fn len(&self) -> usize;
115        /// Whether this contains no elements.
116        fn is_empty(&self) -> bool {
117            self.len() == 0
118        }
119    }
120    impl<L: Len + ?Sized> Len for &L {
121        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
122    }
123    impl<L: Len + ?Sized> Len for &mut L {
124        #[inline(always)] fn len(&self) -> usize { L::len(*self) }
125    }
126    impl<T> Len for Vec<T> {
127        #[inline(always)] fn len(&self) -> usize { self.len() }
128    }
129    impl<T> Len for [T] {
130        #[inline(always)] fn len(&self) -> usize { <[T]>::len(self) }
131    }
132
133    /// A type that can accept items of type `T`.
134    pub trait Push<T> {
135        /// Pushes an item onto `self`.
136        fn push(&mut self, item: T);
137        /// Pushes elements of an iterator onto `self`.
138        #[inline(always)] fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
139            for item in iter {
140                self.push(item);
141            }
142        }
143    }
144    impl<T> Push<T> for Vec<T> {
145        #[inline(always)] fn push(&mut self, item: T) { self.push(item) }
146
147        #[inline(always)]
148        fn extend(&mut self, iter: impl IntoIterator<Item=T>) {
149            std::iter::Extend::extend(self, iter)
150        }
151    }
152    impl<'a, T: Clone> Push<&'a T> for Vec<T> {
153        #[inline(always)] fn push(&mut self, item: &'a T) { self.push(item.clone()) }
154
155        #[inline(always)]
156        fn extend(&mut self, iter: impl IntoIterator<Item=&'a T>) {
157            std::iter::Extend::extend(self, iter.into_iter().cloned())
158        }
159    }
160    impl<'a, T: Clone> Push<&'a [T]> for Vec<T> {
161        #[inline(always)] fn push(&mut self, item: &'a [T]) { self.clone_from_slice(item) }
162    }
163
164
165    pub use index::{Index, IndexMut, IndexAs};
166    /// Traits for accessing elements by `usize` indexes.
167    ///
168    /// There are several traits, with a core distinction being whether the returned reference depends on the lifetime of `&self`.
169    /// For one trait `Index` the result does not depend on this lifetime.
170    /// There is a third trait `IndexMut` that allows mutable access, that may be less commonly implemented.
171    pub mod index {
172
173        use crate::Len;
174        use crate::common::IterOwn;
175
176        /// A type that can be mutably accessed by `usize`.
177        pub trait IndexMut {
178            /// Type mutably referencing an indexed element.
179            type IndexMut<'a> where Self: 'a;
180            fn get_mut(& mut self, index: usize) -> Self::IndexMut<'_>;
181            /// A reference to the last element, should one exist.
182            #[inline(always)] fn last_mut(&mut self) -> Option<Self::IndexMut<'_>> where Self: Len {
183                if self.is_empty() { None }
184                else { Some(self.get_mut(self.len()-1)) }
185            }
186        }
187
188        impl<T: IndexMut + ?Sized> IndexMut for &mut T {
189            type IndexMut<'a> = T::IndexMut<'a> where Self: 'a;
190            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
191                T::get_mut(*self, index)
192            }
193        }
194        impl<T> IndexMut for Vec<T> {
195            type IndexMut<'a> = &'a mut T where Self: 'a;
196            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
197        }
198        impl<T> IndexMut for [T] {
199            type IndexMut<'a> = &'a mut T where Self: 'a;
200            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self[index] }
201        }
202
203        /// A type that can be accessed by `usize` but without borrowing `self`.
204        ///
205        /// This can be useful for types which include their own lifetimes, and
206        /// which wish to express that their reference has the same lifetime.
207        /// In the GAT `Index`, the `Ref<'_>` lifetime would be tied to `&self`.
208        ///
209        /// This trait may be challenging to implement for owning containers,
210        /// for example `Vec<_>`, which would need their `Ref` type to depend
211        /// on the lifetime of the `&self` borrow in the `get()` function.
212        pub trait Index {
213            /// The type returned by the `get` method.
214            ///
215            /// Notably, this does not vary with lifetime, and will not depend on the lifetime of `&self`.
216            type Ref;
217            fn get(&self, index: usize) -> Self::Ref;
218            #[inline(always)] fn last(&self) -> Option<Self::Ref> where Self: Len {
219                if self.is_empty() { None }
220                else { Some(self.get(self.len()-1)) }
221            }
222            /// Converts `&self` into an iterator.
223            ///
224            /// This has an awkward name to avoid collision with `iter()`, which may also be implemented.
225            #[inline(always)]
226            fn index_iter(&self) -> IterOwn<&Self> {
227                IterOwn {
228                    index: 0,
229                    slice: self,
230                }
231            }
232            /// Converts `self` into an iterator.
233            ///
234            /// This has an awkward name to avoid collision with `into_iter()`, which may also be implemented.
235            #[inline(always)]
236            fn into_index_iter(self) -> IterOwn<Self> where Self: Sized {
237                IterOwn {
238                    index: 0,
239                    slice: self,
240                }
241            }
242        }
243
244        // These implementations aim to reveal a longer lifetime, or to copy results to avoid a lifetime.
245        impl<'a, T> Index for &'a [T] {
246            type Ref = &'a T;
247            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
248        }
249        impl<T: Copy> Index for [T] {
250            type Ref = T;
251            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
252        }
253        impl<'a, T> Index for &'a Vec<T> {
254            type Ref = &'a T;
255            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { &self[index] }
256        }
257        impl<T: Copy> Index for Vec<T> {
258            type Ref = T;
259            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self[index] }
260        }
261
262
263        /// Types that can be converted into another type by copying.
264        ///
265        /// We use this trait to unify the ability of `T` and `&T` to be converted into `T`.
266        /// This is handy for copy types that we'd like to use, like `u8`, `u64` and `usize`.
267        pub trait CopyAs<T> {
268            fn copy_as(self) -> T;
269        }
270        impl<T: Copy> CopyAs<T> for &T {
271            #[inline(always)] fn copy_as(self) -> T { *self }
272        }
273        impl<T> CopyAs<T> for T {
274            #[inline(always)] fn copy_as(self) -> T { self }
275        }
276
277        pub trait IndexAs<T> {
278            fn index_as(&self, index: usize) -> T;
279            #[inline(always)] fn last(&self) -> Option<T> where Self: Len {
280                if self.is_empty() { None }
281                else { Some(self.index_as(self.len()-1)) }
282            }
283        }
284
285        impl<T: Index, S> IndexAs<S> for T where T::Ref: CopyAs<S> {
286            #[inline(always)] fn index_as(&self, index: usize) -> S { self.get(index).copy_as() }
287        }
288
289    }
290
291    use crate::Container;
292    use crate::common::index::CopyAs;
293    /// A composite trait which captures the ability `Push<&T>` and `Index<Ref = T>`.
294    ///
295    /// Implement `CopyAs<T>` for the reference type, and `Push<&'a T>` for the container.
296    pub trait PushIndexAs<T> : for<'a> Container<Ref<'a>: CopyAs<T>> + for<'a> Push<&'a T> { }
297    impl<T, C: for<'a> Container<Ref<'a>: CopyAs<T>> + for<'a> Push<&'a T>> PushIndexAs<T> for C { }
298
299    /// A type that can remove its contents and return to an empty state.
300    ///
301    /// Generally, this method does not release resources, and is used to make the container available for re-insertion.
302    pub trait Clear {
303        /// Clears `self`, without changing its capacity.
304        fn clear(&mut self);
305    }
306    // Vectors can be cleared.
307    impl<T> Clear for Vec<T> {
308        #[inline(always)] fn clear(&mut self) { self.clear() }
309    }
310    // Slice references can be cleared.
311    impl<T> Clear for &[T] {
312        #[inline(always)] fn clear(&mut self) { *self = &[]; }
313    }
314
315    pub trait HeapSize {
316        /// Active (len) and allocated (cap) heap sizes in bytes.
317        /// This should not include the size of `self` itself.
318        fn heap_size(&self) -> (usize, usize) { (0, 0) }
319    }
320
321    impl HeapSize for String {
322        fn heap_size(&self) -> (usize, usize) {
323            (self.len(), self.capacity())
324        }
325    }
326    impl<T: HeapSize> HeapSize for [T] {
327        fn heap_size(&self) -> (usize, usize) {
328            let mut l = std::mem::size_of_val(self);
329            let mut c = std::mem::size_of_val(self);
330            for item in self.iter() {
331                let (il, ic) = item.heap_size();
332                l += il;
333                c += ic;
334            }
335            (l, c)
336        }
337    }
338    impl<T: HeapSize> HeapSize for Vec<T> {
339        fn heap_size(&self) -> (usize, usize) {
340            let mut l = std::mem::size_of::<T>() * self.len();
341            let mut c = std::mem::size_of::<T>() * self.capacity();
342            for item in (self[..]).iter() {
343                let (il, ic) = item.heap_size();
344                l += il;
345                c += ic;
346            }
347            (l, c)
348        }
349    }
350
351    /// A struct representing a slice of a range of values.
352    ///
353    /// The lower and upper bounds should be meaningfully set on construction.
354    #[derive(Copy, Clone, Debug)]
355    pub struct Slice<S> {
356        pub lower: usize,
357        pub upper: usize,
358        pub slice: S,
359    }
360
361    impl<S> Slice<S> {
362        pub fn slice<R: std::ops::RangeBounds<usize>>(self, range: R) -> Self {
363            use std::ops::Bound;
364            let lower = match range.start_bound() {
365                Bound::Included(s) => std::cmp::max(self.lower, *s),
366                Bound::Excluded(s) => std::cmp::max(self.lower, *s+1),
367                Bound::Unbounded => self.lower,
368            };
369            let upper = match range.end_bound() {
370                Bound::Included(s) => std::cmp::min(self.upper, *s+1),
371                Bound::Excluded(s) => std::cmp::min(self.upper, *s),
372                Bound::Unbounded => self.upper,
373            };
374            assert!(lower <= upper);
375            Self { lower, upper, slice: self.slice }
376        }
377        pub fn new(lower: u64, upper: u64, slice: S) -> Self {
378            let lower: usize = lower.try_into().expect("slice bounds must fit in `usize`");
379            let upper: usize = upper.try_into().expect("slice bounds must fit in `usize`");
380            Self { lower, upper, slice }
381        }
382        pub fn len(&self) -> usize { self.upper - self.lower }
383        /// Map the slice to another type.
384        pub(crate) fn map<T>(self, f: impl Fn(S) -> T) -> Slice<T> {
385            Slice {
386                lower: self.lower,
387                upper: self.upper,
388                slice: f(self.slice),
389            }
390        }
391    }
392
393    impl<S: Index> PartialEq for Slice<S> where S::Ref: PartialEq {
394        fn eq(&self, other: &Self) -> bool {
395            if self.len() != other.len() { return false; }
396            for i in 0 .. self.len() {
397                if self.get(i) != other.get(i) { return false; }
398            }
399            true
400        }
401    }
402    impl<S: Index> PartialEq<[S::Ref]> for Slice<S> where S::Ref: PartialEq {
403        fn eq(&self, other: &[S::Ref]) -> bool {
404            if self.len() != other.len() { return false; }
405            for i in 0 .. self.len() {
406                if self.get(i) != other[i] { return false; }
407            }
408            true
409        }
410    }
411    impl<S: Index> PartialEq<Vec<S::Ref>> for Slice<S> where S::Ref: PartialEq {
412        fn eq(&self, other: &Vec<S::Ref>) -> bool {
413            if self.len() != other.len() { return false; }
414            for i in 0 .. self.len() {
415                if self.get(i) != other[i] { return false; }
416            }
417            true
418        }
419    }
420
421    impl<S: Index> Eq for Slice<S> where S::Ref: Eq { }
422
423    impl<S: Index> PartialOrd for Slice<S> where S::Ref: PartialOrd {
424        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
425            use std::cmp::Ordering;
426            let len = std::cmp::min(self.len(), other.len());
427
428            for i in 0 .. len {
429                match self.get(i).partial_cmp(&other.get(i)) {
430                    Some(Ordering::Equal) => (),
431                    not_equal => return not_equal,
432                }
433            }
434
435            self.len().partial_cmp(&other.len())
436        }
437    }
438
439    impl<S: Index> Ord for Slice<S> where S::Ref: Ord + Eq {
440        fn cmp(&self, other: &Self) -> std::cmp::Ordering {
441            use std::cmp::Ordering;
442            let len = std::cmp::min(self.len(), other.len());
443
444            for i in 0 .. len {
445                match self.get(i).cmp(&other.get(i)) {
446                    Ordering::Equal => (),
447                    not_equal => return not_equal,
448                }
449            }
450
451            self.len().cmp(&other.len())
452        }
453    }
454
455    impl<S> Len for Slice<S> {
456        #[inline(always)] fn len(&self) -> usize { self.len() }
457    }
458
459    impl<S: Index> Index for Slice<S> {
460        type Ref = S::Ref;
461        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
462            assert!(index < self.upper - self.lower);
463            self.slice.get(self.lower + index)
464        }
465    }
466    impl<'a, S> Index for &'a Slice<S>
467    where
468        &'a S : Index,
469    {
470        type Ref = <&'a S as Index>::Ref;
471        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
472            assert!(index < self.upper - self.lower);
473            (&self.slice).get(self.lower + index)
474        }
475    }
476
477    impl<S: IndexMut> IndexMut for Slice<S> {
478        type IndexMut<'a> = S::IndexMut<'a> where S: 'a;
479        #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
480            assert!(index < self.upper - self.lower);
481            self.slice.get_mut(self.lower + index)
482        }
483    }
484
485    impl<S: Index + Len> Slice<S> {
486        /// Converts the slice into an iterator.
487        ///
488        /// This method exists rather than an `IntoIterator` implementation to avoid
489        /// a conflicting implementation for pushing an `I: IntoIterator` into `Vecs`.
490        pub fn into_iter(self) -> IterOwn<Slice<S>> {
491            self.into_index_iter()
492        }
493    }
494
495    impl<'a, T> Slice<&'a [T]> {
496        pub fn as_slice(&self) -> &'a [T] {
497            &self.slice[self.lower .. self.upper]
498        }
499    }
500
501    pub struct IterOwn<S> {
502        index: usize,
503        slice: S,
504    }
505
506    impl<S> IterOwn<S> {
507        pub fn new(index: usize, slice: S) -> Self {
508            Self { index, slice }
509        }
510    }
511
512    impl<S: Index + Len> Iterator for IterOwn<S> {
513        type Item = S::Ref;
514        #[inline(always)] fn next(&mut self) -> Option<Self::Item> {
515            if self.index < self.slice.len() {
516                let result = self.slice.get(self.index);
517                self.index += 1;
518                Some(result)
519            } else {
520                None
521            }
522        }
523        #[inline(always)]
524        fn size_hint(&self) -> (usize, Option<usize>) {
525            (self.slice.len() - self.index, Some(self.slice.len() - self.index))
526        }
527    }
528
529    impl<S: Index + Len> ExactSizeIterator for IterOwn<S> { }
530
531    /// A type that can be viewed as byte slices with lifetime `'a`.
532    ///
533    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves.
534    pub trait AsBytes<'a> {
535        /// Presents `self` as a sequence of byte slices, with their required alignment.
536        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])>;
537    }
538
539    /// A type that can be reconstituted from byte slices with lifetime `'a`.
540    ///
541    /// Implementors of this trait almost certainly reference the lifetime `'a` themselves,
542    /// unless they actively deserialize the bytes (vs sit on the slices, as if zero-copy).
543    pub trait FromBytes<'a> {
544        /// Reconstructs `self` from a sequence of correctly aligned and sized bytes slices.
545        ///
546        /// The implementation is expected to consume the right number of items from the iterator,
547        /// which may go on to be used by other implementations of `FromBytes`.
548        ///
549        /// The implementation should aim for only doing trivial work, as it backs calls like
550        /// `borrow` for serialized containers.
551        ///
552        /// Implementations should almost always be marked as `#[inline(always)]` to ensure that
553        /// they are inlined. A single non-inlined function on a tree of `from_bytes` calls
554        /// can cause the performance to drop significantly.
555        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self;
556    }
557
558}
559
560/// Logic related to the transformation to and from bytes.
561///
562/// The methods here line up with the `AsBytes` and `FromBytes` traits.
563pub mod bytes {
564
565    use crate::AsBytes;
566
567    /// A coupled encode/decode pair for byte sequences.
568    pub trait EncodeDecode {
569        /// Encoded length in number of `u64` words required.
570        fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a>;
571        /// Encoded length in number of `u8` bytes required.
572        ///
573        /// This method should always be eight times `Self::length_in_words`, and is provided for convenience and clarity.
574        fn length_in_bytes<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> { 8 * Self::length_in_words(bytes) }
575        /// Encodes `bytes` into a sequence of `u64`.
576        fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a>;
577        /// Writes `bytes` in the encoded format to an arbitrary writer.
578        fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a>;
579        /// Decodes bytes from a sequence of `u64`.
580        fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]>;
581    }
582
583    /// A sequential byte layout for `AsBytes` and `FromBytes` implementors.
584    ///
585    /// The layout is aligned like a sequence of `u64`, where we repeatedly announce a length,
586    /// and then follow it by that many bytes. We may need to follow this with padding bytes.
587    pub use serialization::Sequence;
588    mod serialization {
589
590        use crate::AsBytes;
591
592        /// Encodes and decodes bytes sequences, by prepending the length and appending the all sequences.
593        pub struct Sequence;
594        impl super::EncodeDecode for Sequence {
595            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
596                // Each byte slice has one `u64` for the length, and then as many `u64`s as needed to hold all bytes.
597                bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum()
598            }
599            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
600                encode(store, bytes.as_bytes())
601            }
602            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
603                write(writer, bytes.as_bytes())
604            }
605            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
606                decode(store)
607            }
608        }
609
610        /// Encodes a sequence of byte slices as their length followed by their bytes, aligned to 8 bytes.
611        ///
612        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
613        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
614        pub fn encode<'a>(store: &mut Vec<u64>, bytes: impl Iterator<Item=(u64, &'a [u8])>) {
615            for (align, bytes) in bytes {
616                assert!(align <= 8);
617                store.push(bytes.len() as u64);
618                let whole_words = 8 * (bytes.len() / 8);
619                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
620                // In the latter case, init `store` and cast and copy onto it as a byte slice.
621                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
622                    store.extend_from_slice(words);
623                }
624                else {
625                    let store_len = store.len();
626                    store.resize(store_len + whole_words/8, 0);
627                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
628                    slice.copy_from_slice(&bytes[.. whole_words]);
629                }
630                let remaining_bytes = &bytes[whole_words..];
631                if !remaining_bytes.is_empty() {
632                    let mut remainder = 0u64;
633                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
634                    for (i, byte) in remaining_bytes.iter().enumerate() {
635                        transmute[i] = *byte;
636                    }
637                    store.push(remainder);
638                }
639            }
640        }
641
642        /// Writes a sequence of byte slices as their length followed by their bytes, padded to 8 bytes.
643        ///
644        /// Each length will be exactly 8 bytes, and the bytes that follow are padded out to a multiple of 8 bytes.
645        /// When reading the data, the length is in bytes, and one should consume those bytes and advance over padding.
646        pub fn write<'a>(mut writer: impl std::io::Write, bytes: impl Iterator<Item=(u64, &'a [u8])>) -> std::io::Result<()> {
647            // Columnar data is serialized as a sequence of `u64` values, with each `[u8]` slice
648            // serialize as first its length in bytes, and then as many `u64` values as needed.
649            // Padding should be added, but only for alignment; no specific values are required.
650            for (align, bytes) in bytes {
651                assert!(align <= 8);
652                let length = u64::try_from(bytes.len()).unwrap();
653                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&length)))?;
654                writer.write_all(bytes)?;
655                let padding = usize::try_from((8 - (length % 8)) % 8).unwrap();
656                writer.write_all(&[0; 8][..padding])?;
657            }
658            Ok(())
659        }
660
661        /// Decodes a sequence of byte slices from their length followed by their bytes.
662        ///
663        /// This decoder matches the `encode` function above.
664        /// In particular, it anticipates padding bytes when the length is not a multiple of eight.
665        pub fn decode(store: &[u64]) -> Decoder<'_> {
666            Decoder { store }
667        }
668
669        /// An iterator over byte slices, decoding from a sequence of lengths followed by bytes.
670        pub struct Decoder<'a> {
671            store: &'a [u64],
672        }
673
674        impl<'a> Iterator for Decoder<'a> {
675            type Item = &'a [u8];
676            fn next(&mut self) -> Option<Self::Item> {
677                if let Some(length) = self.store.first() {
678                    let length = *length as usize;
679                    self.store = &self.store[1..];
680                    let whole_words = if length % 8 == 0 { length / 8 } else { length / 8 + 1 };
681                    let bytes: &[u8] = bytemuck::try_cast_slice(&self.store[..whole_words]).expect("&[u64] should convert to &[u8]");
682                    self.store = &self.store[whole_words..];
683                    Some(&bytes[..length])
684                } else {
685                    None
686                }
687            }
688        }
689    }
690
691    /// A binary encoding of sequences of byte slices.
692    ///
693    /// The encoding starts with a sequence of n+1 offsets describing where to find the n slices in the bytes that follow.
694    /// Treating the offsets as a byte slice too, the each offset indicates the location (in bytes) of the end of its slice.
695    /// Each byte slice can be found from a pair of adjacent offsets, where the first is rounded up to a multiple of eight.
696    pub use serialization_neu::Indexed;
697    pub mod serialization_neu {
698
699        use crate::AsBytes;
700
701        /// Encodes and decodes bytes sequences, using an index of offsets.
702        pub struct Indexed;
703        impl super::EncodeDecode for Indexed {
704            fn length_in_words<'a, A>(bytes: &A) -> usize where A : AsBytes<'a> {
705                1 + bytes.as_bytes().map(|(_align, bytes)| 1 + bytes.len().div_ceil(8)).sum::<usize>()
706            }
707            fn encode<'a, A>(store: &mut Vec<u64>, bytes: &A) where A : AsBytes<'a> {
708                encode(store, bytes)
709            }
710            fn write<'a, A, W: std::io::Write>(writer: W, bytes: &A) -> std::io::Result<()> where A : AsBytes<'a> {
711                write(writer, bytes)
712            }
713            fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
714                decode(store)
715            }
716        }
717
718        /// Encodes `item` into `u64` aligned words.
719        ///
720        /// The sequence of byte slices are appended, with padding to have each slice start `u64` aligned.
721        /// The sequence is then pre-pended with as many byte offsets as there are slices in `item`, plus one.
722        /// The byte offsets indicate where each slice ends, and by rounding up to `u64` alignemnt where the next slice begins.
723        /// The first offset indicates where the list of offsets itself ends, and where the first slice begins.
724        ///
725        /// We will need to visit `as_bytes` three times to extract this information, so the method should be efficient and inlined.
726        /// The first read writes the first offset, the second writes each other offset, and the third writes the bytes themselves.
727        ///
728        /// The offsets are zero-based, rather than based on `store.len()`.
729        /// If you call the method with a non-empty `store` be careful decoding.
730        pub fn encode<'a, A>(store: &mut Vec<u64>, iter: &A)
731        where A : AsBytes<'a>,
732        {
733            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
734            // TODO: right-size `store` before first call to `push`.
735            let offsets = 1 + iter.as_bytes().count();
736            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
737            store.push(offsets_end);
738            // Read 2: Establish each of the offsets based on lengths of byte slices.
739            let mut position_bytes = offsets_end;
740            for (align, bytes) in iter.as_bytes() {
741                assert!(align <= 8);
742                // Write length in bytes, but round up to words before updating `position_bytes`.
743                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
744                store.push(to_push);
745                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
746                position_bytes += round_len;
747            }
748            // Read 3: Append each byte slice, with padding to align starts to `u64`.
749            for (_align, bytes) in iter.as_bytes() {
750                let whole_words = 8 * (bytes.len() / 8);
751                // We want to extend `store` by `bytes`, but `bytes` may not be `u64` aligned.
752                // In the latter case, init `store` and cast and copy onto it as a byte slice.
753                if let Ok(words) = bytemuck::try_cast_slice(&bytes[.. whole_words]) {
754                    store.extend_from_slice(words);
755                }
756                else {
757                    let store_len = store.len();
758                    store.resize(store_len + whole_words/8, 0);
759                    let slice = bytemuck::try_cast_slice_mut(&mut store[store_len..]).expect("&[u64] should convert to &[u8]");
760                    slice.copy_from_slice(&bytes[.. whole_words]);
761                }
762                let remaining_bytes = &bytes[whole_words..];
763                if !remaining_bytes.is_empty() {
764                    let mut remainder = 0u64;
765                    let transmute: &mut [u8] = bytemuck::try_cast_slice_mut(std::slice::from_mut(&mut remainder)).expect("&[u64] should convert to &[u8]");
766                    for (i, byte) in remaining_bytes.iter().enumerate() {
767                        transmute[i] = *byte;
768                    }
769                    store.push(remainder);
770                }
771            }
772        }
773
774        pub fn write<'a, A, W>(mut writer: W, iter: &A) -> std::io::Result<()>
775        where
776            A: AsBytes<'a>,
777            W: std::io::Write,
778        {
779            // Read 1: Number of offsets we will record, equal to the number of slices plus one.
780            let offsets = 1 + iter.as_bytes().count();
781            let offsets_end: u64 = TryInto::<u64>::try_into((offsets) * std::mem::size_of::<u64>()).unwrap();
782            writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&offsets_end)))?;
783            // Read 2: Establish each of the offsets based on lengths of byte slices.
784            let mut position_bytes = offsets_end;
785            for (align, bytes) in iter.as_bytes() {
786                assert!(align <= 8);
787                // Write length in bytes, but round up to words before updating `position_bytes`.
788                let to_push: u64 = position_bytes + TryInto::<u64>::try_into(bytes.len()).unwrap();
789                writer.write_all(bytemuck::cast_slice(std::slice::from_ref(&to_push)))?;
790                let round_len: u64 = ((bytes.len() + 7) & !7).try_into().unwrap();
791                position_bytes += round_len;
792            }
793            // Read 3: Append each byte slice, with padding to align starts to `u64`.
794            for (_align, bytes) in iter.as_bytes() {
795                writer.write_all(bytes)?;
796                let padding = ((bytes.len() + 7) & !7) - bytes.len();
797                if padding > 0 {
798                    writer.write_all(&[0u8;8][..padding])?;
799                }
800            }
801
802            Ok(())
803        }
804
805        /// Decodes an encoded sequence of byte slices. Each result will be `u64` aligned.
806        pub fn decode(store: &[u64]) -> impl Iterator<Item=&[u8]> {
807            assert!(store[0] % 8 == 0);
808            let slices = (store[0] / 8) - 1;
809            (0 .. slices).map(|i| decode_index(store, i))
810        }
811
812        /// Decodes a specific byte slice by index. It will be `u64` aligned.
813        #[inline(always)]
814        pub fn decode_index(store: &[u64], index: u64) -> &[u8] {
815            debug_assert!(index + 1 < store[0]/8);
816            let index: usize = index.try_into().unwrap();
817            let lower: usize = ((store[index] + 7) & !7).try_into().unwrap();
818            let upper: usize = (store[index + 1]).try_into().unwrap();
819            let bytes: &[u8] = bytemuck::try_cast_slice(store).expect("&[u64] should convert to &[u8]");
820            &bytes[lower .. upper]
821        }
822
823        #[cfg(test)]
824        mod test {
825
826            use crate::{Container, ContainerOf};
827            use crate::common::Push;
828            use crate::AsBytes;
829
830            use super::{encode, decode};
831
832            fn assert_roundtrip<'a, AB: AsBytes<'a>>(item: &AB) {
833                let mut store = Vec::new();
834                encode(&mut store, item);
835                assert!(item.as_bytes().map(|x| x.1).eq(decode(&store)));
836            }
837
838            #[test]
839            fn round_trip() {
840
841                let mut column: ContainerOf<Result<u64, String>> = Default::default();
842                for i in 0..10000u64 {
843                    column.push(&Ok::<u64, String>(i));
844                    column.push(&Err::<u64, String>(format!("{:?}", i)));
845                }
846
847                assert_roundtrip(&column.borrow());
848            }
849        }
850    }
851
852    #[cfg(test)]
853    mod test {
854        use crate::ContainerOf;
855
856        #[test]
857        fn round_trip() {
858
859            use crate::common::{Push, HeapSize, Len, Index};
860            use crate::{Container, AsBytes, FromBytes};
861
862            let mut column: ContainerOf<Result<u64, u64>> = Default::default();
863            for i in 0..100u64 {
864                column.push(Ok::<u64, u64>(i));
865                column.push(Err::<u64, u64>(i));
866            }
867
868            assert_eq!(column.len(), 200);
869            assert_eq!(column.heap_size(), (1624, 2080));
870
871            for i in 0..100 {
872                assert_eq!(column.get(2*i+0), Ok(i as u64));
873                assert_eq!(column.get(2*i+1), Err(i as u64));
874            }
875
876            let column2 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column.borrow().as_bytes().map(|(_, bytes)| bytes));
877            for i in 0..100 {
878                assert_eq!(column.get(2*i+0), column2.get(2*i+0).copied().map_err(|e| *e));
879                assert_eq!(column.get(2*i+1), column2.get(2*i+1).copied().map_err(|e| *e));
880            }
881
882            let column3 = crate::Results::<&[u64], &[u64], &[u64], &[u64], &u64>::from_bytes(&mut column2.as_bytes().map(|(_, bytes)| bytes));
883            for i in 0..100 {
884                assert_eq!(column3.get(2*i+0), column2.get(2*i+0));
885                assert_eq!(column3.get(2*i+1), column2.get(2*i+1));
886            }
887        }
888    }
889}
890
891/// Types that prefer to be represented by `Vec<T>`.
892pub mod primitive {
893    use std::num::Wrapping;
894
895    /// An implementation of opinions for types that want to use `Vec<T>`.
896    macro_rules! implement_columnable {
897        ($($index_type:ty),*) => { $(
898            impl crate::Columnar for $index_type {
899                #[inline(always)]
900                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { *other }
901
902                type Container = Vec<$index_type>;
903            }
904            impl crate::Container for Vec<$index_type> {
905                type Ref<'a> = &'a $index_type;
906                type Borrowed<'a> = &'a [$index_type];
907                #[inline(always)]
908                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { &self[..] }
909                #[inline(always)]
910                fn reborrow<'b, 'a: 'b>(thing: &'a [$index_type]) -> Self::Borrowed<'b> where Self: 'a { thing }
911                #[inline(always)]
912                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
913
914                #[inline(always)]
915                fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) { self.extend_from_slice(&other[range]) }
916            }
917
918            impl crate::HeapSize for $index_type { }
919
920            impl<'a> crate::AsBytes<'a> for &'a [$index_type] {
921                #[inline(always)]
922                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
923                    std::iter::once((std::mem::align_of::<$index_type>() as u64, bytemuck::cast_slice(&self[..])))
924                }
925            }
926            impl<'a> crate::FromBytes<'a> for &'a [$index_type] {
927                #[inline(always)]
928                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
929                    // We use `unwrap()` here in order to panic with the `bytemuck` error, which may be informative.
930                    bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()
931                }
932            }
933        )* }
934    }
935
936    implement_columnable!(u8, u16, u32, u64, u128);
937    implement_columnable!(i8, i16, i32, i64, i128);
938    implement_columnable!(f32, f64);
939    implement_columnable!(Wrapping<u8>, Wrapping<u16>, Wrapping<u32>, Wrapping<u64>, Wrapping<u128>);
940    implement_columnable!(Wrapping<i8>, Wrapping<i16>, Wrapping<i32>, Wrapping<i64>, Wrapping<i128>);
941
942    pub use sizes::{Usizes, Isizes};
943    /// Columnar stores for `usize` and `isize`, stored as 64 bits.
944    mod sizes {
945
946        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
947        use crate::common::PushIndexAs;
948
949        #[derive(Copy, Clone, Default)]
950        pub struct Usizes<CV = Vec<u64>> { pub values: CV }
951
952        impl Columnar for usize {
953            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
954            type Container = Usizes;
955        }
956
957        impl<CV: PushIndexAs<u64>> Container for Usizes<CV> {
958            type Ref<'a> = usize;
959            type Borrowed<'a> = Usizes<CV::Borrowed<'a>> where CV: 'a;
960            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
961                Usizes { values: self.values.borrow() }
962            }
963            #[inline(always)]
964            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
965                Usizes { values: CV::reborrow(thing.values) }
966            }
967            #[inline(always)]
968            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
969
970            #[inline(always)]
971            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
972                self.values.extend_from_self(other.values, range)
973            }
974        }
975
976        impl<CV: Len> Len for Usizes<CV> { fn len(&self) -> usize { self.values.len() }}
977        impl IndexMut for Usizes {
978            type IndexMut<'a> = &'a mut u64;
979            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
980        }
981        impl<CV: IndexAs<u64>> Index for Usizes<CV> {
982            type Ref = usize;
983            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
984        }
985        impl<CV: IndexAs<u64>> Index for &Usizes<CV> {
986            type Ref = usize;
987            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Usizes values should fit in `usize`") }
988        }
989        impl<CV: for<'a> Push<&'a u64>> Push<usize> for Usizes<CV> {
990            #[inline]
991            fn push(&mut self, item: usize) { self.values.push(&item.try_into().expect("usize must fit in a u64")) }
992        }
993        impl Push<&usize> for Usizes {
994            #[inline]
995            fn push(&mut self, item: &usize) { self.values.push((*item).try_into().expect("usize must fit in a u64")) }
996        }
997        impl<CV: Clear> Clear for Usizes<CV> { fn clear(&mut self) { self.values.clear() }}
998
999        impl<CV: HeapSize> HeapSize for Usizes<CV> {
1000            fn heap_size(&self) -> (usize, usize) {
1001                self.values.heap_size()
1002            }
1003        }
1004
1005        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Usizes<CV> {
1006            #[inline(always)]
1007            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1008                self.values.as_bytes()
1009            }
1010        }
1011
1012        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Usizes<CV> {
1013            #[inline(always)]
1014            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1015                Self { values: CV::from_bytes(bytes) }
1016            }
1017        }
1018
1019
1020        #[derive(Copy, Clone, Default)]
1021        pub struct Isizes<CV = Vec<i64>> { pub values: CV }
1022
1023        impl Columnar for isize {
1024            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1025            type Container = Isizes;
1026        }
1027
1028        impl<CV: PushIndexAs<i64>> Container for Isizes<CV> {
1029            type Ref<'a> = isize;
1030            type Borrowed<'a> = Isizes<CV::Borrowed<'a>> where CV: 'a;
1031            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1032                Isizes { values: self.values.borrow() }
1033            }
1034            #[inline(always)]
1035            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where CV: 'a {
1036                Isizes { values: CV::reborrow(thing.values) }
1037            }
1038            #[inline(always)]
1039            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1040
1041            #[inline(always)]
1042            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1043                self.values.extend_from_self(other.values, range)
1044            }
1045        }
1046
1047        impl<CV: Len> Len for Isizes<CV> { fn len(&self) -> usize { self.values.len() }}
1048        impl IndexMut for Isizes {
1049            type IndexMut<'a> = &'a mut i64;
1050            #[inline(always)] fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> { &mut self.values[index] }
1051        }
1052        impl<CV: IndexAs<i64>> Index for Isizes<CV> {
1053            type Ref = isize;
1054            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1055        }
1056        impl<CV: IndexAs<i64>> Index for &Isizes<CV> {
1057            type Ref = isize;
1058            #[inline(always)] fn get(&self, index: usize) -> Self::Ref { self.values.index_as(index).try_into().expect("Isizes values should fit in `isize`") }
1059        }
1060        impl<CV: for<'a> Push<&'a i64>> Push<isize> for Isizes<CV> {
1061            #[inline]
1062            fn push(&mut self, item: isize) { self.values.push(&item.try_into().expect("isize must fit in a i64")) }
1063        }
1064        impl Push<&isize> for Isizes {
1065            #[inline]
1066            fn push(&mut self, item: &isize) { self.values.push((*item).try_into().expect("isize must fit in a i64")) }
1067        }
1068        impl<CV: Clear> Clear for Isizes<CV> { fn clear(&mut self) { self.values.clear() }}
1069
1070        impl<CV: HeapSize> HeapSize for Isizes<CV> {
1071            fn heap_size(&self) -> (usize, usize) {
1072                self.values.heap_size()
1073            }
1074        }
1075
1076        impl<'a, CV: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Isizes<CV> {
1077            #[inline(always)]
1078            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1079                self.values.as_bytes()
1080            }
1081        }
1082
1083        impl<'a, CV: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Isizes<CV> {
1084            #[inline(always)]
1085            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1086                Self { values: CV::from_bytes(bytes) }
1087            }
1088        }
1089    }
1090
1091    pub use empty::Empties;
1092    /// A columnar store for `()`.
1093    mod empty {
1094
1095        use crate::common::index::CopyAs;
1096        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize};
1097
1098        #[derive(Copy, Clone, Debug, Default)]
1099        pub struct Empties<CC = u64> { pub count: CC, pub empty: () }
1100
1101        impl Columnar for () {
1102            #[inline(always)]
1103            fn into_owned<'a>(_other: crate::Ref<'a, Self>) -> Self { }
1104            type Container = Empties;
1105        }
1106
1107        impl Container for Empties {
1108            type Ref<'a> = ();
1109            type Borrowed<'a> = Empties<&'a u64>;
1110            #[inline(always)]
1111            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> { Empties { count: &self.count, empty: () } }
1112            #[inline(always)]
1113            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where Self: 'a {
1114                Empties { count: thing.count, empty: () }
1115            }
1116            #[inline(always)]
1117            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1118
1119            #[inline(always)]
1120            fn extend_from_self(&mut self, _other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1121                self.count += range.len() as u64;
1122            }
1123        }
1124
1125        impl<CC: CopyAs<u64> + Copy> Len for Empties<CC> {
1126            #[inline(always)] fn len(&self) -> usize { self.count.copy_as() as usize }
1127        }
1128        impl<CC> IndexMut for Empties<CC> {
1129            type IndexMut<'a> = &'a mut () where CC: 'a;
1130            // TODO: panic if out of bounds?
1131            #[inline(always)] fn get_mut(&mut self, _index: usize) -> Self::IndexMut<'_> { &mut self.empty }
1132        }
1133        impl<CC> Index for Empties<CC> {
1134            type Ref = ();
1135            #[inline(always)]
1136            fn get(&self, _index: usize) -> Self::Ref { }
1137        }
1138        impl<'a, CC> Index for &'a Empties<CC> {
1139            type Ref = &'a ();
1140            #[inline(always)]
1141            fn get(&self, _index: usize) -> Self::Ref { &() }
1142        }
1143        impl Push<()> for Empties {
1144            // TODO: check for overflow?
1145            #[inline(always)]
1146            fn push(&mut self, _item: ()) { self.count += 1; }
1147            #[inline(always)]
1148            fn extend(&mut self, iter: impl IntoIterator<Item=()>) {
1149                self.count += iter.into_iter().count() as u64;
1150            }
1151        }
1152        impl<'a> Push<&'a ()> for Empties {
1153            // TODO: check for overflow?
1154            #[inline(always)]
1155            fn push(&mut self, _item: &()) { self.count += 1; }
1156            #[inline(always)]
1157            fn extend(&mut self, iter: impl IntoIterator<Item=&'a ()>) {
1158                self.count += iter.into_iter().count() as u64;
1159            }
1160        }
1161
1162        impl HeapSize for Empties {
1163            #[inline(always)]
1164            fn heap_size(&self) -> (usize, usize) { (0, 0) }
1165        }
1166        impl Clear for Empties {
1167            #[inline(always)]
1168            fn clear(&mut self) { self.count = 0; }
1169        }
1170
1171        impl<'a> crate::AsBytes<'a> for crate::primitive::Empties<&'a u64> {
1172            #[inline(always)]
1173            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1174                std::iter::once((8, bytemuck::cast_slice(std::slice::from_ref(self.count))))
1175            }
1176        }
1177        impl<'a> crate::FromBytes<'a> for crate::primitive::Empties<&'a u64> {
1178            #[inline(always)]
1179            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1180                Self { count: &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0], empty: () }
1181            }
1182        }
1183    }
1184
1185    pub use boolean::Bools;
1186    /// A columnar store for `bool`.
1187    mod boolean {
1188
1189        use crate::common::index::CopyAs;
1190        use crate::{Container, Clear, Len, Index, IndexAs, Push, HeapSize};
1191
1192        /// A store for maintaining `Vec<bool>`.
1193        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1194        pub struct Bools<VC = Vec<u64>, WC = u64> {
1195            /// The bundles of bits that form complete `u64` values.
1196            pub values: VC,
1197            /// The work-in-progress bits that are not yet complete.
1198            pub last_word: WC,
1199            /// The number of set bits in `bits.last()`.
1200            pub last_bits: WC,
1201        }
1202
1203        impl crate::Columnar for bool {
1204            #[inline(always)]
1205            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1206            type Container = Bools;
1207        }
1208
1209        impl<VC: crate::common::PushIndexAs<u64>> Container for Bools<VC> {
1210            type Ref<'a> = bool;
1211            type Borrowed<'a> = Bools<VC::Borrowed<'a>, &'a u64> where VC: 'a;
1212            #[inline(always)]
1213            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1214                Bools {
1215                    values: self.values.borrow(),
1216                    last_word: &self.last_word,
1217                    last_bits: &self.last_bits,
1218                }
1219            }
1220            #[inline(always)]
1221            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where VC: 'a {
1222                Bools {
1223                    values: VC::reborrow(thing.values),
1224                    last_word: thing.last_word,
1225                    last_bits: thing.last_bits,
1226                }
1227            }
1228            #[inline(always)]
1229            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1230
1231            // TODO: There is probably a smart way to implement `extend_from_slice`, but it isn't trivial due to alignment.
1232        }
1233
1234        impl<'a, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1235            #[inline(always)]
1236            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1237                let iter = self.values.as_bytes();
1238                let iter = crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_word))));
1239                crate::chain_one(iter, (std::mem::align_of::<u64>() as u64, bytemuck::cast_slice(std::slice::from_ref(self.last_bits))))
1240            }
1241        }
1242
1243        impl<'a, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Bools<VC, &'a u64> {
1244            #[inline(always)]
1245            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1246                let values = crate::FromBytes::from_bytes(bytes);
1247                let last_word = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1248                let last_bits = &bytemuck::try_cast_slice(bytes.next().expect("Iterator exhausted prematurely")).unwrap()[0];
1249                Self { values, last_word, last_bits }
1250            }
1251        }
1252
1253        impl<VC: Len, WC: Copy + CopyAs<u64>> Len for Bools<VC, WC> {
1254            #[inline(always)] fn len(&self) -> usize { self.values.len() * 64 + (self.last_bits.copy_as() as usize) }
1255        }
1256
1257        impl<VC: Len + IndexAs<u64>, WC: Copy + CopyAs<u64>> Index for Bools<VC, WC> {
1258            type Ref = bool;
1259            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1260                let block = index / 64;
1261                let word = if block == self.values.len() {
1262                    self.last_word.copy_as()
1263                } else {
1264                    self.values.index_as(block)
1265                };
1266                let bit = index % 64;
1267                (word >> bit) & 1 == 1
1268            }
1269        }
1270
1271        impl<VC: Len + IndexAs<u64>, WC: Copy + CopyAs<u64>> Index for &Bools<VC, WC> {
1272            type Ref = bool;
1273            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1274                (*self).get(index)
1275            }
1276        }
1277
1278        impl<VC: for<'a> Push<&'a u64>> Push<bool> for Bools<VC> {
1279            #[inline]
1280            fn push(&mut self, bit: bool) {
1281                self.last_word |= (bit as u64) << self.last_bits;
1282                self.last_bits += 1;
1283                // If we have a fully formed word, commit it to `self.values`.
1284                if self.last_bits == 64 {
1285                    self.values.push(&self.last_word);
1286                    self.last_word = 0;
1287                    self.last_bits = 0;
1288                }
1289            }
1290        }
1291        impl<'a, VC: for<'b> Push<&'b u64>> Push<&'a bool> for Bools<VC> {
1292            #[inline(always)]
1293            fn push(&mut self, bit: &'a bool) {
1294                self.push(*bit)
1295            }
1296        }
1297
1298
1299        impl<VC: Clear> Clear for Bools<VC> {
1300            #[inline(always)]
1301            fn clear(&mut self) {
1302                self.values.clear();
1303                self.last_word = 0;
1304                self.last_bits = 0;
1305            }
1306        }
1307
1308        impl<VC: HeapSize> HeapSize for Bools<VC> {
1309            #[inline(always)]
1310            fn heap_size(&self) -> (usize, usize) {
1311                self.values.heap_size()
1312            }
1313        }
1314    }
1315
1316    pub use duration::Durations;
1317    /// A columnar store for `std::time::Duration`.
1318    mod duration {
1319
1320        use std::time::Duration;
1321        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize};
1322
1323        // `std::time::Duration` is equivalent to `(u64, u32)`, corresponding to seconds and nanoseconds.
1324        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1325        pub struct Durations<SC = Vec<u64>, NC = Vec<u32>> {
1326            pub seconds: SC,
1327            pub nanoseconds: NC,
1328        }
1329
1330        impl crate::Columnar for Duration {
1331            #[inline(always)]
1332            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other }
1333            type Container = Durations;
1334        }
1335
1336        impl<SC: crate::common::PushIndexAs<u64>, NC: crate::common::PushIndexAs<u32>> Container for Durations<SC, NC> {
1337            type Ref<'a> = Duration;
1338            type Borrowed<'a> = Durations<SC::Borrowed<'a>, NC::Borrowed<'a>> where SC: 'a, NC: 'a;
1339            #[inline(always)]
1340            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1341                Durations {
1342                    seconds: self.seconds.borrow(),
1343                    nanoseconds: self.nanoseconds.borrow(),
1344                }
1345            }
1346            #[inline(always)]
1347            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, NC: 'a {
1348                Durations {
1349                    seconds: SC::reborrow(thing.seconds),
1350                    nanoseconds: NC::reborrow(thing.nanoseconds),
1351                }
1352            }
1353            #[inline(always)]
1354            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1355
1356            #[inline(always)]
1357            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1358                self.seconds.extend_from_self(other.seconds, range.clone());
1359                self.nanoseconds.extend_from_self(other.nanoseconds, range);
1360            }
1361        }
1362
1363        impl<'a, SC: crate::AsBytes<'a>, NC: crate::AsBytes<'a>> crate::AsBytes<'a> for crate::primitive::Durations<SC, NC> {
1364            #[inline(always)]
1365            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1366                crate::chain(self.seconds.as_bytes(), self.nanoseconds.as_bytes())
1367            }
1368        }
1369        impl<'a, SC: crate::FromBytes<'a>, NC: crate::FromBytes<'a>> crate::FromBytes<'a> for crate::primitive::Durations<SC, NC> {
1370            #[inline(always)]
1371            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1372                Self {
1373                    seconds: crate::FromBytes::from_bytes(bytes),
1374                    nanoseconds: crate::FromBytes::from_bytes(bytes),
1375                }
1376            }
1377        }
1378
1379        impl<SC: Len, NC> Len for Durations<SC, NC> {
1380            #[inline(always)] fn len(&self) -> usize { self.seconds.len() }
1381        }
1382
1383        impl<SC: IndexAs<u64>, NC: IndexAs<u32>> Index for Durations<SC, NC> {
1384            type Ref = Duration;
1385            #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1386                Duration::new(self.seconds.index_as(index), self.nanoseconds.index_as(index))
1387            }
1388        }
1389
1390        impl<SC: for<'a> Push<&'a u64>, NC: for<'a> Push<&'a u32>> Push<std::time::Duration> for Durations<SC, NC> {
1391            #[inline]
1392            fn push(&mut self, item: std::time::Duration) {
1393                self.seconds.push(&item.as_secs());
1394                self.nanoseconds.push(&item.subsec_nanos());
1395            }
1396        }
1397        impl<'a, SC: for<'b> Push<&'b u64>, NC: for<'b> Push<&'b u32>> Push<&'a std::time::Duration> for Durations<SC, NC> {
1398            #[inline]
1399            fn push(&mut self, item: &'a std::time::Duration) {
1400                self.push(*item)
1401            }
1402        }
1403        impl<'a, SC: Push<&'a u64>, NC: Push<&'a u32>> Push<(&'a u64, &'a u32)> for Durations<SC, NC> {
1404            #[inline]
1405            fn push(&mut self, item: (&'a u64, &'a u32)) {
1406                self.seconds.push(item.0);
1407                self.nanoseconds.push(item.1);
1408            }
1409        }
1410
1411        impl<SC: Clear, NC: Clear> Clear for Durations<SC, NC> {
1412            #[inline(always)]
1413            fn clear(&mut self) {
1414                self.seconds.clear();
1415                self.nanoseconds.clear();
1416            }
1417        }
1418
1419        impl<SC: HeapSize, NC: HeapSize> HeapSize for Durations<SC, NC> {
1420            #[inline(always)]
1421            fn heap_size(&self) -> (usize, usize) {
1422                let (l0, c0) = self.seconds.heap_size();
1423                let (l1, c1) = self.nanoseconds.heap_size();
1424                (l0 + l1, c0 + c1)
1425            }
1426        }
1427    }
1428}
1429
1430pub use string::Strings;
1431pub mod string {
1432
1433    use super::{Clear, Columnar, Container, Len, Index, IndexAs, Push, HeapSize};
1434
1435    /// A stand-in for `Vec<String>`.
1436    #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
1437    pub struct Strings<BC = Vec<u64>, VC = Vec<u8>> {
1438        /// Bounds container; provides indexed access to offsets.
1439        pub bounds: BC,
1440        /// Values container; provides slice access to bytes.
1441        pub values: VC,
1442    }
1443
1444    impl Columnar for String {
1445        #[inline(always)]
1446        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
1447            self.clear();
1448            self.push_str(other);
1449        }
1450        #[inline(always)]
1451        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self { other.to_string() }
1452        type Container = Strings;
1453    }
1454
1455    impl<BC: crate::common::PushIndexAs<u64>> Container for Strings<BC, Vec<u8>> {
1456        type Ref<'a> = &'a str;
1457        type Borrowed<'a> = Strings<BC::Borrowed<'a>, &'a [u8]> where BC: 'a;
1458        #[inline(always)]
1459        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1460            Strings {
1461                bounds: self.bounds.borrow(),
1462                values: self.values.borrow(),
1463            }
1464        }
1465        #[inline(always)]
1466        fn reborrow<'c, 'a: 'c>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'c> where BC: 'a {
1467            Strings {
1468                bounds: BC::reborrow(thing.bounds),
1469                values: thing.values,
1470            }
1471        }
1472        #[inline(always)]
1473        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a { thing }
1474
1475        #[inline(always)]
1476        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1477            if !range.is_empty() {
1478                // Imported bounds will be relative to this starting offset.
1479                let values_len = self.values.len() as u64;
1480
1481                // Push all bytes that we can, all at once.
1482                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
1483                let other_upper = other.bounds.index_as(range.end-1);
1484                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
1485
1486                // Each bound needs to be shifted by `values_len - other_lower`.
1487                if values_len == other_lower {
1488                    self.bounds.extend_from_self(other.bounds, range);
1489                }
1490                else {
1491                    for index in range {
1492                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
1493                        self.bounds.push(&shifted)
1494                    }
1495                }
1496            }
1497        }
1498    }
1499
1500    impl<'a, BC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Strings<BC, VC> {
1501        #[inline(always)]
1502        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1503            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
1504        }
1505    }
1506    impl<'a, BC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Strings<BC, VC> {
1507        #[inline(always)]
1508        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1509            Self {
1510                bounds: crate::FromBytes::from_bytes(bytes),
1511                values: crate::FromBytes::from_bytes(bytes),
1512            }
1513        }
1514    }
1515
1516    impl<BC: Len, VC> Len for Strings<BC, VC> {
1517        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
1518    }
1519
1520    impl<'a, BC: Len+IndexAs<u64>> Index for Strings<BC, &'a [u8]> {
1521        type Ref = &'a str;
1522        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1523            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1524            let upper = self.bounds.index_as(index);
1525            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
1526            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
1527            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
1528        }
1529    }
1530    impl<'a, BC: Len+IndexAs<u64>> Index for &'a Strings<BC, Vec<u8>> {
1531        type Ref = &'a str;
1532        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
1533            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1534            let upper = self.bounds.index_as(index);
1535            let lower: usize = lower.try_into().expect("bounds must fit in `usize`");
1536            let upper: usize = upper.try_into().expect("bounds must fit in `usize`");
1537            std::str::from_utf8(&self.values[lower .. upper]).expect("&[u8] must be valid utf8")
1538        }
1539    }
1540
1541    // This is a simpler implementation, but it leads to a performance regression
1542    // for Strings and str because it loses access to `Vec::extend_from_slice`.
1543    // 
1544    // impl<BC: Push<u64>, D: std::fmt::Display> Push<D> for Strings<BC> {
1545    //     #[inline(always)]
1546    //     fn push(&mut self, item: D) {
1547    //         use std::io::Write;
1548    //         write!(self.values, "{}", item).unwrap();
1549    //         self.bounds.push(self.values.len() as u64);
1550    //     }
1551    // }
1552
1553    impl<BC: for<'a> Push<&'a u64>> Push<&String> for Strings<BC> {
1554        #[inline(always)] fn push(&mut self, item: &String) {
1555            self.values.extend_from_slice(item.as_bytes());
1556            self.bounds.push(&(self.values.len() as u64));
1557        }
1558    }
1559    impl<BC: for<'a> Push<&'a u64>> Push<&str> for Strings<BC> {
1560        #[inline]
1561        fn push(&mut self, item: &str) {
1562            self.values.extend_from_slice(item.as_bytes());
1563            self.bounds.push(&(self.values.len() as u64));
1564        }
1565    }
1566    impl<'a, BC: for<'b> Push<&'b u64>> Push<std::fmt::Arguments<'a>> for Strings<BC> {
1567        #[inline]
1568        fn push(&mut self, item: std::fmt::Arguments<'a>) {
1569            use std::io::Write;
1570            self.values.write_fmt(item).expect("write_fmt failed");
1571            self.bounds.push(&(self.values.len() as u64));
1572        }
1573    }
1574    impl<'a, 'b, BC: for<'c> Push<&'c u64>> Push<&'b std::fmt::Arguments<'a>> for Strings<BC> {
1575        #[inline]
1576        fn push(&mut self, item: &'b std::fmt::Arguments<'a>) {
1577            use std::io::Write;
1578            self.values.write_fmt(*item).expect("write_fmt failed");
1579            self.bounds.push(&(self.values.len() as u64));
1580        }
1581    }
1582    impl<BC: Clear, VC: Clear> Clear for Strings<BC, VC> {
1583        #[inline(always)]
1584        fn clear(&mut self) {
1585            self.bounds.clear();
1586            self.values.clear();
1587        }
1588    }
1589    impl<BC: HeapSize, VC: HeapSize> HeapSize for Strings<BC, VC> {
1590        #[inline(always)]
1591        fn heap_size(&self) -> (usize, usize) {
1592            let (l0, c0) = self.bounds.heap_size();
1593            let (l1, c1) = self.values.heap_size();
1594            (l0 + l1, c0 + c1)
1595        }
1596    }
1597}
1598
1599pub use vector::Vecs;
1600pub mod vector {
1601
1602    use super::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize, Slice};
1603
1604    /// A stand-in for `Vec<Vec<T>>` for complex `T`.
1605    #[derive(Debug, Default, Copy, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
1606    pub struct Vecs<TC, BC = Vec<u64>> {
1607        pub bounds: BC,
1608        pub values: TC,
1609    }
1610
1611    impl<T: Columnar> Columnar for Vec<T> {
1612        #[inline(always)]
1613        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
1614            self.truncate(other.len());
1615            let mut other_iter = other.into_iter();
1616            for (s, o) in self.iter_mut().zip(&mut other_iter) {
1617                T::copy_from(s, o);
1618            }
1619            for o in other_iter {
1620                self.push(T::into_owned(o));
1621            }
1622        }
1623        #[inline(always)]
1624        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
1625            other.into_iter().map(|x| T::into_owned(x)).collect()
1626        }
1627        type Container = Vecs<T::Container>;
1628    }
1629
1630    impl<T: Columnar, const N: usize> Columnar for [T; N] {
1631        #[inline(always)]
1632        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
1633            for (s, o) in self.iter_mut().zip(other.into_iter()) {
1634                T::copy_from(s, o);
1635            }
1636        }
1637        #[inline(always)]
1638        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
1639            let vec: Vec<_> = other.into_iter().map(|x| T::into_owned(x)).collect();
1640            match vec.try_into() {
1641                Ok(array) => array,
1642                Err(_) => panic!("wrong length"),
1643            }
1644        }
1645        type Container = Vecs<T::Container>;
1646    }
1647
1648    impl<T: Columnar, const N: usize> Columnar for smallvec::SmallVec<[T; N]> {
1649        #[inline(always)]
1650        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
1651            self.truncate(other.len());
1652            let mut other_iter = other.into_iter();
1653            for (s, o) in self.iter_mut().zip(&mut other_iter) {
1654                T::copy_from(s, o);
1655            }
1656            for o in other_iter {
1657                self.push(T::into_owned(o));
1658            }
1659        }
1660        #[inline(always)]
1661        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
1662            other.into_iter().map(|x| T::into_owned(x)).collect()
1663        }
1664        type Container = Vecs<T::Container>;
1665    }
1666
1667    impl<BC: crate::common::PushIndexAs<u64>, TC: Container> Container for Vecs<TC, BC> {
1668        type Ref<'a> = Slice<TC::Borrowed<'a>> where TC: 'a;
1669        type Borrowed<'a> = Vecs<TC::Borrowed<'a>, BC::Borrowed<'a>> where BC: 'a, TC: 'a;
1670        #[inline(always)]
1671        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1672            Vecs {
1673                bounds: self.bounds.borrow(),
1674                values: self.values.borrow(),
1675            }
1676        }
1677        #[inline(always)]
1678        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where BC: 'a, TC: 'a {
1679            Vecs {
1680                bounds: BC::reborrow(thing.bounds),
1681                values: TC::reborrow(thing.values),
1682            }
1683        }
1684        #[inline(always)]
1685        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
1686            thing.map(|x| TC::reborrow(x))
1687        }
1688
1689        #[inline(always)]
1690        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1691            if !range.is_empty() {
1692                // Imported bounds will be relative to this starting offset.
1693                let values_len = self.values.len() as u64;
1694
1695                // Push all bytes that we can, all at once.
1696                let other_lower = if range.start == 0 { 0 } else { other.bounds.index_as(range.start-1) };
1697                let other_upper = other.bounds.index_as(range.end-1);
1698                self.values.extend_from_self(other.values, other_lower as usize .. other_upper as usize);
1699
1700                // Each bound needs to be shifted by `values_len - other_lower`.
1701                if values_len == other_lower {
1702                    self.bounds.extend_from_self(other.bounds, range);
1703                }
1704                else {
1705                    for index in range {
1706                        let shifted = other.bounds.index_as(index) - other_lower + values_len;
1707                        self.bounds.push(&shifted)
1708                    }
1709                }
1710            }
1711        }
1712    }
1713
1714    impl<'a, TC: crate::AsBytes<'a>, BC: crate::AsBytes<'a>> crate::AsBytes<'a> for Vecs<TC, BC> {
1715        #[inline(always)]
1716        fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1717            crate::chain(self.bounds.as_bytes(), self.values.as_bytes())
1718        }
1719    }
1720    impl<'a, TC: crate::FromBytes<'a>, BC: crate::FromBytes<'a>> crate::FromBytes<'a> for Vecs<TC, BC> {
1721        #[inline(always)]
1722        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1723            Self {
1724                bounds: crate::FromBytes::from_bytes(bytes),
1725                values: crate::FromBytes::from_bytes(bytes),
1726            }
1727        }
1728    }
1729
1730    impl<TC: Len> Vecs<TC> {
1731        #[inline]
1732        pub fn push_iter<I>(&mut self, iter: I) where I: IntoIterator, TC: Push<I::Item> {
1733            self.values.extend(iter);
1734            self.bounds.push(self.values.len() as u64);
1735        }
1736    }
1737
1738    impl<TC, BC: Len> Len for Vecs<TC, BC> {
1739        #[inline(always)] fn len(&self) -> usize { self.bounds.len() }
1740    }
1741
1742    impl<TC: Copy, BC: Len+IndexAs<u64>> Index for Vecs<TC, BC> {
1743        type Ref = Slice<TC>;
1744        #[inline(always)]
1745        fn get(&self, index: usize) -> Self::Ref {
1746            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1747            let upper = self.bounds.index_as(index);
1748            Slice::new(lower, upper, self.values)
1749        }
1750    }
1751    impl<'a, TC, BC: Len+IndexAs<u64>> Index for &'a Vecs<TC, BC> {
1752        type Ref = Slice<&'a TC>;
1753        #[inline(always)]
1754        fn get(&self, index: usize) -> Self::Ref {
1755            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1756            let upper = self.bounds.index_as(index);
1757            Slice::new(lower, upper, &self.values)
1758        }
1759    }
1760    impl<TC, BC: Len+IndexAs<u64>> IndexMut for Vecs<TC, BC> {
1761        type IndexMut<'a> = Slice<&'a mut TC> where TC: 'a, BC: 'a;
1762
1763        #[inline(always)]
1764        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1765            let lower = if index == 0 { 0 } else { self.bounds.index_as(index - 1) };
1766            let upper = self.bounds.index_as(index);
1767            Slice::new(lower, upper, &mut self.values)
1768        }
1769    }
1770
1771    impl<'a, TC: Container, BC: for<'b> Push<&'b u64>> Push<Slice<TC::Borrowed<'a>>> for Vecs<TC, BC> {
1772        #[inline]
1773        fn push(&mut self, item: Slice<TC::Borrowed<'a>>) {
1774            self.values.extend_from_self(item.slice, item.lower .. item.upper);
1775            self.bounds.push(&(self.values.len() as u64));
1776        }
1777    }
1778
1779    impl<I: IntoIterator, TC: Push<I::Item> + Len, BC: for<'a> Push<&'a u64>> Push<I> for Vecs<TC, BC> {
1780        #[inline]
1781        fn push(&mut self, item: I) {
1782            self.values.extend(item);
1783            self.bounds.push(&(self.values.len() as u64));
1784        }
1785    }
1786
1787    impl<TC: Clear, BC: Clear> Clear for Vecs<TC, BC> {
1788        #[inline(always)]
1789        fn clear(&mut self) {
1790            self.bounds.clear();
1791            self.values.clear();
1792        }
1793    }
1794
1795    impl<TC: HeapSize, BC: HeapSize> HeapSize for Vecs<TC, BC> {
1796        fn heap_size(&self) -> (usize, usize) {
1797            let (l0, c0) = self.bounds.heap_size();
1798            let (l1, c1) = self.values.heap_size();
1799            (l0 + l1, c0 + c1)
1800        }
1801    }
1802}
1803
1804#[allow(non_snake_case)]
1805pub mod tuple {
1806
1807    use super::{Clear, Columnar, Container, Len, IndexMut, Index, Push, HeapSize};
1808
1809    // Implementations for tuple types.
1810    // These are all macro based, because the implementations are very similar.
1811    // The macro requires two names, one for the store and one for pushable types.
1812    macro_rules! tuple_impl {
1813        ( $($name:ident,$name2:ident)+) => (
1814
1815            impl<$($name: Columnar),*> Columnar for ($($name,)*) {
1816                #[inline(always)]
1817                fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
1818                    let ($($name,)*) = self;
1819                    let ($($name2,)*) = other;
1820                    $(crate::Columnar::copy_from($name, $name2);)*
1821                }
1822                #[inline(always)]
1823                fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
1824                    let ($($name2,)*) = other;
1825                    ($($name::into_owned($name2),)*)
1826                }
1827                type Container = ($($name::Container,)*);
1828            }
1829            impl<$($name2: Container,)*> Container for ($($name2,)*) {
1830                type Ref<'a> = ($($name2::Ref<'a>,)*) where $($name2: 'a,)*;
1831                type Borrowed<'a> = ($($name2::Borrowed<'a>,)*) where $($name2: 'a,)*;
1832                #[inline(always)]
1833                fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1834                    let ($($name,)*) = self;
1835                    ($($name.borrow(),)*)
1836                }
1837                #[inline(always)]
1838                fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where $($name2: 'a,)* {
1839                    let ($($name,)*) = thing;
1840                    ($($name2::reborrow($name),)*)
1841                }
1842                #[inline(always)]
1843                fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
1844                    let ($($name2,)*) = thing;
1845                    ($($name2::reborrow_ref($name2),)*)
1846                }
1847
1848                #[inline(always)]
1849                fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
1850                    let ($($name,)*) = self;
1851                    let ($($name2,)*) = other;
1852                    $( $name.extend_from_self($name2, range.clone()); )*
1853                }
1854            }
1855
1856            #[allow(non_snake_case)]
1857            impl<'a, $($name: crate::AsBytes<'a>),*> crate::AsBytes<'a> for ($($name,)*) {
1858                #[inline(always)]
1859                fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
1860                    let ($($name,)*) = self;
1861                    let iter = None.into_iter();
1862                    $( let iter = crate::chain(iter, $name.as_bytes()); )*
1863                    iter
1864                }
1865            }
1866            impl<'a, $($name: crate::FromBytes<'a>),*> crate::FromBytes<'a> for ($($name,)*) {
1867                #[inline(always)]
1868                #[allow(non_snake_case)]
1869                fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1870                    $(let $name = crate::FromBytes::from_bytes(bytes);)*
1871                    ($($name,)*)
1872                }
1873            }
1874
1875            impl<$($name: Len),*> Len for ($($name,)*) {
1876                #[inline(always)]
1877                fn len(&self) -> usize {
1878                    self.0.len()
1879                }
1880            }
1881            impl<$($name: Clear),*> Clear for ($($name,)*) {
1882                #[inline(always)]
1883                fn clear(&mut self) {
1884                    let ($($name,)*) = self;
1885                    $($name.clear();)*
1886                }
1887            }
1888            impl<$($name: HeapSize),*> HeapSize for ($($name,)*) {
1889                #[inline(always)]
1890                fn heap_size(&self) -> (usize, usize) {
1891                    let ($($name,)*) = self;
1892                    let mut l = 0;
1893                    let mut c = 0;
1894                    $(let (l0, c0) = $name.heap_size(); l += l0; c += c0;)*
1895                    (l, c)
1896                }
1897            }
1898            impl<$($name: Index),*> Index for ($($name,)*) {
1899                type Ref = ($($name::Ref,)*);
1900                #[inline(always)]
1901                fn get(&self, index: usize) -> Self::Ref {
1902                    let ($($name,)*) = self;
1903                    ($($name.get(index),)*)
1904                }
1905            }
1906            impl<'a, $($name),*> Index for &'a ($($name,)*) where $( &'a $name: Index),* {
1907                type Ref = ($(<&'a $name as Index>::Ref,)*);
1908                #[inline(always)]
1909                fn get(&self, index: usize) -> Self::Ref {
1910                    let ($($name,)*) = self;
1911                    ($($name.get(index),)*)
1912                }
1913            }
1914
1915            impl<$($name: IndexMut),*> IndexMut for ($($name,)*) {
1916                type IndexMut<'a> = ($($name::IndexMut<'a>,)*) where $($name: 'a),*;
1917                #[inline(always)]
1918                fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1919                    let ($($name,)*) = self;
1920                    ($($name.get_mut(index),)*)
1921                }
1922            }
1923            impl<$($name2, $name: Push<$name2>),*> Push<($($name2,)*)> for ($($name,)*) {
1924                #[inline]
1925                fn push(&mut self, item: ($($name2,)*)) {
1926                    let ($($name,)*) = self;
1927                    let ($($name2,)*) = item;
1928                    $($name.push($name2);)*
1929                }
1930            }
1931            impl<'a, $($name2, $name: Push<&'a $name2>),*> Push<&'a ($($name2,)*)> for ($($name,)*) {
1932                #[inline]
1933                fn push(&mut self, item: &'a ($($name2,)*)) {
1934                    let ($($name,)*) = self;
1935                    let ($($name2,)*) = item;
1936                    $($name.push($name2);)*
1937                }
1938            }
1939        )
1940    }
1941
1942    tuple_impl!(A,AA);
1943    tuple_impl!(A,AA B,BB);
1944    tuple_impl!(A,AA B,BB C,CC);
1945    tuple_impl!(A,AA B,BB C,CC D,DD);
1946    tuple_impl!(A,AA B,BB C,CC D,DD E,EE);
1947    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF);
1948    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG);
1949    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH);
1950    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH I,II);
1951    tuple_impl!(A,AA B,BB C,CC D,DD E,EE F,FF G,GG H,HH I,II J,JJ);
1952
1953    #[cfg(test)]
1954    mod test {
1955        #[test]
1956        fn round_trip() {
1957
1958            use crate::common::{Index, Push, HeapSize, Len};
1959
1960            let mut column: crate::ContainerOf<(u64, u8, String)> = Default::default();
1961            for i in 0..100 {
1962                column.push((i, i as u8, &i.to_string()));
1963                column.push((i, i as u8, &"".to_string()));
1964            }
1965
1966            assert_eq!(column.len(), 200);
1967            assert_eq!(column.heap_size(), (3590, 4608));
1968
1969            for i in 0..100u64 {
1970                assert_eq!((&column).get((2*i+0) as usize), (&i, &(i as u8), i.to_string().as_str()));
1971                assert_eq!((&column).get((2*i+1) as usize), (&i, &(i as u8), ""));
1972            }
1973
1974            // Compare to the heap size of a `Vec<Option<usize>>`.
1975            let mut column: Vec<(u64, u8, String)> = Default::default();
1976            for i in 0..100 {
1977                column.push((i, i as u8, i.to_string()));
1978                column.push((i, i as u8, "".to_string()));
1979            }
1980            assert_eq!(column.heap_size(), (8190, 11040));
1981
1982        }
1983    }
1984}
1985
1986pub use sums::{rank_select::RankSelect, result::Results, option::Options};
1987/// Containers for enumerations ("sum types") that store variants separately.
1988///
1989/// The main work of these types is storing a discriminant and index efficiently,
1990/// as containers for each of the variant types can hold the actual data.
1991pub mod sums {
1992
1993    /// Stores for maintaining discriminants, and associated sequential indexes.
1994    ///
1995    /// The sequential indexes are not explicitly maintained, but are supported
1996    /// by a `rank(index)` function that indicates how many of a certain variant
1997    /// precede the given index. While this could potentially be done with a scan
1998    /// of all preceding discriminants, the stores maintain running accumulations
1999    /// that make the operation constant time (using additional amortized memory).
2000    pub mod rank_select {
2001
2002        use crate::primitive::Bools;
2003        use crate::common::index::CopyAs;
2004        use crate::{Container, Len, Index, IndexAs, Push, Clear, HeapSize};
2005
2006        /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
2007        ///
2008        /// The design is to have `u64` running counts for each block of 1024 bits,
2009        /// which are roughly the size of a cache line. This is roughly 6% overhead,
2010        /// above the bits themselves, which seems pretty solid.
2011        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2012        pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = u64> {
2013            /// Counts of the number of cumulative set (true) bits, *after* each block of 1024 bits.
2014            pub counts: CC,
2015            /// The bits themselves.
2016            pub values: Bools<VC, WC>,
2017        }
2018
2019        impl<CC: crate::common::PushIndexAs<u64>, VC: crate::common::PushIndexAs<u64>> RankSelect<CC, VC> {
2020            pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64> {
2021                RankSelect {
2022                    counts: self.counts.borrow(),
2023                    values: self.values.borrow(),
2024                }
2025            }
2026            #[inline(always)]
2027            pub fn reborrow<'b, 'a: 'b>(thing: RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a u64>) -> RankSelect<CC::Borrowed<'b>, VC::Borrowed<'b>, &'b u64> {
2028                RankSelect {
2029                    counts: CC::reborrow(thing.counts),
2030                    values: Bools::<VC, u64>::reborrow(thing.values),
2031                }
2032            }
2033        }
2034
2035        impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a u64> {
2036            #[inline(always)]
2037            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2038                crate::chain(self.counts.as_bytes(), self.values.as_bytes())
2039            }
2040        }
2041        impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a u64> {
2042            #[inline(always)]
2043            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2044                Self {
2045                    counts: crate::FromBytes::from_bytes(bytes),
2046                    values: crate::FromBytes::from_bytes(bytes),
2047                }
2048            }
2049        }
2050
2051
2052        impl<CC, VC: Len + IndexAs<u64>, WC: Copy+CopyAs<u64>> RankSelect<CC, VC, WC> {
2053            #[inline(always)]
2054            pub fn get(&self, index: usize) -> bool {
2055                Index::get(&self.values, index)
2056            }
2057        }
2058        impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: Copy+CopyAs<u64>> RankSelect<CC, VC, WC> {
2059            /// The number of set bits *strictly* preceding `index`.
2060            ///
2061            /// This number is accumulated first by reading out of `self.counts` at the correct position,
2062            /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
2063            /// masked `u64` in which the bit lives.
2064            pub fn rank(&self, index: usize) -> usize {
2065                let bit = index % 64;
2066                let block = index / 64;
2067                let chunk = block / 16;
2068                let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
2069                for pos in (16 * chunk) .. block {
2070                    count += self.values.values.index_as(pos).count_ones() as usize;
2071                }
2072                // TODO: Panic if out of bounds?
2073                let intra_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2074                count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
2075                count
2076            }
2077            /// The index of the `rank`th set bit, should one exist.
2078            pub fn select(&self, rank: u64) -> Option<usize> {
2079                let mut chunk = 0;
2080                // Step one is to find the position in `counts` where we go from `rank` to `rank + 1`.
2081                // The position we are looking for is within that chunk of bits.
2082                // TODO: Binary search is likely better at many scales. Rust's binary search is .. not helpful with ties.
2083                while chunk < self.counts.len() && self.counts.index_as(chunk) <= rank {
2084                    chunk += 1;
2085                }
2086                let mut count = if chunk < self.counts.len() { self.counts.index_as(chunk) } else { 0 };
2087                // Step two is to find the position within that chunk where the `rank`th bit is.
2088                let mut block = 16 * chunk;
2089                while block < self.values.values.len() && count + (self.values.values.index_as(block).count_ones() as u64) <= rank {
2090                    count += self.values.values.index_as(block).count_ones() as u64;
2091                    block += 1;
2092                }
2093                // Step three is to search the last word for the location, or return `None` if we run out of bits.
2094                let last_bits = if block == self.values.values.len() { self.values.last_bits.copy_as() as usize } else { 64 };
2095                let last_word = if block == self.values.values.len() { self.values.last_word.copy_as() } else { self.values.values.index_as(block) };
2096                for shift in 0 .. last_bits {
2097                    if ((last_word >> shift) & 0x01 == 0x01) && count + 1 == rank {
2098                        return Some(64 * block + shift);
2099                    }
2100                    count += (last_word >> shift) & 0x01;
2101                }
2102
2103                None
2104            }
2105        }
2106
2107        impl<CC, VC: Len, WC: Copy + CopyAs<u64>> RankSelect<CC, VC, WC> {
2108            pub fn len(&self) -> usize {
2109                self.values.len()
2110            }
2111        }
2112
2113        // This implementation probably only works for `Vec<u64>` and `Vec<u64>`, but we could fix that.
2114        // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
2115        impl<CC: for<'a> Push<&'a u64> + Len + IndexAs<u64>, VC: for<'a> Push<&'a u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
2116            #[inline]
2117            pub fn push(&mut self, bit: bool) {
2118                self.values.push(&bit);
2119                while self.counts.len() < self.values.len() / 1024 {
2120                    let mut count = self.counts.last().unwrap_or(0);
2121                    let lower = 16 * self.counts.len();
2122                    let upper = lower + 16;
2123                    for i in lower .. upper {
2124                        count += self.values.values.index_as(i).count_ones() as u64;
2125                    }
2126                    self.counts.push(&count);
2127                }
2128            }
2129        }
2130        impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
2131            fn clear(&mut self) {
2132                self.counts.clear();
2133                self.values.clear();
2134            }
2135        }
2136        impl<CC: HeapSize, VC: HeapSize> HeapSize for RankSelect<CC, VC> {
2137            fn heap_size(&self) -> (usize, usize) {
2138                let (l0, c0) = self.counts.heap_size();
2139                let (l1, c1) = self.values.heap_size();
2140                (l0 + l1, c0 + c1)
2141            }
2142        }
2143    }
2144
2145    pub mod result {
2146
2147        use crate::common::index::CopyAs;
2148        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2149        use crate::RankSelect;
2150
2151        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2152        pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2153            /// Bits set to `true` correspond to `Ok` variants.
2154            pub indexes: RankSelect<CC, VC, WC>,
2155            pub oks: SC,
2156            pub errs: TC,
2157        }
2158
2159        impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
2160            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2161                match (&mut *self, other) {
2162                    (Ok(x), Ok(y)) => x.copy_from(y),
2163                    (Err(x), Err(y)) => x.copy_from(y),
2164                    (_, other) => { *self = Self::into_owned(other); },
2165                }
2166            }
2167            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2168                match other {
2169                    Ok(y) => Ok(S::into_owned(y)),
2170                    Err(y) => Err(T::into_owned(y)),
2171                }
2172            }
2173            type Container = Results<S::Container, T::Container>;
2174        }
2175
2176        impl<SC: Container, TC: Container> Container for Results<SC, TC> {
2177            type Ref<'a> = Result<SC::Ref<'a>, TC::Ref<'a>> where SC: 'a, TC: 'a;
2178            type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where SC: 'a, TC: 'a;
2179            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2180                Results {
2181                    indexes: self.indexes.borrow(),
2182                    oks: self.oks.borrow(),
2183                    errs: self.errs.borrow(),
2184                }
2185            }
2186            #[inline(always)]
2187            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, TC: 'a {
2188                Results {
2189                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
2190                    oks: SC::reborrow(thing.oks),
2191                    errs: TC::reborrow(thing.errs),
2192                }
2193            }
2194            #[inline(always)]
2195            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2196                match thing {
2197                    Ok(y) => Ok(SC::reborrow_ref(y)),
2198                    Err(y) => Err(TC::reborrow_ref(y)),
2199                }
2200            }
2201
2202            #[inline(always)]
2203            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2204                if !range.is_empty() {
2205                    // Starting offsets of each variant in `other`.
2206                    let oks_start = other.indexes.rank(range.start);
2207                    let errs_start = range.start - oks_start;
2208
2209                    // Count the number of `Ok` and `Err` variants as we push, to determine the range.
2210                    // TODO: This could probably be `popcnt` somehow.
2211                    let mut oks = 0;
2212                    for index in range.clone() {
2213                        let bit = other.indexes.get(index);
2214                        self.indexes.push(bit);
2215                        if bit { oks += 1; }
2216                    }
2217                    let errs = range.len() - oks;
2218
2219                    self.oks.extend_from_self(other.oks, oks_start .. oks_start + oks);
2220                    self.errs.extend_from_self(other.errs, errs_start .. errs_start + errs);
2221                }
2222            }
2223        }
2224
2225        impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2226            #[inline(always)]
2227            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2228                let iter = self.indexes.as_bytes();
2229                let iter = crate::chain(iter, self.oks.as_bytes());
2230                crate::chain(iter, self.errs.as_bytes())
2231            }
2232        }
2233        impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a u64> {
2234            #[inline(always)]
2235            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2236                Self {
2237                    indexes: crate::FromBytes::from_bytes(bytes),
2238                    oks: crate::FromBytes::from_bytes(bytes),
2239                    errs: crate::FromBytes::from_bytes(bytes),
2240                }
2241            }
2242        }
2243
2244        impl<SC, TC, CC, VC: Len, WC: Copy+CopyAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
2245            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2246        }
2247
2248        impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
2249        where
2250            SC: Index,
2251            TC: Index,
2252            CC: IndexAs<u64> + Len,
2253            VC: IndexAs<u64> + Len,
2254            WC: Copy + CopyAs<u64>,
2255        {
2256            type Ref = Result<SC::Ref, TC::Ref>;
2257            #[inline(always)]
2258            fn get(&self, index: usize) -> Self::Ref {
2259                if self.indexes.get(index) {
2260                    Ok(self.oks.get(self.indexes.rank(index)))
2261                } else {
2262                    Err(self.errs.get(index - self.indexes.rank(index)))
2263                }
2264            }
2265        }
2266        impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
2267        where
2268            &'a SC: Index,
2269            &'a TC: Index,
2270            CC: IndexAs<u64> + Len,
2271            VC: IndexAs<u64> + Len,
2272            WC: Copy + CopyAs<u64>,
2273        {
2274            type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
2275            #[inline(always)]
2276            fn get(&self, index: usize) -> Self::Ref {
2277                if self.indexes.get(index) {
2278                    Ok((&self.oks).get(self.indexes.rank(index)))
2279                } else {
2280                    Err((&self.errs).get(index - self.indexes.rank(index)))
2281                }
2282            }
2283        }
2284
2285        // NB: You are not allowed to change the variant, but can change its contents.
2286        impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
2287            type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
2288            #[inline(always)]
2289            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2290                if self.indexes.get(index) {
2291                    Ok(self.oks.get_mut(self.indexes.rank(index)))
2292                } else {
2293                    Err(self.errs.get_mut(index - self.indexes.rank(index)))
2294                }
2295            }
2296        }
2297
2298        impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
2299            #[inline]
2300            fn push(&mut self, item: Result<S, T>) {
2301                match item {
2302                    Ok(item) => {
2303                        self.indexes.push(true);
2304                        self.oks.push(item);
2305                    }
2306                    Err(item) => {
2307                        self.indexes.push(false);
2308                        self.errs.push(item);
2309                    }
2310                }
2311            }
2312        }
2313        impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
2314            #[inline]
2315            fn push(&mut self, item: &'a Result<S, T>) {
2316                match item {
2317                    Ok(item) => {
2318                        self.indexes.push(true);
2319                        self.oks.push(item);
2320                    }
2321                    Err(item) => {
2322                        self.indexes.push(false);
2323                        self.errs.push(item);
2324                    }
2325                }
2326            }
2327        }
2328
2329        impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
2330            fn clear(&mut self) {
2331                self.indexes.clear();
2332                self.oks.clear();
2333                self.errs.clear();
2334            }
2335        }
2336
2337        impl<SC: HeapSize, TC: HeapSize> HeapSize for Results<SC, TC> {
2338            fn heap_size(&self) -> (usize, usize) {
2339                let (l0, c0) = self.oks.heap_size();
2340                let (l1, c1) = self.errs.heap_size();
2341                let (li, ci) = self.indexes.heap_size();
2342                (l0 + l1 + li, c0 + c1 + ci)
2343            }
2344        }
2345
2346        impl<SC, TC, CC, VC, WC> Results<SC, TC, CC, VC, WC> {
2347            /// Returns ok values if no errors exist.
2348            pub fn unwrap(self) -> SC where TC: Len {
2349                assert!(self.errs.is_empty());
2350                self.oks
2351            }
2352            /// Returns error values if no oks exist.
2353            pub fn unwrap_err(self) -> TC where SC: Len {
2354                assert!(self.oks.is_empty());
2355                self.errs
2356            }
2357        }
2358        #[cfg(test)]
2359        mod test {
2360            #[test]
2361            fn round_trip() {
2362
2363                use crate::common::{Index, Push, HeapSize, Len};
2364
2365                let mut column: crate::ContainerOf<Result<u64, u64>> = Default::default();
2366                for i in 0..100 {
2367                    column.push(Ok::<u64, u64>(i));
2368                    column.push(Err::<u64, u64>(i));
2369                }
2370
2371                assert_eq!(column.len(), 200);
2372                assert_eq!(column.heap_size(), (1624, 2080));
2373
2374                for i in 0..100 {
2375                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2376                    assert_eq!(column.get(2*i+1), Err(i as u64));
2377                }
2378
2379                let mut column: crate::ContainerOf<Result<u64, u8>> = Default::default();
2380                for i in 0..100 {
2381                    column.push(Ok::<u64, u8>(i as u64));
2382                    column.push(Err::<u64, u8>(i as u8));
2383                }
2384
2385                assert_eq!(column.len(), 200);
2386                assert_eq!(column.heap_size(), (924, 1184));
2387
2388                for i in 0..100 {
2389                    assert_eq!(column.get(2*i+0), Ok(i as u64));
2390                    assert_eq!(column.get(2*i+1), Err(i as u8));
2391                }
2392            }
2393        }
2394    }
2395
2396    pub mod option {
2397
2398        use crate::common::index::CopyAs;
2399        use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, HeapSize};
2400        use crate::RankSelect;
2401
2402        #[derive(Copy, Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2403        pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=u64> {
2404            /// Uses two bits for each item, one to indicate the variant and one (amortized)
2405            /// to enable efficient rank determination.
2406            pub indexes: RankSelect<CC, VC, WC>,
2407            pub somes: TC,
2408        }
2409
2410        impl<T: Columnar> Columnar for Option<T> {
2411            fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
2412                match (&mut *self, other) {
2413                    (Some(x), Some(y)) => { x.copy_from(y); }
2414                    (_, other) => { *self = Self::into_owned(other); }
2415                }
2416            }
2417            fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
2418                other.map(|x| T::into_owned(x))
2419            }
2420            type Container = Options<T::Container>;
2421        }
2422
2423        impl<TC: Container> Container for Options<TC> {
2424            type Ref<'a> = Option<TC::Ref<'a>> where TC: 'a;
2425            type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a u64> where TC: 'a;
2426            fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
2427                Options {
2428                    indexes: self.indexes.borrow(),
2429                    somes: self.somes.borrow(),
2430                }
2431            }
2432            #[inline(always)]
2433            fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
2434                Options {
2435                    indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
2436                    somes: TC::reborrow(thing.somes),
2437                }
2438            }
2439            #[inline(always)]
2440            fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
2441                thing.map(TC::reborrow_ref)
2442            }
2443
2444            #[inline(always)]
2445            fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: std::ops::Range<usize>) {
2446                if !range.is_empty() {
2447                    // Starting offsets of `Some` variants in `other`.
2448                    let somes_start = other.indexes.rank(range.start);
2449
2450                    // Count the number of `Some` variants as we push, to determine the range.
2451                    // TODO: This could probably be `popcnt` somehow.
2452                    let mut somes = 0;
2453                    for index in range {
2454                        let bit = other.indexes.get(index);
2455                        self.indexes.push(bit);
2456                        if bit { somes += 1; }
2457                    }
2458
2459                    self.somes.extend_from_self(other.somes, somes_start .. somes_start + somes);
2460                }
2461            }
2462        }
2463
2464        impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a u64> {
2465            #[inline(always)]
2466            fn as_bytes(&self) -> impl Iterator<Item=(u64, &'a [u8])> {
2467                crate::chain(self.indexes.as_bytes(), self.somes.as_bytes())
2468            }
2469        }
2470
2471        impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a u64> {
2472            #[inline(always)]
2473            fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
2474                Self {
2475                    indexes: crate::FromBytes::from_bytes(bytes),
2476                    somes: crate::FromBytes::from_bytes(bytes),
2477                }
2478            }
2479        }
2480
2481        impl<T, CC, VC: Len, WC: Copy + CopyAs<u64>> Len for Options<T, CC, VC, WC> {
2482            #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
2483        }
2484
2485        impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: Copy+CopyAs<u64>> Index for Options<TC, CC, VC, WC> {
2486            type Ref = Option<TC::Ref>;
2487            #[inline(always)]
2488            fn get(&self, index: usize) -> Self::Ref {
2489                if self.indexes.get(index) {
2490                    Some(self.somes.get(self.indexes.rank(index)))
2491                } else {
2492                    None
2493                }
2494            }
2495        }
2496        impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: Copy+CopyAs<u64>> Index for &'a Options<TC, CC, VC, WC>
2497        where &'a TC: Index
2498        {
2499            type Ref = Option<<&'a TC as Index>::Ref>;
2500            #[inline(always)]
2501            fn get(&self, index: usize) -> Self::Ref {
2502                if self.indexes.get(index) {
2503                    Some((&self.somes).get(self.indexes.rank(index)))
2504                } else {
2505                    None
2506                }
2507            }
2508        }
2509        impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
2510            type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
2511            #[inline(always)]
2512            fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
2513                if self.indexes.get(index) {
2514                    Some(self.somes.get_mut(self.indexes.rank(index)))
2515                } else {
2516                    None
2517                }
2518            }
2519        }
2520
2521        impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
2522            #[inline]
2523            fn push(&mut self, item: Option<T>) {
2524                match item {
2525                    Some(item) => {
2526                        self.indexes.push(true);
2527                        self.somes.push(item);
2528                    }
2529                    None => {
2530                        self.indexes.push(false);
2531                    }
2532                }
2533            }
2534        }
2535        impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
2536            #[inline]
2537            fn push(&mut self, item: &'a Option<T>) {
2538                match item {
2539                    Some(item) => {
2540                        self.indexes.push(true);
2541                        self.somes.push(item);
2542                    }
2543                    None => {
2544                        self.indexes.push(false);
2545                    }
2546                }
2547            }
2548        }
2549
2550        impl<TC: Clear> Clear for Options<TC> {
2551            fn clear(&mut self) {
2552                self.indexes.clear();
2553                self.somes.clear();
2554            }
2555        }
2556
2557        impl<TC: HeapSize> HeapSize for Options<TC> {
2558            fn heap_size(&self) -> (usize, usize) {
2559                let (l0, c0) = self.somes.heap_size();
2560                let (li, ci) = self.indexes.heap_size();
2561                (l0 + li, c0 + ci)
2562            }
2563        }
2564
2565        #[cfg(test)]
2566        mod test {
2567
2568            use crate::Columnar;
2569            use crate::common::{Index, HeapSize, Len};
2570            use crate::Options;
2571
2572            #[test]
2573            fn round_trip_some() {
2574                // Type annotation is important to avoid some inference overflow.
2575                let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
2576                assert_eq!(store.len(), 100);
2577                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
2578                assert_eq!(store.heap_size(), (408, 544));
2579            }
2580
2581            #[test]
2582            fn round_trip_none() {
2583                let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
2584                assert_eq!(store.len(), 100);
2585                let foo = &store;
2586                assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
2587                assert_eq!(store.heap_size(), (8, 32));
2588            }
2589
2590            #[test]
2591            fn round_trip_mixed() {
2592                // Type annotation is important to avoid some inference overflow.
2593                let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
2594                assert_eq!(store.len(), 100);
2595                assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
2596                assert_eq!(store.heap_size(), (208, 288));
2597            }
2598        }
2599    }
2600}
2601
2602pub use lookback::{Repeats, Lookbacks};
2603/// Containers that can store either values, or offsets to prior values.
2604///
2605/// This has the potential to be more efficient than a list of `T` when many values repeat in
2606/// close proximity. Values must be equatable, and the degree of lookback can be configured.
2607pub mod lookback {
2608
2609    use crate::{Options, Results, Push, Index, Len, HeapSize};
2610
2611    /// A container that encodes repeated values with a `None` variant, at the cost of extra bits for every record.
2612    #[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2613    pub struct Repeats<TC, const N: u8 = 255> {
2614        /// Some(x) encodes a value, and None indicates the prior `x` value.
2615        pub inner: Options<TC>,
2616    }
2617
2618    impl<T: PartialEq, TC: Push<T> + Len, const N: u8> Push<T> for Repeats<TC, N>
2619    where
2620        for<'a> &'a TC: Index,
2621        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
2622    {
2623        #[inline]
2624        fn push(&mut self, item: T) {
2625            // Look at the last `somes` value for a potential match.
2626            let insert: Option<T> = if (&self.inner.somes).last().map(|x| x.eq(&item)) == Some(true) {
2627                None
2628            } else {
2629                Some(item)
2630            };
2631            self.inner.push(insert);
2632        }
2633    }
2634
2635    impl<TC: Len, const N: u8> Len for Repeats<TC, N> {
2636        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
2637    }
2638
2639    impl<TC: Index, const N: u8> Index for Repeats<TC, N> {
2640        type Ref = TC::Ref;
2641        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2642            match self.inner.get(index) {
2643                Some(item) => item,
2644                None => {
2645                    let pos = self.inner.indexes.rank(index) - 1;
2646                    self.inner.somes.get(pos)
2647                },
2648            }
2649        }
2650    }
2651
2652    impl<TC: HeapSize, const N: u8> HeapSize for Repeats<TC, N> {
2653        fn heap_size(&self) -> (usize, usize) {
2654            self.inner.heap_size()
2655        }
2656    }
2657
2658    #[derive(Clone, Debug, Default, PartialEq, serde::Serialize, serde::Deserialize)]
2659    pub struct Lookbacks<TC, VC = Vec<u8>, const N: u8 = 255> {
2660        /// Ok(x) encodes a value, and Err(y) indicates a value `y` back.
2661        pub inner: Results<TC, VC>,
2662    }
2663
2664    impl<T: PartialEq, TC: Push<T> + Len, VC: Push<u8>, const N: u8> Push<T> for Lookbacks<TC, VC, N>
2665    where
2666        for<'a> &'a TC: Index,
2667        for<'a> <&'a TC as Index>::Ref : PartialEq<T>,
2668    {
2669        #[inline]
2670        fn push(&mut self, item: T) {
2671            // Look backwards through (0 .. N) to look for a matching value.
2672            let oks_len = self.inner.oks.len();
2673            let find = (0u8 .. N).take(self.inner.oks.len()).find(|i| (&self.inner.oks).get(oks_len - (*i as usize) - 1) == item);
2674            let insert: Result<T, u8> = if let Some(back) = find { Err(back) } else { Ok(item) };
2675            self.inner.push(insert);
2676        }
2677    }
2678
2679    impl<TC, VC, const N: u8> Len for Lookbacks<TC, VC, N> {
2680        #[inline(always)] fn len(&self) -> usize { self.inner.len() }
2681    }
2682
2683    impl<TC: Index, VC: Index<Ref=u8>, const N: u8> Index for Lookbacks<TC, VC, N> {
2684        type Ref = TC::Ref;
2685        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2686            match self.inner.get(index) {
2687                Ok(item) => item,
2688                Err(back) => {
2689                    let pos = self.inner.indexes.rank(index) - 1;
2690                    self.inner.oks.get(pos - (back as usize))
2691                },
2692            }
2693        }
2694    }
2695    impl<'a, TC, const N: u8> Index for &'a Lookbacks<TC, Vec<u8>, N>
2696    where
2697        &'a TC: Index,
2698    {
2699        type Ref = <&'a TC as Index>::Ref;
2700        #[inline(always)] fn get(&self, index: usize) -> Self::Ref {
2701            match (&self.inner).get(index) {
2702                Ok(item) => item,
2703                Err(back) => {
2704                    let pos = self.inner.indexes.rank(index) - 1;
2705                    (&self.inner.oks).get(pos - (*back as usize))
2706                },
2707            }
2708        }
2709    }
2710
2711    impl<TC: HeapSize, VC: HeapSize, const N: u8> HeapSize for Lookbacks<TC, VC, N> {
2712        fn heap_size(&self) -> (usize, usize) {
2713            self.inner.heap_size()
2714        }
2715    }
2716}
2717
2718/// Containers for `Vec<(K, V)>` that form columns by `K` keys.
2719mod maps {
2720
2721    use crate::{Len, Push};
2722    use crate::Options;
2723
2724    /// A container for `Vec<(K, V)>` items.
2725    ///
2726    /// Each inserted map is expected to have one `val` for any `key`.
2727    /// Each is stored with `None` variants for absent keys. As such,
2728    /// this type is not meant for large sparse key spaces.
2729    pub struct KeyMaps<CK, CV> {
2730        _keys: CK,
2731        vals: Vec<CV>,
2732    }
2733
2734    impl<CK, CV: Len> Len for KeyMaps<CK, CV> {
2735        fn len(&self) -> usize {
2736            // This .. behaves badly if we have no keys.
2737            self.vals[0].len()
2738        }
2739    }
2740
2741    // Should this implementation preserve the order of the key-val pairs?
2742    // That might want an associated `Vec<usize>` for each, to order the keys.
2743    // If they are all identical, it shouldn't take up any space, though.
2744    impl<K: PartialOrd, V, CV: Push<K>> Push<Vec<(K, V)>> for KeyMaps<Vec<K>, CV> {
2745        #[inline]
2746        fn push(&mut self, _item: Vec<(K, V)>) {
2747
2748        }
2749    }
2750
2751    /// A container for `Vec<K>` items sliced by index.
2752    ///
2753    /// The container puts each `item[i]` element into the `i`th column.
2754    pub struct ListMaps<CV> {
2755        vals: Vec<Options<CV>>,
2756    }
2757
2758    impl<CV> Default for ListMaps<CV> {
2759        fn default() -> Self {
2760            ListMaps { vals: Default::default() }
2761        }
2762    }
2763
2764    impl<CV: Len> Len for ListMaps<CV> {
2765        fn len(&self) -> usize {
2766            self.vals[0].len()
2767        }
2768    }
2769
2770    impl<'a, V, CV: Push<&'a V> + Len + Default> Push<&'a Vec<V>> for ListMaps<CV> {
2771        #[inline]
2772        fn push(&mut self, item: &'a Vec<V>) {
2773            let mut item_len = item.len();
2774            let self_len = if self.vals.is_empty() { 0 } else { self.vals[0].len() };
2775            while self.vals.len() < item_len {
2776                let mut new_store: Options<CV> = Default::default();
2777                for _ in 0..self_len {
2778                    new_store.push(None);
2779                }
2780                self.vals.push(new_store);
2781            }
2782            for (store, i) in self.vals.iter_mut().zip(item) {
2783                store.push(Some(i));
2784            }
2785            while item_len < self.vals.len() {
2786                self.vals[item_len].push(None);
2787                item_len += 1;
2788            }
2789        }
2790    }
2791
2792    #[cfg(test)]
2793    mod test {
2794
2795        use crate::common::{Len, Push};
2796        use crate::{Results, Strings};
2797
2798        #[test]
2799        fn round_trip_listmap() {
2800
2801            // Each record is a list, of first homogeneous elements, and one heterogeneous.
2802            let records = (0 .. 1024).map(|i|
2803                vec![
2804                    Ok(i),
2805                    Err(format!("{:?}", i)),
2806                    if i % 2 == 0 { Ok(i) } else { Err(format!("{:?}", i)) },
2807                ]
2808            );
2809
2810            // We'll stash all the records in the store, which expects them.
2811            let mut store: super::ListMaps<Results<Vec<i32>, Strings>> = Default::default();
2812            for record in records {
2813                store.push(&record);
2814            }
2815
2816            // Demonstrate type-safe restructuring.
2817            // We expect the first two columns to be homogenous, and the third to be mixed.
2818            let field0: Option<&[i32]> = if store.vals[0].somes.oks.len() == store.vals[0].len() {
2819                Some(&store.vals[0].somes.oks)
2820            } else { None };
2821
2822            let field1: Option<&Strings> = if store.vals[1].somes.errs.len() == store.vals[1].len() {
2823                Some(&store.vals[1].somes.errs)
2824            } else { None };
2825
2826            let field2: Option<&[i32]> = if store.vals[2].somes.oks.len() == store.vals[2].len() {
2827                Some(&store.vals[2].somes.oks)
2828            } else { None };
2829
2830            assert!(field0.is_some());
2831            assert!(field1.is_some());
2832            assert!(field2.is_none());
2833        }
2834    }
2835
2836}
2837
2838/// Containers for `isize` and `usize` that adapt to the size of the data.
2839///
2840/// Similar structures could be used for containers of `u8`, `u16`, `u32`, and `u64`,
2841/// without losing their type information, if one didn't need the bespoke compression.
2842mod sizes {
2843
2844    use crate::Push;
2845    use crate::Results;
2846
2847    /// A four-variant container for integers of varying sizes.
2848    struct Sizes<C0, C1, C2, C3> {
2849        /// Four variants stored separately.
2850        inner: Results<Results<C0, C1>, Results<C2, C3>>,
2851    }
2852
2853    impl<C0: Default, C1: Default, C2: Default, C3: Default> Default for Sizes<C0, C1, C2, C3> {
2854        fn default() -> Self {
2855            Sizes { inner: Default::default() }
2856        }
2857    }
2858
2859    impl<C0: Push<u8>, C1: Push<u16>, C2: Push<u32>, C3: Push<u64>> Push<usize> for Sizes<C0, C1, C2, C3> {
2860        #[inline]
2861        fn push(&mut self, item: usize) {
2862            if let Ok(item) = TryInto::<u8>::try_into(item) {
2863                self.inner.push(Ok(Ok(item)))
2864            } else if let Ok(item) = TryInto::<u16>::try_into(item) {
2865                self.inner.push(Ok(Err(item)))
2866            } else if let Ok(item) = TryInto::<u32>::try_into(item) {
2867                self.inner.push(Err(Ok(item)))
2868            } else if let Ok(item) = TryInto::<u64>::try_into(item) {
2869                self.inner.push(Err(Err(item)))
2870            } else {
2871                panic!("usize exceeds bounds of u64")
2872            }
2873        }
2874    }
2875
2876    impl<C0: Push<i8>, C1: Push<i16>, C2: Push<i32>, C3: Push<i64>> Push<isize> for Sizes<C0, C1, C2, C3> {
2877        #[inline]
2878        fn push(&mut self, item: isize) {
2879            if let Ok(item) = TryInto::<i8>::try_into(item) {
2880                self.inner.push(Ok(Ok(item)))
2881            } else if let Ok(item) = TryInto::<i16>::try_into(item) {
2882                self.inner.push(Ok(Err(item)))
2883            } else if let Ok(item) = TryInto::<i32>::try_into(item) {
2884                self.inner.push(Err(Ok(item)))
2885            } else if let Ok(item) = TryInto::<i64>::try_into(item) {
2886                self.inner.push(Err(Err(item)))
2887            } else {
2888                panic!("isize exceeds bounds of i64")
2889            }
2890        }
2891    }
2892}
2893
2894/// Roaring bitmap (and similar) containers.
2895pub mod roaring {
2896
2897    use crate::Results;
2898
2899    /// A container for `bool` that uses techniques from Roaring bitmaps.
2900    ///
2901    /// These techniques are to block the bits into blocks of 2^16 bits,
2902    /// and to encode each block based on its density. Either a bitmap
2903    /// for dense blocks or a list of set bits for sparse blocks.
2904    ///
2905    /// Additionally, other representations encode runs of set bits.
2906    pub struct RoaringBits {
2907        _inner: Results<[u64; 1024], Vec<u16>>,
2908    }
2909}
2910
2911pub use chain_mod::{chain, chain_one};
2912
2913pub mod chain_mod {
2914    //! Chain iterators, or iterators and an item. Iterators that might improve inlining, at the
2915    //! expense of not providing iterator maker traits.
2916
2917    /// Chain two iterators together. The result first iterates over `a`, then `b`, until both are
2918    /// exhausted.
2919    ///
2920    /// This addresses a quirk where deep iterators would not be optimized to their full potential.
2921    /// Here, functions are marked with `#[inline(always)]` to indicate that the compiler should
2922    /// try hard to inline the iterators.
2923    #[inline(always)]
2924    pub fn chain<A: IntoIterator, B: IntoIterator<Item=A::Item>>(a: A, b: B) -> Chain<A::IntoIter, B::IntoIter> {
2925        Chain { a: Some(a.into_iter()), b: Some(b.into_iter()) }
2926    }
2927
2928    pub struct Chain<A, B> {
2929        a: Option<A>,
2930        b: Option<B>,
2931    }
2932
2933    impl<A, B> Iterator for Chain<A, B>
2934    where
2935        A: Iterator,
2936        B: Iterator<Item=A::Item>,
2937    {
2938        type Item = A::Item;
2939
2940        #[inline(always)]
2941        fn next(&mut self) -> Option<Self::Item> {
2942            if let Some(a) = self.a.as_mut() {
2943                let x = a.next();
2944                if x.is_none() {
2945                    self.a = None;
2946                } else {
2947                    return x;
2948                }
2949            }
2950            if let Some(b) = self.b.as_mut() {
2951                let x = b.next();
2952                if x.is_none() {
2953                    self.b = None;
2954                } else {
2955                    return x;
2956                }
2957            }
2958            None
2959        }
2960
2961        #[inline]
2962        fn fold<Acc, F>(self, mut acc: Acc, mut f: F) -> Acc
2963        where
2964            F: FnMut(Acc, Self::Item) -> Acc,
2965        {
2966            if let Some(a) = self.a {
2967                acc = a.fold(acc, &mut f);
2968            }
2969            if let Some(b) = self.b {
2970                acc = b.fold(acc, f);
2971            }
2972            acc
2973        }
2974    }
2975
2976    /// Chain a single item to an iterator. The resulting iterator first iterates over `a`,
2977    /// then `b`. The resulting iterator is marked as `#[inline(always)]`, which in some situations
2978    /// causes better inlining behavior with current Rust versions.
2979    #[inline(always)]
2980    pub fn chain_one<A: IntoIterator>(a: A, b: A::Item) -> ChainOne<A::IntoIter> {
2981        ChainOne { a: Some(a.into_iter()), b: Some(b) }
2982    }
2983
2984    pub struct ChainOne<A: Iterator> {
2985        a: Option<A>,
2986        b: Option<A::Item>,
2987    }
2988
2989    impl<A: Iterator> Iterator for ChainOne<A> {
2990        type Item = A::Item;
2991
2992        #[inline(always)]
2993        fn next(&mut self) -> Option<Self::Item> {
2994            if let Some(a) = self.a.as_mut() {
2995                let x = a.next();
2996                if x.is_none() {
2997                    self.a = None;
2998                    self.b.take()
2999                } else {
3000                    x
3001                }
3002            } else {
3003                None
3004            }
3005        }
3006    }
3007
3008    #[cfg(test)]
3009    mod test {
3010        use super::*;
3011
3012        #[test]
3013        fn test_chain() {
3014            let a = [1, 2, 3];
3015            let b = [4, 5, 6];
3016            let mut chain = chain(a, b);
3017            assert_eq!(chain.next(), Some(1));
3018            assert_eq!(chain.next(), Some(2));
3019            assert_eq!(chain.next(), Some(3));
3020            assert_eq!(chain.next(), Some(4));
3021            assert_eq!(chain.next(), Some(5));
3022            assert_eq!(chain.next(), Some(6));
3023            assert_eq!(chain.next(), None);
3024        }
3025
3026        #[test]
3027        fn test_chain_one() {
3028            let a = [1, 2, 3];
3029            let b = 4;
3030            let mut chain = chain_one(a, b);
3031            assert_eq!(chain.next(), Some(1));
3032            assert_eq!(chain.next(), Some(2));
3033            assert_eq!(chain.next(), Some(3));
3034            assert_eq!(chain.next(), Some(4));
3035            assert_eq!(chain.next(), None);
3036        }
3037    }
3038}