Skip to main content

columnar/
sums.rs

1//! Containers for enumerations ("sum types") that store variants separately.
2//!
3//! The main work of these types is storing a discriminant and index efficiently,
4//! as containers for each of the variant types can hold the actual data.
5
6/// Stores for maintaining discriminants, and associated sequential indexes.
7///
8/// The sequential indexes are not explicitly maintained, but are supported
9/// by a `rank(index)` function that indicates how many of a certain variant
10/// precede the given index. While this could potentially be done with a scan
11/// of all preceding discriminants, the stores maintain running accumulations
12/// that make the operation constant time (using additional amortized memory).
13pub mod rank_select {
14
15    use alloc::{vec::Vec, string::String};
16    use crate::primitive::Bools;
17
18    use crate::{Borrow, Len, Index, IndexAs, Push, Clear};
19
20    /// Number of 64-bit words per cumulative-popcount chunk. Smaller values
21    /// reduce the worst-case word scan in `select` (and the catch-up scan
22    /// in `rank`) at the cost of more `counts` entries (~6% overhead per
23    /// halving). 16 → 1024 bits/chunk is the historical default; 8 → 512
24    /// bits gives faster random `select` for ~12% counts memory.
25    const WORDS_PER_CHUNK: usize = 16;
26    const BITS_PER_CHUNK: usize = 64 * WORDS_PER_CHUNK;
27
28    /// A store for maintaining `Vec<bool>` with fast `rank` and `select` access.
29    ///
30    /// The design is to have `u64` running counts for each block of 1024 bits,
31    /// which are roughly the size of a cache line. This is roughly 6% overhead,
32    /// above the bits themselves, which seems pretty solid.
33    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
34    #[derive(Copy, Clone, Debug, Default, PartialEq)]
35    pub struct RankSelect<CC = Vec<u64>, VC = Vec<u64>, WC = [u64; 2]> {
36        /// Counts of the number of cumulative set (true) bits, *after* each block of 1024 bits.
37        pub counts: CC,
38        /// The bits themselves.
39        pub values: Bools<VC, WC>,
40    }
41
42    impl<CC: crate::common::BorrowIndexAs<u64>, VC: crate::common::BorrowIndexAs<u64>> RankSelect<CC, VC> {
43        #[inline(always)]
44        pub fn borrow<'a>(&'a self) -> RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a [u64]> {
45            RankSelect {
46                counts: self.counts.borrow(),
47                values: self.values.borrow(),
48            }
49        }
50        #[inline(always)]
51        pub fn reborrow<'b, 'a: 'b>(thing: RankSelect<CC::Borrowed<'a>, VC::Borrowed<'a>, &'a [u64]>) -> RankSelect<CC::Borrowed<'b>, VC::Borrowed<'b>, &'b [u64]> {
52            RankSelect {
53                counts: CC::reborrow(thing.counts),
54                values: Bools::<VC, [u64; 2]>::reborrow(thing.values),
55            }
56        }
57    }
58
59    impl<'a, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for RankSelect<CC, VC, &'a [u64]> {
60        const SLICE_COUNT: usize = CC::SLICE_COUNT + <Bools<VC, &'a [u64]> as crate::AsBytes<'a>>::SLICE_COUNT;
61        #[inline]
62        fn get_byte_slice(&self, index: usize) -> (u64, &'a [u8]) {
63            debug_assert!(index < Self::SLICE_COUNT);
64            if index < CC::SLICE_COUNT {
65                self.counts.get_byte_slice(index)
66            } else {
67                self.values.get_byte_slice(index - CC::SLICE_COUNT)
68            }
69        }
70    }
71    impl<'a, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for RankSelect<CC, VC, &'a [u64]> {
72        const SLICE_COUNT: usize = CC::SLICE_COUNT + <crate::primitive::Bools<VC, &'a [u64]>>::SLICE_COUNT;
73        #[inline(always)]
74        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
75            Self {
76                counts: crate::FromBytes::from_bytes(bytes),
77                values: crate::FromBytes::from_bytes(bytes),
78            }
79        }
80        #[inline(always)]
81        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
82            Self {
83                counts: CC::from_store(store, offset),
84                values: <crate::primitive::Bools<VC, &'a [u64]>>::from_store(store, offset),
85            }
86        }
87        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
88            CC::element_sizes(sizes)?;
89            <crate::primitive::Bools<VC, &'a [u64]>>::element_sizes(sizes)?;
90            Ok(())
91        }
92    }
93
94
95    impl<CC, VC: Len + IndexAs<u64>, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
96        #[inline(always)]
97        pub fn get(&self, index: usize) -> bool {
98            Index::get(&self.values, index)
99        }
100    }
101    impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
102        /// The number of set bits *strictly* preceding `index`.
103        ///
104        /// This number is accumulated first by reading out of `self.counts` at the correct position,
105        /// then by summing the ones in strictly prior `u64` entries, then by counting the ones in the
106        /// masked `u64` in which the bit lives.
107        pub fn rank(&self, index: usize) -> usize {
108            let bit = index % 64;
109            let block = index / 64;
110            let chunk = block / WORDS_PER_CHUNK;
111            let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) as usize } else { 0 };
112            for pos in (WORDS_PER_CHUNK * chunk) .. block {
113                count += self.values.values.index_as(pos).count_ones() as usize;
114            }
115            // TODO: Panic if out of bounds?
116            let intra_word = if block == self.values.values.len() { self.values.tail.index_as(0) } else { self.values.values.index_as(block) };
117            count += (intra_word & ((1 << bit) - 1)).count_ones() as usize;
118            count
119        }
120        /// The position of the `rank`-th set bit (0-indexed), if it exists.
121        ///
122        /// `select(0)` returns the position of the first set bit. In general,
123        /// `select(rank(p)) == p` when `p` is the position of a set bit, mirroring
124        /// the convention of [`Self::rank`] (which counts bits strictly before
125        /// its argument).
126        #[inline]
127        pub fn select(&self, rank: u64) -> Option<usize> {
128            // Step one: find the BITS_PER_CHUNK-bit chunk containing the rank-th set bit.
129            // We want the smallest `chunk` for which `counts[chunk] > rank` — the
130            // chunk whose cumulative count first exceeds `rank`. Equivalent to a
131            // partition point over `counts` on the predicate `counts[i] <= rank`.
132            let chunk = {
133                let mut lo = 0;
134                let mut hi = self.counts.len();
135                while lo < hi {
136                    let mid = lo + (hi - lo) / 2;
137                    if self.counts.index_as(mid) <= rank { lo = mid + 1; } else { hi = mid; }
138                }
139                lo
140            };
141            // Number of set bits strictly before `chunk`'s BITS_PER_CHUNK bits.
142            let mut count = if chunk > 0 { self.counts.index_as(chunk - 1) } else { 0 };
143
144            // Step two: find the 64-bit word within `chunk` containing the rank-th set bit.
145            let mut block = WORDS_PER_CHUNK * chunk;
146            while block < self.values.values.len() {
147                let pop = self.values.values.index_as(block).count_ones() as u64;
148                if count + pop > rank { break; }
149                count += pop;
150                block += 1;
151            }
152
153            // Step three: locate the bit within the chosen word, or return `None`
154            // if `rank` is past the total set-bit count.
155            let (last_word, last_bits) = if block == self.values.values.len() {
156                (self.values.tail.index_as(0), self.values.tail.index_as(1) as usize)
157            } else {
158                (self.values.values.index_as(block), 64)
159            };
160            // Mask off any padding past the last valid bit (only relevant for the tail).
161            let masked = if last_bits == 64 { last_word } else { last_word & ((1u64 << last_bits) - 1) };
162            let k = (rank - count) as u32;
163            let shift = select_in_word(masked, k);
164            if shift >= last_bits as u32 { None } else { Some(64 * block + shift as usize) }
165        }
166    }
167
168    /// Position of the `k`-th set bit (0-indexed) within a 64-bit word. Returns 64 if
169    /// the word has fewer than `k + 1` set bits. Portable, ~6 conditional shifts based
170    /// on byte/half popcounts — independent of bit density, unlike a linear scan.
171    #[inline]
172    fn select_in_word(mut w: u64, mut k: u32) -> u32 {
173        if k >= w.count_ones() { return 64; }
174        let mut pos = 0u32;
175        // Halve the search range repeatedly. At each step, popcount of the low half
176        // tells us whether the k-th bit is in the low half (recurse) or the high half
177        // (subtract that count and shift).
178        let pop = (w & 0xFFFF_FFFF).count_ones();
179        if k >= pop { k -= pop; pos += 32; w >>= 32; }
180        let pop = (w & 0xFFFF).count_ones();
181        if k >= pop { k -= pop; pos += 16; w >>= 16; }
182        let pop = (w & 0xFF).count_ones();
183        if k >= pop { k -= pop; pos += 8; w >>= 8; }
184        let pop = (w & 0xF).count_ones();
185        if k >= pop { k -= pop; pos += 4; w >>= 4; }
186        let pop = (w & 0x3).count_ones();
187        if k >= pop { k -= pop; pos += 2; w >>= 2; }
188        let pop = w & 0x1;
189        if (k as u64) >= pop { pos += 1; }
190        pos
191    }
192
193    /// Forward cursor over a [`RankSelect`].
194    ///
195    /// Use this instead of repeated `rank`/`select` calls when you want to traverse
196    /// the bitvector in order. The cursor caches a current word and running rank,
197    /// so a single word load serves many subsequent operations — no re-probing
198    /// `counts` or rescanning words.
199    ///
200    /// At a high level the cursor maintains an *invariant pair* (`pos`, `rank`):
201    /// `pos` is the next bit to consider, and `rank` is the number of 1-bits in
202    /// `[0, pos)`. Every operation maintains this pair so that callers can read
203    /// either coordinate freely.
204    ///
205    /// Pick the method that matches the question you want to answer:
206    ///
207    /// | Operation         | Use when you want…                                        |
208    /// |-------------------|-----------------------------------------------------------|
209    /// | [`next_one`][n1]  | "Where is the next 1-bit?" — emits 1-bit positions in order. |
210    /// | [`step`][s]       | "What is the bit at the current position?" — read-by-bit traversal. |
211    /// | [`seek_to_pos`][stp] | "Jump to bit position p; what is its rank?" — random forward seek. |
212    /// | [`seek_to_rank`][str] | "Jump to the k-th 1-bit; where is it?" — equivalent to `select`. |
213    ///
214    /// The cursor is **forward-only**: every operation advances `pos` and `rank`
215    /// monotonically. Trying to seek backward triggers a debug-assertion failure.
216    ///
217    /// [n1]: Cursor::next_one
218    /// [s]: Cursor::step
219    /// [stp]: Cursor::seek_to_pos
220    /// [str]: Cursor::seek_to_rank
221    pub struct Cursor<'a, CC, VC, WC> {
222        rs: &'a RankSelect<CC, VC, WC>,
223        /// Index of the current 64-bit word within `values` (or `== values.len()` for tail).
224        word_idx: usize,
225        /// 1-bits remaining in the current word at positions ≥ `bit_pos % 64`.
226        /// Bits at positions strictly below `bit_pos % 64` are cleared.
227        word_remaining: u64,
228        /// Position of the next bit to consider.
229        bit_pos: usize,
230        /// Number of 1-bits in `[0, bit_pos)`.
231        rank: u64,
232        /// Cached total bit count of the underlying RankSelect.
233        total_bits: usize,
234    }
235
236    impl<CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
237        /// Create a forward cursor positioned at bit 0.
238        pub fn cursor(&self) -> Cursor<'_, CC, VC, WC> {
239            let total_bits = self.len();
240            let mut c = Cursor {
241                rs: self,
242                word_idx: 0,
243                word_remaining: 0,
244                bit_pos: 0,
245                rank: 0,
246                total_bits,
247            };
248            c.load_word();
249            c
250        }
251    }
252
253    impl<'a, CC: Len + IndexAs<u64>, VC: Len + IndexAs<u64>, WC: IndexAs<u64>> Cursor<'a, CC, VC, WC> {
254        /// Position of the next bit to consider.
255        #[inline] pub fn pos(&self) -> usize { self.bit_pos }
256        /// Number of 1-bits strictly before `pos()`.
257        #[inline] pub fn rank(&self) -> u64 { self.rank }
258        /// Total number of bits in the underlying vector.
259        #[inline] pub fn total_bits(&self) -> usize { self.total_bits }
260
261        /// Load the word at `self.word_idx`, masked to its valid bits, and align
262        /// `bit_pos` to the start of that word. No-op past the end.
263        fn load_word(&mut self) {
264            let vlen = self.rs.values.values.len();
265            if self.word_idx < vlen {
266                self.word_remaining = self.rs.values.values.index_as(self.word_idx);
267                self.bit_pos = self.word_idx * 64;
268            } else if self.word_idx == vlen {
269                let raw = self.rs.values.tail.index_as(0);
270                let valid = self.rs.values.tail.index_as(1);
271                let mask = if valid >= 64 { !0u64 } else if valid == 0 { 0 } else { (1u64 << valid) - 1 };
272                self.word_remaining = raw & mask;
273                self.bit_pos = self.word_idx * 64;
274            } else {
275                self.word_remaining = 0;
276            }
277        }
278
279        /// Emit the next 1-bit position in order; advance past it.
280        ///
281        /// Equivalent to `select(self.rank())` followed by stepping one past that
282        /// position, but amortized: a single word load serves up to 64 results.
283        ///
284        /// **Use this for streaming through every set bit.** The canonical example
285        /// is recovering monotone Vecs bounds from an RS-encoded unary bitvector:
286        /// each call returns the next bound's bit position, no per-call re-probing
287        /// of `counts`. Returns `None` once all set bits have been emitted.
288        pub fn next_one(&mut self) -> Option<usize> {
289            loop {
290                if self.word_remaining != 0 {
291                    let bit_in_word = self.word_remaining.trailing_zeros() as usize;
292                    let pos = self.word_idx * 64 + bit_in_word;
293                    self.word_remaining &= self.word_remaining - 1;
294                    self.bit_pos = pos + 1;
295                    self.rank += 1;
296                    return Some(pos);
297                }
298                if self.bit_pos >= self.total_bits { return None; }
299                self.word_idx += 1;
300                self.load_word();
301                if self.word_idx > self.rs.values.values.len() { return None; }
302            }
303        }
304
305        /// Advance one bit. Return its value: `Some(true)` for 1, `Some(false)` for 0,
306        /// or `None` if past the end.
307        ///
308        /// **Use this when you need to visit every bit in order, regardless of value.**
309        /// Common pattern: the lookup-walk loop in a hash table that probes consecutive
310        /// slots, deciding what to do based on whether each slot is occupied. Each
311        /// call is a single bit-test + an occasional word-boundary crossing.
312        pub fn step(&mut self) -> Option<bool> {
313            if self.bit_pos >= self.total_bits { return None; }
314            let bit_in_word = self.bit_pos & 63;
315            let is_one = (self.word_remaining >> bit_in_word) & 1 == 1;
316            if is_one {
317                self.word_remaining &= !(1u64 << bit_in_word);
318                self.rank += 1;
319            }
320            self.bit_pos += 1;
321            if (self.bit_pos & 63) == 0 && self.bit_pos < self.total_bits {
322                self.word_idx += 1;
323                self.load_word();
324            }
325            Some(is_one)
326        }
327
328        /// Jump forward to bit position `target` and report its `rank` (via the
329        /// cursor's state) without consuming the bit there.
330        ///
331        /// After return, `pos() == target` and `rank() == rank(target)` (the count
332        /// of 1-bits strictly before `target`). A subsequent [`step`][Self::step]
333        /// reads the bit *at* `target`.
334        ///
335        /// **Use this when you have a known target position and want to read or
336        /// continue from there.** Typical use is hash-table lookup: a query's slot
337        /// position is computed from its hash; you want to jump there and then
338        /// walk forward through the probe chain.
339        ///
340        /// Fast path: if `target` lies within the cursor's current word, this is a
341        /// popcount + mask. Otherwise it pays one `RankSelect::rank` call to
342        /// re-anchor.
343        ///
344        /// `target` must be `>= self.pos()` (forward-only; debug-asserts otherwise).
345        /// Returns `false` if `target` is past the end of the bitvector.
346        pub fn seek_to_pos(&mut self, target: usize) -> bool {
347            debug_assert!(target >= self.bit_pos, "seek_to_pos is forward-only");
348            if target >= self.total_bits {
349                self.bit_pos = self.total_bits;
350                self.word_remaining = 0;
351                return false;
352            }
353            let target_word = target / 64;
354            if target_word == self.word_idx {
355                // Same word: count 1-bits we skip past, then mask the word.
356                let cur_bit = (self.bit_pos & 63) as u32;
357                let tgt_bit = (target & 63) as u32;
358                let skipped_mask = if tgt_bit == 64 { !0u64 } else { (1u64 << tgt_bit) - 1 };
359                let skipped = self.word_remaining & skipped_mask;
360                self.rank += skipped.count_ones() as u64;
361                // Clear consumed low bits (positions < target).
362                self.word_remaining &= !skipped_mask;
363                let _ = cur_bit;
364                self.bit_pos = target;
365                return true;
366            }
367            // Different word: re-anchor rank via the standard rank() formula, then
368            // load the target word and mask off bits below `target`.
369            self.rank = self.rs.rank(target) as u64;
370            self.word_idx = target_word;
371            self.load_word();
372            let tgt_bit = target & 63;
373            let mask = if tgt_bit == 0 { !0u64 } else { !((1u64 << tgt_bit) - 1) };
374            self.word_remaining &= mask;
375            self.bit_pos = target;
376            true
377        }
378
379        /// Jump forward to the `target`-th set bit (0-indexed) and return its
380        /// position. Equivalent to [`RankSelect::select`] for the cursor's
381        /// current state.
382        ///
383        /// After return, `rank() == target + 1` (the target bit has been consumed).
384        ///
385        /// **Use this when you have a target rank and want the position.** Typical
386        /// use is "give me bound[k]" against an RS-encoded Vecs-bounds bitvector,
387        /// possibly skipping forward over a stretch of consecutive bounds you
388        /// don't care about.
389        ///
390        /// Fast path: if the target's bit lies within the cursor's current word,
391        /// this is a `select_in_word` + bit-clear. Otherwise it pays a binary
392        /// search over `counts`, mirroring `RankSelect::select`.
393        ///
394        /// `target` must be `>= self.rank()` (forward-only; debug-asserts otherwise).
395        /// Returns `None` if there are fewer than `target + 1` set bits in total.
396        pub fn seek_to_rank(&mut self, target: u64) -> Option<usize> {
397            debug_assert!(target >= self.rank, "seek_to_rank is forward-only");
398            // Cheap path: target is within the current word's remaining 1-bits.
399            let here_pop = self.word_remaining.count_ones() as u64;
400            if target < self.rank + here_pop {
401                let k = (target - self.rank) as u32;
402                let bit_in_word = select_in_word(self.word_remaining, k) as usize;
403                let pos = self.word_idx * 64 + bit_in_word;
404                // Consume up to and including the target bit: clear the lowest k+1 set bits.
405                let mut w = self.word_remaining;
406                for _ in 0..=k { w &= w.wrapping_sub(1); }
407                self.word_remaining = w;
408                self.rank = target + 1;
409                self.bit_pos = pos + 1;
410                return Some(pos);
411            }
412            // Otherwise: jump via chunk binary search, mirroring `select`.
413            let counts = &self.rs.counts;
414            let chunk = {
415                let mut lo = 0;
416                let mut hi = counts.len();
417                while lo < hi {
418                    let mid = lo + (hi - lo) / 2;
419                    if counts.index_as(mid) <= target { lo = mid + 1; } else { hi = mid; }
420                }
421                lo
422            };
423            let mut count = if chunk > 0 { counts.index_as(chunk - 1) } else { 0 };
424            let vlen = self.rs.values.values.len();
425            let mut block = WORDS_PER_CHUNK * chunk;
426            while block < vlen {
427                let pop = self.rs.values.values.index_as(block).count_ones() as u64;
428                if count + pop > target { break; }
429                count += pop;
430                block += 1;
431            }
432            self.word_idx = block;
433            self.load_word();
434            self.rank = count;
435            // Now finish within the freshly loaded word.
436            if target >= count + self.word_remaining.count_ones() as u64 {
437                // Exhausted.
438                self.bit_pos = self.total_bits;
439                self.word_remaining = 0;
440                return None;
441            }
442            let k = (target - count) as u32;
443            let bit_in_word = select_in_word(self.word_remaining, k) as usize;
444            let pos = self.word_idx * 64 + bit_in_word;
445            let mut w = self.word_remaining;
446            for _ in 0..=k { w &= w.wrapping_sub(1); }
447            self.word_remaining = w;
448            self.rank = target + 1;
449            self.bit_pos = pos + 1;
450            Some(pos)
451        }
452    }
453
454    impl<CC, VC: Len, WC: IndexAs<u64>> RankSelect<CC, VC, WC> {
455        pub fn len(&self) -> usize {
456            self.values.len()
457        }
458    }
459
460    // This implementation probably only works for `Vec<u64>` and `Vec<u64>`, but we could fix that.
461    // Partly, it's hard to name the `Index` flavor that allows one to get back a `u64`.
462    impl<CC: for<'a> Push<&'a u64> + Len + IndexAs<u64>, VC: for<'a> Push<&'a u64> + Len + IndexAs<u64>> RankSelect<CC, VC> {
463        #[inline]
464        pub fn push(&mut self, bit: bool) {
465            self.values.push(&bit);
466            while self.counts.len() < self.values.len() / BITS_PER_CHUNK {
467                let mut count = self.counts.last().unwrap_or(0);
468                let lower = WORDS_PER_CHUNK * self.counts.len();
469                let upper = lower + WORDS_PER_CHUNK;
470                for i in lower .. upper {
471                    count += self.values.values.index_as(i).count_ones() as u64;
472                }
473                self.counts.push(&count);
474            }
475        }
476    }
477    impl<CC: Clear, VC: Clear> Clear for RankSelect<CC, VC> {
478        fn clear(&mut self) {
479            self.counts.clear();
480            self.values.clear();
481        }
482    }
483
484    #[cfg(test)]
485    mod tests {
486        use alloc::{vec, vec::Vec};
487        use super::RankSelect;
488
489        fn build(bits: &[bool]) -> RankSelect {
490            let mut rs: RankSelect = RankSelect::default();
491            for &b in bits { rs.push(b); }
492            rs
493        }
494
495        /// All true bits are recovered by `select(rank(p))` for every set position `p`,
496        /// and `rank` agrees with a naive count.
497        fn check_round_trip(bits: &[bool]) {
498            let rs = build(bits);
499            let mut expected = 0u64;
500            for (i, &b) in bits.iter().enumerate() {
501                assert_eq!(rs.rank(i), expected as usize, "rank({}) on pattern of len {}", i, bits.len());
502                if b {
503                    let pos = rs.select(expected).unwrap_or_else(|| panic!("select({}) returned None for set bit at {}", expected, i));
504                    assert_eq!(pos, i, "select({}) on pattern of len {}", expected, bits.len());
505                    expected += 1;
506                }
507            }
508            // Out-of-range select returns None.
509            assert!(rs.select(expected).is_none());
510        }
511
512        #[test]
513        fn select_first_bit() {
514            // Bit 0 set, nothing else.
515            let mut bits = vec![false; 2048];
516            bits[0] = true;
517            check_round_trip(&bits);
518        }
519
520        #[test]
521        fn select_small_dense() {
522            // First five bits set in a 2048-bit vector (spans two chunks).
523            let mut bits = vec![false; 2048];
524            for i in 0..5 { bits[i] = true; }
525            check_round_trip(&bits);
526        }
527
528        #[test]
529        fn select_chunk_boundary() {
530            // Set bits exactly at the chunk boundary positions 1023 and 1024.
531            let mut bits = vec![false; 4096];
532            bits[1023] = true;
533            bits[1024] = true;
534            check_round_trip(&bits);
535        }
536
537        #[test]
538        fn select_in_tail() {
539            // Bits 0..3 set, then nothing through bit 1100. Pattern length 1100 puts
540            // the final bits in the tail (not a complete 1024-bit chunk).
541            let mut bits = vec![false; 1100];
542            bits[0] = true;
543            bits[1099] = true;
544            check_round_trip(&bits);
545        }
546
547        #[test]
548        fn select_every_other() {
549            // Dense, multiple chunks.
550            let bits: Vec<bool> = (0..3000).map(|i| i % 2 == 0).collect();
551            check_round_trip(&bits);
552        }
553
554        #[test]
555        fn select_sparse_multi_chunk() {
556            // One set bit per 1024-bit chunk, six chunks.
557            let mut bits = vec![false; 6 * 1024];
558            for c in 0..6 { bits[1024 * c + 17] = true; }
559            check_round_trip(&bits);
560        }
561
562        #[test]
563        fn select_out_of_range() {
564            let rs = build(&[true, false, true]);
565            assert_eq!(rs.select(0), Some(0));
566            assert_eq!(rs.select(1), Some(2));
567            assert_eq!(rs.select(2), None);
568            assert_eq!(rs.select(1000), None);
569        }
570
571        #[test]
572        fn cursor_next_one_matches_select() {
573            // For each set bit in a 3000-bit pattern, walking next_one gives the
574            // same positions as repeated select calls.
575            let bits: Vec<bool> = (0..3000).map(|i| i % 7 == 0).collect();
576            let rs = build(&bits);
577            let mut cur = rs.cursor();
578            let mut k = 0u64;
579            while let Some(pos) = cur.next_one() {
580                assert_eq!(Some(pos), rs.select(k));
581                assert_eq!(cur.rank(), k + 1);
582                assert_eq!(cur.pos(), pos + 1);
583                k += 1;
584            }
585            assert_eq!(k, rs.rank(rs.len()) as u64);
586        }
587
588        #[test]
589        fn cursor_step_walks_every_bit() {
590            let bits: Vec<bool> = (0..2050).map(|i| i % 3 == 0).collect();
591            let rs = build(&bits);
592            let mut cur = rs.cursor();
593            let mut expected_rank = 0u64;
594            for (i, &b) in bits.iter().enumerate() {
595                assert_eq!(cur.pos(), i);
596                assert_eq!(cur.rank(), expected_rank);
597                assert_eq!(cur.step(), Some(b));
598                if b { expected_rank += 1; }
599            }
600            assert_eq!(cur.step(), None);
601        }
602
603        #[test]
604        fn cursor_seek_to_rank_skips_far_forward() {
605            // 3 chunks worth of bits, with set bits clustered. Seek directly to
606            // the 100-th set bit from a fresh cursor; result matches select(100).
607            let bits: Vec<bool> = (0..3200).map(|i| i % 3 == 1).collect();
608            let rs = build(&bits);
609            let mut cur = rs.cursor();
610            let pos = cur.seek_to_rank(100).unwrap();
611            assert_eq!(Some(pos), rs.select(100));
612            assert_eq!(cur.rank(), 101);
613            // A subsequent next_one continues correctly.
614            let next = cur.next_one().unwrap();
615            assert_eq!(Some(next), rs.select(101));
616        }
617
618        #[test]
619        fn cursor_seek_to_rank_within_current_word() {
620            // Set bits in a known pattern within the first word; cursor should
621            // satisfy seek_to_rank from the current-word fast path without re-probing.
622            let bits: Vec<bool> = (0..64).map(|i| matches!(i, 1 | 5 | 13 | 30 | 50)).collect();
623            let rs = build(&bits);
624            let mut cur = rs.cursor();
625            assert_eq!(cur.seek_to_rank(0), Some(1));
626            assert_eq!(cur.seek_to_rank(2), Some(13));
627            assert_eq!(cur.seek_to_rank(4), Some(50));
628            assert_eq!(cur.next_one(), None);
629        }
630
631        #[test]
632        fn cursor_seek_to_pos_same_word_and_cross_word() {
633            let bits: Vec<bool> = (0..256).map(|i| matches!(i % 5, 0 | 2)).collect();
634            let rs = build(&bits);
635            // Same-word seek.
636            let mut cur = rs.cursor();
637            assert!(cur.seek_to_pos(10));
638            assert_eq!(cur.pos(), 10);
639            assert_eq!(cur.rank(), rs.rank(10) as u64);
640            assert!(cur.seek_to_pos(40));
641            assert_eq!(cur.rank(), rs.rank(40) as u64);
642            // Cross-word seek.
643            assert!(cur.seek_to_pos(200));
644            assert_eq!(cur.pos(), 200);
645            assert_eq!(cur.rank(), rs.rank(200) as u64);
646            // Step reads the bit at the target.
647            assert_eq!(cur.step(), Some(bits[200]));
648        }
649
650        #[test]
651        fn cursor_seek_to_pos_out_of_range() {
652            let bits: Vec<bool> = (0..100).map(|_| true).collect();
653            let rs = build(&bits);
654            let mut cur = rs.cursor();
655            assert!(!cur.seek_to_pos(1000));
656            assert_eq!(cur.step(), None);
657        }
658
659        #[test]
660        fn cursor_seek_to_rank_out_of_range() {
661            let bits: Vec<bool> = (0..200).map(|i| i % 10 == 0).collect();
662            let rs = build(&bits);
663            let mut cur = rs.cursor();
664            assert_eq!(cur.seek_to_rank(10_000), None);
665        }
666
667        #[test]
668        fn select_in_word_basic() {
669            use super::select_in_word;
670            // 0b10110: set bits at positions 1, 2, 4.
671            assert_eq!(select_in_word(0b10110, 0), 1);
672            assert_eq!(select_in_word(0b10110, 1), 2);
673            assert_eq!(select_in_word(0b10110, 2), 4);
674            assert_eq!(select_in_word(0b10110, 3), 64);
675            // Edges of the word.
676            assert_eq!(select_in_word(1u64 << 63, 0), 63);
677            assert_eq!(select_in_word(u64::MAX, 0), 0);
678            assert_eq!(select_in_word(u64::MAX, 63), 63);
679            assert_eq!(select_in_word(u64::MAX, 64), 64);
680            assert_eq!(select_in_word(0, 0), 64);
681        }
682    }
683}
684
685pub mod result {
686
687    use alloc::{vec::Vec, string::String};
688
689    use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, Borrow};
690    use crate::RankSelect;
691
692    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
693    #[derive(Copy, Clone, Debug, Default, PartialEq)]
694    pub struct Results<SC, TC, CC=Vec<u64>, VC=Vec<u64>, WC=[u64; 2]> {
695        /// Bits set to `true` correspond to `Ok` variants.
696        pub indexes: RankSelect<CC, VC, WC>,
697        pub oks: SC,
698        pub errs: TC,
699    }
700
701    impl<S: Columnar, T: Columnar> Columnar for Result<S, T> {
702        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
703            match (&mut *self, other) {
704                (Ok(x), Ok(y)) => x.copy_from(y),
705                (Err(x), Err(y)) => x.copy_from(y),
706                (_, other) => { *self = Self::into_owned(other); },
707            }
708        }
709        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
710            match other {
711                Ok(y) => Ok(S::into_owned(y)),
712                Err(y) => Err(T::into_owned(y)),
713            }
714        }
715        type Container = Results<S::Container, T::Container>;
716    }
717
718    impl<SC: Borrow, TC: Borrow> Borrow for Results<SC, TC> {
719        type Ref<'a> = Result<SC::Ref<'a>, TC::Ref<'a>> where SC: 'a, TC: 'a;
720        type Borrowed<'a> = Results<SC::Borrowed<'a>, TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a [u64]> where SC: 'a, TC: 'a;
721        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
722            Results {
723                indexes: self.indexes.borrow(),
724                oks: self.oks.borrow(),
725                errs: self.errs.borrow(),
726            }
727        }
728        #[inline(always)]
729        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where SC: 'a, TC: 'a {
730            Results {
731                indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
732                oks: SC::reborrow(thing.oks),
733                errs: TC::reborrow(thing.errs),
734            }
735        }
736        #[inline(always)]
737        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
738            match thing {
739                Ok(y) => Ok(SC::reborrow_ref(y)),
740                Err(y) => Err(TC::reborrow_ref(y)),
741            }
742        }
743    }
744
745    impl<SC: Container, TC: Container> Container for Results<SC, TC> {
746        #[inline(always)]
747        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: core::ops::Range<usize>) {
748            if !range.is_empty() {
749                // Starting offsets of each variant in `other`.
750                let oks_start = other.indexes.rank(range.start);
751                let errs_start = range.start - oks_start;
752
753                // Count the number of `Ok` and `Err` variants as we push, to determine the range.
754                // TODO: This could probably be `popcnt` somehow.
755                let mut oks = 0;
756                for index in range.clone() {
757                    let bit = other.indexes.get(index);
758                    self.indexes.push(bit);
759                    if bit { oks += 1; }
760                }
761                let errs = range.len() - oks;
762
763                self.oks.extend_from_self(other.oks, oks_start .. oks_start + oks);
764                self.errs.extend_from_self(other.errs, errs_start .. errs_start + errs);
765            }
766        }
767
768        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
769            // TODO: reserve room in `self.indexes`.
770            self.oks.reserve_for(selves.clone().map(|x| x.oks));
771            self.errs.reserve_for(selves.map(|x| x.errs));
772        }
773    }
774
775    impl<'a, SC: crate::AsBytes<'a>, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Results<SC, TC, CC, VC, &'a [u64]> {
776        const SLICE_COUNT: usize = <RankSelect<CC, VC, &'a [u64]> as crate::AsBytes<'a>>::SLICE_COUNT + SC::SLICE_COUNT + TC::SLICE_COUNT;
777        #[inline]
778        fn get_byte_slice(&self, index: usize) -> (u64, &'a [u8]) {
779            debug_assert!(index < Self::SLICE_COUNT);
780            let idx_count = <RankSelect<CC, VC, &'a [u64]> as crate::AsBytes<'a>>::SLICE_COUNT;
781            if index < idx_count {
782                self.indexes.get_byte_slice(index)
783            } else if index < idx_count + SC::SLICE_COUNT {
784                self.oks.get_byte_slice(index - idx_count)
785            } else {
786                self.errs.get_byte_slice(index - idx_count - SC::SLICE_COUNT)
787            }
788        }
789    }
790    impl<'a, SC: crate::FromBytes<'a>, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Results<SC, TC, CC, VC, &'a [u64]> {
791        const SLICE_COUNT: usize = <RankSelect<CC, VC, &'a [u64]>>::SLICE_COUNT + SC::SLICE_COUNT + TC::SLICE_COUNT;
792        #[inline(always)]
793        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
794            Self {
795                indexes: crate::FromBytes::from_bytes(bytes),
796                oks: crate::FromBytes::from_bytes(bytes),
797                errs: crate::FromBytes::from_bytes(bytes),
798            }
799        }
800        #[inline(always)]
801        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
802            Self {
803                indexes: crate::FromBytes::from_store(store, offset),
804                oks: SC::from_store(store, offset),
805                errs: TC::from_store(store, offset),
806            }
807        }
808        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
809            <RankSelect<CC, VC, &'a [u64]>>::element_sizes(sizes)?;
810            SC::element_sizes(sizes)?;
811            TC::element_sizes(sizes)?;
812            Ok(())
813        }
814    }
815
816    impl<SC, TC, CC, VC: Len, WC: IndexAs<u64>> Len for Results<SC, TC, CC, VC, WC> {
817        #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
818    }
819
820    impl<SC, TC, CC, VC, WC> Index for Results<SC, TC, CC, VC, WC>
821    where
822        SC: Index,
823        TC: Index,
824        CC: IndexAs<u64> + Len,
825        VC: IndexAs<u64> + Len,
826        WC: IndexAs<u64>,
827    {
828        type Ref = Result<SC::Ref, TC::Ref>;
829        #[inline(always)]
830        fn get(&self, index: usize) -> Self::Ref {
831            if self.indexes.get(index) {
832                Ok(self.oks.get(self.indexes.rank(index)))
833            } else {
834                Err(self.errs.get(index - self.indexes.rank(index)))
835            }
836        }
837    }
838    impl<'a, SC, TC, CC, VC, WC> Index for &'a Results<SC, TC, CC, VC, WC>
839    where
840        &'a SC: Index,
841        &'a TC: Index,
842        CC: IndexAs<u64> + Len,
843        VC: IndexAs<u64> + Len,
844        WC: IndexAs<u64>,
845    {
846        type Ref = Result<<&'a SC as Index>::Ref, <&'a TC as Index>::Ref>;
847        #[inline(always)]
848        fn get(&self, index: usize) -> Self::Ref {
849            if self.indexes.get(index) {
850                Ok((&self.oks).get(self.indexes.rank(index)))
851            } else {
852                Err((&self.errs).get(index - self.indexes.rank(index)))
853            }
854        }
855    }
856
857    // NB: You are not allowed to change the variant, but can change its contents.
858    impl<SC: IndexMut, TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Results<SC, TC, CC, VC> {
859        type IndexMut<'a> = Result<SC::IndexMut<'a>, TC::IndexMut<'a>> where SC: 'a, TC: 'a, CC: 'a, VC: 'a;
860        #[inline(always)]
861        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
862            if self.indexes.get(index) {
863                Ok(self.oks.get_mut(self.indexes.rank(index)))
864            } else {
865                Err(self.errs.get_mut(index - self.indexes.rank(index)))
866            }
867        }
868    }
869
870    impl<S, SC: Push<S>, T, TC: Push<T>> Push<Result<S, T>> for Results<SC, TC> {
871        #[inline]
872        fn push(&mut self, item: Result<S, T>) {
873            match item {
874                Ok(item) => {
875                    self.indexes.push(true);
876                    self.oks.push(item);
877                }
878                Err(item) => {
879                    self.indexes.push(false);
880                    self.errs.push(item);
881                }
882            }
883        }
884    }
885    impl<'a, S, SC: Push<&'a S>, T, TC: Push<&'a T>> Push<&'a Result<S, T>> for Results<SC, TC> {
886        #[inline]
887        fn push(&mut self, item: &'a Result<S, T>) {
888            match item {
889                Ok(item) => {
890                    self.indexes.push(true);
891                    self.oks.push(item);
892                }
893                Err(item) => {
894                    self.indexes.push(false);
895                    self.errs.push(item);
896                }
897            }
898        }
899    }
900
901    impl<SC: Clear, TC: Clear> Clear for Results<SC, TC> {
902        fn clear(&mut self) {
903            self.indexes.clear();
904            self.oks.clear();
905            self.errs.clear();
906        }
907    }
908
909    impl<SC, TC, CC, VC, WC> Results<SC, TC, CC, VC, WC> {
910        /// Returns ok values if no errors exist.
911        pub fn unwrap(self) -> SC where TC: Len {
912            assert!(self.errs.is_empty());
913            self.oks
914        }
915        /// Returns error values if no oks exist.
916        pub fn unwrap_err(self) -> TC where SC: Len {
917            assert!(self.oks.is_empty());
918            self.errs
919        }
920        /// Returns ok values if no errors exist, or `None`.
921        pub fn try_unwrap(self) -> Option<SC> where TC: Len {
922            if self.errs.is_empty() { Some(self.oks) } else { None }
923        }
924        /// Returns error values if no oks exist, or `None`.
925        pub fn try_unwrap_err(self) -> Option<TC> where SC: Len {
926            if self.oks.is_empty() { Some(self.errs) } else { None }
927        }
928    }
929    #[cfg(test)]
930    mod test {
931        #[test]
932        fn round_trip() {
933
934            use crate::common::{Index, Push, Len};
935
936            let mut column: crate::ContainerOf<Result<u64, u64>> = Default::default();
937            for i in 0..100 {
938                column.push(Ok::<u64, u64>(i));
939                column.push(Err::<u64, u64>(i));
940            }
941
942            assert_eq!(column.len(), 200);
943
944            for i in 0..100 {
945                assert_eq!(column.get(2*i+0), Ok(i as u64));
946                assert_eq!(column.get(2*i+1), Err(i as u64));
947            }
948
949            let mut column: crate::ContainerOf<Result<u64, u8>> = Default::default();
950            for i in 0..100 {
951                column.push(Ok::<u64, u8>(i as u64));
952                column.push(Err::<u64, u8>(i as u8));
953            }
954
955            assert_eq!(column.len(), 200);
956
957            for i in 0..100 {
958                assert_eq!(column.get(2*i+0), Ok(i as u64));
959                assert_eq!(column.get(2*i+1), Err(i as u8));
960            }
961        }
962    }
963}
964
965pub mod option {
966
967    use alloc::{vec::Vec, string::String};
968
969    use crate::{Clear, Columnar, Container, Len, IndexMut, Index, IndexAs, Push, Borrow};
970    use crate::RankSelect;
971
972#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
973    #[derive(Copy, Clone, Debug, Default, PartialEq)]
974    pub struct Options<TC, CC=Vec<u64>, VC=Vec<u64>, WC=[u64; 2]> {
975        /// Uses two bits for each item, one to indicate the variant and one (amortized)
976        /// to enable efficient rank determination.
977        pub indexes: RankSelect<CC, VC, WC>,
978        pub somes: TC,
979    }
980
981    impl<T: Columnar> Columnar for Option<T> {
982        fn copy_from<'a>(&mut self, other: crate::Ref<'a, Self>) {
983            match (&mut *self, other) {
984                (Some(x), Some(y)) => { x.copy_from(y); }
985                (_, other) => { *self = Self::into_owned(other); }
986            }
987        }
988        fn into_owned<'a>(other: crate::Ref<'a, Self>) -> Self {
989            other.map(|x| T::into_owned(x))
990        }
991        type Container = Options<T::Container>;
992    }
993
994    impl<TC: Borrow> Borrow for Options<TC> {
995        type Ref<'a> = Option<TC::Ref<'a>> where TC: 'a;
996        type Borrowed<'a> = Options<TC::Borrowed<'a>, &'a [u64], &'a [u64], &'a [u64]> where TC: 'a;
997        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
998            Options {
999                indexes: self.indexes.borrow(),
1000                somes: self.somes.borrow(),
1001            }
1002        }
1003        #[inline(always)]
1004        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> where TC: 'a {
1005            Options {
1006                indexes: RankSelect::<Vec<u64>, Vec<u64>>::reborrow(thing.indexes),
1007                somes: TC::reborrow(thing.somes),
1008            }
1009        }
1010        #[inline(always)]
1011        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> where Self: 'a {
1012            thing.map(TC::reborrow_ref)
1013        }
1014    }
1015
1016    impl<TC: Container> Container for Options<TC> {
1017        #[inline(always)]
1018        fn extend_from_self(&mut self, other: Self::Borrowed<'_>, range: core::ops::Range<usize>) {
1019            if !range.is_empty() {
1020                // Starting offsets of `Some` variants in `other`.
1021                let somes_start = other.indexes.rank(range.start);
1022
1023                // Count the number of `Some` variants as we push, to determine the range.
1024                // TODO: This could probably be `popcnt` somehow.
1025                let mut somes = 0;
1026                for index in range {
1027                    let bit = other.indexes.get(index);
1028                    self.indexes.push(bit);
1029                    if bit { somes += 1; }
1030                }
1031
1032                self.somes.extend_from_self(other.somes, somes_start .. somes_start + somes);
1033            }
1034        }
1035
1036        fn reserve_for<'a, I>(&mut self, selves: I) where Self: 'a, I: Iterator<Item = Self::Borrowed<'a>> + Clone {
1037            // TODO: reserve room in `self.indexes`.
1038            self.somes.reserve_for(selves.map(|x| x.somes));
1039        }
1040    }
1041
1042    impl<'a, TC: crate::AsBytes<'a>, CC: crate::AsBytes<'a>, VC: crate::AsBytes<'a>> crate::AsBytes<'a> for Options<TC, CC, VC, &'a [u64]> {
1043        const SLICE_COUNT: usize = <RankSelect<CC, VC, &'a [u64]> as crate::AsBytes<'a>>::SLICE_COUNT + TC::SLICE_COUNT;
1044        #[inline]
1045        fn get_byte_slice(&self, index: usize) -> (u64, &'a [u8]) {
1046            debug_assert!(index < Self::SLICE_COUNT);
1047            let idx_count = <RankSelect<CC, VC, &'a [u64]> as crate::AsBytes<'a>>::SLICE_COUNT;
1048            if index < idx_count {
1049                self.indexes.get_byte_slice(index)
1050            } else {
1051                self.somes.get_byte_slice(index - idx_count)
1052            }
1053        }
1054    }
1055
1056    impl <'a, TC: crate::FromBytes<'a>, CC: crate::FromBytes<'a>, VC: crate::FromBytes<'a>> crate::FromBytes<'a> for Options<TC, CC, VC, &'a [u64]> {
1057        const SLICE_COUNT: usize = <RankSelect<CC, VC, &'a [u64]>>::SLICE_COUNT + TC::SLICE_COUNT;
1058        #[inline(always)]
1059        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1060            Self {
1061                indexes: crate::FromBytes::from_bytes(bytes),
1062                somes: crate::FromBytes::from_bytes(bytes),
1063            }
1064        }
1065        #[inline(always)]
1066        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
1067            Self {
1068                indexes: crate::FromBytes::from_store(store, offset),
1069                somes: TC::from_store(store, offset),
1070            }
1071        }
1072        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
1073            <RankSelect<CC, VC, &'a [u64]>>::element_sizes(sizes)?;
1074            TC::element_sizes(sizes)?;
1075            Ok(())
1076        }
1077    }
1078
1079    impl<T, CC, VC: Len, WC: IndexAs<u64>> Len for Options<T, CC, VC, WC> {
1080        #[inline(always)] fn len(&self) -> usize { self.indexes.len() }
1081    }
1082
1083    impl<TC: Index, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: IndexAs<u64>> Index for Options<TC, CC, VC, WC> {
1084        type Ref = Option<TC::Ref>;
1085        #[inline(always)]
1086        fn get(&self, index: usize) -> Self::Ref {
1087            if self.indexes.get(index) {
1088                Some(self.somes.get(self.indexes.rank(index)))
1089            } else {
1090                None
1091            }
1092        }
1093    }
1094    impl<'a, TC, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len, WC: IndexAs<u64>> Index for &'a Options<TC, CC, VC, WC>
1095    where &'a TC: Index
1096    {
1097        type Ref = Option<<&'a TC as Index>::Ref>;
1098        #[inline(always)]
1099        fn get(&self, index: usize) -> Self::Ref {
1100            if self.indexes.get(index) {
1101                Some((&self.somes).get(self.indexes.rank(index)))
1102            } else {
1103                None
1104            }
1105        }
1106    }
1107    impl<TC: IndexMut, CC: IndexAs<u64> + Len, VC: IndexAs<u64> + Len> IndexMut for Options<TC, CC, VC> {
1108        type IndexMut<'a> = Option<TC::IndexMut<'a>> where TC: 'a, CC: 'a, VC: 'a;
1109        #[inline(always)]
1110        fn get_mut(&mut self, index: usize) -> Self::IndexMut<'_> {
1111            if self.indexes.get(index) {
1112                Some(self.somes.get_mut(self.indexes.rank(index)))
1113            } else {
1114                None
1115            }
1116        }
1117    }
1118
1119    impl<T, TC: Push<T> + Len> Push<Option<T>> for Options<TC> {
1120        #[inline]
1121        fn push(&mut self, item: Option<T>) {
1122            match item {
1123                Some(item) => {
1124                    self.indexes.push(true);
1125                    self.somes.push(item);
1126                }
1127                None => {
1128                    self.indexes.push(false);
1129                }
1130            }
1131        }
1132    }
1133    impl<'a, T, TC: Push<&'a T> + Len> Push<&'a Option<T>> for Options<TC> {
1134        #[inline]
1135        fn push(&mut self, item: &'a Option<T>) {
1136            match item {
1137                Some(item) => {
1138                    self.indexes.push(true);
1139                    self.somes.push(item);
1140                }
1141                None => {
1142                    self.indexes.push(false);
1143                }
1144            }
1145        }
1146    }
1147
1148    impl<TC, CC, VC, WC> Options<TC, CC, VC, WC> {
1149        /// Returns the inner container if all elements are `Some`, or `None`.
1150        pub fn try_unwrap(self) -> Option<TC> where TC: Len, VC: Len, WC: IndexAs<u64> {
1151            if self.somes.len() == self.indexes.len() { Some(self.somes) } else { None }
1152        }
1153        /// True if all elements are `None`.
1154        pub fn is_all_none(&self) -> bool where TC: Len {
1155            self.somes.is_empty()
1156        }
1157    }
1158
1159    impl<TC: Clear> Clear for Options<TC> {
1160        fn clear(&mut self) {
1161            self.indexes.clear();
1162            self.somes.clear();
1163        }
1164    }
1165
1166    #[cfg(test)]
1167    mod test {
1168        use alloc::vec::Vec;
1169
1170        use crate::Columnar;
1171        use crate::common::{Index, Len};
1172        use crate::Options;
1173
1174        #[test]
1175        fn round_trip_some() {
1176            // Type annotation is important to avoid some inference overflow.
1177            let store: Options<Vec<i32>> = Columnar::into_columns((0..100).map(Some));
1178            assert_eq!(store.len(), 100);
1179            assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == Some(&b)));
1180        }
1181
1182        #[test]
1183        fn round_trip_none() {
1184            let store = Columnar::into_columns((0..100).map(|_x| None::<i32>));
1185            assert_eq!(store.len(), 100);
1186            let foo = &store;
1187            assert!(foo.index_iter().zip(0..100).all(|(a, _b)| a == None));
1188        }
1189
1190        #[test]
1191        fn round_trip_mixed() {
1192            // Type annotation is important to avoid some inference overflow.
1193            let store: Options<Vec<i32>>  = Columnar::into_columns((0..100).map(|x| if x % 2 == 0 { Some(x) } else { None }));
1194            assert_eq!(store.len(), 100);
1195            assert!((&store).index_iter().zip(0..100).all(|(a, b)| a == if b % 2 == 0 { Some(&b) } else { None }));
1196        }
1197    }
1198}
1199
1200pub mod discriminant {
1201
1202    use alloc::{vec::Vec, string::String};
1203    use crate::{Clear, Container, Len, Index, IndexAs, Borrow};
1204
1205    /// Tracks variant discriminants and offsets for enum containers.
1206    ///
1207    /// Uses two arrays (`variant` and `offset`) with three states:
1208    /// - **Empty**: both arrays empty, length is 0.
1209    /// - **Homogeneous**: `variant` is empty, `offset` holds `[tag, count]` where
1210    ///   `tag = variant_index + 1`. All elements share a single variant with
1211    ///   identity offsets (element `i` maps to offset `i`).
1212    /// - **Heterogeneous**: `variant` has per-element discriminants (`u8`),
1213    ///   `offset` has per-element offsets into variant containers (`u64`).
1214    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1215    #[derive(Clone, Debug, Default, PartialEq)]
1216    pub struct Discriminant<CVar = Vec<u8>, COff = Vec<u64>> {
1217        /// Per-element variant discriminants; empty when homogeneous.
1218        pub variant: CVar,
1219        /// Per-element offsets (heterogeneous), or `[tag, count]` (homogeneous), or empty.
1220        pub offset: COff,
1221    }
1222
1223    impl<CVar: Copy, COff: Copy> Copy for Discriminant<CVar, COff> {}
1224
1225    impl Discriminant {
1226        /// Push a variant discriminant and the offset into its variant container.
1227        #[inline]
1228        pub fn push(&mut self, variant: u8, offset: u64) {
1229            let tag = variant as u64 + 1;
1230            if self.variant.is_empty() {
1231                if self.offset.is_empty() {
1232                    // Empty → start homogeneous: offset = [tag, 1].
1233                    self.offset.push(tag);
1234                    self.offset.push(1);
1235                } else if self.offset[0] == tag {
1236                    // Same variant; stay homogeneous, increment count.
1237                    self.offset[1] += 1;
1238                } else {
1239                    // Different variant; transition to heterogeneous.
1240                    let prev = (self.offset[0] - 1) as u8;
1241                    let count = self.offset[1];
1242                    self.variant.reserve(count as usize + 1);
1243                    self.offset.clear();
1244                    self.offset.reserve(count as usize + 1);
1245                    for i in 0..count {
1246                        self.variant.push(prev);
1247                        self.offset.push(i);
1248                    }
1249                    self.variant.push(variant);
1250                    self.offset.push(offset);
1251                }
1252            } else {
1253                // Already heterogeneous.
1254                self.variant.push(variant);
1255                self.offset.push(offset);
1256            }
1257        }
1258
1259        /// Pre-allocate for the given borrowed discriminants.
1260        pub fn reserve_for<'a>(&mut self, selves: impl Iterator<Item = Discriminant<&'a [u8], &'a [u64]>> + Clone) {
1261            self.variant.reserve_for(selves.clone().map(|x| x.variant));
1262            self.offset.reserve_for(selves.map(|x| x.offset));
1263        }
1264    }
1265
1266    impl<CVar: Len, COff: Len> Discriminant<CVar, COff> {
1267        /// True if elements have mixed variants, with per-element discriminants and offsets.
1268        #[inline]
1269        pub fn is_heterogeneous(&self) -> bool {
1270            !self.variant.is_empty()
1271        }
1272        /// Returns `Some(variant)` if all elements share a single variant.
1273        #[inline]
1274        pub fn homogeneous(&self) -> Option<u8> where COff: IndexAs<u64> {
1275            if self.variant.is_empty() && self.offset.len() >= 2 {
1276                Some((self.offset.index_as(0) - 1) as u8)
1277            } else {
1278                None
1279            }
1280        }
1281        /// Returns `(variant, offset)` for the element at `index`.
1282        #[inline(always)]
1283        pub fn get(&self, index: usize) -> (u8, u64) where CVar: IndexAs<u8>, COff: IndexAs<u64> {
1284            if self.is_heterogeneous() {
1285                (self.variant.index_as(index), self.offset.index_as(index))
1286            } else {
1287                let tag: u64 = self.offset.index_as(0);
1288                ((tag - 1) as u8, index as u64)
1289            }
1290        }
1291    }
1292
1293    impl<CVar: Len, COff: Len + IndexAs<u64>> Len for Discriminant<CVar, COff> {
1294        #[inline(always)]
1295        fn len(&self) -> usize {
1296            if self.is_heterogeneous() { self.variant.len() }
1297            else if self.offset.len() >= 2 { self.offset.index_as(1) as usize }
1298            else { 0 }
1299        }
1300    }
1301
1302    // Index for the borrowed form: returns (variant, offset).
1303    impl<'a> Index for Discriminant<&'a [u8], &'a [u64]> {
1304        type Ref = (u8, u64);
1305        #[inline(always)]
1306        fn get(&self, index: usize) -> (u8, u64) {
1307            if self.is_heterogeneous() {
1308                (self.variant.index_as(index), self.offset.index_as(index))
1309            } else {
1310                ((self.offset[0] - 1) as u8, index as u64)
1311            }
1312        }
1313    }
1314
1315    // Borrow
1316    impl Borrow for Discriminant {
1317        type Ref<'a> = (u8, u64);
1318        type Borrowed<'a> = Discriminant<&'a [u8], &'a [u64]>;
1319        #[inline(always)]
1320        fn borrow<'a>(&'a self) -> Self::Borrowed<'a> {
1321            Discriminant {
1322                variant: &self.variant[..],
1323                offset: &self.offset[..],
1324            }
1325        }
1326        #[inline(always)]
1327        fn reborrow<'b, 'a: 'b>(thing: Self::Borrowed<'a>) -> Self::Borrowed<'b> {
1328            Discriminant {
1329                variant: thing.variant,
1330                offset: thing.offset,
1331            }
1332        }
1333        #[inline(always)]
1334        fn reborrow_ref<'b, 'a: 'b>(thing: Self::Ref<'a>) -> Self::Ref<'b> { thing }
1335    }
1336
1337    impl<CVar: Clear, COff: Clear> Clear for Discriminant<CVar, COff> {
1338        #[inline(always)]
1339        fn clear(&mut self) {
1340            self.variant.clear();
1341            self.offset.clear();
1342        }
1343    }
1344
1345
1346    // AsBytes for Discriminant, generic over container types.
1347    impl<'a, CVar: crate::AsBytes<'a>, COff: crate::AsBytes<'a>> crate::AsBytes<'a> for Discriminant<CVar, COff> {
1348        const SLICE_COUNT: usize = CVar::SLICE_COUNT + COff::SLICE_COUNT;
1349        #[inline]
1350        fn get_byte_slice(&self, index: usize) -> (u64, &'a [u8]) {
1351            debug_assert!(index < Self::SLICE_COUNT);
1352            if index < CVar::SLICE_COUNT {
1353                self.variant.get_byte_slice(index)
1354            } else {
1355                self.offset.get_byte_slice(index - CVar::SLICE_COUNT)
1356            }
1357        }
1358    }
1359
1360    // FromBytes for borrowed form
1361    impl<'a> crate::FromBytes<'a> for Discriminant<&'a [u8], &'a [u64]> {
1362        const SLICE_COUNT: usize = <&'a [u8]>::SLICE_COUNT + <&'a [u64]>::SLICE_COUNT;
1363        #[inline(always)]
1364        fn from_bytes(bytes: &mut impl Iterator<Item=&'a [u8]>) -> Self {
1365            let variant = crate::FromBytes::from_bytes(bytes);
1366            let offset = crate::FromBytes::from_bytes(bytes);
1367            Self { variant, offset }
1368        }
1369        #[inline(always)]
1370        fn from_store(store: &crate::bytes::indexed::DecodedStore<'a>, offset: &mut usize) -> Self {
1371            let variant = crate::FromBytes::from_store(store, offset);
1372            let offset_field = crate::FromBytes::from_store(store, offset);
1373            Self { variant, offset: offset_field }
1374        }
1375        fn element_sizes(sizes: &mut Vec<usize>) -> Result<(), String> {
1376            <&[u8]>::element_sizes(sizes)?;
1377            <&[u64]>::element_sizes(sizes)?;
1378            Ok(())
1379        }
1380    }
1381
1382    #[cfg(test)]
1383    mod test {
1384        use crate::Len;
1385
1386        #[test]
1387        fn homogeneous_push() {
1388            let mut d = super::Discriminant::default();
1389            d.push(2, 0);
1390            d.push(2, 1);
1391            d.push(2, 2);
1392            assert_eq!(d.len(), 3);
1393            assert_eq!(d.homogeneous(), Some(2));
1394            assert!(d.variant.is_empty());
1395            // offset holds [tag, count] = [3, 3] in homogeneous mode.
1396            assert_eq!(d.offset, vec![3, 3]);
1397        }
1398
1399        #[test]
1400        fn heterogeneous_transition() {
1401            let mut d = super::Discriminant::default();
1402            d.push(0, 0);
1403            d.push(0, 1);
1404            d.push(1, 0); // transition
1405            assert_eq!(d.len(), 3);
1406            assert_eq!(d.homogeneous(), None);
1407            assert_eq!(d.variant, vec![0, 0, 1]);
1408            assert_eq!(d.offset, vec![0, 1, 0]);
1409        }
1410
1411        #[test]
1412        fn clear_resets() {
1413            use crate::Clear;
1414            let mut d = super::Discriminant::default();
1415            d.push(1, 0);
1416            d.push(1, 1);
1417            d.clear();
1418            assert_eq!(d.len(), 0);
1419            // After clear, first push starts homogeneous again.
1420            d.push(3, 0);
1421            assert_eq!(d.homogeneous(), Some(3));
1422            assert_eq!(d.len(), 1);
1423        }
1424
1425        #[test]
1426        fn borrow_index() {
1427            use crate::Borrow;
1428            let mut d = super::Discriminant::default();
1429            d.push(2, 0);
1430            d.push(2, 1);
1431            d.push(2, 2);
1432            let b = d.borrow();
1433            assert_eq!(b.get(0), (2, 0));
1434            assert_eq!(b.get(1), (2, 1));
1435            assert_eq!(b.get(2), (2, 2));
1436        }
1437
1438        #[test]
1439        fn borrow_index_heterogeneous() {
1440            use crate::Borrow;
1441            let mut d = super::Discriminant::default();
1442            d.push(0, 0);
1443            d.push(1, 0);
1444            d.push(0, 1);
1445            let b = d.borrow();
1446            assert_eq!(b.get(0), (0, 0));
1447            assert_eq!(b.get(1), (1, 0));
1448            assert_eq!(b.get(2), (0, 1));
1449        }
1450    }
1451}