compressed_intvec/variable/
reader.rs

1//! A reader for efficient, repeated random access into an [`IntVec`].
2//!
3//! This module provides [`IntVecReader`], a reusable reader that is designed to
4//! optimize random access performance.
5//!
6//! # Performance
7//!
8//! A standard call to [`get`](super::IntVec::get) is convenient, but it
9//! creates and discards an internal bitstream reader for each call. When
10//! performing many random lookups, this can introduce significant overhead.
11//!
12//! [`IntVecReader`] avoids this by maintaining a persistent, reusable
13//! reader instance. This amortizes the setup cost across multiple `get` operations,
14//! making it ideal for access patterns where lookup indices are not known in
15//! advance (e.g., graph traversals, pointer chasing).
16//!
17//! For reading a predefined list of indices, [`get_many`](super::IntVec::get_many)
18//! is generally more efficient, as it can pre-sort the indices for a single
19//! sequential scan.
20//!
21//! [`IntVec`]: crate::variable::IntVec
22
23use super::{traits::Storable, IntVec, IntVecBitReader, IntVecError};
24use dsi_bitstream::{
25    dispatch::{Codes, CodesRead, FuncCodeReader, StaticCodeRead},
26    prelude::{BitRead, BitSeek, Endianness},
27};
28use std::marker::PhantomData;
29
30/// An internal hybrid dispatcher for reading compression codes.
31///
32/// This enum acts as a two-stage dispatcher to read values from a
33/// compressed bitstream. It is designed to handle all valid codecs supported by
34/// [`dsi-bitstream`](https://crates.io/crates/dsi-bitstream), ensuring correctness while maximizing performance.
35///
36/// ### Dispatch Strategy
37///
38/// 1.  **Fast Path ([`CodecReader::Fast`])**: For common codecs
39///     (e.g., Gamma, Delta, or Zeta with small parameters), [`dsi-bitstream`](https://crates.io/crates/dsi-bitstream)
40///     provides pre-compiled function pointers via [`FuncCodeReader`]. This path
41///     avoids the overhead of a `match` statement on every read operation, as the
42///     correct function is resolved once at creation time.
43///
44/// 2.  **Slow Path ([`CodecReader::Slow`])**: For codecs with parameters outside of
45///     the pre-compiled set (e.g., `Golomb { b: 15 }`), the fast path is not
46///     available. In this case, the dispatcher falls back to storing the [`Codes`]
47///     enum variant directly. Each read operation then uses a `match` statement to
48///     call the appropriate decoding function. While slightly slower due to the
49///     runtime dispatch, this ensures that any validly created `IntVec` can be read.
50///
51/// This hybrid approach guarantees that the reader will never panic due to an
52/// "unsupported code" error, which was a critical issue in previous implementations.
53/// The `new` constructor automatically selects the appropriate path.
54pub(super) enum CodecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
55where
56    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
57        + CodesRead<E>
58        + BitSeek<Error = core::convert::Infallible>,
59{
60    /// Fast-path reader using a pre-resolved function pointer.
61    Fast(FuncCodeReader<E, IntVecBitReader<'a, E>>),
62    /// Fallback reader using dynamic dispatch on the `Codes` enum.
63    Slow(Codes),
64    /// Zero-sized marker to carry the generic parameters, ensuring type safety
65    /// and allowing the compiler to properly manage lifetimes.
66    _Phantom(PhantomData<(&'a B, T)>),
67}
68
69impl<T: Storable, E: Endianness, B: AsRef<[u64]>> CodecReader<'_, T, E, B>
70where
71    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
72        + CodesRead<E>
73        + BitSeek<Error = core::convert::Infallible>,
74{
75    /// Creates a new [`CodecReader`], automatically selecting the fastest available
76    /// dispatch path for the given codec.
77    ///
78    /// This constructor attempts to create a high-performance [`FuncCodeReader`].
79    /// If the codec is not supported by the fast path (e.g., it has uncommon
80    /// parameters), it falls back to the dynamic dispatch mechanism.
81    /// This method will not panic.
82    pub(super) fn new(code: Codes) -> Self {
83        match FuncCodeReader::new(code) {
84            Ok(fast_reader) => Self::Fast(fast_reader),
85            Err(_) => Self::Slow(code),
86        }
87    }
88}
89
90impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> StaticCodeRead<E, IntVecBitReader<'a, E>>
91    for CodecReader<'a, T, E, B>
92where
93    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
94        + CodesRead<E>
95        + BitSeek<Error = core::convert::Infallible>,
96{
97    #[inline]
98    fn read(&self, reader: &mut IntVecBitReader<'a, E>) -> Result<u64, core::convert::Infallible> {
99        match self {
100            // If we have a function pointer, call it directly. This is the fast path.
101            Self::Fast(func_reader) => func_reader.read(reader),
102            // Otherwise, use the slower dynamic dispatch. This is the fallback path.
103            Self::Slow(code) => code.read(reader),
104            // This variant is never constructed, but is needed for the type system.
105            Self::_Phantom(_) => unreachable!(),
106        }
107    }
108}
109
110/// A stateful reader for an `IntVec` that provides fast random access.
111///
112/// This reader is created by the [`IntVec::reader`](super::IntVec::reader)
113/// method. It maintains an internal, reusable bitstream reader, making it highly
114/// efficient for performing multiple random lookups where the access pattern is
115/// not known ahead of time.
116///
117/// # Examples
118///
119/// ```
120/// use compressed_intvec::variable::{IntVec, UIntVec};
121///
122/// let data: Vec<u32> = (0..100).rev().collect(); // Data is not sequential
123/// let vec: UIntVec<u32> = IntVec::from_slice(&data).unwrap();
124///
125/// // Create a reusable reader
126/// let mut reader = vec.reader();
127///
128/// // Perform multiple random reads efficiently
129/// assert_eq!(reader.get(99).unwrap(), Some(0));
130/// assert_eq!(reader.get(0).unwrap(), Some(99));
131/// assert_eq!(reader.get(50).unwrap(), Some(49));
132/// ```
133pub struct IntVecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
134where
135    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
136        + CodesRead<E>
137        + BitSeek<Error = core::convert::Infallible>,
138{
139    /// A reference to the parent `IntVec`.
140    pub(super) intvec: &'a IntVec<T, E, B>,
141    /// The stateful, reusable bitstream reader.
142    pub(super) reader: IntVecBitReader<'a, E>,
143    /// The hybrid dispatcher that handles codec reading.
144    pub(super) code_reader: CodecReader<'a, T, E, B>,
145}
146
147impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> IntVecReader<'a, T, E, B>
148where
149    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
150        + CodesRead<E>
151        + BitSeek<Error = core::convert::Infallible>,
152{
153    /// Creates a new `IntVecReader`.
154    pub(super) fn new(intvec: &'a IntVec<T, E, B>) -> Self {
155        let bit_reader = IntVecBitReader::new(dsi_bitstream::impls::MemWordReader::new(
156            intvec.data.as_ref(),
157        ));
158        // This robustly selects the best available dispatch strategy.
159        let code_reader = CodecReader::new(intvec.encoding);
160        Self {
161            intvec,
162            reader: bit_reader,
163            code_reader,
164        }
165    }
166
167    /// Retrieves the element at `index`, or `None` if out of bounds.
168    ///
169    /// This method leverages the stateful nature of the reader to perform efficient
170    /// random access by seeking to the nearest preceding sample point and decoding
171    /// sequentially from there.
172    #[inline]
173    pub fn get(&mut self, index: usize) -> Result<Option<T>, IntVecError> {
174        if index >= self.intvec.len {
175            return Ok(None);
176        }
177        // SAFETY: The bounds check has been performed.
178        let value = unsafe { self.get_unchecked(index) };
179        Ok(Some(value))
180    }
181
182    /// Retrieves the element at `index` without bounds checking.
183    ///
184    /// # Safety
185    ///
186    /// Calling this method with an out-of-bounds `index` is undefined behavior.
187    /// The caller must ensure that `index < self.intvec.len()`.
188    #[inline]
189    pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
190        debug_assert!(
191            index < self.intvec.len(),
192            "Index out of bounds: index was {} but length was {}",
193            index,
194            self.intvec.len()
195        );
196
197        let k = self.intvec.k;
198        let sample_index = index / k;
199        // SAFETY: The caller guarantees that `index` is in bounds, which implies
200        // that `sample_index` is also a valid index into the samples vector.
201        let start_bit = self.intvec.samples.get_unchecked(sample_index);
202        let start_index = sample_index * k;
203
204        // The underlying bitstream operations are infallible, so unwrap is safe.
205        self.reader.set_bit_pos(start_bit).unwrap();
206
207        // Sequentially decode elements from the sample point up to the target index.
208        for _ in start_index..index {
209            // We use the hybrid dispatcher here. It will either call a function
210            // pointer or use a match statement, depending on the codec.
211            self.code_reader.read(&mut self.reader).unwrap();
212        }
213        // Read the target value.
214        let word = self.code_reader.read(&mut self.reader).unwrap();
215        Storable::from_word(word)
216    }
217}