compressed_intvec/variable/reader.rs
1//! A reader for efficient, repeated random access into an [`IntVec`].
2//!
3//! This module provides [`IntVecReader`], a reusable reader that is designed to
4//! optimize random access performance.
5//!
6//! # Performance
7//!
8//! A standard call to [`get`](super::IntVec::get) is convenient, but it
9//! creates and discards an internal bitstream reader for each call. When
10//! performing many random lookups, this can introduce significant overhead.
11//!
12//! [`IntVecReader`] avoids this by maintaining a persistent, reusable
13//! reader instance. This amortizes the setup cost across multiple `get` operations,
14//! making it ideal for access patterns where lookup indices are not known in
15//! advance (e.g., graph traversals, pointer chasing).
16//!
17//! For reading a predefined list of indices, [`get_many`](super::IntVec::get_many)
18//! is generally more efficient, as it can pre-sort the indices for a single
19//! sequential scan.
20//!
21//! [`IntVec`]: crate::variable::IntVec
22
23use super::{traits::Storable, IntVec, IntVecBitReader, IntVecError};
24use dsi_bitstream::{
25 dispatch::{Codes, CodesRead, FuncCodeReader, StaticCodeRead},
26 prelude::{BitRead, BitSeek, Endianness},
27};
28use std::marker::PhantomData;
29
30/// An internal hybrid dispatcher for reading compression codes.
31///
32/// This enum acts as a two-stage dispatcher to read values from a
33/// compressed bitstream. It is designed to handle all valid codecs supported by
34/// [`dsi-bitstream`](https://crates.io/crates/dsi-bitstream), ensuring correctness while maximizing performance.
35///
36/// ### Dispatch Strategy
37///
38/// 1. **Fast Path ([`CodecReader::Fast`])**: For common codecs
39/// (e.g., Gamma, Delta, or Zeta with small parameters), [`dsi-bitstream`](https://crates.io/crates/dsi-bitstream)
40/// provides pre-compiled function pointers via [`FuncCodeReader`]. This path
41/// avoids the overhead of a `match` statement on every read operation, as the
42/// correct function is resolved once at creation time.
43///
44/// 2. **Slow Path ([`CodecReader::Slow`])**: For codecs with parameters outside of
45/// the pre-compiled set (e.g., `Golomb { b: 15 }`), the fast path is not
46/// available. In this case, the dispatcher falls back to storing the [`Codes`]
47/// enum variant directly. Each read operation then uses a `match` statement to
48/// call the appropriate decoding function. While slightly slower due to the
49/// runtime dispatch, this ensures that any validly created `IntVec` can be read.
50///
51/// This hybrid approach guarantees that the reader will never panic due to an
52/// "unsupported code" error, which was a critical issue in previous implementations.
53/// The `new` constructor automatically selects the appropriate path.
54pub(super) enum CodecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
55where
56 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
57 + CodesRead<E>
58 + BitSeek<Error = core::convert::Infallible>,
59{
60 /// Fast-path reader using a pre-resolved function pointer.
61 Fast(FuncCodeReader<E, IntVecBitReader<'a, E>>),
62 /// Fallback reader using dynamic dispatch on the `Codes` enum.
63 Slow(Codes),
64 /// Zero-sized marker to carry the generic parameters, ensuring type safety
65 /// and allowing the compiler to properly manage lifetimes.
66 _Phantom(PhantomData<(&'a B, T)>),
67}
68
69impl<T: Storable, E: Endianness, B: AsRef<[u64]>> CodecReader<'_, T, E, B>
70where
71 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
72 + CodesRead<E>
73 + BitSeek<Error = core::convert::Infallible>,
74{
75 /// Creates a new [`CodecReader`], automatically selecting the fastest available
76 /// dispatch path for the given codec.
77 ///
78 /// This constructor attempts to create a high-performance [`FuncCodeReader`].
79 /// If the codec is not supported by the fast path (e.g., it has uncommon
80 /// parameters), it falls back to the dynamic dispatch mechanism.
81 /// This method will not panic.
82 pub(super) fn new(code: Codes) -> Self {
83 match FuncCodeReader::new(code) {
84 Ok(fast_reader) => Self::Fast(fast_reader),
85 Err(_) => Self::Slow(code),
86 }
87 }
88}
89
90impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> StaticCodeRead<E, IntVecBitReader<'a, E>>
91 for CodecReader<'a, T, E, B>
92where
93 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
94 + CodesRead<E>
95 + BitSeek<Error = core::convert::Infallible>,
96{
97 #[inline]
98 fn read(&self, reader: &mut IntVecBitReader<'a, E>) -> Result<u64, core::convert::Infallible> {
99 match self {
100 // If we have a function pointer, call it directly. This is the fast path.
101 Self::Fast(func_reader) => func_reader.read(reader),
102 // Otherwise, use the slower dynamic dispatch. This is the fallback path.
103 Self::Slow(code) => code.read(reader),
104 // This variant is never constructed, but is needed for the type system.
105 Self::_Phantom(_) => unreachable!(),
106 }
107 }
108}
109
110/// A stateful reader for an `IntVec` that provides fast random access.
111///
112/// This reader is created by the [`IntVec::reader`](super::IntVec::reader)
113/// method. It maintains an internal, reusable bitstream reader, making it highly
114/// efficient for performing multiple random lookups where the access pattern is
115/// not known ahead of time.
116///
117/// # Examples
118///
119/// ```
120/// use compressed_intvec::variable::{IntVec, UIntVec};
121///
122/// let data: Vec<u32> = (0..100).rev().collect(); // Data is not sequential
123/// let vec: UIntVec<u32> = IntVec::from_slice(&data).unwrap();
124///
125/// // Create a reusable reader
126/// let mut reader = vec.reader();
127///
128/// // Perform multiple random reads efficiently
129/// assert_eq!(reader.get(99).unwrap(), Some(0));
130/// assert_eq!(reader.get(0).unwrap(), Some(99));
131/// assert_eq!(reader.get(50).unwrap(), Some(49));
132/// ```
133pub struct IntVecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
134where
135 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
136 + CodesRead<E>
137 + BitSeek<Error = core::convert::Infallible>,
138{
139 /// A reference to the parent `IntVec`.
140 pub(super) intvec: &'a IntVec<T, E, B>,
141 /// The stateful, reusable bitstream reader.
142 pub(super) reader: IntVecBitReader<'a, E>,
143 /// The hybrid dispatcher that handles codec reading.
144 pub(super) code_reader: CodecReader<'a, T, E, B>,
145}
146
147impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> IntVecReader<'a, T, E, B>
148where
149 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
150 + CodesRead<E>
151 + BitSeek<Error = core::convert::Infallible>,
152{
153 /// Creates a new `IntVecReader`.
154 pub(super) fn new(intvec: &'a IntVec<T, E, B>) -> Self {
155 let bit_reader = IntVecBitReader::new(dsi_bitstream::impls::MemWordReader::new(
156 intvec.data.as_ref(),
157 ));
158 // This robustly selects the best available dispatch strategy.
159 let code_reader = CodecReader::new(intvec.encoding);
160 Self {
161 intvec,
162 reader: bit_reader,
163 code_reader,
164 }
165 }
166
167 /// Retrieves the element at `index`, or `None` if out of bounds.
168 ///
169 /// This method leverages the stateful nature of the reader to perform efficient
170 /// random access by seeking to the nearest preceding sample point and decoding
171 /// sequentially from there.
172 #[inline]
173 pub fn get(&mut self, index: usize) -> Result<Option<T>, IntVecError> {
174 if index >= self.intvec.len {
175 return Ok(None);
176 }
177 // SAFETY: The bounds check has been performed.
178 let value = unsafe { self.get_unchecked(index) };
179 Ok(Some(value))
180 }
181
182 /// Retrieves the element at `index` without bounds checking.
183 ///
184 /// # Safety
185 ///
186 /// Calling this method with an out-of-bounds `index` is undefined behavior.
187 /// The caller must ensure that `index < self.intvec.len()`.
188 #[inline]
189 pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
190 debug_assert!(
191 index < self.intvec.len(),
192 "Index out of bounds: index was {} but length was {}",
193 index,
194 self.intvec.len()
195 );
196
197 let k = self.intvec.k;
198 let sample_index = index / k;
199 // SAFETY: The caller guarantees that `index` is in bounds, which implies
200 // that `sample_index` is also a valid index into the samples vector.
201 let start_bit = self.intvec.samples.get_unchecked(sample_index);
202 let start_index = sample_index * k;
203
204 // The underlying bitstream operations are infallible, so unwrap is safe.
205 self.reader.set_bit_pos(start_bit).unwrap();
206
207 // Sequentially decode elements from the sample point up to the target index.
208 for _ in start_index..index {
209 // We use the hybrid dispatcher here. It will either call a function
210 // pointer or use a match statement, depending on the codec.
211 self.code_reader.read(&mut self.reader).unwrap();
212 }
213 // Read the target value.
214 let word = self.code_reader.read(&mut self.reader).unwrap();
215 Storable::from_word(word)
216 }
217}