compressed_intvec/variable/reader.rs
1//! A reader for efficient, repeated random access into an [`VarVec`].
2//!
3//! This module provides [`VarVecReader`], a reusable reader that is designed to
4//! optimize random access performance.
5//!
6//! # Performance
7//!
8//! A standard call to [`get`](super::VarVec::get) is convenient, but it
9//! creates and discards an internal bitstream reader for each call. When
10//! performing many random lookups, this can introduce significant overhead.
11//!
12//! [`VarVecReader`] avoids this by maintaining a persistent, reusable
13//! reader instance. This amortizes the setup cost across multiple `get` operations,
14//! making it ideal for access patterns where lookup indices are not known in
15//! advance (e.g., graph traversals, pointer chasing).
16//!
17//! For reading a predefined list of indices, [`get_many`](super::VarVec::get_many)
18//! is generally more efficient, as it can pre-sort the indices for a single
19//! sequential scan.
20//!
21//! [`VarVec`]: crate::variable::VarVec
22
23use super::{traits::Storable, VarVec, VarVecError};
24use crate::common::codec_reader::{CodecReader, VarVecBitReader};
25use dsi_bitstream::{
26 dispatch::{CodesRead, StaticCodeRead},
27 prelude::{BitRead, BitSeek, Endianness},
28};
29use std::fmt;
30
31/// A stateful reader for a [`VarVec`] that provides fast random access.
32///
33/// This reader is created by the [`VarVec::reader`](super::VarVec::reader)
34/// method. It maintains an internal, reusable bitstream reader, making it highly
35/// efficient for performing multiple random lookups where the access pattern is
36/// not known ahead of time.
37///
38/// # Examples
39///
40/// ```
41/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
42/// use compressed_intvec::variable::{VarVec, UVarVec};
43///
44/// let data: Vec<u32> = (0..100).rev().collect(); // Data is not sequential
45/// let vec: UVarVec<u32> = VarVec::from_slice(&data)?;
46///
47/// // Create a reusable reader
48/// let mut reader = vec.reader();
49///
50/// // Perform multiple random reads efficiently
51/// assert_eq!(reader.get(99)?, Some(0));
52/// assert_eq!(reader.get(0)?, Some(99));
53/// assert_eq!(reader.get(50)?, Some(49));
54/// # Ok(())
55/// # }
56/// ```
57pub struct VarVecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
58where
59 for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
60 + CodesRead<E>
61 + BitSeek<Error = core::convert::Infallible>,
62{
63 /// A reference to the parent [`VarVec`].
64 pub(super) intvec: &'a VarVec<T, E, B>,
65 /// The stateful, reusable bitstream reader.
66 pub(super) reader: VarVecBitReader<'a, E>,
67 /// The hybrid dispatcher that handles codec reading.
68 pub(super) code_reader: CodecReader<'a, E>,
69}
70
71impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> VarVecReader<'a, T, E, B>
72where
73 for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
74 + CodesRead<E>
75 + BitSeek<Error = core::convert::Infallible>,
76{
77 /// Creates a new `VarVecReader`.
78 pub(super) fn new(intvec: &'a VarVec<T, E, B>) -> Self {
79 let bit_reader = VarVecBitReader::new(dsi_bitstream::impls::MemWordReader::new_inf(
80 intvec.data.as_ref(),
81 ));
82 // This robustly selects the best available dispatch strategy.
83 let code_reader = CodecReader::new(intvec.encoding);
84 Self {
85 intvec,
86 reader: bit_reader,
87 code_reader,
88 }
89 }
90
91 /// Retrieves the element at `index`, or `None` if out of bounds.
92 ///
93 /// This method leverages the stateful nature of the reader to perform efficient
94 /// random access by seeking to the nearest preceding sample point and decoding
95 /// sequentially from there.
96 #[inline]
97 pub fn get(&mut self, index: usize) -> Result<Option<T>, VarVecError> {
98 if index >= self.intvec.len {
99 return Ok(None);
100 }
101 // SAFETY: The bounds check has been performed.
102 let value = unsafe { self.get_unchecked(index) };
103 Ok(Some(value))
104 }
105
106 /// Retrieves the element at `index` without bounds checking.
107 ///
108 /// # Safety
109 ///
110 /// Calling this method with an out-of-bounds `index` is undefined behavior.
111 /// The caller must ensure that `index < self.intvec.len()`.
112 #[inline]
113 pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
114 debug_assert!(
115 index < self.intvec.len(),
116 "Index out of bounds: index was {} but length was {}",
117 index,
118 self.intvec.len()
119 );
120
121 let k = self.intvec.k;
122 let (sample_index, start_index) = if k.is_power_of_two() {
123 let k_exp = k.trailing_zeros();
124 let si = index >> k_exp;
125 (si, si << k_exp)
126 } else {
127 let si = index / k;
128 (si, si * k)
129 };
130 // SAFETY: The caller guarantees that `index` is in bounds, which implies
131 // that `sample_index` is also a valid index into the samples vector.
132 let start_bit = unsafe { self.intvec.samples.get_unchecked(sample_index) };
133
134 // The underlying bitstream operations are infallible, so unwrap is safe.
135 self.reader.set_bit_pos(start_bit).unwrap();
136
137 // Sequentially decode elements from the sample point up to the target index.
138 for _ in start_index..index {
139 // We use the hybrid dispatcher here. It will either call a function
140 // pointer or use a match statement, depending on the codec.
141 self.code_reader.read(&mut self.reader).unwrap();
142 }
143 // Read the target value.
144 let word = self.code_reader.read(&mut self.reader).unwrap();
145 Storable::from_word(word)
146 }
147}
148
149impl<T: Storable, E: Endianness, B: AsRef<[u64]>> fmt::Debug for VarVecReader<'_, T, E, B>
150where
151 for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
152 + CodesRead<E>
153 + BitSeek<Error = core::convert::Infallible>,
154{
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 f.debug_struct("VarVecReader").finish_non_exhaustive()
157 }
158}