compressed_intvec/variable/seq_reader.rs
1//! A stateful reader for efficient, sequential access into an [`VarVec`].
2//!
3//! This module provides [`VarVecSeqReader`], a reusable reader that is
4//! specifically optimized for access patterns that are sequential or have a high
5//! degree of locality (i.e., indices are strictly increasing or close to each other).
6//!
7//! # Performance
8//!
9//! [`VarVecSeqReader`] maintains an internal state of the current decoding position.
10//! When a new [`get`](super::VarVec::get) request is made, it decides whether to:
11//!
12//! 1. **Decode Forward (Fast Path):** If the requested index is at or after the
13//! current position and within the same sample block, the reader decodes
14//! forward from its last position. This avoids a costly seek operation and
15//! is the primary optimization.
16//!
17//! 2. **Seek and Decode (Fallback):** If the requested index is far away or
18//! requires moving backward, the reader falls back to seeking to the
19//! nearest sample point and decoding from there, just like [`VarVecReader`].
20//!
21//! This makes it very efficient for iterating through indices that are
22//! sorted or clustered together.
23//!
24//! [`VarVec`]: crate::variable::VarVec
25//! [`VarVecReader`]: crate::variable::reader::VarVecReader
26
27use super::{traits::Storable, VarVec, VarVecError};
28use crate::common::codec_reader::{CodecReader, VarVecBitReader};
29use dsi_bitstream::{
30 dispatch::{CodesRead, StaticCodeRead},
31 prelude::{BitRead, BitSeek, Endianness},
32};
33use std::fmt;
34
35/// A stateful, sequential reader for an [`VarVec`] optimized for forward access.
36///
37/// This reader is created by the [`seq_reader`](super::VarVec::seq_reader)
38/// method. It maintains an internal cursor corresponding to the last-read
39/// element's position, making it highly efficient for sequential or
40/// mostly-forward access patterns.
41///
42/// It is a more specialized tool than [`VarVecReader`](super::reader::VarVecReader).
43///
44/// # Performance
45///
46/// [`VarVecSeqReader`] maintains an internal state of the current decoding position.
47/// When a new [`get`](super::VarVec::get) request is made, it decides whether to:
48///
49/// 1. **Decode Forward (Fast Path):** If the requested index is at or after the
50/// current position and within the same sample block, the reader decodes
51/// forward from its last position. This avoids a costly seek operation and
52/// is the primary optimization.
53///
54/// 2. **Seek and Decode (Fallback):** If the requested index is far away or
55/// requires moving backward, the reader falls back to seeking to the
56/// nearest sample point and decoding from there, just like [`VarVecReader`](super::reader::VarVecReader).
57///
58/// # Examples
59///
60/// ```
61/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
62/// use compressed_intvec::variable::{VarVec, UVarVec};
63///
64/// let data: Vec<u32> = (0..100).collect();
65/// let vec: UVarVec<u32> = VarVec::from_slice(&data)?;
66///
67/// // Create a reader optimized for sequential access
68/// let mut seq_reader = vec.seq_reader();
69///
70/// // Accessing indices in increasing order is very efficient
71/// assert_eq!(seq_reader.get(10)?, Some(10));
72/// assert_eq!(seq_reader.get(15)?, Some(15)); // Decodes forward from index 10
73/// assert_eq!(seq_reader.get(90)?, Some(90)); // Jumps to a new sample
74/// # Ok(())
75/// # }
76/// ```
77pub struct VarVecSeqReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
78where
79 for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
80 + CodesRead<E>
81 + BitSeek<Error = core::convert::Infallible>,
82{
83 /// Immutable reference to the parent VarVec.
84 intvec: &'a VarVec<T, E, B>,
85 /// The stateful, reusable bitstream reader.
86 reader: VarVecBitReader<'a, E>,
87 /// The hybrid dispatcher that handles codec reading robustly.
88 code_reader: CodecReader<'a, E>,
89 /// The index of the element *after* the one most recently read. This acts
90 /// as a cursor for the current decoding position.
91 current_index: usize,
92}
93
94impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> VarVecSeqReader<'a, T, E, B>
95where
96 for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
97 + CodesRead<E>
98 + BitSeek<Error = core::convert::Infallible>,
99{
100 /// Creates a new `VarVecSeqReader`.
101 pub(super) fn new(intvec: &'a VarVec<T, E, B>) -> Self {
102 // Instantiate the hybrid dispatcher. This will not panic, as it falls
103 // back to a slower but universally compatible method if the codec's
104 // parameters are not supported by the fast path.
105 let code_reader = CodecReader::new(intvec.encoding);
106 Self {
107 intvec,
108 reader: VarVecBitReader::new(dsi_bitstream::impls::MemWordReader::new_inf(
109 intvec.data.as_ref(),
110 )),
111 code_reader,
112 current_index: 0,
113 }
114 }
115
116 /// Retrieves the element at `index`, or `None` if out of bounds.
117 ///
118 /// This method leverages the reader's internal state to optimize access,
119 /// especially for sequential reads.
120 pub fn get(&mut self, index: usize) -> Result<Option<T>, VarVecError> {
121 if index >= self.intvec.len {
122 return Ok(None);
123 }
124 // SAFETY: The bounds check has been performed.
125 Ok(Some(unsafe { self.get_unchecked(index) }))
126 }
127
128 /// Retrieves the element at `index` without bounds checking.
129 ///
130 /// # Safety
131 ///
132 /// Calling this method with an out-of-bounds index is undefined behavior.
133 /// The caller must ensure that `index < self.intvec.len()`.
134 pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
135 debug_assert!(
136 index < self.intvec.len,
137 "Index out of bounds: index was {} but length was {}",
138 index,
139 self.intvec.len
140 );
141
142 let k = self.intvec.k;
143 let (target_sample_block, current_sample_block, block_start) = if k.is_power_of_two() {
144 let k_exp = k.trailing_zeros();
145 let tsb = index >> k_exp;
146 let csb = if self.current_index == 0 {
147 // A special case for the very first access.
148 // We treat it as being in a "different" block to force a seek.
149 usize::MAX
150 } else {
151 (self.current_index - 1) >> k_exp
152 };
153 (tsb, csb, tsb << k_exp)
154 } else {
155 let tsb = index / k;
156 let csb = if self.current_index == 0 {
157 // A special case for the very first access.
158 // We treat it as being in a "different" block to force a seek.
159 usize::MAX
160 } else {
161 (self.current_index - 1) / k
162 };
163 (tsb, csb, tsb * k)
164 };
165
166 // This is the core optimization: if the target index is ahead of the
167 // current position and within the same sample block, we can just
168 // decode forward instead of performing a full seek.
169 if index < self.current_index || target_sample_block != current_sample_block {
170 // Slow Path: A seek is required because we are moving backward or
171 // jumping to a different sample block.
172 // SAFETY: The public-facing get() performs bounds checks, and
173 // internal callers are expected to uphold the same contract.
174 let start_bit = unsafe { self.intvec.samples.get_unchecked(target_sample_block) };
175 self.reader.set_bit_pos(start_bit).unwrap();
176 self.current_index = block_start;
177 }
178
179 // Decode and discard intermediate elements. The hybrid dispatcher
180 // will use the fastest available method.
181 for _ in self.current_index..index {
182 self.code_reader.read(&mut self.reader).unwrap();
183 }
184 // Read the target value.
185 let word = self.code_reader.read(&mut self.reader).unwrap();
186 // Update the current index to the next position.
187 self.current_index = index + 1;
188 Storable::from_word(word)
189 }
190}
191
192impl<T: Storable, E: Endianness, B: AsRef<[u64]>> fmt::Debug for VarVecSeqReader<'_, T, E, B>
193where
194 for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
195 + CodesRead<E>
196 + BitSeek<Error = core::convert::Infallible>,
197{
198 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199 f.debug_struct("VarVecSeqReader").finish_non_exhaustive()
200 }
201}