compressed_intvec/variable/seq_reader.rs
1//! A stateful reader for efficient, sequential access into an [`IntVec`].
2//!
3//! This module provides [`IntVecSeqReader`], a reusable reader that is
4//! specifically optimized for access patterns that are sequential or have a high
5//! degree of locality (i.e., indices are strictly increasing or close to each other).
6//!
7//! # Performance
8//!
9//! [`IntVecSeqReader`] maintains an internal state of the current decoding position.
10//! When a new [`get`](super::IntVec::get) request is made, it decides whether to:
11//!
12//! 1. **Decode Forward (Fast Path):** If the requested index is at or after the
13//! current position and within the same sample block, the reader decodes
14//! forward from its last position. This avoids a costly seek operation and
15//! is the primary optimization.
16//!
17//! 2. **Seek and Decode (Fallback):** If the requested index is far away or
18//! requires moving backward, the reader falls back to seeking to the
19//! nearest sample point and decoding from there, just like [`IntVecReader`].
20//!
21//! This makes it very efficient for iterating through indices that are
22//! sorted or clustered together.
23//!
24//! [`IntVec`]: crate::variable::IntVec
25//! [`IntVecReader`]: crate::variable::reader::IntVecReader
26
27use super::{
28 reader::CodecReader, // Import the robust hybrid dispatcher
29 traits::Storable,
30 IntVec, IntVecBitReader, IntVecError,
31};
32use dsi_bitstream::{
33 dispatch::{CodesRead, StaticCodeRead},
34 prelude::{BitRead, BitSeek, Endianness},
35};
36
37/// A stateful, sequential reader for an [`IntVec`] optimized for forward access.
38///
39/// This reader is created by the [`seq_reader`](super::IntVec::seq_reader)
40/// method. It maintains an internal cursor corresponding to the last-read
41/// element's position, making it highly efficient for sequential or
42/// mostly-forward access patterns.
43///
44/// It is a more specialized tool than [`IntVecReader`](super::reader::IntVecReader).
45///
46/// # Performance
47///
48/// [`IntVecSeqReader`] maintains an internal state of the current decoding position.
49/// When a new [`get`](super::IntVec::get) request is made, it decides whether to:
50///
51/// 1. **Decode Forward (Fast Path):** If the requested index is at or after the
52/// current position and within the same sample block, the reader decodes
53/// forward from its last position. This avoids a costly seek operation and
54/// is the primary optimization.
55///
56/// 2. **Seek and Decode (Fallback):** If the requested index is far away or
57/// requires moving backward, the reader falls back to seeking to the
58/// nearest sample point and decoding from there, just like [`IntVecReader`](super::reader::IntVecReader).
59///
60/// # Examples
61///
62/// ```
63/// use compressed_intvec::variable::{IntVec, UIntVec};
64///
65/// let data: Vec<u32> = (0..100).collect();
66/// let vec: UIntVec<u32> = IntVec::from_slice(&data).unwrap();
67///
68/// // Create a reader optimized for sequential access
69/// let mut seq_reader = vec.seq_reader();
70///
71/// // Accessing indices in increasing order is very efficient
72/// assert_eq!(seq_reader.get(10).unwrap(), Some(10));
73/// assert_eq!(seq_reader.get(15).unwrap(), Some(15)); // Decodes forward from index 10
74/// assert_eq!(seq_reader.get(90).unwrap(), Some(90)); // Jumps to a new sample
75/// ```
76pub struct IntVecSeqReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
77where
78 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
79 + CodesRead<E>
80 + BitSeek<Error = core::convert::Infallible>,
81{
82 /// Immutable reference to the parent IntVec.
83 intvec: &'a IntVec<T, E, B>,
84 /// The stateful, reusable bitstream reader.
85 reader: IntVecBitReader<'a, E>,
86 /// The hybrid dispatcher that handles codec reading robustly.
87 code_reader: CodecReader<'a, T, E, B>,
88 /// The index of the element *after* the one most recently read. This acts
89 /// as a cursor for the current decoding position.
90 current_index: usize,
91}
92
93impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> IntVecSeqReader<'a, T, E, B>
94where
95 for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
96 + CodesRead<E>
97 + BitSeek<Error = core::convert::Infallible>,
98{
99 /// Creates a new `IntVecSeqReader`.
100 pub(super) fn new(intvec: &'a IntVec<T, E, B>) -> Self {
101 // Instantiate the hybrid dispatcher. This will not panic, as it falls
102 // back to a slower but universally compatible method if the codec's
103 // parameters are not supported by the fast path.
104 let code_reader = CodecReader::new(intvec.encoding);
105 Self {
106 intvec,
107 reader: IntVecBitReader::new(dsi_bitstream::impls::MemWordReader::new(
108 intvec.data.as_ref(),
109 )),
110 code_reader,
111 current_index: 0,
112 }
113 }
114
115 /// Retrieves the element at `index`, or `None` if out of bounds.
116 ///
117 /// This method leverages the reader's internal state to optimize access,
118 /// especially for sequential reads.
119 pub fn get(&mut self, index: usize) -> Result<Option<T>, IntVecError> {
120 if index >= self.intvec.len {
121 return Ok(None);
122 }
123 // SAFETY: The bounds check has been performed.
124 Ok(Some(unsafe { self.get_unchecked(index) }))
125 }
126
127 /// Retrieves the element at `index` without bounds checking.
128 ///
129 /// # Safety
130 ///
131 /// Calling this method with an out-of-bounds index is undefined behavior.
132 /// The caller must ensure that `index < self.intvec.len()`.
133 pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
134 debug_assert!(
135 index < self.intvec.len,
136 "Index out of bounds: index was {} but length was {}",
137 index,
138 self.intvec.len
139 );
140
141 let k = self.intvec.k;
142 let target_sample_block = index / k;
143 let current_sample_block = if self.current_index == 0 {
144 // A special case for the very first access.
145 // We treat it as being in a "different" block to force a seek.
146 usize::MAX
147 } else {
148 (self.current_index - 1) / k
149 };
150
151 // This is the core optimization: if the target index is ahead of the
152 // current position and within the same sample block, we can just
153 // decode forward instead of performing a full seek.
154 if index < self.current_index || target_sample_block != current_sample_block {
155 // Slow Path: A seek is required because we are moving backward or
156 // jumping to a different sample block.
157 // SAFETY: The public-facing get() performs bounds checks, and
158 // internal callers are expected to uphold the same contract.
159 let start_bit = self.intvec.samples.get_unchecked(target_sample_block);
160 self.reader.set_bit_pos(start_bit).unwrap();
161 self.current_index = target_sample_block * k;
162 }
163
164 // Decode and discard intermediate elements. The hybrid dispatcher
165 // will use the fastest available method.
166 for _ in self.current_index..index {
167 self.code_reader.read(&mut self.reader).unwrap();
168 }
169 // Read the target value.
170 let word = self.code_reader.read(&mut self.reader).unwrap();
171 // Update the current index to the next position.
172 self.current_index = index + 1;
173 Storable::from_word(word)
174 }
175}