compressed_intvec/variable/
seq_reader.rs

1//! A stateful reader for efficient, sequential access into an [`IntVec`].
2//!
3//! This module provides [`IntVecSeqReader`], a reusable reader that is
4//! specifically optimized for access patterns that are sequential or have a high
5//! degree of locality (i.e., indices are strictly increasing or close to each other).
6//!
7//! # Performance
8//!
9//! [`IntVecSeqReader`] maintains an internal state of the current decoding position.
10//! When a new [`get`](super::IntVec::get) request is made, it decides whether to:
11//!
12//! 1.  **Decode Forward (Fast Path):** If the requested index is at or after the
13//!     current position and within the same sample block, the reader decodes
14//!     forward from its last position. This avoids a costly seek operation and
15//!     is the primary optimization.
16//!
17//! 2.  **Seek and Decode (Fallback):** If the requested index is far away or
18//!     requires moving backward, the reader falls back to seeking to the
19//!     nearest sample point and decoding from there, just like [`IntVecReader`].
20//!
21//! This makes it very efficient for iterating through indices that are
22//! sorted or clustered together.
23//!
24//! [`IntVec`]: crate::variable::IntVec
25//! [`IntVecReader`]: crate::variable::reader::IntVecReader
26
27use super::{
28    reader::CodecReader, // Import the robust hybrid dispatcher
29    traits::Storable,
30    IntVec, IntVecBitReader, IntVecError,
31};
32use dsi_bitstream::{
33    dispatch::{CodesRead, StaticCodeRead},
34    prelude::{BitRead, BitSeek, Endianness},
35};
36
37/// A stateful, sequential reader for an [`IntVec`] optimized for forward access.
38///
39/// This reader is created by the [`seq_reader`](super::IntVec::seq_reader)
40/// method. It maintains an internal cursor corresponding to the last-read
41/// element's position, making it highly efficient for sequential or
42/// mostly-forward access patterns.
43///
44/// It is a more specialized tool than [`IntVecReader`](super::reader::IntVecReader).
45///
46/// # Performance
47///
48/// [`IntVecSeqReader`] maintains an internal state of the current decoding position.
49/// When a new [`get`](super::IntVec::get) request is made, it decides whether to:
50///
51/// 1.  **Decode Forward (Fast Path):** If the requested index is at or after the
52///     current position and within the same sample block, the reader decodes
53///     forward from its last position. This avoids a costly seek operation and
54///     is the primary optimization.
55///
56/// 2.  **Seek and Decode (Fallback):** If the requested index is far away or
57///     requires moving backward, the reader falls back to seeking to the
58///     nearest sample point and decoding from there, just like [`IntVecReader`](super::reader::IntVecReader).
59///
60/// # Examples
61///
62/// ```
63/// use compressed_intvec::variable::{IntVec, UIntVec};
64///
65/// let data: Vec<u32> = (0..100).collect();
66/// let vec: UIntVec<u32> = IntVec::from_slice(&data).unwrap();
67///
68/// // Create a reader optimized for sequential access
69/// let mut seq_reader = vec.seq_reader();
70///
71/// // Accessing indices in increasing order is very efficient
72/// assert_eq!(seq_reader.get(10).unwrap(), Some(10));
73/// assert_eq!(seq_reader.get(15).unwrap(), Some(15)); // Decodes forward from index 10
74/// assert_eq!(seq_reader.get(90).unwrap(), Some(90)); // Jumps to a new sample
75/// ```
76pub struct IntVecSeqReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
77where
78    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
79        + CodesRead<E>
80        + BitSeek<Error = core::convert::Infallible>,
81{
82    /// Immutable reference to the parent IntVec.
83    intvec: &'a IntVec<T, E, B>,
84    /// The stateful, reusable bitstream reader.
85    reader: IntVecBitReader<'a, E>,
86    /// The hybrid dispatcher that handles codec reading robustly.
87    code_reader: CodecReader<'a, T, E, B>,
88    /// The index of the element *after* the one most recently read. This acts
89    /// as a cursor for the current decoding position.
90    current_index: usize,
91}
92
93impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> IntVecSeqReader<'a, T, E, B>
94where
95    for<'b> IntVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
96        + CodesRead<E>
97        + BitSeek<Error = core::convert::Infallible>,
98{
99    /// Creates a new `IntVecSeqReader`.
100    pub(super) fn new(intvec: &'a IntVec<T, E, B>) -> Self {
101        // Instantiate the hybrid dispatcher. This will not panic, as it falls
102        // back to a slower but universally compatible method if the codec's
103        // parameters are not supported by the fast path.
104        let code_reader = CodecReader::new(intvec.encoding);
105        Self {
106            intvec,
107            reader: IntVecBitReader::new(dsi_bitstream::impls::MemWordReader::new(
108                intvec.data.as_ref(),
109            )),
110            code_reader,
111            current_index: 0,
112        }
113    }
114
115    /// Retrieves the element at `index`, or `None` if out of bounds.
116    ///
117    /// This method leverages the reader's internal state to optimize access,
118    /// especially for sequential reads.
119    pub fn get(&mut self, index: usize) -> Result<Option<T>, IntVecError> {
120        if index >= self.intvec.len {
121            return Ok(None);
122        }
123        // SAFETY: The bounds check has been performed.
124        Ok(Some(unsafe { self.get_unchecked(index) }))
125    }
126
127    /// Retrieves the element at `index` without bounds checking.
128    ///
129    /// # Safety
130    ///
131    /// Calling this method with an out-of-bounds index is undefined behavior.
132    /// The caller must ensure that `index < self.intvec.len()`.
133    pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
134        debug_assert!(
135            index < self.intvec.len,
136            "Index out of bounds: index was {} but length was {}",
137            index,
138            self.intvec.len
139        );
140
141        let k = self.intvec.k;
142        let target_sample_block = index / k;
143        let current_sample_block = if self.current_index == 0 {
144            // A special case for the very first access.
145            // We treat it as being in a "different" block to force a seek.
146            usize::MAX
147        } else {
148            (self.current_index - 1) / k
149        };
150
151        // This is the core optimization: if the target index is ahead of the
152        // current position and within the same sample block, we can just
153        // decode forward instead of performing a full seek.
154        if index < self.current_index || target_sample_block != current_sample_block {
155            // Slow Path: A seek is required because we are moving backward or
156            // jumping to a different sample block.
157            // SAFETY: The public-facing get() performs bounds checks, and
158            // internal callers are expected to uphold the same contract.
159            let start_bit = self.intvec.samples.get_unchecked(target_sample_block);
160            self.reader.set_bit_pos(start_bit).unwrap();
161            self.current_index = target_sample_block * k;
162        }
163
164        // Decode and discard intermediate elements. The hybrid dispatcher
165        // will use the fastest available method.
166        for _ in self.current_index..index {
167            self.code_reader.read(&mut self.reader).unwrap();
168        }
169        // Read the target value.
170        let word = self.code_reader.read(&mut self.reader).unwrap();
171        // Update the current index to the next position.
172        self.current_index = index + 1;
173        Storable::from_word(word)
174    }
175}