Skip to main content

compressed_intvec/variable/
seq_reader.rs

1//! A stateful reader for efficient, sequential access into an [`VarVec`].
2//!
3//! This module provides [`VarVecSeqReader`], a reusable reader that is
4//! specifically optimized for access patterns that are sequential or have a high
5//! degree of locality (i.e., indices are strictly increasing or close to each other).
6//!
7//! # Performance
8//!
9//! [`VarVecSeqReader`] maintains an internal state of the current decoding position.
10//! When a new [`get`](super::VarVec::get) request is made, it decides whether to:
11//!
12//! 1.  **Decode Forward (Fast Path):** If the requested index is at or after the
13//!     current position and within the same sample block, the reader decodes
14//!     forward from its last position. This avoids a costly seek operation and
15//!     is the primary optimization.
16//!
17//! 2.  **Seek and Decode (Fallback):** If the requested index is far away or
18//!     requires moving backward, the reader falls back to seeking to the
19//!     nearest sample point and decoding from there, just like [`VarVecReader`].
20//!
21//! This makes it very efficient for iterating through indices that are
22//! sorted or clustered together.
23//!
24//! [`VarVec`]: crate::variable::VarVec
25//! [`VarVecReader`]: crate::variable::reader::VarVecReader
26
27use super::{traits::Storable, VarVec, VarVecError};
28use crate::common::codec_reader::{CodecReader, VarVecBitReader};
29use dsi_bitstream::{
30    dispatch::{CodesRead, StaticCodeRead},
31    prelude::{BitRead, BitSeek, Endianness},
32};
33use std::fmt;
34
35/// A stateful, sequential reader for an [`VarVec`] optimized for forward access.
36///
37/// This reader is created by the [`seq_reader`](super::VarVec::seq_reader)
38/// method. It maintains an internal cursor corresponding to the last-read
39/// element's position, making it highly efficient for sequential or
40/// mostly-forward access patterns.
41///
42/// It is a more specialized tool than [`VarVecReader`](super::reader::VarVecReader).
43///
44/// # Performance
45///
46/// [`VarVecSeqReader`] maintains an internal state of the current decoding position.
47/// When a new [`get`](super::VarVec::get) request is made, it decides whether to:
48///
49/// 1.  **Decode Forward (Fast Path):** If the requested index is at or after the
50///     current position and within the same sample block, the reader decodes
51///     forward from its last position. This avoids a costly seek operation and
52///     is the primary optimization.
53///
54/// 2.  **Seek and Decode (Fallback):** If the requested index is far away or
55///     requires moving backward, the reader falls back to seeking to the
56///     nearest sample point and decoding from there, just like [`VarVecReader`](super::reader::VarVecReader).
57///
58/// # Examples
59///
60/// ```
61/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
62/// use compressed_intvec::variable::{VarVec, UVarVec};
63///
64/// let data: Vec<u32> = (0..100).collect();
65/// let vec: UVarVec<u32> = VarVec::from_slice(&data)?;
66///
67/// // Create a reader optimized for sequential access
68/// let mut seq_reader = vec.seq_reader();
69///
70/// // Accessing indices in increasing order is very efficient
71/// assert_eq!(seq_reader.get(10)?, Some(10));
72/// assert_eq!(seq_reader.get(15)?, Some(15)); // Decodes forward from index 10
73/// assert_eq!(seq_reader.get(90)?, Some(90)); // Jumps to a new sample
74/// # Ok(())
75/// # }
76/// ```
77pub struct VarVecSeqReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
78where
79    for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
80        + CodesRead<E>
81        + BitSeek<Error = core::convert::Infallible>,
82{
83    /// Immutable reference to the parent VarVec.
84    intvec: &'a VarVec<T, E, B>,
85    /// The stateful, reusable bitstream reader.
86    reader: VarVecBitReader<'a, E>,
87    /// The hybrid dispatcher that handles codec reading robustly.
88    code_reader: CodecReader<'a, E>,
89    /// The index of the element *after* the one most recently read. This acts
90    /// as a cursor for the current decoding position.
91    current_index: usize,
92}
93
94impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> VarVecSeqReader<'a, T, E, B>
95where
96    for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
97        + CodesRead<E>
98        + BitSeek<Error = core::convert::Infallible>,
99{
100    /// Creates a new `VarVecSeqReader`.
101    pub(super) fn new(intvec: &'a VarVec<T, E, B>) -> Self {
102        // Instantiate the hybrid dispatcher. This will not panic, as it falls
103        // back to a slower but universally compatible method if the codec's
104        // parameters are not supported by the fast path.
105        let code_reader = CodecReader::new(intvec.encoding);
106        Self {
107            intvec,
108            reader: VarVecBitReader::new(dsi_bitstream::impls::MemWordReader::new_inf(
109                intvec.data.as_ref(),
110            )),
111            code_reader,
112            current_index: 0,
113        }
114    }
115
116    /// Retrieves the element at `index`, or `None` if out of bounds.
117    ///
118    /// This method leverages the reader's internal state to optimize access,
119    /// especially for sequential reads.
120    pub fn get(&mut self, index: usize) -> Result<Option<T>, VarVecError> {
121        if index >= self.intvec.len {
122            return Ok(None);
123        }
124        // SAFETY: The bounds check has been performed.
125        Ok(Some(unsafe { self.get_unchecked(index) }))
126    }
127
128    /// Retrieves the element at `index` without bounds checking.
129    ///
130    /// # Safety
131    ///
132    /// Calling this method with an out-of-bounds index is undefined behavior.
133    /// The caller must ensure that `index < self.intvec.len()`.
134    pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
135        debug_assert!(
136            index < self.intvec.len,
137            "Index out of bounds: index was {} but length was {}",
138            index,
139            self.intvec.len
140        );
141
142        let k = self.intvec.k;
143        let (target_sample_block, current_sample_block, block_start) = if k.is_power_of_two() {
144            let k_exp = k.trailing_zeros();
145            let tsb = index >> k_exp;
146            let csb = if self.current_index == 0 {
147                // A special case for the very first access.
148                // We treat it as being in a "different" block to force a seek.
149                usize::MAX
150            } else {
151                (self.current_index - 1) >> k_exp
152            };
153            (tsb, csb, tsb << k_exp)
154        } else {
155            let tsb = index / k;
156            let csb = if self.current_index == 0 {
157                // A special case for the very first access.
158                // We treat it as being in a "different" block to force a seek.
159                usize::MAX
160            } else {
161                (self.current_index - 1) / k
162            };
163            (tsb, csb, tsb * k)
164        };
165
166        // This is the core optimization: if the target index is ahead of the
167        // current position and within the same sample block, we can just
168        // decode forward instead of performing a full seek.
169        if index < self.current_index || target_sample_block != current_sample_block {
170            // Slow Path: A seek is required because we are moving backward or
171            // jumping to a different sample block.
172            // SAFETY: The public-facing get() performs bounds checks, and
173            // internal callers are expected to uphold the same contract.
174            let start_bit = unsafe { self.intvec.samples.get_unchecked(target_sample_block) };
175            self.reader.set_bit_pos(start_bit).unwrap();
176            self.current_index = block_start;
177        }
178
179        // Decode and discard intermediate elements. The hybrid dispatcher
180        // will use the fastest available method.
181        for _ in self.current_index..index {
182            self.code_reader.read(&mut self.reader).unwrap();
183        }
184        // Read the target value.
185        let word = self.code_reader.read(&mut self.reader).unwrap();
186        // Update the current index to the next position.
187        self.current_index = index + 1;
188        Storable::from_word(word)
189    }
190}
191
192impl<T: Storable, E: Endianness, B: AsRef<[u64]>> fmt::Debug for VarVecSeqReader<'_, T, E, B>
193where
194    for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
195        + CodesRead<E>
196        + BitSeek<Error = core::convert::Infallible>,
197{
198    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199        f.debug_struct("VarVecSeqReader").finish_non_exhaustive()
200    }
201}