Skip to main content

compressed_intvec/variable/
reader.rs

1//! A reader for efficient, repeated random access into an [`VarVec`].
2//!
3//! This module provides [`VarVecReader`], a reusable reader that is designed to
4//! optimize random access performance.
5//!
6//! # Performance
7//!
8//! A standard call to [`get`](super::VarVec::get) is convenient, but it
9//! creates and discards an internal bitstream reader for each call. When
10//! performing many random lookups, this can introduce significant overhead.
11//!
12//! [`VarVecReader`] avoids this by maintaining a persistent, reusable
13//! reader instance. This amortizes the setup cost across multiple `get` operations,
14//! making it ideal for access patterns where lookup indices are not known in
15//! advance (e.g., graph traversals, pointer chasing).
16//!
17//! For reading a predefined list of indices, [`get_many`](super::VarVec::get_many)
18//! is generally more efficient, as it can pre-sort the indices for a single
19//! sequential scan.
20//!
21//! [`VarVec`]: crate::variable::VarVec
22
23use super::{traits::Storable, VarVec, VarVecError};
24use crate::common::codec_reader::{CodecReader, VarVecBitReader};
25use dsi_bitstream::{
26    dispatch::{CodesRead, StaticCodeRead},
27    prelude::{BitRead, BitSeek, Endianness},
28};
29use std::fmt;
30
31/// A stateful reader for a [`VarVec`] that provides fast random access.
32///
33/// This reader is created by the [`VarVec::reader`](super::VarVec::reader)
34/// method. It maintains an internal, reusable bitstream reader, making it highly
35/// efficient for performing multiple random lookups where the access pattern is
36/// not known ahead of time.
37///
38/// # Examples
39///
40/// ```
41/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
42/// use compressed_intvec::variable::{VarVec, UVarVec};
43///
44/// let data: Vec<u32> = (0..100).rev().collect(); // Data is not sequential
45/// let vec: UVarVec<u32> = VarVec::from_slice(&data)?;
46///
47/// // Create a reusable reader
48/// let mut reader = vec.reader();
49///
50/// // Perform multiple random reads efficiently
51/// assert_eq!(reader.get(99)?, Some(0));
52/// assert_eq!(reader.get(0)?, Some(99));
53/// assert_eq!(reader.get(50)?, Some(49));
54/// # Ok(())
55/// # }
56/// ```
57pub struct VarVecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
58where
59    for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
60        + CodesRead<E>
61        + BitSeek<Error = core::convert::Infallible>,
62{
63    /// A reference to the parent [`VarVec`].
64    pub(super) intvec: &'a VarVec<T, E, B>,
65    /// The stateful, reusable bitstream reader.
66    pub(super) reader: VarVecBitReader<'a, E>,
67    /// The hybrid dispatcher that handles codec reading.
68    pub(super) code_reader: CodecReader<'a, E>,
69}
70
71impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> VarVecReader<'a, T, E, B>
72where
73    for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
74        + CodesRead<E>
75        + BitSeek<Error = core::convert::Infallible>,
76{
77    /// Creates a new `VarVecReader`.
78    pub(super) fn new(intvec: &'a VarVec<T, E, B>) -> Self {
79        let bit_reader = VarVecBitReader::new(dsi_bitstream::impls::MemWordReader::new_inf(
80            intvec.data.as_ref(),
81        ));
82        // This robustly selects the best available dispatch strategy.
83        let code_reader = CodecReader::new(intvec.encoding);
84        Self {
85            intvec,
86            reader: bit_reader,
87            code_reader,
88        }
89    }
90
91    /// Retrieves the element at `index`, or `None` if out of bounds.
92    ///
93    /// This method leverages the stateful nature of the reader to perform efficient
94    /// random access by seeking to the nearest preceding sample point and decoding
95    /// sequentially from there.
96    #[inline]
97    pub fn get(&mut self, index: usize) -> Result<Option<T>, VarVecError> {
98        if index >= self.intvec.len {
99            return Ok(None);
100        }
101        // SAFETY: The bounds check has been performed.
102        let value = unsafe { self.get_unchecked(index) };
103        Ok(Some(value))
104    }
105
106    /// Retrieves the element at `index` without bounds checking.
107    ///
108    /// # Safety
109    ///
110    /// Calling this method with an out-of-bounds `index` is undefined behavior.
111    /// The caller must ensure that `index < self.intvec.len()`.
112    #[inline]
113    pub unsafe fn get_unchecked(&mut self, index: usize) -> T {
114        debug_assert!(
115            index < self.intvec.len(),
116            "Index out of bounds: index was {} but length was {}",
117            index,
118            self.intvec.len()
119        );
120
121        let k = self.intvec.k;
122        let (sample_index, start_index) = if k.is_power_of_two() {
123            let k_exp = k.trailing_zeros();
124            let si = index >> k_exp;
125            (si, si << k_exp)
126        } else {
127            let si = index / k;
128            (si, si * k)
129        };
130        // SAFETY: The caller guarantees that `index` is in bounds, which implies
131        // that `sample_index` is also a valid index into the samples vector.
132        let start_bit = unsafe { self.intvec.samples.get_unchecked(sample_index) };
133
134        // The underlying bitstream operations are infallible, so unwrap is safe.
135        self.reader.set_bit_pos(start_bit).unwrap();
136
137        // Sequentially decode elements from the sample point up to the target index.
138        for _ in start_index..index {
139            // We use the hybrid dispatcher here. It will either call a function
140            // pointer or use a match statement, depending on the codec.
141            self.code_reader.read(&mut self.reader).unwrap();
142        }
143        // Read the target value.
144        let word = self.code_reader.read(&mut self.reader).unwrap();
145        Storable::from_word(word)
146    }
147}
148
149impl<T: Storable, E: Endianness, B: AsRef<[u64]>> fmt::Debug for VarVecReader<'_, T, E, B>
150where
151    for<'b> VarVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
152        + CodesRead<E>
153        + BitSeek<Error = core::convert::Infallible>,
154{
155    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156        f.debug_struct("VarVecReader").finish_non_exhaustive()
157    }
158}