Skip to main content

compressed_intvec/seq/
reader.rs

1//! A reader for efficient, repeated random access into a [`SeqVec`].
2//!
3//! This module provides [`SeqVecReader`], a reusable, stateful reader designed
4//! to provide a convenient interface for performing multiple random sequence
5//! lookups with optimized reader reuse.
6//!
7//! # Stateful Design
8//!
9//! [`SeqVecReader`] maintains an internal bitstream reader and codec dispatcher,
10//! enabling efficient reuse across multiple sequence accesses. This design mirrors
11//! [`VarVecReader`](crate::variable::VarVecReader) in the `variable` module.
12//!
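//! - **`decode_into()`**: Decodes directly into a caller-provided buffer using
//!   the internal reader, avoiding iterator overhead.
//! - **`decode_vec()`**: Convenience wrapper around `decode_into()` that returns
//!   the decoded sequence as a new `Vec`.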
15//!
16//! [`SeqVec`]: crate::seq::SeqVec
17
18use super::{SeqVec, iter::SeqVecBitReader};
19use crate::common::codec_reader::{CodecReader, VarVecBitReader};
20use crate::variable::traits::Storable;
21use dsi_bitstream::{
22    dispatch::{CodesRead, StaticCodeRead},
23    prelude::{BitRead, BitSeek, Endianness},
24};
25use std::fmt;
26
27/// A stateful reader for a [`SeqVec`] that provides convenient random sequence
28/// access with optimized reader reuse.
29///
30/// This reader is created by the [`SeqVec::reader`] method. It provides a
31/// convenient interface for performing multiple random sequence lookups, with
32/// internal reader reuse for efficiency.
33///
34/// ## Design
35///
36/// Unlike the stateless [`SeqVec`] accessors, `SeqVecReader` maintains an
37/// internal `VarVecBitReader` and `CodecReader` that are reused across
38/// multiple accesses. This design mirrors [`VarVecReader`] in the `variable`
39/// module.
40///
41/// The reader exposes only stateful, allocation-aware APIs that benefit from
42/// internal reader reuse. For lazy iteration, use [`SeqVec::get`] directly.
43///
44/// [`SeqVec`]: super::SeqVec
45/// [`SeqVec::reader`]: super::SeqVec::reader
46/// [`SeqVec::get`]: super::SeqVec::get
47/// [`VarVecReader`]: crate::variable::VarVecReader
48///
49/// # Examples
50///
51/// ```
52/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
53/// use compressed_intvec::seq::{SeqVec, USeqVec};
54///
55/// let sequences: &[&[u32]] = &[
56///     &[10, 20, 30],
57///     &[100, 200],
58///     &[1000, 2000, 3000, 4000],
59/// ];
60/// let vec: USeqVec<u32> = SeqVec::from_slices(sequences)?;
61///
62/// // Create a reusable reader
63/// let mut reader = vec.reader();
64///
65/// // Perform multiple random reads with optimized decode_into()
66/// let mut buffer = Vec::new();
67/// reader.decode_into(2, &mut buffer).unwrap();
68/// assert_eq!(buffer, vec![1000, 2000, 3000, 4000]);
69///
70/// // Or use SeqVec::get() for lazy iteration
71/// let seq0: Vec<u32> = vec.get(0).unwrap().collect();
72/// assert_eq!(seq0, vec![10, 20, 30]);
73/// #     Ok(())
74/// # }
75/// ```
76pub struct SeqVecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
77where
78    for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
79        + CodesRead<E>
80        + BitSeek<Error = core::convert::Infallible>,
81{
82    /// A reference to the parent [`SeqVec`].
83    ///
84    /// [`SeqVec`]: super::SeqVec
85    seqvec: &'a SeqVec<T, E, B>,
86    /// The reusable bitstream reader for decoding sequences.
87    reader: VarVecBitReader<'a, E>,
88    /// The hybrid codec reader for efficient element decoding.
89    code_reader: CodecReader<'a, E>,
90}
91
92impl<T: Storable, E: Endianness, B: AsRef<[u64]>> fmt::Debug for SeqVecReader<'_, T, E, B>
93where
94    for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
95        + CodesRead<E>
96        + BitSeek<Error = core::convert::Infallible>,
97{
98    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99        f.debug_struct("SeqVecReader").finish_non_exhaustive()
100    }
101}
102
103impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVecReader<'a, T, E, B>
104where
105    for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
106        + CodesRead<E>
107        + BitSeek<Error = core::convert::Infallible>,
108{
109    /// Creates a new [`SeqVecReader`].
110    #[inline]
111    pub(super) fn new(seqvec: &'a SeqVec<T, E, B>) -> Self {
112        let reader = VarVecBitReader::new(dsi_bitstream::impls::MemWordReader::new_inf(
113            seqvec.data.as_ref(),
114        ));
115        let code_reader = CodecReader::new(seqvec.encoding);
116        Self {
117            seqvec,
118            reader,
119            code_reader,
120        }
121    }
122
123    /// Retrieves the sequence at `index` as a `Vec<T>`, or `None` if out of
124    /// bounds.
125    ///
126    /// This method reuses the internal bitstream reader and codec dispatcher,
127    /// providing better performance than collecting a [`SeqVec::get`] iterator
128    /// into a vector. For optimal memory allocation, ensure the builder option
129    /// [`SeqVecBuilder::store_lengths`] was used during construction.
130    ///
131    /// [`SeqVec::get`]: super::SeqVec::get
132    /// [`SeqVecBuilder::store_lengths`]: crate::seq::SeqVecBuilder::store_lengths
133    ///
134    /// # Examples
135    ///
136    /// ```
137    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
138    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
139    ///
140    /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20]];
141    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
142    ///
143    /// let mut reader = vec.reader();
144    /// assert_eq!(reader.decode_vec(0), Some(vec![1, 2, 3]));
145    /// assert_eq!(reader.decode_vec(2), None);
146    /// #     Ok(())
147    /// # }
148    /// ```
149    #[inline]
150    pub fn decode_vec(&mut self, index: usize) -> Option<Vec<T>> {
151        let mut buf = Vec::new();
152        self.decode_into(index, &mut buf).map(|_| buf)
153    }
154
155    /// Retrieves the sequence at `index` into the provided buffer, returning the
156    /// number of elements decoded.
157    ///
158    /// The buffer is cleared before decoding. This method is useful for reusing
159    /// allocations across multiple sequence retrievals.
160    ///
161    /// Returns `None` if `index` is out of bounds.
162    ///
163    /// This implementation reuses the internal bitstream reader and codec
164    /// dispatcher, avoiding the overhead of creating a temporary iterator.
165    ///
166    /// # Examples
167    ///
168    /// ```
169    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
170    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
171    ///
172    /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20, 30, 40]];
173    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
174    ///
175    /// let mut reader = vec.reader();
176    /// let mut buffer = Vec::new();
177    ///
178    /// // Decode first sequence
179    /// let count = reader.decode_into(0, &mut buffer).unwrap();
180    /// assert_eq!(count, 3);
181    /// assert_eq!(buffer, vec![1, 2, 3]);
182    ///
183    /// // Reuse buffer for second sequence
184    /// let count = reader.decode_into(1, &mut buffer).unwrap();
185    /// assert_eq!(count, 4);
186    /// assert_eq!(buffer, vec![10, 20, 30, 40]);
187    /// #     Ok(())
188    /// # }
189    /// ```
190    #[inline]
191    pub fn decode_into(&mut self, index: usize, buf: &mut Vec<T>) -> Option<usize> {
192        if index >= self.seqvec.num_sequences() {
193            return None;
194        }
195
196        // SAFETY: Bounds check has been performed.
197        Some(unsafe { self.decode_into_unchecked(index, buf) })
198    }
199
200    /// Retrieves the sequence at `index` into the provided buffer without
201    /// bounds checking.
202    ///
203    /// # Safety
204    ///
205    /// Calling this method with an out-of-bounds `index` is undefined behavior.
206    #[inline]
207    pub unsafe fn decode_into_unchecked(&mut self, index: usize, buf: &mut Vec<T>) -> usize {
208        let start_bit = unsafe { self.seqvec.sequence_start_bit_unchecked(index) };
209
210        buf.clear();
211
212        // Always seek to the start position (random access pattern).
213        let _ = self.reader.set_bit_pos(start_bit);
214
215        if let Some(lengths) = &self.seqvec.seq_lengths {
216            let count = unsafe { lengths.get_unchecked(index) as usize };
217            buf.reserve(count);
218            for _ in 0..count {
219                let word = self.code_reader.read(&mut self.reader).unwrap();
220                buf.push(T::from_word(word));
221            }
222        } else {
223            let end_bit = unsafe { self.seqvec.sequence_end_bit_unchecked(index) };
224
225            // Decode all elements in the sequence until we reach the end boundary.
226            // For VarVecBitReader backed by MemWordReader, bit_pos() is infallible.
227            while self.reader.bit_pos().unwrap() < end_bit {
228                let word = self.code_reader.read(&mut self.reader).unwrap();
229                buf.push(T::from_word(word));
230            }
231        }
232
233        buf.len()
234    }
235}