compressed_intvec/seq/reader.rs
1//! A reader for efficient, repeated random access into a [`SeqVec`].
2//!
3//! This module provides [`SeqVecReader`], a reusable, stateful reader designed
4//! to provide a convenient interface for performing multiple random sequence
5//! lookups with optimized reader reuse.
6//!
7//! # Stateful Design
8//!
9//! [`SeqVecReader`] maintains an internal bitstream reader and codec dispatcher,
10//! enabling efficient reuse across multiple sequence accesses. This design mirrors
11//! [`VarVecReader`](crate::variable::VarVecReader) in the `variable` module.
12//!
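//! - **`decode_into()`**: Decodes directly into a caller-provided buffer using the
//!   internal reader, avoiding iterator overhead and letting the allocation be reused.
//! - **`decode_vec()`**: Convenience wrapper around `decode_into()` that returns the
//!   decoded sequence as a new `Vec`.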
15//!
16//! [`SeqVec`]: crate::seq::SeqVec
17
18use super::{SeqVec, iter::SeqVecBitReader};
19use crate::common::codec_reader::{CodecReader, VarVecBitReader};
20use crate::variable::traits::Storable;
21use dsi_bitstream::{
22 dispatch::{CodesRead, StaticCodeRead},
23 prelude::{BitRead, BitSeek, Endianness},
24};
25use std::fmt;
26
/// A stateful reader for a [`SeqVec`] that provides convenient random sequence
/// access with optimized reader reuse.
///
/// Instances are created by the [`SeqVec::reader`] method. A single reader is
/// meant to serve many random sequence lookups, reusing its internal decoding
/// state between calls instead of rebuilding it each time.
///
/// ## Design
///
/// Unlike the stateless [`SeqVec`] accessors, `SeqVecReader` maintains an
/// internal `VarVecBitReader` and `CodecReader` that are reused across
/// multiple accesses. This design mirrors [`VarVecReader`] in the `variable`
/// module.
///
/// The reader exposes only stateful, allocation-aware APIs that benefit from
/// internal reader reuse. For lazy iteration, use [`SeqVec::get`] directly.
///
/// [`SeqVec`]: super::SeqVec
/// [`SeqVec::reader`]: super::SeqVec::reader
/// [`SeqVec::get`]: super::SeqVec::get
/// [`VarVecReader`]: crate::variable::VarVecReader
///
/// # Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use compressed_intvec::seq::{SeqVec, USeqVec};
///
/// let sequences: &[&[u32]] = &[
///     &[10, 20, 30],
///     &[100, 200],
///     &[1000, 2000, 3000, 4000],
/// ];
/// let vec: USeqVec<u32> = SeqVec::from_slices(sequences)?;
///
/// // Create a reusable reader
/// let mut reader = vec.reader();
///
/// // Decode a sequence into a reusable buffer with decode_into()
/// let mut buffer = Vec::new();
/// reader.decode_into(2, &mut buffer).unwrap();
/// assert_eq!(buffer, vec![1000, 2000, 3000, 4000]);
///
/// // Or use SeqVec::get() for lazy iteration
/// let seq0: Vec<u32> = vec.get(0).unwrap().collect();
/// assert_eq!(seq0, vec![10, 20, 30]);
/// # Ok(())
/// # }
/// ```
pub struct SeqVecReader<'a, T: Storable, E: Endianness, B: AsRef<[u64]>>
where
    // The bit reader for endianness `E` must support bit reads and seeks whose
    // error type is `Infallible`, as well as reading codes.
    for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
        + CodesRead<E>
        + BitSeek<Error = core::convert::Infallible>,
{
    /// A reference to the parent [`SeqVec`].
    ///
    /// [`SeqVec`]: super::SeqVec
    seqvec: &'a SeqVec<T, E, B>,
    /// The reusable bitstream reader for decoding sequences; it is repositioned
    /// to the start of the requested sequence on every access.
    reader: VarVecBitReader<'a, E>,
    /// The hybrid codec reader for efficient element decoding, built from the
    /// parent vector's encoding.
    code_reader: CodecReader<'a, E>,
}
91
92impl<T: Storable, E: Endianness, B: AsRef<[u64]>> fmt::Debug for SeqVecReader<'_, T, E, B>
93where
94 for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
95 + CodesRead<E>
96 + BitSeek<Error = core::convert::Infallible>,
97{
98 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99 f.debug_struct("SeqVecReader").finish_non_exhaustive()
100 }
101}
102
103impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVecReader<'a, T, E, B>
104where
105 for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
106 + CodesRead<E>
107 + BitSeek<Error = core::convert::Infallible>,
108{
109 /// Creates a new [`SeqVecReader`].
110 #[inline]
111 pub(super) fn new(seqvec: &'a SeqVec<T, E, B>) -> Self {
112 let reader = VarVecBitReader::new(dsi_bitstream::impls::MemWordReader::new_inf(
113 seqvec.data.as_ref(),
114 ));
115 let code_reader = CodecReader::new(seqvec.encoding);
116 Self {
117 seqvec,
118 reader,
119 code_reader,
120 }
121 }
122
123 /// Retrieves the sequence at `index` as a `Vec<T>`, or `None` if out of
124 /// bounds.
125 ///
126 /// This method reuses the internal bitstream reader and codec dispatcher,
127 /// providing better performance than collecting a [`SeqVec::get`] iterator
128 /// into a vector. For optimal memory allocation, ensure the builder option
129 /// [`SeqVecBuilder::store_lengths`] was used during construction.
130 ///
131 /// [`SeqVec::get`]: super::SeqVec::get
132 /// [`SeqVecBuilder::store_lengths`]: crate::seq::SeqVecBuilder::store_lengths
133 ///
134 /// # Examples
135 ///
136 /// ```
137 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
138 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
139 ///
140 /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20]];
141 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
142 ///
143 /// let mut reader = vec.reader();
144 /// assert_eq!(reader.decode_vec(0), Some(vec![1, 2, 3]));
145 /// assert_eq!(reader.decode_vec(2), None);
146 /// # Ok(())
147 /// # }
148 /// ```
149 #[inline]
150 pub fn decode_vec(&mut self, index: usize) -> Option<Vec<T>> {
151 let mut buf = Vec::new();
152 self.decode_into(index, &mut buf).map(|_| buf)
153 }
154
155 /// Retrieves the sequence at `index` into the provided buffer, returning the
156 /// number of elements decoded.
157 ///
158 /// The buffer is cleared before decoding. This method is useful for reusing
159 /// allocations across multiple sequence retrievals.
160 ///
161 /// Returns `None` if `index` is out of bounds.
162 ///
163 /// This implementation reuses the internal bitstream reader and codec
164 /// dispatcher, avoiding the overhead of creating a temporary iterator.
165 ///
166 /// # Examples
167 ///
168 /// ```
169 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
170 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
171 ///
172 /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20, 30, 40]];
173 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
174 ///
175 /// let mut reader = vec.reader();
176 /// let mut buffer = Vec::new();
177 ///
178 /// // Decode first sequence
179 /// let count = reader.decode_into(0, &mut buffer).unwrap();
180 /// assert_eq!(count, 3);
181 /// assert_eq!(buffer, vec![1, 2, 3]);
182 ///
183 /// // Reuse buffer for second sequence
184 /// let count = reader.decode_into(1, &mut buffer).unwrap();
185 /// assert_eq!(count, 4);
186 /// assert_eq!(buffer, vec![10, 20, 30, 40]);
187 /// # Ok(())
188 /// # }
189 /// ```
190 #[inline]
191 pub fn decode_into(&mut self, index: usize, buf: &mut Vec<T>) -> Option<usize> {
192 if index >= self.seqvec.num_sequences() {
193 return None;
194 }
195
196 // SAFETY: Bounds check has been performed.
197 Some(unsafe { self.decode_into_unchecked(index, buf) })
198 }
199
200 /// Retrieves the sequence at `index` into the provided buffer without
201 /// bounds checking.
202 ///
203 /// # Safety
204 ///
205 /// Calling this method with an out-of-bounds `index` is undefined behavior.
206 #[inline]
207 pub unsafe fn decode_into_unchecked(&mut self, index: usize, buf: &mut Vec<T>) -> usize {
208 let start_bit = unsafe { self.seqvec.sequence_start_bit_unchecked(index) };
209
210 buf.clear();
211
212 // Always seek to the start position (random access pattern).
213 let _ = self.reader.set_bit_pos(start_bit);
214
215 if let Some(lengths) = &self.seqvec.seq_lengths {
216 let count = unsafe { lengths.get_unchecked(index) as usize };
217 buf.reserve(count);
218 for _ in 0..count {
219 let word = self.code_reader.read(&mut self.reader).unwrap();
220 buf.push(T::from_word(word));
221 }
222 } else {
223 let end_bit = unsafe { self.seqvec.sequence_end_bit_unchecked(index) };
224
225 // Decode all elements in the sequence until we reach the end boundary.
226 // For VarVecBitReader backed by MemWordReader, bit_pos() is infallible.
227 while self.reader.bit_pos().unwrap() < end_bit {
228 let word = self.code_reader.read(&mut self.reader).unwrap();
229 buf.push(T::from_word(word));
230 }
231 }
232
233 buf.len()
234 }
235}