compressed_intvec/seq/mod.rs
1//! A compressed vector of variable-length sequences with indexed access.
2//!
3//! This module provides [`SeqVec`], a data structure for storing multiple
4//! integer sequences in a single compressed bitstream. Each sequence is
5//! accessed by its index (rank), and all elements within a sequence are
6//! decoded together.
7//!
8//! # Core Concepts
9//!
10//! ## Use Case
11//!
12//! [`SeqVec`] is designed for scenarios where:
13//!
14//! - Data is naturally organized as many variable-length sequences.
15//! - Access patterns retrieve entire sequences rather than individual elements.
16//! - Memory overhead of per-sequence pointers and padding must be minimized.
17//!
18//! A common application is representing **adjacency lists** in a compressed
19//! graph, where each node's neighbors form a sequence.
20//!
21//! ## Differences from [`VarVec`]
22//!
23//! | Aspect | [`VarVec`] | [`SeqVec`] |
24//! |--------|-----------|------------|
25//! | Access unit | Single element | Entire sequence |
26//! | Index meaning | Element position | Sequence rank |
27//! | Sampling | Periodic (every k elements) | At sequence boundaries |
28//! | Primary operation | `get(i) → T` | `get(i) → Iterator<T>` |
29//!
30//! ## Compression
31//!
32//! Like [`VarVec`], [`SeqVec`] uses instantaneous variable-length codes (Gamma,
33//! Delta, Zeta, etc.) from the [`dsi-bitstream`] crate. All sequences are
34//! concatenated into a single compressed bitstream, with a [`FixedVec`] index
35//! storing the bit offset of each sequence's start.
36//!
37//! ## Sequence Length
38//!
39//! Sequence lengths are **not stored by default**. The iterator for a sequence
40//! terminates when the current bit position reaches the start of the next
41//! sequence. This means:
42//!
43//! - Retrieving a sequence is O(length) for decoding — unavoidable.
44//! - Computing sequence length requires full iteration unless lengths are stored.
45//!
46//! You can opt-in to storing explicit lengths via
47//! [`SeqVecBuilder::store_lengths`](crate::seq::SeqVecBuilder::store_lengths).
48//! When enabled, O(1) length queries become available via
49//! [`SeqVec::sequence_len`](crate::seq::SeqVec::sequence_len), and decoding
50//! can avoid the end-bit check in hot loops.
51//!
52//! ## Immutability
53//!
54//! [`SeqVec`] is **immutable** after creation. Variable-length encoding makes
55//! in-place modification impractical, as changing one element could shift all
56//! subsequent data.
57//!
58//! # Main Components
59//!
60//! - [`SeqVec`]: The core compressed sequence vector.
61//! - [`SeqVecBuilder`]: Builder for constructing a [`SeqVec`] with custom codec.
62//! - [`SeqIter`]: Zero-allocation iterator over elements of a single sequence.
63//! - [`SeqVecIter`]: Iterator over all sequences, yielding [`SeqIter`] instances.
64//!
65//! # Examples
66//!
67//! ## Basic Usage
68//!
69//! ```
70//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
71//! use compressed_intvec::seq::{SeqVec, LESeqVec};
72//!
73//! let sequences: &[&[u32]] = &[
74//! &[1, 2, 3],
75//! &[10, 20],
76//! &[100, 200, 300, 400],
77//! &[], // Empty sequences are supported
78//! ];
79//!
80//! let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
81//!
82//! assert_eq!(vec.num_sequences(), 4);
83//!
84//! // Access a sequence by index
85//! let seq1: Vec<u32> = vec.get(1).unwrap().collect();
86//! assert_eq!(seq1, vec![10, 20]);
87//! # Ok(())
88//! # }
89//! ```
90//!
91//! ## Custom Codec
92//!
93//! ```
94//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
95//! use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
96//!
97//! let sequences: Vec<Vec<u64>> = vec![
98//! vec![1, 1, 1, 2, 3],
99//! vec![100, 200, 300],
100//! ];
101//!
102//! let vec: LESeqVec<u64> = SeqVec::builder()
103//! .codec(Codec::Zeta { k: Some(3) })
104//! .build(&sequences)?;
105//! # Ok(())
106//! # }
107//! ```
108//!
109//! [`VarVec`]: crate::variable::VarVec
110//! [`FixedVec`]: crate::fixed::FixedVec
111//! [`dsi-bitstream`]: https://crates.io/crates/dsi-bitstream
112
113mod builder;
114mod iter;
115mod macros;
116#[cfg(feature = "parallel")]
117mod parallel;
118mod reader;
119#[cfg(feature = "serde")]
120mod serde;
121mod slice;
122
123pub use builder::{SeqVecBuilder, SeqVecFromIterBuilder};
124pub use iter::{SeqIter, SeqVecIntoIter, SeqVecIter};
125pub use reader::SeqVecReader;
126pub use slice::SeqVecSlice;
127
128// Re-export codec spec for convenience.
129pub use crate::variable::codec::Codec;
130
131// Re-export deprecated alias for backward compatibility.
132#[allow(deprecated)]
133pub use crate::variable::VariableCodecSpec;
134
135use crate::common::codec_reader::CodecReader;
136use crate::fixed::{Error as FixedVecError, FixedVec};
137use crate::variable::traits::Storable;
138use dsi_bitstream::{
139 dispatch::{Codes, CodesRead, StaticCodeRead},
140 impls::{BufBitWriter, MemWordWriterVec},
141 prelude::{BE, BitRead, BitSeek, BitWrite, CodesWrite, Endianness, LE},
142};
143use iter::SeqVecBitReader;
144use mem_dbg::{DbgFlags, FlatType, MemDbgImpl, MemSize, SizeFlags};
145use std::marker::PhantomData;
146use std::{error::Error, fmt};
147
148/// Errors that can occur when working with [`SeqVec`].
149#[derive(Debug)]
150pub enum SeqVecError {
151 /// An I/O error from bitstream operations.
152 Io(std::io::Error),
153 /// An error from the bitstream library during encoding or decoding.
154 Bitstream(Box<dyn Error + Send + Sync>),
155 /// Invalid parameters were provided during construction.
156 InvalidParameters(String),
157 /// An error during codec resolution or dispatch.
158 CodecDispatch(String),
159 /// The requested sequence index is out of bounds.
160 IndexOutOfBounds(usize),
161}
162
163impl fmt::Display for SeqVecError {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 match self {
166 SeqVecError::Io(e) => write!(f, "I/O error: {}", e),
167 SeqVecError::Bitstream(e) => write!(f, "Bitstream error: {}", e),
168 SeqVecError::InvalidParameters(s) => write!(f, "Invalid parameters: {}", s),
169 SeqVecError::CodecDispatch(s) => write!(f, "Codec dispatch error: {}", s),
170 SeqVecError::IndexOutOfBounds(idx) => {
171 write!(f, "Sequence index out of bounds: {}", idx)
172 }
173 }
174 }
175}
176
177impl Error for SeqVecError {
178 fn source(&self) -> Option<&(dyn Error + 'static)> {
179 match self {
180 SeqVecError::Io(e) => Some(e),
181 SeqVecError::Bitstream(e) => Some(e.as_ref()),
182 _ => None,
183 }
184 }
185}
186
187impl From<std::io::Error> for SeqVecError {
188 fn from(e: std::io::Error) -> Self {
189 SeqVecError::Io(e)
190 }
191}
192
193impl From<core::convert::Infallible> for SeqVecError {
194 fn from(_: core::convert::Infallible) -> Self {
195 unreachable!()
196 }
197}
198
199impl From<FixedVecError> for SeqVecError {
200 fn from(e: FixedVecError) -> Self {
201 SeqVecError::InvalidParameters(e.to_string())
202 }
203}
204
205/// Type alias for the bit writer used internally by [`SeqVec`] builders.
206pub(crate) type SeqVecBitWriter<E> = BufBitWriter<E, MemWordWriterVec<u64, Vec<u64>>>;
207
208/// A compressed, indexed vector of integer sequences.
209///
210/// [`SeqVec`] stores multiple sequences of integers in a single compressed
211/// bitstream, with an auxiliary index for O(1) access to each sequence by
212/// its rank. This is ideal for representing collections of variable-length
213/// sequences with minimal memory overhead.
214///
215/// See the [module-level documentation](self) for detailed usage information.
216///
217/// # Type Parameters
218///
219/// * `T` - The element type (e.g., `u32`, `i16`). Must implement [`Storable`].
220/// * `E` - The [`Endianness`] of the underlying bitstream ([`LE`] or [`BE`]).
221/// * `B` - The backing buffer type, enabling owned (`Vec<u64>`) or borrowed
222/// (`&[u64]`) storage for zero-copy operations.
223///
224/// [`Storable`]: crate::variable::traits::Storable
225/// [`Endianness`]: dsi_bitstream::prelude::Endianness
226/// [`LE`]: dsi_bitstream::prelude::LE
227/// [`BE`]: dsi_bitstream::prelude::BE
228#[derive(Debug, Clone)]
229pub struct SeqVec<T: Storable, E: Endianness, B: AsRef<[u64]> = Vec<u64>> {
230 /// The compressed bitstream containing all sequences concatenated.
231 data: B,
232 /// Bit offsets marking the start of each sequence. Contains N+1 elements
233 /// where N is the number of sequences. The final element is a sentinel
234 /// containing the total bit length.
235 /// Uses the same endianness `E` as the struct for design consistency.
236 bit_offsets: FixedVec<u64, u64, E, B>,
237 /// Optional per-sequence lengths stored in a compact fixed-width vector.
238 /// Uses `u64` (architecture-independent) to ensure portability across 32-bit
239 /// and 64-bit systems. Accessor methods return `usize` via safe casting.
240 seq_lengths: Option<FixedVec<u64, u64, E, Vec<u64>>>,
241 /// The compression codec used for all elements.
242 encoding: Codes,
243 /// Zero-sized markers for the generic type parameters.
244 _markers: PhantomData<(T, E)>,
245}
246
247// --- Type Aliases ---
248
249/// A [`SeqVec`] with little-endian bit ordering.
250pub type LESeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
251
252/// A [`SeqVec`] with big-endian bit ordering.
253pub type BESeqVec<T, B = Vec<u64>> = SeqVec<T, BE, B>;
254
255/// A [`SeqVec`] for unsigned integers with little-endian bit ordering.
256///
257/// Use this type alias for sequences of unsigned integer values stored with
258/// little-endian bit ordering.
259pub type USeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
260
261/// A [`SeqVec`] for signed integers with little-endian bit ordering.
262///
263/// Signed integers are transparently encoded using zig-zag encoding via the
264/// [`Storable`] trait.
265///
266/// [`Storable`]: crate::variable::traits::Storable
267pub type SSeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
268
269/// A [`SeqVec`] for signed integers with big-endian bit ordering.
270///
271/// Signed integers are transparently encoded using zig-zag encoding via the
272/// [`Storable`] trait.
273///
274/// [`Storable`]: crate::variable::traits::Storable
275pub type BESSeqVec<T, B = Vec<u64>> = SeqVec<T, BE, B>;
276
277/// A [`SeqVec`] for signed integers with little-endian bit ordering.
278///
279/// This is an alias for [`SSeqVec`], provided for consistency with the naming
280/// pattern `{Endianness}{SignedUnsigned}SeqVec`.
281///
282/// Signed integers are transparently encoded using zig-zag encoding via the
283/// [`Storable`] trait.
284///
285/// [`Storable`]: crate::variable::traits::Storable
286pub type LESSeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
287
288/// Type alias for a tuple of two [`SeqVecSlice`] references.
289pub type SeqVecSlicePair<'a, T, E, B> = (SeqVecSlice<'a, T, E, B>, SeqVecSlice<'a, T, E, B>);
290
291// --- Construction (Owned) ---
292
293impl<T: Storable + 'static, E: Endianness> SeqVec<T, E, Vec<u64>> {
294 /// Creates a [`SeqVec`] from a slice of slices using default settings.
295 ///
296 /// This method uses [`Codec::Auto`] to select an optimal codec
297 /// based on the data distribution.
298 ///
299 /// # Errors
300 ///
301 /// Returns a [`SeqVecError`] if codec resolution or encoding fails.
302 ///
303 /// # Examples
304 ///
305 /// ```
306 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
307 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
308 ///
309 /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20], &[100]];
310 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
311 ///
312 /// assert_eq!(vec.num_sequences(), 3);
313 /// # Ok(())
314 /// # }
315 /// ```
316 pub fn from_slices<S: AsRef<[T]>>(sequences: &[S]) -> Result<Self, SeqVecError>
317 where
318 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
319 {
320 Self::builder().codec(Codec::Auto).build(sequences)
321 }
322
323 /// Consumes the [`SeqVec`] and returns all sequences as a `Vec<Vec<T>>`.
324 ///
325 /// This method decodes the entire compressed data, allocating a separate
326 /// vector for each sequence. The operation requires time proportional to the
327 /// total number of elements across all sequences.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
333 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
334 ///
335 /// let sequences: &[&[u32]] = &[&[1, 2], &[10, 20, 30]];
336 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
337 ///
338 /// let decoded: Vec<Vec<u32>> = vec.into_vecs();
339 /// assert_eq!(decoded, vec![vec![1, 2], vec![10, 20, 30]]);
340 /// # Ok(())
341 /// # }
342 /// ```
343 pub fn into_vecs(self) -> Vec<Vec<T>>
344 where
345 for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
346 + CodesRead<E>
347 + BitSeek<Error = core::convert::Infallible>,
348 {
349 self.iter().map(|seq_iter| seq_iter.collect()).collect()
350 }
351}
352
353// --- Construction (Generic Buffer) ---
354
355impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B> {
356 /// Creates a [`SeqVec`] from raw components for zero-copy views.
357 ///
358 /// This constructor is intended for advanced use cases such as memory-mapping
359 /// a pre-built [`SeqVec`] from disk or creating a borrowed view.
360 ///
361 /// # Arguments
362 ///
363 /// * `data` - The compressed bitstream buffer.
364 /// * `bit_offsets_data` - The buffer containing the bit offsets index.
365 /// * `bit_offsets_len` - Number of entries in the bit offsets index (N+1).
366 /// * `bit_offsets_num_bits` - Bit width of each entry in the offsets index.
367 /// * `encoding` - The codec used for compression.
368 ///
369 /// # Errors
370 ///
371 /// Returns [`SeqVecError::InvalidParameters`] if:
372 /// * `bit_offsets_len` is zero (must have at least the sentinel entry).
373 /// * The underlying [`FixedVec`] construction fails.
374 ///
375 /// [`FixedVec`]: crate::fixed::FixedVec
376 ///
377 /// # Safety Considerations
378 ///
379 /// The caller must ensure that:
380 /// * `data` contains valid compressed data encoded with `encoding`.
381 /// Invalid data will cause panics or incorrect results during decoding.
382 /// * `bit_offsets_data` contains monotonically increasing bit positions.
383 /// Unsorted offsets will cause out-of-order or corrupted sequence retrieval.
384 /// * The sentinel value (final bit offset) does not exceed the total bits
385 /// in `data`. Violations cause panics during decoding.
386 /// * All bit positions fall within valid boundaries of the bitstream.
387 ///
388 /// # Examples
389 ///
390 /// ```
391 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
392 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
393 /// use compressed_intvec::fixed::FixedVec;
394 /// use dsi_bitstream::prelude::LE;
395 /// use dsi_bitstream::impls::{BufBitWriter, MemWordWriterVec};
396 /// use dsi_bitstream::prelude::{BitWrite, CodesWrite};
397 ///
398 /// // Create a simple vector using high-level API
399 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
400 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
401 ///
402 /// // In a real zero-copy scenario, these would come from disk/memory-map
403 /// let data = vec.as_limbs().to_vec();
404 /// let offsets_ref = vec.bit_offsets_ref();
405 ///
406 /// // Verify structure is sound before reconstruction
407 /// assert_eq!(vec.num_sequences(), 2);
408 /// # Ok(())
409 /// # }
410 /// ```
411 pub fn from_parts(
412 data: B,
413 bit_offsets_data: B,
414 bit_offsets_len: usize,
415 bit_offsets_num_bits: usize,
416 encoding: Codes,
417 ) -> Result<Self, SeqVecError> {
418 if bit_offsets_len == 0 {
419 return Err(SeqVecError::InvalidParameters(
420 "bit_offsets must have at least one entry (the sentinel)".to_string(),
421 ));
422 }
423
424 let bit_offsets = FixedVec::<u64, u64, E, B>::from_parts(
425 bit_offsets_data,
426 bit_offsets_len,
427 bit_offsets_num_bits,
428 )?;
429
430 Ok(Self {
431 data,
432 bit_offsets,
433 seq_lengths: None,
434 encoding,
435 _markers: PhantomData,
436 })
437 }
438
439 /// Creates a [`SeqVec`] from raw components with optional stored lengths.
440 ///
441 /// Use this variant when you have pre-computed sequence lengths from an
442 /// earlier encoding. The `seq_lengths` parameter must be consistent with
443 /// `bit_offsets` when provided (element count must equal `num_sequences()`).
444 ///
445 /// # Errors
446 ///
447 /// Returns [`SeqVecError::InvalidParameters`] if the number of lengths does
448 /// not equal the number of sequences.
449 ///
450 /// # Examples
451 ///
452 /// ```
453 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
454 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
455 ///
456 /// // Typical usage: start with high-level API
457 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
458 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
459 ///
460 /// // Verify structure
461 /// assert!(!vec.has_stored_lengths());
462 /// assert_eq!(vec.num_sequences(), 2);
463 /// # Ok(())
464 /// # }
465 /// ```
466 pub fn from_parts_with_lengths(
467 data: B,
468 bit_offsets_data: B,
469 bit_offsets_len: usize,
470 bit_offsets_num_bits: usize,
471 seq_lengths: Option<FixedVec<u64, u64, E, Vec<u64>>>,
472 encoding: Codes,
473 ) -> Result<Self, SeqVecError> {
474 if bit_offsets_len == 0 {
475 return Err(SeqVecError::InvalidParameters(
476 "bit_offsets must have at least one entry (the sentinel)".to_string(),
477 ));
478 }
479
480 if let Some(lengths) = &seq_lengths
481 && lengths.len() + 1 != bit_offsets_len
482 {
483 return Err(SeqVecError::InvalidParameters(
484 "seq_lengths length must match number of sequences".to_string(),
485 ));
486 }
487
488 let bit_offsets = FixedVec::<u64, u64, E, B>::from_parts(
489 bit_offsets_data,
490 bit_offsets_len,
491 bit_offsets_num_bits,
492 )?;
493
494 Ok(Self {
495 data,
496 bit_offsets,
497 seq_lengths,
498 encoding,
499 _markers: PhantomData,
500 })
501 }
502
503 /// Creates a [`SeqVec`] from pre-built components without validation.
504 ///
505 /// # Safety
506 ///
507 /// The caller must ensure all components are consistent and valid. Mismatched
508 /// parameters will lead to panics or incorrect data retrieval.
509 #[inline]
510 pub unsafe fn from_parts_unchecked(
511 data: B,
512 bit_offsets: FixedVec<u64, u64, E, B>,
513 encoding: Codes,
514 ) -> Self {
515 Self {
516 data,
517 bit_offsets,
518 seq_lengths: None,
519 encoding,
520 _markers: PhantomData,
521 }
522 }
523
524 /// Creates a [`SeqVec`] from pre-built components with optional lengths
525 /// without validation.
526 ///
527 /// # Safety
528 ///
529 /// The caller must ensure all components are consistent and valid.
530 #[inline]
531 pub unsafe fn from_parts_with_lengths_unchecked(
532 data: B,
533 bit_offsets: FixedVec<u64, u64, E, B>,
534 seq_lengths: Option<FixedVec<u64, u64, E, Vec<u64>>>,
535 encoding: Codes,
536 ) -> Self {
537 Self {
538 data,
539 bit_offsets,
540 seq_lengths,
541 encoding,
542 _markers: PhantomData,
543 }
544 }
545}
546
547// --- Query Methods ---
548
549impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B> {
550 /// Returns the number of sequences stored.
551 ///
552 /// This is O(1) as it is derived from the bit offsets index length.
553 #[inline(always)]
554 pub fn num_sequences(&self) -> usize {
555 // bit_offsets has N+1 entries for N sequences (always at least the sentinel).
556 self.bit_offsets.len() - 1
557 }
558
559 /// Returns `true` if there are no sequences stored.
560 #[inline(always)]
561 pub fn is_empty(&self) -> bool {
562 self.num_sequences() == 0
563 }
564
565 /// Returns the compression codec used for encoding.
566 #[inline(always)]
567 pub fn encoding(&self) -> Codes {
568 self.encoding
569 }
570
571 /// Returns a reference to the underlying compressed data buffer.
572 #[inline(always)]
573 pub fn as_limbs(&self) -> &[u64] {
574 self.data.as_ref()
575 }
576
577 /// Returns a reference to the bit offsets index.
578 #[inline(always)]
579 pub fn bit_offsets_ref(&self) -> &FixedVec<u64, u64, E, B> {
580 &self.bit_offsets
581 }
582
583 /// Returns `true` if explicit sequence lengths were stored at construction time.
584 #[inline(always)]
585 pub fn has_stored_lengths(&self) -> bool {
586 self.seq_lengths.is_some()
587 }
588
589 /// Returns the length of sequence `index` if explicit lengths are stored.
590 ///
591 /// Returns `None` if `index` is out of bounds or if lengths were not
592 /// stored at construction time. Use [`has_stored_lengths`](Self::has_stored_lengths)
593 /// to distinguish between these cases.
594 ///
595 /// If lengths are stored, this query completes in O(1) time. Otherwise,
596 /// determining sequence length requires full iteration. Store lengths at
597 /// construction time via [`SeqVecBuilder::store_lengths`](crate::seq::SeqVecBuilder::store_lengths)
598 /// when O(1) length queries are needed.
599 ///
600 /// # Examples
601 ///
602 /// ```
603 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
604 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
605 ///
606 /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20]];
607 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
608 ///
609 /// // Without storing lengths, returns None
610 /// assert_eq!(vec.sequence_len(0), None);
611 /// assert!(!vec.has_stored_lengths());
612 /// # Ok(())
613 /// # }
614 /// ```
615 #[inline(always)]
616 pub fn sequence_len(&self, index: usize) -> Option<usize> {
617 if index >= self.num_sequences() {
618 return None;
619 }
620
621 self.seq_lengths
622 .as_ref()
623 .map(|lengths| unsafe { lengths.get_unchecked(index) as usize })
624 }
625
626 /// Returns the total number of bits in the compressed data.
627 ///
628 /// This is the sentinel value at the end of the bit offsets index.
629 #[inline(always)]
630 pub fn total_bits(&self) -> u64 {
631 // bit_offsets always has at least one sentinel entry by construction invariant.
632 unsafe { self.bit_offsets.get_unchecked(self.bit_offsets.len() - 1) }
633 }
634
635 /// Returns the bit offset where sequence `index` starts in the compressed data.
636 ///
637 /// Returns `None` if `index >= num_sequences()`. This is useful for
638 /// understanding the compression footprint or verifying sequence boundaries.
639 ///
640 /// # Examples
641 ///
642 /// ```
643 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
644 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
645 ///
646 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
647 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
648 ///
649 /// // First sequence always starts at bit 0
650 /// assert_eq!(vec.sequence_start_bit(0), Some(0));
651 /// // Second sequence starts after the first
652 /// assert!(vec.sequence_start_bit(1).is_some());
653 /// // Out of bounds
654 /// assert_eq!(vec.sequence_start_bit(2), None);
655 /// # Ok(())
656 /// # }
657 /// ```
658 #[inline(always)]
659 pub fn sequence_start_bit(&self, index: usize) -> Option<u64> {
660 if index >= self.num_sequences() {
661 return None;
662 }
663 // SAFETY: bounds checked above.
664 Some(unsafe { self.bit_offsets.get_unchecked(index) })
665 }
666
667 /// Returns the bit offset where sequence `index` starts, without bounds checking.
668 ///
669 /// # Safety
670 ///
671 /// The caller must ensure `index < num_sequences()`.
672 #[inline(always)]
673 pub unsafe fn sequence_start_bit_unchecked(&self, index: usize) -> u64 {
674 debug_assert!(
675 index < self.num_sequences(),
676 "index {} out of bounds for {} sequences",
677 index,
678 self.num_sequences()
679 );
680 unsafe { self.bit_offsets.get_unchecked(index) }
681 }
682
683 /// Returns the bit offset immediately after sequence `index` ends.
684 ///
685 /// This is equivalent to the start bit of the next sequence, or the total
686 /// bit length for the final sequence. Useful for calculating per-sequence
687 /// compression footprint.
688 ///
689 /// # Examples
690 ///
691 /// ```
692 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
693 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
694 ///
695 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
696 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
697 ///
698 /// let start = vec.sequence_start_bit(0).unwrap();
699 /// let end = unsafe { vec.sequence_end_bit_unchecked(0) };
700 /// println!("Sequence 0 occupies {} bits", end - start);
701 /// # Ok(())
702 /// # }
703 /// ```
704 ///
705 /// # Safety
706 ///
707 /// The caller must ensure `index < num_sequences()`.
708 #[inline]
709 pub unsafe fn sequence_end_bit_unchecked(&self, index: usize) -> u64 {
710 debug_assert!(
711 index < self.num_sequences(),
712 "index {} out of bounds for {} sequences",
713 index,
714 self.num_sequences()
715 );
716 unsafe { self.bit_offsets.get_unchecked(index + 1) }
717 }
718}
719
720// --- Access Methods ---
721
722impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B>
723where
724 for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
725 + CodesRead<E>
726 + BitSeek<Error = core::convert::Infallible>,
727{
728 /// Returns an iterator over the elements of sequence `index`.
729 ///
730 /// Returns `None` if `index >= num_sequences()`.
731 ///
732 /// # Examples
733 ///
734 /// ```
735 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
736 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
737 ///
738 /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[4, 5]];
739 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
740 ///
741 /// let first: Vec<u32> = vec.get(0).unwrap().collect();
742 /// assert_eq!(first, vec![1, 2, 3]);
743 /// # Ok(())
744 /// # }
745 /// ```
746 #[inline(always)]
747 pub fn get(&self, index: usize) -> Option<SeqIter<'_, T, E>> {
748 if index >= self.num_sequences() {
749 return None;
750 }
751 // SAFETY: bounds checked above.
752 Some(unsafe { self.get_unchecked(index) })
753 }
754
755 /// Returns an iterator over the elements of sequence `index` without
756 /// bounds checking.
757 ///
758 /// # Safety
759 ///
760 /// Calling this method with `index >= num_sequences()` is undefined behavior.
761 #[inline(always)]
762 pub unsafe fn get_unchecked(&self, index: usize) -> SeqIter<'_, T, E> {
763 debug_assert!(
764 index < self.num_sequences(),
765 "index {} out of bounds for {} sequences",
766 index,
767 self.num_sequences()
768 );
769
770 let start_bit = unsafe { self.sequence_start_bit_unchecked(index) };
771 let end_bit = unsafe { self.sequence_end_bit_unchecked(index) };
772 let len = self
773 .seq_lengths
774 .as_ref()
775 .map(|lengths| unsafe { lengths.get_unchecked(index) as usize });
776
777 SeqIter::new_with_len(self.data.as_ref(), start_bit, end_bit, self.encoding, len)
778 }
779
780 /// Returns the elements of sequence `index` as a newly allocated `Vec`.
781 ///
782 /// Returns `None` if `index >= num_sequences()`.
783 ///
784 /// # Examples
785 ///
786 /// ```
787 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
788 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
789 ///
790 /// let sequences: &[&[u32]] = &[&[10, 20, 30]];
791 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
792 ///
793 /// assert_eq!(vec.decode_vec(0), Some(vec![10, 20, 30]));
794 /// assert_eq!(vec.decode_vec(1), None);
795 /// # Ok(())
796 /// # }
797 /// ```
798 #[inline(always)]
799 pub fn decode_vec(&self, index: usize) -> Option<Vec<T>> {
800 if index >= self.num_sequences() {
801 return None;
802 }
803
804 // SAFETY: Bounds check has been performed.
805 Some(unsafe { self.decode_vec_unchecked(index) })
806 }
807
808 /// Returns the elements of sequence `index` as a newly allocated `Vec`
809 /// without bounds checking.
810 ///
811 /// # Safety
812 ///
813 /// Calling this method with `index >= num_sequences()` is undefined behavior.
814 #[inline(always)]
815 pub unsafe fn decode_vec_unchecked(&self, index: usize) -> Vec<T> {
816 unsafe { self.get_unchecked(index).collect() }
817 }
818
819 /// Decodes sequence `index` into the provided buffer.
820 ///
821 /// The buffer is cleared before use. Returns the number of elements
822 /// decoded, or `None` if `index >= num_sequences()`.
823 ///
824 /// This method is more efficient than [`decode_vec`](Self::decode_vec) when
825 /// reusing a buffer across multiple calls.
826 ///
827 /// # Examples
828 ///
829 /// ```
830 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
831 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
832 ///
833 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
834 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
835 ///
836 /// let mut buf = Vec::new();
837 /// assert_eq!(vec.decode_into(0, &mut buf), Some(2));
838 /// assert_eq!(buf, vec![1, 2]);
839 ///
840 /// // Buffer is reused (cleared internally).
841 /// assert_eq!(vec.decode_into(1, &mut buf), Some(3));
842 /// assert_eq!(buf, vec![3, 4, 5]);
843 /// # Ok(())
844 /// # }
845 /// ```
846 #[inline(always)]
847 pub fn decode_into(&self, index: usize, buf: &mut Vec<T>) -> Option<usize> {
848 if index >= self.num_sequences() {
849 return None;
850 }
851
852 // SAFETY: Bounds check has been performed.
853 Some(unsafe { self.decode_into_unchecked(index, buf) })
854 }
855
856 /// Decodes sequence `index` into the provided buffer without bounds checking.
857 ///
858 /// # Safety
859 ///
860 /// Calling this method with `index >= num_sequences()` is undefined behavior.
861 #[inline(always)]
862 pub unsafe fn decode_into_unchecked(&self, index: usize, buf: &mut Vec<T>) -> usize {
863 let start_bit = unsafe { self.sequence_start_bit_unchecked(index) };
864
865 buf.clear();
866
867 // Create reader and codec dispatcher once, then decode all elements
868 // directly into the buffer without creating an intermediate SeqIter.
869 // This avoids iterator overhead and enables better compiler optimization.
870 let mut reader =
871 SeqVecBitReader::<E>::new(dsi_bitstream::impls::MemWordReader::new_inf(self.data.as_ref()));
872 let _ = reader.set_bit_pos(start_bit);
873 let code_reader = CodecReader::new(self.encoding);
874
875 if let Some(lengths) = &self.seq_lengths {
876 let count = unsafe { lengths.get_unchecked(index) as usize };
877 self.decode_counted(&mut reader, &code_reader, buf, count);
878 } else {
879 let end_bit = unsafe { self.sequence_end_bit_unchecked(index) };
880 self.decode_until(&mut reader, &code_reader, buf, end_bit);
881 }
882
883 buf.len()
884 }
885
886 /// Decodes a known number of elements into `buf`.
887 #[inline(always)]
888 fn decode_counted<'a>(
889 &self,
890 reader: &mut SeqVecBitReader<'a, E>,
891 code_reader: &CodecReader<'a, E>,
892 buf: &mut Vec<T>,
893 count: usize,
894 ) {
895 buf.reserve(count);
896 for _ in 0..count {
897 let word = code_reader.read(reader).unwrap();
898 buf.push(T::from_word(word));
899 }
900 }
901
902 /// Decodes elements until the reader reaches `end_bit`.
903 #[inline(always)]
904 fn decode_until<'a>(
905 &self,
906 reader: &mut SeqVecBitReader<'a, E>,
907 code_reader: &CodecReader<'a, E>,
908 buf: &mut Vec<T>,
909 end_bit: u64,
910 ) {
911 while reader.bit_pos().unwrap() < end_bit {
912 let word = code_reader.read(reader).unwrap();
913 buf.push(T::from_word(word));
914 }
915 }
916
917 /// Returns an iterator over all sequences.
918 ///
919 /// Each element of the returned iterator is a [`SeqIter`] for the
920 /// corresponding sequence.
921 ///
922 /// # Examples
923 ///
924 /// ```
925 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
926 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
927 ///
928 /// let sequences: &[&[u32]] = &[&[1, 2], &[3], &[4, 5, 6]];
929 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
930 ///
931 /// for (i, seq) in vec.iter().enumerate() {
932 /// println!("Sequence {}: {:?}", i, seq.collect::<Vec<_>>());
933 /// }
934 /// # Ok(())
935 /// # }
936 /// ```
937 #[inline(always)]
938 pub fn iter(&self) -> SeqVecIter<'_, T, E, B> {
939 SeqVecIter::new(
940 self.data.as_ref(),
941 &self.bit_offsets,
942 self.seq_lengths.as_ref(),
943 self.encoding,
944 self.num_sequences(),
945 )
946 }
947
948 /// Creates a reusable reader for convenient random access to sequences.
949 ///
950 /// The returned [`SeqVecReader`] provides a convenient interface for
951 /// performing multiple sequence retrievals. While the current implementation
952 /// is a thin wrapper, it serves as a natural extension point for future
953 /// optimizations such as position tracking or caching.
954 ///
955 /// ## Performance Considerations
956 ///
957 /// - **Zero-copy iteration**: Returned iterators borrow directly from the
958 /// compressed data without intermediate allocations.
959 /// - **Stateless operation**: Each call to [`get`](Self::get) is independent and creates a fresh [`SeqIter`].
960 /// - **Convenience methods**: The reader provides [`decode_vec`](SeqVecReader::decode_vec)
961 /// and [`decode_into`](SeqVecReader::decode_into) for common patterns.
962 ///
963 /// ## When to Use
964 ///
965 /// Use a reader when:
966 /// - You prefer a consistent interface for multiple accesses.
967 /// - You want to use convenience methods like `decode_vec` or `decode_into`.
968 /// - Future stateful optimizations would benefit your access pattern.
969 ///
970 /// For single queries or simple iteration, direct calls to [`get`](Self::get)
971 /// or [`iter`](Self::iter) are equally efficient.
972 ///
973 /// # Examples
974 ///
975 /// ```
976 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
977 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
978 ///
979 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5], &[6]];
980 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
981 ///
982 /// let mut reader = vec.reader();
983 ///
984 /// // Perform multiple random accesses efficiently
985 /// assert_eq!(reader.decode_vec(2), Some(vec![6]));
986 /// assert_eq!(reader.decode_vec(0), Some(vec![1, 2]));
987 /// if let Some(seq) = reader.decode_vec(1) {
988 /// for value in seq {
989 /// assert!(value <= 5);
990 /// }
991 /// }
992 /// # Ok(())
993 /// # }
994 /// ```
995 #[inline(always)]
996 pub fn reader(&self) -> SeqVecReader<'_, T, E, B> {
997 SeqVecReader::new(self)
998 }
999
1000 /// Creates a zero-copy slice of a contiguous range of sequences.
1001 ///
1002 /// The slice provides a view into `len` sequences starting at `start`,
1003 /// without copying the underlying compressed data.
1004 ///
1005 /// # Arguments
1006 ///
1007 /// * `start` - The index of the first sequence in the slice.
1008 /// * `len` - The number of sequences to include in the slice.
1009 ///
1010 /// # Returns
1011 ///
1012 /// Returns `Some(SeqVecSlice)` if the range is valid, or `None` if
1013 /// `start + len` exceeds the number of sequences.
1014 ///
1015 /// # Examples
1016 ///
1017 /// ```
1018 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1019 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1020 ///
1021 /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5], &[6], &[7, 8]];
1022 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1023 ///
1024 /// // Create a slice of sequences 1 and 2
1025 /// let slice = vec.slice(1, 2).unwrap();
1026 /// assert_eq!(slice.len(), 2);
1027 ///
1028 /// // Index 0 of the slice refers to sequence 1 of the original vector
1029 /// let seq: Vec<u32> = slice.get(0).unwrap().collect();
1030 /// assert_eq!(seq, vec![3, 4, 5]);
1031 /// # Ok(())
1032 /// # }
1033 /// ```
1034 pub fn slice(&self, start: usize, len: usize) -> Option<slice::SeqVecSlice<'_, T, E, B>> {
1035 if start.saturating_add(len) > self.num_sequences() {
1036 return None;
1037 }
1038 Some(slice::SeqVecSlice::new(self, start..start + len))
1039 }
1040
1041 /// Splits the vector into two slices at the specified index.
1042 ///
1043 /// Returns a tuple of two slices: the first contains sequences `[0, mid)`
1044 /// and the second contains sequences `[mid, len)`.
1045 ///
1046 /// # Returns
1047 ///
1048 /// Returns `Some((left_slice, right_slice))` if `mid <= num_sequences()`,
1049 /// or `None` if `mid` is out of bounds.
1050 ///
1051 /// # Examples
1052 ///
1053 /// ```
1054 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1055 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1056 ///
1057 /// let sequences: &[&[u32]] = &[&[1], &[2], &[3], &[4]];
1058 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1059 ///
1060 /// let (left, right) = vec.split_at(2).unwrap();
1061 /// assert_eq!(left.len(), 2);
1062 /// assert_eq!(right.len(), 2);
1063 ///
1064 /// assert_eq!(left.decode_vec(0), Some(vec![1]));
1065 /// assert_eq!(right.decode_vec(0), Some(vec![3]));
1066 /// # Ok(())
1067 /// # }
1068 /// ```
1069 pub fn split_at(&self, mid: usize) -> Option<SeqVecSlicePair<'_, T, E, B>> {
1070 if mid > self.num_sequences() {
1071 return None;
1072 }
1073 Some((
1074 slice::SeqVecSlice::new(self, 0..mid),
1075 slice::SeqVecSlice::new(self, mid..self.num_sequences()),
1076 ))
1077 }
1078}
1079
1080// --- MemSize Implementation ---
1081
1082impl<T: Storable, E: Endianness, B: AsRef<[u64]> + MemSize + FlatType> MemSize for SeqVec<T, E, B> {
1083 fn mem_size_rec(&self, flags: SizeFlags, _refs: &mut mem_dbg::HashMap<usize, usize>) -> usize {
1084 let mut total = core::mem::size_of::<Self>();
1085 // Add heap-allocated memory for the data buffer.
1086 total += self.data.mem_size(flags) - core::mem::size_of::<B>();
1087 // Add heap-allocated memory for the bit_offsets index.
1088 total +=
1089 self.bit_offsets.mem_size(flags) - core::mem::size_of::<FixedVec<u64, u64, E, B>>();
1090 // Add heap-allocated memory for optional sequence lengths.
1091 if let Some(lengths) = &self.seq_lengths {
1092 total +=
1093 lengths.mem_size(flags) - core::mem::size_of::<FixedVec<u64, u64, E, Vec<u64>>>();
1094 }
1095 total
1096 }
1097}
1098
1099// --- MemDbgImpl Implementation ---
1100
1101// Wrapper for Codes to provide correct MemDbgImpl, following the pattern in
1102// variable::VarVec. This is necessary because the derived implementation for
1103// Codes is incorrect and cannot be fixed due to the orphan rule.
1104struct CodeWrapper<'a>(&'a Codes);
1105
1106impl MemSize for CodeWrapper<'_> {
1107 fn mem_size_rec(&self, _flags: SizeFlags, _refs: &mut mem_dbg::HashMap<usize, usize>) -> usize {
1108 core::mem::size_of_val(self.0)
1109 }
1110}
1111
1112impl MemDbgImpl for CodeWrapper<'_> {
1113 fn _mem_dbg_depth_on(
1114 &self,
1115 writer: &mut impl core::fmt::Write,
1116 total_size: usize,
1117 max_depth: usize,
1118 prefix: &mut String,
1119 field_name: Option<&str>,
1120 is_last: bool,
1121 padded_size: usize,
1122 flags: DbgFlags,
1123 _dbg_refs: &mut mem_dbg::HashSet<usize>,
1124 ) -> core::fmt::Result {
1125 use core::fmt::Write;
1126
1127 if prefix.len() > max_depth {
1128 return Ok(());
1129 }
1130
1131 let real_size = self.mem_size(flags.to_size_flags());
1132 let mut buffer = String::new();
1133
1134 if flags.contains(DbgFlags::HUMANIZE) {
1135 let (value, uom) = mem_dbg::humanize_float(real_size);
1136 if uom == " B" {
1137 write!(buffer, "{:>4}{}", value as usize, uom)?;
1138 } else {
1139 write!(buffer, "{:>4.2}{}", value, uom)?;
1140 }
1141 } else {
1142 write!(buffer, "{:>9}", real_size)?;
1143 }
1144
1145 if flags.contains(DbgFlags::PERCENTAGE) {
1146 let percentage = 100.0 * real_size as f64 / total_size as f64;
1147 write!(buffer, " {:>6.2}%", percentage)?;
1148 }
1149
1150 write!(writer, "{}", buffer)?;
1151 write!(writer, " {} {}", prefix, if is_last { "╰" } else { "├" })?;
1152
1153 if let Some(name) = field_name {
1154 write!(writer, "{}", name)?;
1155 }
1156
1157 // Print the Debug format of the enum with type_color() when TYPE_NAME is set.
1158 if flags.contains(DbgFlags::TYPE_NAME) {
1159 if flags.contains(DbgFlags::COLOR) {
1160 write!(writer, "{}", mem_dbg::type_color())?;
1161 }
1162 write!(writer, ": {:?}", self.0)?;
1163 if flags.contains(DbgFlags::COLOR) {
1164 write!(writer, "{}", mem_dbg::reset_color())?;
1165 }
1166 }
1167
1168 let padding = padded_size - core::mem::size_of_val(self.0);
1169 if padding != 0 {
1170 write!(writer, " [{}B]", padding)?;
1171 }
1172
1173 writeln!(writer)?;
1174 Ok(())
1175 }
1176
1177 fn _mem_dbg_rec_on(
1178 &self,
1179 _writer: &mut impl core::fmt::Write,
1180 _total_size: usize,
1181 _max_depth: usize,
1182 _prefix: &mut String,
1183 _is_last: bool,
1184 _flags: DbgFlags,
1185 _dbg_refs: &mut mem_dbg::HashSet<usize>,
1186 ) -> core::fmt::Result {
1187 Ok(())
1188 }
1189}
1190
1191impl<T: Storable, E: Endianness, B: AsRef<[u64]> + MemDbgImpl + FlatType> MemDbgImpl for SeqVec<T, E, B> {
1192 fn _mem_dbg_rec_on(
1193 &self,
1194 writer: &mut impl core::fmt::Write,
1195 total_size: usize,
1196 max_depth: usize,
1197 prefix: &mut String,
1198 _is_last: bool,
1199 flags: DbgFlags,
1200 _dbg_refs: &mut mem_dbg::HashSet<usize>,
1201 ) -> core::fmt::Result {
1202 self.data._mem_dbg_depth_on(
1203 writer,
1204 total_size,
1205 max_depth,
1206 prefix,
1207 Some("data"),
1208 false,
1209 core::mem::size_of_val(&self.data),
1210 flags,
1211 _dbg_refs,
1212 )?;
1213 self.bit_offsets._mem_dbg_depth_on(
1214 writer,
1215 total_size,
1216 max_depth,
1217 prefix,
1218 Some("bit_offsets"),
1219 false,
1220 core::mem::size_of_val(&self.bit_offsets),
1221 flags,
1222 _dbg_refs,
1223 )?;
1224
1225 if let Some(lengths) = &self.seq_lengths {
1226 lengths._mem_dbg_depth_on(
1227 writer,
1228 total_size,
1229 max_depth,
1230 prefix,
1231 Some("seq_lengths"),
1232 false,
1233 core::mem::size_of_val(lengths),
1234 flags,
1235 _dbg_refs,
1236 )?;
1237 }
1238
1239 let code_wrapper = CodeWrapper(&self.encoding);
1240 code_wrapper._mem_dbg_depth_on(
1241 writer,
1242 total_size,
1243 max_depth,
1244 prefix,
1245 Some("encoding"),
1246 false,
1247 core::mem::size_of_val(&self.encoding),
1248 flags,
1249 _dbg_refs,
1250 )?;
1251
1252 self._markers._mem_dbg_depth_on(
1253 writer,
1254 total_size,
1255 max_depth,
1256 prefix,
1257 Some("_markers"),
1258 true,
1259 core::mem::size_of_val(&self._markers),
1260 flags,
1261 _dbg_refs,
1262 )?;
1263 Ok(())
1264 }
1265}
1266
1267// --- PartialEq Implementation ---
1268
1269impl<T: Storable + PartialEq, E: Endianness, B: AsRef<[u64]>> PartialEq for SeqVec<T, E, B>
1270where
1271 for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
1272 + CodesRead<E>
1273 + BitSeek<Error = core::convert::Infallible>,
1274{
1275 fn eq(&self, other: &Self) -> bool {
1276 // Quick check: same number of sequences?
1277 if self.num_sequences() != other.num_sequences() {
1278 return false;
1279 }
1280
1281 // Compare all sequences element-by-element
1282 for i in 0..self.num_sequences() {
1283 // SAFETY: i < num_sequences() by loop invariant
1284 let self_iter = unsafe { self.get_unchecked(i) };
1285 let other_iter = unsafe { other.get_unchecked(i) };
1286
1287 if self_iter.ne(other_iter) {
1288 return false;
1289 }
1290 }
1291
1292 true
1293 }
1294}
1295
1296// --- decode_many Implementation ---
1297
1298impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B>
1299where
1300 for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
1301 + CodesRead<E>
1302 + BitSeek<Error = core::convert::Infallible>,
1303{
1304 /// Retrieves multiple sequences by their indices.
1305 ///
1306 /// This method decodes the requested sequences in sorted order for efficient
1307 /// sequential access to the bitstream, then returns them in the order
1308 /// corresponding to the input indices.
1309 ///
1310 /// # Arguments
1311 ///
1312 /// * `indices` - A slice of sequence indices to retrieve.
1313 ///
1314 /// # Returns
1315 ///
1316 /// - `Ok(Vec<Vec<T>>)` containing the sequences in the order of `indices`.
1317 /// - `Err(SeqVecError::IndexOutOfBounds(idx))` if any index is out of bounds.
1318 ///
1319 /// # Examples
1320 ///
1321 /// ```
1322 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1323 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1324 ///
1325 /// let sequences: &[&[u32]] = &[
1326 /// &[1, 2, 3],
1327 /// &[10, 20],
1328 /// &[100, 200, 300],
1329 /// &[1000],
1330 /// ];
1331 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1332 ///
1333 /// let indices = [3, 0, 2];
1334 /// let sequences = vec.decode_many(&indices)?;
1335 /// assert_eq!(sequences, vec![
1336 /// vec![1000],
1337 /// vec![1, 2, 3],
1338 /// vec![100, 200, 300],
1339 /// ]);
1340 /// # Ok(())
1341 /// # }
1342 /// ```
1343 pub fn decode_many(&self, indices: &[usize]) -> Result<Vec<Vec<T>>, SeqVecError> {
1344 if indices.is_empty() {
1345 return Ok(Vec::new());
1346 }
1347
1348 // Bounds checking
1349 for &index in indices {
1350 if index >= self.num_sequences() {
1351 return Err(SeqVecError::IndexOutOfBounds(index));
1352 }
1353 }
1354
1355 // SAFETY: We have just performed the bounds checks.
1356 Ok(unsafe { self.decode_many_unchecked(indices) })
1357 }
1358
1359 /// Retrieves multiple sequences without bounds checking.
1360 ///
1361 /// # Safety
1362 ///
1363 /// Calling this method with any out-of-bounds index is undefined behavior.
1364 pub unsafe fn decode_many_unchecked(&self, indices: &[usize]) -> Vec<Vec<T>> {
1365 if indices.is_empty() {
1366 return Vec::new();
1367 }
1368
1369 // Build indexed pairs: (sequence_index, original_position_in_results)
1370 let mut indexed_indices: Vec<(usize, usize)> = indices
1371 .iter()
1372 .enumerate()
1373 .map(|(i, &idx)| (idx, i))
1374 .collect();
1375
1376 // Sort by sequence index to enable more sequential bitstream access.
1377 indexed_indices.sort_unstable_by_key(|&(idx, _)| idx);
1378
1379 // Pre-allocate result vectors with estimated capacity based on bit ranges.
1380 // Iterate in original order to populate capacities before decoding.
1381 let mut results: Vec<Vec<T>> = indices
1382 .iter()
1383 .map(|&idx| {
1384 let start = unsafe { self.sequence_start_bit_unchecked(idx) };
1385 let end = unsafe { self.sequence_end_bit_unchecked(idx) };
1386 // Estimate ~4 bits per element (reasonable for Delta with values 1-10k).
1387 let cap = ((end - start) / 4).max(1) as usize;
1388 Vec::with_capacity(cap)
1389 })
1390 .collect();
1391
1392 // Decode in sorted order for compressed data locality.
1393 let mut reader = self.reader();
1394
1395 for &(target_index, original_pos) in &indexed_indices {
1396 let output = &mut results[original_pos];
1397 let _ = reader.decode_into(target_index, output);
1398 }
1399
1400 results
1401 }
1402
1403 /// Decodes multiple sequences into a caller-provided output vector.
1404 ///
1405 /// The output vector is cleared and resized to match `indices.len()`. Each
1406 /// sequence is decoded into its corresponding slot, maintaining the order
1407 /// specified by `indices`. This is more efficient than calling
1408 /// [`decode_vec`](Self::decode_vec) repeatedly, as the decoding process
1409 /// traverses the compressed data in sorted order for better cache locality.
1410 ///
1411 /// # Errors
1412 ///
1413 /// Returns [`SeqVecError::IndexOutOfBounds`] if any index is out of bounds.
1414 ///
1415 /// # Examples
1416 ///
1417 /// ```
1418 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1419 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1420 ///
1421 /// let sequences: &[&[u32]] = &[
1422 /// &[1, 2],
1423 /// &[10, 20],
1424 /// &[100, 200, 300],
1425 /// ];
1426 /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1427 ///
1428 /// let indices = [2, 0, 1];
1429 /// let mut output = Vec::new();
1430 /// vec.decode_many_into(&indices, &mut output)?;
1431 ///
1432 /// assert_eq!(output, vec![
1433 /// vec![100, 200, 300],
1434 /// vec![1, 2],
1435 /// vec![10, 20],
1436 /// ]);
1437 /// # Ok(())
1438 /// # }
1439 /// ```
1440 pub fn decode_many_into(
1441 &self,
1442 indices: &[usize],
1443 output: &mut Vec<Vec<T>>,
1444 ) -> Result<(), SeqVecError> {
1445 if indices.is_empty() {
1446 output.clear();
1447 return Ok(());
1448 }
1449
1450 for &index in indices {
1451 if index >= self.num_sequences() {
1452 return Err(SeqVecError::IndexOutOfBounds(index));
1453 }
1454 }
1455
1456 output.clear();
1457 output.resize_with(indices.len(), Vec::new);
1458
1459 // SAFETY: We have just performed the bounds checks.
1460 unsafe { self.decode_many_into_unchecked(indices, output.as_mut_slice()) };
1461 Ok(())
1462 }
1463
1464 /// Decodes multiple sequences into a caller-provided output slice without
1465 /// bounds checking.
1466 ///
1467 /// # Safety
1468 ///
1469 /// Calling this method with any out-of-bounds index is undefined behavior.
1470 pub unsafe fn decode_many_into_unchecked(&self, indices: &[usize], output: &mut [Vec<T>]) {
1471 debug_assert_eq!(indices.len(), output.len());
1472
1473 if indices.is_empty() {
1474 return;
1475 }
1476
1477 // Build indexed pairs: (sequence_index, original_position_in_output)
1478 let mut indexed_indices: Vec<(usize, usize)> = indices
1479 .iter()
1480 .enumerate()
1481 .map(|(i, &idx)| (idx, i))
1482 .collect();
1483
1484 // Sort by sequence index to enable more sequential bitstream access.
1485 indexed_indices.sort_unstable_by_key(|&(idx, _)| idx);
1486
1487 // Pre-allocate capacities for output vectors based on bit ranges.
1488 // Iterate in original order to populate capacities before decoding.
1489 for (i, &idx) in indices.iter().enumerate() {
1490 let start = unsafe { self.sequence_start_bit_unchecked(idx) };
1491 let end = unsafe { self.sequence_end_bit_unchecked(idx) };
1492 // Estimate ~4 bits per element (reasonable for Delta with values 1-10k).
1493 let cap = ((end - start) / 4).max(1) as usize;
1494 output[i].reserve(cap);
1495 }
1496
1497 // Decode in sorted order for compressed data locality.
1498 let mut reader = self.reader();
1499
1500 for &(target_index, original_pos) in &indexed_indices {
1501 let output_slot = &mut output[original_pos];
1502 let _ = reader.decode_into(target_index, output_slot);
1503 }
1504 }
1505}
1506
1507// --- IntoIterator ---
1508
1509impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> IntoIterator for &'a SeqVec<T, E, B>
1510where
1511 for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
1512 + CodesRead<E>
1513 + BitSeek<Error = core::convert::Infallible>,
1514{
1515 type Item = SeqIter<'a, T, E>;
1516 type IntoIter = SeqVecIter<'a, T, E, B>;
1517
1518 #[inline]
1519 fn into_iter(self) -> Self::IntoIter {
1520 self.iter()
1521 }
1522}
1523
1524impl<T: Storable + 'static, E: Endianness + 'static> IntoIterator for SeqVec<T, E, Vec<u64>>
1525where
1526 for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
1527 + CodesRead<E>
1528 + BitSeek<Error = core::convert::Infallible>,
1529{
1530 type Item = SeqIter<'static, T, E>;
1531 type IntoIter = SeqVecIntoIter<T, E>;
1532
1533 #[inline]
1534 fn into_iter(self) -> Self::IntoIter {
1535 SeqVecIntoIter::new(self)
1536 }
1537}