Skip to main content

compressed_intvec/seq/
mod.rs

1//! A compressed vector of variable-length sequences with indexed access.
2//!
3//! This module provides [`SeqVec`], a data structure for storing multiple
4//! integer sequences in a single compressed bitstream. Each sequence is
5//! accessed by its index (rank), and all elements within a sequence are
6//! decoded together.
7//!
8//! # Core Concepts
9//!
10//! ## Use Case
11//!
12//! [`SeqVec`] is designed for scenarios where:
13//!
14//! - Data is naturally organized as many variable-length sequences.
15//! - Access patterns retrieve entire sequences rather than individual elements.
16//! - Memory overhead of per-sequence pointers and padding must be minimized.
17//!
18//! A common application is representing **adjacency lists** in a compressed
19//! graph, where each node's neighbors form a sequence.
20//!
21//! ## Differences from [`VarVec`]
22//!
23//! | Aspect | [`VarVec`] | [`SeqVec`] |
24//! |--------|-----------|------------|
25//! | Access unit | Single element | Entire sequence |
26//! | Index meaning | Element position | Sequence rank |
27//! | Sampling | Periodic (every k elements) | At sequence boundaries |
28//! | Primary operation | `get(i) → T` | `get(i) → Iterator<T>` |
29//!
30//! ## Compression
31//!
32//! Like [`VarVec`], [`SeqVec`] uses instantaneous variable-length codes (Gamma,
33//! Delta, Zeta, etc.) from the [`dsi-bitstream`] crate. All sequences are
34//! concatenated into a single compressed bitstream, with a [`FixedVec`] index
35//! storing the bit offset of each sequence's start.
36//!
37//! ## Sequence Length
38//!
39//! Sequence lengths are **not stored by default**. The iterator for a sequence
40//! terminates when the current bit position reaches the start of the next
41//! sequence. This means:
42//!
43//! - Retrieving a sequence is O(length) for decoding — unavoidable.
44//! - Computing sequence length requires full iteration unless lengths are stored.
45//!
46//! You can opt-in to storing explicit lengths via
47//! [`SeqVecBuilder::store_lengths`](crate::seq::SeqVecBuilder::store_lengths).
48//! When enabled, O(1) length queries become available via
49//! [`SeqVec::sequence_len`](crate::seq::SeqVec::sequence_len), and decoding
50//! can avoid the end-bit check in hot loops.
51//!
52//! ## Immutability
53//!
54//! [`SeqVec`] is **immutable** after creation. Variable-length encoding makes
55//! in-place modification impractical, as changing one element could shift all
56//! subsequent data.
57//!
58//! # Main Components
59//!
60//! - [`SeqVec`]: The core compressed sequence vector.
61//! - [`SeqVecBuilder`]: Builder for constructing a [`SeqVec`] with custom codec.
62//! - [`SeqIter`]: Zero-allocation iterator over elements of a single sequence.
63//! - [`SeqVecIter`]: Iterator over all sequences, yielding [`SeqIter`] instances.
64//!
65//! # Examples
66//!
67//! ## Basic Usage
68//!
69//! ```
70//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
71//! use compressed_intvec::seq::{SeqVec, LESeqVec};
72//!
73//! let sequences: &[&[u32]] = &[
74//!     &[1, 2, 3],
75//!     &[10, 20],
76//!     &[100, 200, 300, 400],
77//!     &[], // Empty sequences are supported
78//! ];
79//!
80//! let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
81//!
82//! assert_eq!(vec.num_sequences(), 4);
83//!
84//! // Access a sequence by index
85//! let seq1: Vec<u32> = vec.get(1).unwrap().collect();
86//! assert_eq!(seq1, vec![10, 20]);
87//! #     Ok(())
88//! # }
89//! ```
90//!
91//! ## Custom Codec
92//!
93//! ```
94//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
95//! use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
96//!
97//! let sequences: Vec<Vec<u64>> = vec![
98//!     vec![1, 1, 1, 2, 3],
99//!     vec![100, 200, 300],
100//! ];
101//!
102//! let vec: LESeqVec<u64> = SeqVec::builder()
103//!     .codec(Codec::Zeta { k: Some(3) })
104//!     .build(&sequences)?;
105//! #     Ok(())
106//! # }
107//! ```
108//!
109//! [`VarVec`]: crate::variable::VarVec
110//! [`FixedVec`]: crate::fixed::FixedVec
111//! [`dsi-bitstream`]: https://crates.io/crates/dsi-bitstream
112
113mod builder;
114mod iter;
115mod macros;
116#[cfg(feature = "parallel")]
117mod parallel;
118mod reader;
119#[cfg(feature = "serde")]
120mod serde;
121mod slice;
122
123pub use builder::{SeqVecBuilder, SeqVecFromIterBuilder};
124pub use iter::{SeqIter, SeqVecIntoIter, SeqVecIter};
125pub use reader::SeqVecReader;
126pub use slice::SeqVecSlice;
127
128// Re-export codec spec for convenience.
129pub use crate::variable::codec::Codec;
130
131// Re-export deprecated alias for backward compatibility.
132#[allow(deprecated)]
133pub use crate::variable::VariableCodecSpec;
134
135use crate::common::codec_reader::CodecReader;
136use crate::fixed::{Error as FixedVecError, FixedVec};
137use crate::variable::traits::Storable;
138use dsi_bitstream::{
139    dispatch::{Codes, CodesRead, StaticCodeRead},
140    impls::{BufBitWriter, MemWordWriterVec},
141    prelude::{BE, BitRead, BitSeek, BitWrite, CodesWrite, Endianness, LE},
142};
143use iter::SeqVecBitReader;
144use mem_dbg::{DbgFlags, FlatType, MemDbgImpl, MemSize, SizeFlags};
145use std::marker::PhantomData;
146use std::{error::Error, fmt};
147
148/// Errors that can occur when working with [`SeqVec`].
149#[derive(Debug)]
150pub enum SeqVecError {
151    /// An I/O error from bitstream operations.
152    Io(std::io::Error),
153    /// An error from the bitstream library during encoding or decoding.
154    Bitstream(Box<dyn Error + Send + Sync>),
155    /// Invalid parameters were provided during construction.
156    InvalidParameters(String),
157    /// An error during codec resolution or dispatch.
158    CodecDispatch(String),
159    /// The requested sequence index is out of bounds.
160    IndexOutOfBounds(usize),
161}
162
163impl fmt::Display for SeqVecError {
164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165        match self {
166            SeqVecError::Io(e) => write!(f, "I/O error: {}", e),
167            SeqVecError::Bitstream(e) => write!(f, "Bitstream error: {}", e),
168            SeqVecError::InvalidParameters(s) => write!(f, "Invalid parameters: {}", s),
169            SeqVecError::CodecDispatch(s) => write!(f, "Codec dispatch error: {}", s),
170            SeqVecError::IndexOutOfBounds(idx) => {
171                write!(f, "Sequence index out of bounds: {}", idx)
172            }
173        }
174    }
175}
176
177impl Error for SeqVecError {
178    fn source(&self) -> Option<&(dyn Error + 'static)> {
179        match self {
180            SeqVecError::Io(e) => Some(e),
181            SeqVecError::Bitstream(e) => Some(e.as_ref()),
182            _ => None,
183        }
184    }
185}
186
187impl From<std::io::Error> for SeqVecError {
188    fn from(e: std::io::Error) -> Self {
189        SeqVecError::Io(e)
190    }
191}
192
193impl From<core::convert::Infallible> for SeqVecError {
194    fn from(_: core::convert::Infallible) -> Self {
195        unreachable!()
196    }
197}
198
199impl From<FixedVecError> for SeqVecError {
200    fn from(e: FixedVecError) -> Self {
201        SeqVecError::InvalidParameters(e.to_string())
202    }
203}
204
205/// Type alias for the bit writer used internally by [`SeqVec`] builders.
206pub(crate) type SeqVecBitWriter<E> = BufBitWriter<E, MemWordWriterVec<u64, Vec<u64>>>;
207
208/// A compressed, indexed vector of integer sequences.
209///
210/// [`SeqVec`] stores multiple sequences of integers in a single compressed
211/// bitstream, with an auxiliary index for O(1) access to each sequence by
212/// its rank. This is ideal for representing collections of variable-length
213/// sequences with minimal memory overhead.
214///
215/// See the [module-level documentation](self) for detailed usage information.
216///
217/// # Type Parameters
218///
219/// * `T` - The element type (e.g., `u32`, `i16`). Must implement [`Storable`].
220/// * `E` - The [`Endianness`] of the underlying bitstream ([`LE`] or [`BE`]).
221/// * `B` - The backing buffer type, enabling owned (`Vec<u64>`) or borrowed
222///   (`&[u64]`) storage for zero-copy operations.
223///
224/// [`Storable`]: crate::variable::traits::Storable
225/// [`Endianness`]: dsi_bitstream::prelude::Endianness
226/// [`LE`]: dsi_bitstream::prelude::LE
227/// [`BE`]: dsi_bitstream::prelude::BE
228#[derive(Debug, Clone)]
229pub struct SeqVec<T: Storable, E: Endianness, B: AsRef<[u64]> = Vec<u64>> {
230    /// The compressed bitstream containing all sequences concatenated.
231    data: B,
232    /// Bit offsets marking the start of each sequence. Contains N+1 elements
233    /// where N is the number of sequences. The final element is a sentinel
234    /// containing the total bit length.
235    /// Uses the same endianness `E` as the struct for design consistency.
236    bit_offsets: FixedVec<u64, u64, E, B>,
237    /// Optional per-sequence lengths stored in a compact fixed-width vector.
238    /// Uses `u64` (architecture-independent) to ensure portability across 32-bit
239    /// and 64-bit systems. Accessor methods return `usize` via safe casting.
240    seq_lengths: Option<FixedVec<u64, u64, E, Vec<u64>>>,
241    /// The compression codec used for all elements.
242    encoding: Codes,
243    /// Zero-sized markers for the generic type parameters.
244    _markers: PhantomData<(T, E)>,
245}
246
247// --- Type Aliases ---
248
249/// A [`SeqVec`] with little-endian bit ordering.
250pub type LESeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
251
252/// A [`SeqVec`] with big-endian bit ordering.
253pub type BESeqVec<T, B = Vec<u64>> = SeqVec<T, BE, B>;
254
255/// A [`SeqVec`] for unsigned integers with little-endian bit ordering.
256///
257/// Use this type alias for sequences of unsigned integer values stored with
258/// little-endian bit ordering.
259pub type USeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
260
261/// A [`SeqVec`] for signed integers with little-endian bit ordering.
262///
263/// Signed integers are transparently encoded using zig-zag encoding via the
264/// [`Storable`] trait.
265///
266/// [`Storable`]: crate::variable::traits::Storable
267pub type SSeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
268
269/// A [`SeqVec`] for signed integers with big-endian bit ordering.
270///
271/// Signed integers are transparently encoded using zig-zag encoding via the
272/// [`Storable`] trait.
273///
274/// [`Storable`]: crate::variable::traits::Storable
275pub type BESSeqVec<T, B = Vec<u64>> = SeqVec<T, BE, B>;
276
277/// A [`SeqVec`] for signed integers with little-endian bit ordering.
278///
279/// This is an alias for [`SSeqVec`], provided for consistency with the naming
280/// pattern `{Endianness}{SignedUnsigned}SeqVec`.
281///
282/// Signed integers are transparently encoded using zig-zag encoding via the
283/// [`Storable`] trait.
284///
285/// [`Storable`]: crate::variable::traits::Storable
286pub type LESSeqVec<T, B = Vec<u64>> = SeqVec<T, LE, B>;
287
288/// Type alias for a tuple of two [`SeqVecSlice`] references.
289pub type SeqVecSlicePair<'a, T, E, B> = (SeqVecSlice<'a, T, E, B>, SeqVecSlice<'a, T, E, B>);
290
291// --- Construction (Owned) ---
292
293impl<T: Storable + 'static, E: Endianness> SeqVec<T, E, Vec<u64>> {
294    /// Creates a [`SeqVec`] from a slice of slices using default settings.
295    ///
296    /// This method uses [`Codec::Auto`] to select an optimal codec
297    /// based on the data distribution.
298    ///
299    /// # Errors
300    ///
301    /// Returns a [`SeqVecError`] if codec resolution or encoding fails.
302    ///
303    /// # Examples
304    ///
305    /// ```
306    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
307    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
308    ///
309    /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20], &[100]];
310    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
311    ///
312    /// assert_eq!(vec.num_sequences(), 3);
313    /// #     Ok(())
314    /// # }
315    /// ```
316    pub fn from_slices<S: AsRef<[T]>>(sequences: &[S]) -> Result<Self, SeqVecError>
317    where
318        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
319    {
320        Self::builder().codec(Codec::Auto).build(sequences)
321    }
322
323    /// Consumes the [`SeqVec`] and returns all sequences as a `Vec<Vec<T>>`.
324    ///
325    /// This method decodes the entire compressed data, allocating a separate
326    /// vector for each sequence. The operation requires time proportional to the
327    /// total number of elements across all sequences.
328    ///
329    /// # Examples
330    ///
331    /// ```
332    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
333    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
334    ///
335    /// let sequences: &[&[u32]] = &[&[1, 2], &[10, 20, 30]];
336    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
337    ///
338    /// let decoded: Vec<Vec<u32>> = vec.into_vecs();
339    /// assert_eq!(decoded, vec![vec![1, 2], vec![10, 20, 30]]);
340    /// #     Ok(())
341    /// # }
342    /// ```
343    pub fn into_vecs(self) -> Vec<Vec<T>>
344    where
345        for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
346            + CodesRead<E>
347            + BitSeek<Error = core::convert::Infallible>,
348    {
349        self.iter().map(|seq_iter| seq_iter.collect()).collect()
350    }
351}
352
353// --- Construction (Generic Buffer) ---
354
355impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B> {
356    /// Creates a [`SeqVec`] from raw components for zero-copy views.
357    ///
358    /// This constructor is intended for advanced use cases such as memory-mapping
359    /// a pre-built [`SeqVec`] from disk or creating a borrowed view.
360    ///
361    /// # Arguments
362    ///
363    /// * `data` - The compressed bitstream buffer.
364    /// * `bit_offsets_data` - The buffer containing the bit offsets index.
365    /// * `bit_offsets_len` - Number of entries in the bit offsets index (N+1).
366    /// * `bit_offsets_num_bits` - Bit width of each entry in the offsets index.
367    /// * `encoding` - The codec used for compression.
368    ///
369    /// # Errors
370    ///
371    /// Returns [`SeqVecError::InvalidParameters`] if:
372    /// * `bit_offsets_len` is zero (must have at least the sentinel entry).
373    /// * The underlying [`FixedVec`] construction fails.
374    ///
375    /// [`FixedVec`]: crate::fixed::FixedVec
376    ///
377    /// # Safety Considerations
378    ///
379    /// The caller must ensure that:
380    /// * `data` contains valid compressed data encoded with `encoding`.
381    ///   Invalid data will cause panics or incorrect results during decoding.
382    /// * `bit_offsets_data` contains monotonically increasing bit positions.
383    ///   Unsorted offsets will cause out-of-order or corrupted sequence retrieval.
384    /// * The sentinel value (final bit offset) does not exceed the total bits
385    ///   in `data`. Violations cause panics during decoding.
386    /// * All bit positions fall within valid boundaries of the bitstream.
387    ///
388    /// # Examples
389    ///
390    /// ```
391    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
392    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
393    /// use compressed_intvec::fixed::FixedVec;
394    /// use dsi_bitstream::prelude::LE;
395    /// use dsi_bitstream::impls::{BufBitWriter, MemWordWriterVec};
396    /// use dsi_bitstream::prelude::{BitWrite, CodesWrite};
397    ///
398    /// // Create a simple vector using high-level API
399    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
400    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
401    ///
402    /// // In a real zero-copy scenario, these would come from disk/memory-map
403    /// let data = vec.as_limbs().to_vec();
404    /// let offsets_ref = vec.bit_offsets_ref();
405    ///
406    /// // Verify structure is sound before reconstruction
407    /// assert_eq!(vec.num_sequences(), 2);
408    /// #     Ok(())
409    /// # }
410    /// ```
411    pub fn from_parts(
412        data: B,
413        bit_offsets_data: B,
414        bit_offsets_len: usize,
415        bit_offsets_num_bits: usize,
416        encoding: Codes,
417    ) -> Result<Self, SeqVecError> {
418        if bit_offsets_len == 0 {
419            return Err(SeqVecError::InvalidParameters(
420                "bit_offsets must have at least one entry (the sentinel)".to_string(),
421            ));
422        }
423
424        let bit_offsets = FixedVec::<u64, u64, E, B>::from_parts(
425            bit_offsets_data,
426            bit_offsets_len,
427            bit_offsets_num_bits,
428        )?;
429
430        Ok(Self {
431            data,
432            bit_offsets,
433            seq_lengths: None,
434            encoding,
435            _markers: PhantomData,
436        })
437    }
438
439    /// Creates a [`SeqVec`] from raw components with optional stored lengths.
440    ///
441    /// Use this variant when you have pre-computed sequence lengths from an
442    /// earlier encoding. The `seq_lengths` parameter must be consistent with
443    /// `bit_offsets` when provided (element count must equal `num_sequences()`).
444    ///
445    /// # Errors
446    ///
447    /// Returns [`SeqVecError::InvalidParameters`] if the number of lengths does
448    /// not equal the number of sequences.
449    ///
450    /// # Examples
451    ///
452    /// ```
453    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
454    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
455    ///
456    /// // Typical usage: start with high-level API
457    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
458    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
459    ///
460    /// // Verify structure
461    /// assert!(!vec.has_stored_lengths());
462    /// assert_eq!(vec.num_sequences(), 2);
463    /// #     Ok(())
464    /// # }
465    /// ```
466    pub fn from_parts_with_lengths(
467        data: B,
468        bit_offsets_data: B,
469        bit_offsets_len: usize,
470        bit_offsets_num_bits: usize,
471        seq_lengths: Option<FixedVec<u64, u64, E, Vec<u64>>>,
472        encoding: Codes,
473    ) -> Result<Self, SeqVecError> {
474        if bit_offsets_len == 0 {
475            return Err(SeqVecError::InvalidParameters(
476                "bit_offsets must have at least one entry (the sentinel)".to_string(),
477            ));
478        }
479
480        if let Some(lengths) = &seq_lengths
481            && lengths.len() + 1 != bit_offsets_len
482        {
483            return Err(SeqVecError::InvalidParameters(
484                "seq_lengths length must match number of sequences".to_string(),
485            ));
486        }
487
488        let bit_offsets = FixedVec::<u64, u64, E, B>::from_parts(
489            bit_offsets_data,
490            bit_offsets_len,
491            bit_offsets_num_bits,
492        )?;
493
494        Ok(Self {
495            data,
496            bit_offsets,
497            seq_lengths,
498            encoding,
499            _markers: PhantomData,
500        })
501    }
502
503    /// Creates a [`SeqVec`] from pre-built components without validation.
504    ///
505    /// # Safety
506    ///
507    /// The caller must ensure all components are consistent and valid. Mismatched
508    /// parameters will lead to panics or incorrect data retrieval.
509    #[inline]
510    pub unsafe fn from_parts_unchecked(
511        data: B,
512        bit_offsets: FixedVec<u64, u64, E, B>,
513        encoding: Codes,
514    ) -> Self {
515        Self {
516            data,
517            bit_offsets,
518            seq_lengths: None,
519            encoding,
520            _markers: PhantomData,
521        }
522    }
523
524    /// Creates a [`SeqVec`] from pre-built components with optional lengths
525    /// without validation.
526    ///
527    /// # Safety
528    ///
529    /// The caller must ensure all components are consistent and valid.
530    #[inline]
531    pub unsafe fn from_parts_with_lengths_unchecked(
532        data: B,
533        bit_offsets: FixedVec<u64, u64, E, B>,
534        seq_lengths: Option<FixedVec<u64, u64, E, Vec<u64>>>,
535        encoding: Codes,
536    ) -> Self {
537        Self {
538            data,
539            bit_offsets,
540            seq_lengths,
541            encoding,
542            _markers: PhantomData,
543        }
544    }
545}
546
547// --- Query Methods ---
548
549impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B> {
550    /// Returns the number of sequences stored.
551    ///
552    /// This is O(1) as it is derived from the bit offsets index length.
553    #[inline(always)]
554    pub fn num_sequences(&self) -> usize {
555        // bit_offsets has N+1 entries for N sequences (always at least the sentinel).
556        self.bit_offsets.len() - 1
557    }
558
559    /// Returns `true` if there are no sequences stored.
560    #[inline(always)]
561    pub fn is_empty(&self) -> bool {
562        self.num_sequences() == 0
563    }
564
565    /// Returns the compression codec used for encoding.
566    #[inline(always)]
567    pub fn encoding(&self) -> Codes {
568        self.encoding
569    }
570
571    /// Returns a reference to the underlying compressed data buffer.
572    #[inline(always)]
573    pub fn as_limbs(&self) -> &[u64] {
574        self.data.as_ref()
575    }
576
577    /// Returns a reference to the bit offsets index.
578    #[inline(always)]
579    pub fn bit_offsets_ref(&self) -> &FixedVec<u64, u64, E, B> {
580        &self.bit_offsets
581    }
582
583    /// Returns `true` if explicit sequence lengths were stored at construction time.
584    #[inline(always)]
585    pub fn has_stored_lengths(&self) -> bool {
586        self.seq_lengths.is_some()
587    }
588
589    /// Returns the length of sequence `index` if explicit lengths are stored.
590    ///
591    /// Returns `None` if `index` is out of bounds or if lengths were not
592    /// stored at construction time. Use [`has_stored_lengths`](Self::has_stored_lengths)
593    /// to distinguish between these cases.
594    ///
595    /// If lengths are stored, this query completes in O(1) time. Otherwise,
596    /// determining sequence length requires full iteration. Store lengths at
597    /// construction time via [`SeqVecBuilder::store_lengths`](crate::seq::SeqVecBuilder::store_lengths)
598    /// when O(1) length queries are needed.
599    ///
600    /// # Examples
601    ///
602    /// ```
603    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
604    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
605    ///
606    /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20]];
607    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
608    ///
609    /// // Without storing lengths, returns None
610    /// assert_eq!(vec.sequence_len(0), None);
611    /// assert!(!vec.has_stored_lengths());
612    /// #     Ok(())
613    /// # }
614    /// ```
615    #[inline(always)]
616    pub fn sequence_len(&self, index: usize) -> Option<usize> {
617        if index >= self.num_sequences() {
618            return None;
619        }
620
621        self.seq_lengths
622            .as_ref()
623            .map(|lengths| unsafe { lengths.get_unchecked(index) as usize })
624    }
625
626    /// Returns the total number of bits in the compressed data.
627    ///
628    /// This is the sentinel value at the end of the bit offsets index.
629    #[inline(always)]
630    pub fn total_bits(&self) -> u64 {
631        // bit_offsets always has at least one sentinel entry by construction invariant.
632        unsafe { self.bit_offsets.get_unchecked(self.bit_offsets.len() - 1) }
633    }
634
635    /// Returns the bit offset where sequence `index` starts in the compressed data.
636    ///
637    /// Returns `None` if `index >= num_sequences()`. This is useful for
638    /// understanding the compression footprint or verifying sequence boundaries.
639    ///
640    /// # Examples
641    ///
642    /// ```
643    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
644    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
645    ///
646    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
647    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
648    ///
649    /// // First sequence always starts at bit 0
650    /// assert_eq!(vec.sequence_start_bit(0), Some(0));
651    /// // Second sequence starts after the first
652    /// assert!(vec.sequence_start_bit(1).is_some());
653    /// // Out of bounds
654    /// assert_eq!(vec.sequence_start_bit(2), None);
655    /// #     Ok(())
656    /// # }
657    /// ```
658    #[inline(always)]
659    pub fn sequence_start_bit(&self, index: usize) -> Option<u64> {
660        if index >= self.num_sequences() {
661            return None;
662        }
663        // SAFETY: bounds checked above.
664        Some(unsafe { self.bit_offsets.get_unchecked(index) })
665    }
666
667    /// Returns the bit offset where sequence `index` starts, without bounds checking.
668    ///
669    /// # Safety
670    ///
671    /// The caller must ensure `index < num_sequences()`.
672    #[inline(always)]
673    pub unsafe fn sequence_start_bit_unchecked(&self, index: usize) -> u64 {
674        debug_assert!(
675            index < self.num_sequences(),
676            "index {} out of bounds for {} sequences",
677            index,
678            self.num_sequences()
679        );
680        unsafe { self.bit_offsets.get_unchecked(index) }
681    }
682
683    /// Returns the bit offset immediately after sequence `index` ends.
684    ///
685    /// This is equivalent to the start bit of the next sequence, or the total
686    /// bit length for the final sequence. Useful for calculating per-sequence
687    /// compression footprint.
688    ///
689    /// # Examples
690    ///
691    /// ```
692    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
693    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
694    ///
695    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
696    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
697    ///
698    /// let start = vec.sequence_start_bit(0).unwrap();
699    /// let end = unsafe { vec.sequence_end_bit_unchecked(0) };
700    /// println!("Sequence 0 occupies {} bits", end - start);
701    /// #     Ok(())
702    /// # }
703    /// ```
704    ///
705    /// # Safety
706    ///
707    /// The caller must ensure `index < num_sequences()`.
708    #[inline]
709    pub unsafe fn sequence_end_bit_unchecked(&self, index: usize) -> u64 {
710        debug_assert!(
711            index < self.num_sequences(),
712            "index {} out of bounds for {} sequences",
713            index,
714            self.num_sequences()
715        );
716        unsafe { self.bit_offsets.get_unchecked(index + 1) }
717    }
718}
719
720// --- Access Methods ---
721
722impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B>
723where
724    for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
725        + CodesRead<E>
726        + BitSeek<Error = core::convert::Infallible>,
727{
728    /// Returns an iterator over the elements of sequence `index`.
729    ///
730    /// Returns `None` if `index >= num_sequences()`.
731    ///
732    /// # Examples
733    ///
734    /// ```
735    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
736    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
737    ///
738    /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[4, 5]];
739    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
740    ///
741    /// let first: Vec<u32> = vec.get(0).unwrap().collect();
742    /// assert_eq!(first, vec![1, 2, 3]);
743    /// #     Ok(())
744    /// # }
745    /// ```
746    #[inline(always)]
747    pub fn get(&self, index: usize) -> Option<SeqIter<'_, T, E>> {
748        if index >= self.num_sequences() {
749            return None;
750        }
751        // SAFETY: bounds checked above.
752        Some(unsafe { self.get_unchecked(index) })
753    }
754
755    /// Returns an iterator over the elements of sequence `index` without
756    /// bounds checking.
757    ///
758    /// # Safety
759    ///
760    /// Calling this method with `index >= num_sequences()` is undefined behavior.
761    #[inline(always)]
762    pub unsafe fn get_unchecked(&self, index: usize) -> SeqIter<'_, T, E> {
763        debug_assert!(
764            index < self.num_sequences(),
765            "index {} out of bounds for {} sequences",
766            index,
767            self.num_sequences()
768        );
769
770        let start_bit = unsafe { self.sequence_start_bit_unchecked(index) };
771        let end_bit = unsafe { self.sequence_end_bit_unchecked(index) };
772        let len = self
773            .seq_lengths
774            .as_ref()
775            .map(|lengths| unsafe { lengths.get_unchecked(index) as usize });
776
777        SeqIter::new_with_len(self.data.as_ref(), start_bit, end_bit, self.encoding, len)
778    }
779
780    /// Returns the elements of sequence `index` as a newly allocated `Vec`.
781    ///
782    /// Returns `None` if `index >= num_sequences()`.
783    ///
784    /// # Examples
785    ///
786    /// ```
787    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
788    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
789    ///
790    /// let sequences: &[&[u32]] = &[&[10, 20, 30]];
791    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
792    ///
793    /// assert_eq!(vec.decode_vec(0), Some(vec![10, 20, 30]));
794    /// assert_eq!(vec.decode_vec(1), None);
795    /// #     Ok(())
796    /// # }
797    /// ```
798    #[inline(always)]
799    pub fn decode_vec(&self, index: usize) -> Option<Vec<T>> {
800        if index >= self.num_sequences() {
801            return None;
802        }
803
804        // SAFETY: Bounds check has been performed.
805        Some(unsafe { self.decode_vec_unchecked(index) })
806    }
807
808    /// Returns the elements of sequence `index` as a newly allocated `Vec`
809    /// without bounds checking.
810    ///
811    /// # Safety
812    ///
813    /// Calling this method with `index >= num_sequences()` is undefined behavior.
814    #[inline(always)]
815    pub unsafe fn decode_vec_unchecked(&self, index: usize) -> Vec<T> {
816        unsafe { self.get_unchecked(index).collect() }
817    }
818
819    /// Decodes sequence `index` into the provided buffer.
820    ///
821    /// The buffer is cleared before use. Returns the number of elements
822    /// decoded, or `None` if `index >= num_sequences()`.
823    ///
824    /// This method is more efficient than [`decode_vec`](Self::decode_vec) when
825    /// reusing a buffer across multiple calls.
826    ///
827    /// # Examples
828    ///
829    /// ```
830    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
831    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
832    ///
833    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
834    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
835    ///
836    /// let mut buf = Vec::new();
837    /// assert_eq!(vec.decode_into(0, &mut buf), Some(2));
838    /// assert_eq!(buf, vec![1, 2]);
839    ///
840    /// // Buffer is reused (cleared internally).
841    /// assert_eq!(vec.decode_into(1, &mut buf), Some(3));
842    /// assert_eq!(buf, vec![3, 4, 5]);
843    /// #     Ok(())
844    /// # }
845    /// ```
846    #[inline(always)]
847    pub fn decode_into(&self, index: usize, buf: &mut Vec<T>) -> Option<usize> {
848        if index >= self.num_sequences() {
849            return None;
850        }
851
852        // SAFETY: Bounds check has been performed.
853        Some(unsafe { self.decode_into_unchecked(index, buf) })
854    }
855
856    /// Decodes sequence `index` into the provided buffer without bounds checking.
857    ///
858    /// # Safety
859    ///
860    /// Calling this method with `index >= num_sequences()` is undefined behavior.
861    #[inline(always)]
862    pub unsafe fn decode_into_unchecked(&self, index: usize, buf: &mut Vec<T>) -> usize {
863        let start_bit = unsafe { self.sequence_start_bit_unchecked(index) };
864
865        buf.clear();
866
867        // Create reader and codec dispatcher once, then decode all elements
868        // directly into the buffer without creating an intermediate SeqIter.
869        // This avoids iterator overhead and enables better compiler optimization.
870        let mut reader =
871            SeqVecBitReader::<E>::new(dsi_bitstream::impls::MemWordReader::new_inf(self.data.as_ref()));
872        let _ = reader.set_bit_pos(start_bit);
873        let code_reader = CodecReader::new(self.encoding);
874
875        if let Some(lengths) = &self.seq_lengths {
876            let count = unsafe { lengths.get_unchecked(index) as usize };
877            self.decode_counted(&mut reader, &code_reader, buf, count);
878        } else {
879            let end_bit = unsafe { self.sequence_end_bit_unchecked(index) };
880            self.decode_until(&mut reader, &code_reader, buf, end_bit);
881        }
882
883        buf.len()
884    }
885
886    /// Decodes a known number of elements into `buf`.
887    #[inline(always)]
888    fn decode_counted<'a>(
889        &self,
890        reader: &mut SeqVecBitReader<'a, E>,
891        code_reader: &CodecReader<'a, E>,
892        buf: &mut Vec<T>,
893        count: usize,
894    ) {
895        buf.reserve(count);
896        for _ in 0..count {
897            let word = code_reader.read(reader).unwrap();
898            buf.push(T::from_word(word));
899        }
900    }
901
902    /// Decodes elements until the reader reaches `end_bit`.
903    #[inline(always)]
904    fn decode_until<'a>(
905        &self,
906        reader: &mut SeqVecBitReader<'a, E>,
907        code_reader: &CodecReader<'a, E>,
908        buf: &mut Vec<T>,
909        end_bit: u64,
910    ) {
911        while reader.bit_pos().unwrap() < end_bit {
912            let word = code_reader.read(reader).unwrap();
913            buf.push(T::from_word(word));
914        }
915    }
916
917    /// Returns an iterator over all sequences.
918    ///
919    /// Each element of the returned iterator is a [`SeqIter`] for the
920    /// corresponding sequence.
921    ///
922    /// # Examples
923    ///
924    /// ```
925    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
926    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
927    ///
928    /// let sequences: &[&[u32]] = &[&[1, 2], &[3], &[4, 5, 6]];
929    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
930    ///
931    /// for (i, seq) in vec.iter().enumerate() {
932    ///     println!("Sequence {}: {:?}", i, seq.collect::<Vec<_>>());
933    /// }
934    /// #     Ok(())
935    /// # }
936    /// ```
937    #[inline(always)]
938    pub fn iter(&self) -> SeqVecIter<'_, T, E, B> {
939        SeqVecIter::new(
940            self.data.as_ref(),
941            &self.bit_offsets,
942            self.seq_lengths.as_ref(),
943            self.encoding,
944            self.num_sequences(),
945        )
946    }
947
948    /// Creates a reusable reader for convenient random access to sequences.
949    ///
950    /// The returned [`SeqVecReader`] provides a convenient interface for
951    /// performing multiple sequence retrievals. While the current implementation
952    /// is a thin wrapper, it serves as a natural extension point for future
953    /// optimizations such as position tracking or caching.
954    ///
955    /// ## Performance Considerations
956    ///
957    /// - **Zero-copy iteration**: Returned iterators borrow directly from the
958    ///   compressed data without intermediate allocations.
959    /// - **Stateless operation**: Each call to [`get`](Self::get) is independent and creates a fresh [`SeqIter`].
960    /// - **Convenience methods**: The reader provides [`decode_vec`](SeqVecReader::decode_vec)
961    ///   and [`decode_into`](SeqVecReader::decode_into) for common patterns.
962    ///
963    /// ## When to Use
964    ///
965    /// Use a reader when:
966    /// - You prefer a consistent interface for multiple accesses.
967    /// - You want to use convenience methods like `decode_vec` or `decode_into`.
968    /// - Future stateful optimizations would benefit your access pattern.
969    ///
970    /// For single queries or simple iteration, direct calls to [`get`](Self::get)
971    /// or [`iter`](Self::iter) are equally efficient.
972    ///
973    /// # Examples
974    ///
975    /// ```
976    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
977    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
978    ///
979    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5], &[6]];
980    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
981    ///
982    /// let mut reader = vec.reader();
983    ///
984    /// // Perform multiple random accesses efficiently
985    /// assert_eq!(reader.decode_vec(2), Some(vec![6]));
986    /// assert_eq!(reader.decode_vec(0), Some(vec![1, 2]));
987    /// if let Some(seq) = reader.decode_vec(1) {
988    ///     for value in seq {
989    ///         assert!(value <= 5);
990    ///     }
991    /// }
992    /// #     Ok(())
993    /// # }
994    /// ```
995    #[inline(always)]
996    pub fn reader(&self) -> SeqVecReader<'_, T, E, B> {
997        SeqVecReader::new(self)
998    }
999
1000    /// Creates a zero-copy slice of a contiguous range of sequences.
1001    ///
1002    /// The slice provides a view into `len` sequences starting at `start`,
1003    /// without copying the underlying compressed data.
1004    ///
1005    /// # Arguments
1006    ///
1007    /// * `start` - The index of the first sequence in the slice.
1008    /// * `len` - The number of sequences to include in the slice.
1009    ///
1010    /// # Returns
1011    ///
1012    /// Returns `Some(SeqVecSlice)` if the range is valid, or `None` if
1013    /// `start + len` exceeds the number of sequences.
1014    ///
1015    /// # Examples
1016    ///
1017    /// ```
1018    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1019    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1020    ///
1021    /// let sequences: &[&[u32]] = &[&[1, 2], &[3, 4, 5], &[6], &[7, 8]];
1022    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1023    ///
1024    /// // Create a slice of sequences 1 and 2
1025    /// let slice = vec.slice(1, 2).unwrap();
1026    /// assert_eq!(slice.len(), 2);
1027    ///
1028    /// // Index 0 of the slice refers to sequence 1 of the original vector
1029    /// let seq: Vec<u32> = slice.get(0).unwrap().collect();
1030    /// assert_eq!(seq, vec![3, 4, 5]);
1031    /// #     Ok(())
1032    /// # }
1033    /// ```
1034    pub fn slice(&self, start: usize, len: usize) -> Option<slice::SeqVecSlice<'_, T, E, B>> {
1035        if start.saturating_add(len) > self.num_sequences() {
1036            return None;
1037        }
1038        Some(slice::SeqVecSlice::new(self, start..start + len))
1039    }
1040
1041    /// Splits the vector into two slices at the specified index.
1042    ///
1043    /// Returns a tuple of two slices: the first contains sequences `[0, mid)`
1044    /// and the second contains sequences `[mid, len)`.
1045    ///
1046    /// # Returns
1047    ///
1048    /// Returns `Some((left_slice, right_slice))` if `mid <= num_sequences()`,
1049    /// or `None` if `mid` is out of bounds.
1050    ///
1051    /// # Examples
1052    ///
1053    /// ```
1054    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1055    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1056    ///
1057    /// let sequences: &[&[u32]] = &[&[1], &[2], &[3], &[4]];
1058    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1059    ///
1060    /// let (left, right) = vec.split_at(2).unwrap();
1061    /// assert_eq!(left.len(), 2);
1062    /// assert_eq!(right.len(), 2);
1063    ///
1064    /// assert_eq!(left.decode_vec(0), Some(vec![1]));
1065    /// assert_eq!(right.decode_vec(0), Some(vec![3]));
1066    /// #     Ok(())
1067    /// # }
1068    /// ```
1069    pub fn split_at(&self, mid: usize) -> Option<SeqVecSlicePair<'_, T, E, B>> {
1070        if mid > self.num_sequences() {
1071            return None;
1072        }
1073        Some((
1074            slice::SeqVecSlice::new(self, 0..mid),
1075            slice::SeqVecSlice::new(self, mid..self.num_sequences()),
1076        ))
1077    }
1078}
1079
1080// --- MemSize Implementation ---
1081
1082impl<T: Storable, E: Endianness, B: AsRef<[u64]> + MemSize + FlatType> MemSize for SeqVec<T, E, B> {
1083    fn mem_size_rec(&self, flags: SizeFlags, _refs: &mut mem_dbg::HashMap<usize, usize>) -> usize {
1084        let mut total = core::mem::size_of::<Self>();
1085        // Add heap-allocated memory for the data buffer.
1086        total += self.data.mem_size(flags) - core::mem::size_of::<B>();
1087        // Add heap-allocated memory for the bit_offsets index.
1088        total +=
1089            self.bit_offsets.mem_size(flags) - core::mem::size_of::<FixedVec<u64, u64, E, B>>();
1090        // Add heap-allocated memory for optional sequence lengths.
1091        if let Some(lengths) = &self.seq_lengths {
1092            total +=
1093                lengths.mem_size(flags) - core::mem::size_of::<FixedVec<u64, u64, E, Vec<u64>>>();
1094        }
1095        total
1096    }
1097}
1098
1099// --- MemDbgImpl Implementation ---
1100
1101// Wrapper for Codes to provide correct MemDbgImpl, following the pattern in
1102// variable::VarVec. This is necessary because the derived implementation for
1103// Codes is incorrect and cannot be fixed due to the orphan rule.
1104struct CodeWrapper<'a>(&'a Codes);
1105
1106impl MemSize for CodeWrapper<'_> {
1107    fn mem_size_rec(&self, _flags: SizeFlags, _refs: &mut mem_dbg::HashMap<usize, usize>) -> usize {
1108        core::mem::size_of_val(self.0)
1109    }
1110}
1111
1112impl MemDbgImpl for CodeWrapper<'_> {
1113    fn _mem_dbg_depth_on(
1114        &self,
1115        writer: &mut impl core::fmt::Write,
1116        total_size: usize,
1117        max_depth: usize,
1118        prefix: &mut String,
1119        field_name: Option<&str>,
1120        is_last: bool,
1121        padded_size: usize,
1122        flags: DbgFlags,
1123        _dbg_refs: &mut mem_dbg::HashSet<usize>,
1124    ) -> core::fmt::Result {
1125        use core::fmt::Write;
1126
1127        if prefix.len() > max_depth {
1128            return Ok(());
1129        }
1130
1131        let real_size = self.mem_size(flags.to_size_flags());
1132        let mut buffer = String::new();
1133
1134        if flags.contains(DbgFlags::HUMANIZE) {
1135            let (value, uom) = mem_dbg::humanize_float(real_size);
1136            if uom == " B" {
1137                write!(buffer, "{:>4}{}", value as usize, uom)?;
1138            } else {
1139                write!(buffer, "{:>4.2}{}", value, uom)?;
1140            }
1141        } else {
1142            write!(buffer, "{:>9}", real_size)?;
1143        }
1144
1145        if flags.contains(DbgFlags::PERCENTAGE) {
1146            let percentage = 100.0 * real_size as f64 / total_size as f64;
1147            write!(buffer, " {:>6.2}%", percentage)?;
1148        }
1149
1150        write!(writer, "{}", buffer)?;
1151        write!(writer, " {} {}", prefix, if is_last { "╰" } else { "├" })?;
1152
1153        if let Some(name) = field_name {
1154            write!(writer, "{}", name)?;
1155        }
1156
1157        // Print the Debug format of the enum with type_color() when TYPE_NAME is set.
1158        if flags.contains(DbgFlags::TYPE_NAME) {
1159            if flags.contains(DbgFlags::COLOR) {
1160                write!(writer, "{}", mem_dbg::type_color())?;
1161            }
1162            write!(writer, ": {:?}", self.0)?;
1163            if flags.contains(DbgFlags::COLOR) {
1164                write!(writer, "{}", mem_dbg::reset_color())?;
1165            }
1166        }
1167
1168        let padding = padded_size - core::mem::size_of_val(self.0);
1169        if padding != 0 {
1170            write!(writer, " [{}B]", padding)?;
1171        }
1172
1173        writeln!(writer)?;
1174        Ok(())
1175    }
1176
1177    fn _mem_dbg_rec_on(
1178        &self,
1179        _writer: &mut impl core::fmt::Write,
1180        _total_size: usize,
1181        _max_depth: usize,
1182        _prefix: &mut String,
1183        _is_last: bool,
1184        _flags: DbgFlags,
1185        _dbg_refs: &mut mem_dbg::HashSet<usize>,
1186    ) -> core::fmt::Result {
1187        Ok(())
1188    }
1189}
1190
1191impl<T: Storable, E: Endianness, B: AsRef<[u64]> + MemDbgImpl + FlatType> MemDbgImpl for SeqVec<T, E, B> {
1192    fn _mem_dbg_rec_on(
1193        &self,
1194        writer: &mut impl core::fmt::Write,
1195        total_size: usize,
1196        max_depth: usize,
1197        prefix: &mut String,
1198        _is_last: bool,
1199        flags: DbgFlags,
1200        _dbg_refs: &mut mem_dbg::HashSet<usize>,
1201    ) -> core::fmt::Result {
1202        self.data._mem_dbg_depth_on(
1203            writer,
1204            total_size,
1205            max_depth,
1206            prefix,
1207            Some("data"),
1208            false,
1209            core::mem::size_of_val(&self.data),
1210            flags,
1211            _dbg_refs,
1212        )?;
1213        self.bit_offsets._mem_dbg_depth_on(
1214            writer,
1215            total_size,
1216            max_depth,
1217            prefix,
1218            Some("bit_offsets"),
1219            false,
1220            core::mem::size_of_val(&self.bit_offsets),
1221            flags,
1222            _dbg_refs,
1223        )?;
1224
1225        if let Some(lengths) = &self.seq_lengths {
1226            lengths._mem_dbg_depth_on(
1227                writer,
1228                total_size,
1229                max_depth,
1230                prefix,
1231                Some("seq_lengths"),
1232                false,
1233                core::mem::size_of_val(lengths),
1234                flags,
1235                _dbg_refs,
1236            )?;
1237        }
1238
1239        let code_wrapper = CodeWrapper(&self.encoding);
1240        code_wrapper._mem_dbg_depth_on(
1241            writer,
1242            total_size,
1243            max_depth,
1244            prefix,
1245            Some("encoding"),
1246            false,
1247            core::mem::size_of_val(&self.encoding),
1248            flags,
1249            _dbg_refs,
1250        )?;
1251
1252        self._markers._mem_dbg_depth_on(
1253            writer,
1254            total_size,
1255            max_depth,
1256            prefix,
1257            Some("_markers"),
1258            true,
1259            core::mem::size_of_val(&self._markers),
1260            flags,
1261            _dbg_refs,
1262        )?;
1263        Ok(())
1264    }
1265}
1266
1267// --- PartialEq Implementation ---
1268
1269impl<T: Storable + PartialEq, E: Endianness, B: AsRef<[u64]>> PartialEq for SeqVec<T, E, B>
1270where
1271    for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
1272        + CodesRead<E>
1273        + BitSeek<Error = core::convert::Infallible>,
1274{
1275    fn eq(&self, other: &Self) -> bool {
1276        // Quick check: same number of sequences?
1277        if self.num_sequences() != other.num_sequences() {
1278            return false;
1279        }
1280
1281        // Compare all sequences element-by-element
1282        for i in 0..self.num_sequences() {
1283            // SAFETY: i < num_sequences() by loop invariant
1284            let self_iter = unsafe { self.get_unchecked(i) };
1285            let other_iter = unsafe { other.get_unchecked(i) };
1286
1287            if self_iter.ne(other_iter) {
1288                return false;
1289            }
1290        }
1291
1292        true
1293    }
1294}
1295
1296// --- decode_many Implementation ---
1297
1298impl<T: Storable, E: Endianness, B: AsRef<[u64]>> SeqVec<T, E, B>
1299where
1300    for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
1301        + CodesRead<E>
1302        + BitSeek<Error = core::convert::Infallible>,
1303{
1304    /// Retrieves multiple sequences by their indices.
1305    ///
1306    /// This method decodes the requested sequences in sorted order for efficient
1307    /// sequential access to the bitstream, then returns them in the order
1308    /// corresponding to the input indices.
1309    ///
1310    /// # Arguments
1311    ///
1312    /// * `indices` - A slice of sequence indices to retrieve.
1313    ///
1314    /// # Returns
1315    ///
1316    /// - `Ok(Vec<Vec<T>>)` containing the sequences in the order of `indices`.
1317    /// - `Err(SeqVecError::IndexOutOfBounds(idx))` if any index is out of bounds.
1318    ///
1319    /// # Examples
1320    ///
1321    /// ```
1322    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1323    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1324    ///
1325    /// let sequences: &[&[u32]] = &[
1326    ///     &[1, 2, 3],
1327    ///     &[10, 20],
1328    ///     &[100, 200, 300],
1329    ///     &[1000],
1330    /// ];
1331    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1332    ///
1333    /// let indices = [3, 0, 2];
1334    /// let sequences = vec.decode_many(&indices)?;
1335    /// assert_eq!(sequences, vec![
1336    ///     vec![1000],
1337    ///     vec![1, 2, 3],
1338    ///     vec![100, 200, 300],
1339    /// ]);
1340    /// #     Ok(())
1341    /// # }
1342    /// ```
1343    pub fn decode_many(&self, indices: &[usize]) -> Result<Vec<Vec<T>>, SeqVecError> {
1344        if indices.is_empty() {
1345            return Ok(Vec::new());
1346        }
1347
1348        // Bounds checking
1349        for &index in indices {
1350            if index >= self.num_sequences() {
1351                return Err(SeqVecError::IndexOutOfBounds(index));
1352            }
1353        }
1354
1355        // SAFETY: We have just performed the bounds checks.
1356        Ok(unsafe { self.decode_many_unchecked(indices) })
1357    }
1358
1359    /// Retrieves multiple sequences without bounds checking.
1360    ///
1361    /// # Safety
1362    ///
1363    /// Calling this method with any out-of-bounds index is undefined behavior.
1364    pub unsafe fn decode_many_unchecked(&self, indices: &[usize]) -> Vec<Vec<T>> {
1365        if indices.is_empty() {
1366            return Vec::new();
1367        }
1368
1369        // Build indexed pairs: (sequence_index, original_position_in_results)
1370        let mut indexed_indices: Vec<(usize, usize)> = indices
1371            .iter()
1372            .enumerate()
1373            .map(|(i, &idx)| (idx, i))
1374            .collect();
1375
1376        // Sort by sequence index to enable more sequential bitstream access.
1377        indexed_indices.sort_unstable_by_key(|&(idx, _)| idx);
1378
1379        // Pre-allocate result vectors with estimated capacity based on bit ranges.
1380        // Iterate in original order to populate capacities before decoding.
1381        let mut results: Vec<Vec<T>> = indices
1382            .iter()
1383            .map(|&idx| {
1384                let start = unsafe { self.sequence_start_bit_unchecked(idx) };
1385                let end = unsafe { self.sequence_end_bit_unchecked(idx) };
1386                // Estimate ~4 bits per element (reasonable for Delta with values 1-10k).
1387                let cap = ((end - start) / 4).max(1) as usize;
1388                Vec::with_capacity(cap)
1389            })
1390            .collect();
1391
1392        // Decode in sorted order for compressed data locality.
1393        let mut reader = self.reader();
1394
1395        for &(target_index, original_pos) in &indexed_indices {
1396            let output = &mut results[original_pos];
1397            let _ = reader.decode_into(target_index, output);
1398        }
1399
1400        results
1401    }
1402
1403    /// Decodes multiple sequences into a caller-provided output vector.
1404    ///
1405    /// The output vector is cleared and resized to match `indices.len()`. Each
1406    /// sequence is decoded into its corresponding slot, maintaining the order
1407    /// specified by `indices`. This is more efficient than calling
1408    /// [`decode_vec`](Self::decode_vec) repeatedly, as the decoding process
1409    /// traverses the compressed data in sorted order for better cache locality.
1410    ///
1411    /// # Errors
1412    ///
1413    /// Returns [`SeqVecError::IndexOutOfBounds`] if any index is out of bounds.
1414    ///
1415    /// # Examples
1416    ///
1417    /// ```
1418    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1419    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
1420    ///
1421    /// let sequences: &[&[u32]] = &[
1422    ///     &[1, 2],
1423    ///     &[10, 20],
1424    ///     &[100, 200, 300],
1425    /// ];
1426    /// let vec: LESeqVec<u32> = SeqVec::from_slices(sequences)?;
1427    ///
1428    /// let indices = [2, 0, 1];
1429    /// let mut output = Vec::new();
1430    /// vec.decode_many_into(&indices, &mut output)?;
1431    ///
1432    /// assert_eq!(output, vec![
1433    ///     vec![100, 200, 300],
1434    ///     vec![1, 2],
1435    ///     vec![10, 20],
1436    /// ]);
1437    /// #     Ok(())
1438    /// # }
1439    /// ```
1440    pub fn decode_many_into(
1441        &self,
1442        indices: &[usize],
1443        output: &mut Vec<Vec<T>>,
1444    ) -> Result<(), SeqVecError> {
1445        if indices.is_empty() {
1446            output.clear();
1447            return Ok(());
1448        }
1449
1450        for &index in indices {
1451            if index >= self.num_sequences() {
1452                return Err(SeqVecError::IndexOutOfBounds(index));
1453            }
1454        }
1455
1456        output.clear();
1457        output.resize_with(indices.len(), Vec::new);
1458
1459        // SAFETY: We have just performed the bounds checks.
1460        unsafe { self.decode_many_into_unchecked(indices, output.as_mut_slice()) };
1461        Ok(())
1462    }
1463
1464    /// Decodes multiple sequences into a caller-provided output slice without
1465    /// bounds checking.
1466    ///
1467    /// # Safety
1468    ///
1469    /// Calling this method with any out-of-bounds index is undefined behavior.
1470    pub unsafe fn decode_many_into_unchecked(&self, indices: &[usize], output: &mut [Vec<T>]) {
1471        debug_assert_eq!(indices.len(), output.len());
1472
1473        if indices.is_empty() {
1474            return;
1475        }
1476
1477        // Build indexed pairs: (sequence_index, original_position_in_output)
1478        let mut indexed_indices: Vec<(usize, usize)> = indices
1479            .iter()
1480            .enumerate()
1481            .map(|(i, &idx)| (idx, i))
1482            .collect();
1483
1484        // Sort by sequence index to enable more sequential bitstream access.
1485        indexed_indices.sort_unstable_by_key(|&(idx, _)| idx);
1486
1487        // Pre-allocate capacities for output vectors based on bit ranges.
1488        // Iterate in original order to populate capacities before decoding.
1489        for (i, &idx) in indices.iter().enumerate() {
1490            let start = unsafe { self.sequence_start_bit_unchecked(idx) };
1491            let end = unsafe { self.sequence_end_bit_unchecked(idx) };
1492            // Estimate ~4 bits per element (reasonable for Delta with values 1-10k).
1493            let cap = ((end - start) / 4).max(1) as usize;
1494            output[i].reserve(cap);
1495        }
1496
1497        // Decode in sorted order for compressed data locality.
1498        let mut reader = self.reader();
1499
1500        for &(target_index, original_pos) in &indexed_indices {
1501            let output_slot = &mut output[original_pos];
1502            let _ = reader.decode_into(target_index, output_slot);
1503        }
1504    }
1505}
1506
1507// --- IntoIterator ---
1508
1509impl<'a, T: Storable, E: Endianness, B: AsRef<[u64]>> IntoIterator for &'a SeqVec<T, E, B>
1510where
1511    for<'b> SeqVecBitReader<'b, E>: BitRead<E, Error = core::convert::Infallible>
1512        + CodesRead<E>
1513        + BitSeek<Error = core::convert::Infallible>,
1514{
1515    type Item = SeqIter<'a, T, E>;
1516    type IntoIter = SeqVecIter<'a, T, E, B>;
1517
1518    #[inline]
1519    fn into_iter(self) -> Self::IntoIter {
1520        self.iter()
1521    }
1522}
1523
1524impl<T: Storable + 'static, E: Endianness + 'static> IntoIterator for SeqVec<T, E, Vec<u64>>
1525where
1526    for<'a> SeqVecBitReader<'a, E>: BitRead<E, Error = core::convert::Infallible>
1527        + CodesRead<E>
1528        + BitSeek<Error = core::convert::Infallible>,
1529{
1530    type Item = SeqIter<'static, T, E>;
1531    type IntoIter = SeqVecIntoIter<T, E>;
1532
1533    #[inline]
1534    fn into_iter(self) -> Self::IntoIter {
1535        SeqVecIntoIter::new(self)
1536    }
1537}