Skip to main content

compressed_intvec/seq/
builder.rs

//! Builders for constructing a [`SeqVec`].
//!
//! This module provides two builders for creating a [`SeqVec`]:
//!
//! - [`SeqVecBuilder`]: For building from a collection of sequences already in
//!   memory. This builder can analyze the data to automatically select an
//!   optimal compression codec.
//!
//! - [`SeqVecFromIterBuilder`]: For building from an iterator of sequences.
//!   This is suitable for large datasets generated on the fly, but requires
//!   the codec to be specified manually since single-pass construction cannot
//!   perform data analysis.
//!
//! [`SeqVec`]: crate::seq::SeqVec
15
16use super::{SeqVec, SeqVecBitWriter, SeqVecError};
17use crate::common::codec_writer::CodecWriter;
18use crate::fixed::{BitWidth, FixedVec};
19use crate::variable::codec::{self, Codec};
20use crate::variable::traits::Storable;
21use dsi_bitstream::{
22    dispatch::StaticCodeWrite,
23    impls::MemWordWriterVec,
24    prelude::{BitWrite, Codes, CodesWrite, Endianness},
25};
26use std::marker::PhantomData;
27
/// A builder for creating a [`SeqVec`] from a collection of sequences.
///
/// This builder is the primary entry point for constructing a compressed
/// sequence vector when the sequences are already available in memory. It
/// allows configuration of the compression codec and of whether explicit
/// per-sequence lengths are stored.
///
/// The builder always produces an owned `SeqVec<T, E, Vec<u64>>`.
///
/// ## Construction Strategy
///
/// When the codec is [`Codec::Auto`] or requires parameter
/// estimation (e.g., `Rice { log2_b: None }`), the builder performs a two-pass
/// construction:
///
/// 1. **Analysis pass**: Feeds the elements of every sequence, as an iterator,
///    to the codec resolver to determine the concrete codec.
/// 2. **Encoding pass**: Compresses the data using the selected codec.
///
/// When a fully-specified codec is provided (e.g., `Gamma`, `Delta`,
/// `Zeta { k: Some(3) }`), the builder performs **single-pass construction**:
/// the analysis pass is skipped and the data is traversed only once.
///
/// ## Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
///
/// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20], &[100]];
///
/// // Automatic codec selection (two-pass)
/// let vec_auto: LESeqVec<u32> = SeqVec::builder()
///     .codec(Codec::Auto)
///     .build(sequences)?;
///
/// // Explicit codec (single-pass, more efficient)
/// let vec_gamma: LESeqVec<u32> = SeqVec::builder()
///     .codec(Codec::Gamma)
///     .build(sequences)?;
/// #     Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct SeqVecBuilder<T: Storable, E: Endianness> {
    // Requested codec; may be `Codec::Auto` or partially specified, in which
    // case it is resolved against the data when `build` is called.
    codec_spec: Codec,
    // Whether `build` also produces an explicit per-sequence length index.
    store_lengths: bool,
    // Ties the builder to the element type `T` and endianness `E` without
    // storing a value of either.
    _markers: PhantomData<(T, E)>,
}
75
76impl<T: Storable, E: Endianness> Default for SeqVecBuilder<T, E> {
77    fn default() -> Self {
78        Self::new()
79    }
80}
81
82impl<T: Storable, E: Endianness> SeqVecBuilder<T, E> {
83    /// Creates a new builder with default settings.
84    ///
85    /// The default codec is [`Codec::Auto`], which analyzes the
86    /// data to select the best codec.
87    #[inline]
88    pub fn new() -> Self {
89        Self {
90            codec_spec: Codec::Auto,
91            store_lengths: false,
92            _markers: PhantomData,
93        }
94    }
95
96    /// Sets the compression codec to use.
97    ///
98    /// For the available codecs, see [`Codec`].
99    #[inline]
100    pub fn codec(mut self, codec_spec: Codec) -> Self {
101        self.codec_spec = codec_spec;
102        self
103    }
104
105    /// Enables or disables storing explicit sequence lengths.
106    ///
107    /// When enabled, the builder stores a compact [`FixedVec`] of per-sequence
108    /// lengths. This allows O(1) length queries and enables faster decoding
109    /// paths that avoid end-bit checks.
110    ///
111    /// The default is `false` to minimize memory usage.
112    ///
113    /// [`FixedVec`]: crate::fixed::FixedVec
114    #[inline]
115    pub fn store_lengths(mut self, store: bool) -> Self {
116        self.store_lengths = store;
117        self
118    }
119
120    /// Builds the [`SeqVec`] from a slice of sequences.
121    ///
122    /// Each element represents a sequence to compress and store. Empty sequences
123    /// are supported.
124    ///
125    /// ## Type Requirements
126    ///
127    /// The sequences can be any type that implements `AsRef<[T]>`, such as
128    /// `&[T]`, `Vec<T>`, or `Box<[T]>`.
129    ///
130    /// # Arguments
131    ///
132    /// * `sequences` - A slice of sequences to compress. Each sequence is accessed
133    ///   via `AsRef<[T]>`.
134    ///
135    /// # Errors
136    ///
137    /// Returns a [`SeqVecError`] if:
138    /// - Codec resolution fails.
139    /// - An I/O error occurs during encoding.
140    ///
141    /// ## Examples
142    ///
143    /// ```
144    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
145    /// use compressed_intvec::seq::{SeqVec, LESeqVec};
146    ///
147    /// // From slice of slices
148    /// let data: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
149    /// let vec: LESeqVec<u32> = SeqVec::builder().build(data)?;
150    ///
151    /// // From Vec of Vecs
152    /// let data: Vec<Vec<u32>> = vec![vec![1, 2], vec![3, 4, 5]];
153    /// let vec: LESeqVec<u32> = SeqVec::builder().build(&data)?;
154    /// #     Ok(())
155    /// # }
156    /// ```
157    pub fn build<S: AsRef<[T]>>(
158        self,
159        sequences: &[S],
160    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
161    where
162        T: 'static,
163        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
164    {
165        // Determine if we can use single-pass construction.
166        // Single-pass is possible only when the codec is fully specified.
167        if self.codec_spec.requires_analysis() {
168            self.build_two_pass(sequences)
169        } else {
170            self.build_single_pass(sequences)
171        }
172    }
173
174    /// Two-pass construction: analyze data first, then encode.
175    ///
176    /// This method is used internally when the codec requires data analysis to
177    /// determine optimal parameters. It collects all elements in the first pass,
178    /// analyzes their distribution, then encodes them in a second pass using the
179    /// selected codec. This avoids unnecessary analysis for pre-specified codecs.
180    fn build_two_pass<S: AsRef<[T]>>(
181        self,
182        sequences: &[S],
183    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
184    where
185        T: 'static,
186        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
187    {
188        // Resolve codec from iterator without intermediate allocation.
189        // This avoids materializing all elements to a vector when analyzing data.
190        let resolved_codec = codec::resolve_codec_from_iter(
191            sequences
192                .iter()
193                .flat_map(|seq| seq.as_ref().iter().map(|x| x.to_word())),
194            self.codec_spec,
195        )
196        .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;
197
198        // Pass 2: Encode with the selected codec.
199        self.encode_sequences(sequences, resolved_codec)
200    }
201
202    /// Single-pass construction: encode directly without data analysis.
203    ///
204    /// This method is used when the codec is fully specified and requires no
205    /// data analysis. It streams sequences directly to the encoder without
206    /// collecting them, making it more memory-efficient for large datasets.
207    fn build_single_pass<S: AsRef<[T]>>(
208        self,
209        sequences: &[S],
210    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
211    where
212        T: 'static,
213        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
214    {
215        // Resolve the codec without data analysis. Pass an empty u64 slice
216        // since fully-specified codecs do not require data.
217        let resolved_codec = codec::resolve_codec::<u64>(&[], self.codec_spec)
218            .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;
219
220        self.encode_sequences(sequences, resolved_codec)
221    }
222
223    /// Core encoding logic shared by both construction paths.
224    ///
225    /// This method handles the actual compression of sequences, including:
226    /// - Iterating over all sequences and their elements
227    /// - Writing compressed data to the bit writer
228    /// - Tracking bit offsets for each sequence boundary
229    /// - Optionally storing per-sequence lengths
230    /// - Building the final [`SeqVec`] structure
231    ///
232    /// # Arguments
233    ///
234    /// * `sequences` - The sequences to compress.
235    /// * `resolved_codec` - The codec to use for encoding.
236    fn encode_sequences<S: AsRef<[T]>>(
237        self,
238        sequences: &[S],
239        resolved_codec: Codes,
240    ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
241    where
242        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
243    {
244        let num_sequences = sequences.len();
245
246        // Handle empty input.
247        if num_sequences == 0 {
248            let empty_offsets = FixedVec::<u64, u64, E>::builder()
249                .bit_width(BitWidth::Minimal)
250                .build(&[0u64])?;
251            let seq_lengths = if self.store_lengths {
252                Some(
253                    FixedVec::<u64, u64, E>::builder()
254                        .bit_width(BitWidth::Minimal)
255                        .build(&[])?,
256                )
257            } else {
258                None
259            };
260            return Ok(SeqVec {
261                data: Vec::new(),
262                bit_offsets: empty_offsets,
263                seq_lengths,
264                encoding: resolved_codec,
265                _markers: PhantomData,
266            });
267        }
268
269        let (data, offsets, lengths) = encode_sequences_impl(
270            sequences.iter(),
271            resolved_codec,
272            Vec::with_capacity(num_sequences + 1),
273            self.store_lengths,
274            num_sequences,
275        )?;
276
277        // Build the bit offsets index with minimal bit width.
278        let bit_offsets = FixedVec::<u64, u64, E>::builder()
279            .bit_width(BitWidth::Minimal)
280            .build(&offsets)?;
281
282        let seq_lengths = if let Some(lengths) = lengths {
283            Some(
284                FixedVec::<u64, u64, E>::builder()
285                    .bit_width(BitWidth::Minimal)
286                    .build(&lengths)?,
287            )
288        } else {
289            None
290        };
291
292        Ok(SeqVec {
293            data,
294            bit_offsets,
295            seq_lengths,
296            encoding: resolved_codec,
297            _markers: PhantomData,
298        })
299    }
300}
301
/// A builder for creating a [`SeqVec`] from an iterator of sequences.
///
/// This builder is designed for constructing a [`SeqVec`] from a data source
/// that produces sequences on the fly. It consumes the iterator in a single
/// pass, compressing sequences as they arrive.
///
/// ## Limitations
///
/// This builder does **not** support:
/// - [`Codec::Auto`]: Automatic codec selection requires analyzing
///   all data, which is impossible in a single pass.
/// - Parameter estimation for codecs like `Rice { log2_b: None }` or
///   `Zeta { k: None }`.
///
/// The codec must be fully specified. If an unsupported codec is provided,
/// the [`build`](Self::build) method will return an error.
///
/// ## Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
///
/// // Generate sequences on the fly
/// let sequences_iter = (0..100).map(|i| vec![i as u32, i as u32 + 1]);
///
/// let vec: LESeqVec<u32> = SeqVec::from_iter_builder(sequences_iter)
///     .codec(Codec::Gamma) // Must be specified
///     .build()?;
///
/// assert_eq!(vec.num_sequences(), 100);
/// #     Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct SeqVecFromIterBuilder<T: Storable, E: Endianness, I> {
    // Source of sequences; consumed exactly once by `build`.
    iter: I,
    // Codec used for every element; `build` rejects specs that would
    // require data analysis.
    codec_spec: Codec,
    // Whether `build` also produces an explicit per-sequence length index.
    store_lengths: bool,
    // Ties the builder to the element type `T` and endianness `E` without
    // storing a value of either.
    _markers: PhantomData<(T, E)>,
}
343
344impl<T, E, I, S> SeqVecFromIterBuilder<T, E, I>
345where
346    T: Storable,
347    E: Endianness,
348    I: IntoIterator<Item = S>,
349    S: AsRef<[T]>,
350{
351    /// Creates a new builder from an iterator with default settings.
352    ///
353    /// The default codec is [`Codec::Gamma`], as automatic
354    /// selection is not possible in single-pass construction.
355    #[inline]
356    pub fn new(iter: I) -> Self {
357        Self {
358            iter,
359            codec_spec: Codec::Gamma,
360            store_lengths: false,
361            _markers: PhantomData,
362        }
363    }
364
365    /// Sets the compression codec to use.
366    ///
367    /// The codec must be fully specified (no `Auto`, no `None` parameters).
368    ///
369    /// # Arguments
370    ///
371    /// * `codec_spec` - The fully-specified codec to use for encoding.
372    ///
373    /// # Errors
374    ///
375    /// The [`build`](Self::build) method will return an error if a codec
376    /// requiring data analysis is provided.
377    #[inline]
378    pub fn codec(mut self, codec_spec: Codec) -> Self {
379        self.codec_spec = codec_spec;
380        self
381    }
382
383    /// Enables or disables storing explicit sequence lengths.
384    ///
385    /// When enabled, the builder stores a compact [`FixedVec`] of per-sequence
386    /// lengths. This allows O(1) length queries and enables faster decoding
387    /// paths that avoid end-bit checks.
388    ///
389    /// The default is `false` to minimize memory usage.
390    ///
391    /// [`FixedVec`]: crate::fixed::FixedVec
392    #[inline]
393    pub fn store_lengths(mut self, store: bool) -> Self {
394        self.store_lengths = store;
395        self
396    }
397
398    /// Builds the [`SeqVec`] by consuming the iterator.
399    ///
400    /// This method streams sequences directly from the iterator without
401    /// materializing them all in memory. Single-pass construction avoids
402    /// temporary allocations but requires the codec to be fully specified.
403    ///
404    /// # Errors
405    ///
406    /// Returns a [`SeqVecError`] if:
407    /// - An automatic or parameter-estimating codec spec is used.
408    /// - An I/O error occurs during encoding.
409    ///
410    /// ## Examples
411    ///
412    /// ```
413    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
414    /// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
415    ///
416    /// let sequences: Vec<Vec<u32>> = vec![vec![1, 2], vec![3, 4, 5]];
417    ///
418    /// let vec: LESeqVec<u32> = SeqVec::from_iter_builder(sequences.into_iter())
419    ///     .codec(Codec::Delta)
420    ///     .build()?;
421    /// #     Ok(())
422    /// # }
423    /// ```
424    pub fn build(self) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
425    where
426        T: 'static,
427        SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
428    {
429        // Reject codecs that require data analysis.
430        if self.codec_spec.requires_analysis() {
431            return Err(SeqVecError::InvalidParameters(
432                "Automatic codec selection is not supported for iterator-based construction. \
433                 Please provide a fully-specified codec"
434                    .to_string(),
435            ));
436        }
437
438        // Resolve the codec without data analysis. Pass an empty u64 slice
439        // since fully-specified codecs do not require data.
440        let resolved_codec = codec::resolve_codec::<u64>(&[], self.codec_spec)
441            .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;
442
443        let iter = self.iter.into_iter();
444        // Use size_hint to pre-allocate offsets for efficiency.
445        let (lower, _) = iter.size_hint();
446        let offsets = Vec::with_capacity(lower.saturating_add(1));
447
448        let (data, offsets, lengths) =
449            encode_sequences_impl(iter, resolved_codec, offsets, self.store_lengths, lower)?;
450
451        // Handle empty iterator case.
452        if offsets.is_empty() {
453            let empty_offsets = FixedVec::<u64, u64, E>::builder()
454                .bit_width(BitWidth::Minimal)
455                .build(&[0u64])?;
456            let seq_lengths = if self.store_lengths {
457                Some(
458                    FixedVec::<u64, u64, E>::builder()
459                        .bit_width(BitWidth::Minimal)
460                        .build(&[])?,
461                )
462            } else {
463                None
464            };
465            return Ok(SeqVec {
466                data: Vec::new(),
467                bit_offsets: empty_offsets,
468                seq_lengths,
469                encoding: resolved_codec,
470                _markers: PhantomData,
471            });
472        }
473
474        // Build the bit offsets index.
475        let bit_offsets = FixedVec::<u64, u64, E>::builder()
476            .bit_width(BitWidth::Minimal)
477            .build(&offsets)?;
478
479        let seq_lengths = if let Some(lengths) = lengths {
480            Some(
481                FixedVec::<u64, u64, E>::builder()
482                    .bit_width(BitWidth::Minimal)
483                    .build(&lengths)?,
484            )
485        } else {
486            None
487        };
488
489        Ok(SeqVec {
490            data,
491            bit_offsets,
492            seq_lengths,
493            encoding: resolved_codec,
494            _markers: PhantomData,
495        })
496    }
497}
498
/// Extension trait for `Codec` to check if analysis is required.
///
/// Both builders in this module use it: `SeqVecBuilder::build` to choose
/// between single-pass and two-pass construction, and
/// `SeqVecFromIterBuilder::build` to reject codec specs it cannot handle.
// NOTE(review): the trait is called from both builders in this file, so the
// `dead_code` allowance may no longer be needed — confirm before removing.
#[allow(dead_code)]
trait CodecSpecExt {
    /// Returns `true` if this codec spec requires data analysis to resolve.
    fn requires_analysis(&self) -> bool;
}
505
506impl CodecSpecExt for Codec {
507    #[inline]
508    fn requires_analysis(&self) -> bool {
509        matches!(
510            self,
511            Codec::Auto
512                | Codec::Rice { log2_b: None }
513                | Codec::Zeta { k: None }
514                | Codec::Golomb { b: None }
515                | Codec::Pi { k: None }
516                | Codec::ExpGolomb { k: None }
517        )
518    }
519}
520
/// Type alias for the return value of `encode_sequences_impl`.
///
/// The tuple fields are, in order:
///
/// 1. the compressed word data,
/// 2. the bit offset boundaries (one per sequence plus a final sentinel),
/// 3. the optional per-sequence lengths (stored as `u64` for architecture
///    independence), present only when length storage was requested.
type EncodeSequencesResult = (Vec<u64>, Vec<u64>, Option<Vec<u64>>);
526
527/// Shared encoding implementation for sequences from an iterator.
528///
529/// This function encodes all sequences using a single resolved codec and
530/// pre-allocated offsets vector. The codec dispatch is resolved once at the
531/// beginning via [`CodecWriter`] rather than per-element, avoiding repeated
532/// dispatch overhead and improving throughput.
533///
534/// [`CodecWriter`]: crate::common::codec_writer::CodecWriter
535///
536/// # Arguments
537///
538/// * `sequences` - Iterator of sequences to encode. Each sequence is accessed
539///   via `AsRef<[T]>`.
540/// * `resolved_codec` - The codec specification to use for all elements.
541/// * `offsets` - Pre-allocated vector to store bit offset boundaries. This vector
542///   is populated with one offset per sequence plus a final sentinel offset.
543/// * `store_lengths` - Whether to compute and store per-sequence lengths.
544/// * `lengths_capacity_hint` - Capacity hint for the lengths vector when
545///   `store_lengths` is true.
546///
547/// # Returns
548///
549/// A tuple containing:
550/// - Encoded word data (`Vec<u64>`)
551/// - Bit offset boundaries (`Vec<u64>`), with length = num_sequences + 1
552/// - Optional per-sequence lengths (`Vec<u64>`), if `store_lengths` is true
553fn encode_sequences_impl<T: Storable, E: Endianness, I, S>(
554    sequences: I,
555    resolved_codec: Codes,
556    mut offsets: Vec<u64>,
557    store_lengths: bool,
558    lengths_capacity_hint: usize,
559) -> Result<EncodeSequencesResult, SeqVecError>
560where
561    I: IntoIterator<Item = S>,
562    S: AsRef<[T]>,
563    SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
564{
565    // Initialize the bit writer.
566    let word_writer = MemWordWriterVec::new(Vec::new());
567    let mut writer = SeqVecBitWriter::<E>::new(word_writer);
568    let mut current_bit_offset: u64 = 0;
569
570    // Resolve the codec dispatch ONCE at the beginning.
571    // This eliminates per-element match overhead for common codecs.
572    let code_writer = CodecWriter::new(resolved_codec);
573
574    // Prepare optional length storage.
575    let mut lengths = if store_lengths {
576        Some(Vec::with_capacity(lengths_capacity_hint))
577    } else {
578        None
579    };
580
581    // Process each sequence, recording bit offsets at boundaries.
582    for seq in sequences {
583        let seq_ref = seq.as_ref();
584        offsets.push(current_bit_offset);
585
586        if let Some(ref mut lengths) = lengths {
587            lengths.push(seq_ref.len() as u64);
588        }
589
590        for elem in seq_ref {
591            let bits_written = code_writer.write(&mut writer, elem.to_word())?;
592            current_bit_offset += bits_written as u64;
593        }
594    }
595
596    // Sentinel: total bit length.
597    offsets.push(current_bit_offset);
598
599    // Finalize the writer.
600    writer.flush()?;
601    let mut data = writer.into_inner()?.into_inner();
602    data.shrink_to_fit();
603
604    Ok((data, offsets, lengths))
605}
606
607// --- Integration with SeqVec ---
608
609impl<T: Storable + 'static, E: Endianness> SeqVec<T, E, Vec<u64>> {
610    /// Creates a builder for constructing a [`SeqVec`] with custom settings.
611    ///
612    /// This is the most flexible way to create a [`SeqVec`], allowing
613    /// customization of the compression codec and other parameters.
614    ///
615    /// # Examples
616    ///
617    /// ```
618    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
619    /// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
620    ///
621    /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20]];
622    ///
623    /// let vec: LESeqVec<u32> = SeqVec::builder()
624    ///     .codec(Codec::Zeta { k: Some(3) })
625    ///     .build(sequences)?;
626    /// #     Ok(())
627    /// # }
628    /// ```
629    #[inline]
630    pub fn builder() -> SeqVecBuilder<T, E> {
631        SeqVecBuilder::new()
632    }
633
634    /// Creates a builder for constructing a [`SeqVec`] from an iterator.
635    ///
636    /// This is useful for large datasets that are generated on the fly.
637    /// The codec must be specified explicitly since single-pass construction
638    /// cannot perform data analysis.
639    ///
640    /// # Examples
641    ///
642    /// ```
643    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
644    /// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
645    ///
646    /// // Generate sequences programmatically
647    /// let sequences = (0..50).map(|i| vec![i as u32; i % 5 + 1]);
648    ///
649    /// let vec: LESeqVec<u32> = SeqVec::from_iter_builder(sequences)
650    ///     .codec(Codec::Gamma)
651    ///     .build()?;
652    ///
653    /// assert_eq!(vec.num_sequences(), 50);
654    /// #     Ok(())
655    /// # }
656    /// ```
657    #[inline]
658    pub fn from_iter_builder<I, S>(iter: I) -> SeqVecFromIterBuilder<T, E, I>
659    where
660        I: IntoIterator<Item = S>,
661        S: AsRef<[T]>,
662    {
663        SeqVecFromIterBuilder::new(iter)
664    }
665
666    /// Creates a `SeqVec` from raw parts without validation.
667    ///
668    /// # Arguments
669    ///
670    /// * `data` - The compressed data buffer containing encoded words.
671    /// * `bit_offsets` - Bit offset index where each offset points to the start
672    ///   of a sequence. Must contain at least 2 elements: the start offset and
673    ///   end sentinel.
674    /// * `encoding` - The codec specification used to encode the data.
675    ///
676    /// # Safety
677    ///
678    /// This method is unsafe because it does not validate that the `data` and
679    /// `bit_offsets` are consistent with each other. The caller must ensure:
680    /// - The `bit_offsets` array has at least 2 elements (start and end sentinel).
681    /// - All offsets are valid bit positions within the `data` buffer.
682    /// - The last offset equals the total number of bits in the compressed data.
683    #[inline]
684    pub unsafe fn from_raw_parts(
685        data: Vec<u64>,
686        bit_offsets: crate::fixed::FixedVec<u64, u64, E, Vec<u64>>,
687        encoding: dsi_bitstream::prelude::Codes,
688    ) -> Self {
689        SeqVec {
690            data,
691            bit_offsets,
692            seq_lengths: None,
693            encoding,
694            _markers: PhantomData,
695        }
696    }
697
698    /// Creates a `SeqVec` from raw parts with optional stored sequence lengths.
699    ///
700    /// This method is identical to [`from_raw_parts`](Self::from_raw_parts) but
701    /// allows providing pre-computed per-sequence lengths for faster random access.
702    ///
703    /// # Arguments
704    ///
705    /// * `data` - The compressed data buffer.
706    /// * `bit_offsets` - Bit offset boundaries for each sequence.
707    /// * `seq_lengths` - Optional per-sequence lengths. If provided, must have
708    ///   length equal to the number of sequences (offsets.len() - 1).
709    /// * `encoding` - The codec specification used to encode the data.
710    ///
711    /// # Safety
712    ///
713    /// The caller must ensure that `data`, `bit_offsets`, and `seq_lengths`
714    /// (if present) are consistent with each other and the codec.
715    #[inline]
716    pub unsafe fn from_raw_parts_with_lengths(
717        data: Vec<u64>,
718        bit_offsets: crate::fixed::FixedVec<u64, u64, E, Vec<u64>>,
719        seq_lengths: Option<crate::fixed::FixedVec<u64, u64, E, Vec<u64>>>,
720        encoding: dsi_bitstream::prelude::Codes,
721    ) -> Self {
722        SeqVec {
723            data,
724            bit_offsets,
725            seq_lengths,
726            encoding,
727            _markers: PhantomData,
728        }
729    }
730}