compressed_intvec/seq/builder.rs
1//! Builders for constructing a [`SeqVec`].
2//!
3//! This module provides two builders for creating a [`SeqVec`]:
4//!
5//! - [`SeqVecBuilder`]: For building from a collection of sequences already in
6//! memory. This builder can analyze the data to automatically select an
7//! optimal compression codec.
8//!
9//! - [`SeqVecFromIterBuilder`]: For building from an iterator of sequences.
10//! This is suitable for large datasets generated on the fly, but requires
11//! the codec to be specified manually since single-pass construction cannot
12//! perform data analysis.
13//!
14//! [`SeqVec`]: crate::seq::SeqVec
15
16use super::{SeqVec, SeqVecBitWriter, SeqVecError};
17use crate::common::codec_writer::CodecWriter;
18use crate::fixed::{BitWidth, FixedVec};
19use crate::variable::codec::{self, Codec};
20use crate::variable::traits::Storable;
21use dsi_bitstream::{
22 dispatch::StaticCodeWrite,
23 impls::MemWordWriterVec,
24 prelude::{BitWrite, Codes, CodesWrite, Endianness},
25};
26use std::marker::PhantomData;
27
/// A builder for creating a [`SeqVec`] from a collection of sequences.
///
/// This builder is the primary entry point for constructing a compressed
/// sequence vector when the sequences are already available in memory. It
/// allows configuration of the compression codec and of whether explicit
/// per-sequence lengths are stored alongside the data.
///
/// The builder always produces an owned `SeqVec<T, E, Vec<u64>>`.
///
/// ## Construction Strategy
///
/// When the codec is [`Codec::Auto`] or requires parameter
/// estimation (e.g., `Rice { log2_b: None }`), the builder performs a two-pass
/// construction:
///
/// 1. **Analysis pass**: Scans all elements to determine the optimal codec.
/// 2. **Encoding pass**: Compresses the data using the selected codec.
///
/// When a fully-specified codec is provided (e.g., `Gamma`, `Delta`,
/// `Zeta { k: Some(3) }`), the builder performs **single-pass construction**:
/// the analysis pass is skipped and the data is only traversed for encoding.
///
/// ## Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
///
/// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20], &[100]];
///
/// // Automatic codec selection (two-pass)
/// let vec_auto: LESeqVec<u32> = SeqVec::builder()
///     .codec(Codec::Auto)
///     .build(sequences)?;
///
/// // Explicit codec (single-pass, more efficient)
/// let vec_gamma: LESeqVec<u32> = SeqVec::builder()
///     .codec(Codec::Gamma)
///     .build(sequences)?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct SeqVecBuilder<T: Storable, E: Endianness> {
    /// Requested codec; it may still need data analysis before it can be used.
    codec_spec: Codec,
    /// Whether an explicit per-sequence length index is stored in the result.
    store_lengths: bool,
    /// Ties the builder to the element type `T` and the endianness `E`
    /// without storing values of either.
    _markers: PhantomData<(T, E)>,
}
75
76impl<T: Storable, E: Endianness> Default for SeqVecBuilder<T, E> {
77 fn default() -> Self {
78 Self::new()
79 }
80}
81
82impl<T: Storable, E: Endianness> SeqVecBuilder<T, E> {
83 /// Creates a new builder with default settings.
84 ///
85 /// The default codec is [`Codec::Auto`], which analyzes the
86 /// data to select the best codec.
87 #[inline]
88 pub fn new() -> Self {
89 Self {
90 codec_spec: Codec::Auto,
91 store_lengths: false,
92 _markers: PhantomData,
93 }
94 }
95
96 /// Sets the compression codec to use.
97 ///
98 /// For the available codecs, see [`Codec`].
99 #[inline]
100 pub fn codec(mut self, codec_spec: Codec) -> Self {
101 self.codec_spec = codec_spec;
102 self
103 }
104
105 /// Enables or disables storing explicit sequence lengths.
106 ///
107 /// When enabled, the builder stores a compact [`FixedVec`] of per-sequence
108 /// lengths. This allows O(1) length queries and enables faster decoding
109 /// paths that avoid end-bit checks.
110 ///
111 /// The default is `false` to minimize memory usage.
112 ///
113 /// [`FixedVec`]: crate::fixed::FixedVec
114 #[inline]
115 pub fn store_lengths(mut self, store: bool) -> Self {
116 self.store_lengths = store;
117 self
118 }
119
120 /// Builds the [`SeqVec`] from a slice of sequences.
121 ///
122 /// Each element represents a sequence to compress and store. Empty sequences
123 /// are supported.
124 ///
125 /// ## Type Requirements
126 ///
127 /// The sequences can be any type that implements `AsRef<[T]>`, such as
128 /// `&[T]`, `Vec<T>`, or `Box<[T]>`.
129 ///
130 /// # Arguments
131 ///
132 /// * `sequences` - A slice of sequences to compress. Each sequence is accessed
133 /// via `AsRef<[T]>`.
134 ///
135 /// # Errors
136 ///
137 /// Returns a [`SeqVecError`] if:
138 /// - Codec resolution fails.
139 /// - An I/O error occurs during encoding.
140 ///
141 /// ## Examples
142 ///
143 /// ```
144 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
145 /// use compressed_intvec::seq::{SeqVec, LESeqVec};
146 ///
147 /// // From slice of slices
148 /// let data: &[&[u32]] = &[&[1, 2], &[3, 4, 5]];
149 /// let vec: LESeqVec<u32> = SeqVec::builder().build(data)?;
150 ///
151 /// // From Vec of Vecs
152 /// let data: Vec<Vec<u32>> = vec![vec![1, 2], vec![3, 4, 5]];
153 /// let vec: LESeqVec<u32> = SeqVec::builder().build(&data)?;
154 /// # Ok(())
155 /// # }
156 /// ```
157 pub fn build<S: AsRef<[T]>>(
158 self,
159 sequences: &[S],
160 ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
161 where
162 T: 'static,
163 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
164 {
165 // Determine if we can use single-pass construction.
166 // Single-pass is possible only when the codec is fully specified.
167 if self.codec_spec.requires_analysis() {
168 self.build_two_pass(sequences)
169 } else {
170 self.build_single_pass(sequences)
171 }
172 }
173
174 /// Two-pass construction: analyze data first, then encode.
175 ///
176 /// This method is used internally when the codec requires data analysis to
177 /// determine optimal parameters. It collects all elements in the first pass,
178 /// analyzes their distribution, then encodes them in a second pass using the
179 /// selected codec. This avoids unnecessary analysis for pre-specified codecs.
180 fn build_two_pass<S: AsRef<[T]>>(
181 self,
182 sequences: &[S],
183 ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
184 where
185 T: 'static,
186 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
187 {
188 // Resolve codec from iterator without intermediate allocation.
189 // This avoids materializing all elements to a vector when analyzing data.
190 let resolved_codec = codec::resolve_codec_from_iter(
191 sequences
192 .iter()
193 .flat_map(|seq| seq.as_ref().iter().map(|x| x.to_word())),
194 self.codec_spec,
195 )
196 .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;
197
198 // Pass 2: Encode with the selected codec.
199 self.encode_sequences(sequences, resolved_codec)
200 }
201
202 /// Single-pass construction: encode directly without data analysis.
203 ///
204 /// This method is used when the codec is fully specified and requires no
205 /// data analysis. It streams sequences directly to the encoder without
206 /// collecting them, making it more memory-efficient for large datasets.
207 fn build_single_pass<S: AsRef<[T]>>(
208 self,
209 sequences: &[S],
210 ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
211 where
212 T: 'static,
213 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
214 {
215 // Resolve the codec without data analysis. Pass an empty u64 slice
216 // since fully-specified codecs do not require data.
217 let resolved_codec = codec::resolve_codec::<u64>(&[], self.codec_spec)
218 .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;
219
220 self.encode_sequences(sequences, resolved_codec)
221 }
222
223 /// Core encoding logic shared by both construction paths.
224 ///
225 /// This method handles the actual compression of sequences, including:
226 /// - Iterating over all sequences and their elements
227 /// - Writing compressed data to the bit writer
228 /// - Tracking bit offsets for each sequence boundary
229 /// - Optionally storing per-sequence lengths
230 /// - Building the final [`SeqVec`] structure
231 ///
232 /// # Arguments
233 ///
234 /// * `sequences` - The sequences to compress.
235 /// * `resolved_codec` - The codec to use for encoding.
236 fn encode_sequences<S: AsRef<[T]>>(
237 self,
238 sequences: &[S],
239 resolved_codec: Codes,
240 ) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
241 where
242 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
243 {
244 let num_sequences = sequences.len();
245
246 // Handle empty input.
247 if num_sequences == 0 {
248 let empty_offsets = FixedVec::<u64, u64, E>::builder()
249 .bit_width(BitWidth::Minimal)
250 .build(&[0u64])?;
251 let seq_lengths = if self.store_lengths {
252 Some(
253 FixedVec::<u64, u64, E>::builder()
254 .bit_width(BitWidth::Minimal)
255 .build(&[])?,
256 )
257 } else {
258 None
259 };
260 return Ok(SeqVec {
261 data: Vec::new(),
262 bit_offsets: empty_offsets,
263 seq_lengths,
264 encoding: resolved_codec,
265 _markers: PhantomData,
266 });
267 }
268
269 let (data, offsets, lengths) = encode_sequences_impl(
270 sequences.iter(),
271 resolved_codec,
272 Vec::with_capacity(num_sequences + 1),
273 self.store_lengths,
274 num_sequences,
275 )?;
276
277 // Build the bit offsets index with minimal bit width.
278 let bit_offsets = FixedVec::<u64, u64, E>::builder()
279 .bit_width(BitWidth::Minimal)
280 .build(&offsets)?;
281
282 let seq_lengths = if let Some(lengths) = lengths {
283 Some(
284 FixedVec::<u64, u64, E>::builder()
285 .bit_width(BitWidth::Minimal)
286 .build(&lengths)?,
287 )
288 } else {
289 None
290 };
291
292 Ok(SeqVec {
293 data,
294 bit_offsets,
295 seq_lengths,
296 encoding: resolved_codec,
297 _markers: PhantomData,
298 })
299 }
300}
301
/// A builder for creating a [`SeqVec`] from an iterator of sequences.
///
/// This builder is designed for constructing a [`SeqVec`] from a data source
/// that produces sequences on the fly. It consumes the iterator in a single
/// pass, compressing sequences as they arrive.
///
/// ## Limitations
///
/// This builder does **not** support:
/// - [`Codec::Auto`]: Automatic codec selection requires analyzing
///   all data, which is impossible in a single pass.
/// - Parameter estimation for codecs like `Rice { log2_b: None }` or
///   `Zeta { k: None }`.
///
/// The codec must be fully specified. If an unsupported codec is provided,
/// the [`build`](Self::build) method will return an error.
///
/// ## Examples
///
/// ```
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
///
/// // Generate sequences on the fly
/// let sequences_iter = (0..100).map(|i| vec![i as u32, i as u32 + 1]);
///
/// let vec: LESeqVec<u32> = SeqVec::from_iter_builder(sequences_iter)
///     .codec(Codec::Gamma) // Must be specified
///     .build()?;
///
/// assert_eq!(vec.num_sequences(), 100);
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct SeqVecFromIterBuilder<T: Storable, E: Endianness, I> {
    /// Source of sequences; consumed exactly once by `build`.
    iter: I,
    /// Requested codec; `build` rejects specs that need data analysis.
    codec_spec: Codec,
    /// Whether an explicit per-sequence length index is stored in the result.
    store_lengths: bool,
    /// Ties the builder to the element type `T` and the endianness `E`
    /// without storing values of either.
    _markers: PhantomData<(T, E)>,
}
343
344impl<T, E, I, S> SeqVecFromIterBuilder<T, E, I>
345where
346 T: Storable,
347 E: Endianness,
348 I: IntoIterator<Item = S>,
349 S: AsRef<[T]>,
350{
351 /// Creates a new builder from an iterator with default settings.
352 ///
353 /// The default codec is [`Codec::Gamma`], as automatic
354 /// selection is not possible in single-pass construction.
355 #[inline]
356 pub fn new(iter: I) -> Self {
357 Self {
358 iter,
359 codec_spec: Codec::Gamma,
360 store_lengths: false,
361 _markers: PhantomData,
362 }
363 }
364
365 /// Sets the compression codec to use.
366 ///
367 /// The codec must be fully specified (no `Auto`, no `None` parameters).
368 ///
369 /// # Arguments
370 ///
371 /// * `codec_spec` - The fully-specified codec to use for encoding.
372 ///
373 /// # Errors
374 ///
375 /// The [`build`](Self::build) method will return an error if a codec
376 /// requiring data analysis is provided.
377 #[inline]
378 pub fn codec(mut self, codec_spec: Codec) -> Self {
379 self.codec_spec = codec_spec;
380 self
381 }
382
383 /// Enables or disables storing explicit sequence lengths.
384 ///
385 /// When enabled, the builder stores a compact [`FixedVec`] of per-sequence
386 /// lengths. This allows O(1) length queries and enables faster decoding
387 /// paths that avoid end-bit checks.
388 ///
389 /// The default is `false` to minimize memory usage.
390 ///
391 /// [`FixedVec`]: crate::fixed::FixedVec
392 #[inline]
393 pub fn store_lengths(mut self, store: bool) -> Self {
394 self.store_lengths = store;
395 self
396 }
397
398 /// Builds the [`SeqVec`] by consuming the iterator.
399 ///
400 /// This method streams sequences directly from the iterator without
401 /// materializing them all in memory. Single-pass construction avoids
402 /// temporary allocations but requires the codec to be fully specified.
403 ///
404 /// # Errors
405 ///
406 /// Returns a [`SeqVecError`] if:
407 /// - An automatic or parameter-estimating codec spec is used.
408 /// - An I/O error occurs during encoding.
409 ///
410 /// ## Examples
411 ///
412 /// ```
413 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
414 /// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
415 ///
416 /// let sequences: Vec<Vec<u32>> = vec![vec![1, 2], vec![3, 4, 5]];
417 ///
418 /// let vec: LESeqVec<u32> = SeqVec::from_iter_builder(sequences.into_iter())
419 /// .codec(Codec::Delta)
420 /// .build()?;
421 /// # Ok(())
422 /// # }
423 /// ```
424 pub fn build(self) -> Result<SeqVec<T, E, Vec<u64>>, SeqVecError>
425 where
426 T: 'static,
427 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
428 {
429 // Reject codecs that require data analysis.
430 if self.codec_spec.requires_analysis() {
431 return Err(SeqVecError::InvalidParameters(
432 "Automatic codec selection is not supported for iterator-based construction. \
433 Please provide a fully-specified codec"
434 .to_string(),
435 ));
436 }
437
438 // Resolve the codec without data analysis. Pass an empty u64 slice
439 // since fully-specified codecs do not require data.
440 let resolved_codec = codec::resolve_codec::<u64>(&[], self.codec_spec)
441 .map_err(|e| SeqVecError::CodecDispatch(e.to_string()))?;
442
443 let iter = self.iter.into_iter();
444 // Use size_hint to pre-allocate offsets for efficiency.
445 let (lower, _) = iter.size_hint();
446 let offsets = Vec::with_capacity(lower.saturating_add(1));
447
448 let (data, offsets, lengths) =
449 encode_sequences_impl(iter, resolved_codec, offsets, self.store_lengths, lower)?;
450
451 // Handle empty iterator case.
452 if offsets.is_empty() {
453 let empty_offsets = FixedVec::<u64, u64, E>::builder()
454 .bit_width(BitWidth::Minimal)
455 .build(&[0u64])?;
456 let seq_lengths = if self.store_lengths {
457 Some(
458 FixedVec::<u64, u64, E>::builder()
459 .bit_width(BitWidth::Minimal)
460 .build(&[])?,
461 )
462 } else {
463 None
464 };
465 return Ok(SeqVec {
466 data: Vec::new(),
467 bit_offsets: empty_offsets,
468 seq_lengths,
469 encoding: resolved_codec,
470 _markers: PhantomData,
471 });
472 }
473
474 // Build the bit offsets index.
475 let bit_offsets = FixedVec::<u64, u64, E>::builder()
476 .bit_width(BitWidth::Minimal)
477 .build(&offsets)?;
478
479 let seq_lengths = if let Some(lengths) = lengths {
480 Some(
481 FixedVec::<u64, u64, E>::builder()
482 .bit_width(BitWidth::Minimal)
483 .build(&lengths)?,
484 )
485 } else {
486 None
487 };
488
489 Ok(SeqVec {
490 data,
491 bit_offsets,
492 seq_lengths,
493 encoding: resolved_codec,
494 _markers: PhantomData,
495 })
496 }
497}
498
/// Extension trait for `Codec` to check if analysis is required.
///
/// A codec spec "requires analysis" when it cannot be turned into a concrete
/// code without looking at the data to be compressed.
// NOTE(review): the builders in this module call `requires_analysis()` on
// their codec spec, so the `dead_code` allowance below looks unnecessary
// unless `Codec` also has an inherent method of the same name that shadows
// this trait. Confirm before removing the attribute.
#[allow(dead_code)]
trait CodecSpecExt {
    /// Returns `true` if this codec spec requires data analysis to resolve.
    fn requires_analysis(&self) -> bool;
}
505
506impl CodecSpecExt for Codec {
507 #[inline]
508 fn requires_analysis(&self) -> bool {
509 matches!(
510 self,
511 Codec::Auto
512 | Codec::Rice { log2_b: None }
513 | Codec::Zeta { k: None }
514 | Codec::Golomb { b: None }
515 | Codec::Pi { k: None }
516 | Codec::ExpGolomb { k: None }
517 )
518 }
519}
520
/// Type alias for the return value of `encode_sequences_impl`.
///
/// The tuple fields are, in order:
///
/// 1. the compressed word data,
/// 2. the bit offset boundaries (one per sequence plus a final sentinel),
/// 3. the optional per-sequence lengths (stored as `u64` for architecture
///    independence), present only when length storage was requested.
type EncodeSequencesResult = (Vec<u64>, Vec<u64>, Option<Vec<u64>>);
526
527/// Shared encoding implementation for sequences from an iterator.
528///
529/// This function encodes all sequences using a single resolved codec and
530/// pre-allocated offsets vector. The codec dispatch is resolved once at the
531/// beginning via [`CodecWriter`] rather than per-element, avoiding repeated
532/// dispatch overhead and improving throughput.
533///
534/// [`CodecWriter`]: crate::common::codec_writer::CodecWriter
535///
536/// # Arguments
537///
538/// * `sequences` - Iterator of sequences to encode. Each sequence is accessed
539/// via `AsRef<[T]>`.
540/// * `resolved_codec` - The codec specification to use for all elements.
541/// * `offsets` - Pre-allocated vector to store bit offset boundaries. This vector
542/// is populated with one offset per sequence plus a final sentinel offset.
543/// * `store_lengths` - Whether to compute and store per-sequence lengths.
544/// * `lengths_capacity_hint` - Capacity hint for the lengths vector when
545/// `store_lengths` is true.
546///
547/// # Returns
548///
549/// A tuple containing:
550/// - Encoded word data (`Vec<u64>`)
551/// - Bit offset boundaries (`Vec<u64>`), with length = num_sequences + 1
552/// - Optional per-sequence lengths (`Vec<u64>`), if `store_lengths` is true
553fn encode_sequences_impl<T: Storable, E: Endianness, I, S>(
554 sequences: I,
555 resolved_codec: Codes,
556 mut offsets: Vec<u64>,
557 store_lengths: bool,
558 lengths_capacity_hint: usize,
559) -> Result<EncodeSequencesResult, SeqVecError>
560where
561 I: IntoIterator<Item = S>,
562 S: AsRef<[T]>,
563 SeqVecBitWriter<E>: BitWrite<E, Error = core::convert::Infallible> + CodesWrite<E>,
564{
565 // Initialize the bit writer.
566 let word_writer = MemWordWriterVec::new(Vec::new());
567 let mut writer = SeqVecBitWriter::<E>::new(word_writer);
568 let mut current_bit_offset: u64 = 0;
569
570 // Resolve the codec dispatch ONCE at the beginning.
571 // This eliminates per-element match overhead for common codecs.
572 let code_writer = CodecWriter::new(resolved_codec);
573
574 // Prepare optional length storage.
575 let mut lengths = if store_lengths {
576 Some(Vec::with_capacity(lengths_capacity_hint))
577 } else {
578 None
579 };
580
581 // Process each sequence, recording bit offsets at boundaries.
582 for seq in sequences {
583 let seq_ref = seq.as_ref();
584 offsets.push(current_bit_offset);
585
586 if let Some(ref mut lengths) = lengths {
587 lengths.push(seq_ref.len() as u64);
588 }
589
590 for elem in seq_ref {
591 let bits_written = code_writer.write(&mut writer, elem.to_word())?;
592 current_bit_offset += bits_written as u64;
593 }
594 }
595
596 // Sentinel: total bit length.
597 offsets.push(current_bit_offset);
598
599 // Finalize the writer.
600 writer.flush()?;
601 let mut data = writer.into_inner()?.into_inner();
602 data.shrink_to_fit();
603
604 Ok((data, offsets, lengths))
605}
606
607// --- Integration with SeqVec ---
608
609impl<T: Storable + 'static, E: Endianness> SeqVec<T, E, Vec<u64>> {
610 /// Creates a builder for constructing a [`SeqVec`] with custom settings.
611 ///
612 /// This is the most flexible way to create a [`SeqVec`], allowing
613 /// customization of the compression codec and other parameters.
614 ///
615 /// # Examples
616 ///
617 /// ```
618 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
619 /// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
620 ///
621 /// let sequences: &[&[u32]] = &[&[1, 2, 3], &[10, 20]];
622 ///
623 /// let vec: LESeqVec<u32> = SeqVec::builder()
624 /// .codec(Codec::Zeta { k: Some(3) })
625 /// .build(sequences)?;
626 /// # Ok(())
627 /// # }
628 /// ```
629 #[inline]
630 pub fn builder() -> SeqVecBuilder<T, E> {
631 SeqVecBuilder::new()
632 }
633
634 /// Creates a builder for constructing a [`SeqVec`] from an iterator.
635 ///
636 /// This is useful for large datasets that are generated on the fly.
637 /// The codec must be specified explicitly since single-pass construction
638 /// cannot perform data analysis.
639 ///
640 /// # Examples
641 ///
642 /// ```
643 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
644 /// use compressed_intvec::seq::{SeqVec, LESeqVec, Codec};
645 ///
646 /// // Generate sequences programmatically
647 /// let sequences = (0..50).map(|i| vec![i as u32; i % 5 + 1]);
648 ///
649 /// let vec: LESeqVec<u32> = SeqVec::from_iter_builder(sequences)
650 /// .codec(Codec::Gamma)
651 /// .build()?;
652 ///
653 /// assert_eq!(vec.num_sequences(), 50);
654 /// # Ok(())
655 /// # }
656 /// ```
657 #[inline]
658 pub fn from_iter_builder<I, S>(iter: I) -> SeqVecFromIterBuilder<T, E, I>
659 where
660 I: IntoIterator<Item = S>,
661 S: AsRef<[T]>,
662 {
663 SeqVecFromIterBuilder::new(iter)
664 }
665
666 /// Creates a `SeqVec` from raw parts without validation.
667 ///
668 /// # Arguments
669 ///
670 /// * `data` - The compressed data buffer containing encoded words.
671 /// * `bit_offsets` - Bit offset index where each offset points to the start
672 /// of a sequence. Must contain at least 2 elements: the start offset and
673 /// end sentinel.
674 /// * `encoding` - The codec specification used to encode the data.
675 ///
676 /// # Safety
677 ///
678 /// This method is unsafe because it does not validate that the `data` and
679 /// `bit_offsets` are consistent with each other. The caller must ensure:
680 /// - The `bit_offsets` array has at least 2 elements (start and end sentinel).
681 /// - All offsets are valid bit positions within the `data` buffer.
682 /// - The last offset equals the total number of bits in the compressed data.
683 #[inline]
684 pub unsafe fn from_raw_parts(
685 data: Vec<u64>,
686 bit_offsets: crate::fixed::FixedVec<u64, u64, E, Vec<u64>>,
687 encoding: dsi_bitstream::prelude::Codes,
688 ) -> Self {
689 SeqVec {
690 data,
691 bit_offsets,
692 seq_lengths: None,
693 encoding,
694 _markers: PhantomData,
695 }
696 }
697
698 /// Creates a `SeqVec` from raw parts with optional stored sequence lengths.
699 ///
700 /// This method is identical to [`from_raw_parts`](Self::from_raw_parts) but
701 /// allows providing pre-computed per-sequence lengths for faster random access.
702 ///
703 /// # Arguments
704 ///
705 /// * `data` - The compressed data buffer.
706 /// * `bit_offsets` - Bit offset boundaries for each sequence.
707 /// * `seq_lengths` - Optional per-sequence lengths. If provided, must have
708 /// length equal to the number of sequences (offsets.len() - 1).
709 /// * `encoding` - The codec specification used to encode the data.
710 ///
711 /// # Safety
712 ///
713 /// The caller must ensure that `data`, `bit_offsets`, and `seq_lengths`
714 /// (if present) are consistent with each other and the codec.
715 #[inline]
716 pub unsafe fn from_raw_parts_with_lengths(
717 data: Vec<u64>,
718 bit_offsets: crate::fixed::FixedVec<u64, u64, E, Vec<u64>>,
719 seq_lengths: Option<crate::fixed::FixedVec<u64, u64, E, Vec<u64>>>,
720 encoding: dsi_bitstream::prelude::Codes,
721 ) -> Self {
722 SeqVec {
723 data,
724 bit_offsets,
725 seq_lengths,
726 encoding,
727 _markers: PhantomData,
728 }
729 }
730}