simple_sds_sbwt/
raw_vector.rs

1//! The basic vector implementing the low-level functionality used by other vectors in the crate.
2
3use crate::serialize::Serialize;
4#[cfg(not(target_family = "wasm"))]
5use crate::serialize::{MappedSlice, MemoryMap, MemoryMapped};
6use crate::bits;
7
8use std::fs::{File, OpenOptions};
9use std::io::{Error, ErrorKind, Seek, SeekFrom};
10use std::path::{Path, PathBuf};
11use std::{cmp, io};
12
13#[cfg(test)]
14mod tests;
15
16//-----------------------------------------------------------------------------
17
18/// Random access to bits and variable-width integers in a bit array.
19///
20/// # Examples
21///
22/// ```
23/// use simple_sds_sbwt::raw_vector::AccessRaw;
24/// use simple_sds_sbwt::bits;
25///
26/// struct Example(Vec<u64>);
27///
28/// impl AccessRaw for Example {
29///     fn bit(&self, bit_offset: usize) -> bool {
30///         let (index, offset) = bits::split_offset(bit_offset);
31///         (self.0[index] & (1u64 << offset)) != 0
32///     }
33///
34///     unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
35///         bits::read_int(&self.0, bit_offset, width)
36///     }
37///
38///     fn word(&self, index: usize) -> u64 {
39///         self.0[index]
40///     }
41///
42///     unsafe fn word_unchecked(&self, index: usize) -> u64 {
43///         *self.0.get_unchecked(index)
44///     }
45///
46///     fn is_mutable(&self) -> bool {
47///         true
48///     }
49///
50///     fn set_bit(&mut self, bit_offset: usize, value: bool) {
51///         let (index, offset) = bits::split_offset(bit_offset);
52///         self.0[index] &= !(1u64 << offset);
53///         self.0[index] |= (value as u64) << offset;
54///     }
55///
56///     unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize) {
57///         bits::write_int(&mut self.0, bit_offset, value, width);
58///     }
59/// }
60///
61/// let mut example = Example(vec![0u64; 2]);
62/// assert!(example.is_mutable());
63///
64/// unsafe {
65///    example.set_int(4, 0x33, 8);
66///    example.set_int(63, 2, 2);
67/// }
68/// example.set_bit(72, true);
69/// assert_eq!(example.0[0], 0x330);
70/// assert_eq!(example.0[1], 0x101);
71///
72/// assert!(example.bit(72));
73/// assert!(!example.bit(68));
74/// unsafe {
75///     assert_eq!(example.int(4, 8), 0x33);
76///     assert_eq!(example.int(63, 2), 2);
77/// }
78/// assert_eq!(example.word(1), 0x101);
79/// ```
pub trait AccessRaw {
    /// Reads the bit at `bit_offset` and returns it as a `bool`.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset` is not a valid offset in the bit array.
    /// May panic from I/O errors.
    fn bit(&self, bit_offset: usize) -> bool;

    /// Reads a `width`-bit integer that starts at `bit_offset`.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Starting offset in the bit array.
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset + width - 1` is not a valid offset in the bit array.
    /// May panic from I/O errors.
    unsafe fn int(&self, bit_offset: usize, width: usize) -> u64;

    /// Reads the 64-bit word with the given word index from the container.
    ///
    /// This may be faster than calling `self.int(index * 64, 64)`.
    ///
    /// # Panics
    ///
    /// May panic if `index * 64` is not a valid offset in the bit array.
    /// May panic from I/O errors.
    fn word(&self, index: usize) -> u64;

    /// Unsafe version of [`AccessRaw::word`] without bounds checks.
    ///
    /// # Safety
    ///
    /// Behavior is undefined in situations where the safe versions may panic.
    unsafe fn word_unchecked(&self, index: usize) -> u64;

    /// Returns `true` if the underlying data is mutable.
    ///
    /// This is relevant, for example, with memory-mapped vectors, where the underlying file may be opened as read-only.
    /// Callers should check this before using [`AccessRaw::set_bit`] or [`AccessRaw::set_int`],
    /// because those methods may panic on immutable data.
    fn is_mutable(&self) -> bool;

    /// Writes a single bit to the container.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Offset in the bit array.
    /// * `value`: The value of the bit.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset` is not a valid offset in the bit array.
    /// May panic if the underlying data is not mutable.
    /// May panic from I/O errors.
    fn set_bit(&mut self, bit_offset: usize, value: bool);

    /// Writes a `width`-bit integer to the container, starting at `bit_offset`.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Starting offset in the bit array.
    /// * `value`: The integer to be written.
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset + width - 1` is not a valid offset in the bit array.
    /// May panic if the underlying data is not mutable.
    /// May panic from I/O errors.
    unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize);
}
161
162//-----------------------------------------------------------------------------
163
164/// Append bits and variable-width integers to a container.
165///
166/// The container is not required to remember the types of the pushed items.
167///
168/// # Examples
169/// ```
170/// use simple_sds_sbwt::raw_vector::PushRaw;
171/// use simple_sds_sbwt::bits;
172///
173/// struct Example(Vec<bool>, Vec<u64>);
174///
175/// impl Example{
176///     fn new() -> Example {
177///         Example(Vec::new(), Vec::new())
178///     }
179/// }
180///
181/// impl PushRaw for Example {
182///     fn push_bit(&mut self, value: bool) {
183///         self.0.push(value);
184///     }
185///
186///     unsafe fn push_int(&mut self, value: u64, width: usize) {
187///         self.1.push(value & bits::low_set(width));
188///     }
189/// }
190///
191/// let mut example = Example::new();
192/// example.push_bit(false);
193/// unsafe {
194///     example.push_int(123, 8);
195///     example.push_int(456, 9);
196/// }
197/// example.push_bit(true);
198///
199/// assert_eq!(example.0.len(), 2);
200/// assert_eq!(example.1.len(), 2);
201/// ```
pub trait PushRaw {
    /// Appends a single bit to the end of the container.
    ///
    /// # Panics
    ///
    /// May panic from I/O errors.
    /// May panic if there is an integer overflow.
    fn push_bit(&mut self, value: bool);

    /// Appends a `width`-bit integer to the end of the container.
    ///
    /// # Arguments
    ///
    /// * `value`: The integer to be appended.
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    ///
    /// # Panics
    ///
    /// May panic from I/O errors.
    /// May panic if there is an integer overflow.
    unsafe fn push_int(&mut self, value: u64, width: usize);
}
228
229/// Remove and return bits and variable-width integers from a container.
230///
231/// Behavior is implementation-dependent if the sequence of pop operations is not the reverse of push operations.
232///
233/// # Examples
234/// ```
235/// use simple_sds_sbwt::raw_vector::PopRaw;
236///
237/// struct Example(Vec<bool>, Vec<u64>);
238///
239/// impl Example{
240///     fn new() -> Example {
241///         Example(Vec::new(), Vec::new())
242///     }
243/// }
244///
245/// impl PopRaw for Example {
246///     fn pop_bit(&mut self) -> Option<bool> {
247///         self.0.pop()
248///     }
249///
250///     unsafe fn pop_int(&mut self, _: usize) -> Option<u64> {
251///         self.1.pop()
252///     }
253/// }
254///
255/// let mut example = Example::new();
256/// example.0.push(false);
257/// example.1.push(123);
258/// example.1.push(456);
259/// example.0.push(true);
260///
261/// assert_eq!(example.pop_bit().unwrap(), true);
262/// unsafe {
263///     assert_eq!(example.pop_int(9).unwrap(), 456);
264///     assert_eq!(example.pop_int(8).unwrap(), 123);
265/// }
266/// assert_eq!(example.pop_bit().unwrap(), false);
267/// assert_eq!(example.pop_bit(), None);
268/// unsafe { assert_eq!(example.pop_int(1), None); }
269/// ```
pub trait PopRaw {
    /// Removes and returns the last bit from the container.
    ///
    /// Returns [`None`] if the container does not have more bits.
    fn pop_bit(&mut self) -> Option<bool>;

    /// Removes and returns the last `width` bits from the container as an integer.
    ///
    /// Returns [`None`] if the container does not have more integers of that width.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    unsafe fn pop_int(&mut self, width: usize) -> Option<u64>;
}
285
286//-----------------------------------------------------------------------------
287
288/// A contiguous growable array of bits and up to 64-bit integers based on [`Vec`] of [`u64`] values.
289///
290/// There are no iterators over the vector, because it may contain items of varying widths.
291///
292/// # Notes
293///
294/// * The unused part of the last integer is always set to `0`.
295/// * The underlying vector may allocate but not use more integers than are strictly necessary.
296/// * `RawVector` never panics from I/O errors.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct RawVector {
    // Length of the vector in bits.
    len: usize,
    // The bits packed into 64-bit words; bits past `len` in the last word are kept at 0.
    data: Vec<u64>,
}
302
303impl RawVector {
304    /// Returns the length of the vector in bits.
305    #[inline]
306    pub fn len(&self) -> usize {
307        self.len
308    }
309
310    /// Returns `true` if the vector is empty.
311    #[inline]
312    pub fn is_empty(&self) -> bool {
313        self.len() == 0
314    }
315
316    /// Returns the capacity of the vector in bits.
317    #[inline]
318    pub fn capacity(&self) -> usize {
319        bits::words_to_bits(self.data.capacity())
320    }
321
322    /// Counts the number of ones in the bit array.
323    ///
324    /// # Examples
325    ///
326    /// ```
327    /// use simple_sds_sbwt::raw_vector::{RawVector, AccessRaw};
328    ///
329    /// let mut v = RawVector::with_len(137, false);
330    /// assert_eq!(v.count_ones(), 0);
331    /// v.set_bit(1, true); v.set_bit(33, true); v.set_bit(95, true); v.set_bit(123, true);
332    /// assert_eq!(v.count_ones(), 4);
333    /// ```
334    pub fn count_ones(&self) -> usize {
335        let mut result: usize = 0;
336        for value in self.data.iter() {
337            result += (*value).count_ones() as usize;
338        }
339        result
340    }
341
342    /// Creates an empty vector.
343    ///
344    /// # Examples
345    ///
346    /// ```
347    /// use simple_sds_sbwt::raw_vector::RawVector;
348    ///
349    /// let v = RawVector::new();
350    /// assert!(v.is_empty());
351    /// assert_eq!(v.capacity(), 0);
352    /// ```
353    pub fn new() -> RawVector {
354        RawVector::default()
355    }
356
357    /// Creates an initialized vector of specified length.
358    ///
359    /// # Arguments
360    ///
361    /// * `len`: Length of the vector in bits.
362    /// * `value`: Initialization value.
363    ///
364    /// # Examples
365    ///
366    /// ```
367    /// use simple_sds_sbwt::raw_vector::RawVector;
368    ///
369    /// let v = RawVector::with_len(137, false);
370    /// assert_eq!(v.len(), 137);
371    /// ```
372    pub fn with_len(len: usize, value: bool) -> RawVector {
373        let val = bits::filler_value(value);
374        let data: Vec<u64> = vec![val; bits::bits_to_words(len)];
375        let mut result = RawVector {
376            len, data,
377        };
378        result.set_unused_bits(false);
379        result
380    }
381
382    /// Creates an empty vector with enough capacity for at least `capacity` bits.
383    ///
384    /// # Examples
385    ///
386    /// ```
387    /// use simple_sds_sbwt::raw_vector::RawVector;
388    ///
389    /// let v = RawVector::with_capacity(137);
390    /// assert!(v.capacity() >= 137);
391    /// ```
392    pub fn with_capacity(capacity: usize) -> RawVector {
393        RawVector {
394            len: 0,
395            data: Vec::with_capacity(bits::bits_to_words(capacity)),
396        }
397    }
398
399    /// Returns the size of a serialized vector with the given capacity in [`u64`] elements.
400    ///
401    /// # Examples
402    ///
403    /// ```
404    /// use simple_sds_sbwt::raw_vector::RawVector;
405    ///
406    /// assert_eq!(RawVector::size_by_params(247), 6);
407    /// ```
408    pub fn size_by_params(capacity: usize) -> usize {
409        2 + bits::bits_to_words(capacity)
410    }
411
412    /// Returns a copy of the vector with each bit flipped.
413    ///
414    /// # Examples
415    ///
416    /// ```
417    /// use simple_sds_sbwt::raw_vector::{RawVector, AccessRaw};
418    ///
419    /// let mut original = RawVector::with_len(137, false);
420    /// original.set_bit(1, true); original.set_bit(33, true);
421    /// unsafe { original.set_int(95, 456, 9); } original.set_bit(123, true);
422    /// let complement = original.complement();
423    /// for i in 0..137 {
424    ///     assert_eq!(!(complement.bit(i)), original.bit(i));
425    /// }
426    /// ```
427    pub fn complement(&self) -> RawVector {
428        let mut result = self.clone();
429        for word in result.data.iter_mut() {
430            *word = !*word;
431        }
432        result.set_unused_bits(false);
433        result
434    }
435
436    /// Resizes the vector to a specified length.
437    ///
438    /// If `new_len > self.len()`, the new `new_len - self.len()` bits will be initialized.
439    /// If `new_len < self.len()`, the vector is truncated.
440    ///
441    /// # Arguments
442    ///
443    /// * `new_len`: New length of the vector in bits.
444    /// * `value`: Initialization value.
445    ///
446    /// # Examples
447    ///
448    /// ```
449    /// use simple_sds_sbwt::raw_vector::RawVector;
450    ///
451    /// let mut v = RawVector::new();
452    /// v.resize(137, true);
453    /// let w = RawVector::with_len(137, true);
454    /// assert_eq!(v, w);
455    /// ```
456    pub fn resize(&mut self, new_len: usize, value: bool) {
457        // Fill the unused bits if necessary.
458        if new_len > self.len() {
459            self.set_unused_bits(value);
460        }
461
462        // Use more space if necessary.
463        self.data.resize(bits::bits_to_words(new_len), bits::filler_value(value));
464        self.len = new_len;
465        self.set_unused_bits(false);
466    }
467
468    /// Clears the vector without freeing the data.
469    ///
470    /// # Examples
471    ///
472    /// ```
473    /// use simple_sds_sbwt::raw_vector::RawVector;
474    ///
475    /// let mut v = RawVector::with_len(137, true);
476    /// assert_eq!(v.len(), 137);
477    /// v.clear();
478    /// assert!(v.is_empty());
479    /// ```
480    pub fn clear(&mut self) {
481        self.data.clear();
482        self.len = 0;
483    }
484
485    /// Reserves space for storing at least `self.len() + additional` bits in the vector.
486    ///
487    /// Does nothing if the capacity is already sufficient.
488    ///
489    /// # Examples
490    ///
491    /// ```
492    /// use simple_sds_sbwt::raw_vector::RawVector;
493    ///
494    /// let mut v = RawVector::with_len(137, false);
495    /// v.reserve(318);
496    /// assert!(v.capacity() >= 137 + 318);
497    /// ```
498    ///
499    /// # Panics
500    ///
501    /// May panic if `self.len() + additional + 63 > usize::MAX`.
502    pub fn reserve(&mut self, additional: usize) {
503        let words_needed = bits::bits_to_words(self.len() + additional);
504        if words_needed > self.data.capacity() {
505            self.data.reserve(words_needed - self.data.capacity());
506        }
507    }
508
509    // Set the unused bits in the last integer to the specified value.
510    fn set_unused_bits(&mut self, value: bool) {
511        let (index, width) = bits::split_offset(self.len());
512        if width > 0 {
513            if value {
514                self.data[index] |= !bits::low_set(width);
515            }
516            else {
517                self.data[index] &= bits::low_set(width);
518            }
519        }
520    }
521
522
523    pub fn get_words(&self) -> &[u64] {
524        &self.data
525    }
526
527}
528
529//-----------------------------------------------------------------------------
530
531impl AccessRaw for RawVector {
532    #[inline]
533    fn bit(&self, bit_offset: usize) -> bool {
534        let (index, offset) = bits::split_offset(bit_offset);
535        ((self.data[index] >> offset) & 1) == 1
536    }
537
538    #[inline]
539    unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
540        bits::read_int(&self.data, bit_offset, width)
541    }
542
543    #[inline]
544    fn word(&self, index: usize) -> u64 {
545        self.data[index]
546    }
547
548    #[inline]
549    unsafe fn word_unchecked(&self, index: usize) -> u64 {
550        *self.data.get_unchecked(index)
551    }
552
553    #[inline]
554    fn is_mutable(&self) -> bool {
555        true
556    }
557
558    #[inline]
559    fn set_bit(&mut self, bit_offset: usize, value: bool) {
560        let (index, offset) = bits::split_offset(bit_offset);
561        self.data[index] &= !(1u64 << offset);
562        self.data[index] |= (value as u64) << offset;
563    }
564
565    #[inline]
566    unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize) {
567        bits::write_int(&mut self.data, bit_offset, value, width);
568    }
569}
570
571impl PushRaw for RawVector {
572    fn push_bit(&mut self, value: bool) {
573        let (index, offset) = bits::split_offset(self.len);
574        if index == self.data.len() {
575            self.data.push(0);
576        }
577        self.data[index] |= (value as u64) << offset;
578        self.len += 1;
579    }
580
581    unsafe fn push_int(&mut self, value: u64, width: usize) {
582        if self.len + width > bits::words_to_bits(self.data.len()) {
583            self.data.push(0);
584        }
585        bits::write_int(&mut self.data, self.len, value, width);
586        self.len += width;
587    }
588}
589
590impl PopRaw for RawVector {
591    fn pop_bit(&mut self) -> Option<bool> {
592        if !self.is_empty() {
593            let result = self.bit(self.len - 1);
594            self.len -= 1;
595            self.data.resize(bits::bits_to_words(self.len()), 0); // Avoid using unnecessary words.
596            self.set_unused_bits(false);
597            Some(result)
598        } else {
599            None
600        }
601    }
602
603    unsafe fn pop_int(&mut self, width: usize) -> Option<u64> {
604        if self.len() >= width {
605            let result = self.int(self.len - width, width);
606            self.len -= width;
607            self.data.resize(bits::bits_to_words(self.len()), 0); // Avoid using unnecessary words.
608            self.set_unused_bits(false);
609            Some(result)
610        } else {
611            None
612        }
613    }
614}
615
616impl Serialize for RawVector {
617    fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
618        self.len.serialize(writer)?;
619        self.data.serialize_header(writer)?;
620        Ok(())
621    }
622
623    fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
624        self.data.serialize_body(writer)?;
625        Ok(())
626    }
627
628    fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
629        let len = usize::load(reader)?;
630        let data = <Vec<u64> as Serialize>::load(reader)?;
631        if bits::bits_to_words(len) != data.len() {
632            Err(Error::new(ErrorKind::InvalidData, "Bit length / word length mismatch"))
633        } else {
634            Ok(RawVector {
635                len, data,
636            })
637        }
638    }
639
640    fn size_in_elements(&self) -> usize {
641        self.len.size_in_elements() + self.data.size_in_elements()
642    }
643}
644
645//-----------------------------------------------------------------------------
646
647impl AsRef<[u64]> for RawVector {
648    #[inline]
649    fn as_ref(&self) -> &[u64] {
650        self.data.as_ref()
651    }
652}
653
654//-----------------------------------------------------------------------------
655
656/// A buffered file writer compatible with the serialization format of [`RawVector`].
657///
658/// When the writer goes out of scope, the internal buffer is flushed, the file is closed, and all errors are ignored.
659/// Call [`RawVectorWriter::close`] explicitly to handle the errors.
660///
661/// # Examples
662///
663/// ```
664/// use simple_sds_sbwt::raw_vector::{RawVector, RawVectorWriter, AccessRaw, PushRaw};
665/// use simple_sds_sbwt::serialize;
666/// use std::fs;
667///
668/// let filename = serialize::temp_file_name("raw-vector-writer");
669/// let width = 29;
670/// let mut header: Vec<u64> = Vec::new();
671/// let mut writer = RawVectorWriter::new(&filename, &mut header).unwrap();
672/// unsafe {
673///     writer.push_int(123, width);
674///     writer.push_int(456, width);
675///     writer.push_int(789, width);
676/// }
677/// writer.close();
678///
679/// let v: RawVector = serialize::load_from(&filename).unwrap();
680/// assert_eq!(v.len(), 3 * width);
681/// unsafe {
682///     assert_eq!(v.int(0, width), 123);
683///     assert_eq!(v.int(width, width), 456);
684///     assert_eq!(v.int(2 * width, width), 789);
685/// }
686///
687/// fs::remove_file(&filename);
688/// ```
#[derive(Debug)]
pub struct RawVectorWriter {
    // Total number of bits pushed to the writer so far.
    len: usize,
    // Buffer size in bits; the buffer is flushed once it holds at least this many bits.
    buf_len: usize,
    // In-memory buffer for the bits that have not been written to the file yet.
    buf: RawVector,
    // The output file; `None` after the writer has been closed.
    file: Option<File>,
    // Name of the output file.
    filename: PathBuf,
}
697
698// Ways of flushing a write buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum FlushMode {
    // Only flush the part of the buffer that can be flushed safely.
    // Bits past the nominal buffer length are kept and pushed back after the flush.
    Safe,
    // Flush the entire buffer.
    // Subsequent writes to the buffer may leave it in an invalid state.
    Final,
}
707
708impl RawVectorWriter {
709    /// Default buffer size in bits.
710    pub const DEFAULT_BUFFER_SIZE: usize = 8 * 1024 * 1024;
711
712    /// Returns the length of the vector in bits.
713    #[inline]
714    pub fn len(&self) -> usize {
715        self.len
716    }
717
718    /// Returns `true` if the vector is empty.
719    #[inline]
720    pub fn is_empty(&self) -> bool {
721        self.len() == 0
722    }
723
724    /// Creates an empty vector stored in the specified file with the default buffer size.
725    ///
726    /// If the file already exists, it will be overwritten.
727    ///
728    /// # Arguments
729    ///
730    /// * `filename`: Name of the file.
731    /// * `header`: Header of the parent structure (may be empty).
732    pub fn new<P: AsRef<Path>>(filename: P, header: &mut Vec<u64>) -> io::Result<RawVectorWriter> {
733        let mut options = OpenOptions::new();
734        let file = options.create(true).write(true).truncate(true).open(&filename)?;
735        // Allocate one extra word for overflow.
736        let buf = RawVector::with_capacity(Self::DEFAULT_BUFFER_SIZE + bits::WORD_BITS);
737        let mut name = PathBuf::new();
738        name.push(&filename);
739        let mut result = RawVectorWriter {
740            len: 0,
741            buf_len: Self::DEFAULT_BUFFER_SIZE,
742            buf,
743            file: Some(file),
744            filename: name,
745        };
746        result.write_header(header)?;
747        Ok(result)
748    }
749
750    /// Creates an empty vector stored in the specified file with user-defined buffer size.
751    ///
752    /// If the file already exists, it will be overwritten.
753    /// The buffer size will be rounded up to the next multiple of [`bits::WORD_BITS`].
754    ///
755    /// # Arguments
756    ///
757    /// * `filename`: Name of the file.
758    /// * `header`: Header of the parent structure (may be empty).
759    /// * `buf_len`: Buffer size in bits.
760    pub fn with_buf_len<P: AsRef<Path>>(filename: P, header: &mut Vec<u64>, buf_len: usize) -> io::Result<RawVectorWriter> {
761        // Buffer length must be a positive multiple of `bits::WORD_BITS`.
762        let buf_len = cmp::max(bits::round_up_to_word_bits(buf_len), bits::WORD_BITS);
763        let mut options = OpenOptions::new();
764        let file = options.create(true).write(true).truncate(true).open(&filename)?;
765        // Allocate one extra word for overflow.
766        let buf = RawVector::with_capacity(buf_len + bits::WORD_BITS);
767        let mut name = PathBuf::new();
768        name.push(&filename);
769        let mut result = RawVectorWriter {
770            len: 0,
771            buf_len,
772            buf,
773            file: Some(file),
774            filename: name,
775        };
776        result.write_header(header)?;
777        Ok(result)
778    }
779
780    /// Returns the name of the file.
781    pub fn filename(&self) -> &Path {
782        self.filename.as_path()
783    }
784
785    /// Returns `true` if the file is open for writing.
786    pub fn is_open(&self) -> bool {
787        self.file.is_some()
788    }
789
790    // Flushes the buffer.
791    fn flush(&mut self, mode: FlushMode) -> io::Result<()> {
792        if let Some(f) = self.file.as_mut() {
793            // Handle the overflow if not serializing the entire buffer.
794            let mut overflow: (u64, usize) = (0, 0);
795            if let FlushMode::Safe = mode {
796                if self.buf.len() > self.buf_len {
797                    unsafe { overflow = (self.buf.int(self.buf_len, self.buf.len() - self.buf_len), self.buf.len() - self.buf_len); }
798                    self.buf.resize(self.buf_len, false);
799                }
800            }
801
802            // Serialize and clear the buffer.
803            self.buf.serialize_body(f)?;
804            self.buf.clear();
805
806            // Push the overflow back to the buffer.
807            if let FlushMode::Safe = mode {
808                if overflow.1 > 0 {
809                    unsafe { self.buf.push_int(overflow.0, overflow.1); }
810                }
811            }
812        }
813        Ok(())
814    }
815
816    // Seeks to the start of the file, appends its own header to `header`, and writes it into the file.
817    fn write_header(&mut self, header: &mut Vec<u64>) -> io::Result<()> {
818        if let Some(f) = self.file.as_mut() {
819            f.seek(SeekFrom::Start(0))?;
820            header.push(self.len as u64);
821            header.push(bits::bits_to_words(self.len) as u64);
822            header.serialize_body(f)?;
823        }
824        Ok(())
825    }
826
827    /// Flushes the buffer, writes the header, and closes the file.
828    ///
829    /// No effect if the file is closed.
830    ///
831    /// # Errors
832    ///
833    /// Any I/O errors will be passed through.
834    pub fn close(&mut self) -> io::Result<()> {
835        let mut header: Vec<u64> = Vec::new();
836        self.close_with_header(&mut header)
837    }
838
839    /// Flushes the buffer, writes the header, and closes the file.
840    ///
841    /// No effect if the file is closed.
842    /// This method should only be called by the `close` method of a parent writer.
843    ///
844    /// # Errors
845    ///
846    /// Any I/O errors will be passed through.
847    pub fn close_with_header(&mut self, header: &mut Vec<u64>) -> io::Result<()> {
848        if self.is_open() {
849            self.flush(FlushMode::Final)?;
850            self.write_header(header)?;
851            self.file = None
852        }
853        Ok(())
854    }
855}
856
857//-----------------------------------------------------------------------------
858
859impl PushRaw for RawVectorWriter {
860    fn push_bit(&mut self, value: bool) {
861        self.buf.push_bit(value); self.len += 1;
862        if self.buf.len() >= self.buf_len {
863            self.flush(FlushMode::Safe).unwrap();
864        }
865    }
866
867    unsafe fn push_int(&mut self, value: u64, width: usize) {
868        self.buf.push_int(value, width); self.len += width;
869        if self.buf.len() >= self.buf_len {
870            self.flush(FlushMode::Safe).unwrap();
871        }
872    }
873}
874
875impl Drop for RawVectorWriter {
876    fn drop(&mut self) {
877        let _ = self.close();
878    }
879}
880
881//-----------------------------------------------------------------------------
882
883/// An immutable memory-mapped [`RawVector`].
884///
885/// This is compatible with the serialization format of [`RawVector`].
886///
887/// # Examples
888///
889/// ```
890/// use simple_sds_sbwt::raw_vector::{RawVector, RawVectorMapper, AccessRaw, PushRaw};
891/// use simple_sds_sbwt::serialize::{MemoryMap, MemoryMapped, MappingMode};
892/// use simple_sds_sbwt::serialize;
893/// use std::fs;
894///
895/// let filename = serialize::temp_file_name("raw-vector-mapper");
896/// let width = 29;
897/// let mut original = RawVector::new();
898/// unsafe {
899///     original.push_int(123, width);
900///     original.push_int(456, width);
901///     original.push_int(789, width);
902/// }
903/// serialize::serialize_to(&original, &filename);
904///
905/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
906/// let mapper = RawVectorMapper::new(&map, 0).unwrap();
907/// assert_eq!(mapper.len(), 3 * width);
908/// unsafe {
909///     assert_eq!(mapper.int(0, width), 123);
910///     assert_eq!(mapper.int(width, width), 456);
911///     assert_eq!(mapper.int(2 * width, width), 789);
912/// }
913///
914/// drop(mapper); drop(map);
915/// fs::remove_file(&filename);
916/// ```
#[cfg(not(target_family = "wasm"))]
#[derive(PartialEq, Eq, Debug)]
pub struct RawVectorMapper<'a> {
    // Length of the vector in bits, as read from the memory map.
    len: usize,
    // The data words, borrowed from the memory map.
    data: MappedSlice<'a, u64>,
}
923
924#[cfg(not(target_family = "wasm"))]
925impl<'a> RawVectorMapper<'a> {
926    /// Returns the length of the vector in bits.
927    #[inline]
928    pub fn len(&self) -> usize {
929        self.len
930    }
931
932    /// Returns `true` if the vector is empty.
933    #[inline]
934    pub fn is_empty(&self) -> bool {
935        self.len() == 0
936    }
937
938    /// Counts the number of ones in the bit array.
939    pub fn count_ones(&self) -> usize {
940        let mut result: usize = 0;
941        for value in self.data.iter() {
942            result += (*value).count_ones() as usize;
943        }
944        result
945    }
946}
947
948#[cfg(not(target_family = "wasm"))]
949impl<'a> AccessRaw for RawVectorMapper<'a> {
950    #[inline]
951    fn bit(&self, bit_offset: usize) -> bool {
952        let (index, offset) = bits::split_offset(bit_offset);
953        ((self.data[index] >> offset) & 1) == 1
954    }
955
956    #[inline]
957    unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
958        bits::read_int(&self.data, bit_offset, width)
959    }
960
961    #[inline]
962    fn word(&self, index: usize) -> u64 {
963        self.data[index]
964    }
965
966    #[inline]
967    unsafe fn word_unchecked(&self, index: usize) -> u64 {
968        *self.data.get_unchecked(index)
969    }
970
971    #[inline]
972    fn is_mutable(&self) -> bool {
973        false
974    }
975
976    #[inline]
977    fn set_bit(&mut self, _: usize, _: bool) {
978        panic!("RawVectorMapper::set_bit(): Not implemented");
979    }
980
981    #[inline]
982    unsafe fn set_int(&mut self, _: usize, _: u64, _: usize) {
983        panic!("RawVectorMapper::set_int(): Not implemented");
984    }
985}
986
987#[cfg(not(target_family = "wasm"))]
988impl<'a> MemoryMapped<'a> for RawVectorMapper<'a> {
989    fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
990        if offset >= map.len() {
991            return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
992        }
993        let slice: &[u64] = map.as_ref();
994        let len = slice[offset] as usize;
995        let data = MappedSlice::new(map, offset + 1)?;
996        Ok(RawVectorMapper {
997            len, data,
998        })
999    }
1000
1001    fn map_offset(&self) -> usize {
1002        self.data.map_offset() - 1
1003    }
1004
1005    fn map_len(&self) -> usize {
1006        self.data.map_len() + 1
1007    }
1008}
1009
1010#[cfg(not(target_family = "wasm"))]
1011impl<'a> AsRef<MappedSlice<'a, u64>> for RawVectorMapper<'a> {
1012    #[inline]
1013    fn as_ref(&self) -> &MappedSlice<'a, u64> {
1014        &(self.data)
1015    }
1016}
1017
1018//-----------------------------------------------------------------------------