simple_sds_sbwt/raw_vector.rs
1//! The basic vector implementing the low-level functionality used by other vectors in the crate.
2
3use crate::serialize::Serialize;
4#[cfg(not(target_family = "wasm"))]
5use crate::serialize::{MappedSlice, MemoryMap, MemoryMapped};
6use crate::bits;
7
8use std::fs::{File, OpenOptions};
9use std::io::{Error, ErrorKind, Seek, SeekFrom};
10use std::path::{Path, PathBuf};
11use std::{cmp, io};
12
13#[cfg(test)]
14mod tests;
15
16//-----------------------------------------------------------------------------
17
18/// Random access to bits and variable-width integers in a bit array.
19///
20/// # Examples
21///
22/// ```
23/// use simple_sds_sbwt::raw_vector::AccessRaw;
24/// use simple_sds_sbwt::bits;
25///
26/// struct Example(Vec<u64>);
27///
28/// impl AccessRaw for Example {
29/// fn bit(&self, bit_offset: usize) -> bool {
30/// let (index, offset) = bits::split_offset(bit_offset);
31/// (self.0[index] & (1u64 << offset)) != 0
32/// }
33///
34/// unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
35/// bits::read_int(&self.0, bit_offset, width)
36/// }
37///
38/// fn word(&self, index: usize) -> u64 {
39/// self.0[index]
40/// }
41///
42/// unsafe fn word_unchecked(&self, index: usize) -> u64 {
43/// *self.0.get_unchecked(index)
44/// }
45///
46/// fn is_mutable(&self) -> bool {
47/// true
48/// }
49///
50/// fn set_bit(&mut self, bit_offset: usize, value: bool) {
51/// let (index, offset) = bits::split_offset(bit_offset);
52/// self.0[index] &= !(1u64 << offset);
53/// self.0[index] |= (value as u64) << offset;
54/// }
55///
56/// unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize) {
57/// bits::write_int(&mut self.0, bit_offset, value, width);
58/// }
59/// }
60///
61/// let mut example = Example(vec![0u64; 2]);
62/// assert!(example.is_mutable());
63///
64/// unsafe {
65/// example.set_int(4, 0x33, 8);
66/// example.set_int(63, 2, 2);
67/// }
68/// example.set_bit(72, true);
69/// assert_eq!(example.0[0], 0x330);
70/// assert_eq!(example.0[1], 0x101);
71///
72/// assert!(example.bit(72));
73/// assert!(!example.bit(68));
74/// unsafe {
75/// assert_eq!(example.int(4, 8), 0x33);
76/// assert_eq!(example.int(63, 2), 2);
77/// }
78/// assert_eq!(example.word(1), 0x101);
79/// ```
pub trait AccessRaw {
    /// Reads a single bit from the bit array.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Offset of the bit in the bit array.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset` is not a valid offset in the bit array.
    /// May panic from I/O errors.
    fn bit(&self, bit_offset: usize) -> bool;

    /// Reads an integer of the given width from the container.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Starting offset in the bit array.
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset + width - 1` is not a valid offset in the bit array.
    /// May panic from I/O errors.
    unsafe fn int(&self, bit_offset: usize, width: usize) -> u64;

    /// Reads a 64-bit word from the container.
    ///
    /// This may be faster than calling `self.int(index * 64, 64)`.
    ///
    /// # Arguments
    ///
    /// * `index`: Index of the word; the word starts at bit offset `index * 64`.
    ///
    /// # Panics
    ///
    /// May panic if `index * 64` is not a valid offset in the bit array.
    /// May panic from I/O errors.
    fn word(&self, index: usize) -> u64;

    /// Unsafe version of [`AccessRaw::word`] without bounds checks.
    ///
    /// # Safety
    ///
    /// Behavior is undefined in situations where the safe versions may panic.
    unsafe fn word_unchecked(&self, index: usize) -> u64;

    /// Returns `true` if the underlying data is mutable.
    ///
    /// This is relevant, for example, with memory-mapped vectors, where the underlying file may be opened as read-only.
    /// The writing methods of this trait may panic when this returns `false`.
    fn is_mutable(&self) -> bool;

    /// Writes a single bit to the container.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Offset in the bit array.
    /// * `value`: The value of the bit.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset` is not a valid offset in the bit array.
    /// May panic if the underlying data is not mutable.
    /// May panic from I/O errors.
    fn set_bit(&mut self, bit_offset: usize, value: bool);

    /// Writes an integer of the given width to the container.
    ///
    /// # Arguments
    ///
    /// * `bit_offset`: Starting offset in the bit array.
    /// * `value`: The integer to be written.
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    ///
    /// # Panics
    ///
    /// May panic if `bit_offset + width - 1` is not a valid offset in the bit array.
    /// May panic if the underlying data is not mutable.
    /// May panic from I/O errors.
    unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize);
}
161
162//-----------------------------------------------------------------------------
163
164/// Append bits and variable-width integers to a container.
165///
166/// The container is not required to remember the types of the pushed items.
167///
168/// # Examples
169/// ```
170/// use simple_sds_sbwt::raw_vector::PushRaw;
171/// use simple_sds_sbwt::bits;
172///
173/// struct Example(Vec<bool>, Vec<u64>);
174///
175/// impl Example{
176/// fn new() -> Example {
177/// Example(Vec::new(), Vec::new())
178/// }
179/// }
180///
181/// impl PushRaw for Example {
182/// fn push_bit(&mut self, value: bool) {
183/// self.0.push(value);
184/// }
185///
186/// unsafe fn push_int(&mut self, value: u64, width: usize) {
187/// self.1.push(value & bits::low_set(width));
188/// }
189/// }
190///
191/// let mut example = Example::new();
192/// example.push_bit(false);
193/// unsafe {
194/// example.push_int(123, 8);
195/// example.push_int(456, 9);
196/// }
197/// example.push_bit(true);
198///
199/// assert_eq!(example.0.len(), 2);
200/// assert_eq!(example.1.len(), 2);
201/// ```
pub trait PushRaw {
    /// Appends a single bit to the end of the container.
    ///
    /// # Arguments
    ///
    /// * `value`: The value of the bit.
    ///
    /// # Panics
    ///
    /// May panic from I/O errors.
    /// May panic if there is an integer overflow.
    fn push_bit(&mut self, value: bool);

    /// Appends an integer of the given width to the end of the container.
    ///
    /// # Arguments
    ///
    /// * `value`: The integer to be appended.
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    ///
    /// # Panics
    ///
    /// May panic from I/O errors.
    /// May panic if there is an integer overflow.
    unsafe fn push_int(&mut self, value: u64, width: usize);
}
228
229/// Remove and return bits and variable-width integers from a container.
230///
231/// Behavior is implementation-dependent if the sequence of pop operations is not the reverse of push operations.
232///
233/// # Examples
234/// ```
235/// use simple_sds_sbwt::raw_vector::PopRaw;
236///
237/// struct Example(Vec<bool>, Vec<u64>);
238///
239/// impl Example{
240/// fn new() -> Example {
241/// Example(Vec::new(), Vec::new())
242/// }
243/// }
244///
245/// impl PopRaw for Example {
246/// fn pop_bit(&mut self) -> Option<bool> {
247/// self.0.pop()
248/// }
249///
250/// unsafe fn pop_int(&mut self, _: usize) -> Option<u64> {
251/// self.1.pop()
252/// }
253/// }
254///
255/// let mut example = Example::new();
256/// example.0.push(false);
257/// example.1.push(123);
258/// example.1.push(456);
259/// example.0.push(true);
260///
261/// assert_eq!(example.pop_bit().unwrap(), true);
262/// unsafe {
263/// assert_eq!(example.pop_int(9).unwrap(), 456);
264/// assert_eq!(example.pop_int(8).unwrap(), 123);
265/// }
266/// assert_eq!(example.pop_bit().unwrap(), false);
267/// assert_eq!(example.pop_bit(), None);
268/// unsafe { assert_eq!(example.pop_int(1), None); }
269/// ```
pub trait PopRaw {
    /// Removes and returns the last bit from the container.
    ///
    /// Returns [`None`] if the container does not have more bits.
    fn pop_bit(&mut self) -> Option<bool>;

    /// Removes and returns the last `width` bits from the container as an integer.
    ///
    /// Returns [`None`] if the container does not have more integers of that width.
    ///
    /// # Arguments
    ///
    /// * `width`: The width of the integer in bits.
    ///
    /// # Safety
    ///
    /// Behavior is undefined if `width > 64`.
    unsafe fn pop_int(&mut self, width: usize) -> Option<u64>;
}
285
286//-----------------------------------------------------------------------------
287
288/// A contiguous growable array of bits and up to 64-bit integers based on [`Vec`] of [`u64`] values.
289///
290/// There are no iterators over the vector, because it may contain items of varying widths.
291///
292/// # Notes
293///
294/// * The unused part of the last integer is always set to `0`.
295/// * The underlying vector may allocate but not use more integers than are strictly necessary.
296/// * `RawVector` never panics from I/O errors.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct RawVector {
    // Length of the vector in bits.
    len: usize,
    // The 64-bit words that store the bits of the vector.
    data: Vec<u64>,
}
302
303impl RawVector {
304 /// Returns the length of the vector in bits.
305 #[inline]
306 pub fn len(&self) -> usize {
307 self.len
308 }
309
310 /// Returns `true` if the vector is empty.
311 #[inline]
312 pub fn is_empty(&self) -> bool {
313 self.len() == 0
314 }
315
316 /// Returns the capacity of the vector in bits.
317 #[inline]
318 pub fn capacity(&self) -> usize {
319 bits::words_to_bits(self.data.capacity())
320 }
321
322 /// Counts the number of ones in the bit array.
323 ///
324 /// # Examples
325 ///
326 /// ```
327 /// use simple_sds_sbwt::raw_vector::{RawVector, AccessRaw};
328 ///
329 /// let mut v = RawVector::with_len(137, false);
330 /// assert_eq!(v.count_ones(), 0);
331 /// v.set_bit(1, true); v.set_bit(33, true); v.set_bit(95, true); v.set_bit(123, true);
332 /// assert_eq!(v.count_ones(), 4);
333 /// ```
334 pub fn count_ones(&self) -> usize {
335 let mut result: usize = 0;
336 for value in self.data.iter() {
337 result += (*value).count_ones() as usize;
338 }
339 result
340 }
341
342 /// Creates an empty vector.
343 ///
344 /// # Examples
345 ///
346 /// ```
347 /// use simple_sds_sbwt::raw_vector::RawVector;
348 ///
349 /// let v = RawVector::new();
350 /// assert!(v.is_empty());
351 /// assert_eq!(v.capacity(), 0);
352 /// ```
353 pub fn new() -> RawVector {
354 RawVector::default()
355 }
356
357 /// Creates an initialized vector of specified length.
358 ///
359 /// # Arguments
360 ///
361 /// * `len`: Length of the vector in bits.
362 /// * `value`: Initialization value.
363 ///
364 /// # Examples
365 ///
366 /// ```
367 /// use simple_sds_sbwt::raw_vector::RawVector;
368 ///
369 /// let v = RawVector::with_len(137, false);
370 /// assert_eq!(v.len(), 137);
371 /// ```
372 pub fn with_len(len: usize, value: bool) -> RawVector {
373 let val = bits::filler_value(value);
374 let data: Vec<u64> = vec![val; bits::bits_to_words(len)];
375 let mut result = RawVector {
376 len, data,
377 };
378 result.set_unused_bits(false);
379 result
380 }
381
382 /// Creates an empty vector with enough capacity for at least `capacity` bits.
383 ///
384 /// # Examples
385 ///
386 /// ```
387 /// use simple_sds_sbwt::raw_vector::RawVector;
388 ///
389 /// let v = RawVector::with_capacity(137);
390 /// assert!(v.capacity() >= 137);
391 /// ```
392 pub fn with_capacity(capacity: usize) -> RawVector {
393 RawVector {
394 len: 0,
395 data: Vec::with_capacity(bits::bits_to_words(capacity)),
396 }
397 }
398
399 /// Returns the size of a serialized vector with the given capacity in [`u64`] elements.
400 ///
401 /// # Examples
402 ///
403 /// ```
404 /// use simple_sds_sbwt::raw_vector::RawVector;
405 ///
406 /// assert_eq!(RawVector::size_by_params(247), 6);
407 /// ```
408 pub fn size_by_params(capacity: usize) -> usize {
409 2 + bits::bits_to_words(capacity)
410 }
411
412 /// Returns a copy of the vector with each bit flipped.
413 ///
414 /// # Examples
415 ///
416 /// ```
417 /// use simple_sds_sbwt::raw_vector::{RawVector, AccessRaw};
418 ///
419 /// let mut original = RawVector::with_len(137, false);
420 /// original.set_bit(1, true); original.set_bit(33, true);
421 /// unsafe { original.set_int(95, 456, 9); } original.set_bit(123, true);
422 /// let complement = original.complement();
423 /// for i in 0..137 {
424 /// assert_eq!(!(complement.bit(i)), original.bit(i));
425 /// }
426 /// ```
427 pub fn complement(&self) -> RawVector {
428 let mut result = self.clone();
429 for word in result.data.iter_mut() {
430 *word = !*word;
431 }
432 result.set_unused_bits(false);
433 result
434 }
435
436 /// Resizes the vector to a specified length.
437 ///
438 /// If `new_len > self.len()`, the new `new_len - self.len()` bits will be initialized.
439 /// If `new_len < self.len()`, the vector is truncated.
440 ///
441 /// # Arguments
442 ///
443 /// * `new_len`: New length of the vector in bits.
444 /// * `value`: Initialization value.
445 ///
446 /// # Examples
447 ///
448 /// ```
449 /// use simple_sds_sbwt::raw_vector::RawVector;
450 ///
451 /// let mut v = RawVector::new();
452 /// v.resize(137, true);
453 /// let w = RawVector::with_len(137, true);
454 /// assert_eq!(v, w);
455 /// ```
456 pub fn resize(&mut self, new_len: usize, value: bool) {
457 // Fill the unused bits if necessary.
458 if new_len > self.len() {
459 self.set_unused_bits(value);
460 }
461
462 // Use more space if necessary.
463 self.data.resize(bits::bits_to_words(new_len), bits::filler_value(value));
464 self.len = new_len;
465 self.set_unused_bits(false);
466 }
467
468 /// Clears the vector without freeing the data.
469 ///
470 /// # Examples
471 ///
472 /// ```
473 /// use simple_sds_sbwt::raw_vector::RawVector;
474 ///
475 /// let mut v = RawVector::with_len(137, true);
476 /// assert_eq!(v.len(), 137);
477 /// v.clear();
478 /// assert!(v.is_empty());
479 /// ```
480 pub fn clear(&mut self) {
481 self.data.clear();
482 self.len = 0;
483 }
484
485 /// Reserves space for storing at least `self.len() + additional` bits in the vector.
486 ///
487 /// Does nothing if the capacity is already sufficient.
488 ///
489 /// # Examples
490 ///
491 /// ```
492 /// use simple_sds_sbwt::raw_vector::RawVector;
493 ///
494 /// let mut v = RawVector::with_len(137, false);
495 /// v.reserve(318);
496 /// assert!(v.capacity() >= 137 + 318);
497 /// ```
498 ///
499 /// # Panics
500 ///
501 /// May panic if `self.len() + additional + 63 > usize::MAX`.
502 pub fn reserve(&mut self, additional: usize) {
503 let words_needed = bits::bits_to_words(self.len() + additional);
504 if words_needed > self.data.capacity() {
505 self.data.reserve(words_needed - self.data.capacity());
506 }
507 }
508
509 // Set the unused bits in the last integer to the specified value.
510 fn set_unused_bits(&mut self, value: bool) {
511 let (index, width) = bits::split_offset(self.len());
512 if width > 0 {
513 if value {
514 self.data[index] |= !bits::low_set(width);
515 }
516 else {
517 self.data[index] &= bits::low_set(width);
518 }
519 }
520 }
521
522
523 pub fn get_words(&self) -> &[u64] {
524 &self.data
525 }
526
527}
528
529//-----------------------------------------------------------------------------
530
531impl AccessRaw for RawVector {
532 #[inline]
533 fn bit(&self, bit_offset: usize) -> bool {
534 let (index, offset) = bits::split_offset(bit_offset);
535 ((self.data[index] >> offset) & 1) == 1
536 }
537
538 #[inline]
539 unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
540 bits::read_int(&self.data, bit_offset, width)
541 }
542
543 #[inline]
544 fn word(&self, index: usize) -> u64 {
545 self.data[index]
546 }
547
548 #[inline]
549 unsafe fn word_unchecked(&self, index: usize) -> u64 {
550 *self.data.get_unchecked(index)
551 }
552
553 #[inline]
554 fn is_mutable(&self) -> bool {
555 true
556 }
557
558 #[inline]
559 fn set_bit(&mut self, bit_offset: usize, value: bool) {
560 let (index, offset) = bits::split_offset(bit_offset);
561 self.data[index] &= !(1u64 << offset);
562 self.data[index] |= (value as u64) << offset;
563 }
564
565 #[inline]
566 unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize) {
567 bits::write_int(&mut self.data, bit_offset, value, width);
568 }
569}
570
571impl PushRaw for RawVector {
572 fn push_bit(&mut self, value: bool) {
573 let (index, offset) = bits::split_offset(self.len);
574 if index == self.data.len() {
575 self.data.push(0);
576 }
577 self.data[index] |= (value as u64) << offset;
578 self.len += 1;
579 }
580
581 unsafe fn push_int(&mut self, value: u64, width: usize) {
582 if self.len + width > bits::words_to_bits(self.data.len()) {
583 self.data.push(0);
584 }
585 bits::write_int(&mut self.data, self.len, value, width);
586 self.len += width;
587 }
588}
589
590impl PopRaw for RawVector {
591 fn pop_bit(&mut self) -> Option<bool> {
592 if !self.is_empty() {
593 let result = self.bit(self.len - 1);
594 self.len -= 1;
595 self.data.resize(bits::bits_to_words(self.len()), 0); // Avoid using unnecessary words.
596 self.set_unused_bits(false);
597 Some(result)
598 } else {
599 None
600 }
601 }
602
603 unsafe fn pop_int(&mut self, width: usize) -> Option<u64> {
604 if self.len() >= width {
605 let result = self.int(self.len - width, width);
606 self.len -= width;
607 self.data.resize(bits::bits_to_words(self.len()), 0); // Avoid using unnecessary words.
608 self.set_unused_bits(false);
609 Some(result)
610 } else {
611 None
612 }
613 }
614}
615
616impl Serialize for RawVector {
617 fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
618 self.len.serialize(writer)?;
619 self.data.serialize_header(writer)?;
620 Ok(())
621 }
622
623 fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
624 self.data.serialize_body(writer)?;
625 Ok(())
626 }
627
628 fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
629 let len = usize::load(reader)?;
630 let data = <Vec<u64> as Serialize>::load(reader)?;
631 if bits::bits_to_words(len) != data.len() {
632 Err(Error::new(ErrorKind::InvalidData, "Bit length / word length mismatch"))
633 } else {
634 Ok(RawVector {
635 len, data,
636 })
637 }
638 }
639
640 fn size_in_elements(&self) -> usize {
641 self.len.size_in_elements() + self.data.size_in_elements()
642 }
643}
644
645//-----------------------------------------------------------------------------
646
647impl AsRef<[u64]> for RawVector {
648 #[inline]
649 fn as_ref(&self) -> &[u64] {
650 self.data.as_ref()
651 }
652}
653
654//-----------------------------------------------------------------------------
655
656/// A buffered file writer compatible with the serialization format of [`RawVector`].
657///
658/// When the writer goes out of scope, the internal buffer is flushed, the file is closed, and all errors are ignored.
659/// Call [`RawVectorWriter::close`] explicitly to handle the errors.
660///
661/// # Examples
662///
663/// ```
664/// use simple_sds_sbwt::raw_vector::{RawVector, RawVectorWriter, AccessRaw, PushRaw};
665/// use simple_sds_sbwt::serialize;
666/// use std::fs;
667///
668/// let filename = serialize::temp_file_name("raw-vector-writer");
669/// let width = 29;
670/// let mut header: Vec<u64> = Vec::new();
671/// let mut writer = RawVectorWriter::new(&filename, &mut header).unwrap();
672/// unsafe {
673/// writer.push_int(123, width);
674/// writer.push_int(456, width);
675/// writer.push_int(789, width);
676/// }
677/// writer.close();
678///
679/// let v: RawVector = serialize::load_from(&filename).unwrap();
680/// assert_eq!(v.len(), 3 * width);
681/// unsafe {
682/// assert_eq!(v.int(0, width), 123);
683/// assert_eq!(v.int(width, width), 456);
684/// assert_eq!(v.int(2 * width, width), 789);
685/// }
686///
687/// fs::remove_file(&filename);
688/// ```
#[derive(Debug)]
pub struct RawVectorWriter {
    // Total number of bits pushed to the writer.
    len: usize,
    // Buffer size in bits; the buffer is flushed once it holds at least this many bits.
    buf_len: usize,
    // Buffer for the bits that have not been written to the file yet.
    buf: RawVector,
    // The file being written; `None` after the writer has been closed.
    file: Option<File>,
    // Name of the file.
    filename: PathBuf,
}
697
698// Ways of flushing a write buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum FlushMode {
    // Only flush the part of the buffer that can be flushed safely.
    // Bits past the nominal buffer size are kept and pushed back to the buffer after the flush.
    Safe,
    // Flush the entire buffer.
    // Subsequent writes to the buffer may leave it in an invalid state.
    Final,
}
707
708impl RawVectorWriter {
709 /// Default buffer size in bits.
710 pub const DEFAULT_BUFFER_SIZE: usize = 8 * 1024 * 1024;
711
712 /// Returns the length of the vector in bits.
713 #[inline]
714 pub fn len(&self) -> usize {
715 self.len
716 }
717
718 /// Returns `true` if the vector is empty.
719 #[inline]
720 pub fn is_empty(&self) -> bool {
721 self.len() == 0
722 }
723
724 /// Creates an empty vector stored in the specified file with the default buffer size.
725 ///
726 /// If the file already exists, it will be overwritten.
727 ///
728 /// # Arguments
729 ///
730 /// * `filename`: Name of the file.
731 /// * `header`: Header of the parent structure (may be empty).
732 pub fn new<P: AsRef<Path>>(filename: P, header: &mut Vec<u64>) -> io::Result<RawVectorWriter> {
733 let mut options = OpenOptions::new();
734 let file = options.create(true).write(true).truncate(true).open(&filename)?;
735 // Allocate one extra word for overflow.
736 let buf = RawVector::with_capacity(Self::DEFAULT_BUFFER_SIZE + bits::WORD_BITS);
737 let mut name = PathBuf::new();
738 name.push(&filename);
739 let mut result = RawVectorWriter {
740 len: 0,
741 buf_len: Self::DEFAULT_BUFFER_SIZE,
742 buf,
743 file: Some(file),
744 filename: name,
745 };
746 result.write_header(header)?;
747 Ok(result)
748 }
749
750 /// Creates an empty vector stored in the specified file with user-defined buffer size.
751 ///
752 /// If the file already exists, it will be overwritten.
753 /// The buffer size will be rounded up to the next multiple of [`bits::WORD_BITS`].
754 ///
755 /// # Arguments
756 ///
757 /// * `filename`: Name of the file.
758 /// * `header`: Header of the parent structure (may be empty).
759 /// * `buf_len`: Buffer size in bits.
760 pub fn with_buf_len<P: AsRef<Path>>(filename: P, header: &mut Vec<u64>, buf_len: usize) -> io::Result<RawVectorWriter> {
761 // Buffer length must be a positive multiple of `bits::WORD_BITS`.
762 let buf_len = cmp::max(bits::round_up_to_word_bits(buf_len), bits::WORD_BITS);
763 let mut options = OpenOptions::new();
764 let file = options.create(true).write(true).truncate(true).open(&filename)?;
765 // Allocate one extra word for overflow.
766 let buf = RawVector::with_capacity(buf_len + bits::WORD_BITS);
767 let mut name = PathBuf::new();
768 name.push(&filename);
769 let mut result = RawVectorWriter {
770 len: 0,
771 buf_len,
772 buf,
773 file: Some(file),
774 filename: name,
775 };
776 result.write_header(header)?;
777 Ok(result)
778 }
779
780 /// Returns the name of the file.
781 pub fn filename(&self) -> &Path {
782 self.filename.as_path()
783 }
784
785 /// Returns `true` if the file is open for writing.
786 pub fn is_open(&self) -> bool {
787 self.file.is_some()
788 }
789
790 // Flushes the buffer.
791 fn flush(&mut self, mode: FlushMode) -> io::Result<()> {
792 if let Some(f) = self.file.as_mut() {
793 // Handle the overflow if not serializing the entire buffer.
794 let mut overflow: (u64, usize) = (0, 0);
795 if let FlushMode::Safe = mode {
796 if self.buf.len() > self.buf_len {
797 unsafe { overflow = (self.buf.int(self.buf_len, self.buf.len() - self.buf_len), self.buf.len() - self.buf_len); }
798 self.buf.resize(self.buf_len, false);
799 }
800 }
801
802 // Serialize and clear the buffer.
803 self.buf.serialize_body(f)?;
804 self.buf.clear();
805
806 // Push the overflow back to the buffer.
807 if let FlushMode::Safe = mode {
808 if overflow.1 > 0 {
809 unsafe { self.buf.push_int(overflow.0, overflow.1); }
810 }
811 }
812 }
813 Ok(())
814 }
815
816 // Seeks to the start of the file, appends its own header to `header`, and writes it into the file.
817 fn write_header(&mut self, header: &mut Vec<u64>) -> io::Result<()> {
818 if let Some(f) = self.file.as_mut() {
819 f.seek(SeekFrom::Start(0))?;
820 header.push(self.len as u64);
821 header.push(bits::bits_to_words(self.len) as u64);
822 header.serialize_body(f)?;
823 }
824 Ok(())
825 }
826
827 /// Flushes the buffer, writes the header, and closes the file.
828 ///
829 /// No effect if the file is closed.
830 ///
831 /// # Errors
832 ///
833 /// Any I/O errors will be passed through.
834 pub fn close(&mut self) -> io::Result<()> {
835 let mut header: Vec<u64> = Vec::new();
836 self.close_with_header(&mut header)
837 }
838
839 /// Flushes the buffer, writes the header, and closes the file.
840 ///
841 /// No effect if the file is closed.
842 /// This method should only be called by the `close` method of a parent writer.
843 ///
844 /// # Errors
845 ///
846 /// Any I/O errors will be passed through.
847 pub fn close_with_header(&mut self, header: &mut Vec<u64>) -> io::Result<()> {
848 if self.is_open() {
849 self.flush(FlushMode::Final)?;
850 self.write_header(header)?;
851 self.file = None
852 }
853 Ok(())
854 }
855}
856
857//-----------------------------------------------------------------------------
858
859impl PushRaw for RawVectorWriter {
860 fn push_bit(&mut self, value: bool) {
861 self.buf.push_bit(value); self.len += 1;
862 if self.buf.len() >= self.buf_len {
863 self.flush(FlushMode::Safe).unwrap();
864 }
865 }
866
867 unsafe fn push_int(&mut self, value: u64, width: usize) {
868 self.buf.push_int(value, width); self.len += width;
869 if self.buf.len() >= self.buf_len {
870 self.flush(FlushMode::Safe).unwrap();
871 }
872 }
873}
874
875impl Drop for RawVectorWriter {
876 fn drop(&mut self) {
877 let _ = self.close();
878 }
879}
880
881//-----------------------------------------------------------------------------
882
883/// An immutable memory-mapped [`RawVector`].
884///
885/// This is compatible with the serialization format of [`RawVector`].
886///
887/// # Examples
888///
889/// ```
890/// use simple_sds_sbwt::raw_vector::{RawVector, RawVectorMapper, AccessRaw, PushRaw};
891/// use simple_sds_sbwt::serialize::{MemoryMap, MemoryMapped, MappingMode};
892/// use simple_sds_sbwt::serialize;
893/// use std::fs;
894///
895/// let filename = serialize::temp_file_name("raw-vector-mapper");
896/// let width = 29;
897/// let mut original = RawVector::new();
898/// unsafe {
899/// original.push_int(123, width);
900/// original.push_int(456, width);
901/// original.push_int(789, width);
902/// }
903/// serialize::serialize_to(&original, &filename);
904///
905/// let map = MemoryMap::new(&filename, MappingMode::ReadOnly).unwrap();
906/// let mapper = RawVectorMapper::new(&map, 0).unwrap();
907/// assert_eq!(mapper.len(), 3 * width);
908/// unsafe {
909/// assert_eq!(mapper.int(0, width), 123);
910/// assert_eq!(mapper.int(width, width), 456);
911/// assert_eq!(mapper.int(2 * width, width), 789);
912/// }
913///
914/// drop(mapper); drop(map);
915/// fs::remove_file(&filename);
916/// ```
#[cfg(not(target_family = "wasm"))]
#[derive(PartialEq, Eq, Debug)]
pub struct RawVectorMapper<'a> {
    // Length of the vector in bits, as read from the element at the starting offset of the mapping.
    len: usize,
    // The mapped words, starting one element after the length.
    data: MappedSlice<'a, u64>,
}
923
924#[cfg(not(target_family = "wasm"))]
925impl<'a> RawVectorMapper<'a> {
926 /// Returns the length of the vector in bits.
927 #[inline]
928 pub fn len(&self) -> usize {
929 self.len
930 }
931
932 /// Returns `true` if the vector is empty.
933 #[inline]
934 pub fn is_empty(&self) -> bool {
935 self.len() == 0
936 }
937
938 /// Counts the number of ones in the bit array.
939 pub fn count_ones(&self) -> usize {
940 let mut result: usize = 0;
941 for value in self.data.iter() {
942 result += (*value).count_ones() as usize;
943 }
944 result
945 }
946}
947
948#[cfg(not(target_family = "wasm"))]
949impl<'a> AccessRaw for RawVectorMapper<'a> {
950 #[inline]
951 fn bit(&self, bit_offset: usize) -> bool {
952 let (index, offset) = bits::split_offset(bit_offset);
953 ((self.data[index] >> offset) & 1) == 1
954 }
955
956 #[inline]
957 unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
958 bits::read_int(&self.data, bit_offset, width)
959 }
960
961 #[inline]
962 fn word(&self, index: usize) -> u64 {
963 self.data[index]
964 }
965
966 #[inline]
967 unsafe fn word_unchecked(&self, index: usize) -> u64 {
968 *self.data.get_unchecked(index)
969 }
970
971 #[inline]
972 fn is_mutable(&self) -> bool {
973 false
974 }
975
976 #[inline]
977 fn set_bit(&mut self, _: usize, _: bool) {
978 panic!("RawVectorMapper::set_bit(): Not implemented");
979 }
980
981 #[inline]
982 unsafe fn set_int(&mut self, _: usize, _: u64, _: usize) {
983 panic!("RawVectorMapper::set_int(): Not implemented");
984 }
985}
986
987#[cfg(not(target_family = "wasm"))]
988impl<'a> MemoryMapped<'a> for RawVectorMapper<'a> {
989 fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
990 if offset >= map.len() {
991 return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
992 }
993 let slice: &[u64] = map.as_ref();
994 let len = slice[offset] as usize;
995 let data = MappedSlice::new(map, offset + 1)?;
996 Ok(RawVectorMapper {
997 len, data,
998 })
999 }
1000
1001 fn map_offset(&self) -> usize {
1002 self.data.map_offset() - 1
1003 }
1004
1005 fn map_len(&self) -> usize {
1006 self.data.map_len() + 1
1007 }
1008}
1009
1010#[cfg(not(target_family = "wasm"))]
1011impl<'a> AsRef<MappedSlice<'a, u64>> for RawVectorMapper<'a> {
1012 #[inline]
1013 fn as_ref(&self) -> &MappedSlice<'a, u64> {
1014 &(self.data)
1015 }
1016}
1017
1018//-----------------------------------------------------------------------------