mmap_bitvec/
mmap_bitvec.rs

1use std::fs::{metadata, OpenOptions};
2use std::io;
3use std::io::{Read, Write};
4use std::mem::transmute;
5use std::ops::Range;
6use std::path::Path;
7
8use memmap2::{Mmap, MmapMut, MmapOptions};
9
10use crate::bitvec::BitVector;
11
/// Enum representing either a read-only mmap or a mutable mmap
///
/// Wrapping both `memmap2` map types in one enum lets a bit vector hold a
/// single field no matter how the underlying map was opened.
pub enum MmapKind {
    /// A mutable (read/write) mmap
    MmapMut(MmapMut),
    /// A read-only mmap
    Mmap(Mmap),
}
19
20impl MmapKind {
21    /// Get a non-mutable pointer to the mmap
22    #[inline]
23    pub fn as_ptr(&self) -> *const u8 {
24        match self {
25            MmapKind::MmapMut(x) => x.as_ptr(),
26            MmapKind::Mmap(x) => x.as_ptr(),
27        }
28    }
29
30    /// Get a mutable pointer to the mmap
31    #[inline]
32    pub fn as_mut_ptr(&mut self) -> Result<*mut u8, io::Error> {
33        match self {
34            MmapKind::MmapMut(x) => Ok(x.as_mut_ptr()),
35            MmapKind::Mmap(_) => Err(io::Error::new(
36                io::ErrorKind::Other,
37                "attempted to get a mutable pointer to a read-only mmap",
38            )),
39        }
40    }
41
42    /// Flush to disk. A no-op if the mmap is read-only
43    #[inline]
44    pub fn flush(&mut self) -> Result<(), io::Error> {
45        match self {
46            MmapKind::MmapMut(x) => x.flush(),
47            MmapKind::Mmap(_) => Ok(()),
48        }
49    }
50
51    /// Gets the slice
52    #[inline]
53    pub fn as_slice(&self) -> &[u8] {
54        match self {
55            MmapKind::MmapMut(x) => x.as_ref(),
56            MmapKind::Mmap(x) => x.as_ref(),
57        }
58    }
59}
60
/// Bit vector backed by a mmap-ed file
///
/// # File layout
///
/// Files written by `create` and `save_to_disk` consist of a preamble
/// followed by the packed bits:
///
/// 1. two optional "magic" bytes identifying the file type,
/// 2. the length of the header as a big-endian `u16`,
/// 3. the header bytes,
/// 4. the number of bits in the vector as a big-endian `u64`,
/// 5. the bit data, rounded up to a whole number of bytes.
///
/// Only the bit data is memory-mapped. Within a byte, bits are stored
/// most-significant first: bit `i` lives in byte `i / 8` under the mask
/// `1 << (7 - i % 8)`.
///
/// # Examples
///
/// ```rust
/// use mmap_bitvec::{BitVector, MmapBitVec};
///
/// let mut bv = MmapBitVec::from_memory(128).unwrap();
/// bv.set_range_bytes(2..12, &[0b10, 0b01101101]);
/// assert_eq!(bv.get_range(2..12), 0b1001101101);
/// ```
pub struct MmapBitVec {
    /// The mmap we are using, either a mutable or read-only one
    pub mmap: MmapKind,
    /// Number of bits in the bitvector
    pub size: usize,
    /// Arbitrary data prepended to file (when file-backed); empty for
    /// in-memory and header-less bit vectors
    header: Box<[u8]>,
    /// controls whether the mapping is backed by a file (see `MAP_ANONYMOUS` here: <https://man7.org/linux/man-pages/man2/mmap.2.html>)
    is_map_anonymous: bool,
}
82
/// Creates (or reuses) `filename`, sizes it for a `size`-bit vector and
/// writes the preamble.
///
/// The preamble is: the two magic bytes (if given), the header length as a
/// big-endian `u16`, the header itself, and `size` as a big-endian `u64`.
/// The file is then long enough for the preamble plus `ceil(size / 8)` bytes
/// of bit data.
///
/// Returns the open file, positioned just after the preamble, together with
/// the preamble length in bytes (the offset at which the bit data starts).
///
/// # Errors
///
/// Returns `InvalidInput` (before touching the filesystem) when `size` is 0
/// or when `header` is too long for its length to fit in a `u16`; otherwise
/// any I/O error from opening, resizing or writing the file.
fn create_bitvec_file(
    filename: &Path,
    size: usize,
    magic: Option<[u8; 2]>,
    header: &[u8],
) -> Result<(std::fs::File, u64), io::Error> {
    // `(size - 1) >> 3` below would underflow for an empty vector.
    if size == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "bit vector size must be at least 1 bit",
        ));
    }
    // The header length is serialized as a u16; a longer header would be
    // silently truncated and make the file unreadable.
    if header.len() > usize::from(u16::MAX) {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "headers longer than 65535 bytes are not supported",
        ));
    }

    let byte_size = ((size - 1) >> 3) as u64 + 1;
    let magic_len = magic.map_or(0, |m| m.len());
    // two magic bytes indicating file type (if passed in), u16 header length, header, u64 bitvec length, bitvec
    let total_header_size = (magic_len + 2 + header.len() + 8) as u64;

    // NOTE(review): an existing file is reused without truncation, so bit
    // data already present in it survives below the new preamble — confirm
    // that this is intended for `create`.
    let mut file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .truncate(false)
        .open(filename)?;
    file.set_len(total_header_size + byte_size)?;

    if let Some(m) = magic {
        file.write_all(&m)?;
    }
    file.write_all(&(header.len() as u16).to_be_bytes())?;
    file.write_all(header)?;
    file.write_all(&(size as u64).to_be_bytes())?;

    Ok((file, total_header_size))
}
112
113impl MmapBitVec {
114    /// Creates a new `MmapBitVec` file
115    ///
116    /// The overall size of bit vector (in bits) and a fixed-size header must
117    /// also be provided (although the header can be 0-length).
118    pub fn create<P: AsRef<Path>>(
119        filename: P,
120        size: usize,
121        magic: Option<[u8; 2]>,
122        header: &[u8],
123    ) -> Result<Self, io::Error> {
124        assert!(
125            header.len() < 65_536,
126            "Headers longer than 65636 bytes not supported"
127        );
128
129        let (file, total_header_size) = create_bitvec_file(filename.as_ref(), size, magic, header)?;
130        let mmap = unsafe { MmapOptions::new().offset(total_header_size).map_mut(&file) }?;
131        Ok(MmapBitVec {
132            mmap: MmapKind::MmapMut(mmap),
133            size,
134            header: header.to_vec().into_boxed_slice(),
135            is_map_anonymous: false,
136        })
137    }
138
139    /// Opens an existing `MmapBitVec` file
140    ///
141    /// If magic bytes are passed to indicate file type, they are checked against the file.
142    ///
143    /// The header size must be specified (as it isn't stored in the file to
144    /// allow the magic bytes to be set) and there is an optional `read_only`
145    /// property that will lock the underlying mmap from writing.
146    pub fn open<P>(filename: P, magic: Option<&[u8; 2]>, read_only: bool) -> Result<Self, io::Error>
147    where
148        P: AsRef<Path>,
149    {
150        // we have to open with write access to satisfy `MmapMut` (which we're
151        // using because there's no generic over both MmapMut and Mmap so
152        // picking one simplifies the types)
153        let mut file = OpenOptions::new()
154            .read(true)
155            .write(!read_only)
156            .open(filename)?;
157
158        if let Some(m) = magic {
159            // read the magic bytes and (optionally) check if it matches
160            let mut file_magic = [0; 2];
161            file.read_exact(&mut file_magic)?;
162
163            if &file_magic != m {
164                return Err(io::Error::new(
165                    io::ErrorKind::InvalidData,
166                    format!(
167                        "file has wrong magic bytes {:x?} (expected {:x?})",
168                        file_magic, m
169                    ),
170                ));
171            }
172        }
173
174        // read the header size and the header
175        let mut serialized_header_size = [0; 2];
176        file.read_exact(&mut serialized_header_size)?;
177        let header_size: usize =
178            u16::from_be(unsafe { transmute(serialized_header_size) }) as usize;
179        let mut header = vec![0; header_size];
180        file.read_exact(&mut header)?;
181
182        // read the bitvec size and calculate the total number of bytes
183        let mut serialized_size = [0; 8];
184        file.read_exact(&mut serialized_size)?;
185        let size: u64 = u64::from_be(unsafe { transmute(serialized_size) });
186
187        let magic_len = if let Some(m) = magic { m.len() } else { 0 };
188        let total_header_size = (magic_len + 2 + header_size + 8) as u64;
189        let byte_size = ((size - 1) >> 3) + 1;
190        if file.metadata()?.len() != total_header_size + byte_size {
191            return Err(io::Error::new(
192                io::ErrorKind::InvalidData,
193                format!(
194                    "file should be {} bytes (with {} header), but file is {} bytes",
195                    byte_size + total_header_size,
196                    total_header_size,
197                    file.metadata()?.len(),
198                ),
199            ));
200        }
201
202        let mmap = if read_only {
203            let mmap = unsafe { MmapOptions::new().offset(total_header_size).map(&file) }?;
204            MmapKind::Mmap(mmap)
205        } else {
206            let mmap = unsafe { MmapOptions::new().offset(total_header_size).map_mut(&file) }?;
207            MmapKind::MmapMut(mmap)
208        };
209
210        Ok(MmapBitVec {
211            mmap,
212            size: size as usize,
213            header: header.into_boxed_slice(),
214            is_map_anonymous: false,
215        })
216    }
217
218    /// Opens a `MmapBitVec` file that doesn't have our "standard" file header format
219    /// TODO: what is the standard file header? if we put in the docstring on the struct we can say to refer to that here
220    pub fn open_no_header<P>(filename: P, offset: usize) -> Result<Self, io::Error>
221    where
222        P: AsRef<Path>,
223    {
224        let file_size = metadata(&filename)?.len() as usize;
225        let byte_size = file_size - offset;
226        let f = OpenOptions::new().read(true).write(false).open(&filename)?;
227        let mmap = unsafe { MmapOptions::new().offset(offset as u64).map(&f) }?;
228
229        Ok(MmapBitVec {
230            mmap: MmapKind::Mmap(mmap),
231            size: byte_size * 8,
232            header: Box::new([]),
233            is_map_anonymous: false,
234        })
235    }
236
237    /// Creates an in-memory  `MmapBitVec` (not backed by a file).
238    ///
239    /// Note that unlike the `create` and `open` no header is set.
240    /// The MmapBitVec is also read/write by default.
241    pub fn from_memory(size: usize) -> Result<Self, io::Error> {
242        let byte_size = ((size - 1) >> 3) as u64 + 1;
243        let mmap = MmapOptions::new().len(byte_size as usize).map_anon()?;
244        Ok(MmapBitVec {
245            mmap: MmapKind::MmapMut(mmap),
246            size,
247            header: vec![].into_boxed_slice(),
248            is_map_anonymous: true,
249        })
250    }
251
252    /// Save in-memory mmap bitvector to disk.
253    /// This is a no-op if the mmap is already file-backed.
254    pub fn save_to_disk<P: AsRef<Path>>(
255        &self,
256        filename: P,
257        magic: Option<[u8; 2]>,
258        header: &[u8],
259    ) -> Result<(), io::Error> {
260        if !self.is_map_anonymous {
261            return Ok(());
262        }
263        let (mut file, _) = create_bitvec_file(filename.as_ref(), self.size, magic, header)?;
264        // We should already be at the right byte to write the content
265        file.write_all(self.mmap.as_slice())?;
266        Ok(())
267    }
268
269    /// Returns the header
270    pub fn header(&self) -> &[u8] {
271        &self.header
272    }
273
274    /// Read/copy an unaligned chunk of the `MmapBitVec`
275    ///
276    /// # Panics
277    ///
278    /// Explicitly panics if the end location, `r.end`, is outside the bounds
279    /// of the bit vector. A panic may also occur if `r.start` is greater than
280    /// `r.end`.
281    pub fn get_range_bytes(&self, r: Range<usize>) -> Vec<u8> {
282        if r.end > self.size {
283            panic!("Range ends outside of BitVec")
284        }
285        let byte_idx_st = r.start >> 3;
286        let byte_idx_en = (r.end - 1) >> 3;
287        let new_size: usize = (((r.end - r.start) - 1) >> 3) + 1;
288
289        let ptr: *const u8 = self.mmap.as_ptr();
290        let mut v = vec![0; new_size];
291
292        // `shift` is the same as the position of the last bit
293        let shift = (r.end & 7) as u8;
294        for (new_idx, old_idx) in (byte_idx_st..=byte_idx_en).enumerate() {
295            let old_val = unsafe { *ptr.add(old_idx) };
296            if new_idx > 0 {
297                if let Some(shifted_val) = old_val.checked_shr(u32::from(shift)) {
298                    v[new_idx - 1] |= shifted_val;
299                }
300            }
301            if new_idx < new_size {
302                v[new_idx] |= (old_val & (0xFF >> shift)) << shift;
303            }
304        }
305        v
306    }
307
308    /// Set an unaligned range of bits in the bit vector from a byte slice.
309    ///
310    /// Note this operation ORs the passed byteslice and the existing bitmask.
311    ///
312    /// # Panics
313    ///
314    /// Explicitly panics if the end location, `r.end`, is outside the bounds
315    /// of the bit vector or if the byte slice passed in is a different size
316    /// from the range specified. A panic may also occur if `r.start` is greater
317    /// than `r.end`.
318    pub fn set_range_bytes(&mut self, r: Range<usize>, x: &[u8]) {
319        if r.end > self.size {
320            panic!("Range ends outside of BitVec")
321        }
322        let new_size: usize = r.end - r.start;
323        if ((new_size - 1) >> 3) + 1 != x.len() {
324            panic!("Range and array passed are different sizes")
325        }
326        // Ignoring r.start except for checking that it roughly
327        // matches up with the size of the byte slice. This works because
328        // we're ORing the bits together, so any extra zeros at the front
329        // of the first byte in x shouldn't affect the final result
330        let max_len = 8 * x.len();
331        let byte_idx_st = if r.end - 1 > max_len {
332            ((r.end - 1 - max_len) >> 3) + 1
333        } else {
334            0
335        };
336        let byte_idx_en = (r.end - 1) >> 3;
337
338        let mmap: *mut u8 = self
339            .mmap
340            .as_mut_ptr()
341            .expect("set_range_bytes can only be called on a mutable mmap");
342
343        let shift = 8 - (r.end & 7) as u8;
344        let mask = 0xFFu8.checked_shr(u32::from(8 - shift)).unwrap_or(0xFF);
345        for (val, idx) in x.iter().zip(byte_idx_st..=byte_idx_en) {
346            let shifted_val = val.checked_shr(u32::from(8 - shift)).unwrap_or(0);
347            if idx > 0 && shift != 8 {
348                unsafe {
349                    *mmap.offset(idx as isize - 1) |= shifted_val;
350                }
351            }
352            let shifted_val = (val & mask).checked_shl(u32::from(shift)).unwrap_or(*val);
353            unsafe {
354                *mmap.add(idx) |= shifted_val;
355            }
356        }
357    }
358}
359
360impl BitVector for MmapBitVec {
361    /// Check a single value in the `MmapBitVec`, returning its true/false status
362    ///
363    /// # Panics
364    ///
365    /// Panics if the location, `i`, is outside the bounds of the bit vector
366    fn get(&self, i: usize) -> bool {
367        if i > self.size {
368            panic!("Invalid bit vector index");
369        }
370        let byte_idx = (i >> 3) as isize;
371        let bit_idx = 7 - (i & 7) as u8;
372
373        let mmap: *const u8 = self.mmap.as_ptr();
374        unsafe { (*mmap.offset(byte_idx) & (1 << bit_idx)) != 0 }
375    }
376
377    /// Set a single bit in the bit vector
378    ///
379    /// # Panics
380    ///
381    /// Panics if the location, `i`, is outside the bounds of the bit vector
382    fn set(&mut self, i: usize, x: bool) {
383        if i > self.size {
384            panic!("Invalid bit vector index");
385        }
386        let byte_idx = (i >> 3) as isize;
387        let bit_idx = 7 - (i & 7) as u8;
388
389        let mmap: *mut u8 = self
390            .mmap
391            .as_mut_ptr()
392            .expect("set can only be called on a mutable mmap");
393        unsafe {
394            if x {
395                *mmap.offset(byte_idx) |= 1 << bit_idx
396            } else {
397                *mmap.offset(byte_idx) &= !(1 << bit_idx)
398            }
399        }
400    }
401
    /// Returns the length (in bits) of the bit vector
    ///
    /// This is the logical bit count stored in the `size` field, not the
    /// number of mapped bytes.
    fn size(&self) -> usize {
        self.size
    }
406
407    /// Return the number of set bits in the range `r`
408    fn rank(&self, r: Range<usize>) -> usize {
409        let byte_idx_st = r.start >> 3;
410        let byte_idx_en = (r.end - 1) >> 3;
411        let mmap: *const u8 = self.mmap.as_ptr();
412
413        let mut bit_count = 0usize;
414
415        let mut size_front = 8u8 - (r.start & 7) as u8;
416        if size_front == 8 {
417            size_front = 0;
418        }
419        if let Some(mask) = 0xFFu8.checked_shl(u32::from(size_front)) {
420            let byte = unsafe { *mmap.add(byte_idx_st) & mask };
421            bit_count += byte.count_ones() as usize
422        }
423
424        // if the front is all there is, we can bail now
425        if byte_idx_st == byte_idx_en {
426            return bit_count;
427        }
428
429        // get the last byte (also a "partial" byte like the first)
430        let mut size_back = (r.end & 7) as u8;
431        if size_back == 8 {
432            size_back = 0;
433        }
434        if let Some(mask) = 0xFFu8.checked_shr(u32::from(size_back)) {
435            let byte = unsafe { *mmap.add(byte_idx_en) & mask };
436            bit_count += byte.count_ones() as usize
437        }
438
439        // only two bytes long, bail out
440        if byte_idx_st + 1 == byte_idx_en {
441            return bit_count;
442        }
443
444        // get all the intermediate bytes (which don't need masking)
445        for byte_idx in (byte_idx_st + 1)..byte_idx_en {
446            let byte = unsafe { *mmap.add(byte_idx) };
447            bit_count += byte.count_ones() as usize
448        }
449
450        bit_count
451    }
452
453    /// Return the position of the `nth` set bit with `start` treated as the 0th position, or `None` if there is no set bit
454    fn select(&self, n: usize, start: usize) -> Option<usize> {
455        let byte_idx_st = start >> 3;
456        let size_front = 8u8 - (start & 7) as u8;
457        let mmap: *const u8 = self.mmap.as_ptr();
458
459        let mut rank_count = 0usize;
460        for byte_idx in byte_idx_st.. {
461            let mut byte = unsafe { *mmap.add(byte_idx) };
462            if byte_idx == byte_idx_st {
463                if let Some(mask) = 0xFFu8.checked_shl(u32::from(size_front)) {
464                    byte &= mask;
465                }
466            }
467            if rank_count + byte.count_ones() as usize >= n {
468                for bit_idx in 0..8 {
469                    if (0b1000_0000 >> bit_idx) & byte != 0 {
470                        rank_count += 1;
471                    }
472                    if rank_count == n {
473                        return Some((byte_idx << 3) + bit_idx);
474                    }
475                }
476                panic!("Select failed to find enough bits (but there were!)");
477            }
478            rank_count += byte.count_ones() as usize;
479        }
480        None
481    }
482
483    /// Read an unaligned chunk of the `MmapBitVec` into a `u128`
484    ///
485    /// # Panics
486    ///
487    /// Explicitly panics if the end location, `r.end`, is outside the bounds
488    /// of the bit vector or if the range specified is greater than 128 bits.
489    /// (Use `get_range_bytes` instead if you need to read larger chunks) A panic
490    /// will also occur when `r.start` is greater than `r.end`.
491    fn get_range(&self, r: Range<usize>) -> u128 {
492        if r.end - r.start > 128usize {
493            panic!("Range too large (>128)")
494        } else if r.end > self.size {
495            panic!("Range ends outside of BitVec")
496        }
497        let byte_idx_st = r.start >> 3;
498        let byte_idx_en = (r.end - 1) >> 3;
499        let new_size: u8 = (r.end - r.start) as u8;
500
501        let mut v;
502        let ptr: *const u8 = self.mmap.as_ptr();
503
504        // read the last byte first
505        unsafe {
506            v = u128::from(*ptr.add(byte_idx_en));
507        }
508        // align the end of the data with the end of the u128
509        v >>= 7 - ((r.end - 1) & 7);
510
511        if r.start < self.size - 128usize {
512            unsafe {
513                // we have to transmute since we don't know if it's a u64 or u128
514                #[allow(clippy::transmute_ptr_to_ptr)]
515                let lg_ptr: *const u128 = transmute(ptr.add(byte_idx_st));
516                v |= lg_ptr.read_unaligned().to_be() << (r.start & 7) >> (128 - new_size);
517            }
518        } else {
519            // special case if we can't get a whole u64 out without running outside the buffer
520            let bit_offset = new_size + (r.start & 7) as u8;
521            for (new_idx, old_idx) in (byte_idx_st..byte_idx_en).enumerate() {
522                unsafe {
523                    v |= u128::from(*ptr.add(old_idx)) << (bit_offset - 8u8 * (new_idx as u8 + 1));
524                }
525            }
526        }
527
528        // mask out the high bits in case we copied extra
529        v & (u128::MAX >> (128 - new_size))
530    }
531
532    /// Set an unaligned range of bits using a `u64`.
533    ///
534    /// Note this operation ORs the passed u64 and the existing bitmask
535    ///
536    /// # Panics
537    ///
538    /// Explicitly panics if the end location, `r.end`, is outside the bounds
539    /// of the bit vector. A panic will also occur when `r.start` is greater than
540    /// `r.end`.
541    fn set_range(&mut self, r: Range<usize>, x: u128) {
542        if r.end > self.size {
543            panic!("Range ends outside of BitVec")
544        }
545        let byte_idx_st = r.start >> 3;
546        let byte_idx_en = (r.end - 1) >> 3;
547        let new_size: u8 = (r.end - r.start) as u8;
548
549        // split off the front byte
550        let size_front = 8u8 - (r.start & 7) as u8;
551        let front_byte = if size_front >= new_size {
552            (x << (size_front - new_size)) as u8
553        } else {
554            (x >> (new_size - size_front)) as u8
555        };
556
557        // set front with an AND mask to make sure all the 0s in the
558        // new value get masked over the existing 1s
559        let mmap: *mut u8 = self
560            .mmap
561            .as_mut_ptr()
562            .expect("set_range can only be called on a mutable mmap");
563        unsafe {
564            *mmap.add(byte_idx_st) |= front_byte;
565        }
566
567        // if the front is all there is, we can bail now
568        if byte_idx_st == byte_idx_en {
569            return;
570        }
571
572        // set the last byte (also a "partial" byte like the first)
573        let mut size_back = (r.end & 7) as u8;
574        if size_back == 0 {
575            size_back = 8;
576        }
577        let back_byte = (x << (128 - size_back) >> 120) as u8;
578        unsafe {
579            *mmap.add(byte_idx_en) |= back_byte;
580        }
581
582        // only two bytes long, bail out
583        if byte_idx_st + 1 == byte_idx_en {
584            return;
585        }
586
587        let size_main = new_size - size_front;
588        // shift off the first byte (and we don't care about the last byte,
589        // because we won't iterate through it) and then make sure that the
590        // u64 is stored in the "right" order in memory
591        let main_chunk = (x << (128 - size_main)).to_be();
592
593        let bytes: [u8; 16] = main_chunk.to_le_bytes();
594        for (byte_idx, byte) in ((byte_idx_st + 1)..byte_idx_en).zip(bytes.iter()) {
595            unsafe {
596                *mmap.add(byte_idx) |= *byte;
597            }
598        }
599    }
600
601    fn clear_range(&mut self, r: Range<usize>) {
602        if (r.end - 1) > self.size {
603            panic!("Range ends outside of BitVec")
604        }
605        let byte_idx_st = r.start >> 3;
606        let byte_idx_en = (r.end - 1) >> 3;
607
608        let mmap: *mut u8 = self
609            .mmap
610            .as_mut_ptr()
611            .expect("clear range can only be called on a mutable mmap");
612
613        // set front with an AND mask to make sure all the 0s in the
614        // new value get masked over the existing 1s
615        let size_front = 8u8 - (r.start & 7) as u8;
616        if let Some(mask) = 0xFFu8.checked_shl(u32::from(size_front)) {
617            unsafe {
618                *mmap.add(byte_idx_st) &= mask;
619            }
620        }
621
622        // if the front is all there is, we can bail now
623        if byte_idx_st == byte_idx_en {
624            return;
625        }
626
627        // set the last byte (also a "partial" byte like the first)
628        let mut size_back = (r.end & 7) as u8;
629        if size_back == 0 {
630            size_back = 8;
631        }
632        if let Some(mask) = 0xFFu8.checked_shr(u32::from(size_back)) {
633            unsafe {
634                *mmap.add(byte_idx_en) &= mask;
635            }
636        }
637
638        // only two bytes long, bail out
639        if byte_idx_st + 1 == byte_idx_en {
640            return;
641        }
642
643        // zero out all the middle bytes
644        for byte_idx in (byte_idx_st + 1)..byte_idx_en {
645            unsafe {
646                *mmap.add(byte_idx) = 0u8;
647            }
648        }
649    }
650}
651
652/// Drop is implemented for `BitVec` to explicitly flush any changes to the
653/// file before the memory map is closed.
654impl Drop for MmapBitVec {
655    fn drop(&mut self) {
656        let _ = self.mmap.flush();
657    }
658}
659
660#[cfg(test)]
661mod test {
662    use std::path::Path;
663
664    use super::MmapBitVec;
665    use crate::bitvec::BitVector;
666
667    #[test]
668    fn test_bitvec() {
669        use std::fs::remove_file;
670
671        let header = vec![];
672        let mut b = MmapBitVec::create("./test", 100, None, &header).unwrap();
673        b.set(2, true);
674        assert!(!b.get(1));
675        assert!(b.get(2));
676        assert!(!b.get(100));
677        drop(b);
678        assert!(Path::new("./test").exists());
679
680        let b = MmapBitVec::open("./test", None, true).unwrap();
681        assert!(!b.get(1));
682        assert!(b.get(2));
683        assert!(!b.get(100));
684
685        remove_file("./test").unwrap();
686    }
687
688    #[test]
689    fn test_open_no_header() {
690        use std::fs::remove_file;
691
692        let header = vec![];
693        // the bitvector has to be a size with a multiple of 8 because the
694        // no_header code always opens to the end of the last byte
695        let _ = MmapBitVec::create("./test_headerless", 80, None, &header).unwrap();
696        assert!(Path::new("./test_headerless").exists());
697        let b = MmapBitVec::open_no_header("./test_headerless", 12).unwrap();
698        assert_eq!(b.size(), 64);
699        remove_file("./test_headerless").unwrap();
700    }
701
702    #[test]
703    fn test_bitvec_get_range() {
704        let mut b = MmapBitVec::from_memory(1024).unwrap();
705        b.set(2, true);
706        b.set(3, true);
707        b.set(5, true);
708        assert_eq!(b.get_range(0..8), 52, "indexing within a single byte");
709        assert_eq!(b.get_range(0..16), 13312, "indexing multiple bytes");
710        assert_eq!(
711            b.get_range(0..64),
712            3_746_994_889_972_252_672,
713            "indexing the maximum # of bytes"
714        );
715        assert_eq!(
716            b.get_range(64..128),
717            0,
718            "indexing the maximum # of bytes to the end"
719        );
720        assert_eq!(b.get_range(2..10), 208, "indexing across bytes");
721        assert_eq!(
722            b.get_range(2..66),
723            14_987_979_559_889_010_688,
724            "indexing the maximum # of bytes across bytes"
725        );
726        assert_eq!(b.get_range(115..128), 0, "indexing across bytes to the end");
727    }
728
729    #[test]
730    fn test_bitvec_get_range_bytes() {
731        let mut b = MmapBitVec::from_memory(128).unwrap();
732        b.set(2, true);
733        b.set(3, true);
734        b.set(5, true);
735        assert_eq!(
736            b.get_range_bytes(0..8),
737            &[0x34],
738            "indexing within a single byte"
739        );
740        assert_eq!(
741            b.get_range_bytes(0..16),
742            &[0x34, 0x00],
743            "indexing multiple bytes"
744        );
745        assert_eq!(
746            b.get_range_bytes(0..64),
747            &[0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
748            "indexing the maximum # of bytes"
749        );
750        assert_eq!(
751            b.get_range_bytes(64..128),
752            &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
753            "indexing the maximum # of bytes to the end"
754        );
755        assert_eq!(b.get_range_bytes(2..10), &[0xD0], "indexing across bytes");
756        assert_eq!(
757            b.get_range_bytes(2..66),
758            &[0xD0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
759            "indexing the maximum # of bytes across bytes"
760        );
761        assert_eq!(
762            b.get_range_bytes(115..128),
763            &[0x00, 0x00],
764            "indexing across bytes to the end"
765        );
766    }
767
768    #[test]
769    fn test_bitvec_set_range() {
770        let mut b = MmapBitVec::from_memory(128).unwrap();
771        b.set_range(0..4, 0b0101);
772        assert_eq!(b.get_range(0..4), 0b0101);
773        b.set_range(5..8, 0b0101);
774        assert_eq!(b.get_range(5..8), 0b0101);
775        b.set_range(123..127, 0b0101);
776        assert_eq!(b.get_range(123..127), 0b0101);
777
778        // test across a byte boundary
779        b.set_range(6..9, 0b111);
780        assert_eq!(b.get_range(6..9), 0b111);
781
782        // test zeroing works on both sides of a byte boundary
783        b.set_range(0..16, 0xFFFF);
784        assert_eq!(b.get_range(0..16), 0xFFFF);
785        b.clear_range(4..12);
786        assert_eq!(b.get_range(0..16), 0xF00F);
787
788        // test setting multiple bytes (and that overflow doesn't happen)
789        b.set_range(20..36, 0xFFFF);
790        assert_eq!(b.get_range(16..20), 0x0);
791        assert_eq!(b.get_range(20..36), 0xFFFF);
792        assert_eq!(b.get_range(36..44), 0x0);
793
794        // set an entire range
795        assert_eq!(b.get_range(39..103), 0x0);
796        b.set_range(39..103, 0xABCD1234);
797        assert_eq!(b.get_range(39..103), 0xABCD1234);
798    }
799
800    #[test]
801    fn test_bitvec_set_range_bytes() {
802        let mut b = MmapBitVec::from_memory(128).unwrap();
803        b.set_range_bytes(0..4, &[0x05u8]);
804        assert_eq!(b.get_range(0..4), 0b0101);
805        b.set_range_bytes(5..8, &[0x05u8]);
806        assert_eq!(b.get_range(5..8), 0b0101);
807
808        // clear the first part
809        b.clear_range(0..16);
810
811        // test across a byte boundary
812        b.set_range_bytes(6..10, &[0x0Du8]);
813        assert_eq!(b.get_range(6..10), 0x0D);
814
815        // test setting multiple bytes
816        b.set_range_bytes(0..16, &[0xFFu8, 0xFFu8]);
817        assert_eq!(b.get_range(0..16), 0xFFFF);
818
819        // test setting multiple bytes across boundaries
820        b.set_range_bytes(20..36, &[0xFFu8, 0xFFu8]);
821        assert_eq!(b.get_range(20..36), 0xFFFF);
822
823        // testing ORing works
824        b.set_range_bytes(64..80, &[0xA0u8, 0x0Au8]);
825        assert_eq!(b.get_range(64..80), 0xA00A);
826        b.set_range_bytes(64..80, &[0x0Bu8, 0xB0u8]);
827        assert_eq!(b.get_range(64..80), 0xABBA);
828    }
829
830    #[test]
831    fn test_rank_select() {
832        let mut b = MmapBitVec::from_memory(128).unwrap();
833        b.set(7, true);
834        b.set(56, true);
835        b.set(127, true);
836
837        assert_eq!(b.rank(0..8), 1);
838        assert_eq!(b.rank(0..128), 3);
839
840        assert_eq!(b.select(1, 0), Some(7));
841        assert_eq!(b.select(3, 0), Some(127));
842    }
843
844    #[test]
845    fn can_write_anon_mmap_to_disk() {
846        let mut b = MmapBitVec::from_memory(128).unwrap();
847        b.set(0, true);
848        b.set(7, true);
849        b.set(56, true);
850        b.set(127, true);
851        let dir = tempfile::tempdir().unwrap();
852        b.save_to_disk(dir.path().join("test"), None, &[]).unwrap();
853        let f = MmapBitVec::open(dir.path().join("test"), None, false).unwrap();
854        assert_eq!(f.get(0), true);
855        assert_eq!(f.get(7), true);
856        assert_eq!(f.get(56), true);
857        assert_eq!(f.get(127), true);
858        assert_eq!(f.get(10), false);
859    }
860}