use crate::serialize::Serialize;
#[cfg(not(target_family = "wasm"))]
use crate::serialize::{MappedSlice, MemoryMap, MemoryMapped};
use crate::bits;
use std::fs::{File, OpenOptions};
use std::io::{Error, ErrorKind, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use std::{cmp, io};
#[cfg(test)]
mod tests;
/// Read (and optionally write) access to a bit sequence stored in `u64` words.
///
/// This file implements the trait for `RawVector` (mutable) and, on non-wasm
/// targets, for `RawVectorMapper` (read-only).
pub trait AccessRaw {
/// Returns the bit at offset `bit_offset`.
fn bit(&self, bit_offset: usize) -> bool;
/// Returns a `width`-bit integer starting at offset `bit_offset`.
///
/// # Safety
///
/// NOTE(review): the implementations in this file forward to `bits::read_int`,
/// which is not visible here. Presumably `width` must be at most 64 and the
/// range must lie within the stored bits -- confirm against that helper.
unsafe fn int(&self, bit_offset: usize, width: usize) -> u64;
/// Returns the `u64` word at position `index` in the underlying word array.
fn word(&self, index: usize) -> u64;
/// Returns the `u64` word at position `index` without bounds checking.
///
/// # Safety
///
/// The implementations in this file use `get_unchecked`, so `index` must be
/// a valid index into the underlying word array.
unsafe fn word_unchecked(&self, index: usize) -> u64;
/// Returns `true` if the `set_*` methods are supported.
///
/// `RawVector` returns `true`; `RawVectorMapper` returns `false` and panics
/// in its `set_*` methods.
fn is_mutable(&self) -> bool;
/// Sets the bit at offset `bit_offset` to `value`.
fn set_bit(&mut self, bit_offset: usize, value: bool);
/// Writes the `width`-bit integer `value` starting at offset `bit_offset`.
///
/// # Safety
///
/// NOTE(review): `RawVector` forwards to `bits::write_int`, which is not
/// visible here. Presumably the same preconditions as for `int` apply -- confirm.
unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize);
}
/// Appending bits and integers to the end of a bit sequence.
///
/// Implemented in this file by `RawVector` and `RawVectorWriter`.
pub trait PushRaw {
/// Appends a single bit.
fn push_bit(&mut self, value: bool);
/// Appends the `width`-bit integer `value`.
///
/// # Safety
///
/// NOTE(review): `RawVector` grows by at most one word before writing, so
/// `width` presumably must not exceed 64 -- confirm against `bits::write_int`.
unsafe fn push_int(&mut self, value: u64, width: usize);
}
/// Removing bits and integers from the end of a bit sequence.
///
/// Implemented in this file by `RawVector`.
pub trait PopRaw {
/// Removes and returns the last bit, or returns `None` if there is none.
fn pop_bit(&mut self) -> Option<bool>;
/// Removes and returns the last `width` bits as an integer, or returns `None`
/// if fewer than `width` bits are stored.
///
/// # Safety
///
/// NOTE(review): `RawVector` reads the value through `AccessRaw::int`, so the
/// preconditions of that method (presumably `width <= 64`) apply -- confirm.
unsafe fn pop_int(&mut self, width: usize) -> Option<u64>;
}
/// A growable bit sequence stored in a vector of `u64` words.
///
/// The methods in this file keep two invariants that `PartialEq`, `count_ones`
/// and `push_bit` rely on: `data` holds exactly `bits::bits_to_words(len)` words,
/// and the bits of the last word beyond `len` are zero.
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct RawVector {
// Number of bits stored in the vector.
len: usize,
// Backing words; bit offsets are mapped to (word, offset) pairs by `bits::split_offset`.
data: Vec<u64>,
}
impl RawVector {
    /// Returns the length of the vector in bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if the vector contains no bits.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the number of bits that fit into the words currently allocated
    /// by the backing vector.
    #[inline]
    pub fn capacity(&self) -> usize {
        bits::words_to_bits(self.data.capacity())
    }

    /// Counts the set bits in the backing words.
    ///
    /// The result equals the number of set bits in the vector because the unused
    /// bits of the last word are kept at zero.
    pub fn count_ones(&self) -> usize {
        self.data.iter().map(|word| word.count_ones() as usize).sum()
    }

    /// Creates an empty vector without allocating.
    pub fn new() -> RawVector {
        RawVector::default()
    }

    /// Creates a vector of `len` bits in which every word is initialized with
    /// `bits::filler_value(value)`; the unused bits of the last word are cleared.
    pub fn with_len(len: usize, value: bool) -> RawVector {
        let filler = bits::filler_value(value);
        let mut result = RawVector {
            len,
            data: vec![filler; bits::bits_to_words(len)],
        };
        result.set_unused_bits(false);
        result
    }

    /// Creates an empty vector with room for at least `capacity` bits.
    pub fn with_capacity(capacity: usize) -> RawVector {
        RawVector {
            len: 0,
            data: Vec::with_capacity(bits::bits_to_words(capacity)),
        }
    }

    /// Returns `2 + bits::bits_to_words(capacity)`.
    ///
    /// NOTE(review): presumably the serialized size in `u64` elements of a vector
    /// with `capacity` bits (one word for the bit length, one for the word count,
    /// plus the data words) -- confirm against the `Serialize` implementation.
    pub fn size_by_params(capacity: usize) -> usize {
        2 + bits::bits_to_words(capacity)
    }

    /// Returns a copy of the vector with every bit flipped.
    pub fn complement(&self) -> RawVector {
        let mut result = self.clone();
        for word in result.data.iter_mut() {
            *word = !*word;
        }
        // Flipping whole words also flips the padding bits; restore the invariant.
        result.set_unused_bits(false);
        result
    }

    /// Resizes the vector to `new_len` bits, initializing any new bits with `value`.
    pub fn resize(&mut self, new_len: usize, value: bool) {
        // When growing, the padding bits of the current last word become real bits.
        if new_len > self.len() {
            self.set_unused_bits(value);
        }
        self.data.resize(bits::bits_to_words(new_len), bits::filler_value(value));
        self.len = new_len;
        self.set_unused_bits(false);
    }

    /// Removes all bits from the vector. The allocated capacity is kept.
    pub fn clear(&mut self) {
        self.data.clear();
        self.len = 0;
    }

    /// Reserves space for at least `additional` more bits.
    ///
    /// After the call, `capacity() >= len() + additional`.
    pub fn reserve(&mut self, additional: usize) {
        let words_needed = bits::bits_to_words(self.len() + additional);
        // `Vec::reserve` counts additional elements from the vector's *length*,
        // not from its capacity, and is a no-op when the capacity already suffices.
        self.data.reserve(words_needed.saturating_sub(self.data.len()));
    }

    // Sets the bits of the last word that lie beyond `len` to `value`.
    // Does nothing when `len` ends exactly on a word boundary.
    fn set_unused_bits(&mut self, value: bool) {
        let (index, width) = bits::split_offset(self.len());
        if width > 0 {
            if value {
                self.data[index] |= !bits::low_set(width);
            } else {
                self.data[index] &= bits::low_set(width);
            }
        }
    }

    /// Returns the backing words as a slice.
    pub fn get_words(&self) -> &[u64] {
        &self.data
    }
}
impl AccessRaw for RawVector {
    /// Returns the bit at `bit_offset`. Panics if the word holding it is out of bounds.
    #[inline]
    fn bit(&self, bit_offset: usize) -> bool {
        let (word, shift) = bits::split_offset(bit_offset);
        (self.data[word] & (1u64 << shift)) != 0
    }

    /// Reads a `width`-bit integer starting at `bit_offset` via `bits::read_int`.
    ///
    /// # Safety
    ///
    /// The caller must uphold the preconditions of `bits::read_int`.
    #[inline]
    unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
        bits::read_int(&self.data, bit_offset, width)
    }

    /// Returns the word at `index`. Panics if `index` is out of bounds.
    #[inline]
    fn word(&self, index: usize) -> u64 {
        self.data[index]
    }

    /// Returns the word at `index` without bounds checking.
    ///
    /// # Safety
    ///
    /// `index` must be a valid index into the backing word vector.
    #[inline]
    unsafe fn word_unchecked(&self, index: usize) -> u64 {
        *self.data.get_unchecked(index)
    }

    /// Always `true`: a `RawVector` can be modified in place.
    #[inline]
    fn is_mutable(&self) -> bool {
        true
    }

    /// Sets the bit at `bit_offset` to `value`. Panics if the word holding it is
    /// out of bounds.
    #[inline]
    fn set_bit(&mut self, bit_offset: usize, value: bool) {
        let (word, shift) = bits::split_offset(bit_offset);
        let slot = &mut self.data[word];
        // Clear the target bit, then OR in the new value.
        *slot = (*slot & !(1u64 << shift)) | ((value as u64) << shift);
    }

    /// Writes the `width`-bit integer `value` at `bit_offset` via `bits::write_int`.
    ///
    /// # Safety
    ///
    /// The caller must uphold the preconditions of `bits::write_int`.
    #[inline]
    unsafe fn set_int(&mut self, bit_offset: usize, value: u64, width: usize) {
        bits::write_int(&mut self.data, bit_offset, value, width);
    }
}
impl PushRaw for RawVector {
    /// Appends one bit, adding a new word when the current ones are full.
    fn push_bit(&mut self, value: bool) {
        let (word, shift) = bits::split_offset(self.len);
        if self.data.len() == word {
            self.data.push(0);
        }
        // The padding bits are zero, so OR-ing is enough to store the value.
        let slot = &mut self.data[word];
        *slot |= u64::from(value) << shift;
        self.len += 1;
    }

    /// Appends the `width`-bit integer `value`, adding at most one new word.
    ///
    /// # Safety
    ///
    /// The caller must uphold the preconditions of `bits::write_int`. Only one
    /// word is added, so `width` must not exceed the number of bits in a word.
    unsafe fn push_int(&mut self, value: u64, width: usize) {
        let new_len = self.len + width;
        let allocated_bits = bits::words_to_bits(self.data.len());
        if new_len > allocated_bits {
            self.data.push(0);
        }
        bits::write_int(&mut self.data, self.len, value, width);
        self.len = new_len;
    }
}
impl PopRaw for RawVector {
    /// Removes and returns the last bit, or returns `None` if the vector is empty.
    fn pop_bit(&mut self) -> Option<bool> {
        if self.is_empty() {
            return None;
        }
        let last = self.len - 1;
        let popped = self.bit(last);
        self.len = last;
        // Drop a word that is no longer needed and zero the freed bit.
        self.data.resize(bits::bits_to_words(self.len()), 0);
        self.set_unused_bits(false);
        Some(popped)
    }

    /// Removes and returns the last `width` bits as an integer, or returns `None`
    /// if the vector holds fewer than `width` bits.
    ///
    /// # Safety
    ///
    /// The caller must uphold the preconditions of `AccessRaw::int`.
    unsafe fn pop_int(&mut self, width: usize) -> Option<u64> {
        if self.len() < width {
            return None;
        }
        let start = self.len - width;
        let popped = self.int(start, width);
        self.len = start;
        // Drop words that are no longer needed and zero the freed bits.
        self.data.resize(bits::bits_to_words(self.len()), 0);
        self.set_unused_bits(false);
        Some(popped)
    }
}
impl Serialize for RawVector {
    /// Writes the bit length followed by the header of the word vector.
    fn serialize_header<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
        self.len.serialize(writer)?;
        self.data.serialize_header(writer)
    }

    /// Writes the body of the word vector.
    fn serialize_body<T: io::Write>(&self, writer: &mut T) -> io::Result<()> {
        self.data.serialize_body(writer)
    }

    /// Reads a bit length and a word vector from `reader`.
    ///
    /// # Errors
    ///
    /// Propagates read errors, and returns `ErrorKind::InvalidData` if the number
    /// of words does not match the bit length.
    fn load<T: io::Read>(reader: &mut T) -> io::Result<Self> {
        let len = usize::load(reader)?;
        let data = <Vec<u64> as Serialize>::load(reader)?;
        if data.len() == bits::bits_to_words(len) {
            Ok(RawVector { len, data })
        } else {
            Err(Error::new(ErrorKind::InvalidData, "Bit length / word length mismatch"))
        }
    }

    /// Returns the combined size of the bit length and the word vector, in elements.
    fn size_in_elements(&self) -> usize {
        let len_size = self.len.size_in_elements();
        len_size + self.data.size_in_elements()
    }
}
impl AsRef<[u64]> for RawVector {
    /// Borrows the backing words as a slice.
    #[inline]
    fn as_ref(&self) -> &[u64] {
        self.data.as_slice()
    }
}
/// Writes a bit sequence to a file incrementally, buffering pushed bits in a
/// `RawVector` and flushing the buffer to the file when it fills up.
///
/// The file header is written when the writer is created and rewritten with the
/// final length when the writer is closed (explicitly or on drop).
#[derive(Debug)]
pub struct RawVectorWriter {
// Total number of bits pushed so far.
len: usize,
// Buffer length in bits at which a push triggers a flush.
buf_len: usize,
// Bits that have not been written to the file yet.
buf: RawVector,
// Output file; `None` once the writer has been closed.
file: Option<File>,
// Path the file was opened with.
filename: PathBuf,
}
/// Controls how `RawVectorWriter::flush` treats bits beyond the buffer length.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum FlushMode {
// Write only the first `buf_len` bits and keep any excess bits in the buffer.
Safe,
// Write the entire buffer.
Final,
}
impl RawVectorWriter {
pub const DEFAULT_BUFFER_SIZE: usize = 8 * 1024 * 1024;
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn new<P: AsRef<Path>>(filename: P, header: &mut Vec<u64>) -> io::Result<RawVectorWriter> {
let mut options = OpenOptions::new();
let file = options.create(true).write(true).truncate(true).open(&filename)?;
let buf = RawVector::with_capacity(Self::DEFAULT_BUFFER_SIZE + bits::WORD_BITS);
let mut name = PathBuf::new();
name.push(&filename);
let mut result = RawVectorWriter {
len: 0,
buf_len: Self::DEFAULT_BUFFER_SIZE,
buf,
file: Some(file),
filename: name,
};
result.write_header(header)?;
Ok(result)
}
pub fn with_buf_len<P: AsRef<Path>>(filename: P, header: &mut Vec<u64>, buf_len: usize) -> io::Result<RawVectorWriter> {
let buf_len = cmp::max(bits::round_up_to_word_bits(buf_len), bits::WORD_BITS);
let mut options = OpenOptions::new();
let file = options.create(true).write(true).truncate(true).open(&filename)?;
let buf = RawVector::with_capacity(buf_len + bits::WORD_BITS);
let mut name = PathBuf::new();
name.push(&filename);
let mut result = RawVectorWriter {
len: 0,
buf_len,
buf,
file: Some(file),
filename: name,
};
result.write_header(header)?;
Ok(result)
}
pub fn filename(&self) -> &Path {
self.filename.as_path()
}
pub fn is_open(&self) -> bool {
self.file.is_some()
}
fn flush(&mut self, mode: FlushMode) -> io::Result<()> {
if let Some(f) = self.file.as_mut() {
let mut overflow: (u64, usize) = (0, 0);
if let FlushMode::Safe = mode {
if self.buf.len() > self.buf_len {
unsafe { overflow = (self.buf.int(self.buf_len, self.buf.len() - self.buf_len), self.buf.len() - self.buf_len); }
self.buf.resize(self.buf_len, false);
}
}
self.buf.serialize_body(f)?;
self.buf.clear();
if let FlushMode::Safe = mode {
if overflow.1 > 0 {
unsafe { self.buf.push_int(overflow.0, overflow.1); }
}
}
}
Ok(())
}
fn write_header(&mut self, header: &mut Vec<u64>) -> io::Result<()> {
if let Some(f) = self.file.as_mut() {
f.seek(SeekFrom::Start(0))?;
header.push(self.len as u64);
header.push(bits::bits_to_words(self.len) as u64);
header.serialize_body(f)?;
}
Ok(())
}
pub fn close(&mut self) -> io::Result<()> {
let mut header: Vec<u64> = Vec::new();
self.close_with_header(&mut header)
}
pub fn close_with_header(&mut self, header: &mut Vec<u64>) -> io::Result<()> {
if self.is_open() {
self.flush(FlushMode::Final)?;
self.write_header(header)?;
self.file = None
}
Ok(())
}
}
impl PushRaw for RawVectorWriter {
    /// Appends one bit to the buffer and flushes the buffer once it is full.
    ///
    /// # Panics
    ///
    /// Panics if flushing the buffer to the file fails.
    fn push_bit(&mut self, value: bool) {
        self.buf.push_bit(value);
        self.len += 1;
        let buffer_full = self.buf.len() >= self.buf_len;
        if buffer_full {
            self.flush(FlushMode::Safe).unwrap();
        }
    }

    /// Appends a `width`-bit integer to the buffer and flushes the buffer once it
    /// is full.
    ///
    /// # Safety
    ///
    /// The caller must uphold the preconditions of `RawVector::push_int`.
    ///
    /// # Panics
    ///
    /// Panics if flushing the buffer to the file fails.
    unsafe fn push_int(&mut self, value: u64, width: usize) {
        self.buf.push_int(value, width);
        self.len += width;
        let buffer_full = self.buf.len() >= self.buf_len;
        if buffer_full {
            self.flush(FlushMode::Safe).unwrap();
        }
    }
}
impl Drop for RawVectorWriter {
    /// Closes the writer if it is still open.
    fn drop(&mut self) {
        // Best effort: there is no way to report an I/O error from `drop`.
        let _best_effort = self.close();
    }
}
/// An immutable bit sequence whose words live in a memory-mapped file.
///
/// Not available on wasm targets.
#[cfg(not(target_family = "wasm"))]
#[derive(PartialEq, Eq, Debug)]
pub struct RawVectorMapper<'a> {
// Number of bits, as read from the memory map.
len: usize,
// Memory-mapped words holding the bits.
data: MappedSlice<'a, u64>,
}
#[cfg(not(target_family = "wasm"))]
impl<'a> RawVectorMapper<'a> {
    /// Returns the length of the vector in bits.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` if the vector contains no bits.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Counts the set bits in all mapped words.
    pub fn count_ones(&self) -> usize {
        self.data.iter().map(|word| word.count_ones() as usize).sum()
    }
}
#[cfg(not(target_family = "wasm"))]
impl<'a> AccessRaw for RawVectorMapper<'a> {
    /// Returns the bit at `bit_offset`.
    #[inline]
    fn bit(&self, bit_offset: usize) -> bool {
        let (word, shift) = bits::split_offset(bit_offset);
        (self.data[word] & (1u64 << shift)) != 0
    }

    /// Reads a `width`-bit integer starting at `bit_offset` via `bits::read_int`.
    ///
    /// # Safety
    ///
    /// The caller must uphold the preconditions of `bits::read_int`.
    #[inline]
    unsafe fn int(&self, bit_offset: usize, width: usize) -> u64 {
        bits::read_int(&self.data, bit_offset, width)
    }

    /// Returns the mapped word at `index`.
    #[inline]
    fn word(&self, index: usize) -> u64 {
        self.data[index]
    }

    /// Returns the mapped word at `index` without bounds checking.
    ///
    /// # Safety
    ///
    /// `index` must be a valid index into the mapped words.
    #[inline]
    unsafe fn word_unchecked(&self, index: usize) -> u64 {
        *self.data.get_unchecked(index)
    }

    /// Always `false`: the mapped data is read-only.
    #[inline]
    fn is_mutable(&self) -> bool {
        false
    }

    /// Not supported.
    ///
    /// # Panics
    ///
    /// Always panics.
    #[inline]
    fn set_bit(&mut self, _bit_offset: usize, _value: bool) {
        panic!("RawVectorMapper::set_bit(): Not implemented");
    }

    /// Not supported.
    ///
    /// # Safety
    ///
    /// No requirements: the method never touches the data.
    ///
    /// # Panics
    ///
    /// Always panics.
    #[inline]
    unsafe fn set_int(&mut self, _bit_offset: usize, _value: u64, _width: usize) {
        panic!("RawVectorMapper::set_int(): Not implemented");
    }
}
#[cfg(not(target_family = "wasm"))]
impl<'a> MemoryMapped<'a> for RawVectorMapper<'a> {
fn new(map: &'a MemoryMap, offset: usize) -> io::Result<Self> {
if offset >= map.len() {
return Err(Error::new(ErrorKind::UnexpectedEof, "The starting offset is out of range"));
}
let slice: &[u64] = map.as_ref();
let len = slice[offset] as usize;
let data = MappedSlice::new(map, offset + 1)?;
Ok(RawVectorMapper {
len, data,
})
}
fn map_offset(&self) -> usize {
self.data.map_offset() - 1
}
fn map_len(&self) -> usize {
self.data.map_len() + 1
}
}
#[cfg(not(target_family = "wasm"))]
impl<'a> AsRef<MappedSlice<'a, u64>> for RawVectorMapper<'a> {
    /// Borrows the memory-mapped word slice.
    #[inline]
    fn as_ref(&self) -> &MappedSlice<'a, u64> {
        &self.data
    }
}