use crate::{
prelude::{FileBufReader, MemBufReader},
utils::MmapHelper,
};
use anyhow::{Context, ensure};
use bitflags::bitflags;
use common_traits::UnsignedInt;
use dsi_bitstream::{
impls::{BufBitReader, MemWordReader, WordAdapter, buf_bit_reader},
prelude::{CodesRead, CodesReaderFactory},
traits::{BitRead, Endianness},
};
use epserde::Epserde;
use std::{
fs::File,
io::{BufReader, Read},
marker::PhantomData,
mem::MaybeUninit,
path::Path,
};
use sux::traits::{IndexedSeq, Types};
#[derive(Debug, Clone)]
pub struct FileFactory<E: Endianness> {
path: Box<Path>,
_marker: core::marker::PhantomData<E>,
}
impl<E: Endianness> FileFactory<E> {
pub fn new(path: impl AsRef<Path>) -> anyhow::Result<Self> {
let path: Box<Path> = path.as_ref().into();
let metadata = std::fs::metadata(&path)
.with_context(|| format!("Could not stat {}", path.display()))?;
ensure!(metadata.is_file(), "File {} is not a file", path.display());
Ok(Self {
path,
_marker: core::marker::PhantomData,
})
}
}
impl<E: Endianness> CodesReaderFactory<E> for FileFactory<E>
where
FileBufReader<E>: BitRead<E> + CodesRead<E>,
{
type CodesReader<'a>
= BufBitReader<E, WordAdapter<u32, BufReader<File>>>
where
Self: 'a;
fn new_reader(&self) -> Self::CodesReader<'_> {
buf_bit_reader::from_path::<E, u32>(&self.path).unwrap()
}
}
bitflags! {
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MemoryFlags: u32 {
const TRANSPARENT_HUGE_PAGES = 1 << 0;
const SEQUENTIAL = 1 << 1;
const RANDOM_ACCESS = 1 << 2;
}
}
impl core::default::Default for MemoryFlags {
fn default() -> Self {
MemoryFlags::empty()
}
}
impl From<MemoryFlags> for mmap_rs::MmapFlags {
fn from(flags: MemoryFlags) -> Self {
let mut mmap_flags = mmap_rs::MmapFlags::empty();
if flags.contains(MemoryFlags::SEQUENTIAL) {
mmap_flags |= mmap_rs::MmapFlags::SEQUENTIAL;
}
if flags.contains(MemoryFlags::RANDOM_ACCESS) {
mmap_flags |= mmap_rs::MmapFlags::RANDOM_ACCESS;
}
if flags.contains(MemoryFlags::TRANSPARENT_HUGE_PAGES) {
mmap_flags |= mmap_rs::MmapFlags::TRANSPARENT_HUGE_PAGES;
}
mmap_flags
}
}
impl From<MemoryFlags> for epserde::deser::Flags {
fn from(flags: MemoryFlags) -> Self {
let mut deser_flags = epserde::deser::Flags::empty();
if flags.contains(MemoryFlags::SEQUENTIAL) {
deser_flags |= epserde::deser::Flags::SEQUENTIAL;
}
if flags.contains(MemoryFlags::RANDOM_ACCESS) {
deser_flags |= epserde::deser::Flags::RANDOM_ACCESS;
}
if flags.contains(MemoryFlags::TRANSPARENT_HUGE_PAGES) {
deser_flags |= epserde::deser::Flags::TRANSPARENT_HUGE_PAGES;
}
deser_flags
}
}
#[derive(Debug, Clone)]
pub struct MemoryFactory<E: Endianness, M: AsRef<[u32]>> {
data: M,
_marker: core::marker::PhantomData<E>,
}
impl<E: Endianness, T: AsRef<[u32]>> MemoryFactory<E, T> {
pub fn from_data(data: T) -> Self {
Self {
data,
_marker: core::marker::PhantomData,
}
}
}
impl<E: Endianness> MemoryFactory<E, Box<[u32]>> {
pub fn new_mem(path: impl AsRef<Path>) -> anyhow::Result<Self> {
let path = path.as_ref();
let file_len = path
.metadata()
.with_context(|| format!("Could not stat {}", path.display()))?
.len() as usize;
let mut file = std::fs::File::open(path)
.with_context(|| format!("Could not open {}", path.display()))?;
let vec_len = file_len.div_ceil(usize::try_from(u32::BITS / 8).unwrap());
let mut data = Vec::<u32>::with_capacity(vec_len);
*data.spare_capacity_mut().last_mut().unwrap() = MaybeUninit::zeroed();
let bytes =
unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, file_len) };
file.read_exact(bytes)
.with_context(|| format!("Could not read {}", path.display()))?;
unsafe { data.set_len(vec_len) };
Ok(Self {
data: data.into_boxed_slice(),
_marker: core::marker::PhantomData,
})
}
}
impl<E: Endianness> MemoryFactory<E, MmapHelper<u32>> {
pub fn new_mmap(path: impl AsRef<Path>, flags: MemoryFlags) -> anyhow::Result<Self> {
let path = path.as_ref();
let file_len = path
.metadata()
.with_context(|| format!("Could not stat {}", path.display()))?
.len() as usize;
let mut file = std::fs::File::open(path)
.with_context(|| format!("Could not open {}", path.display()))?;
let capacity = file_len.align_to(16);
let mut mmap = mmap_rs::MmapOptions::new(capacity)?
.with_flags(flags.into())
.map_mut()
.context("Could not create anonymous mmap")?;
file.read_exact(&mut mmap[..file_len])
.with_context(|| format!("Could not read {}", path.display()))?;
mmap[file_len..].fill(0);
Ok(Self {
data: MmapHelper::try_from(
mmap.make_read_only()
.map_err(|(_, err)| err)
.context("Could not make memory read-only")?,
)
.context("Could not create mmap backend")?,
_marker: core::marker::PhantomData,
})
}
}
impl<E: Endianness, M: AsRef<[u32]>> CodesReaderFactory<E> for MemoryFactory<E, M>
where
for<'a> MemBufReader<'a, E>: CodesRead<E>,
{
type CodesReader<'a>
= MemBufReader<'a, E>
where
Self: 'a;
fn new_reader(&self) -> Self::CodesReader<'_> {
BufBitReader::<E, _>::new(MemWordReader::new(self.data.as_ref()))
}
}
#[derive(Epserde, Debug, Clone)]
pub struct EmptyDict<I, O> {
_marker: core::marker::PhantomData<(I, O)>,
}
impl<I, O> Types for EmptyDict<I, O> {
type Input = usize;
type Output<'a> = usize;
}
impl<I, O> IndexedSeq for EmptyDict<I, O> {
fn get(&self, _key: Self::Input) -> Self::Output<'_> {
panic!();
}
unsafe fn get_unchecked(&self, _index: usize) -> Self::Output<'_> {
panic!();
}
fn len(&self) -> usize {
0
}
}
impl<I, O> Default for EmptyDict<I, O> {
fn default() -> Self {
Self {
_marker: PhantomData,
}
}
}
impl<E: Endianness> CodesReaderFactory<E> for MmapHelper<u32>
where
for<'a> MemBufReader<'a, E>: CodesRead<E>,
{
type CodesReader<'a>
= MemBufReader<'a, E>
where
Self: 'a;
fn new_reader(&self) -> Self::CodesReader<'_> {
BufBitReader::<E, _>::new(MemWordReader::new(self.as_ref()))
}
}