use crate::Result;
use memmap2::{Mmap, MmapMut};
use std::{borrow::Cow, ops::Range, ptr::NonNull, sync::Arc};
#[cfg(unix)]
pub(crate) use std::os::fd::AsRawFd as Mmappable;
#[cfg(windows)]
pub(crate) use std::os::windows::io::AsRawHandle as Mmappable;
/// A window of bytes stored in a buffer of type `Buf`, tagged with the span of
/// offsets it covers.
///
/// The concrete buffer types used in this file are `MmapMut` (see `SegmentMut`)
/// and `Mmap` (see `Segment`).
pub struct SegmentRaw<Buf> {
// Half-open span of offsets covered by this segment. `start()` returns its
// lower bound and the `translate_inner_data_*` helpers subtract that lower
// bound to obtain positions relative to the segment.
range: Range<u64>,
// Backing storage holding the segment's bytes.
data: Buf,
}
/// Writable segment backed by a mutable memory map. `SegmentMut::new` creates
/// it as an anonymous mapping and `into_read_only` converts it to a `Segment`.
pub type SegmentMut = SegmentRaw<MmapMut>;
/// Read-only segment backed by an immutable memory map, produced either by
/// `Segment::map_file` or by `SegmentMut::into_read_only`.
pub type Segment = SegmentRaw<Mmap>;
impl<Buf> SegmentRaw<Buf>
where
    Buf: AsRef<[u8]>,
{
    /// Size of one segment in bytes (`1 << 20`). The same value is the stride
    /// used by [`Self::id_of_data`] and [`Self::data_range_of_id`].
    pub const MAX_SIZE: u64 = 1 << 20;

    /// Returns the lower bound of the span of offsets covered by this segment.
    #[inline]
    pub fn start(&self) -> u64 {
        self.range.start
    }

    /// Converts the offset `start` into a position relative to this segment's
    /// lower bound.
    ///
    /// Debug builds assert that `start` lies within `range.start..=range.end`;
    /// the inclusive upper bound lets the end of a range be translated too.
    #[inline]
    pub fn translate_inner_data_index(&self, start: u64) -> u64 {
        debug_assert!(self.range.start <= start);
        debug_assert!(start <= self.range.end);
        let base = self.start();
        start - base
    }

    /// Converts the span `start..end` into a span relative to this segment's
    /// lower bound by translating both endpoints.
    #[inline]
    pub fn translate_inner_data_range(&self, start: u64, end: u64) -> Range<u64> {
        let lo = self.translate_inner_data_index(start);
        let hi = self.translate_inner_data_index(end);
        lo..hi
    }

    /// Returns the index of the `MAX_SIZE`-sized slot that contains the offset
    /// `start`.
    #[inline]
    pub fn id_of_data(start: u64) -> usize {
        let slot = start / Self::MAX_SIZE;
        slot as usize
    }

    /// Returns the span of offsets covered by slot `id`: `MAX_SIZE` offsets
    /// beginning at `id * MAX_SIZE`.
    #[inline]
    pub fn data_range_of_id(id: usize) -> Range<u64> {
        let first = Self::MAX_SIZE * id as u64;
        Range {
            start: first,
            end: first + Self::MAX_SIZE,
        }
    }
}
/// Lets a segment be used directly as the byte slice held by its buffer.
impl<Buf> std::ops::Deref for SegmentRaw<Buf>
where
    Buf: std::ops::Deref<Target = [u8]>,
{
    type Target = [u8];

    fn deref(&self) -> &[u8] {
        // Explicit reborrow through the buffer's own `Deref` impl.
        &*self.data
    }
}
/// Lets a segment with a mutable buffer be written to as a byte slice.
impl<Buf> std::ops::DerefMut for SegmentRaw<Buf>
where
    Buf: std::ops::DerefMut<Target = [u8]>,
{
    fn deref_mut(&mut self) -> &mut [u8] {
        // Explicit reborrow through the buffer's own `DerefMut` impl.
        &mut *self.data
    }
}
impl SegmentMut {
    /// Creates a writable segment of `MAX_SIZE` bytes backed by an anonymous
    /// memory map, covering the offsets `start..start + MAX_SIZE`.
    ///
    /// On Unix the mapping is advised as `Sequential`.
    ///
    /// # Errors
    ///
    /// Returns an error if the mapping cannot be created or (on Unix) the
    /// advice call fails.
    pub(crate) fn new(start: u64) -> Result<Self> {
        let mut options = memmap2::MmapOptions::new();
        options.len(Self::MAX_SIZE as usize);
        let data = options.map_anon()?;

        #[cfg(unix)]
        data.advise(memmap2::Advice::Sequential)?;

        let end = start + Self::MAX_SIZE;
        Ok(Self {
            data,
            range: start..end,
        })
    }

    /// Consumes the segment and returns it as a read-only [`Segment`] covering
    /// the same span of offsets.
    ///
    /// # Errors
    ///
    /// Returns an error if the mapping cannot be made read-only.
    pub fn into_read_only(self) -> Result<Segment> {
        let Self { range, data } = self;
        let data = data.make_read_only()?;
        Ok(Segment { data, range })
    }
}
impl Segment {
    /// Maps the bytes `range` of `file` as a read-only segment.
    ///
    /// `range.start` is used as the file offset of the mapping and
    /// `range.end - range.start` as its length; debug builds assert that the
    /// length does not exceed `MAX_SIZE`. On Unix the mapping is advised as
    /// `WillNeed`.
    ///
    /// # Errors
    ///
    /// Returns an error if the mapping cannot be created or (on Unix) the
    /// advice call fails.
    pub(crate) fn map_file<F: Mmappable>(range: Range<u64>, file: &F) -> Result<Self> {
        let size = range.end - range.start;
        debug_assert!(size <= Self::MAX_SIZE);

        let mut options = memmap2::MmapOptions::new();
        options.offset(range.start).len(size as usize);
        // SAFETY: not enforced here. A file-backed mapping is only sound while
        // the mapped region of the file is not truncated or modified for the
        // lifetime of the mapping; callers are relied upon to guarantee that.
        let data = unsafe { options.map(file) }?;

        #[cfg(unix)]
        data.advise(memmap2::Advice::WillNeed)?;

        Ok(Self { data, range })
    }

    /// Returns the bytes at `range` as text, replacing invalid UTF-8 lossily.
    ///
    /// `range` is interpreted exactly as in [`Self::get_bytes`].
    #[inline]
    pub fn get_line(self: &Arc<Self>, range: Range<u64>) -> SegStr {
        let bytes = self.get_bytes(range);
        SegStr::from_bytes(bytes)
    }

    /// Returns a view of the bytes at `range` that shares ownership of this
    /// segment.
    ///
    /// `range` indexes the mapped data directly, so it is relative to the
    /// beginning of the segment's data rather than a global offset.
    ///
    /// # Panics
    ///
    /// Panics if `range` is out of bounds for the mapped data.
    #[inline]
    pub fn get_bytes(self: &Arc<Self>, range: Range<u64>) -> SegBytes {
        SegBytes::new_borrow(Arc::clone(self), range)
    }
}
/// A run of bytes that is either a view into a shared `Segment` or an owned
/// `Vec<u8>`. Read it through `as_bytes` or the `Deref`/`AsRef`/`Borrow` impls.
pub struct SegBytes(SegBytesRepr);
// Private representation behind `SegBytes`.
enum SegBytesRepr {
// View into the data of a segment.
Borrowed {
// Shared handle to the segment that `ptr` points into; never read, held
// only so the segment is not dropped while this view exists.
_ref: Arc<Segment>,
// Address of the first byte of the view.
ptr: NonNull<u8>,
// Number of bytes in the view.
len: usize,
},
// Bytes stored in their own heap allocation.
Owned(Vec<u8>),
}
impl SegBytes {
    /// Creates a view of `origin`'s data at `range`, keeping `origin` alive
    /// for as long as the view exists.
    ///
    /// `range` indexes the segment's data directly (it is not a global offset).
    ///
    /// # Panics
    ///
    /// Panics if `range` is out of bounds for the segment's data or if
    /// `range.start > range.end`.
    fn new_borrow(origin: Arc<Segment>, range: Range<u64>) -> Self {
        let data = &origin.data[range.start as usize..range.end as usize];
        // A reference is never null, so the conversion to `NonNull` is safe
        // and needs no unchecked unwrap.
        let ptr = NonNull::from(data).cast::<u8>();
        let len = data.len();
        Self(SegBytesRepr::Borrowed {
            ptr,
            len,
            _ref: origin,
        })
    }

    /// Wraps an owned byte vector without copying it.
    #[inline]
    pub fn new_owned(s: Vec<u8>) -> Self {
        Self(SegBytesRepr::Owned(s))
    }

    /// Returns the bytes, whichever representation holds them.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        match &self.0 {
            SegBytesRepr::Owned(buf) => buf.as_slice(),
            // SAFETY: `new_borrow` takes `ptr` and `len` from a bounds-checked
            // slice of the segment's data, and `_ref` keeps that segment alive
            // for at least as long as `self`, so the memory is valid for reads
            // of `len` bytes for the returned lifetime.
            SegBytesRepr::Borrowed { ptr, len, .. } => unsafe {
                std::slice::from_raw_parts(ptr.as_ptr(), *len)
            },
        }
    }
}
/// Allows a `SegBytes` to be borrowed as a plain byte slice.
impl std::borrow::Borrow<[u8]> for SegBytes {
    #[inline]
    fn borrow(&self) -> &[u8] {
        self.as_bytes()
    }
}
impl std::ops::Deref for SegBytes {
type Target = [u8];
#[inline]
fn deref(&self) -> &Self::Target {
self.as_bytes()
}
}
/// Cheap reference conversion from `SegBytes` to a byte slice.
impl std::convert::AsRef<[u8]> for SegBytes {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        let bytes: &[u8] = self.as_bytes();
        bytes
    }
}
/// Text that is either a view into a shared `Segment` or an owned `String`.
/// `SegStr::from_bytes` builds it from a `SegBytes`, and `as_str` exposes the
/// contents as `&str`.
#[derive(Clone)]
pub struct SegStr(SegStrRepr);
// Private representation behind `SegStr`.
#[derive(Clone)]
enum SegStrRepr {
// View into the data of a segment. `SegStr::from_bytes` only builds this
// variant after the bytes have passed UTF-8 validation.
Borrowed {
// Shared handle to the segment that `ptr` points into; never read, held
// only so the segment is not dropped while this view exists.
_ref: Arc<Segment>,
// Address of the first byte of the view.
ptr: NonNull<u8>,
// Number of bytes in the view.
len: usize,
},
// Text stored in its own heap allocation.
Owned(String),
}
impl SegStr {
    /// Converts `bytes` to text, replacing invalid UTF-8 sequences with
    /// U+FFFD.
    ///
    /// * A borrowed view that is valid UTF-8 stays borrowed (no copy).
    /// * A borrowed view that needs repair becomes an owned string and the
    ///   handle on the segment is released.
    /// * An owned buffer that is valid UTF-8 is reused without copying;
    ///   otherwise a repaired copy is made.
    pub fn from_bytes(bytes: SegBytes) -> Self {
        let repr = match bytes.0 {
            SegBytesRepr::Borrowed { _ref, ptr, len } => {
                // SAFETY: `ptr` and `len` describe the slice that the
                // `SegBytes` exposed, and `_ref` keeps the segment behind it
                // alive for the whole of this arm.
                let data = unsafe { std::slice::from_raw_parts(ptr.as_ptr(), len) };
                match String::from_utf8_lossy(data) {
                    // Nothing had to be replaced, so the bytes are valid UTF-8
                    // and the view can be kept as is.
                    Cow::Borrowed(_) => SegStrRepr::Borrowed { _ref, ptr, len },
                    Cow::Owned(repaired) => SegStrRepr::Owned(repaired),
                }
            }
            // `String::from_utf8` validates and reuses the allocation in one
            // safe step; only invalid input pays for a lossy copy.
            SegBytesRepr::Owned(raw) => SegStrRepr::Owned(match String::from_utf8(raw) {
                Ok(text) => text,
                Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
            }),
        };
        Self(repr)
    }

    /// Returns the UTF-8 bytes of the text.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        match &self.0 {
            SegStrRepr::Owned(text) => text.as_bytes(),
            // SAFETY: `from_bytes` copies `ptr` and `len` from a `SegBytes`
            // view, and `_ref` keeps the segment behind them alive for at
            // least as long as `self`.
            SegStrRepr::Borrowed { ptr, len, .. } => unsafe {
                std::slice::from_raw_parts(ptr.as_ptr(), *len)
            },
        }
    }

    /// Returns the text as a string slice.
    #[inline]
    pub fn as_str(&self) -> &str {
        // SAFETY: `from_bytes` stores either a `String` or a borrowed view
        // that passed UTF-8 validation.
        // NOTE(review): for file-backed segments this also assumes the mapped
        // bytes are not changed after validation - confirm with callers.
        unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
    }
}
/// Allows a `SegStr` to be borrowed as a plain string slice.
impl std::borrow::Borrow<str> for SegStr {
    #[inline]
    fn borrow(&self) -> &str {
        self.as_str()
    }
}
impl std::ops::Deref for SegStr {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
/// Cheap reference conversion from `SegStr` to a string slice.
impl std::convert::AsRef<str> for SegStr {
    #[inline]
    fn as_ref(&self) -> &str {
        let text: &str = self.as_str();
        text
    }
}
/// Formats a `SegStr` exactly as the `str` it contains would be formatted.
impl std::fmt::Debug for SegStr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = self.as_str();
        <str as std::fmt::Debug>::fmt(text, f)
    }
}