use memmap2::Mmap;
use crate::{cloud_store::CloudObject, Uri};
/// Errors produced by the binary writer and reader defined in this file.
///
/// The enum is `#[non_exhaustive]`, so new variants may be added later.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum BinaryError {
/// An I/O failure. Display and source are forwarded to the wrapped
/// `std::io::Error`; `From<std::io::Error>` is derived, so `?` converts.
#[error(transparent)]
Io(#[from] std::io::Error),
/// The requested byte range could not be taken from the mapped data.
#[error("out of bounds: {start}..{end} (len {len})")]
OutOfBounds {
/// Start of the requested range.
start: usize,
/// End of the requested range.
end: usize,
/// Length of the data the range was checked against.
len: usize,
},
/// A borrowed view was requested from a backing store that is not
/// memory-mapped (in this file: cloud objects).
#[error("memory-mapped view unavailable for this file type")]
ViewUnavailable,
/// A failure reported by the cloud layer. `From<crate::CloudError>` is
/// derived, so `?` converts.
#[error("cloud error: {0}")]
Cloud(#[from] crate::CloudError),
}
/// Writer that owns an open local file; its `impl` block also carries
/// associated functions for uploading to cloud destinations.
#[derive(Debug)]
pub struct BinaryWriter {
/// Handle of the local file that `write` appends bytes to.
file: std::fs::File,
}
impl BinaryWriter {
    /// Builds the `InvalidInput` I/O error reported when a URI is of the
    /// wrong kind (local vs. cloud) for the requested operation.
    fn invalid_input(message: &'static str) -> BinaryError {
        BinaryError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            message,
        ))
    }

    /// Creates a writer for the local file addressed by `path`.
    ///
    /// The file is opened with [`std::fs::File::create`], so it is created
    /// when missing and truncated when it already exists.
    ///
    /// # Errors
    ///
    /// Returns [`BinaryError::Io`] with kind `InvalidInput` when `path` has
    /// no local path form, or the underlying I/O error when the file cannot
    /// be created.
    pub fn new(path: impl Into<Uri>) -> Result<Self, BinaryError> {
        let uri: Uri = path.into();
        let local_path = uri
            .as_path()
            .ok_or_else(|| Self::invalid_input("Can only write to local paths"))?;
        let file = std::fs::File::create(local_path)?;
        Ok(BinaryWriter { file })
    }

    /// Writes all of `data` to the file at its current position.
    ///
    /// # Errors
    ///
    /// Returns [`BinaryError::Io`] when the underlying `write_all` fails.
    pub fn write(&mut self, data: &[u8]) -> Result<(), BinaryError> {
        use std::io::Write;
        self.file.write_all(data)?;
        Ok(())
    }

    /// Uploads the local file `source` to the cloud destination `dest` by
    /// delegating to `CloudObject::upload_from`.
    ///
    /// `source` is validated before `dest` is converted or inspected.
    ///
    /// # Errors
    ///
    /// Returns [`BinaryError::Io`] with kind `InvalidInput` when `source` is
    /// not a local path or `dest` is not a cloud URI, and
    /// [`BinaryError::Cloud`] when the cloud layer reports a failure.
    pub fn upload(
        source: impl Into<Uri>,
        dest: impl Into<Uri>,
    ) -> Result<(), BinaryError> {
        let source_uri: Uri = source.into();
        let source_path = source_uri
            .as_path()
            .ok_or_else(|| Self::invalid_input("Can only upload from local paths"))?;
        let dest_uri: Uri = dest.into();
        if !dest_uri.is_cloud() {
            return Err(Self::invalid_input("Can only upload to cloud paths"));
        }
        // `dest_uri` is not needed afterwards, so it is moved rather than cloned.
        CloudObject::new(dest_uri)?.upload_from(source_path)?;
        Ok(())
    }

    /// Uploads an in-memory buffer to the cloud destination `dest` by
    /// delegating to `CloudObject::upload_bytes`.
    ///
    /// # Errors
    ///
    /// Returns [`BinaryError::Io`] with kind `InvalidInput` when `dest` is
    /// not a cloud URI, and [`BinaryError::Cloud`] when the cloud layer
    /// reports a failure.
    pub fn upload_bytes(
        bytes: Vec<u8>,
        dest: impl Into<Uri>,
    ) -> Result<(), BinaryError> {
        let dest_uri: Uri = dest.into();
        if !dest_uri.is_cloud() {
            return Err(Self::invalid_input("Can only upload to cloud paths"));
        }
        // `dest_uri` is not needed afterwards, so it is moved rather than cloned.
        CloudObject::new(dest_uri)?.upload_bytes(bytes)?;
        Ok(())
    }
}
/// Backing store behind a `BinaryReader`.
#[derive(Debug)]
enum Inner {
/// A local file opened as a memory map.
Mmap(Mmap),
/// A handle to a cloud object; ranges are fetched through `CloudObject`.
Cloud(CloudObject),
}
impl Inner {
    /// Returns an owned copy of the bytes in `range`.
    ///
    /// For a memory map the bytes are copied out of the mapping, and a range
    /// that cannot be sliced yields [`BinaryError::OutOfBounds`]. For a cloud
    /// object the request is forwarded to `CloudObject::range` and its error
    /// is wrapped in [`BinaryError::Cloud`].
    fn range(
        &self,
        range: std::ops::Range<usize>,
    ) -> Result<Vec<u8>, BinaryError> {
        match self {
            Inner::Cloud(object) => {
                object.range(range).map_err(BinaryError::Cloud)
            }
            Inner::Mmap(map) => {
                // Split the range up front so the bounds stay available for
                // the error report after the slice attempt.
                let std::ops::Range { start, end } = range;
                match map.get(start..end) {
                    Some(bytes) => Ok(bytes.to_vec()),
                    None => Err(BinaryError::OutOfBounds {
                        start,
                        end,
                        len: map.len(),
                    }),
                }
            }
        }
    }

    /// Returns a borrowed slice of the bytes in `range` without copying.
    ///
    /// Only the memory-mapped variant can hand out a view; the cloud variant
    /// always yields [`BinaryError::ViewUnavailable`]. A range that cannot be
    /// sliced from the mapping yields [`BinaryError::OutOfBounds`].
    fn view_range(
        &self,
        range: std::ops::Range<usize>,
    ) -> Result<&[u8], BinaryError> {
        let map = match self {
            Inner::Mmap(map) => map,
            Inner::Cloud(_) => return Err(BinaryError::ViewUnavailable),
        };
        let std::ops::Range { start, end } = range;
        match map.get(start..end) {
            Some(bytes) => Ok(bytes),
            None => Err(BinaryError::OutOfBounds {
                start,
                end,
                len: map.len(),
            }),
        }
    }
}
impl TryFrom<&Uri> for Inner {
type Error = BinaryError;
fn try_from(uri: &Uri) -> Result<Self, Self::Error> {
if uri.is_local() {
let path = uri.as_path().ok_or_else(|| {
BinaryError::Io(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"not a local path",
))
})?;
let file = std::fs::File::open(path)?;
let mmap = unsafe { memmap2::MmapOptions::new().map(&file) }?;
Ok(Inner::Mmap(mmap))
} else if uri.is_cloud() {
let cloud_object =
CloudObject::new(uri.clone()).map_err(BinaryError::Cloud)?;
Ok(Inner::Cloud(cloud_object))
} else {
Err(BinaryError::Io(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
format!("unsupported URI: {uri}"),
)))
}
}
}
/// Random-access reader over binary data addressed by a `Uri`, backed by
/// either a memory-mapped local file or a cloud object.
#[derive(Debug)]
pub struct BinaryReader {
/// Backing store the ranges are served from.
inner: Inner,
/// Length reported by the backing store, captured once at construction.
len: usize,
/// URI exactly as supplied by the caller.
original_uri: Uri,
/// URI that was actually opened: the result of `soft_cache()` on the
/// original URI.
effective_uri: Uri,
}
impl BinaryReader {
    /// Opens a reader for `uri`.
    ///
    /// The URI is first passed through `Uri::soft_cache`; the resulting
    /// "effective" URI is the one that is opened. The length of the backing
    /// store is queried once here and stored.
    ///
    /// # Errors
    ///
    /// Propagates any error from opening the backing store, and wraps a
    /// failed cloud length query in [`BinaryError::Cloud`].
    pub fn from(uri: impl Into<Uri>) -> Result<Self, BinaryError> {
        let original_uri: Uri = uri.into();
        let effective_uri = original_uri.soft_cache();
        let inner = Inner::try_from(&effective_uri)?;
        let len = match inner {
            Inner::Mmap(ref map) => map.len(),
            Inner::Cloud(ref object) => {
                object.len().map_err(BinaryError::Cloud)?
            }
        };
        Ok(Self {
            inner,
            len,
            original_uri,
            effective_uri,
        })
    }

    /// Length of the underlying data as recorded when the reader was opened.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Whether the recorded length is zero.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns an owned copy of the bytes in `range`.
    ///
    /// The bounds are first resolved to a concrete range by `crate::range`,
    /// using the reader's length.
    ///
    /// # Errors
    ///
    /// Propagates the backing store's error (out-of-bounds for a memory map,
    /// cloud failure for a cloud object).
    pub fn read_range(
        &self,
        range: impl std::ops::RangeBounds<usize>,
    ) -> Result<Vec<u8>, BinaryError> {
        let resolved = crate::range(range, self.len);
        self.inner.range(resolved)
    }

    /// Returns a borrowed, zero-copy slice of the bytes in `range`.
    ///
    /// The bounds are first resolved to a concrete range by `crate::range`,
    /// using the reader's length.
    ///
    /// # Errors
    ///
    /// Returns [`BinaryError::ViewUnavailable`] when the reader is backed by
    /// a cloud object, or [`BinaryError::OutOfBounds`] when the range cannot
    /// be sliced from the memory map.
    pub fn view_range(
        &self,
        range: impl std::ops::RangeBounds<usize>,
    ) -> Result<&[u8], BinaryError> {
        let resolved = crate::range(range, self.len);
        self.inner.view_range(resolved)
    }

    /// The URI exactly as passed to [`BinaryReader::from`].
    pub fn original_uri(&self) -> &Uri {
        &self.original_uri
    }

    /// The URI that was actually opened (after `soft_cache`).
    pub fn effective_uri(&self) -> &Uri {
        &self.effective_uri
    }
}