use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::ops::{Bound, RangeBounds};
use std::sync::Arc;
/// A view over a contiguous byte range of a shared [`File`], with its own read cursor.
///
/// All three offsets are absolute positions within the underlying file. Cloning a
/// `FileSlice` clones the `Arc`, so clones share the same file handle while keeping
/// independent `cursor`, `start` and `end` values.
#[derive(Clone, Debug)]
pub struct FileSlice {
/// Shared handle to the underlying file.
file: Arc<File>,
/// Absolute file offset at which the next read will begin.
cursor: u64,
/// Absolute file offset of the first byte in the slice (inclusive).
start: u64,
/// Absolute file offset one past the last byte in the slice (exclusive).
end: u64,
}
impl FileSlice {
    /// Wraps `file` in a slice that spans the whole file.
    ///
    /// The slice covers bytes `0..len`, where `len` is the length reported by the
    /// file's metadata at the time of the call. The cursor starts at offset 0.
    ///
    /// # Panics
    ///
    /// Panics if the file's metadata cannot be queried.
    pub fn new(file: File) -> FileSlice {
        let end = file.metadata().unwrap().len();
        FileSlice {
            file: Arc::new(file),
            cursor: 0,
            start: 0,
            end,
        }
    }

    /// Returns a sub-slice of `self` that shares the same underlying file.
    ///
    /// `range` is interpreted relative to the start of `self`, so `slice(0..4)` is
    /// the first four bytes of this slice, wherever it sits in the file. The end of
    /// the result is clamped to the end of `self`, and is never smaller than the
    /// computed start, so an out-of-range request yields an empty slice instead of
    /// an error. The new slice's cursor is positioned at its start.
    ///
    /// Offsets are combined with saturating arithmetic, so extreme bounds such as
    /// `..=u64::MAX` are clamped rather than overflowing.
    pub fn slice<T>(&self, range: T) -> FileSlice
    where
        T: RangeBounds<u64>,
    {
        let start = match range.start_bound() {
            Bound::Included(&offset) => self.start.saturating_add(offset),
            Bound::Excluded(&offset) => self.start.saturating_add(offset).saturating_add(1),
            Bound::Unbounded => self.start,
        };
        let requested_end = match range.end_bound() {
            Bound::Included(&offset) => self.start.saturating_add(offset).saturating_add(1),
            Bound::Excluded(&offset) => self.start.saturating_add(offset),
            Bound::Unbounded => self.end,
        };
        // Never reach past the parent slice, and never let `end` fall below `start`
        // (which would make the length computation underflow).
        let end = requested_end.min(self.end).max(start);
        FileSlice {
            file: Arc::clone(&self.file),
            cursor: start,
            start,
            end,
        }
    }
}
impl FileSlice {
    /// Absolute file offset of the first byte of the slice.
    pub fn start_pos(&self) -> u64 {
        self.start
    }

    /// Absolute file offset one past the last byte of the slice.
    pub fn end_pos(&self) -> u64 {
        self.end
    }

    /// Absolute file offset at which the next read will begin.
    pub fn cursor_pos(&self) -> u64 {
        self.cursor
    }

    /// Returns `true` when the slice covers zero bytes.
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Number of bytes covered by the slice.
    ///
    /// The value saturates at `usize::MAX` on targets where `usize` is narrower
    /// than the slice length, instead of silently truncating.
    pub fn len(&self) -> usize {
        let span = self.end.saturating_sub(self.start);
        // The `min` guarantees the value fits, so the cast is lossless.
        span.min(usize::MAX as u64) as usize
    }

    /// Number of bytes between the cursor and the end of the slice.
    ///
    /// Returns 0 when the cursor has been moved past the end of the slice. Like
    /// [`len`](Self::len), the value saturates at `usize::MAX`.
    pub fn bytes_remaining(&self) -> usize {
        // The cursor may legitimately sit beyond `end`, so a plain subtraction
        // would underflow.
        let left = self.end.saturating_sub(self.cursor);
        left.min(usize::MAX as u64) as usize
    }
}
impl Read for FileSlice {
    /// Reads up to `buf.len()` bytes starting at the cursor, never going past the
    /// end of the slice, and advances the cursor by the number of bytes read.
    ///
    /// Returns `Ok(0)` once the cursor is at or beyond the end of the slice.
    /// The read is issued at an explicit offset through the platform's `FileExt`
    /// (`read_at` on unix and wasi, `seek_read` on windows).
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        // The cursor can be moved past `end` by a seek; saturate so that case
        // yields an empty read instead of underflowing and escaping the slice.
        let remaining = self.end.saturating_sub(self.cursor);
        // Compare in u64 so large slices are not truncated on 32-bit targets; the
        // result is at most `buf.len()`, so the cast back to usize is lossless.
        let limit = remaining.min(buf.len() as u64) as usize;
        let window = &mut buf[..limit];
        let count;
        #[cfg(target_family = "unix")]
        {
            use std::os::unix::fs::FileExt;
            count = self.file.read_at(window, self.cursor)?;
        }
        #[cfg(target_family = "windows")]
        {
            use std::os::windows::fs::FileExt;
            count = self.file.seek_read(window, self.cursor)?;
        }
        #[cfg(target_family = "wasm")]
        {
            use std::os::wasi::fs::FileExt;
            count = self.file.read_at(window, self.cursor)?;
        }
        self.cursor += count as u64;
        Ok(count)
    }
}
impl Seek for FileSlice {
    /// Moves the cursor and returns its new position relative to the slice start.
    ///
    /// `SeekFrom::Start` and `SeekFrom::End` are measured from the start and end of
    /// the slice, and `SeekFrom::Current` from the current cursor. Seeking beyond
    /// the end of the slice is allowed.
    ///
    /// # Errors
    ///
    /// Returns an `ErrorKind::Other` error ("Out of bounds") if the target lies
    /// before the start of the slice or does not fit in a `u64`. The cursor is left
    /// unchanged in that case.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        // Do all arithmetic in i128 so that neither a huge `Start` offset nor a
        // negative delta can overflow before the range check below.
        let target = match pos {
            SeekFrom::Start(offset) => i128::from(self.start) + i128::from(offset),
            SeekFrom::Current(delta) => i128::from(self.cursor) + i128::from(delta),
            SeekFrom::End(delta) => i128::from(self.end) + i128::from(delta),
        };
        match u64::try_from(target) {
            Ok(cursor) if cursor >= self.start => {
                self.cursor = cursor;
                self.stream_position()
            }
            _ => Err(std::io::Error::new(
                std::io::ErrorKind::Other,
                "Out of bounds",
            )),
        }
    }

    /// Returns the cursor position relative to the start of the slice.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        Ok(self.cursor - self.start)
    }
}
impl FileSlice {
    /// Widens this slice in place so that it covers the entire underlying file.
    ///
    /// The start is reset to 0 and the end to the file length currently reported by
    /// the file's metadata. The cursor is not modified.
    ///
    /// # Panics
    ///
    /// Panics if the file's metadata cannot be queried.
    pub fn expand(&mut self) {
        self.start = 0;
        let metadata = self.file.metadata().unwrap();
        self.end = metadata.len();
    }

    /// Attempts to recover the underlying [`File`].
    ///
    /// Succeeds only when this slice holds the last reference to the file. If other
    /// slices still share it, the original slice is handed back unchanged in `Err`.
    pub fn try_unwrap(self) -> Result<File, FileSlice> {
        let FileSlice {
            file,
            cursor,
            start,
            end,
        } = self;
        match Arc::try_unwrap(file) {
            Ok(inner) => Ok(inner),
            Err(file) => Err(FileSlice {
                file,
                cursor,
                start,
                end,
            }),
        }
    }
}
/// Adapters that let a [`FileSlice`] be used as a parquet chunk reader.
#[cfg(feature = "parquet")]
mod parquet_impls {
    use super::*;
    use bytes::Bytes;
    use parquet::file::reader::{ChunkReader, Length};

    impl Length for FileSlice {
        /// Number of bytes covered by the slice.
        fn len(&self) -> u64 {
            self.end - self.start
        }
    }

    impl ChunkReader for FileSlice {
        type T = FileSlice;

        /// Returns a reader over everything from `start` (relative to this slice)
        /// to the end of this slice.
        fn get_read(&self, start: u64) -> parquet::errors::Result<FileSlice> {
            // `slice` takes offsets relative to this slice, so an open-ended range
            // is used here; passing the absolute `self.end` would only be correct
            // thanks to clamping.
            Ok(self.slice(start..))
        }

        /// Reads exactly `length` bytes beginning at `start` (relative to this
        /// slice).
        ///
        /// Fails with the error produced by `read_exact` when fewer than `length`
        /// bytes are available in the slice.
        fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
            let mut buf = vec![0; length];
            // Saturate so a huge `start` cannot overflow; the resulting slice is
            // then too short and `read_exact` reports the shortfall.
            let end = start.saturating_add(length as u64);
            self.slice(start..end).read_exact(&mut buf)?;
            Ok(buf.into())
        }
    }
}
/// Splits a tar archive into one [`FileSlice`] per entry.
///
/// Every entry header is read up front, then the archive's file is wrapped in a
/// single shared [`FileSlice`]. Each yielded item pairs an entry's header with a
/// sub-slice spanning from the entry's raw file position to that position plus
/// the entry's size.
///
/// # Errors
///
/// Returns an error if the entry iterator cannot be created or if any entry fails
/// to be read.
///
/// # Panics
///
/// Panics if the metadata of the archive's file cannot be queried (see
/// [`FileSlice::new`]).
#[cfg(feature = "tar")]
pub fn slice_tarball(
    mut archive: tar::Archive<File>,
) -> std::io::Result<impl Iterator<Item = (tar::Header, FileSlice)>> {
    let headers = archive
        .entries_with_seek()?
        .map(|entry| {
            // Propagate a bad entry as an error instead of panicking.
            let entry = entry?;
            let start = entry.raw_file_position();
            let end = start.saturating_add(entry.size());
            Ok((entry.header().clone(), start, end))
        })
        .collect::<std::io::Result<Vec<_>>>()?;
    let file = FileSlice::new(archive.into_inner());
    Ok(headers
        .into_iter()
        .map(move |(header, start, end)| (header, file.slice(start..end))))
}