#![warn(missing_debug_implementations, missing_docs)]
#![warn(clippy::pedantic, clippy::cargo)]
#![allow(clippy::wildcard_imports, dead_code)]
pub use deserialize::{parse, File, Files};
pub use serialize::{walk_dir, write};
/// Byte sequence that prefixes every archive.
///
/// The serializer emits it ahead of the version field, and the parser expects
/// `MAGIC_NUMBER.len()` bytes in front of the version. It is currently empty.
pub const MAGIC_NUMBER: &[u8] = b"";
/// Fills `bytes` from `reader`, issuing as many reads as necessary.
///
/// Reading stops once the buffer is full or the reader reports end of input
/// (a read of zero bytes). Reads failing with
/// [`std::io::ErrorKind::Interrupted`] are retried after yielding the thread.
///
/// Returns the number of bytes stored at the front of `bytes`.
///
/// # Errors
///
/// Returns the first error from `reader` whose kind is not `Interrupted`.
pub(crate) fn read_saturate<R: std::io::Read>(
    bytes: &mut [u8],
    mut reader: R,
) -> Result<usize, std::io::Error> {
    let mut filled = 0;
    loop {
        let count = match reader.read(&mut bytes[filled..]) {
            Ok(count) => count,
            Err(ref err) if err.kind() == std::io::ErrorKind::Interrupted => {
                // Give other threads a chance before retrying the read.
                std::thread::yield_now();
                continue;
            }
            Err(err) => return Err(err),
        };
        filled += count;
        // A zero-length read means end of input; a full buffer means done.
        if count == 0 || filled == bytes.len() {
            return Ok(filled);
        }
    }
}
/// Buffer size in bytes (64 KiB) used by the serializer, both as the initial
/// capacity of the header buffer and as the size of the buffer through which
/// file contents are copied.
pub(crate) const DEFAULT_BUFFER_SIZE: usize = 1024 * 64;
pub mod deserialize {
use super::*;
use std::{
cell::RefCell,
convert::{TryFrom, TryInto},
io::{self, prelude::*, Seek, SeekFrom},
mem,
path::{Path, PathBuf},
};
/// A single file stored in an archive, exposed as a bounded window over the
/// shared archive stream.
///
/// Every [`Read`] call first repositions the shared source to this file's own
/// cursor, so several `File`s can take turns on the same stream.
#[derive(Debug)]
pub struct File<'a, R: Read + Seek> {
    /// Shared handle to the stream that holds the whole archive.
    source: &'a RefCell<&'a mut R>,
    /// Length of this file's contents in bytes.
    size: u64,
    /// Absolute offset in `source` at which this file's contents begin.
    position: u64,
    /// Path stored for this file.
    path: PathBuf,
    /// Cursor inside the file, relative to `position`.
    offset: u64,
}

impl<'a, R: Read + Seek> File<'a, R> {
    /// Returns the path stored for this file.
    #[must_use]
    #[inline]
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }

    /// Consumes the file and returns its path.
    #[must_use]
    #[inline]
    pub fn into_path(self) -> PathBuf {
        self.path
    }

    /// Returns the length of the file's contents in bytes.
    #[must_use]
    #[inline]
    pub fn size(&self) -> u64 {
        self.size
    }

    /// Seeks the shared source to this file's cursor (`position + offset`).
    ///
    /// # Errors
    ///
    /// Returns any error from seeking the source, or `InvalidInput` when the
    /// absolute position does not fit in a `u64`.
    ///
    /// # Panics
    ///
    /// Panics if the shared source is already mutably borrowed.
    #[inline]
    pub fn align(&mut self) -> io::Result<()> {
        let target = self
            .position
            .checked_add(self.offset)
            .ok_or(io::ErrorKind::InvalidInput)?;
        self.source
            .borrow_mut()
            .seek(SeekFrom::Start(target))
            .map(|_| ())
    }

    /// Seeks the shared source to the first byte of this file's contents.
    ///
    /// The file's own cursor is left untouched.
    ///
    /// # Errors
    ///
    /// Returns any error from seeking the source.
    ///
    /// # Panics
    ///
    /// Panics if the shared source is already mutably borrowed.
    #[inline]
    pub fn align_to_start(&mut self) -> io::Result<()> {
        self.source
            .borrow_mut()
            .seek(SeekFrom::Start(self.position))
            .map(|_| ())
    }
}

impl<'a, R: Read + Seek> Read for File<'a, R> {
    /// Reads at most the bytes remaining in this file into `buf`.
    ///
    /// Returns `Ok(0)` when the cursor is at or beyond the end of the file.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.align()?;
        // `seek` may park the cursor past the end, so the subtraction must
        // saturate instead of underflowing.
        let remaining = self.size.saturating_sub(self.offset);
        if remaining == 0 {
            return Ok(0);
        }
        // Never hand the source more room than this file has left; if
        // `remaining` does not fit in `usize`, the buffer is the limit.
        let limit = usize::try_from(remaining).map_or(buf.len(), |left| left.min(buf.len()));
        let read = self.source.borrow_mut().read(&mut buf[..limit])?;
        self.offset += read as u64;
        Ok(read)
    }
}

impl<'a, R: Read + Seek> Seek for File<'a, R> {
    /// Moves the cursor inside this file and returns the new offset.
    ///
    /// `SeekFrom::Start` accepts every offset up to and including the file
    /// size. `SeekFrom::Current` and `SeekFrom::End` may move past the end;
    /// reads there report end of file.
    ///
    /// # Errors
    ///
    /// Returns `InvalidInput` when the target lies before the start of the
    /// file, when a `Start` offset exceeds the file size, or when the target
    /// does not fit in a `u64`.
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        /// Applies the signed `delta` to `base`; `None` on under- or overflow.
        fn shifted(base: u64, delta: i64) -> Option<u64> {
            if delta < 0 {
                base.checked_sub(delta.unsigned_abs())
            } else {
                base.checked_add(delta.unsigned_abs())
            }
        }

        let target = match pos {
            // `<=` so the end is reachable and an empty file can be rewound.
            io::SeekFrom::Start(start) if start <= self.size => Some(start),
            io::SeekFrom::Start(_) => None,
            io::SeekFrom::Current(delta) => shifted(self.offset, delta),
            io::SeekFrom::End(delta) => shifted(self.size, delta),
        };
        match target {
            Some(offset) => {
                self.offset = offset;
                Ok(offset)
            }
            None => Err(io::ErrorKind::InvalidInput.into()),
        }
    }
}
/// The set of files found in a parsed archive.
#[derive(Debug)]
pub struct Files<'a, R: Read + Seek> {
    /// Every file of the archive.
    files: Vec<File<'a, R>>,
}

impl<'a, R: Read + Seek> Files<'a, R> {
    /// Returns mutable access to all files.
    pub fn all(&mut self) -> &mut [File<'a, R>] {
        &mut self.files[..]
    }

    /// Returns mutable references to the files whose path `filter` accepts,
    /// in the order they are stored.
    pub fn filter<F: FnMut(&Path) -> bool>(&mut self, mut filter: F) -> Vec<&mut File<'a, R>> {
        let mut selected = Vec::new();
        for file in &mut self.files {
            if filter(file.path()) {
                selected.push(file);
            }
        }
        selected
    }
}
/// Location of one entry's path within the header, plus the size of the
/// entry's contents.
#[derive(Debug, PartialEq, Eq)]
pub struct FileMeta {
/// Index of the first path byte (the entry's start index plus the length of
/// its fixed-size fields).
path_start: usize,
/// Length of the path in bytes.
path_length: usize,
/// Size of the file's contents in bytes, as stored in the header.
file_size: u64,
}
/// Errors reported while parsing an archive.
#[derive(Debug)]
pub enum Error {
/// The version stored in the archive is not one this parser handles.
VersionNotSupported,
/// The stream ended before the fixed-size leading fields could be read.
MetadataIncomplete,
/// A header field holds a value the parser rejects (for example the width
/// of the path-length fields).
MetadataWrong,
/// The stored header size does not fit in `usize`.
HeaderTooLarge,
/// Fewer header bytes could be read than the stored header size announces.
HeaderUnexpectedlySmall,
/// An entry's path length does not fit in `usize`.
PathLengthTooLong,
/// An entry's metadata is cut short.
FileMetadataIncomplete,
/// The underlying reader returned an error.
Reader(io::Error),
/// An entry's path is not valid UTF-8.
InvalidUTF8,
}
pub fn parse<'a, R: Read + Seek>(
reader: &'a RefCell<&'a mut R>,
) -> Result<Files<'a, R>, Error> {
let mut buffer = [0; MAGIC_NUMBER.len() + 4];
let read = reader
.borrow_mut()
.read(&mut buffer)
.map_err(Error::Reader)?;
let version = if read == buffer.len() {
let value = buffer[MAGIC_NUMBER.len()..MAGIC_NUMBER.len() + 4]
.try_into()
.unwrap();
u32::from_be_bytes(value)
} else {
return Err(Error::MetadataIncomplete);
};
match version {
1 => versions::parse_v1(reader),
_ => Err(Error::VersionNotSupported),
}
}
pub mod metadata {
use super::{parse_uint, FileMeta, ParseUintError, TryFrom, TryInto, UintBytesLength};
/// Errors reported while parsing the metadata of one version-1 entry.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseFileErrorV1 {
/// The byte slice is too short to hold the entry's fixed-size fields.
TooShort,
/// The stored path length does not fit in `usize`.
PathLengthTooLong,
}
/// Parses the fixed-size fields of one version-1 header entry.
///
/// `bytes` must start with the entry: an 8-byte big-endian file size followed
/// by a big-endian path length that is `path_length_bytes` bytes wide. The
/// path itself is not read; its location is returned instead.
///
/// `file_meta_start` is the index of the entry within the whole header and is
/// used to make the returned `path_start` absolute.
///
/// # Errors
///
/// * [`ParseFileErrorV1::TooShort`] when `bytes` cannot hold the two
///   fixed-size fields.
/// * [`ParseFileErrorV1::PathLengthTooLong`] when the stored path length does
///   not fit in `usize`.
pub fn parse_file_meta_v1(
    bytes: &[u8],
    file_meta_start: usize,
    path_length_bytes: UintBytesLength,
) -> Result<FileMeta, ParseFileErrorV1> {
    const FILE_SIZE_BYTES: usize = 8;

    // Only the two fixed-size fields are read here, so that is all the slice
    // has to provide; demanding more would reject an exactly-sized final
    // entry whose path is short.
    let fixed_length = FILE_SIZE_BYTES + path_length_bytes.get();
    if bytes.len() < fixed_length {
        return Err(ParseFileErrorV1::TooShort);
    }

    let file_size: [u8; FILE_SIZE_BYTES] = bytes[..FILE_SIZE_BYTES]
        .try_into()
        .expect("length was checked above");
    let file_size = u64::from_be_bytes(file_size);

    let raw_path_length = match parse_uint(&bytes[FILE_SIZE_BYTES..], path_length_bytes) {
        Ok(length) => length,
        Err(ParseUintError::BytesMissing) => return Err(ParseFileErrorV1::TooShort),
        // `parse_uint` does not report this variant.
        Err(ParseUintError::SizeTooLarge) => unreachable!(),
    };
    let path_length =
        usize::try_from(raw_path_length).map_err(|_| ParseFileErrorV1::PathLengthTooLong)?;

    Ok(FileMeta {
        path_start: file_meta_start + fixed_length,
        path_length,
        file_size,
    })
}
#[derive(Debug)]
pub struct FileIterV1<'a> {
bytes: &'a [u8],
current_position: usize,
path_length: UintBytesLength,
header_size: u64,
}
impl<'a> FileIterV1<'a> {
#[must_use]
pub fn new(
bytes: &'a [u8],
path_length_bytes: UintBytesLength,
header_size: u64,
) -> Self {
Self {
bytes,
current_position: 0,
path_length: path_length_bytes,
header_size,
}
}
}
impl<'a> Iterator for FileIterV1<'a> {
type Item = Result<FileMeta, ParseFileErrorV1>;
fn next(&mut self) -> Option<Self::Item> {
if self.current_position as u64 >= (self.header_size - (8 + 1)) {
return None;
}
let file = match parse_file_meta_v1(
&self.bytes[self.current_position..],
self.current_position,
self.path_length,
) {
Err(ParseFileErrorV1::TooShort) => None,
Err(err) => Some(Err(err)),
Ok(file) => Some(Ok(file)),
};
if let Some(file) = file.as_ref() {
if let Ok(file) = file {
self.current_position += 8 + self.path_length.get() + file.path_length;
}
}
file
}
}
}
pub mod versions {
use super::*;
pub fn parse_v1<'a, R: Read + Seek>(
read_cell: &'a RefCell<&'a mut R>,
) -> Result<Files<'a, R>, Error> {
let mut buffer = [0; 9];
let read = read_cell
.borrow_mut()
.read(&mut buffer)
.map_err(Error::Reader)?;
if read != buffer.len() {
return Err(Error::MetadataIncomplete);
}
let header_size = u64::from_be_bytes(buffer[0..8].try_into().unwrap());
let header_size_usize = usize::try_from(header_size)
.ok()
.ok_or(Error::HeaderTooLarge)?;
let path_length_bytes = buffer[8];
let path_length_bytes = UintBytesLength::new(path_length_bytes)
.ok()
.ok_or(Error::MetadataWrong)?;
let header = {
let mut header = Vec::with_capacity(header_size_usize);
unsafe { header.set_len(header.capacity()) };
if read_saturate(&mut header[..], &mut *read_cell.borrow_mut())
.map_err(Error::Reader)?
!= header.len()
{
return Err(Error::HeaderUnexpectedlySmall);
};
header
};
let mut position_in_file = MAGIC_NUMBER.len() as u64 + 4 + header_size;
let files = {
let mut vec = Vec::with_capacity(512);
for file in metadata::FileIterV1::new(&header, path_length_bytes, header_size) {
let file = match file {
Ok(file_meta) => {
let path = {
let vec = header[file_meta.path_start
..file_meta.path_start + file_meta.path_length]
.to_vec();
let string =
String::from_utf8(vec).ok().ok_or(Error::InvalidUTF8)?;
PathBuf::from(string)
};
let file = File {
source: &read_cell,
size: file_meta.file_size,
position: position_in_file,
path,
offset: 0,
};
position_in_file += file_meta.file_size;
file
}
Err(metadata::ParseFileErrorV1::PathLengthTooLong) => {
return Err(Error::PathLengthTooLong)
}
Err(metadata::ParseFileErrorV1::TooShort) => {
return Err(Error::FileMetadataIncomplete)
}
};
vec.push(file);
}
vec
};
Ok(Files { files })
}
}
/// Errors reported when handling variable-width unsigned integers.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum ParseUintError {
/// The requested width in bytes is too large for the integer type.
SizeTooLarge,
/// The input holds fewer bytes than the requested width.
BytesMissing,
}
/// Width, in bytes, of a big-endian unsigned integer field.
///
/// Values created through [`UintBytesLength::new`] are strictly smaller than
/// the size of [`UintParseType`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub struct UintBytesLength(u8);

impl UintBytesLength {
    /// Wraps `length` after checking it against the size of [`UintParseType`].
    ///
    /// # Errors
    ///
    /// Returns [`ParseUintError::SizeTooLarge`] when `length` is greater than
    /// or equal to the size of [`UintParseType`] in bytes.
    pub fn new(length: u8) -> Result<Self, ParseUintError> {
        let limit = mem::size_of::<UintParseType>();
        if usize::from(length) < limit {
            Ok(Self(length))
        } else {
            Err(ParseUintError::SizeTooLarge)
        }
    }

    /// Returns the width as a `usize`.
    #[must_use]
    pub fn get(self) -> usize {
        usize::from(self.0)
    }

    /// Returns the width as the `u8` it was created from.
    #[must_use]
    pub fn get_u8(self) -> u8 {
        let Self(width) = self;
        width
    }
}
/// Integer type produced by [`parse_uint`].
pub type UintParseType = u64;

/// Decodes the first `length` bytes of `bytes` as a big-endian unsigned
/// integer.
///
/// Bytes beyond `length` are ignored.
///
/// # Errors
///
/// Returns [`ParseUintError::BytesMissing`] when `bytes` holds fewer than
/// `length` bytes.
pub fn parse_uint(
    bytes: &[u8],
    length: UintBytesLength,
) -> Result<UintParseType, ParseUintError> {
    const WIDTH: usize = mem::size_of::<UintParseType>();

    let count = length.get();
    match bytes.get(..count) {
        None => Err(ParseUintError::BytesMissing),
        Some(digits) => {
            // Right-align the digits so the missing high-order bytes are zero.
            let mut padded = [0_u8; WIDTH];
            padded[WIDTH - count..].copy_from_slice(digits);
            Ok(UintParseType::from_be_bytes(padded))
        }
    }
}
}
pub mod serialize {
use super::*;
use std::{
io,
io::prelude::*,
path::{Path, PathBuf},
};
/// Errors reported while writing an archive.
#[derive(Debug)]
pub enum Error {
/// A path is not valid UTF-8 and therefore cannot be stored.
InvalidUTF8,
/// Reading the contents of an input file failed.
Reader(io::Error),
/// Writing to, seeking in, or flushing the destination failed.
Writer(io::Error),
/// Not constructed anywhere in this module; presumably meant for the
/// caller-supplied `open` callback to report a failure — confirm with callers.
Open(String),
}
/// Recursively collects the files below `path` that `filter` accepts.
///
/// Directories are descended into. `filter` is only asked about regular
/// files, and entries whose type is neither file nor directory are ignored.
///
/// # Errors
///
/// Returns the first I/O error hit while listing a directory or querying an
/// entry's type.
pub fn walk_dir<P: AsRef<Path>, F: Fn(&Path) -> bool>(
    path: P,
    filter: &F,
) -> io::Result<Vec<PathBuf>> {
    /// Appends the accepted files below `dir` to `found`.
    fn descend<F: Fn(&Path) -> bool>(
        dir: &Path,
        keep: &F,
        found: &mut Vec<PathBuf>,
    ) -> io::Result<()> {
        for entry in dir.read_dir()? {
            let entry = entry?;
            let kind = entry.file_type()?;
            let entry_path = entry.path();
            // A file type is never both a directory and a regular file, so
            // the order of these two checks does not matter.
            if kind.is_dir() {
                descend(&entry_path, keep, found)?;
            } else if kind.is_file() && keep(&entry_path) {
                found.push(entry_path);
            }
        }
        Ok(())
    }

    let mut found = Vec::new();
    descend(path.as_ref(), filter, &mut found)?;
    Ok(found)
}
/// Writes an archive containing `paths` to `dest`.
///
/// `open` is called with each path and supplies the reader for that file's
/// contents. This function forwards all of its arguments to
/// [`versions::write_v1`].
///
/// # Errors
///
/// Returns whatever error [`versions::write_v1`] reports.
pub fn write<W: Write + Seek, R: Read, F: Fn(&Path) -> Result<R, Error>, P: AsRef<Path>>(
paths: &[P],
dest: W,
open: F,
) -> Result<(), Error> {
versions::write_v1(paths, dest, open)
}
pub mod versions {
use super::*;
pub fn write_v1<
W: Write + Seek,
R: Read,
F: Fn(&Path) -> Result<R, Error>,
P: AsRef<Path>,
>(
paths: &[P],
mut dest: W,
open: F,
) -> Result<(), Error> {
let mut metadata = Vec::with_capacity(DEFAULT_BUFFER_SIZE);
metadata.extend_from_slice(MAGIC_NUMBER);
let version: u32 = 1;
metadata.extend_from_slice(&version.to_be_bytes());
let path_length_bytes = {
let mut longest: deserialize::UintParseType = 0;
for path in paths {
let length = path.as_ref().to_str().ok_or(Error::InvalidUTF8)?.len() as u64;
if length > longest {
longest = length;
}
}
let mut shift = longest >> 8;
let mut bytes: u8 = 1;
loop {
if shift > 255 {
shift >>= 8;
bytes += 1;
continue;
} else {
break;
}
}
bytes
};
let zero_u64 = &[0; 8];
metadata.extend_from_slice(zero_u64);
metadata.extend_from_slice(&[path_length_bytes]);
let mut file_size_positions = Vec::with_capacity(paths.len());
unsafe { file_size_positions.set_len(file_size_positions.capacity()) };
for (file, size_pos) in paths.iter().zip(file_size_positions.iter_mut()) {
let path = file.as_ref();
let s = path.to_str().unwrap();
*size_pos = metadata.len();
metadata.extend_from_slice(zero_u64);
let path_length = (s.len() as u64).to_be_bytes();
let path_length_bytes = &path_length[8 - path_length_bytes as usize..];
metadata.extend_from_slice(path_length_bytes);
metadata.extend_from_slice(s.as_bytes());
}
let header_offset = MAGIC_NUMBER.len() + 4;
let header_size = (metadata.len() - header_offset) as u64;
metadata[header_offset..header_offset + 8].copy_from_slice(&header_size.to_be_bytes());
dest.write_all(&metadata).map_err(Error::Writer)?;
let mut buffer = Vec::with_capacity(DEFAULT_BUFFER_SIZE);
unsafe { buffer.set_len(buffer.capacity()) };
for (file, size_pos) in paths.iter().zip(file_size_positions.iter()) {
let path = file.as_ref();
let mut reader = open(path)?;
let mut size = 0;
loop {
let read = match reader.read(&mut buffer) {
Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
Err(err) => return Err(Error::Reader(err)),
Ok(0) => break,
Ok(read) => read,
};
size += read as u64;
dest.write_all(&buffer[..read]).map_err(Error::Writer)?;
}
metadata[*size_pos..size_pos + 8].copy_from_slice(&size.to_be_bytes());
}
dest.seek(io::SeekFrom::Start(0)).map_err(Error::Writer)?;
dest.write_all(&metadata).map_err(Error::Writer)?;
dest.flush().map_err(Error::Writer)?;
Ok(())
}
}
}