use std::pin::Pin;
use crate::storage::checksum;
use crate::storage::compression;
use crate::storage::format::{
FileFooter, FileHeader, FileVersion, FormatError, SectionEntry, SectionIndex, FOOTER_SIZE,
HEADER_SIZE,
};
#[cfg(not(target_arch = "wasm32"))]
use memmap2::Mmap;
#[cfg(not(target_arch = "wasm32"))]
use std::fs::File;
#[cfg(not(target_arch = "wasm32"))]
use std::path::{Path, PathBuf};
#[cfg(target_arch = "wasm32")]
use std::vec::Vec;
/// Where a reader obtains the bytes of a file.
///
/// The set of variants depends on the compilation target and enabled features.
pub enum FileSource {
/// A filesystem path (non-wasm targets only).
#[cfg(not(target_arch = "wasm32"))]
Path(PathBuf),
/// The file contents held in an in-memory buffer (wasm32 only).
#[cfg(target_arch = "wasm32")]
Buffer(Vec<u8>),
/// A source read through a range loader (wasm32 with the `wasm-indexeddb` feature).
#[cfg(all(target_arch = "wasm32", feature = "wasm-indexeddb"))]
IndexedDb {
/// Database name. The wasm reader in this file ignores it when opening.
db_name: String,
/// Record key. The wasm reader in this file ignores it when opening.
key: String,
/// Total length of the stored file in bytes.
length: u64,
/// Loader used to fetch byte ranges of the stored file.
loader: Box<dyn RangeLoader>,
},
}
/// Boxed, pinned future returned by `FileReader::prefetch_sections`.
///
/// On non-wasm targets the future is additionally required to be `Send`.
#[cfg(not(target_arch = "wasm32"))]
pub type PrefetchFuture<'a> =
Pin<Box<dyn std::future::Future<Output = Result<(), FormatError>> + Send + 'a>>;
/// Boxed, pinned future returned by `FileReader::prefetch_sections`.
///
/// The wasm32 variant carries no `Send` bound.
#[cfg(target_arch = "wasm32")]
pub type PrefetchFuture<'a> =
Pin<Box<dyn std::future::Future<Output = Result<(), FormatError>> + 'a>>;
/// Source of file bytes that can be fetched one byte range at a time (wasm32 only).
#[cfg(target_arch = "wasm32")]
pub trait RangeLoader: Send + Sync {
/// Returns the bytes of the range starting at `offset` and spanning `length` bytes.
///
/// NOTE(review): callers in this file convert some results into fixed-size arrays and
/// therefore rely on exactly `length` bytes being returned — confirm every implementor
/// guarantees this.
fn load_range(&self, offset: u64, length: u64) -> Result<Vec<u8>, FormatError>;
}
/// `RangeLoader` that serves ranges out of an owned in-memory byte buffer (wasm32 only).
#[cfg(target_arch = "wasm32")]
pub struct BufferRangeLoader {
// Complete contents that ranges are copied from.
data: Vec<u8>,
}
#[cfg(target_arch = "wasm32")]
impl BufferRangeLoader {
    /// Wraps `data` so that byte ranges of it can be served through [`RangeLoader`].
    pub fn new(data: Vec<u8>) -> Self {
        BufferRangeLoader { data }
    }
}
#[cfg(target_arch = "wasm32")]
impl RangeLoader for BufferRangeLoader {
    /// Copies `length` bytes starting at `offset` out of the wrapped buffer.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when `offset` or `length` does not fit in
    /// `usize`, when `offset + length` overflows, or when the range extends past the end
    /// of the buffer.
    fn load_range(&self, offset: u64, length: u64) -> Result<Vec<u8>, FormatError> {
        // `usize` is 32 bits wide on wasm32, so a plain `as usize` cast would silently
        // truncate a large request and alias it onto a valid low range.
        let start = usize::try_from(offset).map_err(|_| FormatError::IncompleteWrite)?;
        let len = usize::try_from(length).map_err(|_| FormatError::IncompleteWrite)?;
        let end = start
            .checked_add(len)
            .ok_or(FormatError::IncompleteWrite)?;
        // `get` yields `None` for any range reaching past the end of the buffer.
        self.data
            .get(start..end)
            .map(|bytes| bytes.to_vec())
            .ok_or(FormatError::IncompleteWrite)
    }
}
/// Read-only access to a file made of a header, a footer, a section index and sections.
///
/// The behaviour notes on each method describe the implementations in this file.
pub trait FileReader {
/// Opens `source` and parses its footer, section index and header.
fn open(source: FileSource) -> Result<Self, FormatError>
where
Self: Sized;
/// Returns the header parsed when the reader was opened.
fn header(&self) -> &FileHeader;
/// Returns the footer parsed when the reader was opened.
fn footer(&self) -> &FileFooter;
/// Returns the section index parsed when the reader was opened.
fn section_index(&self) -> &SectionIndex;
/// Returns the decompressed contents of section `section_id`.
///
/// The stored bytes are checksum-verified before decompression, and both the stored and
/// the decompressed lengths are compared with the values recorded in the index entry.
fn read_section(&self, section_id: u32) -> Result<Vec<u8>, FormatError>;
/// Returns the bytes of section `section_id` exactly as stored, without checksum
/// verification or decompression.
fn read_section_raw(&self, section_id: u32) -> Result<Vec<u8>, FormatError>;
/// Verifies the checksum of the stored bytes of section `section_id`.
fn validate_section(&self, section_id: u32) -> Result<(), FormatError>;
/// Validates every section listed in the index, stopping at the first failure.
fn validate_all(&self) -> Result<(), FormatError>;
/// Returns a future that reads the raw bytes of each listed section in order and
/// discards them, failing on the first error.
fn prefetch_sections<'a>(&'a self, section_ids: &'a [u32]) -> PrefetchFuture<'a>;
}
/// Memory-mapped `FileReader` implementation for non-wasm targets.
#[cfg(not(target_arch = "wasm32"))]
pub struct AlopexFileReader {
// Read-only mapping of the whole file; section bytes are copied out of it on demand.
mmap: Mmap,
// Header parsed from the first `HEADER_SIZE` bytes of the mapping.
header: FileHeader,
// Footer parsed from the last `FOOTER_SIZE` bytes of the mapping.
footer: FileFooter,
// Section index parsed from the offset recorded in the footer.
section_index: SectionIndex,
}
#[cfg(not(target_arch = "wasm32"))]
impl AlopexFileReader {
    /// Opens `path` read-only and memory-maps the whole file.
    ///
    /// # Errors
    /// Failure to open or to map the file is reported as
    /// [`FormatError::IncompleteWrite`]; the underlying I/O error is discarded.
    fn map_file(path: &Path) -> Result<Mmap, FormatError> {
        let file = File::open(path).map_err(|_| FormatError::IncompleteWrite)?;
        // SAFETY: the mapping is only ever read through shared slices. `Mmap::map` is
        // unsafe because another process may modify or truncate the file while it is
        // mapped. NOTE(review): nothing in this file prevents that — callers must treat
        // the file as immutable for as long as the reader is alive.
        let mapped = unsafe { Mmap::map(&file) };
        mapped.map_err(|_| FormatError::IncompleteWrite)
    }

    /// Parses the footer from the last `FOOTER_SIZE` bytes of the mapping.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when the mapping is shorter than a footer,
    /// or whatever `FileFooter::from_bytes` reports.
    fn read_footer(mmap: &Mmap) -> Result<FileFooter, FormatError> {
        let data: &[u8] = mmap;
        let start = data
            .len()
            .checked_sub(FOOTER_SIZE)
            .ok_or(FormatError::IncompleteWrite)?;
        let mut buf = [0u8; FOOTER_SIZE];
        buf.copy_from_slice(&data[start..]);
        FileFooter::from_bytes(&buf)
    }

    /// Parses the header from the first `HEADER_SIZE` bytes of the mapping and checks it
    /// for compatibility with `FileVersion::CURRENT`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when the mapping is shorter than a header,
    /// or whatever parsing / the compatibility check reports.
    fn read_header(mmap: &Mmap) -> Result<FileHeader, FormatError> {
        let data: &[u8] = mmap;
        let bytes = data
            .get(..HEADER_SIZE)
            .ok_or(FormatError::IncompleteWrite)?;
        let mut buf = [0u8; HEADER_SIZE];
        buf.copy_from_slice(bytes);
        let header = FileHeader::from_bytes(&buf)?;
        header.check_compatibility(&FileVersion::CURRENT)?;
        Ok(header)
    }

    /// Parses the section index located at `footer.section_index_offset`.
    ///
    /// The index starts with a little-endian `u32` entry count followed by that many
    /// entries of `SectionEntry::SIZE` bytes each.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when the offset or the computed index size
    /// does not fit in `usize`, or when the index would extend past the end of the
    /// mapping; otherwise whatever `SectionIndex::from_bytes` reports.
    fn read_section_index(mmap: &Mmap, footer: &FileFooter) -> Result<SectionIndex, FormatError> {
        let data: &[u8] = mmap;
        // Offset and count come straight from the file, so every conversion and every
        // arithmetic step is checked instead of being allowed to truncate or wrap.
        let offset = usize::try_from(footer.section_index_offset)
            .map_err(|_| FormatError::IncompleteWrite)?;
        let count_end = offset
            .checked_add(4)
            .ok_or(FormatError::IncompleteWrite)?;
        let count_bytes: [u8; 4] = data
            .get(offset..count_end)
            .ok_or(FormatError::IncompleteWrite)?
            .try_into()
            .map_err(|_| FormatError::IncompleteWrite)?;
        let count = u32::from_le_bytes(count_bytes);

        // Total index size: 4-byte count prefix plus `count` fixed-size entries.
        let expected = usize::try_from(count)
            .ok()
            .and_then(|n| n.checked_mul(SectionEntry::SIZE))
            .and_then(|n| n.checked_add(4))
            .ok_or(FormatError::IncompleteWrite)?;
        let end = offset
            .checked_add(expected)
            .ok_or(FormatError::IncompleteWrite)?;
        let index_bytes = data
            .get(offset..end)
            .ok_or(FormatError::IncompleteWrite)?;
        SectionIndex::from_bytes(index_bytes)
    }

    /// Looks up the index entry for `section_id`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when no entry has that id.
    fn entry(&self, section_id: u32) -> Result<&SectionEntry, FormatError> {
        self.section_index
            .find_by_id(section_id)
            .ok_or(FormatError::IncompleteWrite)
    }
}
#[cfg(not(target_arch = "wasm32"))]
impl FileReader for AlopexFileReader {
    /// Memory-maps the file at the given path and parses footer, section index and
    /// header, in that order.
    fn open(source: FileSource) -> Result<Self, FormatError> {
        // `Path` is the only variant compiled in on non-wasm targets, so this pattern is
        // irrefutable here.
        let FileSource::Path(path) = source;
        let mmap = Self::map_file(&path)?;
        let footer = Self::read_footer(&mmap)?;
        let section_index = Self::read_section_index(&mmap, &footer)?;
        let header = Self::read_header(&mmap)?;
        Ok(Self {
            mmap,
            header,
            footer,
            section_index,
        })
    }

    /// Returns the header parsed at open time.
    fn header(&self) -> &FileHeader {
        &self.header
    }

    /// Returns the footer parsed at open time.
    fn footer(&self) -> &FileFooter {
        &self.footer
    }

    /// Returns the section index parsed at open time.
    fn section_index(&self) -> &SectionIndex {
        &self.section_index
    }

    /// Returns the decompressed contents of section `section_id`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] for an unknown id, an out-of-bounds
    /// section, or a stored / decompressed length that disagrees with the index entry;
    /// checksum and decompression failures are propagated from their helpers.
    fn read_section(&self, section_id: u32) -> Result<Vec<u8>, FormatError> {
        let entry = self.entry(section_id)?;
        let raw = self.read_section_raw(section_id)?;
        if raw.len() as u64 != entry.compressed_length {
            return Err(FormatError::IncompleteWrite);
        }
        // Verify the stored bytes before spending time on decompression.
        checksum::verify(&raw, self.header.checksum_algorithm, entry.checksum as u64)?;
        let decompressed = compression::decompress(&raw, entry.compression)?;
        if decompressed.len() as u64 != entry.uncompressed_length {
            return Err(FormatError::IncompleteWrite);
        }
        Ok(decompressed)
    }

    /// Copies the stored bytes of section `section_id` out of the mapping, without
    /// checksum verification or decompression.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] for an unknown id, or when the entry's
    /// offset / length do not fit in `usize` or describe a range outside the mapping.
    fn read_section_raw(&self, section_id: u32) -> Result<Vec<u8>, FormatError> {
        let entry = self.entry(section_id)?;
        // Checked conversions: an `as usize` cast would truncate on 32-bit targets and
        // could make a bogus entry look like a valid in-bounds range.
        let start = usize::try_from(entry.offset).map_err(|_| FormatError::IncompleteWrite)?;
        let len = usize::try_from(entry.compressed_length)
            .map_err(|_| FormatError::IncompleteWrite)?;
        let end = start
            .checked_add(len)
            .ok_or(FormatError::IncompleteWrite)?;
        let data: &[u8] = &self.mmap;
        data.get(start..end)
            .map(|bytes| bytes.to_vec())
            .ok_or(FormatError::IncompleteWrite)
    }

    /// Verifies the checksum of the stored bytes of section `section_id`.
    fn validate_section(&self, section_id: u32) -> Result<(), FormatError> {
        let entry = self.entry(section_id)?;
        let raw = self.read_section_raw(section_id)?;
        checksum::verify(&raw, self.header.checksum_algorithm, entry.checksum as u64)
    }

    /// Validates every section listed in the index, stopping at the first failure.
    fn validate_all(&self) -> Result<(), FormatError> {
        self.section_index
            .entries
            .iter()
            .try_for_each(|entry| self.validate_section(entry.section_id))
    }

    /// Returns a future that reads the raw bytes of each listed section in order and
    /// discards them, failing on the first error.
    fn prefetch_sections<'a>(&'a self, section_ids: &'a [u32]) -> PrefetchFuture<'a> {
        // The slice already lives for `'a`, so it can be borrowed by the future directly
        // instead of being cloned into a `Vec`.
        Box::pin(async move {
            for &id in section_ids {
                let _ = self.read_section_raw(id)?;
            }
            Ok(())
        })
    }
}
/// Tuning options for the wasm32 reader.
#[cfg(target_arch = "wasm32")]
pub struct WasmReaderConfig {
/// Largest source size, in bytes, that is held fully in memory. Larger sources are read
/// through `range_loader` instead.
pub full_load_threshold_bytes: usize,
/// Loader used for ranged reads. Opening a source above the threshold fails without one.
pub range_loader: Option<Box<dyn RangeLoader>>,
}
#[cfg(target_arch = "wasm32")]
impl Default for WasmReaderConfig {
    /// Default configuration: a 100 MiB full-load threshold and no range loader.
    fn default() -> Self {
        // 100 MiB expressed as a shift: 100 * 2^20 bytes.
        let full_load_threshold_bytes = 100usize << 20;
        WasmReaderConfig {
            range_loader: None,
            full_load_threshold_bytes,
        }
    }
}
/// `FileReader` implementation for wasm32, backed either by an in-memory buffer or by a
/// range loader.
#[cfg(target_arch = "wasm32")]
pub struct AlopexFileReader {
// Full file contents when the reader was built from a buffer; `None` in ranged mode.
buffer: Option<Vec<u8>>,
// Loader used to fetch sections in ranged mode; `None` when `buffer` is set.
loader: Option<Box<dyn RangeLoader>>,
// Length recorded at construction time; not read anywhere in this file.
#[allow(dead_code)]
length: u64,
// Header parsed at construction time.
header: FileHeader,
// Footer parsed at construction time.
footer: FileFooter,
// Section index parsed at construction time.
section_index: SectionIndex,
// Configuration the reader was built with; not read after construction in this file.
#[allow(dead_code)]
config: WasmReaderConfig,
}
#[cfg(target_arch = "wasm32")]
impl AlopexFileReader {
    /// Opens `source` using an explicit configuration.
    ///
    /// For an `IndexedDb` source the loader carried by the source replaces any
    /// `range_loader` already present in `config`.
    pub fn open_with_config(
        source: FileSource,
        config: WasmReaderConfig,
    ) -> Result<Self, FormatError> {
        match source {
            FileSource::Buffer(buf) => Self::from_buffer(buf, config),
            #[cfg(feature = "wasm-indexeddb")]
            FileSource::IndexedDb { length, loader, .. } => Self::from_indexed_db(
                length,
                WasmReaderConfig {
                    range_loader: Some(loader),
                    ..config
                },
            ),
        }
    }

    /// Builds a reader over an in-memory copy of the file.
    ///
    /// A buffer larger than `config.full_load_threshold_bytes` is not kept: the reader is
    /// built in ranged mode from `config.range_loader` instead.
    /// NOTE(review): this presumes the configured loader serves the same bytes as
    /// `buffer` — confirm with callers.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when the buffer exceeds the threshold and
    /// no range loader is configured, or when it is too short to hold a header and a
    /// footer; parsing errors are propagated.
    fn from_buffer(buffer: Vec<u8>, config: WasmReaderConfig) -> Result<Self, FormatError> {
        if buffer.len() > config.full_load_threshold_bytes {
            return if config.range_loader.is_some() {
                Self::from_indexed_db(buffer.len() as u64, config)
            } else {
                Err(FormatError::IncompleteWrite)
            };
        }
        if buffer.len() < HEADER_SIZE + FOOTER_SIZE {
            return Err(FormatError::IncompleteWrite);
        }
        let footer = Self::read_footer(&buffer)?;
        let section_index = Self::read_section_index(&buffer, &footer)?;
        let header = Self::read_header(&buffer)?;
        // Record the real size of the source, matching what the ranged path stores.
        let length = buffer.len() as u64;
        Ok(Self {
            buffer: Some(buffer),
            loader: None,
            length,
            header,
            footer,
            section_index,
            config,
        })
    }

    /// Builds a reader over a source of `length` bytes served by `config.range_loader`.
    ///
    /// Sources no larger than `config.full_load_threshold_bytes` are loaded in one piece
    /// and handed to [`Self::from_buffer`]. Otherwise only the footer, the section index
    /// and the header are fetched, and sections are loaded on demand.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when no range loader is configured, when
    /// the source is shorter than a footer, when a loaded range has an unexpected size,
    /// or when the section index would extend past `length`; loader and parsing errors
    /// are propagated.
    fn from_indexed_db(length: u64, mut config: WasmReaderConfig) -> Result<Self, FormatError> {
        let loader = config
            .range_loader
            .take()
            .ok_or(FormatError::IncompleteWrite)?;

        // `usize` is 32 bits on wasm32: a length that does not fit can never be within
        // the threshold, so it must not be truncated before the comparison.
        let fits_in_memory = usize::try_from(length)
            .map(|len| len <= config.full_load_threshold_bytes)
            .unwrap_or(false);
        if fits_in_memory {
            let full = loader.load_range(0, length)?;
            return Self::from_buffer(
                full,
                WasmReaderConfig {
                    range_loader: Some(loader),
                    ..config
                },
            );
        }

        // Footer: the last `FOOTER_SIZE` bytes of the source.
        let footer_start = length
            .checked_sub(FOOTER_SIZE as u64)
            .ok_or(FormatError::IncompleteWrite)?;
        let footer_array: [u8; FOOTER_SIZE] = loader
            .load_range(footer_start, FOOTER_SIZE as u64)?
            .try_into()
            .map_err(|_| FormatError::IncompleteWrite)?;
        let footer = FileFooter::from_bytes(&footer_array)?;

        // Section index: little-endian u32 entry count followed by fixed-size entries.
        let index_offset = footer.section_index_offset;
        let count_arr: [u8; 4] = loader
            .load_range(index_offset, 4)?
            .try_into()
            .map_err(|_| FormatError::IncompleteWrite)?;
        let count = u32::from_le_bytes(count_arr);
        // Size computed in u64 with checked steps; the count comes from the file and
        // `4 + count * SIZE` can exceed a 32-bit `usize`.
        let index_len = u64::from(count)
            .checked_mul(SectionEntry::SIZE as u64)
            .and_then(|n| n.checked_add(4))
            .ok_or(FormatError::IncompleteWrite)?;
        // Same bound the in-memory path enforces: the index must lie inside the file.
        let index_end = index_offset
            .checked_add(index_len)
            .ok_or(FormatError::IncompleteWrite)?;
        if index_end > length {
            return Err(FormatError::IncompleteWrite);
        }
        let index_bytes = loader.load_range(index_offset, index_len)?;
        let section_index = SectionIndex::from_bytes(&index_bytes)?;

        // Header: the first `HEADER_SIZE` bytes of the source.
        let header_array: [u8; HEADER_SIZE] = loader
            .load_range(0, HEADER_SIZE as u64)?
            .try_into()
            .map_err(|_| FormatError::IncompleteWrite)?;
        let header = FileHeader::from_bytes(&header_array)?;
        header.check_compatibility(&FileVersion::CURRENT)?;

        Ok(Self {
            buffer: None,
            loader: Some(loader),
            length,
            header,
            footer,
            section_index,
            config,
        })
    }

    /// Parses the footer from the last `FOOTER_SIZE` bytes of `buffer`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when `buffer` is shorter than a footer,
    /// or whatever `FileFooter::from_bytes` reports.
    fn read_footer(buffer: &[u8]) -> Result<FileFooter, FormatError> {
        let start = buffer
            .len()
            .checked_sub(FOOTER_SIZE)
            .ok_or(FormatError::IncompleteWrite)?;
        let mut buf = [0u8; FOOTER_SIZE];
        buf.copy_from_slice(&buffer[start..]);
        FileFooter::from_bytes(&buf)
    }

    /// Parses the header from the first `HEADER_SIZE` bytes of `buffer` and checks it for
    /// compatibility with `FileVersion::CURRENT`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when `buffer` is shorter than a header,
    /// or whatever parsing / the compatibility check reports.
    fn read_header(buffer: &[u8]) -> Result<FileHeader, FormatError> {
        let bytes = buffer
            .get(..HEADER_SIZE)
            .ok_or(FormatError::IncompleteWrite)?;
        let mut buf = [0u8; HEADER_SIZE];
        buf.copy_from_slice(bytes);
        let header = FileHeader::from_bytes(&buf)?;
        header.check_compatibility(&FileVersion::CURRENT)?;
        Ok(header)
    }

    /// Parses the section index located at `footer.section_index_offset` in `buffer`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when the offset or the computed index size
    /// does not fit in `usize`, or when the index would extend past the end of `buffer`;
    /// otherwise whatever `SectionIndex::from_bytes` reports.
    fn read_section_index(buffer: &[u8], footer: &FileFooter) -> Result<SectionIndex, FormatError> {
        // The offset is a u64 read from the file and `usize` is 32 bits on wasm32, so
        // the conversion and all following arithmetic are checked.
        let offset = usize::try_from(footer.section_index_offset)
            .map_err(|_| FormatError::IncompleteWrite)?;
        let count_end = offset
            .checked_add(4)
            .ok_or(FormatError::IncompleteWrite)?;
        let count_bytes: [u8; 4] = buffer
            .get(offset..count_end)
            .ok_or(FormatError::IncompleteWrite)?
            .try_into()
            .map_err(|_| FormatError::IncompleteWrite)?;
        let count = u32::from_le_bytes(count_bytes);

        // Total index size: 4-byte count prefix plus `count` fixed-size entries.
        let expected = usize::try_from(count)
            .ok()
            .and_then(|n| n.checked_mul(SectionEntry::SIZE))
            .and_then(|n| n.checked_add(4))
            .ok_or(FormatError::IncompleteWrite)?;
        let end = offset
            .checked_add(expected)
            .ok_or(FormatError::IncompleteWrite)?;
        let index_bytes = buffer
            .get(offset..end)
            .ok_or(FormatError::IncompleteWrite)?;
        SectionIndex::from_bytes(index_bytes)
    }

    /// Looks up the index entry for `section_id`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] when no entry has that id.
    fn entry(&self, section_id: u32) -> Result<&SectionEntry, FormatError> {
        self.section_index
            .find_by_id(section_id)
            .ok_or(FormatError::IncompleteWrite)
    }
}
#[cfg(target_arch = "wasm32")]
impl FileReader for AlopexFileReader {
    /// Opens `source` with the default [`WasmReaderConfig`].
    fn open(source: FileSource) -> Result<Self, FormatError>
    where
        Self: Sized,
    {
        Self::open_with_config(source, WasmReaderConfig::default())
    }

    /// Returns the header parsed at open time.
    fn header(&self) -> &FileHeader {
        &self.header
    }

    /// Returns the footer parsed at open time.
    fn footer(&self) -> &FileFooter {
        &self.footer
    }

    /// Returns the section index parsed at open time.
    fn section_index(&self) -> &SectionIndex {
        &self.section_index
    }

    /// Returns the decompressed contents of section `section_id`.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] for an unknown id, an unreadable section,
    /// or a stored / decompressed length that disagrees with the index entry; checksum
    /// and decompression failures are propagated from their helpers.
    fn read_section(&self, section_id: u32) -> Result<Vec<u8>, FormatError> {
        let entry = self.entry(section_id)?;
        let raw = self.read_section_raw(section_id)?;
        // Also guards against a range loader that returned fewer or more bytes than asked.
        if raw.len() as u64 != entry.compressed_length {
            return Err(FormatError::IncompleteWrite);
        }
        // Verify the stored bytes before spending time on decompression.
        checksum::verify(&raw, self.header.checksum_algorithm, entry.checksum as u64)?;
        let decompressed = compression::decompress(&raw, entry.compression)?;
        if decompressed.len() as u64 != entry.uncompressed_length {
            return Err(FormatError::IncompleteWrite);
        }
        Ok(decompressed)
    }

    /// Returns the stored bytes of section `section_id`, without checksum verification or
    /// decompression, from the in-memory buffer or through the range loader.
    ///
    /// # Errors
    /// Returns [`FormatError::IncompleteWrite`] for an unknown id, when the entry's
    /// offset / length do not fit in `usize` or fall outside the buffer, or when the
    /// reader has neither a buffer nor a loader; loader errors are propagated.
    fn read_section_raw(&self, section_id: u32) -> Result<Vec<u8>, FormatError> {
        let entry = self.entry(section_id)?;
        match &self.buffer {
            Some(buf) => {
                // `usize` is 32 bits on wasm32: an `as usize` cast would truncate a
                // corrupt 64-bit offset or length into a seemingly valid range.
                let start = usize::try_from(entry.offset)
                    .map_err(|_| FormatError::IncompleteWrite)?;
                let len = usize::try_from(entry.compressed_length)
                    .map_err(|_| FormatError::IncompleteWrite)?;
                let end = start
                    .checked_add(len)
                    .ok_or(FormatError::IncompleteWrite)?;
                buf.get(start..end)
                    .map(|bytes| bytes.to_vec())
                    .ok_or(FormatError::IncompleteWrite)
            }
            None => {
                let loader = self.loader.as_ref().ok_or(FormatError::IncompleteWrite)?;
                loader.load_range(entry.offset, entry.compressed_length)
            }
        }
    }

    /// Verifies the checksum of the stored bytes of section `section_id`.
    fn validate_section(&self, section_id: u32) -> Result<(), FormatError> {
        let entry = self.entry(section_id)?;
        let raw = self.read_section_raw(section_id)?;
        checksum::verify(&raw, self.header.checksum_algorithm, entry.checksum as u64)
    }

    /// Validates every section listed in the index, stopping at the first failure.
    fn validate_all(&self) -> Result<(), FormatError> {
        self.section_index
            .entries
            .iter()
            .try_for_each(|entry| self.validate_section(entry.section_id))
    }

    /// Returns a future that reads the raw bytes of each listed section in order and
    /// discards them, failing on the first error.
    fn prefetch_sections<'a>(&'a self, section_ids: &'a [u32]) -> PrefetchFuture<'a> {
        // The slice already lives for `'a`, so it can be borrowed by the future directly
        // instead of being cloned into a `Vec`.
        Box::pin(async move {
            for &id in section_ids {
                let _ = self.read_section_raw(id)?;
            }
            Ok(())
        })
    }
}