pub struct File {
pub header: Header,
/* private fields */
}

Expand description
Read-only interface for accessing Hexz snapshot data.
File is the primary API for reading compressed, block-indexed snapshots.
It handles:
- Block-level decompression with LRU caching
- Optional AES-256-GCM decryption
- Thin snapshot parent chaining
- Dual-stream access (disk and memory)
- Random access with minimal I/O
§Thread Safety
File is Send + Sync and can be safely shared across threads via Arc.
Internal caches use Mutex for synchronization.
§Performance
- Cache hit latency: ~80μs (warm cache)
- Cache miss latency: ~1ms (cold cache, local storage)
- Sequential throughput: ~2-3 GB/s (NVMe + LZ4)
- Memory overhead: ~150MB typical (configurable)
§Examples
§Basic Usage
use hexz_core::{File, SnapshotStream};
use hexz_core::store::local::FileBackend;
use hexz_core::algo::compression::lz4::Lz4Compressor;
use std::sync::Arc;
let backend = Arc::new(FileBackend::new("snapshot.hxz".as_ref())?);
let compressor = Box::new(Lz4Compressor::new());
let snapshot = File::new(backend, compressor, None)?;
// Read 4KB at offset 1MB
let data = snapshot.read_at(SnapshotStream::Disk, 1024 * 1024, 4096)?;
assert_eq!(data.len(), 4096);

§Thin Snapshots (with parent)
use hexz_core::File;
use hexz_core::store::local::FileBackend;
use hexz_core::algo::compression::lz4::Lz4Compressor;
use std::sync::Arc;
// Open base snapshot
let base_backend = Arc::new(FileBackend::new("base.hxz".as_ref())?);
let base = File::new(
base_backend,
Box::new(Lz4Compressor::new()),
None
)?;
// The thin snapshot will automatically load its parent based on
// the parent_path field in the header
let thin_backend = Arc::new(FileBackend::new("incremental.hxz".as_ref())?);
let thin = File::new(
thin_backend,
Box::new(Lz4Compressor::new()),
None
)?;
// Reads automatically fall back to base for unchanged blocks
let data = thin.read_at(hexz_core::SnapshotStream::Disk, 0, 4096)?;

Fields§
§header: HeaderSnapshot metadata (sizes, compression, encryption settings)
Implementations§
Source§impl File
impl File
Source
pub fn open(
backend: Arc<dyn StorageBackend>,
encryptor: Option<Box<dyn Encryptor>>,
) -> Result<Arc<Self>>
pub fn open( backend: Arc<dyn StorageBackend>, encryptor: Option<Box<dyn Encryptor>>, ) -> Result<Arc<Self>>
Opens a Hexz snapshot with default cache settings.
This is the primary constructor for File. It:
- Reads and validates the snapshot header (magic bytes, version)
- Deserializes the master index
- Recursively loads parent snapshots (for thin snapshots)
- Initializes block and page caches
§Parameters
- backend: Storage backend (local file, HTTP, S3, etc.)
- compressor: Compression algorithm matching the snapshot format (NOTE: `open` itself takes no compressor — it auto-detects compression from the header; a compressor argument applies to `File::new`)
- encryptor: Optional decryption handler (pass None for unencrypted snapshots)
§Returns
- Ok(File) on success
- Err(Error::Format) if magic bytes or version are invalid
- Err(Error::Io) if the storage backend fails
§Examples
use hexz_core::{File, SnapshotStream};
use hexz_core::store::local::FileBackend;
use hexz_core::algo::compression::lz4::Lz4Compressor;
use std::sync::Arc;
let backend = Arc::new(FileBackend::new("snapshot.hxz".as_ref())?);
let compressor = Box::new(Lz4Compressor::new());
let snapshot = File::new(backend, compressor, None)?;
println!("Disk size: {} bytes", snapshot.size(SnapshotStream::Disk));

Opens a snapshot, auto-detecting compression and dictionary from the header.
This eliminates the 3-step boilerplate of: read header, load dict, create
compressor. Equivalent to File::new(backend, auto_compressor, encryptor).
Source
pub fn open_with_cache(
backend: Arc<dyn StorageBackend>,
encryptor: Option<Box<dyn Encryptor>>,
cache_capacity_bytes: Option<usize>,
prefetch_window_size: Option<u32>,
) -> Result<Arc<Self>>
pub fn open_with_cache( backend: Arc<dyn StorageBackend>, encryptor: Option<Box<dyn Encryptor>>, cache_capacity_bytes: Option<usize>, prefetch_window_size: Option<u32>, ) -> Result<Arc<Self>>
Like open but with custom cache and prefetch settings.
pub fn new( backend: Arc<dyn StorageBackend>, compressor: Box<dyn Compressor>, encryptor: Option<Box<dyn Encryptor>>, ) -> Result<Arc<Self>>
Source
pub fn with_cache(
backend: Arc<dyn StorageBackend>,
compressor: Box<dyn Compressor>,
encryptor: Option<Box<dyn Encryptor>>,
cache_capacity_bytes: Option<usize>,
prefetch_window_size: Option<u32>,
) -> Result<Arc<Self>>
pub fn with_cache( backend: Arc<dyn StorageBackend>, compressor: Box<dyn Compressor>, encryptor: Option<Box<dyn Encryptor>>, cache_capacity_bytes: Option<usize>, prefetch_window_size: Option<u32>, ) -> Result<Arc<Self>>
Opens a Hexz snapshot with custom cache capacity and prefetching.
Identical to new but allows specifying cache size and prefetch window.
§Parameters
- backend: Storage backend
- compressor: Compression algorithm
- encryptor: Optional decryption handler
- cache_capacity_bytes: Block cache size in bytes (default: ~400MB for 4KB blocks)
- prefetch_window_size: Number of blocks to prefetch ahead (default: disabled)
§Cache Sizing
The cache stores decompressed blocks. Given a block size of 4KB:
- Some(100_000_000) → ~24,000 blocks (~96MB effective)
- None → 1000 blocks (~4MB effective)
Larger caches reduce repeated decompression but increase memory usage.
§Prefetching
When prefetch_window_size is set, the system will automatically fetch the next N blocks
in the background after each read, optimizing sequential access patterns:
- Some(4) → Prefetch 4 blocks ahead
- None or Some(0) → Disable prefetching
§Examples
use hexz_core::File;
use hexz_core::store::local::FileBackend;
use hexz_core::algo::compression::lz4::Lz4Compressor;
use std::sync::Arc;
let backend = Arc::new(FileBackend::new("snapshot.hxz".as_ref())?);
let compressor = Box::new(Lz4Compressor::new());
// Allocate 256MB for cache, prefetch 4 blocks ahead
let snapshot = File::with_cache(
backend,
compressor,
None,
Some(256 * 1024 * 1024),
Some(4)
)?;

Source
pub fn size(&self, stream: SnapshotStream) -> u64
pub fn size(&self, stream: SnapshotStream) -> u64
Returns the logical size of a stream in bytes.
§Parameters
stream: The stream to query (Disk or Memory)
§Returns
The uncompressed, logical size of the stream. This is the size you would get if you decompressed all blocks and concatenated them.
§Examples
use hexz_core::{File, SnapshotStream};
let disk_bytes = snapshot.size(SnapshotStream::Disk);
let mem_bytes = snapshot.size(SnapshotStream::Memory);
println!("Disk: {} GB", disk_bytes / (1024 * 1024 * 1024));
println!("Memory: {} MB", mem_bytes / (1024 * 1024));

Source
pub fn read_at(
self: &Arc<Self>,
stream: SnapshotStream,
offset: u64,
len: usize,
) -> Result<Vec<u8>>
pub fn read_at( self: &Arc<Self>, stream: SnapshotStream, offset: u64, len: usize, ) -> Result<Vec<u8>>
Reads data from a snapshot stream at a given offset.
This is the primary read method for random access. It:
- Identifies which blocks overlap the requested range
- Fetches blocks from cache or decompresses from storage
- Handles thin snapshot fallback to parent
- Assembles the final buffer from block slices
§Parameters
- stream: Which stream to read from (Disk or Memory)
- offset: Starting byte offset (0-indexed)
- len: Number of bytes to read
§Returns
A Vec<u8> containing up to len bytes. The returned vector may be shorter
if:
- offset is beyond the stream size (returns empty vector)
- offset + len exceeds stream size (returns partial data)
Missing data (sparse regions) is zero-filled.
§Errors
- Error::Io if backend read fails (e.g. truncated file)
- Error::Corruption(block_idx) if block checksum does not match
- Error::Decompression if block decompression fails
- Error::Decryption if block decryption fails
§Performance
- Cache hit: ~80μs latency, no I/O
- Cache miss: ~1ms latency (local storage), includes decompression
- Remote storage: Latency depends on network (HTTP: ~50ms, S3: ~100ms)
Aligned reads (offset % block_size == 0) are most efficient.
§Examples
use hexz_core::{File, SnapshotStream};
// Read first 512 bytes of disk stream
let boot_sector = snapshot.read_at(SnapshotStream::Disk, 0, 512)?;
// Read from arbitrary offset
let chunk = snapshot.read_at(SnapshotStream::Disk, 1024 * 1024, 4096)?;
// Reading beyond stream size returns empty vector
let empty = snapshot.read_at(SnapshotStream::Disk, u64::MAX, 100)?;
assert!(empty.is_empty());

Reads a byte range. Uses parallel block decompression when the range spans multiple blocks.
Sourcepub fn read_at_into(
self: &Arc<Self>,
stream: SnapshotStream,
offset: u64,
buffer: &mut [u8],
) -> Result<()>
pub fn read_at_into( self: &Arc<Self>, stream: SnapshotStream, offset: u64, buffer: &mut [u8], ) -> Result<()>
Reads into a provided buffer. Unused suffix is zero-filled. Uses parallel decompression when spanning multiple blocks.
Sourcepub fn read_at_into_uninit(
self: &Arc<Self>,
stream: SnapshotStream,
offset: u64,
buffer: &mut [MaybeUninit<u8>],
) -> Result<()>
pub fn read_at_into_uninit( self: &Arc<Self>, stream: SnapshotStream, offset: u64, buffer: &mut [MaybeUninit<u8>], ) -> Result<()>
Writes into uninitialized memory. Unused suffix is zero-filled. Uses parallel decompression when spanning multiple blocks.
On error: The buffer contents are undefined (possibly partially written).
Sourcepub fn read_at_into_uninit_bytes(
self: &Arc<Self>,
stream: SnapshotStream,
offset: u64,
buf: &mut [u8],
) -> Result<()>
pub fn read_at_into_uninit_bytes( self: &Arc<Self>, stream: SnapshotStream, offset: u64, buf: &mut [u8], ) -> Result<()>
Like read_at_into_uninit but accepts &mut [u8]. Use from FFI (e.g. Python).
Auto Trait Implementations§
impl !Freeze for File
impl !RefUnwindSafe for File
impl Send for File
impl Sync for File
impl Unpin for File
impl !UnwindSafe for File
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more

Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more