hexz_core/lib.rs
//! Core snapshot engine providing block-level compressed storage with random access.
//!
//! # Overview
//!
//! `hexz-core` implements the core logic for creating and reading Hexz snapshots:
//! compressed, block-indexed archives that support random access, remote streaming,
//! and incremental updates. This crate contains no UI code; all user interfaces
//! (CLI, Python, FUSE) are in separate crates.
//!
//! # Architecture
//!
//! The crate is organized into several independent modules:
//!
//! - **[`mod@format`]**: On-disk structures (headers, indices) defining the file format
//! - **[`store`]**: Storage backend abstraction (local files, HTTP, S3)
//! - **[`algo`]**: Compression, encryption, hashing, and deduplication algorithms
//! - **[`cache`]**: LRU caching for decompressed blocks and index pages
//! - **[`api`]**: Public API ([`File`]) for reading snapshots
//! - **[`ops`]**: High-level operations for packing and manipulating snapshots
//!
//! # Quick Start
//!
//! ```no_run
//! use hexz_core::{File, SnapshotStream};
//! use hexz_core::store::local::FileBackend;
//! use hexz_core::algo::compression::lz4::Lz4Compressor;
//! use std::sync::Arc;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Open a local snapshot file
//! let backend = Arc::new(FileBackend::new("snapshot.hxz".as_ref())?);
//! let compressor = Box::new(Lz4Compressor::new());
//! let snapshot = File::new(backend, compressor, None)?;
//!
//! // Read 4 KiB from the disk stream at offset 1 MiB
//! let data = snapshot.read_at(SnapshotStream::Disk, 1024 * 1024, 4096)?;
//! assert_eq!(data.len(), 4096);
//! # Ok(())
//! # }
//! ```
//!
//! # File Format
//!
//! Hexz snapshots consist of four regions, sketched below:
//! 1. A fixed-size header (512 bytes) with metadata
//! 2. Compressed data blocks (variable size)
//! 3. Hierarchical index pages (serialized with bincode)
//! 4. Master index at the end (location stored in the header)
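//!
//! A rough byte-level layout (every region except the 512-byte header varies in size):
//!
//! ```text
//! +----------------------------+  offset 0
//! | header (512 bytes)         |
//! +----------------------------+  offset 512
//! | compressed data block 0    |
//! | compressed data block 1    |
//! | ...                        |
//! +----------------------------+
//! | index pages (bincode)      |
//! +----------------------------+
//! | master index               |  <- location recorded in the header
//! +----------------------------+  <- end of file
//! ```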
//!
//! The format supports:
//! - Block-level compression (LZ4, Zstandard)
//! - Optional AES-256-GCM encryption
//! - Thin snapshots (parent references)
//! - Dual streams (separate disk and memory data)
//! - Content-defined chunking for deduplication
//!
//! See the [`mod@format`] module for the detailed specification.
//!
//! # Storage Backends
//!
//! Storage backends implement the [`store::StorageBackend`] trait, enabling reads from:
//! - Local files ([`store::local::FileBackend`])
//! - Memory-mapped files ([`store::local::MmapBackend`])
//! - HTTP/HTTPS URLs ([`store::http`])
//! - S3 buckets ([`store::s3`])
//!
//! All backends expose the same interface, so higher layers never need to know
//! where the data comes from.
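//!
//! Because every backend implements the same trait, callers can pick the backend
//! at runtime and reuse the same reading code. A minimal sketch, assuming
//! [`File::new`] accepts any `Arc`-wrapped backend as the examples in this
//! documentation do (the `open_snapshot` helper below is hypothetical, hence the
//! `ignore` fence):
//!
//! ```ignore
//! use std::path::Path;
//! use std::sync::Arc;
//! use hexz_core::File;
//! use hexz_core::algo::compression::lz4::Lz4Compressor;
//! use hexz_core::store::{http::HttpBackend, local::FileBackend};
//!
//! /// Open a snapshot from a local path or an HTTP(S) URL.
//! fn open_snapshot(location: &str) -> Result<File, Box<dyn std::error::Error>> {
//!     let compressor = Box::new(Lz4Compressor::new());
//!     if location.starts_with("http://") || location.starts_with("https://") {
//!         // Stream over HTTP; `false` disallows restricted IPs.
//!         let backend = Arc::new(HttpBackend::new(location.to_string(), false)?);
//!         Ok(File::new(backend, compressor, None)?)
//!     } else {
//!         // Plain local file access.
//!         let backend = Arc::new(FileBackend::new(Path::new(location))?);
//!         Ok(File::new(backend, compressor, None)?)
//!     }
//! }
//! ```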
//!
//! # Compression & Encryption
//!
//! Compression and encryption are pluggable via traits:
//! - [`algo::compression::Compressor`]: LZ4 ([`algo::compression::lz4`]) or Zstandard ([`algo::compression::zstd`])
//! - [`algo::encryption::Encryptor`]: AES-256-GCM ([`algo::encryption::aes_gcm`])
//!
//! Each block is compressed independently, then optionally encrypted. This enables:
//! - Parallel decompression (each block is self-contained)
//! - Random access (only decompress the blocks you need)
//! - Block-level integrity (CRC32 checksums)
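//!
//! For example, opening a snapshot that was packed with Zstandard only changes
//! the compressor handed to [`File::new`]. A sketch, assuming the
//! [`algo::compression::zstd`] module exports a `ZstdCompressor` with an
//! LZ4-like constructor (the type name is not confirmed here, hence the
//! `ignore` fence):
//!
//! ```ignore
//! use hexz_core::{File, SnapshotStream};
//! use hexz_core::store::local::FileBackend;
//! use hexz_core::algo::compression::zstd::ZstdCompressor; // assumed type name
//! use std::sync::Arc;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let backend = Arc::new(FileBackend::new("snapshot.hxz".as_ref())?);
//! let snapshot = File::new(backend, Box::new(ZstdCompressor::new()), None)?;
//!
//! // Only the blocks covering this range are fetched and decompressed.
//! let data = snapshot.read_at(SnapshotStream::Disk, 8 * 1024 * 1024, 4096)?;
//! assert_eq!(data.len(), 4096);
//! # Ok(())
//! # }
//! ```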
//!
//! # Performance
//!
//! - **Compression**: LZ4 ~2 GB/s, Zstd ~500 MB/s (single-threaded)
//! - **Random Access**: ~1 ms latency (cold cache), ~0.08 ms (warm cache)
//! - **Sequential Read**: ~2-3 GB/s (NVMe storage, LZ4 decompression)
//! - **Memory**: <150 MB typical (configurable block cache)
//!
//! # Thread Safety
//!
//! [`File`] is `Send + Sync` and can be safely shared across threads via `Arc`.
//! Internal caches use `Mutex` for synchronization. Multiple threads can read
//! concurrently from the same snapshot with independent cache hits.
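//!
//! A sketch of fanning reads out across worker threads, built from the same
//! calls as the Quick Start example:
//!
//! ```no_run
//! use hexz_core::{File, SnapshotStream};
//! use hexz_core::store::local::FileBackend;
//! use hexz_core::algo::compression::lz4::Lz4Compressor;
//! use std::sync::Arc;
//! use std::thread;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let backend = Arc::new(FileBackend::new("snapshot.hxz".as_ref())?);
//! let snapshot = Arc::new(File::new(backend, Box::new(Lz4Compressor::new()), None)?);
//!
//! // Each worker reads its own 1 MiB slice; no external locking is required.
//! let workers: Vec<_> = (0..4)
//!     .map(|i| {
//!         let snapshot = Arc::clone(&snapshot);
//!         thread::spawn(move || {
//!             snapshot
//!                 .read_at(SnapshotStream::Disk, i * 1024 * 1024, 1024 * 1024)
//!                 .expect("read failed")
//!         })
//!     })
//!     .collect();
//!
//! for worker in workers {
//!     let data = worker.join().expect("worker panicked");
//!     assert_eq!(data.len(), 1024 * 1024);
//! }
//! # Ok(())
//! # }
//! ```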
//!
//! # Examples
//!
//! ## Reading from HTTP
//!
//! ```no_run
//! use hexz_core::File;
//! use hexz_core::store::http::HttpBackend;
//! use hexz_core::algo::compression::lz4::Lz4Compressor;
//! use std::sync::Arc;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let backend = Arc::new(HttpBackend::new(
//!     "https://example.com/dataset.hxz".to_string(),
//!     false, // don't allow restricted IPs
//! )?);
//! let compressor = Box::new(Lz4Compressor::new());
//! let snapshot = File::new(backend, compressor, None)?;
//!
//! // Stream data without downloading the entire file
//! let data = snapshot.read_at(hexz_core::SnapshotStream::Disk, 0, 1024)?;
//! # Ok(())
//! # }
//! ```
//!
//! ## Thin Snapshots (Parent References)
//!
//! Thin snapshots store a path to a base snapshot in their header; opening the
//! thin file automatically loads the parent when needed.
//!
//! ```no_run
//! use hexz_core::File;
//! use hexz_core::store::local::FileBackend;
//! use hexz_core::algo::compression::lz4::Lz4Compressor;
//! use std::sync::Arc;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Open a thin snapshot (the parent is loaded from the header's `parent_path` when present)
//! let thin_backend = Arc::new(FileBackend::new("incremental.hxz".as_ref())?);
//! let thin_compressor = Box::new(Lz4Compressor::new());
//! let thin = File::new(thin_backend, thin_compressor, None)?;
//!
//! // Reads from the thin snapshot automatically fall back to the base for unchanged blocks
//! let data = thin.read_at(hexz_core::SnapshotStream::Disk, 0, 4096)?;
//! # Ok(())
//! # }
//! ```

/// Public API surface for reading snapshot files.
///
/// Contains the main entry point for opening and reading snapshots.
///
/// See the [`api::file`] module for the main types.
pub mod api;

/// Storage backend abstraction and implementations.
///
/// All backends implement [`store::StorageBackend`] to provide uniform access
/// to snapshot data regardless of its source (local file, HTTP, S3).
pub mod store;

/// In-memory caching for decompressed blocks and deserialized index pages.
///
/// Caching is critical for performance because decompression is expensive. The
/// LRU cache stores recently accessed blocks to avoid repeated decompression.
pub mod cache;

/// On-disk format structures: headers, indices, and serialization.
///
/// These types define the binary wire format for Hexz snapshots. All structures
/// use `bincode` for serialization and are versioned for forward compatibility.
///
/// See the submodules for the detailed format specification:
/// - `magic`: Magic bytes and version constants
/// - `header`: File header structure and enums
/// - `index`: Index pages and block metadata
/// - `version`: Version compatibility checking
pub mod format;

/// Algorithms for compression, encryption, hashing, and deduplication.
///
/// Each algorithm category has a trait definition and one or more implementations:
///
/// ## Compression
/// - LZ4: Fast compression
/// - Zstandard: Higher compression ratio
///
/// ## Encryption
/// - AES-256-GCM: Authenticated encryption
///
/// ## Hashing
/// - BLAKE3: Content hashing
///
/// ## Deduplication
/// - FastCDC, DCAM modeling
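///
/// A compression round-trip sketch; the `compress`/`decompress` method names
/// below are illustrative only (see [`algo::compression::Compressor`] for the
/// actual trait signatures), hence the `ignore` fence:
///
/// ```ignore
/// use hexz_core::algo::compression::lz4::Lz4Compressor;
///
/// let compressor = Lz4Compressor::new();
/// let raw = vec![42u8; 4096];
/// // Hypothetical method names; the Compressor trait defines the real API.
/// let packed = compressor.compress(&raw)?;
/// let restored = compressor.decompress(&packed, raw.len())?;
/// assert_eq!(raw, restored);
/// ```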
pub mod algo;

/// High-level operations for creating, modifying, and analyzing snapshots.
///
/// Contains the more involved logic for packing and manipulating snapshots.
///
/// See the submodules for specific operations.
pub mod ops;

pub use api::file::{File, SnapshotStream};