Skip to main content

hexz_core/store/local/
file.rs

1//! Local file system storage backend using position-independent I/O.
2//!
3//! This module implements the `StorageBackend` trait for files on local filesystems
4//! using standard POSIX `pread(2)` semantics. Unlike traditional file I/O that
5//! maintains a stateful read cursor, this backend uses offset-based reads that
6//! allow safe concurrent access from multiple threads without synchronization.
7//!
8//! # Architecture
9//!
10//! The [`FileBackend`] wraps a single `std::fs::File` handle and caches the file
11//! size at construction. All reads use `FileExt::read_exact_at()`, which maps to
12//! the `pread(2)` system call on Unix systems. This design provides:
13//!
14//! - **Zero-lock concurrency**: Multiple threads can read simultaneously without contention
15//! - **Predictable I/O**: Explicit control over read sizes and offsets
16//! - **Kernel buffering**: Leverages the OS page cache for repeated reads
17//!
18//! # Thread Safety
19//!
20//! The backend is fully thread-safe (`Send + Sync`) because:
21//! - Every read passes an explicit offset, so no read depends on the `File` handle's cursor position
22//! - All reads are atomic with respect to the file offset parameter
23//! - The cached `size` field is immutable after initialization
24//!
25//! No internal locking is required. Multiple threads calling `read_exact()` will
26//! not interfere with each other.
27//!
28//! # Performance Characteristics
29//!
30//! - **Latency**: 5-50µs per read (varies with page cache hit rate)
31//! - **Throughput**: Limited by storage device (500MB/s HDD, 3-7GB/s SSD)
32//! - **CPU overhead**: One system call + one kernel-to-userspace copy per read
33//! - **Memory overhead**: Only requested data is allocated; no mapping overhead
34//!
35//! # When to Use This Backend
36//!
37//! Prefer [`FileBackend`] over [`MmapBackend`](super::mmap::MmapBackend) when:
38//! - Accessing large files (>1GB) with sparse, unpredictable read patterns
39//! - Running in restricted environments where `mmap(2)` may be disabled
40//! - Profiling shows memory-mapped I/O causes excessive page faults
41//! - You need explicit control over read granularity and buffering
42//!
43//! # Error Handling
44//!
45//! All I/O errors are wrapped in `Error::Io`. Common failure modes:
46//! - File not found or insufficient permissions (construction)
47//! - Unexpected EOF when reading beyond file boundaries
48//! - Storage device failures or filesystem corruption (rare)
49//!
50//! # Examples
51//!
52//! ```no_run
53//! use hexz_core::store::local::FileBackend;
54//! use hexz_core::store::StorageBackend;
55//! use std::path::Path;
56//!
57//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
58//! // Open a snapshot file
59//! let backend = FileBackend::new(Path::new("/data/snapshot.hxz"))?;
60//!
61//! // Read 4KB starting at offset 8192
62//! let data = backend.read_exact(8192, 4096)?;
63//! assert_eq!(data.len(), 4096);
64//!
65//! // Multiple threads can read concurrently
66//! let backend = std::sync::Arc::new(backend);
67//! let handles: Vec<_> = (0..4)
68//!     .map(|i| {
69//!         let b = backend.clone();
70//!         std::thread::spawn(move || {
71//!             b.read_exact(i * 1024, 1024)
72//!         })
73//!     })
74//!     .collect();
75//!
76//! for handle in handles {
77//!     let result = handle.join().unwrap()?;
78//!     assert_eq!(result.len(), 1024);
79//! }
80//! # Ok(())
81//! # }
82//! ```
83
84use crate::store::StorageBackend;
85use bytes::{Bytes, BytesMut};
86use hexz_common::Result;
87use std::fs::File;
88
/// Fills `buffer` completely with bytes taken from `file`, beginning at byte `offset`.
///
/// Unix variant: delegates to the standard library's positional
/// `FileExt::read_exact_at` (`pread`-style I/O), so the read is addressed by
/// `offset` rather than by the handle's cursor.
///
/// # Errors
///
/// Propagates the `std::io::Error` reported by the standard library, including
/// `ErrorKind::UnexpectedEof` when the file ends before `buffer` is full.
#[cfg(unix)]
fn read_exact_at(file: &File, buffer: &mut [u8], offset: u64) -> std::io::Result<()> {
    std::os::unix::fs::FileExt::read_exact_at(file, buffer, offset)
}
96
/// Fills `buffer` completely with bytes taken from `file`, beginning at byte `offset`.
///
/// Windows variant: `seek_read` may return fewer bytes than requested, so the
/// call is repeated on the still-unfilled tail of `buffer` until nothing is left.
///
/// # Errors
///
/// - Any error returned by `seek_read` is propagated unchanged.
/// - `ErrorKind::UnexpectedEof` if `seek_read` reports zero bytes while part of
///   `buffer` is still unfilled.
#[cfg(windows)]
fn read_exact_at(file: &File, buffer: &mut [u8], mut offset: u64) -> std::io::Result<()> {
    use std::os::windows::fs::FileExt;

    let mut remaining = buffer;
    while !remaining.is_empty() {
        match file.seek_read(remaining, offset)? {
            // Zero bytes with space still left means the file ended early.
            0 => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::UnexpectedEof,
                    "unexpected eof",
                ));
            }
            n => {
                // Move the slice out before re-slicing so the mutable borrow
                // can be shortened to the unfilled tail.
                let filled_so_far = remaining;
                remaining = &mut filled_so_far[n..];
                offset += n as u64;
            }
        }
    }
    Ok(())
}
114
/// Read-only storage backend over a single file on the local filesystem.
///
/// Holds an open `std::fs::File` together with the file length recorded when
/// the backend was created. Reads are issued through offset-taking calls
/// instead of the handle's shared cursor, which is why the handle is not
/// wrapped in a `Mutex`.
#[derive(Debug)]
pub struct FileBackend {
    /// Open handle to the backing file.
    inner: File,
    /// File length in bytes, captured once at construction time.
    size: u64,
}
128
129impl FileBackend {
130    /// Opens a snapshot file and prepares it for concurrent reads.
131    ///
132    /// This constructor performs two operations:
133    /// 1. Opens the file at `path` in read-only mode (`O_RDONLY`)
134    /// 2. Queries file metadata via `fstat(2)` to cache the file size
135    ///
136    /// The file size is cached to avoid repeated `stat` system calls. The backend
137    /// assumes the file is immutable (snapshot semantics) and will not change
138    /// size during its lifetime.
139    ///
140    /// # Parameters
141    ///
142    /// - `path`: Filesystem path to the snapshot file (absolute or relative)
143    ///
144    /// # Returns
145    ///
146    /// - `Ok(FileBackend)`: Successfully opened and initialized
147    /// - `Err(Error::Io)`: If the file cannot be opened or metadata cannot be read
148    ///
149    /// # Errors
150    ///
151    /// Common error conditions:
152    /// - **File not found** (`ENOENT`): Path does not exist
153    /// - **Permission denied** (`EACCES`): Insufficient permissions to read file
154    /// - **Invalid path** (`ENOTDIR`): A path component is not a directory
155    /// - **Device errors**: Disk I/O failure during open or stat
156    ///
157    /// # Examples
158    ///
159    /// ```no_run
160    /// use hexz_core::store::local::FileBackend;
161    /// use std::path::Path;
162    ///
163    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
164    /// // Absolute path
165    /// let backend = FileBackend::new(Path::new("/var/data/snapshot.hxz"))?;
166    ///
167    /// // Relative path
168    /// let backend = FileBackend::new(Path::new("./snapshots/test.hxz"))?;
169    ///
170    /// // Error handling
171    /// match FileBackend::new(Path::new("/nonexistent.hxz")) {
172    ///     Ok(_) => println!("Success"),
173    ///     Err(e) => eprintln!("Failed to open: {}", e),
174    /// }
175    /// # Ok(())
176    /// # }
177    /// ```
178    pub fn new(path: &std::path::Path) -> Result<Self> {
179        let file = File::open(path)?;
180        let metadata = file.metadata()?;
181        Ok(Self {
182            inner: file,
183            size: metadata.len(),
184        })
185    }
186}
187
188impl StorageBackend for FileBackend {
189    /// Reads exactly `len` bytes starting at `offset` using position-independent I/O.
190    ///
191    /// This method uses `pread(2)` semantics (via `FileExt::read_exact_at`) to read
192    /// data at an explicit offset without modifying any file descriptor state. This
193    /// enables safe concurrent reads from multiple threads without coordination.
194    ///
195    /// # Parameters
196    ///
197    /// - `offset`: Absolute byte offset from the start of the file (0-indexed)
198    /// - `len`: Number of bytes to read (must not cause `offset + len` to exceed file size)
199    ///
200    /// # Returns
201    ///
202    /// - `Ok(Bytes)`: A buffer containing exactly `len` bytes of data
203    /// - `Err(Error::Io)`: If the read fails or reaches unexpected EOF
204    ///
205    /// # Errors
206    ///
207    /// This method returns an error in the following cases:
208    /// - **Unexpected EOF** (`ErrorKind::UnexpectedEof`): `offset + len > file_size`
209    /// - **I/O errors**: Disk read failure, filesystem corruption, device disconnected
210    /// - **Invalid offset**: Requesting beyond addressable range (rare on 64-bit systems)
211    ///
212    /// # Performance
213    ///
214    /// - **Time complexity**: O(len) for data copying, O(1) for offset calculation
215    /// - **Syscalls**: 1 `pread(2)` call
216    /// - **Allocations**: 1 heap allocation of `len` bytes
217    /// - **Page cache**: Benefits from OS caching; repeated reads of the same range
218    ///   may be served from memory without disk I/O
219    ///
220    /// # Concurrency
221    ///
222    /// This method is safe to call concurrently from multiple threads. Each call
223    /// operates on an independent offset and does not affect other reads.
224    ///
225    /// # Examples
226    ///
227    /// ```no_run
228    /// use hexz_core::store::local::FileBackend;
229    /// use hexz_core::store::StorageBackend;
230    /// use std::path::Path;
231    ///
232    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
233    /// let backend = FileBackend::new(Path::new("/data/snapshot.hxz"))?;
234    ///
235    /// // Read first 512 bytes (header)
236    /// let header = backend.read_exact(0, 512)?;
237    /// assert_eq!(header.len(), 512);
238    ///
239    /// // Read 4KB block at offset 1MB
240    /// let block = backend.read_exact(1024 * 1024, 4096)?;
241    /// assert_eq!(block.len(), 4096);
242    ///
243    /// // Error: reading beyond file boundary
244    /// let file_size = backend.len();
245    /// assert!(backend.read_exact(file_size, 1).is_err());
246    /// # Ok(())
247    /// # }
248    /// ```
249    fn read_exact(&self, offset: u64, len: usize) -> Result<Bytes> {
250        let mut buffer = BytesMut::with_capacity(len);
251
252        // SAFETY: Buffer has been pre-allocated to exactly `len` bytes via with_capacity.
253        // read_exact_at will initialize all bytes before we access them, so set_len is safe.
254        unsafe {
255            buffer.set_len(len);
256        }
257
258        match read_exact_at(&self.inner, &mut buffer, offset) {
259            Ok(_) => Ok(buffer.freeze()),
260            Err(e) => Err(hexz_common::Error::Io(e)),
261        }
262    }
263
264    /// Returns the total file size in bytes.
265    ///
266    /// This value is cached during construction via `File::metadata()` and remains
267    /// constant for the lifetime of the backend. The file is assumed to be immutable
268    /// (snapshot semantics); modifying the file externally while the backend is
269    /// active results in undefined behavior.
270    ///
271    /// # Returns
272    ///
273    /// The file size in bytes as of the time `FileBackend::new()` was called.
274    ///
275    /// # Performance
276    ///
277    /// This method is a simple field access with no system calls (O(1)).
278    ///
279    /// # Examples
280    ///
281    /// ```no_run
282    /// use hexz_core::store::local::FileBackend;
283    /// use hexz_core::store::StorageBackend;
284    /// use std::path::Path;
285    ///
286    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
287    /// let backend = FileBackend::new(Path::new("/data/snapshot.hxz"))?;
288    /// let size = backend.len();
289    /// println!("Snapshot size: {} bytes ({} MB)", size, size / 1024 / 1024);
290    /// # Ok(())
291    /// # }
292    /// ```
293    fn len(&self) -> u64 {
294        self.size
295    }
296}