hexz_core/store/local/file.rs
//! Local file system storage backend using position-independent I/O.
//!
//! This module implements the `StorageBackend` trait for files on local filesystems
//! using standard POSIX `pread(2)` semantics. Unlike traditional file I/O that
//! maintains a stateful read cursor, this backend uses offset-based reads that
//! allow safe concurrent access from multiple threads without synchronization.
//!
//! # Architecture
//!
//! The [`FileBackend`] wraps a single `std::fs::File` handle and caches the file
//! size at construction. On Unix, all reads use `FileExt::read_exact_at()`, which
//! maps to the `pread(2)` system call; on Windows, reads loop over
//! `FileExt::seek_read()` until the buffer is full. This design provides:
//!
//! - **Zero-lock concurrency**: Multiple threads can read simultaneously without contention
//! - **Predictable I/O**: Explicit control over read sizes and offsets
//! - **Kernel buffering**: Leverages the OS page cache for repeated reads
//!
//! # Thread Safety
//!
//! The backend is fully thread-safe (`Send + Sync`) because:
//! - Reads never depend on the `File` handle's cursor position; every read passes
//!   an explicit offset
//! - All reads are atomic with respect to the file offset parameter
//! - The cached `size` field is immutable after initialization
//!
//! No internal locking is required. Multiple threads calling `read_exact()` will
//! not interfere with each other.
//!
//! # Performance Characteristics
//!
//! - **Latency**: 5-50µs per read (varies with page cache hit rate)
//! - **Throughput**: Limited by storage device (500MB/s HDD, 3-7GB/s SSD)
//! - **CPU overhead**: One system call + one kernel-to-userspace copy per read
//! - **Memory overhead**: Only requested data is allocated; no mapping overhead
//!
//! # When to Use This Backend
//!
//! Prefer [`FileBackend`] over [`MmapBackend`](super::mmap::MmapBackend) when:
//! - Accessing large files (>1GB) with sparse, unpredictable read patterns
//! - Running in restricted environments where `mmap(2)` may be disabled
//! - Profiling shows memory-mapped I/O causes excessive page faults
//! - You need explicit control over read granularity and buffering
//!
//! # Error Handling
//!
//! All I/O errors are wrapped in `Error::Io`. Common failure modes:
//! - File not found or insufficient permissions (construction)
//! - Unexpected EOF when reading beyond file boundaries
//! - Storage device failures or filesystem corruption (rare)
//!
//! # Examples
//!
//! ```no_run
//! use hexz_core::store::local::FileBackend;
//! use hexz_core::store::StorageBackend;
//! use std::path::Path;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! // Open a snapshot file
//! let backend = FileBackend::new(Path::new("/data/snapshot.hxz"))?;
//!
//! // Read 4KB starting at offset 8192
//! let data = backend.read_exact(8192, 4096)?;
//! assert_eq!(data.len(), 4096);
//!
//! // Multiple threads can read concurrently
//! let backend = std::sync::Arc::new(backend);
//! let handles: Vec<_> = (0..4)
//!     .map(|i| {
//!         let b = backend.clone();
//!         std::thread::spawn(move || {
//!             b.read_exact(i * 1024, 1024)
//!         })
//!     })
//!     .collect();
//!
//! for handle in handles {
//!     let result = handle.join().unwrap()?;
//!     assert_eq!(result.len(), 1024);
//! }
//! # Ok(())
//! # }
//! ```
83
84use crate::store::StorageBackend;
85use bytes::{Bytes, BytesMut};
86use hexz_common::Result;
87use std::fs::File;
88
/// Fills `buffer` completely with the bytes of `file` that start at `offset`.
///
/// Unix variant: forwards to the standard library's positioned
/// `FileExt::read_exact_at`, so the offset is passed explicitly with every call
/// rather than taken from the handle's cursor.
///
/// # Errors
///
/// Returns whatever `std::io::Error` the standard library call reports; running
/// out of file before `buffer` is full surfaces as `ErrorKind::UnexpectedEof`.
#[cfg(unix)]
fn read_exact_at(file: &File, buffer: &mut [u8], offset: u64) -> std::io::Result<()> {
    std::os::unix::fs::FileExt::read_exact_at(file, buffer, offset)
}
96
/// Fills `buffer` completely with the bytes of `file` that start at `offset`.
///
/// Windows variant: `seek_read` may return fewer bytes than requested, so the
/// call is repeated, advancing both the write position in `buffer` and the file
/// offset, until the buffer is full.
///
/// Every call passes its own offset, so the result does not depend on the
/// handle's cursor. NOTE(review): the standard library documents that
/// `seek_read` moves the handle's cursor as a side effect; nothing in this
/// function reads the cursor, but callers should not rely on its value.
///
/// # Errors
///
/// - `ErrorKind::UnexpectedEof` if the file ends before `buffer` is full.
/// - Any other error reported by `seek_read`, except `ErrorKind::Interrupted`,
///   which is retried to match the behaviour of the Unix `read_exact_at`.
#[cfg(windows)]
fn read_exact_at(file: &File, buffer: &mut [u8], mut offset: u64) -> std::io::Result<()> {
    use std::io::ErrorKind;
    use std::os::windows::fs::FileExt;

    let mut filled = 0;
    while filled < buffer.len() {
        match file.seek_read(&mut buffer[filled..], offset) {
            // Zero bytes with space still left in the buffer means end of file.
            Ok(0) => {
                return Err(std::io::Error::new(
                    ErrorKind::UnexpectedEof,
                    "unexpected eof",
                ));
            }
            Ok(n) => {
                filled += n;
                offset += n as u64;
            }
            // A spurious interruption is not a failure; issue the same read again.
            Err(e) if e.kind() == ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(())
}
114
/// Storage backend that serves reads from a single local file.
///
/// The struct pairs an open `std::fs::File` with the file's length as recorded
/// when the backend was built. Reads issued through this backend name their
/// offset explicitly instead of going through the handle's cursor, which is why
/// the handle is stored bare rather than behind a `Mutex`.
#[derive(Debug)]
pub struct FileBackend {
    /// Open operating-system handle that all reads are issued against.
    inner: File,
    /// Length of the file in bytes, captured once at construction time.
    size: u64,
}
128
129impl FileBackend {
130 /// Opens a snapshot file and prepares it for concurrent reads.
131 ///
132 /// This constructor performs two operations:
133 /// 1. Opens the file at `path` in read-only mode (`O_RDONLY`)
134 /// 2. Queries file metadata via `fstat(2)` to cache the file size
135 ///
136 /// The file size is cached to avoid repeated `stat` system calls. The backend
137 /// assumes the file is immutable (snapshot semantics) and will not change
138 /// size during its lifetime.
139 ///
140 /// # Parameters
141 ///
142 /// - `path`: Filesystem path to the snapshot file (absolute or relative)
143 ///
144 /// # Returns
145 ///
146 /// - `Ok(FileBackend)`: Successfully opened and initialized
147 /// - `Err(Error::Io)`: If the file cannot be opened or metadata cannot be read
148 ///
149 /// # Errors
150 ///
151 /// Common error conditions:
152 /// - **File not found** (`ENOENT`): Path does not exist
153 /// - **Permission denied** (`EACCES`): Insufficient permissions to read file
154 /// - **Invalid path** (`ENOTDIR`): A path component is not a directory
155 /// - **Device errors**: Disk I/O failure during open or stat
156 ///
157 /// # Examples
158 ///
159 /// ```no_run
160 /// use hexz_core::store::local::FileBackend;
161 /// use std::path::Path;
162 ///
163 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
164 /// // Absolute path
165 /// let backend = FileBackend::new(Path::new("/var/data/snapshot.hxz"))?;
166 ///
167 /// // Relative path
168 /// let backend = FileBackend::new(Path::new("./snapshots/test.hxz"))?;
169 ///
170 /// // Error handling
171 /// match FileBackend::new(Path::new("/nonexistent.hxz")) {
172 /// Ok(_) => println!("Success"),
173 /// Err(e) => eprintln!("Failed to open: {}", e),
174 /// }
175 /// # Ok(())
176 /// # }
177 /// ```
178 pub fn new(path: &std::path::Path) -> Result<Self> {
179 let file = File::open(path)?;
180 let metadata = file.metadata()?;
181 Ok(Self {
182 inner: file,
183 size: metadata.len(),
184 })
185 }
186}
187
188impl StorageBackend for FileBackend {
189 /// Reads exactly `len` bytes starting at `offset` using position-independent I/O.
190 ///
191 /// This method uses `pread(2)` semantics (via `FileExt::read_exact_at`) to read
192 /// data at an explicit offset without modifying any file descriptor state. This
193 /// enables safe concurrent reads from multiple threads without coordination.
194 ///
195 /// # Parameters
196 ///
197 /// - `offset`: Absolute byte offset from the start of the file (0-indexed)
198 /// - `len`: Number of bytes to read (must not cause `offset + len` to exceed file size)
199 ///
200 /// # Returns
201 ///
202 /// - `Ok(Bytes)`: A buffer containing exactly `len` bytes of data
203 /// - `Err(Error::Io)`: If the read fails or reaches unexpected EOF
204 ///
205 /// # Errors
206 ///
207 /// This method returns an error in the following cases:
208 /// - **Unexpected EOF** (`ErrorKind::UnexpectedEof`): `offset + len > file_size`
209 /// - **I/O errors**: Disk read failure, filesystem corruption, device disconnected
210 /// - **Invalid offset**: Requesting beyond addressable range (rare on 64-bit systems)
211 ///
212 /// # Performance
213 ///
214 /// - **Time complexity**: O(len) for data copying, O(1) for offset calculation
215 /// - **Syscalls**: 1 `pread(2)` call
216 /// - **Allocations**: 1 heap allocation of `len` bytes
217 /// - **Page cache**: Benefits from OS caching; repeated reads of the same range
218 /// may be served from memory without disk I/O
219 ///
220 /// # Concurrency
221 ///
222 /// This method is safe to call concurrently from multiple threads. Each call
223 /// operates on an independent offset and does not affect other reads.
224 ///
225 /// # Examples
226 ///
227 /// ```no_run
228 /// use hexz_core::store::local::FileBackend;
229 /// use hexz_core::store::StorageBackend;
230 /// use std::path::Path;
231 ///
232 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
233 /// let backend = FileBackend::new(Path::new("/data/snapshot.hxz"))?;
234 ///
235 /// // Read first 512 bytes (header)
236 /// let header = backend.read_exact(0, 512)?;
237 /// assert_eq!(header.len(), 512);
238 ///
239 /// // Read 4KB block at offset 1MB
240 /// let block = backend.read_exact(1024 * 1024, 4096)?;
241 /// assert_eq!(block.len(), 4096);
242 ///
243 /// // Error: reading beyond file boundary
244 /// let file_size = backend.len();
245 /// assert!(backend.read_exact(file_size, 1).is_err());
246 /// # Ok(())
247 /// # }
248 /// ```
249 fn read_exact(&self, offset: u64, len: usize) -> Result<Bytes> {
250 let mut buffer = BytesMut::with_capacity(len);
251
252 // SAFETY: Buffer has been pre-allocated to exactly `len` bytes via with_capacity.
253 // read_exact_at will initialize all bytes before we access them, so set_len is safe.
254 unsafe {
255 buffer.set_len(len);
256 }
257
258 match read_exact_at(&self.inner, &mut buffer, offset) {
259 Ok(_) => Ok(buffer.freeze()),
260 Err(e) => Err(hexz_common::Error::Io(e)),
261 }
262 }
263
264 /// Returns the total file size in bytes.
265 ///
266 /// This value is cached during construction via `File::metadata()` and remains
267 /// constant for the lifetime of the backend. The file is assumed to be immutable
268 /// (snapshot semantics); modifying the file externally while the backend is
269 /// active results in undefined behavior.
270 ///
271 /// # Returns
272 ///
273 /// The file size in bytes as of the time `FileBackend::new()` was called.
274 ///
275 /// # Performance
276 ///
277 /// This method is a simple field access with no system calls (O(1)).
278 ///
279 /// # Examples
280 ///
281 /// ```no_run
282 /// use hexz_core::store::local::FileBackend;
283 /// use hexz_core::store::StorageBackend;
284 /// use std::path::Path;
285 ///
286 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
287 /// let backend = FileBackend::new(Path::new("/data/snapshot.hxz"))?;
288 /// let size = backend.len();
289 /// println!("Snapshot size: {} bytes ({} MB)", size, size / 1024 / 1024);
290 /// # Ok(())
291 /// # }
292 /// ```
293 fn len(&self) -> u64 {
294 self.size
295 }
296}