Skip to main content

obj_core/platform/
mod.rs

1//! Platform layer (L0).
2//!
3//! This module owns the file-system primitives the pager and WAL
4//! build on: opening a database file, positioned reads and writes at
5//! fixed page boundaries, length queries, truncation, removal, and the
6//! durability primitive [`FileHandle::sync_data`].
7//!
8//! # `unsafe` policy
9//!
//! Power-of-ten Rule 8 confines `unsafe` to this submodule (and to
//! `libobj`). Positioned I/O goes through the standard library's
//! per-platform `FileExt` traits, and the durability syscalls issued
//! directly (such as `fdatasync` and `F_FULLFSYNC`) go through the
//! `rustix` crate, which provides audited safe wrappers. The
//! cross-process locking submodule [`lock`] reaches for `libc::fcntl`
//! / `LockFileEx` directly because `rustix` does not expose POSIX
//! OFD-lock variants; every `unsafe` block in that submodule
//! carries a `// SAFETY:` comment per Rule 8. This `mod.rs` itself
//! contains no `unsafe` blocks and is `#![deny(unsafe_code)]`. Lint
//! levels are inherited by child modules, so the level is `deny`
//! rather than `forbid`: that leaves the `lock` submodule able to
//! re-allow its (audited) `unsafe` blocks.
21
22#![deny(unsafe_code)]
23
24#[cfg(any(test, feature = "fault-injection"))]
25pub mod fault;
26
27pub mod lock;
28
29pub use crate::platform::lock::{ReaderLock, WriterLock};
30
31use std::fs::{File, OpenOptions};
32use std::io;
33use std::path::Path;
34
35// Positioned-I/O extension trait differs per platform: the Unix
36// `FileExt` exposes `read_exact_at` / `write_all_at` directly, while
37// the Windows `FileExt` only exposes single-shot `seek_read` /
38// `seek_write`. We import each as `_` so the methods are in scope and
39// fall back to a hand-rolled retry loop on Windows.
40#[cfg(unix)]
41use std::os::unix::fs::FileExt as _;
42// #75: on unix, newly-created DB/backup files get owner-only
43// permissions (0600) via `OpenOptionsExt::mode` so a database that
44// may hold plaintext (unencrypted) data is not world-readable at
45// rest. The mode is the pre-umask request; the effective mode is
46// `0600 & !umask`. Non-unix targets keep the platform default.
47#[cfg(unix)]
48use std::os::unix::fs::OpenOptionsExt as _;
49#[cfg(windows)]
50use std::os::windows::fs::FileExt as _;
51
/// Permission bits requested for database and backup files this
/// module creates on unix targets: read + write for the owning user,
/// nothing for group or others (`rw-------`). Tracked as #75.
///
/// The value is passed to `OpenOptionsExt::mode`, so it is the
/// pre-umask request rather than the guaranteed final mode.
#[cfg(unix)]
const OWNER_ONLY_MODE: u32 = 0o600;
56
57use crate::error::{Error, Result};
58
/// Upper bound on how many times a Windows I/O call is attempted
/// while it keeps failing with a transient error
/// (`ERROR_LOCK_VIOLATION` / `ERROR_SHARING_VIOLATION`).
///
/// The figure follows `SQLite`'s `winRetryIoerr` (10 attempts, linear
/// backoff). Power-of-ten Rule 2 asks every retry loop to carry an
/// explicit bound and to return a recoverable `Err` once the bound is
/// reached; this constant is that bound.
#[cfg(windows)]
const WIN_TRANSIENT_RETRY_LIMIT: u32 = 10;
66
67/// File-backend abstraction the pager and WAL build on.
68///
69/// `FileBackend` is the common subset of [`FileHandle`] operations
70/// that fault-injection harnesses and the production type both expose
71/// (Rule 9). Production code never holds `dyn FileBackend`; both
72/// [`crate::pager::Pager`] and [`crate::wal::Wal`] are generic over
73/// `F: FileBackend` so the dispatch stays monomorphised.
74///
75/// New methods added to this trait MUST mirror an existing
76/// [`FileHandle`] method exactly. Adding a method that does not exist
77/// on the production type would let the harness perform syscalls
78/// production code cannot — a forbidden divergence (the harness
79/// must be a strict superset of legal behaviour, never a separate
80/// kingdom).
81pub trait FileBackend: Sized {
82    /// Length of the file in bytes. See [`FileHandle::len`].
83    ///
84    /// # Errors
85    ///
86    /// Returns [`Error::Io`] on syscall failure.
87    fn len(&self) -> Result<u64>;
88
89    /// `true` iff the file has zero length.
90    ///
91    /// # Errors
92    ///
93    /// Returns [`Error::Io`] on syscall failure.
94    fn is_empty(&self) -> Result<bool> {
95        Ok(self.len()? == 0)
96    }
97
98    /// Positioned read. See [`FileHandle::read_exact_at`].
99    ///
100    /// # Errors
101    ///
102    /// Returns [`Error::Io`] on syscall failure or harness-injected
103    /// short read.
104    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<()>;
105
106    /// Positioned write. See [`FileHandle::write_all_at`].
107    ///
108    /// # Errors
109    ///
110    /// Returns [`Error::Io`] on syscall failure.
111    fn write_all_at(&self, buf: &[u8], offset: u64) -> Result<()>;
112
113    /// Truncate or extend the file. See [`FileHandle::set_len`].
114    ///
115    /// # Errors
116    ///
117    /// Returns [`Error::Io`] on syscall failure.
118    fn set_len(&self, new_len: u64) -> Result<()>;
119
120    /// See [`FileHandle::sync_data`].
121    ///
122    /// # Errors
123    ///
124    /// Returns [`Error::Io`] on syscall failure.
125    fn sync_data(&self, mode: SyncMode) -> Result<()>;
126
127    /// See [`FileHandle::sync_all`].
128    ///
129    /// # Errors
130    ///
131    /// Returns [`Error::Io`] on syscall failure.
132    fn sync_all(&self) -> Result<()>;
133}
134
135/// Durability mode for [`FileHandle::sync_data`].
136///
137/// `SyncMode` is the user-visible knob that selects the cross-platform
138/// fsync primitive `obj` calls after a WAL commit. The contract for
139/// each variant is documented in `docs/format.md` § `SyncMode`.
140///
141/// The default is [`SyncMode::Full`]: a `commit` that returns
142/// `Ok(())` is durable across a system-wide power loss. `Normal` is
143/// the throughput-tuned middle ground; `Off` skips the syscall and is
144/// only safe for tests and benchmarks.
145///
146/// Power-of-ten Rule 5: a three-state enum is far cheaper to audit
147/// than three `bool` knobs, and the variants are exhaustive at every
148/// `match`.
149#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
150#[non_exhaustive]
151pub enum SyncMode {
152    /// Strongest durability. Survives system-wide power loss.
153    ///
154    /// Maps to `fcntl(F_FULLFSYNC)` on macOS (forces the drive cache
155    /// to flush), `FlushFileBuffers` on Windows, and `fdatasync` on
156    /// Linux / BSDs. macOS's plain `fsync` is **not** sufficient
157    /// here — it does not flush the drive cache; `F_FULLFSYNC` does.
158    /// This is the standard wisdom for safety-critical macOS storage.
159    #[default]
160    Full,
161
162    /// Process-crash and kernel-panic durability; may lose data on a
163    /// sudden power loss if the drive's write cache has not been
164    /// flushed by the time the OS acknowledges the call.
165    ///
166    /// Maps to `fsync` on Unix and `FlushFileBuffers` on Windows. On
167    /// Windows there is no weaker primitive than `FlushFileBuffers`,
168    /// so `Normal` and `Full` are equivalent there.
169    Normal,
170
171    /// No durability call. The OS may write the data eventually, but
172    /// `obj` does not ask it to. Use only for tests and benchmarks
173    /// where data loss is acceptable.
174    Off,
175}
176
/// An open database file that supports positioned reads and writes
/// at page boundaries.
///
/// The surface is deliberately small: only what the pager (L1) and
/// the WAL (L2) require. Layers above must not bypass it and call
/// `std::fs` themselves — funnelling every syscall through this one
/// type is what keeps Rule 8 enforceable.
#[derive(Debug)]
pub struct FileHandle {
    // The underlying OS file; every method on `FileHandle` operates on it.
    file: File,
}
188
189impl FileHandle {
190    /// Open `path` for read-write access, creating it if it does not
191    /// exist. The new file is empty; the caller is responsible for
192    /// writing the file header.
193    ///
194    /// # Errors
195    ///
196    /// Returns [`Error::Io`] if the file cannot be opened or created
197    /// (permission denied, missing parent directory, etc.).
198    pub fn open_or_create<P: AsRef<Path>>(path: P) -> Result<Self> {
199        let mut opts = OpenOptions::new();
200        opts.read(true).write(true).create(true).truncate(false);
201        // #75: request owner-only permissions on unix. `mode` only
202        // affects files this call CREATES; an existing file keeps its
203        // current permissions.
204        #[cfg(unix)]
205        opts.mode(OWNER_ONLY_MODE);
206        let file = opts.open(path)?;
207        Ok(Self { file })
208    }
209
210    /// Open `path` for read-write access, failing if the file
211    /// already exists (`O_CREAT | O_EXCL` on POSIX, `CREATE_NEW` on
212    /// Windows). Used by M11 #92 hot-backup to guarantee the
213    /// destination is never overwritten.
214    ///
215    /// # Errors
216    ///
217    /// Returns [`Error::Io`] if the file already exists, the parent
218    /// directory does not exist, or any other syscall failure
219    /// occurs.
220    pub fn create_new<P: AsRef<Path>>(path: P) -> Result<Self> {
221        let mut opts = OpenOptions::new();
222        opts.read(true).write(true).create_new(true);
223        // #75: owner-only permissions on unix for the always-fresh
224        // backup destination.
225        #[cfg(unix)]
226        opts.mode(OWNER_ONLY_MODE);
227        let file = opts.open(path)?;
228        Ok(Self { file })
229    }
230
231    /// Length of the file in bytes.
232    ///
233    /// # Errors
234    ///
235    /// Returns [`Error::Io`] if the metadata syscall fails.
236    pub fn len(&self) -> Result<u64> {
237        let meta = self.file.metadata()?;
238        Ok(meta.len())
239    }
240
241    /// `true` if the file is zero-length (i.e. just created).
242    ///
243    /// # Errors
244    ///
245    /// Returns [`Error::Io`] if the metadata syscall fails.
246    pub fn is_empty(&self) -> Result<bool> {
247        Ok(self.len()? == 0)
248    }
249
250    /// Positioned read. Fills `buf` from byte offset `offset`.
251    ///
252    /// # Errors
253    ///
254    /// Returns [`Error::Io`] on syscall failure or on short read
255    /// (e.g. file shorter than `offset + buf.len()`).
256    pub fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<()> {
257        read_exact_at_impl(&self.file, buf, offset).map_err(Error::from)
258    }
259
260    /// Positioned write. Writes `buf` to byte offset `offset`.
261    ///
262    /// # Errors
263    ///
264    /// Returns [`Error::Io`] on syscall failure or on short write.
265    pub fn write_all_at(&self, buf: &[u8], offset: u64) -> Result<()> {
266        write_all_at_impl(&self.file, buf, offset).map_err(Error::from)
267    }
268
269    /// Truncate or extend the file to `new_len` bytes.
270    ///
271    /// Used by the pager when the freelist is exhausted and a fresh
272    /// page must be appended.
273    ///
274    /// On Windows this call wraps `SetEndOfFile`, which can transiently
275    /// return `ERROR_LOCK_VIOLATION` when Windows Defender holds a
276    /// short byte-range lock on the extending region. We retry under
277    /// the same bounded scheme as positioned reads and writes (up to
278    /// 10 attempts with linear backoff capped at 250 ms). On Unix the
279    /// call is forwarded unchanged.
280    ///
281    /// # Errors
282    ///
283    /// Returns [`Error::Io`] on syscall failure.
284    pub fn set_len(&self, new_len: u64) -> Result<()> {
285        #[cfg(windows)]
286        {
287            set_len_with_retry(&self.file, new_len).map_err(Error::from)
288        }
289        #[cfg(not(windows))]
290        {
291            self.file.set_len(new_len).map_err(Error::from)
292        }
293    }
294
295    /// Force file contents and metadata to disk. Used at close.
296    ///
297    /// Power-of-ten Rule 7: the underlying call returns
298    /// `io::Result<()>` and is propagated explicitly.
299    ///
300    /// # Errors
301    ///
302    /// Returns [`Error::Io`] on syscall failure.
303    pub fn sync_all(&self) -> Result<()> {
304        self.file.sync_all().map_err(Error::from)
305    }
306
307    /// Force file data (and on `Full`, the drive cache) to persistent
308    /// storage according to `mode`. See [`SyncMode`] for the exact
309    /// per-variant durability promise.
310    ///
311    /// On `SyncMode::Off` this call is a no-op.
312    ///
313    /// # Errors
314    ///
315    /// Returns [`Error::Io`] on syscall failure.
316    pub fn sync_data(&self, mode: SyncMode) -> Result<()> {
317        match mode {
318            SyncMode::Off => Ok(()),
319            SyncMode::Normal => sync_data_normal(&self.file),
320            SyncMode::Full => sync_data_full(&self.file),
321        }
322    }
323}
324
325impl FileBackend for FileHandle {
326    fn len(&self) -> Result<u64> {
327        FileHandle::len(self)
328    }
329    fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<()> {
330        FileHandle::read_exact_at(self, buf, offset)
331    }
332    fn write_all_at(&self, buf: &[u8], offset: u64) -> Result<()> {
333        FileHandle::write_all_at(self, buf, offset)
334    }
335    fn set_len(&self, new_len: u64) -> Result<()> {
336        FileHandle::set_len(self, new_len)
337    }
338    fn sync_data(&self, mode: SyncMode) -> Result<()> {
339        FileHandle::sync_data(self, mode)
340    }
341    fn sync_all(&self) -> Result<()> {
342        FileHandle::sync_all(self)
343    }
344}
345
346// --------------------------------------------------------------------
347// Per-platform positioned-I/O primitives. Unix's `FileExt` exposes
348// `read_exact_at` / `write_all_at` natively; Windows only offers the
349// single-shot `seek_read` / `seek_write`, so we retry until the
350// requested span is satisfied or the call fails.
351// --------------------------------------------------------------------
352
/// Unix positioned read: fill all of `buf` from `file` starting at
/// byte `offset`, without touching the file cursor.
///
/// Forwards to the standard library's Unix `FileExt::read_exact_at`;
/// the trait is named in full here so the dependency is visible at
/// the call site. Hitting end-of-file before `buf` is full surfaces
/// as `io::ErrorKind::UnexpectedEof`.
#[cfg(unix)]
fn read_exact_at_impl(file: &File, buf: &mut [u8], offset: u64) -> io::Result<()> {
    std::os::unix::fs::FileExt::read_exact_at(file, buf, offset)
}
357
/// Unix positioned write: store all of `buf` into `file` starting at
/// byte `offset`, without touching the file cursor.
///
/// Forwards to the standard library's Unix `FileExt::write_all_at`;
/// the trait is named in full here so the dependency is visible at
/// the call site.
#[cfg(unix)]
fn write_all_at_impl(file: &File, buf: &[u8], offset: u64) -> io::Result<()> {
    std::os::unix::fs::FileExt::write_all_at(file, buf, offset)
}
362
/// Windows positioned read: keep calling the single-shot `seek_read`
/// until `buf` has been filled from `offset` onwards.
///
/// * A read of zero bytes while `buf` still has room is reported as
///   `UnexpectedEof`.
/// * `Interrupted` is retried immediately.
/// * Transient errors (see `is_transient_io_error`) are retried after
///   a backoff, giving up once [`WIN_TRANSIENT_RETRY_LIMIT`]
///   consecutive attempts have failed that way.
/// * Any other error is returned as-is.
#[cfg(windows)]
fn read_exact_at_impl(file: &File, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
    // Consecutive transient failures since the last successful read.
    let mut transient_failures: u32 = 0;
    while !buf.is_empty() {
        let err = match file.seek_read(buf, offset) {
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::UnexpectedEof,
                    "failed to fill whole buffer",
                ));
            }
            Ok(read) => {
                // Data arrived, so whatever contention there was is
                // over: start the transient budget afresh and move
                // the window past the bytes just read.
                transient_failures = 0;
                buf = &mut std::mem::take(&mut buf)[read..];
                offset += read as u64;
                continue;
            }
            Err(err) => err,
        };

        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        // Give up on hard errors, and on transient ones once this
        // failure would be the last permitted attempt.
        let out_of_budget = transient_failures + 1 >= WIN_TRANSIENT_RETRY_LIMIT;
        if out_of_budget || !is_transient_io_error(&err) {
            return Err(err);
        }
        transient_failures += 1;
        windows_io_backoff(transient_failures);
    }
    Ok(())
}
395
/// Windows positioned write: keep calling the single-shot
/// `seek_write` until all of `buf` has been written from `offset`
/// onwards.
///
/// * A write of zero bytes while data remains is reported as
///   `WriteZero`.
/// * `Interrupted` is retried immediately.
/// * Transient errors (see `is_transient_io_error`) are retried after
///   a backoff, giving up once [`WIN_TRANSIENT_RETRY_LIMIT`]
///   consecutive attempts have failed that way.
/// * Any other error is returned as-is.
#[cfg(windows)]
fn write_all_at_impl(file: &File, mut buf: &[u8], mut offset: u64) -> io::Result<()> {
    // Consecutive transient failures since the last successful write.
    let mut transient_failures: u32 = 0;
    while !buf.is_empty() {
        let err = match file.seek_write(buf, offset) {
            Ok(0) => {
                return Err(io::Error::new(
                    io::ErrorKind::WriteZero,
                    "failed to write whole buffer",
                ));
            }
            Ok(written) => {
                // Forward progress resets the transient budget.
                transient_failures = 0;
                buf = &buf[written..];
                offset += written as u64;
                continue;
            }
            Err(err) => err,
        };

        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        // Give up on hard errors, and on transient ones once this
        // failure would be the last permitted attempt.
        let out_of_budget = transient_failures + 1 >= WIN_TRANSIENT_RETRY_LIMIT;
        if out_of_budget || !is_transient_io_error(&err) {
            return Err(err);
        }
        transient_failures += 1;
        windows_io_backoff(transient_failures);
    }
    Ok(())
}
425
/// Windows-only resize: set the length of `file` to `new_len`,
/// running the call under `retry_transient_io` so that
/// `ERROR_LOCK_VIOLATION` / `ERROR_SHARING_VIOLATION` are retried up
/// to [`WIN_TRANSIENT_RETRY_LIMIT`] times with linear backoff — the
/// same retry shape positioned reads and writes use.
#[cfg(windows)]
fn set_len_with_retry(file: &File, new_len: u64) -> io::Result<()> {
    let resize = || file.set_len(new_len);
    retry_transient_io(resize)
}
434
/// Run `op` under the Windows transient-I/O retry policy.
///
/// `op` is always invoked at least once. When it fails with
/// `ERROR_LOCK_VIOLATION` / `ERROR_SHARING_VIOLATION` the harness
/// sleeps (linear backoff) and tries again, for at most
/// [`WIN_TRANSIENT_RETRY_LIMIT`] invocations in total; after that the
/// most recent `io::Error` is returned. `Interrupted` is retried
/// straight away, and any other error is returned on the spot.
///
/// `set_len_with_retry` is the production caller. Because `op` is an
/// arbitrary closure, unit tests can also drive the exhaustion path
/// without needing Defender or a real lock.
#[cfg(windows)]
fn retry_transient_io<F>(mut op: F) -> io::Result<()>
where
    F: FnMut() -> io::Result<()>,
{
    // Number of transient failures observed so far.
    let mut transient_failures: u32 = 0;
    loop {
        let Err(err) = op() else {
            return Ok(());
        };
        if err.kind() == io::ErrorKind::Interrupted {
            continue;
        }
        // Stop on hard errors, and on transient ones once this
        // failure was the last permitted attempt.
        let out_of_budget = transient_failures + 1 >= WIN_TRANSIENT_RETRY_LIMIT;
        if out_of_budget || !is_transient_io_error(&err) {
            return Err(err);
        }
        transient_failures += 1;
        windows_io_backoff(transient_failures);
    }
}
462
/// Whether `e` is one of the Windows errors worth retrying:
/// `ERROR_SHARING_VIOLATION` (32) or `ERROR_LOCK_VIOLATION` (33).
///
/// Windows Defender's real-time scanner produces these while it
/// briefly holds a byte-range lock on a file it is scanning;
/// `SQLite`'s `winRetryIoerr` retries the same pair. Errors that carry
/// no raw OS code are never considered transient.
#[cfg(windows)]
fn is_transient_io_error(e: &io::Error) -> bool {
    use windows_sys::Win32::Foundation::{ERROR_LOCK_VIOLATION, ERROR_SHARING_VIOLATION};
    // `raw_os_error` yields `i32` while the `windows-sys` constants
    // are `u32`. Both codes are small positive numbers, so
    // `cast_signed` keeps the value (and bit pattern) that
    // `from_raw_os_error` round-trips.
    let retryable = [
        ERROR_LOCK_VIOLATION.cast_signed(),
        ERROR_SHARING_VIOLATION.cast_signed(),
    ];
    e.raw_os_error()
        .is_some_and(|code| retryable.contains(&code))
}
483
/// Sleep between transient-I/O retries using linear backoff: the
/// pause is `min(attempt * 25, 250)` milliseconds, as in `SQLite`'s
/// `winRetryIoerr`.
///
/// This function does not limit how often it is called; bounding
/// `attempt` via [`WIN_TRANSIENT_RETRY_LIMIT`] is the caller's duty
/// (Power-of-ten Rule 2).
#[cfg(windows)]
fn windows_io_backoff(attempt: u32) {
    const STEP_MS: u64 = 25;
    const CAP_MS: u64 = 250;
    let uncapped_ms = STEP_MS.saturating_mul(u64::from(attempt));
    let pause = std::time::Duration::from_millis(uncapped_ms.min(CAP_MS));
    std::thread::sleep(pause);
}
495
496// --------------------------------------------------------------------
497// Per-platform sync primitives. Kept as small free functions so the
498// platform switch is one match-arm per variant rather than threading
499// `cfg` attributes through `FileHandle::sync_data`.
500// --------------------------------------------------------------------
501
502/// `Normal` durability — `fsync` on Unix, `FlushFileBuffers` on
503/// Windows. Survives process / kernel crash but may lose data on a
504/// sudden power loss if the drive cache has not been flushed.
505fn sync_data_normal(file: &File) -> Result<()> {
506    // `std::fs::File::sync_all` invokes `fsync(2)` on Unix and
507    // `FlushFileBuffers` on Windows. Both flush the OS page cache;
508    // neither, on macOS, flushes the drive cache (that is `Full`'s
509    // job via `F_FULLFSYNC`). Using `sync_all` here keeps the
510    // platform switch in one place and avoids reimplementing the
511    // `fsync` syscall ourselves.
512    file.sync_all().map_err(Error::from)
513}
514
/// `Full` durability on Apple targets: flush the drive cache as well
/// as the OS cache. [`SyncMode::Full`] lists the mapping for every
/// OS.
#[cfg(target_vendor = "apple")]
fn sync_data_full(file: &File) -> Result<()> {
    // A plain `fsync` on macOS leaves the drive cache unflushed. The
    // documented way to flush it is `fcntl(F_FULLFSYNC)`, for which
    // `rustix` provides a safe wrapper. See
    // <https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fcntl.2.html>.
    match rustix::fs::fcntl_fullfsync(file) {
        Ok(()) => Ok(()),
        Err(errno) => Err(Error::Io(io::Error::from(errno))),
    }
}
526
527/// `Full` durability on non-Apple Unix targets: `fdatasync(2)` is
528/// sufficient (the on-disk data is flushed, metadata changes that do
529/// not affect the data — like mtime — are not).
530#[cfg(all(unix, not(target_vendor = "apple")))]
531fn sync_data_full(file: &File) -> Result<()> {
532    rustix::fs::fdatasync(file).map_err(|e| Error::Io(io::Error::from(e)))
533}
534
/// `Full` durability on Windows. `FlushFileBuffers` is the strongest
/// primitive available there, and `std::fs::File::sync_all` is how we
/// reach it.
#[cfg(windows)]
fn sync_data_full(file: &File) -> Result<()> {
    file.sync_all()?;
    Ok(())
}
541
542/// Delete the file at `path` if it exists.
543///
544/// Used by `Pager::close()` to remove the WAL sidecar after a clean
545/// shutdown. Missing-file is intentionally **not** an error; the
546/// post-condition is "no file at `path`", and that is satisfied either
547/// by deletion or by absence.
548///
549/// # Errors
550///
551/// Returns [`Error::Io`] on any failure other than `NotFound`.
552pub fn remove_file_if_exists<P: AsRef<Path>>(path: P) -> Result<()> {
553    match std::fs::remove_file(path) {
554        Ok(()) => Ok(()),
555        Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()),
556        Err(e) => Err(Error::Io(e)),
557    }
558}
559
560impl From<io::ErrorKind> for Error {
561    fn from(kind: io::ErrorKind) -> Self {
562        Error::Io(io::Error::from(kind))
563    }
564}
565
#[cfg(test)]
mod tests {
    use super::{FileHandle, SyncMode};
    use tempfile::TempDir;

    /// Create a scratch file, size it to one 4096-byte page, fill the
    /// page with `0xAB`, then require `sync_data(mode)` to succeed.
    fn write_and_sync(mode: SyncMode) {
        let scratch = TempDir::new().expect("tempdir");
        let handle = FileHandle::open_or_create(scratch.path().join("sync.bin")).expect("open");
        handle.set_len(4096).expect("set_len");
        handle.write_all_at(&[0xABu8; 4096], 0).expect("write");
        handle.sync_data(mode).expect("sync_data must succeed");
    }

    #[test]
    fn sync_data_full_returns_ok() {
        write_and_sync(SyncMode::Full);
    }

    #[test]
    fn sync_data_normal_returns_ok() {
        write_and_sync(SyncMode::Normal);
    }

    #[test]
    fn sync_data_off_is_noop() {
        write_and_sync(SyncMode::Off);
    }

    #[test]
    fn default_is_full() {
        assert_eq!(SyncMode::default(), SyncMode::Full);
    }

    // --------------------------------------------------------------
    // Tests for the Windows retry helpers. The helpers themselves are
    // `#[cfg(windows)]`, so everything below carries the same gate
    // and is neither compiled nor run on macOS / Linux.
    // --------------------------------------------------------------

    /// Build an `io::Error` that carries the given Win32 error code.
    #[cfg(windows)]
    fn os_error(code: u32) -> std::io::Error {
        std::io::Error::from_raw_os_error(code.cast_signed())
    }

    #[cfg(windows)]
    #[test]
    fn is_transient_io_error_matches_lock_and_sharing_codes() {
        use super::is_transient_io_error;
        use std::io;
        use windows_sys::Win32::Foundation::{
            ERROR_ACCESS_DENIED, ERROR_LOCK_VIOLATION, ERROR_SHARING_VIOLATION,
        };

        // The two retryable codes.
        for code in [ERROR_LOCK_VIOLATION, ERROR_SHARING_VIOLATION] {
            assert!(is_transient_io_error(&os_error(code)));
        }

        // `ERROR_ACCESS_DENIED` is a hard failure and must not be
        // classified as a retry candidate.
        assert!(!is_transient_io_error(&os_error(ERROR_ACCESS_DENIED)));

        // An error built from a kind alone has no raw OS code, so it
        // can never be transient.
        let kind_only = io::Error::from(io::ErrorKind::NotFound);
        assert!(!is_transient_io_error(&kind_only));
    }

    #[cfg(windows)]
    #[test]
    fn windows_io_backoff_is_bounded() {
        use super::windows_io_backoff;
        use std::time::{Duration, Instant};

        // A single backoff sleeps at most ~250 ms, including for an
        // attempt number (100) far beyond the cap. One second leaves
        // generous room for scheduler jitter on CI runners.
        for attempt in [1u32, 100] {
            let started = Instant::now();
            windows_io_backoff(attempt);
            assert!(started.elapsed() < Duration::from_secs(1));
        }
    }

    #[cfg(windows)]
    #[test]
    fn retry_transient_io_returns_first_success() {
        use super::retry_transient_io;
        use windows_sys::Win32::Foundation::ERROR_LOCK_VIOLATION;

        // Fail transiently on calls one and two, succeed on call
        // three: the harness must return `Ok(())` after exactly
        // three invocations.
        let mut calls = 0u32;
        let outcome = retry_transient_io(|| {
            calls += 1;
            if calls >= 3 {
                Ok(())
            } else {
                Err(os_error(ERROR_LOCK_VIOLATION))
            }
        });
        outcome.expect("must recover after transient sequence");
        assert_eq!(calls, 3);
    }

    #[cfg(windows)]
    #[test]
    fn retry_transient_io_exhausts_and_surfaces_last_error() {
        use super::{retry_transient_io, WIN_TRANSIENT_RETRY_LIMIT};
        use windows_sys::Win32::Foundation::ERROR_LOCK_VIOLATION;

        // A closure that is transient forever: it must be invoked
        // exactly `WIN_TRANSIENT_RETRY_LIMIT` times, and the final
        // raw OS error must come back untouched.
        let mut calls = 0u32;
        let err = retry_transient_io(|| {
            calls += 1;
            Err(os_error(ERROR_LOCK_VIOLATION))
        })
        .expect_err("must exhaust");
        assert_eq!(calls, WIN_TRANSIENT_RETRY_LIMIT);
        assert_eq!(err.raw_os_error(), Some(ERROR_LOCK_VIOLATION.cast_signed()));
    }

    #[cfg(windows)]
    #[test]
    fn retry_transient_io_returns_non_transient_immediately() {
        use super::retry_transient_io;
        use windows_sys::Win32::Foundation::ERROR_ACCESS_DENIED;

        // Hard errors are never retried: one invocation, and the
        // error is handed straight back.
        let mut calls = 0u32;
        let err = retry_transient_io(|| {
            calls += 1;
            Err(os_error(ERROR_ACCESS_DENIED))
        })
        .expect_err("must fail");
        assert_eq!(calls, 1);
        assert_eq!(err.raw_os_error(), Some(ERROR_ACCESS_DENIED.cast_signed()));
    }
}