Skip to main content

obj_core/platform/
lock.rs

1//! Cross-process byte-range file locking.
2//!
3//! M6 issue #44. POSIX uses OFD `fcntl` locks (`F_OFD_SETLK` /
4//! `F_OFD_SETLKW`) — kernel-tracked per-fd, fork-safe, automatically
5//! released on process exit. Windows uses `LockFileEx` /
6//! `UnlockFileEx`.
7//!
8//! Locks anchor against a dedicated `<db>.obj-lock` sidecar file
9//! created by `Db::open` next to the main database (mirroring the
10//! existing `<db>.obj-wal` sidecar convention). Using a sidecar
11//! decouples the lock-byte range from any region the pager may
12//! read or write, so the lock byte offsets can be the same on
13//! every platform and need not be placed past EOF:
14//!
15//! - [`WRITER_LOCK_OFFSET`] = 96 (exclusive, 1 byte).
16//! - [`READER_LOCK_RANGE_OFFSET`] = 97..128 (shared, 31 slots).
17//!
18//! On Windows `LockFileEx` produces *mandatory* byte-range locks.
19//! Issue #1: prior versions placed the Windows lock anchor at
20//! `0x4000_0000` (past EOF of an empty file) so that pager I/O
21//! could not overlap the locked range. That assumption broke when
22//! the main DB file grew past 1 GiB — any page write whose offset
23//! crossed `0x4000_0000` failed with `ERROR_LOCK_VIOLATION`. The
24//! sidecar fixes the hazard structurally: the lock handle and the
25//! pager handle target *different files*, so no pager I/O can
26//! ever overlap a lock byte regardless of how large the DB grows.
27//! See `docs/format.md` § File locking.
28//!
29//! The lock state lives in the OS kernel's per-fd lock table — the
30//! bytes on disk are never read or written by obj. See
31//! `docs/format.md` § File locking and § Reader snapshots (MVCC)
32//! for the user-visible protocol.
33//!
34//! # `unsafe` policy
35//!
36//! `rustix::fs::fcntl_lock` does whole-file locking with `F_SETLK*`,
37//! not OFD locks. We therefore call `libc::fcntl` directly with the
38//! OFD command IDs. On Windows we call `LockFileEx` /
39//! `UnlockFileEx` via `windows-sys`. Every `unsafe` block carries a
40//! `// SAFETY:` comment per power-of-ten Rule 8.
41
42// Re-introduce unsafe inside this submodule.  The parent
43// `platform/mod.rs` is `#![deny(unsafe_code)]`; we override the
44// deny here because the OS-side lock syscalls are pointer-based
45// and `rustix` does not expose the OFD variants.
46#![allow(unsafe_code)]
47
48use std::os::raw::c_int;
49use std::sync::atomic::{AtomicU64, Ordering};
50use std::time::{Duration, Instant};
51
52use crate::error::{Error, LockKind, Result};
53use crate::platform::FileHandle;
54
55/// Byte offset of the `WRITER_LOCK` (1 byte, exclusive) inside the
56/// `<db>.obj-lock` sidecar file.
57///
58/// The lock anchor lives at the same offset on every platform
59/// because the sidecar file is never read or written by the pager
60/// — its only purpose is to carry kernel-side lock metadata. On
61/// POSIX this byte exists inside a 128-byte sidecar (see
62/// `Db::open`'s `set_len(128)` on the sidecar). OFD locks are
63/// advisory and would tolerate locks past EOF, but giving the byte
64/// a physical existence is the conservative choice across kernels.
65/// On Windows `LockFileEx` produces **mandatory** byte-range
66/// locks — the sidecar guarantees pager I/O cannot overlap the
67/// locked region regardless of how large the main DB grows
68/// (issue #1; the previous past-EOF strategy broke at >1 GiB).
69pub const WRITER_LOCK_OFFSET: u64 = 96;
70/// Byte offset of the first reader-lock slot inside the
71/// `<db>.obj-lock` sidecar. See [`WRITER_LOCK_OFFSET`] for the
72/// sidecar rationale.
73pub const READER_LOCK_RANGE_OFFSET: u64 = 97;
74/// Length of the reader-lock byte range. 31 slots.
75pub const READER_LOCK_RANGE_LEN: u64 = 31;
76
77/// Initial backoff between busy-loop retries.  Power-of-ten Rule 2:
78/// the retry loop is bounded by `deadline / INITIAL_BACKOFF` so an
79/// exhausted budget surfaces deterministically.
80const INITIAL_BACKOFF: Duration = Duration::from_millis(1);
81/// Cap on the per-retry sleep so a long timeout stays responsive.
82const MAX_BACKOFF: Duration = Duration::from_millis(100);
83
84/// RAII guard for a held `WRITER_LOCK` byte. Dropping the guard
85/// releases the OS-side lock. The guard is `!Send` only by virtue of
86/// the file handle it does NOT own — the underlying lock is per-fd,
87/// so as long as the fd survives, releasing from any thread is
88/// sound.
89#[derive(Debug)]
90#[must_use = "WriterLock releases the OS-side lock when dropped"]
91pub struct WriterLock {
92    fd: c_int,
93    released: bool,
94}
95
96impl WriterLock {
97    /// Explicitly release the lock.  Equivalent to `Drop` but lets
98    /// the caller observe a release error (the `Drop` impl silently
99    /// swallows errors because panics from `Drop` are toxic).
100    ///
101    /// # Errors
102    ///
103    /// Returns `Error::Io` on the unlikely event that the OS
104    /// rejects the unlock syscall.
105    pub fn release(mut self) -> Result<()> {
106        if self.released {
107            return Ok(());
108        }
109        self.released = true;
110        unlock_range(self.fd, WRITER_LOCK_OFFSET, 1)
111    }
112}
113
114impl Drop for WriterLock {
115    fn drop(&mut self) {
116        if !self.released {
117            let _ = unlock_range(self.fd, WRITER_LOCK_OFFSET, 1);
118        }
119    }
120}
121
122/// RAII guard for a held reader-lock byte. Dropping the guard
123/// releases the OS-side lock.
124#[derive(Debug)]
125#[must_use = "ReaderLock releases the OS-side lock when dropped"]
126pub struct ReaderLock {
127    fd: c_int,
128    slot: u64,
129    released: bool,
130}
131
132impl ReaderLock {
133    /// Byte offset of the reader-slot this guard holds.  Useful for
134    /// diagnostics.
135    #[must_use]
136    pub fn slot(&self) -> u64 {
137        self.slot
138    }
139
140    /// Explicitly release the lock.
141    ///
142    /// # Errors
143    ///
144    /// Returns `Error::Io` on the unlikely event that the OS
145    /// rejects the unlock syscall.
146    pub fn release(mut self) -> Result<()> {
147        if self.released {
148            return Ok(());
149        }
150        self.released = true;
151        unlock_range(self.fd, self.slot, 1)
152    }
153}
154
155impl Drop for ReaderLock {
156    fn drop(&mut self) {
157        if !self.released {
158            let _ = unlock_range(self.fd, self.slot, 1);
159        }
160    }
161}
162
163impl FileHandle {
164    /// Try once, non-blocking, to acquire the `WRITER_LOCK`. Returns
165    /// `Ok(Some(guard))` if the lock was acquired, `Ok(None)` if it
166    /// is held by someone else, or `Err(Error::Io)` on syscall
167    /// failure.
168    ///
169    /// # Errors
170    ///
171    /// Returns [`Error::Io`] on syscall failure other than
172    /// "would-block / already-locked".
173    pub fn try_lock_writer(&self) -> Result<Option<WriterLock>> {
174        ensure_ofd_locks_supported()?;
175        let fd = self.raw_fd();
176        if try_lock_range(fd, WRITER_LOCK_OFFSET, 1, LockMode::Exclusive)? {
177            Ok(Some(WriterLock {
178                fd,
179                released: false,
180            }))
181        } else {
182            Ok(None)
183        }
184    }
185
186    /// Acquire the `WRITER_LOCK`, retrying with bounded exponential
187    /// backoff until either acquired or `timeout` elapses. Returns
188    /// `Err(Error::Busy { kind: LockKind::Writer })` on timeout.
189    ///
190    /// # Errors
191    ///
192    /// - [`Error::Busy`] with `LockKind::Writer` on timeout.
193    /// - [`Error::Io`] on any non-"would-block" syscall failure.
194    pub fn lock_writer(&self, timeout: Duration) -> Result<WriterLock> {
195        ensure_ofd_locks_supported()?;
196        let fd = self.raw_fd();
197        retry_until_acquired(timeout, LockKind::Writer, || {
198            try_lock_range(fd, WRITER_LOCK_OFFSET, 1, LockMode::Exclusive)
199        })?;
200        Ok(WriterLock {
201            fd,
202            released: false,
203        })
204    }
205
206    /// Acquire any one of the 31 reader-lock slots in shared mode,
207    /// retrying with bounded backoff until either acquired or
208    /// `timeout` elapses.
209    ///
210    /// The slot is chosen with a per-process round-robin counter so
211    /// concurrent readers in the same process do not all race for
212    /// the same byte.  Shared locks compose, so falling on the same
213    /// byte is not a correctness bug — just a hot-spot the spread
214    /// avoids in practice.
215    ///
216    /// # Errors
217    ///
218    /// - [`Error::Busy`] with `LockKind::Reader` on timeout (very
219    ///   rare — shared locks rarely contend).
220    /// - [`Error::Io`] on syscall failure.
221    pub fn lock_reader(&self, timeout: Duration) -> Result<ReaderLock> {
222        ensure_ofd_locks_supported()?;
223        let fd = self.raw_fd();
224        let start_slot = next_reader_slot();
225        // Try every slot once round-robin; if all 31 slots are
226        // contended (very rare), fall back to bounded retry on the
227        // start slot until the deadline expires.
228        let mut last_err: Option<Error> = None;
229        for offset in 0..READER_LOCK_RANGE_LEN {
230            let slot = READER_LOCK_RANGE_OFFSET + ((start_slot + offset) % READER_LOCK_RANGE_LEN);
231            match try_lock_range(fd, slot, 1, LockMode::Shared) {
232                Ok(true) => {
233                    return Ok(ReaderLock {
234                        fd,
235                        slot,
236                        released: false,
237                    });
238                }
239                Ok(false) => {}
240                Err(e) => last_err = Some(e),
241            }
242        }
243        if let Some(err) = last_err {
244            return Err(err);
245        }
246        // All slots reported "would-block" — fall back to busy-wait
247        // on the start slot with the caller's timeout.
248        let slot = READER_LOCK_RANGE_OFFSET + start_slot;
249        retry_until_acquired(timeout, LockKind::Reader, || {
250            try_lock_range(fd, slot, 1, LockMode::Shared)
251        })?;
252        Ok(ReaderLock {
253            fd,
254            slot,
255            released: false,
256        })
257    }
258
259    /// Raw fd accessor (POSIX) or HANDLE (Windows; cast through
260    /// `as_raw_handle`). Internal to the platform layer.
261    #[cfg(unix)]
262    fn raw_fd(&self) -> c_int {
263        use std::os::unix::io::AsRawFd;
264        self.file_ref().as_raw_fd()
265    }
266
267    #[cfg(windows)]
268    fn raw_fd(&self) -> c_int {
269        use std::os::windows::io::AsRawHandle;
270        // Carry the HANDLE through the `c_int` slot.  The lock
271        // syscalls cast it back to HANDLE before use; `c_int` is
272        // chosen so the cross-platform signature stays uniform.
273        self.file_ref().as_raw_handle() as c_int
274    }
275}
276
277// ---------- internal helpers --------------------------------------
278
279/// Per-process round-robin counter so threads in the same process
280/// pick different reader-slot bytes by default.  Wraps at
281/// `READER_LOCK_RANGE_LEN` — the modulo arithmetic in `lock_reader`
282/// handles the actual selection.
283static READER_ROUND_ROBIN: AtomicU64 = AtomicU64::new(0);
284
285fn next_reader_slot() -> u64 {
286    READER_ROUND_ROBIN.fetch_add(1, Ordering::Relaxed) % READER_LOCK_RANGE_LEN
287}
288
/// Kind of byte-range lock requested from the platform primitive.
#[derive(Clone, Copy, Debug)]
enum LockMode {
    /// Exclusive lock (`F_WRLCK` on POSIX, `LOCKFILE_EXCLUSIVE_LOCK`
    /// on Windows).
    Exclusive,
    /// Shared lock (`F_RDLCK` on POSIX, no exclusive flag on Windows).
    Shared,
}
294
295/// Bounded retry harness shared by `lock_writer` / `lock_reader`.
296/// Power-of-ten Rule 2: the loop's upper bound is
297/// `deadline.elapsed() < timeout`; once `Instant::now() >= deadline`
298/// the function returns `Err(Error::Busy)`.
299fn retry_until_acquired<F>(timeout: Duration, kind: LockKind, mut once: F) -> Result<()>
300where
301    F: FnMut() -> Result<bool>,
302{
303    let start = Instant::now();
304    let mut backoff = INITIAL_BACKOFF;
305    // Rule-2 upper bound on iteration count: `timeout` / 1 ms.  With
306    // exponential backoff capped at 100 ms, the real count is far
307    // lower than this; the explicit bound is defensive.
308    let timeout_millis = u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX);
309    let max_iters: u64 = timeout_millis.saturating_add(2);
310    let mut iters: u64 = 0;
311    loop {
312        iters = iters.saturating_add(1);
313        if iters > max_iters.saturating_add(64) {
314            return Err(Error::Busy { kind });
315        }
316        if once()? {
317            return Ok(());
318        }
319        if start.elapsed() >= timeout {
320            return Err(Error::Busy { kind });
321        }
322        std::thread::sleep(backoff);
323        backoff = (backoff * 2).min(MAX_BACKOFF);
324    }
325}
326
327// ---------- platform-specific lock primitives ---------------------
328
329/// Build a POSIX `struct flock` for the given byte range.  The
330/// numeric type of `l_type` / `l_whence` differs per platform
331/// (`i16` on Linux/macOS, `c_short` typedef elsewhere); we use
332/// `try_from` rather than `as` so clippy's pedantic
333/// `cast_possible_truncation` lint stays clean (Rule 10).
334#[cfg(unix)]
335fn build_flock(l_type: i32, offset: u64, len: u64) -> Result<libc::flock> {
336    // `libc::flock.l_type` has type `libc::c_short` on every POSIX
337    // target.  The narrowing here is exact because every libc
338    // constant we pass (`F_WRLCK`, `F_RDLCK`, `F_UNLCK`) fits in
339    // `i16` on every supported target.  `libc::F_WRLCK` is typed
340    // `i32` on Linux but `i16` on macOS; we widen at the call site
341    // and narrow here so the common helper has one signature.
342    let l_type_short =
343        libc::c_short::try_from(l_type).map_err(|_| Error::InvalidArgument("lock l_type"))?;
344    let l_whence_short = libc::c_short::try_from(libc::SEEK_SET)
345        .map_err(|_| Error::InvalidArgument("lock l_whence"))?;
346    Ok(libc::flock {
347        l_type: l_type_short,
348        l_whence: l_whence_short,
349        l_start: offset_to_off_t(offset)?,
350        l_len: offset_to_off_t(len)?,
351        l_pid: 0,
352        #[cfg(target_os = "freebsd")]
353        l_sysid: 0,
354    })
355}
356
357#[cfg(unix)]
358fn try_lock_range(fd: c_int, offset: u64, len: u64, mode: LockMode) -> Result<bool> {
359    // POSIX OFD lock: kernel-tracked per-fd, fork-safe, released on
360    // exit.  Linux ≥ 3.15, macOS ≥ 10.14, FreeBSD ≥ 12.
361    // libc::F_*LCK is `i16` on macOS, `i32` on Linux; the `.into()`
362    // widens on macOS and is a no-op on Linux.
363    #[allow(clippy::useless_conversion)]
364    let l_type: i32 = match mode {
365        LockMode::Exclusive => libc::F_WRLCK.into(),
366        LockMode::Shared => libc::F_RDLCK.into(),
367    };
368    let flock = build_flock(l_type, offset, len)?;
369    // SAFETY: `fd` is a borrowed fd from the FileHandle that
370    // outlives this call (we never store the fd beyond the
371    // call). The third argument matches the kernel's expected
372    // `struct flock*` for F_OFD_SETLK.  The kernel writes nothing
373    // through the pointer for the SETLK variant.
374    let ret = unsafe { libc::fcntl(fd, ofd_setlk_cmd(), &raw const flock) };
375    if ret == 0 {
376        return Ok(true);
377    }
378    // SAFETY: errno is a thread-local set by the libc call we just
379    // made; calling `__errno_location()` (and friends) is sound on
380    // every POSIX target libc supports.
381    let errno = unsafe { *libc_errno() };
382    if errno == libc::EAGAIN || errno == libc::EACCES {
383        // POSIX permits either EAGAIN or EACCES for "would-block".
384        return Ok(false);
385    }
386    Err(Error::Io(std::io::Error::from_raw_os_error(errno)))
387}
388
389#[cfg(unix)]
390fn unlock_range(fd: c_int, offset: u64, len: u64) -> Result<()> {
391    // libc::F_UNLCK is `i16` on macOS, `i32` on Linux; see try_lock_range.
392    #[allow(clippy::useless_conversion)]
393    let flock = build_flock(libc::F_UNLCK.into(), offset, len)?;
394    // SAFETY: same contract as `try_lock_range` above. F_OFD_SETLK
395    // with l_type = F_UNLCK is the standard release primitive.
396    let ret = unsafe { libc::fcntl(fd, ofd_setlk_cmd(), &raw const flock) };
397    if ret == 0 {
398        return Ok(());
399    }
400    // SAFETY: see comment in try_lock_range.
401    let errno = unsafe { *libc_errno() };
402    Err(Error::Io(std::io::Error::from_raw_os_error(errno)))
403}
404
405/// `true` iff the build target provides OFD (open-file-description)
406/// `fcntl` locks — `F_OFD_SETLK` / `F_OFD_SETLKW`. These are the
407/// only POSIX lock primitive obj's concurrency model can rely on:
408/// they are tracked PER-fd, so two `Db` handles to the same file in
409/// one process correctly exclude each other, and they are released
410/// on the owning fd's close rather than coalescing across the whole
411/// process.
412///
413/// Classic POSIX `F_SETLK` locks are tracked PER-PROCESS: a second
414/// `Db` handle in the same process would silently share (and on the
415/// first handle's close, silently drop) the first handle's lock,
416/// breaking the single-writer invariant without any error. Rather
417/// than fall back to that unsound primitive (#30, #44), obj refuses
418/// to lock on a target without OFD locks — see
419/// [`ensure_ofd_locks_supported`].
420///
421/// # Supported-target matrix
422///
423/// | Target | OFD locks | obj locking |
424/// |---|---|---|
425/// | Linux ≥ 3.15 / Android | yes (`F_OFD_SETLK` = 37) | supported |
426/// | macOS ≥ 10.14 / iOS (Apple) | yes (`F_OFD_SETLK` = 90) | supported |
427/// | Windows | n/a (`LockFileEx`) | supported (separate path) |
428/// | FreeBSD / other POSIX | not exported by `libc` | **refused at open** |
429///
430/// Windows uses a completely separate `LockFileEx` path and never
431/// consults this constant; it is only meaningful on `unix`.
432#[cfg(unix)]
433const TARGET_HAS_OFD_LOCKS: bool = cfg!(any(
434    target_os = "linux",
435    target_os = "android",
436    target_vendor = "apple",
437));
438
439/// Hard, documented gate for the non-OFD targets (#30). Called at
440/// the head of every lock-acquisition entry point so the failure
441/// surfaces at `Db::open` time rather than as silent, per-process
442/// lock coalescing later.
443///
444/// On Linux/macOS this is a compile-time-`true` check the optimiser
445/// erases — the lock fast path is byte-for-byte unchanged. On a
446/// target without OFD locks it returns
447/// [`std::io::ErrorKind::Unsupported`] wrapped in [`Error::Io`].
448///
449/// # Errors
450///
451/// Returns [`Error::Io`] with `ErrorKind::Unsupported` when the
452/// build target lacks OFD `fcntl` locks.
453#[cfg(unix)]
454fn ensure_ofd_locks_supported() -> Result<()> {
455    if TARGET_HAS_OFD_LOCKS {
456        return Ok(());
457    }
458    Err(Error::Io(std::io::Error::new(
459        std::io::ErrorKind::Unsupported,
460        "obj requires OFD (open-file-description) fcntl locks, which \
461         this target does not provide; classic POSIX F_SETLK locks are \
462         per-process and would silently break same-process multi-handle \
463         exclusion (see obj-core platform::lock supported-target matrix)",
464    )))
465}
466
/// Numeric `fcntl` command id for `F_OFD_SETLK` on the build target:
/// `37` on Linux / Android, `90` on Apple targets (macOS 10.14+).
/// The values are hard-coded because `libc` does not export the
/// constant on every target.
///
/// Every public lock entry point calls
/// [`ensure_ofd_locks_supported`] before reaching this function, so
/// the final arm is not expected to run. It exists only to keep the
/// function total across targets and deliberately yields `-1`, an
/// invalid command, so an unguarded call would fail with `EINVAL`
/// instead of silently installing a per-process lock (#30).
#[cfg(unix)]
fn ofd_setlk_cmd() -> c_int {
    if cfg!(any(target_os = "linux", target_os = "android")) {
        37 // F_OFD_SETLK on Linux
    } else if cfg!(target_vendor = "apple") {
        90 // F_OFD_SETLK on macOS 10.14+
    } else {
        // Never the unsound `F_SETLK` fallback.
        -1
    }
}
499
500#[cfg(unix)]
501fn offset_to_off_t(v: u64) -> Result<libc::off_t> {
502    libc::off_t::try_from(v).map_err(|_| Error::InvalidArgument("lock offset overflow"))
503}
504
505#[cfg(unix)]
506fn libc_errno() -> *mut c_int {
507    // The errno-location accessor name varies by platform. Each
508    // branch returns a thread-local pointer that lives as long as
509    // the calling thread.
510    #[cfg(any(target_os = "linux", target_os = "android"))]
511    // SAFETY: libc-provided extern "C" function with C linkage;
512    // calling without arguments is always sound and returns a
513    // thread-local pointer.
514    unsafe {
515        libc::__errno_location()
516    }
517    #[cfg(target_vendor = "apple")]
518    // SAFETY: libc-provided extern "C" function with C linkage;
519    // calling without arguments is always sound and returns a
520    // thread-local pointer.
521    unsafe {
522        libc::__error()
523    }
524    #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))]
525    // SAFETY: libc-provided extern "C" function with C linkage;
526    // calling without arguments is always sound and returns a
527    // thread-local pointer.
528    unsafe {
529        libc::__error()
530    }
531    #[cfg(any(target_os = "openbsd", target_os = "netbsd"))]
532    // SAFETY: libc-provided extern "C" function with C linkage;
533    // calling without arguments is always sound and returns a
534    // thread-local pointer.
535    unsafe {
536        libc::__errno()
537    }
538    #[cfg(not(any(
539        target_os = "linux",
540        target_os = "android",
541        target_vendor = "apple",
542        target_os = "freebsd",
543        target_os = "dragonfly",
544        target_os = "openbsd",
545        target_os = "netbsd",
546    )))]
547    // SAFETY: libc-provided extern "C" function with C linkage on
548    // the fallback path; calling without arguments is always sound.
549    unsafe {
550        libc::__errno_location()
551    }
552}
553
554// ---------- Windows -----------------------------------------------
555
/// Windows twin of the unix `ensure_ofd_locks_supported` gate (#30).
///
/// Windows takes byte-range locks through `LockFileEx`, which are
/// mandatory and per-handle, so the per-process coalescing hazard of
/// the POSIX `F_SETLK` fallback does not apply and the gate always
/// succeeds. It exists so the cross-platform
/// `FileHandle::{try_lock_writer, lock_writer, lock_reader}` entry
/// points can call one guard on every target.
#[cfg(windows)]
// Returning `Result<()>` from an infallible function is deliberate:
// the signature is shared with the unix version, which genuinely can
// fail, so callers `?` the same guard everywhere.
#[allow(clippy::unnecessary_wraps)]
fn ensure_ofd_locks_supported() -> Result<()> {
    Ok(())
}
572
573/// Split a `u64` into the (low, high) `u32` halves the Windows
574/// `OVERLAPPED` ABI expects. The truncation is intentional — each
575/// half holds 32 distinct bits of the original value — so the
576/// `cast_possible_truncation` lint is scoped to this helper.
577#[cfg(windows)]
578// Truncation here is the whole point: we're splitting a u64 into its low
579// and high u32 halves for the Win32 OVERLAPPED ABI.
580#[allow(clippy::cast_possible_truncation)]
fn split_u64(v: u64) -> (u32, u32) {
    // Low word: the cast keeps bits 0..32. High word: shift bits
    // 32..64 down first, then the cast keeps them.
    let low = v as u32;
    let high = (v >> 32) as u32;
    (low, high)
}
584
/// Make one non-blocking attempt to lock `len` bytes at `offset`
/// with `LockFileEx`. `fd` carries the Windows HANDLE (see
/// `FileHandle::raw_fd`).
///
/// Returns `Ok(true)` when the lock was taken and `Ok(false)` when the
/// call failed with `ERROR_LOCK_VIOLATION` or `ERROR_IO_PENDING`.
///
/// # Errors
///
/// Returns `Error::Io` carrying the Win32 error code for every other
/// failure.
#[cfg(windows)]
fn try_lock_range(fd: c_int, offset: u64, len: u64, mode: LockMode) -> Result<bool> {
    use windows_sys::Win32::Foundation::{ERROR_IO_PENDING, ERROR_LOCK_VIOLATION, HANDLE};
    use windows_sys::Win32::Storage::FileSystem::{
        LockFileEx, LOCKFILE_EXCLUSIVE_LOCK, LOCKFILE_FAIL_IMMEDIATELY,
    };
    use windows_sys::Win32::System::IO::OVERLAPPED;

    // Always fail fast; the retry policy lives in the caller.
    let flags = match mode {
        LockMode::Exclusive => LOCKFILE_FAIL_IMMEDIATELY | LOCKFILE_EXCLUSIVE_LOCK,
        LockMode::Shared => LOCKFILE_FAIL_IMMEDIATELY,
    };
    let (offset_low, offset_high) = split_u64(offset);
    let (len_low, len_high) = split_u64(len);
    // SAFETY: `OVERLAPPED` is a plain-data Win32 struct (no pointers
    // we set later); an all-zero bit pattern is a valid initialised
    // value per the Windows SDK header.
    let mut request: OVERLAPPED = unsafe { std::mem::zeroed() };
    request.Anonymous.Anonymous.Offset = offset_low;
    request.Anonymous.Anonymous.OffsetHigh = offset_high;
    // SAFETY: `fd` was obtained from `AsRawHandle::as_raw_handle()`
    // on a still-open FileHandle that outlives this call. The
    // OVERLAPPED struct is owned and zeroed; LockFileEx only reads
    // `Offset`/`OffsetHigh` (and writes nothing to the rest, per
    // its docs, because we do not pass an event handle).
    let locked =
        unsafe { LockFileEx(fd as HANDLE, flags, 0, len_low, len_high, &raw mut request) };
    if locked != 0 {
        return Ok(true);
    }
    // SAFETY: GetLastError reads the thread-local last-error slot
    // and never writes.
    let code = unsafe { windows_sys::Win32::Foundation::GetLastError() };
    let would_block = code == ERROR_LOCK_VIOLATION || code == ERROR_IO_PENDING;
    if would_block {
        return Ok(false);
    }
    let os_error = std::io::Error::from_raw_os_error(code.cast_signed());
    Err(Error::Io(os_error))
}
624
/// Release the `LockFileEx` lock on `len` bytes at `offset`. `fd`
/// carries the Windows HANDLE (see `FileHandle::raw_fd`).
///
/// # Errors
///
/// Returns `Error::Io` carrying the Win32 error code if
/// `UnlockFileEx` fails.
#[cfg(windows)]
fn unlock_range(fd: c_int, offset: u64, len: u64) -> Result<()> {
    use windows_sys::Win32::Foundation::HANDLE;
    use windows_sys::Win32::Storage::FileSystem::UnlockFileEx;
    use windows_sys::Win32::System::IO::OVERLAPPED;

    let (offset_low, offset_high) = split_u64(offset);
    let (len_low, len_high) = split_u64(len);
    // SAFETY: `OVERLAPPED` is a plain-data Win32 struct; an all-zero
    // bit pattern is a valid initialised value per the Windows SDK
    // header.
    let mut request: OVERLAPPED = unsafe { std::mem::zeroed() };
    request.Anonymous.Anonymous.Offset = offset_low;
    request.Anonymous.Anonymous.OffsetHigh = offset_high;
    // SAFETY: `fd` was obtained from `AsRawHandle::as_raw_handle()`
    // on a still-open FileHandle that outlives this call, and the
    // OVERLAPPED struct is an owned, initialised local.
    let unlocked = unsafe { UnlockFileEx(fd as HANDLE, 0, len_low, len_high, &raw mut request) };
    if unlocked == 0 {
        // SAFETY: GetLastError reads a thread-local slot.
        let code = unsafe { windows_sys::Win32::Foundation::GetLastError() };
        let os_error = std::io::Error::from_raw_os_error(code.cast_signed());
        return Err(Error::Io(os_error));
    }
    Ok(())
}
650
651// ---------- internal access to the FileHandle's inner File --------
652
653impl FileHandle {
654    /// Borrow the inner `std::fs::File`. Internal to the platform
655    /// layer; only the lock submodule needs the raw fd / handle.
656    fn file_ref(&self) -> &std::fs::File {
657        &self.file
658    }
659}
660
661#[cfg(test)]
662mod tests {
663    #[cfg(unix)]
664    use super::*;
665    #[cfg(unix)]
666    use tempfile::TempDir;
667
668    /// Create a file that's at least 4 KiB so the lock byte
669    /// offsets at 96 / 97..128 are inside the file. Unix-only
670    /// because every caller is gated on `cfg(unix)`; on Windows
671    /// the lock tests live elsewhere and the helper would be dead
672    /// code.
673    #[cfg(unix)]
674    fn fresh_handle(dir: &TempDir, name: &str) -> FileHandle {
675        let path = dir.path().join(name);
676        let h = FileHandle::open_or_create(&path).expect("open");
677        h.set_len(4096).expect("extend");
678        h
679    }
680
681    #[test]
682    #[cfg(unix)]
683    fn writer_lock_excludes_writers() {
684        let dir = TempDir::new().expect("tmp");
685        let path = dir.path().join("lock.obj");
686        FileHandle::open_or_create(&path)
687            .expect("init")
688            .set_len(4096)
689            .expect("len");
690
691        let h1 = FileHandle::open_or_create(&path).expect("h1");
692        let h2 = FileHandle::open_or_create(&path).expect("h2");
693
694        let guard = h1
695            .try_lock_writer()
696            .expect("try lock h1")
697            .expect("must acquire");
698        let none = h2.try_lock_writer().expect("try lock h2");
699        assert!(none.is_none(), "second writer lock must be refused");
700        drop(guard);
701        let _g2 = h2
702            .try_lock_writer()
703            .expect("try lock h2 again")
704            .expect("now acquires");
705    }
706
707    #[test]
708    #[cfg(unix)]
709    fn writer_busy_timeout_returns_err_busy() {
710        let dir = TempDir::new().expect("tmp");
711        let _h0 = fresh_handle(&dir, "lock.obj");
712
713        let h1 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h1");
714        let h2 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h2");
715        let _g1 = h1
716            .try_lock_writer()
717            .expect("h1 lock")
718            .expect("h1 must acquire");
719        let start = std::time::Instant::now();
720        let err = h2
721            .lock_writer(Duration::from_millis(50))
722            .expect_err("must time out");
723        let elapsed = start.elapsed();
724        assert!(matches!(
725            err,
726            Error::Busy {
727                kind: LockKind::Writer
728            }
729        ));
730        // Some slack for test scheduler jitter.
731        assert!(
732            elapsed >= Duration::from_millis(45),
733            "must wait at least the timeout (~50 ms); got {elapsed:?}",
734        );
735    }
736
737    #[test]
738    #[cfg(unix)]
739    fn many_readers_can_coexist() {
740        let dir = TempDir::new().expect("tmp");
741        let _h0 = fresh_handle(&dir, "lock.obj");
742        let h1 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h1");
743        let h2 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h2");
744        let h3 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h3");
745        let g1 = h1.lock_reader(Duration::from_millis(50)).expect("r1");
746        let g2 = h2.lock_reader(Duration::from_millis(50)).expect("r2");
747        let g3 = h3.lock_reader(Duration::from_millis(50)).expect("r3");
748        // The three guards may sit on the same or different slots.
749        // The contract is only that they were all acquired
750        // simultaneously without erroring.
751        drop((g1, g2, g3));
752    }
753
754    #[test]
755    #[cfg(unix)]
756    fn reader_and_writer_dont_collide_on_separate_anchors() {
757        // The WRITER_LOCK lives at byte 96; reader range is 97..128.
758        // A held writer lock must NOT block a reader from acquiring
759        // a slot in the reader range.  (The exclusion between
760        // readers and writers is the TXN layer's job — the byte
761        // layout intentionally keeps them on separate anchors so
762        // open_readonly callers don't have to wait for an in-flight
763        // writer.)
764        let dir = TempDir::new().expect("tmp");
765        let _h0 = fresh_handle(&dir, "lock.obj");
766        let h1 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h1");
767        let h2 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h2");
768        let _wg = h1.lock_writer(Duration::from_millis(50)).expect("writer");
769        let _rg = h2
770            .lock_reader(Duration::from_millis(50))
771            .expect("reader must not collide");
772    }
773
774    #[test]
775    #[cfg(unix)]
776    fn explicit_release_returns_ok() {
777        let dir = TempDir::new().expect("tmp");
778        let _h0 = fresh_handle(&dir, "lock.obj");
779        let h = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h");
780        let g = h.lock_writer(Duration::from_millis(50)).expect("lock");
781        g.release().expect("release ok");
782        // A re-acquire from the same handle must succeed.
783        let _g2 = h.lock_writer(Duration::from_millis(50)).expect("relock");
784    }
785
786    #[test]
787    #[cfg(unix)]
788    fn lock_methods_compile_when_dropped() {
789        // Compile-only smoke test that the `#[must_use]` annotation
790        // is non-fatal when the caller actually uses the guard.
791        let dir = TempDir::new().expect("tmp");
792        let _h0 = fresh_handle(&dir, "lock.obj");
793        let h = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h");
794        let g = h.lock_reader(Duration::from_millis(10)).expect("rlock");
795        drop(g);
796    }
797
798    /// #30: the OFD-capability gate must agree with the build target.
799    /// On any target obj actually supports (Linux/Android/Apple) the
800    /// gate is `Ok` and the per-fd locking primitive is OFD. The
801    /// classic-`F_SETLK` fallback that silently broke same-process
802    /// multi-fd exclusion on FreeBSD / unknown POSIX is gone: those
803    /// targets now hard-error at `ensure_ofd_locks_supported`.
804    #[test]
805    #[cfg(unix)]
806    fn ofd_capability_gate_matches_target() {
807        // The CI matrix is Linux + macOS; both have OFD locks. If
808        // this assertion ever fires, obj is being built for a target
809        // whose lock soundness has not been established — the gate
810        // below will (correctly) refuse to lock.
811        assert_eq!(
812            TARGET_HAS_OFD_LOCKS,
813            cfg!(any(
814                target_os = "linux",
815                target_os = "android",
816                target_vendor = "apple",
817            )),
818            "OFD capability constant must track the supported-target set",
819        );
820        let gate = ensure_ofd_locks_supported();
821        if TARGET_HAS_OFD_LOCKS {
822            gate.expect("supported targets must pass the gate");
823        } else {
824            // Unsupported target: every lock entry point must refuse
825            // with ErrorKind::Unsupported rather than fall back to a
826            // per-process F_SETLK lock.
827            match gate {
828                Err(Error::Io(e)) => {
829                    assert_eq!(e.kind(), std::io::ErrorKind::Unsupported);
830                }
831                other => panic!("expected Io(Unsupported), got {other:?}"),
832            }
833        }
834    }
835
836    /// #30 regression: two `FileHandle`s to the SAME file in the SAME
837    /// process must exclude each other for the writer lock. This is
838    /// exactly the invariant the classic per-process `F_SETLK`
839    /// fallback silently broke (a second open in-process would
840    /// "succeed" because the lock coalesces per process). OFD locks
841    /// are per-fd, so the second handle is correctly refused.
842    #[test]
843    #[cfg(unix)]
844    fn same_process_multi_fd_writer_exclusion_holds() {
845        let dir = TempDir::new().expect("tmp");
846        let _h0 = fresh_handle(&dir, "lock.obj");
847        let h1 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h1");
848        let h2 = FileHandle::open_or_create(dir.path().join("lock.obj")).expect("h2");
849        let g1 = h1.try_lock_writer().expect("h1 try").expect("h1 acquires");
850        // Same process, different fd: must NOT coalesce — refused.
851        assert!(
852            h2.try_lock_writer().expect("h2 try").is_none(),
853            "per-fd OFD lock must refuse a second in-process handle; a \
854             per-process F_SETLK fallback would wrongly grant this",
855        );
856        drop(g1);
857    }
858}