obj_core/platform/lock.rs
1//! Cross-process byte-range file locking.
2//!
3//! M6 issue #44. POSIX uses OFD `fcntl` locks (`F_OFD_SETLK` /
4//! `F_OFD_SETLKW`) — kernel-tracked per-fd, fork-safe, automatically
5//! released on process exit. Windows uses `LockFileEx` /
6//! `UnlockFileEx`.
7//!
8//! Locks anchor against a dedicated `<db>.obj-lock` sidecar file
9//! created by `Db::open` next to the main database (mirroring the
10//! existing `<db>.obj-wal` sidecar convention). Using a sidecar
11//! decouples the lock-byte range from any region the pager may
12//! read or write, so the lock byte offsets can be the same on
13//! every platform and need not be placed past EOF:
14//!
15//! - [`WRITER_LOCK_OFFSET`] = 96 (exclusive, 1 byte).
//! - [`READER_LOCK_RANGE_OFFSET`] = 97, covering bytes 97..128
//!   (shared, [`READER_LOCK_RANGE_LEN`] = 31 one-byte slots).
17//!
18//! On Windows `LockFileEx` produces *mandatory* byte-range locks.
19//! Issue #1: prior versions placed the Windows lock anchor at
20//! `0x4000_0000` (past EOF of an empty file) so that pager I/O
21//! could not overlap the locked range. That assumption broke when
22//! the main DB file grew past 1 GiB — any page write whose offset
23//! crossed `0x4000_0000` failed with `ERROR_LOCK_VIOLATION`. The
24//! sidecar fixes the hazard structurally: the lock handle and the
25//! pager handle target *different files*, so no pager I/O can
26//! ever overlap a lock byte regardless of how large the DB grows.
27//! See `docs/format.md` § File locking.
28//!
29//! The lock state lives in the OS kernel's per-fd lock table — the
30//! bytes on disk are never read or written by obj. See
31//! `docs/format.md` § File locking and § Reader snapshots (MVCC)
32//! for the user-visible protocol.
33//!
34//! # `unsafe` policy
35//!
36//! `rustix::fs::fcntl_lock` does whole-file locking with `F_SETLK*`,
37//! not OFD locks. We therefore call `libc::fcntl` directly with the
38//! OFD command IDs. On Windows we call `LockFileEx` /
39//! `UnlockFileEx` via `windows-sys`. Every `unsafe` block carries a
40//! `// SAFETY:` comment per power-of-ten Rule 8.
41
42// Re-introduce unsafe inside this submodule. The parent
43// `platform/mod.rs` is `#![deny(unsafe_code)]`; we override the
44// deny here because the OS-side lock syscalls are pointer-based
45// and `rustix` does not expose the OFD variants.
46#![allow(unsafe_code)]
47
48use std::os::raw::c_int;
49use std::sync::atomic::{AtomicU64, Ordering};
50use std::time::{Duration, Instant};
51
52use crate::error::{Error, LockKind, Result};
53use crate::platform::FileHandle;
54
/// Offset, inside the `<db>.obj-lock` sidecar file, of the single
/// byte that anchors the exclusive `WRITER_LOCK`.
///
/// The anchor sits at the same offset on every platform: the pager
/// never reads or writes the sidecar, whose only job is to carry
/// kernel-side lock metadata. On POSIX the byte lies inside a
/// 128-byte sidecar (see `Db::open`'s `set_len(128)` on the
/// sidecar). OFD locks are advisory and would tolerate a lock past
/// EOF, but giving the byte a physical existence is the
/// conservative choice across kernels. On Windows `LockFileEx`
/// produces **mandatory** byte-range locks — keeping them in the
/// sidecar guarantees pager I/O can never overlap the locked region
/// however large the main DB grows (issue #1; the previous past-EOF
/// strategy broke at >1 GiB).
pub const WRITER_LOCK_OFFSET: u64 = 96;
/// Offset of the first reader-lock slot inside the `<db>.obj-lock`
/// sidecar: the byte immediately after [`WRITER_LOCK_OFFSET`]. See
/// that constant for the sidecar rationale.
pub const READER_LOCK_RANGE_OFFSET: u64 = WRITER_LOCK_OFFSET + 1;
/// Number of one-byte slots in the reader-lock range (31), so the
/// range covers bytes 97..128 of the sidecar.
pub const READER_LOCK_RANGE_LEN: u64 = 31;
76
/// First sleep interval `retry_until_acquired` uses between lock
/// attempts; the interval doubles after every failed attempt.
/// Power-of-ten Rule 2: `retry_until_acquired` additionally derives
/// a defensive iteration cap from the caller's timeout expressed in
/// milliseconds, so an exhausted budget surfaces deterministically.
const INITIAL_BACKOFF: Duration = Duration::from_millis(1);
/// Ceiling the doubling sleep interval is clamped to, so a long
/// timeout stays responsive.
const MAX_BACKOFF: Duration = Duration::from_millis(100);
83
/// RAII guard for a held `WRITER_LOCK` byte. Dropping the guard
/// releases the OS-side lock.
///
/// The guard keeps a copy of the raw descriptor/handle value taken
/// from the `FileHandle` it was acquired through; it neither owns
/// nor borrows that handle. The underlying lock is per-fd, so as
/// long as the fd survives, releasing from any thread is sound.
// NOTE(review): nothing in this type ties the guard's lifetime to
// the `FileHandle`, so callers presumably must keep the handle open
// until the guard is released or dropped — confirm at the call
// sites.
#[derive(Debug)]
#[must_use = "WriterLock releases the OS-side lock when dropped"]
pub struct WriterLock {
    /// Raw fd (POSIX) or HANDLE carried through `c_int` (Windows)
    /// on which the lock was taken.
    fd: c_int,
    /// Set by `release` once the unlock has been issued so `Drop`
    /// does not issue it again.
    released: bool,
}
95
96impl WriterLock {
97 /// Explicitly release the lock. Equivalent to `Drop` but lets
98 /// the caller observe a release error (the `Drop` impl silently
99 /// swallows errors because panics from `Drop` are toxic).
100 ///
101 /// # Errors
102 ///
103 /// Returns `Error::Io` on the unlikely event that the OS
104 /// rejects the unlock syscall.
105 pub fn release(mut self) -> Result<()> {
106 if self.released {
107 return Ok(());
108 }
109 self.released = true;
110 unlock_range(self.fd, WRITER_LOCK_OFFSET, 1)
111 }
112}
113
114impl Drop for WriterLock {
115 fn drop(&mut self) {
116 if !self.released {
117 let _ = unlock_range(self.fd, WRITER_LOCK_OFFSET, 1);
118 }
119 }
120}
121
/// RAII guard for a held reader-lock byte. Dropping the guard
/// releases the OS-side lock.
///
/// Like `WriterLock`, the guard keeps a copy of the raw
/// descriptor/handle value rather than owning or borrowing the
/// `FileHandle` it was acquired through.
// NOTE(review): the guard's lifetime is not tied to the
// `FileHandle`; callers presumably must keep the handle open while
// the guard lives — confirm at the call sites.
#[derive(Debug)]
#[must_use = "ReaderLock releases the OS-side lock when dropped"]
pub struct ReaderLock {
    /// Raw fd (POSIX) or HANDLE carried through `c_int` (Windows)
    /// on which the lock was taken.
    fd: c_int,
    /// Absolute byte offset of the locked reader slot.
    slot: u64,
    /// Set by `release` once the unlock has been issued so `Drop`
    /// does not issue it again.
    released: bool,
}
131
132impl ReaderLock {
133 /// Byte offset of the reader-slot this guard holds. Useful for
134 /// diagnostics.
135 #[must_use]
136 pub fn slot(&self) -> u64 {
137 self.slot
138 }
139
140 /// Explicitly release the lock.
141 ///
142 /// # Errors
143 ///
144 /// Returns `Error::Io` on the unlikely event that the OS
145 /// rejects the unlock syscall.
146 pub fn release(mut self) -> Result<()> {
147 if self.released {
148 return Ok(());
149 }
150 self.released = true;
151 unlock_range(self.fd, self.slot, 1)
152 }
153}
154
155impl Drop for ReaderLock {
156 fn drop(&mut self) {
157 if !self.released {
158 let _ = unlock_range(self.fd, self.slot, 1);
159 }
160 }
161}
162
163impl FileHandle {
164 /// Try once, non-blocking, to acquire the `WRITER_LOCK`. Returns
165 /// `Ok(Some(guard))` if the lock was acquired, `Ok(None)` if it
166 /// is held by someone else, or `Err(Error::Io)` on syscall
167 /// failure.
168 ///
169 /// # Errors
170 ///
171 /// Returns [`Error::Io`] on syscall failure other than
172 /// "would-block / already-locked".
173 pub fn try_lock_writer(&self) -> Result<Option<WriterLock>> {
174 ensure_ofd_locks_supported()?;
175 let fd = self.raw_fd();
176 if try_lock_range(fd, WRITER_LOCK_OFFSET, 1, LockMode::Exclusive)? {
177 Ok(Some(WriterLock {
178 fd,
179 released: false,
180 }))
181 } else {
182 Ok(None)
183 }
184 }
185
186 /// Acquire the `WRITER_LOCK`, retrying with bounded exponential
187 /// backoff until either acquired or `timeout` elapses. Returns
188 /// `Err(Error::Busy { kind: LockKind::Writer })` on timeout.
189 ///
190 /// # Errors
191 ///
192 /// - [`Error::Busy`] with `LockKind::Writer` on timeout.
193 /// - [`Error::Io`] on any non-"would-block" syscall failure.
194 pub fn lock_writer(&self, timeout: Duration) -> Result<WriterLock> {
195 ensure_ofd_locks_supported()?;
196 let fd = self.raw_fd();
197 retry_until_acquired(timeout, LockKind::Writer, || {
198 try_lock_range(fd, WRITER_LOCK_OFFSET, 1, LockMode::Exclusive)
199 })?;
200 Ok(WriterLock {
201 fd,
202 released: false,
203 })
204 }
205
206 /// Acquire any one of the 31 reader-lock slots in shared mode,
207 /// retrying with bounded backoff until either acquired or
208 /// `timeout` elapses.
209 ///
210 /// The slot is chosen with a per-process round-robin counter so
211 /// concurrent readers in the same process do not all race for
212 /// the same byte. Shared locks compose, so falling on the same
213 /// byte is not a correctness bug — just a hot-spot the spread
214 /// avoids in practice.
215 ///
216 /// # Errors
217 ///
218 /// - [`Error::Busy`] with `LockKind::Reader` on timeout (very
219 /// rare — shared locks rarely contend).
220 /// - [`Error::Io`] on syscall failure.
221 pub fn lock_reader(&self, timeout: Duration) -> Result<ReaderLock> {
222 ensure_ofd_locks_supported()?;
223 let fd = self.raw_fd();
224 let start_slot = next_reader_slot();
225 // Try every slot once round-robin; if all 31 slots are
226 // contended (very rare), fall back to bounded retry on the
227 // start slot until the deadline expires.
228 let mut last_err: Option<Error> = None;
229 for offset in 0..READER_LOCK_RANGE_LEN {
230 let slot = READER_LOCK_RANGE_OFFSET + ((start_slot + offset) % READER_LOCK_RANGE_LEN);
231 match try_lock_range(fd, slot, 1, LockMode::Shared) {
232 Ok(true) => {
233 return Ok(ReaderLock {
234 fd,
235 slot,
236 released: false,
237 });
238 }
239 Ok(false) => {}
240 Err(e) => last_err = Some(e),
241 }
242 }
243 if let Some(err) = last_err {
244 return Err(err);
245 }
246 // All slots reported "would-block" — fall back to busy-wait
247 // on the start slot with the caller's timeout.
248 let slot = READER_LOCK_RANGE_OFFSET + start_slot;
249 retry_until_acquired(timeout, LockKind::Reader, || {
250 try_lock_range(fd, slot, 1, LockMode::Shared)
251 })?;
252 Ok(ReaderLock {
253 fd,
254 slot,
255 released: false,
256 })
257 }
258
259 /// Raw fd accessor (POSIX) or HANDLE (Windows; cast through
260 /// `as_raw_handle`). Internal to the platform layer.
261 #[cfg(unix)]
262 fn raw_fd(&self) -> c_int {
263 use std::os::unix::io::AsRawFd;
264 self.file_ref().as_raw_fd()
265 }
266
267 #[cfg(windows)]
268 fn raw_fd(&self) -> c_int {
269 use std::os::windows::io::AsRawHandle;
270 // Carry the HANDLE through the `c_int` slot. The lock
271 // syscalls cast it back to HANDLE before use; `c_int` is
272 // chosen so the cross-platform signature stays uniform.
273 self.file_ref().as_raw_handle() as c_int
274 }
275}
276
277// ---------- internal helpers --------------------------------------
278
279/// Per-process round-robin counter so threads in the same process
280/// pick different reader-slot bytes by default. Wraps at
281/// `READER_LOCK_RANGE_LEN` — the modulo arithmetic in `lock_reader`
282/// handles the actual selection.
283static READER_ROUND_ROBIN: AtomicU64 = AtomicU64::new(0);
284
285fn next_reader_slot() -> u64 {
286 READER_ROUND_ROBIN.fetch_add(1, Ordering::Relaxed) % READER_LOCK_RANGE_LEN
287}
288
/// Kind of byte-range lock `try_lock_range` is asked to take.
#[derive(Debug, Clone, Copy)]
enum LockMode {
    /// Write lock: `F_WRLCK` on POSIX, `LOCKFILE_EXCLUSIVE_LOCK` on
    /// Windows.
    Exclusive,
    /// Read lock: `F_RDLCK` on POSIX, no exclusive flag on Windows.
    Shared,
}
294
295/// Bounded retry harness shared by `lock_writer` / `lock_reader`.
296/// Power-of-ten Rule 2: the loop's upper bound is
297/// `deadline.elapsed() < timeout`; once `Instant::now() >= deadline`
298/// the function returns `Err(Error::Busy)`.
299fn retry_until_acquired<F>(timeout: Duration, kind: LockKind, mut once: F) -> Result<()>
300where
301 F: FnMut() -> Result<bool>,
302{
303 let start = Instant::now();
304 let mut backoff = INITIAL_BACKOFF;
305 // Rule-2 upper bound on iteration count: `timeout` / 1 ms. With
306 // exponential backoff capped at 100 ms, the real count is far
307 // lower than this; the explicit bound is defensive.
308 let timeout_millis = u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX);
309 let max_iters: u64 = timeout_millis.saturating_add(2);
310 let mut iters: u64 = 0;
311 loop {
312 iters = iters.saturating_add(1);
313 if iters > max_iters.saturating_add(64) {
314 return Err(Error::Busy { kind });
315 }
316 if once()? {
317 return Ok(());
318 }
319 if start.elapsed() >= timeout {
320 return Err(Error::Busy { kind });
321 }
322 std::thread::sleep(backoff);
323 backoff = (backoff * 2).min(MAX_BACKOFF);
324 }
325}
326
327// ---------- platform-specific lock primitives ---------------------
328
329/// Build a POSIX `struct flock` for the given byte range. The
330/// numeric type of `l_type` / `l_whence` differs per platform
331/// (`i16` on Linux/macOS, `c_short` typedef elsewhere); we use
332/// `try_from` rather than `as` so clippy's pedantic
333/// `cast_possible_truncation` lint stays clean (Rule 10).
334#[cfg(unix)]
335fn build_flock(l_type: i32, offset: u64, len: u64) -> Result<libc::flock> {
336 // `libc::flock.l_type` has type `libc::c_short` on every POSIX
337 // target. The narrowing here is exact because every libc
338 // constant we pass (`F_WRLCK`, `F_RDLCK`, `F_UNLCK`) fits in
339 // `i16` on every supported target. `libc::F_WRLCK` is typed
340 // `i32` on Linux but `i16` on macOS; we widen at the call site
341 // and narrow here so the common helper has one signature.
342 let l_type_short =
343 libc::c_short::try_from(l_type).map_err(|_| Error::InvalidArgument("lock l_type"))?;
344 let l_whence_short = libc::c_short::try_from(libc::SEEK_SET)
345 .map_err(|_| Error::InvalidArgument("lock l_whence"))?;
346 Ok(libc::flock {
347 l_type: l_type_short,
348 l_whence: l_whence_short,
349 l_start: offset_to_off_t(offset)?,
350 l_len: offset_to_off_t(len)?,
351 l_pid: 0,
352 #[cfg(target_os = "freebsd")]
353 l_sysid: 0,
354 })
355}
356
357#[cfg(unix)]
358fn try_lock_range(fd: c_int, offset: u64, len: u64, mode: LockMode) -> Result<bool> {
359 // POSIX OFD lock: kernel-tracked per-fd, fork-safe, released on
360 // exit. Linux ≥ 3.15, macOS ≥ 10.14, FreeBSD ≥ 12.
361 // libc::F_*LCK is `i16` on macOS, `i32` on Linux; the `.into()`
362 // widens on macOS and is a no-op on Linux.
363 #[allow(clippy::useless_conversion)]
364 let l_type: i32 = match mode {
365 LockMode::Exclusive => libc::F_WRLCK.into(),
366 LockMode::Shared => libc::F_RDLCK.into(),
367 };
368 let flock = build_flock(l_type, offset, len)?;
369 // SAFETY: `fd` is a borrowed fd from the FileHandle that
370 // outlives this call (we never store the fd beyond the
371 // call). The third argument matches the kernel's expected
372 // `struct flock*` for F_OFD_SETLK. The kernel writes nothing
373 // through the pointer for the SETLK variant.
374 let ret = unsafe { libc::fcntl(fd, ofd_setlk_cmd(), &raw const flock) };
375 if ret == 0 {
376 return Ok(true);
377 }
378 // SAFETY: errno is a thread-local set by the libc call we just
379 // made; calling `__errno_location()` (and friends) is sound on
380 // every POSIX target libc supports.
381 let errno = unsafe { *libc_errno() };
382 if errno == libc::EAGAIN || errno == libc::EACCES {
383 // POSIX permits either EAGAIN or EACCES for "would-block".
384 return Ok(false);
385 }
386 Err(Error::Io(std::io::Error::from_raw_os_error(errno)))
387}
388
389#[cfg(unix)]
390fn unlock_range(fd: c_int, offset: u64, len: u64) -> Result<()> {
391 // libc::F_UNLCK is `i16` on macOS, `i32` on Linux; see try_lock_range.
392 #[allow(clippy::useless_conversion)]
393 let flock = build_flock(libc::F_UNLCK.into(), offset, len)?;
394 // SAFETY: same contract as `try_lock_range` above. F_OFD_SETLK
395 // with l_type = F_UNLCK is the standard release primitive.
396 let ret = unsafe { libc::fcntl(fd, ofd_setlk_cmd(), &raw const flock) };
397 if ret == 0 {
398 return Ok(());
399 }
400 // SAFETY: see comment in try_lock_range.
401 let errno = unsafe { *libc_errno() };
402 Err(Error::Io(std::io::Error::from_raw_os_error(errno)))
403}
404
/// Whether the build target provides OFD (open-file-description)
/// `fcntl` locks — `F_OFD_SETLK` / `F_OFD_SETLKW`. These are the
/// only POSIX lock primitive obj's concurrency model can rely on:
/// they are tracked PER-fd, so two `Db` handles to the same file in
/// one process correctly exclude each other, and they are released
/// on the owning fd's close rather than coalescing across the whole
/// process.
///
/// Classic POSIX `F_SETLK` locks are tracked PER-PROCESS: a second
/// `Db` handle in the same process would silently share (and on the
/// first handle's close, silently drop) the first handle's lock,
/// breaking the single-writer invariant without any error. Rather
/// than fall back to that unsound primitive (#30, #44), obj refuses
/// to lock on a target without OFD locks — see
/// [`ensure_ofd_locks_supported`].
///
/// # Supported-target matrix
///
/// | Target | OFD locks | obj locking |
/// |---|---|---|
/// | Linux ≥ 3.15 / Android | yes (`F_OFD_SETLK` = 37) | supported |
/// | macOS ≥ 10.14 / iOS (Apple) | yes (`F_OFD_SETLK` = 90) | supported |
/// | Windows | n/a (`LockFileEx`) | supported (separate path) |
/// | FreeBSD / other POSIX | not exported by `libc` | **refused at open** |
///
/// Windows uses a completely separate `LockFileEx` path and never
/// consults this constant; it is only meaningful on `unix`.
#[cfg(unix)]
const TARGET_HAS_OFD_LOCKS: bool =
    cfg!(target_os = "linux") || cfg!(target_os = "android") || cfg!(target_vendor = "apple");
438
439/// Hard, documented gate for the non-OFD targets (#30). Called at
440/// the head of every lock-acquisition entry point so the failure
441/// surfaces at `Db::open` time rather than as silent, per-process
442/// lock coalescing later.
443///
444/// On Linux/macOS this is a compile-time-`true` check the optimiser
445/// erases — the lock fast path is byte-for-byte unchanged. On a
446/// target without OFD locks it returns
447/// [`std::io::ErrorKind::Unsupported`] wrapped in [`Error::Io`].
448///
449/// # Errors
450///
451/// Returns [`Error::Io`] with `ErrorKind::Unsupported` when the
452/// build target lacks OFD `fcntl` locks.
453#[cfg(unix)]
454fn ensure_ofd_locks_supported() -> Result<()> {
455 if TARGET_HAS_OFD_LOCKS {
456 return Ok(());
457 }
458 Err(Error::Io(std::io::Error::new(
459 std::io::ErrorKind::Unsupported,
460 "obj requires OFD (open-file-description) fcntl locks, which \
461 this target does not provide; classic POSIX F_SETLK locks are \
462 per-process and would silently break same-process multi-handle \
463 exclusion (see obj-core platform::lock supported-target matrix)",
464 )))
465}
466
/// Resolve the `F_OFD_SETLK` command id for the build target. Linux
/// and Apple ship it as a numeric constant in `<fcntl.h>` (`37` on
/// Linux, `90` on macOS 10.14+); the values are hard-coded here
/// because `libc` does not export them on every target.
///
/// Every public lock entry point gates on
/// [`ensure_ofd_locks_supported`] first, so on a non-OFD target the
/// final arm is never executed. It exists solely to keep the
/// function total across targets, and it returns a deliberately
/// invalid command id (`-1`): should a future refactor ever call
/// this without the guard, `fcntl` fails with `EINVAL` instead of
/// silently installing a per-process lock (#30).
#[cfg(unix)]
fn ofd_setlk_cmd() -> c_int {
    /// `F_OFD_SETLK` on Linux / Android.
    const LINUX_F_OFD_SETLK: c_int = 37;
    /// `F_OFD_SETLK` on macOS 10.14+ and the other Apple targets.
    const APPLE_F_OFD_SETLK: c_int = 90;
    /// Not a valid `fcntl` command (EINVAL) — never the unsound
    /// `F_SETLK` fallback.
    const INVALID_CMD: c_int = -1;

    if cfg!(any(target_os = "linux", target_os = "android")) {
        LINUX_F_OFD_SETLK
    } else if cfg!(target_vendor = "apple") {
        APPLE_F_OFD_SETLK
    } else {
        // Unreachable in practice: `ensure_ofd_locks_supported`
        // rejects these targets at open.
        INVALID_CMD
    }
}
499
500#[cfg(unix)]
501fn offset_to_off_t(v: u64) -> Result<libc::off_t> {
502 libc::off_t::try_from(v).map_err(|_| Error::InvalidArgument("lock offset overflow"))
503}
504
505#[cfg(unix)]
506fn libc_errno() -> *mut c_int {
507 // The errno-location accessor name varies by platform. Each
508 // branch returns a thread-local pointer that lives as long as
509 // the calling thread.
510 #[cfg(any(target_os = "linux", target_os = "android"))]
511 // SAFETY: libc-provided extern "C" function with C linkage;
512 // calling without arguments is always sound and returns a
513 // thread-local pointer.
514 unsafe {
515 libc::__errno_location()
516 }
517 #[cfg(target_vendor = "apple")]
518 // SAFETY: libc-provided extern "C" function with C linkage;
519 // calling without arguments is always sound and returns a
520 // thread-local pointer.
521 unsafe {
522 libc::__error()
523 }
524 #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))]
525 // SAFETY: libc-provided extern "C" function with C linkage;
526 // calling without arguments is always sound and returns a
527 // thread-local pointer.
528 unsafe {
529 libc::__error()
530 }
531 #[cfg(any(target_os = "openbsd", target_os = "netbsd"))]
532 // SAFETY: libc-provided extern "C" function with C linkage;
533 // calling without arguments is always sound and returns a
534 // thread-local pointer.
535 unsafe {
536 libc::__errno()
537 }
538 #[cfg(not(any(
539 target_os = "linux",
540 target_os = "android",
541 target_vendor = "apple",
542 target_os = "freebsd",
543 target_os = "dragonfly",
544 target_os = "openbsd",
545 target_os = "netbsd",
546 )))]
547 // SAFETY: libc-provided extern "C" function with C linkage on
548 // the fallback path; calling without arguments is always sound.
549 unsafe {
550 libc::__errno_location()
551 }
552}
553
554// ---------- Windows -----------------------------------------------
555
/// Windows counterpart to the unix [`ensure_ofd_locks_supported`]
/// gate (#30). Windows acquires byte-range locks via `LockFileEx`,
/// which are mandatory PER-handle — they do not have the
/// per-process coalescing hazard the POSIX `F_SETLK` fallback has —
/// so the Windows lock path is always supported and this gate is an
/// unconditional `Ok(())`. Defined so the cross-platform
/// `FileHandle::{try_lock_writer, lock_writer, lock_reader}` entry
/// points can call one guard regardless of target.
///
/// # Errors
///
/// Never fails on Windows; the `Result` return type exists only to
/// match the unix signature.
#[cfg(windows)]
// The unconditional `Ok(())` wrap is deliberate: a single `Result<()>`
// signature is shared across platforms so the lock entry points `?` one
// guard on every target. The unix counterpart genuinely can `Err`.
#[allow(clippy::unnecessary_wraps)]
fn ensure_ofd_locks_supported() -> Result<()> {
    Ok(())
}
572
/// Break a `u64` into the `(low, high)` pair of `u32` halves that
/// the Windows `OVERLAPPED` ABI expects. Each half carries 32
/// distinct bits of the input, so no information is lost across the
/// pair.
#[cfg(windows)]
// Truncation here is the whole point: we're splitting a u64 into its low
// and high u32 halves for the Win32 OVERLAPPED ABI.
#[allow(clippy::cast_possible_truncation)]
fn split_u64(v: u64) -> (u32, u32) {
    let low = (v & 0xFFFF_FFFF) as u32;
    let high = (v >> 32) as u32;
    (low, high)
}
584
/// Make one non-blocking `LockFileEx` attempt to lock `len` bytes at
/// `offset` on the handle carried in `fd`, in the given `mode`.
///
/// Returns `Ok(true)` when the lock was granted and `Ok(false)`
/// when the call failed with `ERROR_LOCK_VIOLATION` or
/// `ERROR_IO_PENDING`.
///
/// # Errors
///
/// Returns `Error::Io` carrying the Win32 last-error code for any
/// other failure.
#[cfg(windows)]
fn try_lock_range(fd: c_int, offset: u64, len: u64, mode: LockMode) -> Result<bool> {
    use windows_sys::Win32::Foundation::{ERROR_IO_PENDING, ERROR_LOCK_VIOLATION, HANDLE};
    use windows_sys::Win32::Storage::FileSystem::{
        LockFileEx, LOCKFILE_EXCLUSIVE_LOCK, LOCKFILE_FAIL_IMMEDIATELY,
    };
    use windows_sys::Win32::System::IO::OVERLAPPED;

    // Always fail fast; add the exclusive bit for writer locks.
    let flags = match mode {
        LockMode::Exclusive => LOCKFILE_FAIL_IMMEDIATELY | LOCKFILE_EXCLUSIVE_LOCK,
        LockMode::Shared => LOCKFILE_FAIL_IMMEDIATELY,
    };
    let (offset_low, offset_high) = split_u64(offset);
    let (len_low, len_high) = split_u64(len);
    // SAFETY: `OVERLAPPED` is a plain-data Win32 struct (no pointers
    // we set later); an all-zero bit pattern is a valid initialised
    // value per the Windows SDK header.
    let mut region: OVERLAPPED = unsafe { std::mem::zeroed() };
    region.Anonymous.Anonymous.Offset = offset_low;
    region.Anonymous.Anonymous.OffsetHigh = offset_high;
    // SAFETY: `fd` was obtained from `AsRawHandle::as_raw_handle()`
    // on a still-open FileHandle that outlives this call. The
    // OVERLAPPED struct is owned and zeroed; LockFileEx only reads
    // `Offset`/`OffsetHigh` (and writes nothing to the rest, per
    // its docs, because we do not pass an event handle).
    let granted =
        unsafe { LockFileEx(fd as HANDLE, flags, 0, len_low, len_high, &raw mut region) };
    if granted != 0 {
        return Ok(true);
    }
    // SAFETY: GetLastError reads the thread-local last-error slot
    // and never writes.
    let code = unsafe { windows_sys::Win32::Foundation::GetLastError() };
    match code {
        ERROR_LOCK_VIOLATION | ERROR_IO_PENDING => Ok(false),
        other => Err(Error::Io(std::io::Error::from_raw_os_error(
            other.cast_signed(),
        ))),
    }
}
624
/// Release the `LockFileEx` lock over `len` bytes at `offset` on the
/// handle carried in `fd` by calling `UnlockFileEx`.
///
/// # Errors
///
/// Returns `Error::Io` carrying the Win32 last-error code when
/// `UnlockFileEx` fails.
#[cfg(windows)]
fn unlock_range(fd: c_int, offset: u64, len: u64) -> Result<()> {
    use windows_sys::Win32::Foundation::HANDLE;
    use windows_sys::Win32::Storage::FileSystem::UnlockFileEx;
    use windows_sys::Win32::System::IO::OVERLAPPED;

    let (offset_low, offset_high) = split_u64(offset);
    let (len_low, len_high) = split_u64(len);
    // SAFETY: `OVERLAPPED` is a plain-data Win32 struct; an all-zero
    // bit pattern is a valid initialised value per the Windows SDK
    // header. Same rationale as `try_lock_range` above.
    let mut region: OVERLAPPED = unsafe { std::mem::zeroed() };
    region.Anonymous.Anonymous.Offset = offset_low;
    region.Anonymous.Anonymous.OffsetHigh = offset_high;
    // SAFETY: same contract as try_lock_range above.
    let released = unsafe { UnlockFileEx(fd as HANDLE, 0, len_low, len_high, &raw mut region) };
    if released == 0 {
        // SAFETY: GetLastError reads a thread-local slot.
        let code = unsafe { windows_sys::Win32::Foundation::GetLastError() };
        return Err(Error::Io(std::io::Error::from_raw_os_error(
            code.cast_signed(),
        )));
    }
    Ok(())
}
650
651// ---------- internal access to the FileHandle's inner File --------
652
653impl FileHandle {
654 /// Borrow the inner `std::fs::File`. Internal to the platform
655 /// layer; only the lock submodule needs the raw fd / handle.
656 fn file_ref(&self) -> &std::fs::File {
657 &self.file
658 }
659}
660
#[cfg(test)]
mod tests {
    #[cfg(unix)]
    use super::*;
    #[cfg(unix)]
    use tempfile::TempDir;

    /// Create `name` inside `dir` and extend it to 4 KiB so the
    /// lock byte offsets at 96 / 97..128 fall inside the file.
    /// Unix-only because every caller is gated on `cfg(unix)`; on
    /// Windows the lock tests live elsewhere and the helper would
    /// be dead code.
    #[cfg(unix)]
    fn fresh_handle(dir: &TempDir, name: &str) -> FileHandle {
        let target = dir.path().join(name);
        let handle = FileHandle::open_or_create(&target).expect("open");
        handle.set_len(4096).expect("extend");
        handle
    }

    /// Open another, independent handle to the `lock.obj` file in
    /// `dir`; `label` becomes the panic message if the open fails.
    #[cfg(unix)]
    fn reopen(dir: &TempDir, label: &str) -> FileHandle {
        FileHandle::open_or_create(dir.path().join("lock.obj")).expect(label)
    }

    /// A held writer lock refuses a second handle until it is
    /// dropped.
    #[test]
    #[cfg(unix)]
    fn writer_lock_excludes_writers() {
        let dir = TempDir::new().expect("tmp");
        let path = dir.path().join("lock.obj");
        let seed = FileHandle::open_or_create(&path).expect("init");
        seed.set_len(4096).expect("len");
        drop(seed);

        let h1 = FileHandle::open_or_create(&path).expect("h1");
        let h2 = FileHandle::open_or_create(&path).expect("h2");

        let first = h1
            .try_lock_writer()
            .expect("try lock h1")
            .expect("must acquire");
        let second = h2.try_lock_writer().expect("try lock h2");
        assert!(second.is_none(), "second writer lock must be refused");
        drop(first);
        let _retaken = h2
            .try_lock_writer()
            .expect("try lock h2 again")
            .expect("now acquires");
    }

    /// `lock_writer` gives up with `Error::Busy` once its timeout
    /// has elapsed.
    #[test]
    #[cfg(unix)]
    fn writer_busy_timeout_returns_err_busy() {
        let dir = TempDir::new().expect("tmp");
        let _h0 = fresh_handle(&dir, "lock.obj");

        let h1 = reopen(&dir, "h1");
        let h2 = reopen(&dir, "h2");
        let _held = h1
            .try_lock_writer()
            .expect("h1 lock")
            .expect("h1 must acquire");
        let began = std::time::Instant::now();
        let err = h2
            .lock_writer(Duration::from_millis(50))
            .expect_err("must time out");
        let elapsed = began.elapsed();
        let is_writer_busy = matches!(
            err,
            Error::Busy {
                kind: LockKind::Writer
            }
        );
        assert!(is_writer_busy);
        // Some slack for test scheduler jitter.
        assert!(
            elapsed >= Duration::from_millis(45),
            "must wait at least the timeout (~50 ms); got {elapsed:?}",
        );
    }

    /// Several handles can hold reader locks at the same time.
    #[test]
    #[cfg(unix)]
    fn many_readers_can_coexist() {
        let dir = TempDir::new().expect("tmp");
        let _h0 = fresh_handle(&dir, "lock.obj");
        let h1 = reopen(&dir, "h1");
        let h2 = reopen(&dir, "h2");
        let h3 = reopen(&dir, "h3");
        let wait = Duration::from_millis(50);
        let g1 = h1.lock_reader(wait).expect("r1");
        let g2 = h2.lock_reader(wait).expect("r2");
        let g3 = h3.lock_reader(wait).expect("r3");
        // Whether the guards share a slot is irrelevant; the
        // contract is only that all three were acquired
        // simultaneously without erroring.
        drop((g1, g2, g3));
    }

    /// The WRITER_LOCK lives at byte 96; the reader range is
    /// 97..128. A held writer lock must NOT block a reader from
    /// acquiring a slot in the reader range. (Exclusion between
    /// readers and writers is the TXN layer's job — the byte layout
    /// intentionally keeps them on separate anchors so
    /// open_readonly callers don't have to wait for an in-flight
    /// writer.)
    #[test]
    #[cfg(unix)]
    fn reader_and_writer_dont_collide_on_separate_anchors() {
        let dir = TempDir::new().expect("tmp");
        let _h0 = fresh_handle(&dir, "lock.obj");
        let h1 = reopen(&dir, "h1");
        let h2 = reopen(&dir, "h2");
        let wait = Duration::from_millis(50);
        let _wg = h1.lock_writer(wait).expect("writer");
        let _rg = h2.lock_reader(wait).expect("reader must not collide");
    }

    /// Explicit `release` succeeds and leaves the lock free for the
    /// same handle to take again.
    #[test]
    #[cfg(unix)]
    fn explicit_release_returns_ok() {
        let dir = TempDir::new().expect("tmp");
        let _h0 = fresh_handle(&dir, "lock.obj");
        let h = reopen(&dir, "h");
        let wait = Duration::from_millis(50);
        let guard = h.lock_writer(wait).expect("lock");
        guard.release().expect("release ok");
        // A re-acquire from the same handle must succeed.
        let _again = h.lock_writer(wait).expect("relock");
    }

    /// Compile-only smoke test that the `#[must_use]` annotation is
    /// non-fatal when the caller actually uses the guard.
    #[test]
    #[cfg(unix)]
    fn lock_methods_compile_when_dropped() {
        let dir = TempDir::new().expect("tmp");
        let _h0 = fresh_handle(&dir, "lock.obj");
        let h = reopen(&dir, "h");
        let guard = h.lock_reader(Duration::from_millis(10)).expect("rlock");
        drop(guard);
    }

    /// #30: the OFD-capability gate must agree with the build target.
    /// On any target obj actually supports (Linux/Android/Apple) the
    /// gate is `Ok` and the per-fd locking primitive is OFD. The
    /// classic-`F_SETLK` fallback that silently broke same-process
    /// multi-fd exclusion on FreeBSD / unknown POSIX is gone: those
    /// targets now hard-error at `ensure_ofd_locks_supported`.
    #[test]
    #[cfg(unix)]
    fn ofd_capability_gate_matches_target() {
        // The CI matrix is Linux + macOS; both have OFD locks. If
        // this assertion ever fires, obj is being built for a target
        // whose lock soundness has not been established — the gate
        // below will (correctly) refuse to lock.
        let expected = cfg!(any(
            target_os = "linux",
            target_os = "android",
            target_vendor = "apple",
        ));
        assert_eq!(
            TARGET_HAS_OFD_LOCKS, expected,
            "OFD capability constant must track the supported-target set",
        );
        let gate = ensure_ofd_locks_supported();
        if !TARGET_HAS_OFD_LOCKS {
            // Unsupported target: every lock entry point must refuse
            // with ErrorKind::Unsupported rather than fall back to a
            // per-process F_SETLK lock.
            match gate {
                Err(Error::Io(e)) => {
                    assert_eq!(e.kind(), std::io::ErrorKind::Unsupported);
                }
                other => panic!("expected Io(Unsupported), got {other:?}"),
            }
            return;
        }
        gate.expect("supported targets must pass the gate");
    }

    /// #30 regression: two `FileHandle`s to the SAME file in the SAME
    /// process must exclude each other for the writer lock. This is
    /// exactly the invariant the classic per-process `F_SETLK`
    /// fallback silently broke (a second open in-process would
    /// "succeed" because the lock coalesces per process). OFD locks
    /// are per-fd, so the second handle is correctly refused.
    #[test]
    #[cfg(unix)]
    fn same_process_multi_fd_writer_exclusion_holds() {
        let dir = TempDir::new().expect("tmp");
        let _h0 = fresh_handle(&dir, "lock.obj");
        let h1 = reopen(&dir, "h1");
        let h2 = reopen(&dir, "h2");
        let held = h1.try_lock_writer().expect("h1 try").expect("h1 acquires");
        // Same process, different fd: must NOT coalesce — refused.
        assert!(
            h2.try_lock_writer().expect("h2 try").is_none(),
            "per-fd OFD lock must refuse a second in-process handle; a \
             per-process F_SETLK fallback would wrongly grant this",
        );
        drop(held);
    }
}