reddb_server/storage/btree/prefetch.rs
1//! B-tree leaf prefetch — Phase 5 / PLAN.md backlog 3.6.
2//!
3//! Issues OS-level read-ahead hints for the next leaf block in a
4//! range scan, so the kernel can DMA the page into the buffer pool
5//! while the cursor is still consuming the current one.
6//!
//! Mirrors PG's `BufferPrefetchPage` via `posix_fadvise(WILLNEED)`
//! on Linux. Every other platform (macOS / BSD / Windows) currently
//! gets a stub that returns `PrefetchError::Unsupported`.
9//!
10//! ## Why
11//!
12//! reddb's range scan in `btree/cursor.rs` walks leaves
13//! sequentially. The buffer-pool fetch for leaf N+1 happens only
14//! after the cursor finishes leaf N, so the disk read serializes
15//! with the CPU's tuple processing. Prefetch breaks that
16//! dependency: as soon as the cursor lands on leaf N's halfway
17//! point, we tell the kernel to start fetching N+1 in the
18//! background.
19//!
20//! ## Wiring
21//!
22//! Phase 5 wiring adds a single call site in
23//! `btree/cursor.rs::advance_leaf` that checks "are we past 50%
24//! of the current leaf?" and if so calls `prefetch_page(next_leaf_id)`.
25//! The cursor already knows `next_leaf_id` from the leaf header.
26//!
27//! The actual `posix_fadvise` syscall is OS-specific and behind
28//! a stub on platforms that don't support it (Windows). reddb
29//! ships Linux-first so the Linux path is the one this module
30//! actually exercises.
31
32#[cfg(target_os = "linux")]
33use std::os::unix::io::AsRawFd;
34
/// Errors raised by the prefetch path.
///
/// A failed prefetch is a performance miss, not a correctness bug,
/// so callers are expected to log the error and carry on.
#[derive(Debug)]
pub enum PrefetchError {
    /// The read-ahead hint could not be issued. The wrapped
    /// [`std::io::Error`] carries the reason, so the caller can
    /// decide whether to log or escalate. It is also reachable
    /// through [`std::error::Error::source`].
    SyscallFailed(std::io::Error),
    /// The platform has no read-ahead hint wired up. The call is a
    /// no-op, but the state is surfaced for diagnostics.
    Unsupported,
}

impl std::fmt::Display for PrefetchError {
    /// Renders a short human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::SyscallFailed(e) => write!(f, "prefetch syscall failed: {e}"),
            Self::Unsupported => f.write_str("prefetch unsupported on this platform"),
        }
    }
}

impl std::error::Error for PrefetchError {
    /// Exposes the underlying `io::Error` so generic error reporters
    /// that walk the `source()` chain can reach the OS-level cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::SyscallFailed(e) => Some(e),
            Self::Unsupported => None,
        }
    }
}
59
60/// Tell the OS to start fetching the byte range
61/// `[offset, offset + length)` of `file` into the page cache.
62/// Returns `Ok(())` when the syscall succeeds (no guarantee
63/// the data actually arrives — that's up to the kernel).
64///
65/// **Linux**: invokes `posix_fadvise(fd, off, len, POSIX_FADV_WILLNEED)`.
66/// **macOS / BSD**: stub returning `Unsupported`. A future commit
67/// adds `fcntl(F_RDADVISE)` for Darwin.
68/// **Windows**: stub returning `Unsupported`.
69pub fn prefetch_range(file: &std::fs::File, offset: u64, length: u64) -> Result<(), PrefetchError> {
70 #[cfg(target_os = "linux")]
71 {
72 // POSIX_FADV_WILLNEED == 3 on Linux. Hardcoded so we
73 // don't pull libc into the dep graph.
74 const POSIX_FADV_WILLNEED: i32 = 3;
75 let fd = file.as_raw_fd();
76 // SAFETY: fd is a valid open file descriptor for the
77 // lifetime of `file`. The syscall takes raw integers.
78 let ret = unsafe {
79 libc_like::posix_fadvise(fd, offset as i64, length as i64, POSIX_FADV_WILLNEED)
80 };
81 if ret == 0 {
82 Ok(())
83 } else {
84 Err(PrefetchError::SyscallFailed(
85 std::io::Error::from_raw_os_error(ret),
86 ))
87 }
88 }
89 #[cfg(not(target_os = "linux"))]
90 {
91 let _ = (file, offset, length);
92 Err(PrefetchError::Unsupported)
93 }
94}
95
96/// Prefetch a single page identified by `(file, page_id, page_size)`.
97/// Convenience wrapper for `prefetch_range` that does the
98/// `offset = page_id * page_size` math.
99pub fn prefetch_page(
100 file: &std::fs::File,
101 page_id: u64,
102 page_size: u32,
103) -> Result<(), PrefetchError> {
104 prefetch_range(file, page_id * page_size as u64, page_size as u64)
105}
106
/// Hand-rolled FFI surface for the single libc symbol this module
/// calls, declared locally so the full `libc` crate stays out of
/// the dependency graph. Only compiled on Linux.
///
/// NOTE(review): `offset` and `len` are declared as `i64`, which
/// matches C's `off_t` only on targets where `off_t` is 64 bits
/// wide; confirm before building for 32-bit Linux.
#[cfg(target_os = "linux")]
mod libc_like {
    use std::os::raw::c_int;

    extern "C" {
        /// `posix_fadvise` as exported by the system C library.
        /// Returns 0 on success or an error number on failure.
        pub fn posix_fadvise(fd: c_int, offset: i64, len: i64, advice: c_int) -> c_int;
    }
}
115}