Skip to main content

nodedb_wal/
align.rs

1//! O_DIRECT alignment utilities.
2//!
3//! O_DIRECT requires that:
4//! - The file offset is aligned to the logical block size (typically 512 or 4096).
5//! - The memory buffer address is aligned to the logical block size.
6//! - The I/O size is a multiple of the logical block size.
7//!
8//! This module provides an aligned buffer type that satisfies these constraints.
9
10use crate::error::{Result, WalError};
11
/// Alignment used for O_DIRECT I/O when the caller does not choose one (4 KiB).
///
/// 4096 bytes matches the typical NVMe logical block size and the Linux page
/// size. A different value can be selected at runtime through
/// `WalWriterConfig::alignment`, which reads from `WalTuning::alignment`.
pub const DEFAULT_ALIGNMENT: usize = 4 * 1024;
18
/// Heap-allocated byte buffer that satisfies the O_DIRECT alignment rules.
///
/// The allocation begins on an `alignment`-byte boundary and its capacity is
/// always a whole multiple of `alignment`.
pub struct AlignedBuf {
    /// Start of the raw allocation; `layout` is what guarantees its alignment.
    ptr: *mut u8,

    /// Number of bytes written so far, i.e. the offset of the next write.
    len: usize,

    /// Usable size in bytes (always a multiple of `alignment`).
    capacity: usize,

    /// Alignment in bytes; a power of two.
    alignment: usize,

    /// The layout the allocation was made with, kept so it can be freed.
    layout: std::alloc::Layout,
}

// SAFETY: `AlignedBuf` is the sole owner of the allocation behind `ptr`: it is
// a plain byte region with no interior mutability, and writes through the
// type's methods require `&mut self`. Handing the whole value to another
// thread therefore moves ownership of the bytes with it.
unsafe impl Send for AlignedBuf {}
43
44impl AlignedBuf {
45    /// Allocate a new aligned buffer.
46    ///
47    /// `min_capacity` is rounded up to the next multiple of `alignment`.
48    /// `alignment` must be a power of two.
49    pub fn new(min_capacity: usize, alignment: usize) -> Result<Self> {
50        assert!(
51            alignment.is_power_of_two(),
52            "alignment must be power of two"
53        );
54        assert!(alignment > 0, "alignment must be > 0");
55
56        // Round capacity up to alignment boundary.
57        let capacity = round_up(min_capacity.max(alignment), alignment);
58
59        let layout = std::alloc::Layout::from_size_align(capacity, alignment).map_err(|_| {
60            WalError::AlignmentViolation {
61                context: "buffer allocation",
62                required: alignment,
63                actual: min_capacity,
64            }
65        })?;
66
67        // SAFETY: Layout has non-zero size (capacity >= alignment > 0).
68        let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
69        if ptr.is_null() {
70            std::alloc::handle_alloc_error(layout);
71        }
72
73        Ok(Self {
74            ptr,
75            capacity,
76            len: 0,
77            alignment,
78            layout,
79        })
80    }
81
82    /// Allocate with the default 4 KiB alignment.
83    pub fn with_default_alignment(min_capacity: usize) -> Result<Self> {
84        Self::new(min_capacity, DEFAULT_ALIGNMENT)
85    }
86
87    /// Write bytes into the buffer. Returns the number of bytes written.
88    ///
89    /// If the buffer doesn't have enough remaining capacity, writes as much
90    /// as possible and returns the count.
91    pub fn write(&mut self, data: &[u8]) -> usize {
92        let available = self.capacity - self.len;
93        let to_write = data.len().min(available);
94        if to_write > 0 {
95            // SAFETY: ptr + len is within the allocation, and to_write <= available.
96            unsafe {
97                std::ptr::copy_nonoverlapping(data.as_ptr(), self.ptr.add(self.len), to_write);
98            }
99            self.len += to_write;
100        }
101        to_write
102    }
103
104    /// Get the written portion of the buffer as a byte slice.
105    ///
106    /// The returned slice starts at an aligned address.
107    pub fn as_slice(&self) -> &[u8] {
108        // SAFETY: ptr is valid for `len` bytes, and we have shared access.
109        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
110    }
111
112    /// Get the written portion padded up to the next alignment boundary.
113    ///
114    /// O_DIRECT requires the I/O size to be a multiple of the block size.
115    /// The padding bytes are zeroed (from `alloc_zeroed`).
116    pub fn as_aligned_slice(&self) -> &[u8] {
117        let aligned_len = round_up(self.len, self.alignment);
118        let actual_len = aligned_len.min(self.capacity);
119        // SAFETY: ptr is valid for `capacity` bytes, and actual_len <= capacity.
120        unsafe { std::slice::from_raw_parts(self.ptr, actual_len) }
121    }
122
123    /// Reset the buffer for reuse without deallocating.
124    pub fn clear(&mut self) {
125        self.len = 0;
126    }
127
128    /// Current number of bytes written.
129    pub fn len(&self) -> usize {
130        self.len
131    }
132
133    /// Whether the buffer has no written data.
134    pub fn is_empty(&self) -> bool {
135        self.len == 0
136    }
137
138    /// Total capacity of the buffer.
139    pub fn capacity(&self) -> usize {
140        self.capacity
141    }
142
143    /// Remaining capacity.
144    pub fn remaining(&self) -> usize {
145        self.capacity - self.len
146    }
147
148    /// The alignment of this buffer.
149    pub fn alignment(&self) -> usize {
150        self.alignment
151    }
152
153    /// Raw pointer to the buffer start (for io_uring submission).
154    pub fn as_ptr(&self) -> *const u8 {
155        self.ptr
156    }
157
158    /// Mutable raw pointer to the buffer start.
159    pub fn as_mut_ptr(&mut self) -> *mut u8 {
160        self.ptr
161    }
162}
163
164impl Drop for AlignedBuf {
165    fn drop(&mut self) {
166        // SAFETY: ptr was allocated with this layout in `new()`.
167        unsafe {
168            std::alloc::dealloc(self.ptr, self.layout);
169        }
170    }
171}
172
/// Round `value` up to the next multiple of `align` (which must be a power of two).
///
/// Values that are already a multiple of `align` are returned unchanged.
///
/// # Panics
///
/// Panics if the rounded result does not fit in `usize`. The check is explicit
/// so that release builds fail loudly instead of silently wrapping to 0.
/// Debug builds additionally panic when `align` is not a power of two.
#[inline]
pub const fn round_up(value: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two(), "align must be a power of two");
    let mask = align - 1;
    match value.checked_add(mask) {
        Some(padded) => padded & !mask,
        None => panic!("round_up: result overflows usize"),
    }
}
178
/// Report whether `value` is a multiple of `align`.
///
/// `align` is expected to be a power of two; the test inspects only the bits
/// of `value` below `align`.
#[inline]
pub const fn is_aligned(value: usize, align: usize) -> bool {
    let low_bits = align - 1;
    (value & low_bits) == 0
}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// Multiples stay put; everything else moves to the next 4 KiB boundary.
    #[test]
    fn round_up_works() {
        let cases = [(0, 0), (1, 4096), (4096, 4096), (4097, 8192), (8192, 8192)];
        for &(input, expected) in cases.iter() {
            assert_eq!(round_up(input, 4096), expected);
        }
    }

    /// Only exact multiples of the alignment are reported as aligned.
    #[test]
    fn is_aligned_works() {
        for &value in [0usize, 4096, 8192].iter() {
            assert!(is_aligned(value, 4096));
        }
        for &value in [1usize, 4097].iter() {
            assert!(!is_aligned(value, 4096));
        }
    }

    /// The allocation itself starts on an alignment boundary.
    #[test]
    fn aligned_buf_address_is_aligned() {
        let buf = AlignedBuf::with_default_alignment(1).unwrap();
        let address = buf.as_ptr() as usize;
        assert!(is_aligned(address, DEFAULT_ALIGNMENT));
    }

    /// A tiny request is still rounded up to one full aligned unit.
    #[test]
    fn aligned_buf_capacity_is_aligned() {
        let buf = AlignedBuf::with_default_alignment(1).unwrap();
        let capacity = buf.capacity();
        assert!(is_aligned(capacity, DEFAULT_ALIGNMENT));
        assert!(capacity >= DEFAULT_ALIGNMENT);
    }

    /// Bytes written are the bytes read back.
    #[test]
    fn aligned_buf_write_and_read() {
        let payload = b"hello nodedb WAL";
        let mut buf = AlignedBuf::with_default_alignment(8192).unwrap();
        assert_eq!(buf.write(payload), payload.len());
        assert_eq!(&buf.as_slice()[..payload.len()], payload);
    }

    /// A short write is padded out to one alignment unit.
    #[test]
    fn aligned_slice_pads_to_boundary() {
        let mut buf = AlignedBuf::with_default_alignment(8192).unwrap();
        buf.write(b"short");
        assert_eq!(buf.len(), 5);
        let padded = buf.as_aligned_slice();
        assert_eq!(padded.len(), DEFAULT_ALIGNMENT);
    }

    /// Clearing empties the buffer but keeps the same allocation.
    #[test]
    fn clear_resets_without_dealloc() {
        let mut buf = AlignedBuf::with_default_alignment(8192).unwrap();
        let original_ptr = buf.as_ptr();
        buf.write(b"some data");
        buf.clear();
        assert_eq!(buf.len(), 0);
        assert!(buf.is_empty());
        assert_eq!(buf.as_ptr(), original_ptr); // Same allocation.
    }
}