spacetimedb_table/page.rs
1//! Provides a [`Page`] abstraction that stores rows
2//! and an associated header necessary for the page to work.
3//! Consult the documentation of this type for a list of operations
4//! and a description of how pages work.
5//!
6//! A page can provide a split mutable view of its fixed section and its variable section.
7//! This is provided through [`Page::split_fixed_var_mut`] with view operations
8//! defined on [`FixedView`] and [`VarView`].
9//!
10//! [ralfj_safe_valid]: https://www.ralfj.de/blog/2018/08/22/two-kinds-of-invariants.html
11//!
12//! Technical terms:
13//!
14//! - `valid` means, when referring to a type, granule, or row (depending on the context),
15//! that a memory location holds a *safe* object of that kind.
16//! When "valid for writes" is used, the location must be properly aligned
17//! and none of its bytes may be uninit,
18//! but the value need not be valid at the type in question.
19//! "Valid for writes" is equivalent to valid-unconstrained.
20//!
21//! - `valid-unconstrained` means, when referring to a memory location with a given type,
22//! that the location stores a byte pattern which Rust/LLVM's memory model recognizes as valid,
23//! and therefore must not contain any uninit,
24//! but the value is not required to be logically meaningful,
25//! and no code may depend on the data within it to uphold any invariants.
26//! E.g. an unallocated [`VarLenGranule`] within a page stores valid-unconstrained bytes,
27//! because the bytes are either 0 from the initial [`alloc_zeroed`] of the page,
28//! or contain stale data from a previously freed [`VarLenGranule`].
29//!
30//! - `unused` means that it is safe to overwrite a block of memory without cleaning up its previous value.
31//!
32//! See the post [Two Kinds of Invariants: Safety and Validity][ralfj_safe_valid]
33//! for a discussion on safety and validity invariants.
34
35use super::{
36 blob_store::BlobStore,
37 fixed_bit_set::FixedBitSet,
38 indexes::{Byte, Bytes, PageOffset, PAGE_HEADER_SIZE, PAGE_SIZE},
39 var_len::{is_granule_offset_aligned, VarLenGranule, VarLenGranuleHeader, VarLenMembers, VarLenRef},
40};
41use crate::{fixed_bit_set::IterSet, indexes::max_rows_in_page, static_assert_size, table::BlobNumBytes};
42use core::{mem, ops::ControlFlow};
43use spacetimedb_sats::layout::{Size, MIN_ROW_SIZE};
44use spacetimedb_sats::memory_usage::MemoryUsage;
45use spacetimedb_sats::{de::Deserialize, ser::Serialize};
46use thiserror::Error;
47
48#[derive(Error, Debug, PartialEq, Eq)]
49pub enum Error {
50 #[error("Want to allocate a var-len object of {need} granules, but have only {have} granules available")]
51 InsufficientVarLenSpace { need: u16, have: u16 },
52 #[error("Want to allocate a fixed-len row of {} bytes, but the page is full", need.len())]
53 InsufficientFixedLenSpace { need: Size },
54}
55
56/// A cons-cell in a freelist either
57/// for an unused fixed-len cell or a variable-length granule.
58#[repr(C)] // Required for a stable ABI.
59#[derive(Clone, Copy, Debug, PartialEq, Eq, bytemuck::NoUninit, Serialize, Deserialize)]
60struct FreeCellRef {
61 /// The address of the next free cell in a freelist.
62 ///
63 /// The `PageOffset::PAGE_END` is used as a sentinel to signal "`None`".
64 next: PageOffset,
65}
66
67impl MemoryUsage for FreeCellRef {
68 fn heap_usage(&self) -> usize {
69 let Self { next } = self;
70 next.heap_usage()
71 }
72}
73
74impl FreeCellRef {
75 /// The sentinel for NULL cell references.
76 const NIL: Self = Self {
77 next: PageOffset::PAGE_END,
78 };
79
80 /// Replaces the cell reference with `offset`, returning the existing one.
81 #[inline]
82 fn replace(&mut self, offset: PageOffset) -> FreeCellRef {
83 let next = mem::replace(&mut self.next, offset);
84 Self { next }
85 }
86
87 /// Returns whether the cell reference is non-empty.
88 #[inline]
89 const fn has(&self) -> bool {
90 !self.next.is_at_end()
91 }
92
93 /// Take the first free cell in the freelist starting with `self`, if any,
94 /// and promote the second free cell as the freelist head.
95 ///
96 /// # Safety
97 ///
98 /// When `self.has()`, it must point to a valid `FreeCellRef`.
99 #[inline]
100 unsafe fn take_freelist_head(
101 self: &mut FreeCellRef,
102 row_data: &Bytes,
103 adjust_free: impl FnOnce(PageOffset) -> PageOffset,
104 ) -> Option<PageOffset> {
105 self.has().then(|| {
106 let head = adjust_free(self.next);
107 // SAFETY: `self.has()` holds, so `self.next`, and hence `head`, points to a valid `FreeCellRef`.
108 let next = unsafe { get_ref(row_data, head) };
109 self.replace(*next).next
110 })
111 }
112
113 /// Prepend `new_head` to the freelist starting with `self`.
114 ///
115 /// SAFETY: `new_head`, after adjustment, must be in bounds of `row_data`.
116 /// Moreover, it must be valid for writing a `FreeCellRef` to it,
117 /// which includes being properly aligned with respect to `row_data` for a `FreeCellRef`.
118 /// Additionally, `self` must contain a valid `FreeCellRef`.
119 #[inline]
120 unsafe fn prepend_freelist(
121 self: &mut FreeCellRef,
122 row_data: &mut Bytes,
123 new_head: PageOffset,
124 adjust_free: impl FnOnce(PageOffset) -> PageOffset,
125 ) {
126 let next = self.replace(new_head);
127 let new_head = adjust_free(new_head);
128 // SAFETY: Per caller contract, `new_head` is in bounds of `row_data`.
129 // Moreover, `new_head` points to an unused `FreeCellRef`, so we can write to it.
130 let next_slot: &mut FreeCellRef = unsafe { get_mut(row_data, new_head) };
131 *next_slot = next;
132 }
133}
134
135/// All the fixed size header information.
136#[repr(C)] // Required for a stable ABI.
137#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] // So we can dump and restore pages during snapshotting.
138struct FixedHeader {
139 /// A pointer to the head of the freelist which stores
140 /// all the unused (freed) fixed row cells.
141 /// These cells can be reused when inserting a new row.
142 next_free: FreeCellRef,
143
144 /// High water mark (HWM) for fixed-length rows.
145 /// Points one past the last-allocated (highest-indexed) fixed-length row,
146 /// so to allocate a fixed-length row from the gap,
147 /// post-increment this index.
148 // TODO(perf,future-work): determine how to lower the high water mark when freeing the topmost row.
149 last: PageOffset,
150
151 /// The number of rows currently in the page.
152 ///
153 /// N.B. this is not the same as `self.present_rows.len()`
154 /// as that counts both zero and one bits.
155 num_rows: u16,
156
157 // TODO(stable-module-abi): should this be inlined into the page?
158 /// For each fixed-length row slot, true if a row is stored there,
159 /// false if the slot is unallocated.
160 ///
161 /// Unallocated row slots store valid-unconstrained bytes, i.e. are never uninit.
162 present_rows: FixedBitSet,
163}
164
165impl MemoryUsage for FixedHeader {
166 fn heap_usage(&self) -> usize {
167 let Self {
168 next_free,
169 last,
170 num_rows,
171 present_rows,
172 } = self;
173 next_free.heap_usage() + last.heap_usage() + num_rows.heap_usage() + present_rows.heap_usage()
174 }
175}
176
177static_assert_size!(FixedHeader, 16);
178
179impl FixedHeader {
180 /// Returns a new `FixedHeader`
181 /// using the provided `max_rows_in_page` to decide how many rows `present_rows` can represent.
182 #[inline]
183 fn new(max_rows_in_page: usize) -> Self {
184 Self {
185 next_free: FreeCellRef::NIL,
186 // Points one after the last allocated fixed-length row, or `NULL` for an empty page.
187 last: PageOffset::VAR_LEN_NULL,
188 num_rows: 0,
189 present_rows: FixedBitSet::new(max_rows_in_page),
190 }
191 }
192
193 /// Set the (fixed) row starting at `offset`
194 /// and lasting `fixed_row_size` as `present`.
195 #[inline]
196 fn set_row_present(&mut self, offset: PageOffset, fixed_row_size: Size) {
197 self.set_row_presence(offset, fixed_row_size, true);
198 self.num_rows += 1;
199 }
200
201 /// Sets whether the (fixed) row starting at `offset`
202 /// and lasting `fixed_row_size` is `present` or not.
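///
/// E.g. (informal): with `fixed_row_size = 16`, the row starting at `offset = 48`
/// occupies bit `48 / 16 = 3` of `present_rows`.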
203 #[inline]
204 fn set_row_presence(&mut self, offset: PageOffset, fixed_row_size: Size, present: bool) {
205 self.present_rows.set(offset / fixed_row_size, present);
206 }
207
208 /// Returns whether the (fixed) row starting at `offset`
209 /// and lasting `fixed_row_size` is present or not.
210 #[inline]
211 fn is_row_present(&self, offset: PageOffset, fixed_row_size: Size) -> bool {
212 self.present_rows.get(offset / fixed_row_size)
213 }
214
215 /// Resets the header information to its state
216 /// when it was first created in [`FixedHeader::new`]
217 /// but with `max_rows_in_page` instead of the value passed on creation.
218 fn reset_for(&mut self, max_rows_in_page: usize) {
219 self.next_free = FreeCellRef::NIL;
220 self.last = PageOffset::VAR_LEN_NULL;
221 self.num_rows = 0;
222 self.present_rows.reset_for(max_rows_in_page);
223 }
224
225 /// Resets the header information to its state
226 /// when it was first created in [`FixedHeader::new`].
227 ///
228 /// The header is only good for the original row size.
229 #[inline]
230 fn clear(&mut self) {
231 self.next_free = FreeCellRef::NIL;
232 self.last = PageOffset::VAR_LEN_NULL;
233 self.num_rows = 0;
234 self.present_rows.clear();
235 }
236}
237
238/// All the var-len header information.
239#[repr(C)] // Required for a stable ABI.
240#[derive(bytemuck::NoUninit, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
241struct VarHeader {
242 /// A pointer to the head of the freelist which stores
243 /// all the unused (freed) var-len granules.
244 /// These cells can be reused when inserting a new row.
245 next_free: FreeCellRef,
246
247 /// The length of the freelist with its head referred to by `next_free`.
248 /// Stored in units of var-len granules.
249 ///
250 /// This field is redundant,
251 /// as it can be recovered by traversing `next_free`.
252 /// However, traversing this linked-list is not cache friendly,
253 /// so we memoize the computation here.
254 freelist_len: u16,
255
256 /// High water mark (HWM) for var-len granules.
257 /// Points to the last-allocated (lowest-indexed) var-len granule,
258 /// so to allocate a var-len granule from the gap,
259 /// pre-decrement this index.
260 // TODO(perf,future-work): determine how to "lower" the high water mark when freeing the "top"-most granule.
261 first: PageOffset,
262
263 /// The number of granules currently used by rows within this page.
264 ///
265 /// [`Page::bytes_used_by_rows`] needs this information.
266 /// Stored here because otherwise counting it would require traversing all the present rows.
267 num_granules: u16,
268}
269
270impl MemoryUsage for VarHeader {
271 fn heap_usage(&self) -> usize {
272 let Self {
273 next_free,
274 freelist_len,
275 first,
276 num_granules,
277 } = self;
278 next_free.heap_usage() + freelist_len.heap_usage() + first.heap_usage() + num_granules.heap_usage()
279 }
280}
281
282static_assert_size!(VarHeader, 8);
283
284impl Default for VarHeader {
285 fn default() -> Self {
286 Self {
287 next_free: FreeCellRef::NIL,
288 freelist_len: 0,
289 first: PageOffset::PAGE_END,
290 num_granules: 0,
291 }
292 }
293}
294
295impl VarHeader {
296 /// Resets the header information to its state
297 /// when it was first created in [`VarHeader::default`].
298 fn clear(&mut self) {
299 *self = Self::default();
300 }
301}
302
303/// The metadata / header of a page that is necessary
304/// for modifying and interpreting the `row_data`.
305///
306/// This header info is split into a header for the fixed part
307/// and one for the variable part.
308/// The header is stored in the same heap allocation as the `row_data`
309/// as the whole [`Page`] is `Box`ed.
310#[repr(C)] // Required for a stable ABI.
311#[repr(align(64))] // Alignment must be same as `VarLenGranule::SIZE`.
312#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] // So we can dump and restore pages during snapshotting.
313pub(super) struct PageHeader {
314 /// The header data relating to the fixed component of a row.
315 fixed: FixedHeader,
316 /// The header data relating to the var-len component of a row.
317 var: VarHeader,
318 /// The content-addressed hash of the page on disk,
319 /// if unmodified since the last snapshot,
320 /// and `None` otherwise.
321 ///
322 /// This means that modifications to the page always set this field to `None`.
323 unmodified_hash: Option<blake3::Hash>,
324}
325
326impl MemoryUsage for PageHeader {
327 fn heap_usage(&self) -> usize {
328 let Self {
329 fixed,
330 var,
331 // MEMUSE: no allocation, ok to ignore
332 unmodified_hash: _,
333 } = self;
334 fixed.heap_usage() + var.heap_usage()
335 }
336}
337
338static_assert_size!(PageHeader, PAGE_HEADER_SIZE);
339
340impl PageHeader {
341 /// Returns a new `PageHeader` suitable for a [`Page`] holding at most `max_rows_in_page` rows.
342 fn new(max_rows_in_page: usize) -> Self {
343 Self {
344 fixed: FixedHeader::new(max_rows_in_page),
345 var: VarHeader::default(),
346 unmodified_hash: None,
347 }
348 }
349
350 /// Resets the header information to its state
351 /// when it was first created in [`PageHeader::new`]
352 /// but with `max_rows_in_page` instead of the value passed on creation.
353 fn reset_for(&mut self, max_rows_in_page: usize) {
354 self.fixed.reset_for(max_rows_in_page);
355 self.var.clear();
356 self.unmodified_hash = None;
357 }
358
359 /// Resets the header information to its state
360 /// when it was first created in [`PageHeader::new`].
361 ///
362 /// The header is only good for the original row size.
363 fn clear(&mut self) {
364 self.fixed.clear();
365 self.var.clear();
366 self.unmodified_hash = None;
367 }
368
369 /// Returns the maximum number of rows the page can hold.
370 ///
371 /// Note that this number can be bigger
372 /// than the value provided in [`Self::new`] due to rounding up.
373 pub(super) fn max_rows_in_page(&self) -> usize {
374 self.fixed.present_rows.bits()
375 }
376
377 /// Returns a pointer to the `present_rows` bitset.
378 /// This is exposed for testing only.
379 #[cfg(test)]
380 pub(super) fn present_rows_storage_ptr_for_test(&self) -> *const () {
381 self.fixed.present_rows.storage().as_ptr().cast()
382 }
383}
384
385/// Fixed-length row portions must be at least large enough to store a `FreeCellRef`.
386const _MIN_ROW_SIZE_CAN_STORE_FCR: () = assert!(MIN_ROW_SIZE.len() >= mem::size_of::<FreeCellRef>());
387
388/// [`VarLenGranule`]s must be at least as large as the minimum row size, and hence large enough to store a [`FreeCellRef`].
389const _VLG_CAN_STORE_FCR: () = assert!(VarLenGranule::SIZE.len() >= MIN_ROW_SIZE.len());
390
391/// Pointers properly aligned for a [`VarLenGranule`] must be properly aligned for [`FreeCellRef`].
392/// This is the case as the former's alignment is a multiple of the latter's alignment.
393const _VLG_ALIGN_MULTIPLE_OF_FCR: () = assert!(mem::align_of::<VarLenGranule>() % mem::align_of::<FreeCellRef>() == 0);
394
395/// The actual row data of a [`Page`].
396type RowData = [Byte; PageOffset::PAGE_END.idx()];
397
398/// A page of row data with an associated `header` and the raw `row_data` itself.
399///
400/// As a rough summary, the strategy employed by this page is:
401///
402/// - The fixed-len parts of rows grow left-to-right
403/// and starts from the beginning of the `row_data`
404/// until its high water mark (fixed HWM), i.e., `self.header.fixed.last`.
405///
406/// - The var-len parts of rows grow right-to-left
407/// and starts from the end of the `row_data`
408/// until its high water mark (variable HWM), i.e., `self.header.var.first`.
409///
410/// Each var-len object is stored in terms of a linked-list of chunks.
411/// Each chunk in this case is a [`VarLenGranule`] taking up 64 bytes where:
412/// - 6 bits = length, 10 bits = next-cell-pointer
413/// - 62 bytes = the bytes of the object
414///
415/// - As new rows are added, the HWMs move appropriately.
416/// When the fixed and variable HWMs meet, the page is full.
417///
418/// - When rows are freed, a freelist strategy is used both for
419/// the fixed parts and each `VarLenGranule`.
420/// These freelists are then used first before using space from the gap.
421/// The head of these freelists are stored in `next_free`
422/// in the fixed and variable headers respectively.
423///
424/// - As the fixed parts of rows may store pointers into the var-length section,
425/// to ensure that these pointers aren't dangling,
426/// the page uses pointer fixups when adding to, deleting from, and copying the page.
427/// These fixups are handled by having callers provide `VarLenMembers`
428/// to find the var-len reference slots in the fixed parts.
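///
/// As a rough worked example of the granule arithmetic above (informal, assuming
/// the object is small enough not to be moved to the blob store):
/// each 64-byte granule spends 2 bytes on its header (6-bit length + 10-bit next pointer)
/// and offers 62 bytes of payload, so a 150-byte var-len object occupies
/// `ceil(150 / 62) = 3` granules holding 62 + 62 + 26 bytes,
/// while the fixed-len row holds only a `VarLenRef` to the head granule.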
429#[repr(C)]
430// ^-- Required for a stable ABI.
431#[repr(align(64))]
432// ^-- Must have align at least that of `VarLenGranule`,
433// so that `row_data[PageOffset::PAGE_END - VarLenGranule::SIZE]` is an aligned pointer to `VarLenGranule`.
434// TODO(bikeshedding): consider raising the alignment. We may want this to be OS page (4096) aligned.
435#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] // So we can dump and restore pages during snapshotting.
436pub struct Page {
437 /// The header containing metadata on how to interpret and modify the `row_data`.
438 header: PageHeader,
439 /// The actual bytes stored in the page.
440 /// This contains row data, fixed and variable, and freelists.
441 row_data: RowData,
442}
443
444impl MemoryUsage for Page {
445 fn heap_usage(&self) -> usize {
446 self.header.heap_usage()
447 }
448}
449
450static_assert_size!(Page, PAGE_SIZE);
451
452/// A mutable view of the fixed-len section of a [`Page`].
453pub struct FixedView<'page> {
454 /// A mutable view of the fixed-len bytes.
455 fixed_row_data: &'page mut Bytes,
456 /// A mutable view of the fixed header.
457 header: &'page mut FixedHeader,
458}
459
460impl FixedView<'_> {
461 /// Returns a mutable view of the row from `start` lasting `fixed_row_size` number of bytes.
462 ///
463 /// This method is safe, but callers should take care that `start` and `fixed_row_size`
464 /// are correct for this page, and that `start` is aligned.
465 /// Callers should further ensure that mutations to the row leave the row bytes
466 /// in an expected state, i.e. initialized where required by the row type,
467 /// and with `VarLenRef`s that point to valid granules and with correct lengths.
468 pub fn get_row_mut(&mut self, start: PageOffset, fixed_row_size: Size) -> &mut Bytes {
469 &mut self.fixed_row_data[start.range(fixed_row_size)]
470 }
471
472 /// Returns a shared view of the row from `start` lasting `fixed_row_size` number of bytes.
473 fn get_row(&mut self, start: PageOffset, fixed_row_size: Size) -> &Bytes {
474 &self.fixed_row_data[start.range(fixed_row_size)]
475 }
476
477 /// Frees the row starting at `row_offset` and lasting `fixed_row_size` bytes.
478 ///
479 /// # Safety
480 ///
481 /// `range_move(0..fixed_row_size, row_offset)` must be in bounds of `row_data`.
482 /// Moreover, it must be valid for writing a `FreeCellRef` to it,
483 /// which includes being properly aligned with respect to `row_data` for a `FreeCellRef`.
484 pub unsafe fn free(&mut self, row_offset: PageOffset, fixed_row_size: Size) {
485 // TODO(perf,future-work): if `row` is at the HWM, return it to the gap.
486
487 // SAFETY: Per caller contract, `row_offset` must be in bounds of `row_data`.
488 // Moreover, it must be valid for writing a `FreeCellRef` to it,
489 // which includes being properly aligned with respect to `row_data` for a `FreeCellRef`.
490 // We also know that `self.header.next_free` contains a valid `FreeCellRef`.
491 unsafe {
492 self.header
493 .next_free
494 .prepend_freelist(self.fixed_row_data, row_offset, |x| x)
495 };
496 self.header.num_rows -= 1;
497 self.header.set_row_presence(row_offset, fixed_row_size, false);
498 }
499}
500
501/// A mutable view of the var-len section of a [`Page`].
502pub struct VarView<'page> {
503 /// A mutable view of the var-len bytes.
504 var_row_data: &'page mut Bytes,
505 /// A mutable view of the var-len header.
506 header: &'page mut VarHeader,
507 /// One past the end of the fixed-len section of the page.
508 last_fixed: PageOffset,
509}
510
511impl<'page> VarView<'page> {
512 /// Returns the number of granules required to store the data,
513 /// whether the page has enough space,
514 /// and whether the object needs to go in the blob store.
515 ///
516 /// If the third value is `true`, i.e., the object will go in the blob store,
517 /// the first value will always be `1`.
518 fn has_enough_space_for(&self, len_in_bytes: usize) -> (usize, bool, bool) {
519 let (num_granules_req, in_blob) = VarLenGranule::bytes_to_granules(len_in_bytes);
520 let enough_space = num_granules_req <= self.num_granules_available();
521 (num_granules_req, enough_space, in_blob)
522 }
523
524 /// Returns the number of granules available for allocation.
525 fn num_granules_available(&self) -> usize {
526 self.header.freelist_len as usize
527 + VarLenGranule::space_to_granules(gap_remaining_size(self.header.first, self.last_fixed))
528 }
529
530 /// Provides an adjuster of offset in terms of `Page::row_data`
531 /// to work in terms of `VarView::var_row_data`.
532 ///
533 /// This has to be done due to `page.row_data.split_at_mut(last_fixed)`.
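///
/// E.g. (informal): if `last_fixed == 0x100`, a granule at page offset `0x8C0`
/// lives at `0x8C0 - 0x100 = 0x7C0` within `var_row_data`.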
534 #[inline(always)]
535 fn adjuster(&self) -> impl FnOnce(PageOffset) -> PageOffset {
536 let lf = self.last_fixed;
537 move |offset| offset - lf
538 }
539
540 /// Allocates a linked-list of granules, in the var-len storage of the page,
541 /// for a var-len object of `obj_len` bytes.
542 ///
543 /// Returns a [`VarLenRef`] pointing to the head of that list,
544 /// and a boolean `in_blob` for whether the allocation is a `BlobHash`
545 /// and the object must be inserted into the large-blob store.
546 ///
547 /// The length of each granule is set, but no data is written to any granule.
548 /// Thus, the caller must proceed to write data to each granule for the claimed lengths.
549 ///
550 /// # Safety post-requirements
551 ///
552 /// The following are the safety *post-requirements* of calling this method.
553 /// That is, this method is safe to call,
554 /// but may leave the page in an inconsistent state
555 /// which must be rectified before other **unsafe methods** may be called.
556 ///
557 /// 1. When the returned `in_blob` holds, caller must ensure that,
558 /// before the granule's data is read from / assumed to be initialized,
559 /// the granule pointed to by the returned `vlr.first_granule`
560 /// has an initialized header and a data section initialized to at least
561 /// as many bytes as claimed by the header.
562 ///
563 /// 2. The caller must initialize each granule with data for the claimed length
564 /// of the granule's data.
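///
/// # Example (sketch)
///
/// A rough sketch of the intended calling pattern, assuming a `var: VarView<'_>`
/// and an `obj_len` already in scope (identifiers here are illustrative only):
///
/// ```ignore
/// let (vlr, in_blob) = var.alloc_for_len(obj_len)?;
/// if !in_blob {
///     // Post-requirement 2.: fill every granule with data for its claimed length.
///     // SAFETY: `vlr.first_granule` was just returned by the allocation above.
///     let mut iter = unsafe { var.granule_offset_iter(vlr.first_granule) };
///     while let Some(offset) = iter.next() {
///         // SAFETY: `offset` was yielded by the iterator, so it points to a granule.
///         let data = unsafe { iter.get_mut_data(offset, 0) };
///         // ... copy the next chunk of the object into `data` ...
///     }
/// }
/// ```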
565 pub fn alloc_for_len(&mut self, obj_len: usize) -> Result<(VarLenRef, bool), Error> {
566 // Safety post-requirements of `alloc_for_obj_common`:
567 // 1. caller promised they will be satisfied.
568 // 2a. already satisfied as the closure below returns all the summands of `obj_len`.
569 // 2b. caller promised in 2. that they will satisfy this.
570 self.alloc_for_obj_common(obj_len, |req_granules| {
571 let rem = obj_len % VarLenGranule::DATA_SIZE;
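// E.g. (informal sketch): with `obj_len = 150` and `DATA_SIZE = 62`,
// `req_granules = 3` and `rem = 26`; the granule allocated first (the list tail)
// claims 26 bytes and the remaining two claim 62 bytes each.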
572 (0..req_granules).map(move |rev_idx| {
573 let len = if rev_idx == 0 && rem != 0 {
574 // The first allocated granule will be the last in the list.
575 // Thus, `rev_idx == 0` is the last element and might not take up a full granule.
576 rem
577 } else {
578 VarLenGranule::DATA_SIZE
579 };
580 // Caller will initialize the granule's data for `len` bytes.
581 (<&[u8]>::default(), len)
582 })
583 })
584 }
585
586 /// Returns an iterator over all offsets of the `VarLenGranule`s of the var-len object
587 /// that has its first granule at offset `first_granule`.
588 /// An empty iterator will be returned when `first_granule` is `NULL`.
589 ///
590 /// # Safety
591 ///
592 /// `first_granule` must be an offset to a granule or `NULL`.
593 /// The data of the granule need not be initialized.
594 pub unsafe fn granule_offset_iter(&mut self, first_granule: PageOffset) -> GranuleOffsetIter<'_, 'page> {
595 GranuleOffsetIter {
596 next_granule: first_granule,
597 var_view: self,
598 }
599 }
600
601 /// Allocates and stores `slice` as a linked-list of granules
602 /// in the var-len storage of the page.
603 ///
604 /// Returns a [`VarLenRef`] pointing to the head of that list,
605 /// and a boolean `in_blob` for whether the allocation is a `BlobHash`
606 /// and the `slice` must be inserted into the large-blob store.
607 ///
608 /// # Safety post-requirements
609 ///
610 /// The following are the safety *post-requirements* of calling this method.
611 /// That is, this method is safe to call,
612 /// but may leave the page in an inconsistent state
613 /// which must be rectified before other **unsafe methods** may be called.
614 ///
615 /// 1. When the returned `in_blob` holds, caller must ensure that,
616 /// before the granule's data is read from / assumed to be initialized,
617 /// the granule pointed to by the returned `vlr.first_granule`
618 /// has an initialized header and a data section initialized to at least
619 /// as many bytes as claimed by the header.
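///
/// # Example (sketch)
///
/// A minimal sketch of a caller, assuming `page: &mut Page`, a `slice: &[u8]`,
/// and a `blob_store: &mut dyn BlobStore` in scope (illustrative, not normative):
///
/// ```ignore
/// let (_fixed, mut var) = page.split_fixed_var_mut();
/// let (vlr, in_blob) = var.alloc_for_slice(slice)?;
/// if in_blob {
///     // Post-requirement 1.: store the blob hash before the granule is ever read.
///     unsafe { var.write_large_blob_hash_to_granule(blob_store, &slice, vlr) };
/// }
/// // The caller then stores `vlr` into the row's `VarLenRef` slot.
/// ```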
620 pub fn alloc_for_slice(&mut self, slice: &[u8]) -> Result<(VarLenRef, bool), Error> {
621 let obj_len = slice.len();
622 // Safety post-requirement 2. of `alloc_for_obj_common` is already satisfied
623 // as `chunks(slice)` will return sub-slices where the sum is `obj_len`.
624 // Moreover, we initialize each granule already with the right data and length.
625 // The requirement 1. is forwarded to the caller.
626 let chunks = |_| VarLenGranule::chunks(slice).rev().map(|c| (c, c.len()));
627 self.alloc_for_obj_common(obj_len, chunks)
628 }
629
630 /// Allocates for `obj_len` bytes as a linked-list of granules
631 /// in the var-len storage of the page.
632 ///
633 /// For every granule in the aforementioned linked-list,
634 /// the caller must provide an element in the *reversed* iterator `chunks`,
635 /// and of pairs `(chunk, len)`.
636 /// To each granule `chunk` will be written and the granule will be of length `len`.
637 /// The caller may provide a `chunk` whose length differs from `len`.
638 ///
639 /// Returns a [`VarLenRef`] pointing to the head of that list,
640 /// and a boolean `in_blob` for whether the allocation is a `BlobHash`
641 /// and the `slice` must be inserted into the large-blob store.
642 ///
643 /// # Safety post-requirements
644 ///
645 /// The following are the safety *post-requirements* of calling this method.
646 /// That is, this method is safe to call,
647 /// but may leave the page in an inconsistent state
648 /// which must be rectified before other **unsafe methods** may be called.
649 ///
650 /// 1. When the returned `in_blob` holds, caller must ensure that,
651 /// before the granule's data is read from / assumed to be initialized,
652 /// the granule pointed to by the returned `vlr.first_granule`
653 /// has an initialized header and a data section initialized to at least
654 /// as many bytes as claimed by the header.
655 ///
656 /// 2. Otherwise, when `in_blob` doesn't hold the safety post-requirements are:
657 ///
658 /// a. Let `cs = chunks(req_granules)` for the `req_granules` derived from `obj_len`.
659 /// Then, `obj_len == cs.map(|(_, len)| len).sum()`.
660 ///
661 /// b. For each `(_, len) ∈ cs`, caller must ensure that
662 /// the relevant granule is initialized with data for at least `len`
663 /// before the granule's data is read from / assumed to be initialized.
664 #[expect(clippy::doc_overindented_list_items)]
665 fn alloc_for_obj_common<'chunk, Cs: Iterator<Item = (&'chunk [u8], usize)>>(
666 &mut self,
667 obj_len: usize,
668 chunks: impl Copy + FnOnce(usize) -> Cs,
669 ) -> Result<(VarLenRef, bool), Error> {
670 // Check that we have sufficient space to allocate `obj_len` bytes in var-len data.
671 let (req_granules, enough_space, in_blob) = self.has_enough_space_for(obj_len);
672 if !enough_space {
673 return Err(Error::InsufficientVarLenSpace {
674 need: req_granules.try_into().unwrap_or(u16::MAX),
675 have: self.num_granules_available().try_into().unwrap_or(u16::MAX),
676 });
677 }
678
679 // For large blob objects, only reserve a granule.
680 // The caller promised that they will initialize it with a blob hash.
681 if in_blob {
682 let vlr = self.alloc_blob_hash()?;
683 return Ok((vlr, true));
684 };
685
686 // Write each `chunk` to var-len storage.
687 // To do this, we allocate granules for and store the chunks in reverse,
688 // starting with the end first.
689 // The offset to the previous granule in the iteration is kept to
690 // link it in as the next pointer in the current iteration.
691 let mut next = PageOffset::VAR_LEN_NULL;
692 debug_assert_eq!(obj_len, chunks(req_granules).map(|(_, len)| len).sum::<usize>());
693 for (chunk, len) in chunks(req_granules) {
694 // This should never error, since we already checked for available space.
695 let granule = self.alloc_granule()?;
696 // SAFETY:
697 // 1. `granule` is properly aligned as it came from `alloc_granule`
698 // and so is `next` as it's either NULL or was the previous `granule`.
699 // This also ensures that both are in bounds
700 // of the page for `granule..granule + VarLenGranule::SIZE`.
701 //
702 // 2. `next` is either NULL or was initialized in the previous loop iteration.
703 //
704 // 3. `granule` points to an unused slot as the space was just allocated.
705 unsafe { self.write_chunk_to_granule(chunk, len, granule, next) };
706 next = granule;
707 }
708
709 Ok((
710 VarLenRef {
711 first_granule: next,
712 length_in_bytes: obj_len as u16,
713 },
714 false,
715 ))
716 }
717
718 /// Allocates a granule for a large blob object
719 /// and returns a [`VarLenRef`] pointing to that granule.
720 ///
721 /// The granule is not initialized by this method, and contains valid-unconstrained bytes.
722 /// It is the caller's responsibility to initialize it with a [`BlobHash`](super::blob_hash::BlobHash).
723 #[cold]
724 fn alloc_blob_hash(&mut self) -> Result<VarLenRef, Error> {
725 // Blob hashes are 32 bytes, which fit within a single granule.
726 self.alloc_granule().map(VarLenRef::large_blob)
727 }
728
729 /// Inserts `var_len_obj` into `blob_store`
730 /// and stores the blob hash in the granule pointed to by `vlr.first_granule`.
731 ///
732 /// This insertion will never fail.
733 ///
734 /// # Safety
735 ///
736 /// `vlr.first_granule` must point to an unused `VarLenGranule` in bounds of this page,
737 /// which must be valid for writes.
738 pub unsafe fn write_large_blob_hash_to_granule(
739 &mut self,
740 blob_store: &mut dyn BlobStore,
741 var_len_obj: &impl AsRef<[u8]>,
742 vlr: VarLenRef,
743 ) -> BlobNumBytes {
744 let hash = blob_store.insert_blob(var_len_obj.as_ref());
745
746 let granule = vlr.first_granule;
747 // SAFETY:
748 // 1. `granule` is properly aligned for `VarLenGranule` and is in bounds of the page.
749 // 2. The null granule is trivially initialized.
750 // 3. The caller promised that `granule` is safe to overwrite.
751 unsafe { self.write_chunk_to_granule(&hash.data, hash.data.len(), granule, PageOffset::VAR_LEN_NULL) };
752 var_len_obj.as_ref().len().into()
753 }
754
755 /// Write the `chunk` (data) to the [`VarLenGranule`] pointed to by `granule`,
756 /// set the granule's length to be `len`,
757 /// and set the next granule in the list to `next`.
758 ///
759 /// SAFETY:
760 ///
761 /// 1. Both `granule` and `next` must be properly aligned pointers to [`VarLenGranule`]s
762 /// and they must be in bounds of the page. However, neither need to point to init data.
763 ///
764 /// 2. The caller must initialize the granule pointed to by `next`
765 /// before the granule-list is read from (e.g., iterated on).
766 /// The null granule is considered trivially initialized.
767 ///
768 /// 3. The space pointed to by `granule` must be unused and valid for writes,
769 /// and will be overwritten here.
770 unsafe fn write_chunk_to_granule(&mut self, chunk: &[u8], len: usize, granule: PageOffset, next: PageOffset) {
771 let granule = self.adjuster()(granule);
772 // SAFETY: A `PageOffset` is always in bounds of the page.
773 let ptr: *mut VarLenGranule = unsafe { offset_to_ptr_mut(self.var_row_data, granule).cast() };
774
775 // TODO(centril,bikeshedding): check if creating the `VarLenGranule` first on stack
776 // and then writing to `ptr` would have any impact on perf.
777 // This would be nicer as it requires less `unsafe`.
778
779 // We need to initialize the granule's header
780 // without materializing a `&mut` as that is instant UB.
781 // SAFETY: `ptr` isn't NULL as `&mut self.var_row_data` itself is a non-null pointer.
782 let header = unsafe { &raw mut (*ptr).header };
783
784 // SAFETY: `header` is valid for writes as only we have exclusive access.
785 // (1) The `ptr` was also promised as aligned
786 // and `granule..granule + 64 bytes` is in bounds of the page per caller contract.
787 // (2) Moreover, `next` will be an initialized granule per caller contract,
788 // so we can link it into the list without causing UB elsewhere.
789 // (3) It's also OK to write to `granule` as it's unused.
790 unsafe {
791 header.write(VarLenGranuleHeader::new(len as u8, next));
792 }
793
794 // SAFETY: We can treat any part of `row_data` as `.data`. Also (1) and (2).
795 let data = unsafe { &mut (*ptr).data };
796
797 // Copy the data into the granule.
798 data[0..chunk.len()].copy_from_slice(chunk);
799 }
800
801 /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`].
802 ///
803 /// The allocated storage is not initialized by this method,
804 /// and will be valid-unconstrained at [`VarLenGranule`].
805 ///
806 /// This offset will be properly aligned for `VarLenGranule` when converted to a pointer.
807 ///
808 /// Returns an error when there are neither free granules nor space in the gap left.
809 fn alloc_granule(&mut self) -> Result<PageOffset, Error> {
810 let granule = self
811 .alloc_from_freelist()
812 .or_else(|| self.alloc_from_gap())
813 .ok_or(Error::InsufficientVarLenSpace { need: 1, have: 0 })?;
814
815 debug_assert!(
816 is_granule_offset_aligned(granule),
817 "Allocated an unaligned var-len granule: {granule:x}",
818 );
819
820 self.header.num_granules += 1;
821
822 Ok(granule)
823 }
824
825 /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`]
826 /// taken from the freelist, if any.
827 #[inline]
828 fn alloc_from_freelist(&mut self) -> Option<PageOffset> {
829 // SAFETY: `header.next_free` points to a valid `FreeCellRef` when it `.has()`.
830 let free = unsafe {
831 self.header
832 .next_free
833 .take_freelist_head(self.var_row_data, |o| o - self.last_fixed)
834 }?;
835 self.header.freelist_len -= 1;
836 Some(free)
837 }
838
839 /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`]
840 /// taken from the gap, if there is space left, or `None` if there is insufficient space.
841 #[inline]
842 fn alloc_from_gap(&mut self) -> Option<PageOffset> {
843 if gap_enough_size_for_row(self.header.first, self.last_fixed, VarLenGranule::SIZE) {
844 // `var.first` points *at* the lowest-indexed var-len granule,
845 // *not* before it, so pre-decrement.
846 self.header.first -= VarLenGranule::SIZE;
847 Some(self.header.first)
848 } else {
849 None
850 }
851 }
852
853 /// Free a single var-len granule pointed to by `offset`.
854 ///
855 /// SAFETY: `offset` must point to a valid [`VarLenGranule`].
856 #[inline]
857 unsafe fn free_granule(&mut self, offset: PageOffset) {
858 // TODO(perf,future-work): if `chunk` is at the HWM, return it to the gap.
859 // Returning a single chunk to the gap is easy,
860 // but we want to return a whole "run" of sequential freed chunks,
861 // which requires some bookkeeping (or an O(> n) linked list traversal).
862 self.header.freelist_len += 1;
863 self.header.num_granules -= 1;
864 let adjuster = self.adjuster();
865
866 // SAFETY: Per caller contract, `offset` is a valid `VarLenGranule`,
867 // and is therefore in bounds of the page row data.
868 // By `_VLG_CAN_STORE_FCR`, and as we won't be reading from the granule anymore,
869 // we know that this makes it valid for writing a `FreeCellRef` to it.
870 // Moreover, by `_VLG_ALIGN_MULTIPLE_OF_FCR`,
871 // the derived pointer is properly aligned (64) for a granule
872 // and as `64 % 2 == 0` the alignment of a granule works for a `FreeCellRef`.
873 // Finally, `self.header.next_free` contains a valid `FreeCellRef`.
874 unsafe {
875 self.header
876 .next_free
877 .prepend_freelist(self.var_row_data, offset, adjuster)
878 };
879 }
880
881 /// Returns a reference to the granule at `offset`.
882 ///
883 /// SAFETY: `offset` must point to a valid [`VarLenGranule`].
884 unsafe fn get_granule_ref(&self, offset: PageOffset) -> &VarLenGranule {
885 unsafe { get_ref(self.var_row_data, self.adjuster()(offset)) }
886 }
887
888 /// Frees the blob pointed to by the [`BlobHash`] stored in the granule at `offset`.
889 ///
890 /// Panics when `offset` is NULL.
891 ///
892 /// SAFETY: `offset` must point to a valid [`VarLenGranule`] or be NULL.
893 #[cold]
894 #[inline(never)]
895 unsafe fn free_blob(&self, offset: PageOffset, blob_store: &mut dyn BlobStore) -> BlobNumBytes {
896 assert!(!offset.is_var_len_null());
897
898 // SAFETY: Per caller contract + the assertion above,
899 // we know `offset` refers to a valid `VarLenGranule`.
900 let granule = unsafe { self.get_granule_ref(offset) };
901
902 // Extract the blob hash from the granule.
903 let hash = granule.blob_hash();
904
905 // The size of `deleted_bytes` is calculated here instead of requesting it from `blob_store`.
906 // This is because the actual number of bytes deleted depends on the `blob_store`'s logic.
907 // We prefer to measure it from the datastore's point of view.
908 let blob_store_deleted_bytes = blob_store
909 .retrieve_blob(&hash)
910 .expect("failed to free var-len blob")
911 .len()
912 .into();
913
914 // Actually free the blob.
915 blob_store.free_blob(&hash).expect("failed to free var-len blob");
916
917 blob_store_deleted_bytes
918 }
919
920 /// Frees an entire var-len linked-list object.
921 ///
922 /// If the `var_len_obj` is a large blob,
923 /// the `VarLenGranule` which stores its blob hash will be freed from the page,
924 /// but the blob itself will not be freed from the blob store.
925 /// If used incorrectly, this may leak large blobs.
926 ///
927 /// This behavior is used to roll back on failure in [`crate::bflatn::ser::write_av_to_page`],
928 /// where inserting large blobs is deferred until all allocations succeed.
929 /// Freeing a fully-inserted object should instead use [`Self::free_object`].
930 ///
931 /// # Safety
932 ///
933 /// `var_len_obj.first_granule` must point to a valid [`VarLenGranule`] or be NULL.
934 pub unsafe fn free_object_ignore_blob(&mut self, var_len_obj: VarLenRef) {
935 let mut next_granule = var_len_obj.first_granule;
936
937 while !next_granule.is_var_len_null() {
938 // SAFETY: Per caller contract, `first_granule` points to a valid granule or is NULL.
939 // We know however at this point that it isn't NULL so it is valid.
940 // Thus the successor is too a valid granule or NULL.
941 // However, again, at this point we know that the successor isn't NULL.
942 // It follows then by induction that any `next_granule` at this point is valid.
943 // Thus we have fulfilled the requirement that `next_granule` points to a valid granule.
944 let header = unsafe { self.get_granule_ref(next_granule) }.header;
945 // SAFETY: `next_granule` still points to a valid granule per above.
946 unsafe {
947 self.free_granule(next_granule);
948 }
949 next_granule = header.next();
950 }
951 }
952
953 /// Frees an entire var-len linked-list object.
954 ///
955 /// SAFETY: `var_len_obj.first_granule` must point to a valid [`VarLenGranule`] or be NULL.
956 unsafe fn free_object(&mut self, var_len_obj: VarLenRef, blob_store: &mut dyn BlobStore) -> BlobNumBytes {
957 let mut blob_store_deleted_bytes = BlobNumBytes::default();
958 // For large blob objects, extract the hash and tell `blob_store` to discard it.
959 if var_len_obj.is_large_blob() {
960 // SAFETY: `var_len_obj.first_granule` was promised to
961 // point to a valid [`VarLenGranule`] or be NULL, as required.
962 unsafe {
963 blob_store_deleted_bytes = self.free_blob(var_len_obj.first_granule, blob_store);
964 }
965 }
966
967 // SAFETY: `free_object_ignore_blob` has the same safety contract as this method.
968 unsafe {
969 self.free_object_ignore_blob(var_len_obj);
970 }
971
972 blob_store_deleted_bytes
973 }
974}
975
976/// An iterator yielding the offsets to the granules of a var-len object.
977pub struct GranuleOffsetIter<'vv, 'page> {
978 /// Our mutable view of the page.
979 var_view: &'vv mut VarView<'page>,
980 /// The offset of the next granule, which will be yielded next.
981 next_granule: PageOffset,
982}
983
984impl GranuleOffsetIter<'_, '_> {
985 /// Returns, for the `granule` at `offset`, a mutable view of `granule.data[start..]`.
986 ///
987 /// # Safety
988 ///
989 /// - `offset` must point to a valid granule
990 /// - `start < VarLenGranule::DATA_SIZE`
991 pub unsafe fn get_mut_data(&mut self, offset: PageOffset, start: usize) -> &mut Bytes {
992 // SAFETY: Caller promised that `offset` points to a valid granule.
993 let granule: &mut VarLenGranule = unsafe { get_mut(self.var_view.var_row_data, offset) };
994 // SAFETY: Caller promised `start < granule.data.len()`.
995 unsafe { granule.data.as_mut_slice().get_unchecked_mut(start..) }
996 }
997}
998
999impl Iterator for GranuleOffsetIter<'_, '_> {
1000 type Item = PageOffset;
1001 fn next(&mut self) -> Option<Self::Item> {
1002 let adjust = self.var_view.adjuster();
1003
1004 if self.next_granule.is_var_len_null() {
1005 return None;
1006 }
1007 let ret = adjust(self.next_granule);
1008 // SAFETY: By construction,
1009 // the initial `next_granule` was promised to either be `NULL` or point to a valid granule.
1010 // For a given granule, the same applies to its `.next()` granule.
1011 // At this point, we've excluded `NULL`,
1012 // so we know inductively that `next_granule` points to a valid granule, as required.
1013 let granule: &VarLenGranule = unsafe { get_ref(self.var_view.var_row_data, ret) };
1014 self.next_granule = granule.header.next();
1015
1016 Some(ret)
1017 }
1018}
1019
1020/// Assert that `ptr` is sufficiently aligned to reference a value of `T`.
1021///
1022/// In release mode, this is a no-op.
1023fn assert_alignment<T>(ptr: *const Byte) {
1024 debug_assert_eq!(
1025 ptr as usize % mem::align_of::<T>(),
1026 0,
1027 "Wanted a PageOffset with align 0x{:x} (for {}) but found 0x{:x}",
1028 mem::align_of::<T>(),
1029 std::any::type_name::<T>(),
1030 ptr as usize,
1031 );
1032}
1033
1034/// Returns a reference to the [`T`] pointed to by `offset`.
1035///
1036/// # Safety
1037///
1038/// `offset` must point to a valid `T` in `row_data`.
1039#[inline]
1040pub unsafe fn get_ref<T>(row_data: &Bytes, offset: PageOffset) -> &T {
1041 // SAFETY: Caller promised that `offset` is in bounds of `row_data`.
1042 let ptr = unsafe { offset_to_ptr(row_data, offset) };
1043 assert_alignment::<T>(ptr);
1044 let ptr = ptr.cast::<T>();
1045 // SAFETY: Caller promised that `offset` points to a `T` in `row_data`.
1046 unsafe { &*ptr }
1047}
1048
1049/// Returns a mutable reference to the [`T`] pointed to by `offset`.
1050///
1051/// # Safety
1052///
1053/// `offset` must point to a valid `T` in `row_data`.
1054#[inline]
1055unsafe fn get_mut<T>(row_data: &mut Bytes, offset: PageOffset) -> &mut T {
1056 // SAFETY: Caller promised that `offset` is in bounds of `row_data`.
1057 let ptr = unsafe { offset_to_ptr_mut(row_data, offset) };
1058 assert_alignment::<T>(ptr as *const Byte);
1059 let ptr = ptr.cast::<T>();
1060 // SAFETY: Caller promised that `offset` points to a `T` in `row_data`.
1061 unsafe { &mut *ptr }
1062}
1063
1064/// Returns a raw const pointer into the `row_data` at `offset` bytes.
1065///
1066/// # Safety
1067///
1068/// `offset` must be in bounds or one past end of `row_data`.
1069#[inline]
1070unsafe fn offset_to_ptr(row_data: &Bytes, offset: PageOffset) -> *const Byte {
1071 debug_assert!(offset.idx() <= row_data.len());
1072
1073 // SAFETY: per caller contract, `offset` is in bounds or one past end of `row_data`.
1074 unsafe { row_data.as_ptr().add(offset.idx()) }
1075}
1076
1077/// Returns a raw mutable pointer into the `row_data` at `offset` bytes.
1078///
1079/// SAFETY: `offset` must be in bounds or one past end of `row_data`.
1080#[inline]
1081unsafe fn offset_to_ptr_mut(row_data: &mut Bytes, offset: PageOffset) -> *mut Byte {
1082 debug_assert!(offset.idx() <= row_data.len());
1083
1084 // SAFETY: per caller contract, `offset` is in bounds or one past end of `row_data`.
1085 unsafe { row_data.as_mut_ptr().add(offset.idx()) }
1086}
1087
1088/// Returns the size of the gap,
1089/// assuming `first_var` is the high water mark (HWM) of the var-len section,
1090/// pointing *at* the granule with the lowest offset,
1091/// and `last_fixed` is the HWM of the fixed-len section,
1092/// pointing *one past the end* of the last fixed row.
1093#[inline]
1094fn gap_remaining_size(first_var: PageOffset, last_fixed: PageOffset) -> Size {
1095 // For illustration, suppose `row_data` is 10 bytes, i.e., `[Byte; 10]`.
1096 // Let's assume the following setup with a full page,
1097 // where capital letters are fixed rows and lower case are variable.
1098 //
1099 // [ A, B, C, D, E, f, g, h, i, j ]
1100 // ^
1101 // first_var
1102 // ^
1103 // last_fixed
1104 //
1105 // The high water mark `first_var` points *at* the granule with the lowest offset (`f`).
1106 // Whereas `last_fixed` points *one past the end* (`f`) of the last fixed row (`E`)
1107 //
1108 // This is the case we have to consider in terms of possible underflow.
1109 // As both HWMs would point at the same place,
1110 // the result would be `0`, and no underflow occurs.
1111 Size((first_var - last_fixed).0)
1112}
1113
1114/// Returns whether the remaining gap is large enough to host an object `fixed_row_size` large,
1115/// assuming `first_var` is the high water mark (HWM) of the var-len section,
1116/// pointing *at* the granule with the lowest offset,
1117/// and `last_fixed` is the HWM of the fixed-len section,
1118/// pointing *one past the end* of the last fixed row.
1119#[inline]
1120fn gap_enough_size_for_row(first_var: PageOffset, last_fixed: PageOffset, fixed_row_size: Size) -> bool {
1121 gap_remaining_size(first_var, last_fixed) >= fixed_row_size
1122}
1123
1124impl Page {
1125 /// Returns a new page allocated on the heap.
1126 ///
1127 /// The new page supports rows of size `fixed_row_size`.
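///
/// E.g. (sketch): `Page::new(Size(16))` returns a page sized for a 16-byte row type.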
1128 pub fn new(fixed_row_size: Size) -> Box<Self> {
1129 Self::new_with_max_row_count(max_rows_in_page(fixed_row_size))
1130 }
1131
1132 /// Returns a new page allocated on the heap.
1133 ///
1134 /// The new page supports at most `max_rows_in_page` rows.
1135 pub fn new_with_max_row_count(max_rows_in_page: usize) -> Box<Self> {
1136 // TODO(perf): mmap? allocator may do so already.
1137 // mmap may be more efficient as we save allocator metadata.
1138 use std::alloc::{alloc_zeroed, handle_alloc_error, Layout};
1139
1140 let layout = Layout::new::<Page>();
1141
1142 // Allocate with `alloc_zeroed` so that the bytes are initially 0, rather than uninit.
1143 // We will never write an uninit byte into the page except in the `PageHeader`,
1144 // so it is safe for `row_data` to have type `[u8; _]` rather than `[MaybeUninit<u8>; _]`.
1145 // `alloc_zeroed` may be more efficient than `alloc` + `memset`;
1146 // in particular, it may `mmap` pages directly from the OS, which are always zeroed for security reasons.
1147 // TODO: use Box::new_zeroed() once stabilized.
1148 // SAFETY: The layout's size is non-zero.
1149 let raw: *mut Page = unsafe { alloc_zeroed(layout) }.cast();
1150
1151 if raw.is_null() {
1152 handle_alloc_error(layout);
1153 }
1154
1155 // We need to initialize `Page::header`
1156 // without materializing a `&mut` as that is instant UB.
1157 // SAFETY: `raw` isn't NULL.
1158 let header = unsafe { &raw mut (*raw).header };
1159
1160 // SAFETY: `header` is valid for writes as only we have exclusive access.
1161 // The pointer is also aligned.
1162 unsafe { header.write(PageHeader::new(max_rows_in_page)) };
1163
1164 // SAFETY: We used the global allocator with a layout for `Page`.
1165 // We have initialized the `header`,
1166 // and the `row_bytes` are initially 0 by `alloc_zeroed`,
1167 // making the pointee a `Page` valid for reads and writes.
1168 unsafe { Box::from_raw(raw) }
1169 }
1170
1171 /// Returns the number of rows stored in this page.
1172 ///
1173 /// This method runs in constant time.
1174 pub fn num_rows(&self) -> usize {
1175 self.header.fixed.num_rows as usize
1176 }
1177
1178 #[cfg(test)]
1179 /// Use this page's present rows bitvec to compute the number of present rows.
1180 ///
1181 /// This can be compared with [`Self::num_rows`] as a consistency check during tests.
1182 pub fn reconstruct_num_rows(&self) -> usize {
1183 // If we cared, we could rewrite this to `u64::count_ones` on each block of the bitset.
1184 // We do not care. This method is slow.
1185 self.header.fixed.present_rows.iter_set().count()
1186 }
1187
1188 /// Returns the number of var-len granules allocated in this page.
1189 ///
1190 /// This method runs in constant time.
1191 pub fn num_var_len_granules(&self) -> usize {
1192 self.header.var.num_granules as usize
1193 }
1194
1195 #[cfg(test)]
1196 /// # Safety
1197 ///
1198 /// - `var_len_visitor` must be a valid [`VarLenMembers`] visitor
1199 /// specialized to the type and layout of rows within this [`Page`].
1200 /// - `fixed_row_size` must be exactly the length in bytes of fixed rows in this page,
1201 /// which must further be the length of rows expected by the `var_len_visitor`.
1202 pub unsafe fn reconstruct_num_var_len_granules(
1203 &self,
1204 fixed_row_size: Size,
1205 var_len_visitor: &impl VarLenMembers,
1206 ) -> usize {
1207 self.iter_fixed_len(fixed_row_size)
1208 .flat_map(|row| unsafe {
1209 // Safety: `row` came out of `iter_fixed_len`,
1210 // which, due to caller requirements on `fixed_row_size`,
1211 // is giving us valid, aligned, initialized rows of the row type.
1212 var_len_visitor.visit_var_len(self.get_row_data(row, fixed_row_size))
1213 })
1214 .flat_map(|var_len_obj| unsafe {
1215 // Safety: We believe `row` to be valid
1216 // and `var_len_visitor` to be correctly visiting its var-len members.
1217 // Therefore, `var_len_obj` is a valid var-len object.
1218 self.iter_var_len_object(var_len_obj.first_granule)
1219 })
1220 .count()
1221 }
1222
1223 /// Returns the number of bytes used by rows stored in this page.
1224 ///
1225 /// This is necessarily an overestimate of live data bytes, as it includes:
1226 /// - Padding bytes within the fixed-length portion of the rows.
1227 /// - [`VarLenRef`] pointer-like portions of rows.
1228 /// - Unused trailing parts of partially-filled [`VarLenGranule`]s.
1229 /// - [`VarLenGranule`]s used to store [`BlobHash`]es.
1230 ///
1231 /// Note that large blobs themselves are not counted.
1232 /// The caller should obtain a count of the bytes used by large blobs
1233 /// from the [`super::blob_store::BlobStore`].
1234 ///
1235 /// This method runs in constant time.
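///
/// E.g. (informal): a page holding 10 rows of a 16-byte row type which together
/// use 3 var-len granules reports `10 * 16 + 3 * 64 = 352` bytes.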
1236 pub fn bytes_used_by_rows(&self, fixed_row_size: Size) -> usize {
1237 let fixed_row_bytes = self.num_rows() * fixed_row_size.len();
1238 let var_len_bytes = self.num_var_len_granules() * VarLenGranule::SIZE.len();
1239 fixed_row_bytes + var_len_bytes
1240 }
1241
1242 #[cfg(test)]
1243 /// # Safety
1244 ///
1245 /// - `var_len_visitor` must be a valid [`VarLenMembers`] visitor
1246 /// specialized to the type and layout of rows within this [`Page`].
1247 /// - `fixed_row_size` must be exactly the length in bytes of fixed rows in this page,
1248 /// which must further be the length of rows expected by the `var_len_visitor`.
1249 pub unsafe fn reconstruct_bytes_used_by_rows(
1250 &self,
1251 fixed_row_size: Size,
1252 var_len_visitor: &impl VarLenMembers,
1253 ) -> usize {
1254 let fixed_row_bytes = self.reconstruct_num_rows() * fixed_row_size.len();
1255 let var_len_bytes = unsafe { self.reconstruct_num_var_len_granules(fixed_row_size, var_len_visitor) }
1256 * VarLenGranule::SIZE.len();
1257 fixed_row_bytes + var_len_bytes
1258 }
1259
1260 /// Returns the range of row data starting at `offset` and lasting `size` bytes.
1261 pub fn get_row_data(&self, row: PageOffset, size: Size) -> &Bytes {
1262 &self.row_data[row.range(size)]
1263 }
1264
1265 /// Returns whether the row at `offset` is present or not.
1266 pub fn has_row_offset(&self, fixed_row_size: Size, offset: PageOffset) -> bool {
1267 // Check that the `offset` is properly aligned for a row of size `fixed_row_size`.
1268 // This cannot be `debug_assert!` as the caller could rely on this
1269 // reporting properly whether `offset` is at a row boundary or not.
1270 assert_eq!(offset.idx() % fixed_row_size.len(), 0);
1271
1272 self.header.fixed.is_row_present(offset, fixed_row_size)
1273 }
1274
1275 /// Returns split mutable views of this page over the fixed and variable sections.
1276 pub fn split_fixed_var_mut(&mut self) -> (FixedView<'_>, VarView<'_>) {
1277 // The fixed HWM (`fixed.last`) points *one past the end* of the fixed section
1278 // which is exactly what we want for `split_at_mut`.
1279 let last_fixed = self.header.fixed.last;
1280 let (fixed_row_data, var_row_data) = self.row_data.split_at_mut(last_fixed.idx());
1281
1282 // Construct the fixed-len view.
1283 let fixed = FixedView {
1284 fixed_row_data,
1285 header: &mut self.header.fixed,
1286 };
1287
1288 // Construct the var-len view.
1289 let var = VarView {
1290 var_row_data,
1291 header: &mut self.header.var,
1292 last_fixed,
1293 };
1294
1295 (fixed, var)
1296 }
1297
1298 /// Returns a mutable view of the row from `start` lasting `fixed_row_size` number of bytes.
1299 ///
1300 /// This method is safe, but callers should take care that `start` and `fixed_row_size`
1301 /// are correct for this page, and that `start` is aligned.
1302 /// Callers should further ensure that mutations to the row leave the row bytes
1303 /// in an expected state, i.e. initialized where required by the row type,
1304 /// and with `VarLenRef`s that point to valid granules and with correct lengths.
1305 ///
1306 /// This call will clear the unmodified hash
1307 /// as it is expected that the caller will alter the page.
1308 pub fn get_fixed_row_data_mut(&mut self, start: PageOffset, fixed_row_size: Size) -> &mut Bytes {
1309 self.header.unmodified_hash = None;
1310 &mut self.row_data[start.range(fixed_row_size)]
1311 }
1312
1313 /// Return the total required var-len granules to store `objects`.
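///
/// E.g. (informal, assuming the 62-byte granule payload described on [`Page`]):
/// objects of 10 and 100 bytes require `ceil(10 / 62) + ceil(100 / 62) = 1 + 2 = 3` granules.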
1314 pub fn total_granules_required_for_objects(objects: &[impl AsRef<[u8]>]) -> usize {
1315 objects
1316 .iter()
1317 .map(|obj| VarLenGranule::bytes_to_granules(obj.as_ref().len()).0)
1318 .sum()
1319 }
1320
1321 /// Does the page have space to store a row,
1322 /// where the fixed size part is `fixed_row_size` bytes large,
1323 /// and the row has the given `var_len_objects`?
1324 pub fn has_space_for_row_with_objects(&self, fixed_row_size: Size, var_len_objects: &[impl AsRef<[u8]>]) -> bool {
1325 let num_granules_required = Self::total_granules_required_for_objects(var_len_objects);
1326 self.has_space_for_row(fixed_row_size, num_granules_required)
1327 }
1328
1329 /// Does the page have space to store a row,
1330 /// where the fixed size part is `fixed_row_size` bytes large,
1331 /// and the variable part requires `num_granules`.
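///
/// E.g. (informal): with a 40-byte gap, an empty fixed-len freelist, a 16-byte row,
/// and an empty var-len freelist, `40 - 16 = 24` bytes remain for granules,
/// which is less than one whole 64-byte granule,
/// so the row fits only if it needs `num_granules == 0`.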
1332 pub fn has_space_for_row(&self, fixed_row_size: Size, num_granules: usize) -> bool {
1333 // Determine the gap remaining after allocating for the fixed part.
1334 let gap_remaining = gap_remaining_size(self.header.var.first, self.header.fixed.last);
1335 let gap_avail_for_granules = if self.header.fixed.next_free.has() {
1336 // If we have a free fixed length block, then we can use the whole gap for var-len granules.
1337 gap_remaining
1338 } else {
1339 // If we need to grow the fixed-length store into the gap,
1340 if gap_remaining < fixed_row_size {
1341 // if the gap is too small for fixed-length row, fail.
1342 return false;
1343 }
1344 // Otherwise, the space available in the gap for var-len granules
1345 // is the current gap size less the fixed-len row size.
1346 gap_remaining - fixed_row_size
1347 };
1348
1349 // Convert the gap size to granules.
1350 let gap_in_granules = VarLenGranule::space_to_granules(gap_avail_for_granules);
1351 // Account for granules available in the freelist.
1352 let needed_granules_after_freelist = num_granules.saturating_sub(self.header.var.freelist_len as usize);
1353
1354 gap_in_granules >= needed_granules_after_freelist
1355 }
1356
1357 /// Returns whether the page is full with respect to storing a fixed row with `fixed_row_size`
1358 /// and no variable component.
1359 pub fn is_full(&self, fixed_row_size: Size) -> bool {
1360 !self.has_space_for_row(fixed_row_size, 0)
1361 }
1362
1363 /// Inserts a row into this page. Will leave partially-allocated chunks if it fails prematurely,
1364 /// so always check `Self::has_space_for_row` before calling.
1365 ///
1366 /// This method is provided for testing the page store directly;
1367 /// higher-level codepaths are expected to use [`crate::bflatn::ser::write_av_to_page`],
1368 /// which performs similar operations to this method,
1369 /// but handles rollback on failure appropriately.
1370 ///
1371 /// This function will never fail if `Self::has_space_for_row` has returned true.
1372 ///
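/// # Example
///
/// A minimal sketch (marked `ignore`, not a compiled doc-test) of the
/// check-then-insert pattern; `page`, a `visitor: &impl VarLenMembers`,
/// and a `blob_store: &mut dyn BlobStore` are assumed to be in scope
/// and consistent with this page's row type:
///
/// ```ignore
/// let fixed_row: &[u8] = &[0u8; 16]; // illustrative 16-byte fixed part
/// let var_len_objects: &[&[u8]] = &[b"some var-len payload"];
/// if page.has_space_for_row_with_objects(Size(fixed_row.len() as u16), var_len_objects) {
///     // SAFETY: the visitor and `fixed_row.len()` match this page's row type.
///     let offset = unsafe { page.insert_row(fixed_row, var_len_objects, visitor, blob_store) }?;
/// }
/// ```
///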
1373 /// # Safety
1374 ///
1375 /// - `var_len_visitor` is suitable for visiting var-len refs in `fixed_row`.
1376 ///
1377 /// - `fixed_row.len()` must be consistent with `var_len_visitor` and `self`.
1378 /// That is, `VarLenMembers` must be specialized for a row type with that length,
1379 /// and all past, present, and future fixed-length rows stored in this `Page`
1380 /// must also be of that length.
1381 pub unsafe fn insert_row(
1382 &mut self,
1383 fixed_row: &Bytes,
1384 var_len_objects: &[impl AsRef<[u8]>],
1385 var_len_visitor: &impl VarLenMembers,
1386 blob_store: &mut dyn BlobStore,
1387 ) -> Result<PageOffset, Error> {
1388 // Allocate the fixed-len row.
1389 let fixed_row_size = Size(fixed_row.len() as u16);
1390
1391 // SAFETY: Caller promised that `fixed_row.len()` uses the right `fixed_row_size`
1392 // and we trust that others have too.
1393 let fixed_len_offset = unsafe { self.alloc_fixed_len(fixed_row_size)? };
1394
1395 // Store the fixed-len row.
1396 let (mut fixed, mut var) = self.split_fixed_var_mut();
1397 let row = fixed.get_row_mut(fixed_len_offset, fixed_row_size);
1398 row.copy_from_slice(fixed_row);
1399
1400 // Store all var-len refs into their appropriate slots in the fixed-len row.
1401 // SAFETY:
1402 // - The `fixed_len_offset` given by `alloc_fixed_len` results in `row`
1403 // being properly aligned for the row type.
1404 // - Caller promised that `fixed_row.len()` matches the row type size exactly.
1405 // - `var_len_visitor` is suitable for `fixed_row`.
1406 let vlr_slot_iter = unsafe { var_len_visitor.visit_var_len_mut(row) };
1407 for (var_len_ref_slot, var_len_obj) in vlr_slot_iter.zip(var_len_objects) {
1408 let (var_len_ref, in_blob) = var.alloc_for_slice(var_len_obj.as_ref())?;
1409 if in_blob {
1410 // The blob store insertion will never fail.
1411 // SAFETY: `alloc_for_slice` always returns a pointer
1412 // to a `VarLenGranule` in bounds of this page.
1413 // As `in_blob` holds, it is also unused, as required.
1414 // We'll now make that granule valid.
1415 unsafe {
1416 var.write_large_blob_hash_to_granule(blob_store, var_len_obj, var_len_ref);
1417 }
1418 }
1419 *var_len_ref_slot = var_len_ref;
1420 }
1421
1422 Ok(fixed_len_offset)
1423 }
1424
1425 /// Allocates space for a fixed size row of `fixed_row_size` bytes.
1426 ///
1427 /// # Safety
1428 ///
1429 /// `fixed_row_size` must be equal to the value passed
1430 /// to all other methods ever invoked on `self`.
1431 pub unsafe fn alloc_fixed_len(&mut self, fixed_row_size: Size) -> Result<PageOffset, Error> {
1432 self.alloc_fixed_len_from_freelist(fixed_row_size)
1433 .or_else(|| self.alloc_fixed_len_from_gap(fixed_row_size))
1434 .ok_or(Error::InsufficientFixedLenSpace { need: fixed_row_size })
1435 }
1436
1437 /// Allocates space for a fixed size row of `fixed_row_size` bytes from the freelist, if possible.
1438 ///
1439 /// This call will clear the unmodified hash.
1440 #[inline]
1441 fn alloc_fixed_len_from_freelist(&mut self, fixed_row_size: Size) -> Option<PageOffset> {
1442 let header = &mut self.header.fixed;
1443 // SAFETY: when `header.next_free.has()`, it points to a valid `FreeCellRef` within `row_data`.
1444 let free = unsafe { header.next_free.take_freelist_head(&self.row_data, |x| x) }?;
1445 header.set_row_present(free, fixed_row_size);
1446
1447 // We have modified the page, so clear the unmodified hash.
1448 self.header.unmodified_hash = None;
1449
1450 Some(free)
1451 }
1452
1453 /// Allocates space for a fixed size row of `fixed_row_size` bytes from the gap, if possible.
1454 ///
1455 /// This call will clear the unmodified hash.
1456 #[inline]
1457 fn alloc_fixed_len_from_gap(&mut self, fixed_row_size: Size) -> Option<PageOffset> {
1458 if gap_enough_size_for_row(self.header.var.first, self.header.fixed.last, fixed_row_size) {
1459 // We're modifying the page, so clear the unmodified hash.
1460 self.header.unmodified_hash = None;
1461
1462 // Enough space in the gap; move the high water mark and return the old HWM.
1463 // `fixed.last` points *after* the highest-indexed fixed-len row,
1464 // so post-increment.
1465 let ptr = self.header.fixed.last;
1466 self.header.fixed.last += fixed_row_size;
1467 self.header.fixed.set_row_present(ptr, fixed_row_size);
1468 Some(ptr)
1469 } else {
1470 // Not enough space in the gap for another row!
1471 None
1472 }
1473 }
1474
1475 /// Returns an iterator over all the [`PageOffset`]s of the fixed rows in this page
1476 /// beginning with `starting_from`.
1477 ///
1478 /// The rows are assumed to be `fixed_row_size` bytes long
1479 /// and `starting_from` is assumed to be at a valid starting `PageOffset` for a fixed row.
1480 ///
1481 /// NOTE: This method is not `unsafe` as it cannot trigger UB.
1482 /// However, when provided with garbage input, it will return garbage back.
1483 /// It is the caller's responsibility to ensure that `PageOffset`s derived from
1484 /// this iterator are valid when used to do anything `unsafe`.
1485 fn iter_fixed_len_from(&self, fixed_row_size: Size, starting_from: PageOffset) -> FixedLenRowsIter<'_> {
1486 let idx = starting_from / fixed_row_size;
1487 FixedLenRowsIter {
1488 idx_iter: self.header.fixed.present_rows.iter_set_from(idx),
1489 fixed_row_size,
1490 }
1491 }
1492
1493 /// Returns an iterator over all the [`PageOffset`]s of the fixed rows in this page.
1494 ///
1495 /// The rows are assumed to be `fixed_row_size` bytes long.
1496 ///
1497 /// NOTE: This method is not `unsafe` as it cannot trigger UB.
1498 /// However, when provided with garbage input, it will return garbage back.
1499 /// It is the caller's responsibility to ensure that `PageOffset`s derived from
1500 /// this iterator are valid when used to do anything `unsafe`.
1501 pub fn iter_fixed_len(&self, fixed_row_size: Size) -> FixedLenRowsIter<'_> {
1502 FixedLenRowsIter {
1503 idx_iter: self.header.fixed.present_rows.iter_set(),
1504 fixed_row_size,
1505 }
1506 }
1507
1508 /// Returns an iterator over all the `VarLenGranule`s of the var-len object
1509 /// that has its first granule at offset `first_granule`.
1510 /// An empty iterator will be returned when `first_granule` is `NULL`.
1511 ///
1512 /// # Safety
1513 ///
1514 /// `first_granule` must be an offset to a valid granule or `NULL`.
1515 pub unsafe fn iter_var_len_object(
1516 &self,
1517 first_granule: PageOffset,
1518 ) -> impl Clone + Iterator<Item = &VarLenGranule> {
1519 VarLenGranulesIter {
1520 page: self,
1521 next_granule: first_granule,
1522 }
1523 }
1524
1525 /// Returns an iterator over the data of all the `VarLenGranule`s of the var-len object
1526 /// that has its first granule at offset `first_granule`.
1527 /// An empty iterator will be returned when `first_granule` is `NULL`.
1528 ///
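/// # Example
///
/// A minimal sketch (marked `ignore`) reassembling the bytes of a var-len
/// object that is small enough not to live in the blob store, assuming
/// `vlr: VarLenRef` was read from a valid row in this page:
///
/// ```ignore
/// // SAFETY: `vlr.first_granule` points to a valid granule or is NULL.
/// let bytes: Vec<u8> = unsafe { page.iter_vlo_data(vlr.first_granule) }
///     .flat_map(|chunk| chunk.iter().copied())
///     .collect();
/// ```
///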
1529 /// # Safety
1530 ///
1531 /// `first_granule` must be an offset to a valid granule or `NULL`.
1532 pub unsafe fn iter_vlo_data(&self, first_granule: PageOffset) -> impl '_ + Clone + Iterator<Item = &[u8]> {
1533 // SAFETY: Caller and callee have the exact same safety requirements.
1534 unsafe { self.iter_var_len_object(first_granule) }.map(|g| g.data())
1535 }
1536
1537 /// Free a row, marking its fixed-len and var-len storage granules as available for re-use.
1538 ///
1539 /// This call will clear the unmodified hash.
1540 ///
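/// # Example
///
/// A minimal sketch (marked `ignore`), assuming `offset` came from a prior
/// `insert_row` on this page with the same `visitor` and `fixed_row_size`:
///
/// ```ignore
/// // SAFETY: `offset` is a live row of this page, and `fixed_row_size` and
/// // `visitor` are the same values used for every other call on this page.
/// let freed_blob_bytes = unsafe { page.delete_row(offset, fixed_row_size, visitor, blob_store) };
/// ```
///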
1541 /// # Safety
1542 ///
1543 /// - `fixed_row` must point to a valid row in this page.
1544 ///
1545 /// - `fixed_row_size` must be the size in bytes of the fixed part
1546 /// of all past, present, and future rows in this page.
1547 ///
1548 /// - The `var_len_visitor` must visit the same set of `VarLenRef`s in the row
1549 /// as the visitor provided to `insert_row`.
1550 pub unsafe fn delete_row(
1551 &mut self,
1552 fixed_row: PageOffset,
1553 fixed_row_size: Size,
1554 var_len_visitor: &impl VarLenMembers,
1555 blob_store: &mut dyn BlobStore,
1556 ) -> BlobNumBytes {
1557 // We're modifying the page, so clear the unmodified hash.
1558 self.header.unmodified_hash = None;
1559
1560 let (mut fixed, mut var) = self.split_fixed_var_mut();
1561
1562 let mut blob_store_deleted_bytes = BlobNumBytes::default();
1563
1564 // Visit the var-len members of the fixed row and free them.
1565 let row = fixed.get_row(fixed_row, fixed_row_size);
1566 // SAFETY: `row` is derived from `fixed_row`, which is known by caller requirements to be valid.
1567 let var_len_refs = unsafe { var_len_visitor.visit_var_len(row) };
1568 for var_len_ref in var_len_refs {
1569 // SAFETY: A sound call to `visit_var_len` on a fully initialized valid row,
1570 // which we've justified that the above is,
1571 // returns an iterator, that will only yield `var_len_ref`s,
1572 // where `var_len_ref.first_granule` points to a valid `VarLenGranule` or is NULL.
1573 blob_store_deleted_bytes += unsafe { var.free_object(*var_len_ref, blob_store) }
1574 }
1575
1576 // SAFETY: Caller promised that `fixed_row` points to a valid row in the page.
1577 // Thus, `range_move(0..fixed_row_size, fixed_row)` is in bounds of `row_data`.
1578 // Moreover, this entails that it is valid for writing a `FreeCellRef`
1579 // to the beginning or entire range, as any row can at least hold a `FreeCellRef`
1580 // and will be properly aligned for it as well.
1581 unsafe {
1582 fixed.free(fixed_row, fixed_row_size);
1583 }
1584
1585 blob_store_deleted_bytes
1586 }
1587
1588 /// Returns the total number of granules used by the fixed row at `fixed_row_offset`
1589 /// and lasting `fixed_row_size` bytes where `var_len_visitor` is used to find
1590 /// the [`VarLenRef`]s in the fixed row.
1591 ///
1592 /// # Safety
1593 ///
1594 /// - `fixed_row_offset` must refer to a previously-allocated and initialized row in `self`,
1595 /// and must not have been de-allocated. In other words, the fixed row must be *valid*.
1596 ///
1597 /// - `fixed_row_size` and `var_len_visitor` must be consistent with each other
1598 /// and with all other calls to any methods on `self`.
1599 pub unsafe fn row_total_granules(
1600 &self,
1601 fixed_row_offset: PageOffset,
1602 fixed_row_size: Size,
1603 var_len_visitor: &impl VarLenMembers,
1604 ) -> usize {
1605 let fixed_row = self.get_row_data(fixed_row_offset, fixed_row_size);
1606 // SAFETY:
1607 // - Caller promised that `fixed_row_offset` is a valid row.
1608 // - Caller promised consistency of `var_len_visitor` wrt. `fixed_row_size` and this page.
1609 let vlr_iter = unsafe { var_len_visitor.visit_var_len(fixed_row) };
1610 vlr_iter.copied().map(|slot| slot.granules_used()).sum()
1611 }
1612
1613 /// Copy as many rows from `self` for which `filter` returns `true` into `dst` as will fit,
1614 /// starting from `starting_from`.
1615 ///
1616 /// If less than the entirety of `self` could be processed, return `Continue(resume_point)`,
1617 /// where `resume_point` is the `starting_from` argument of a subsequent call to `copy_filter_into`
1618 /// that will complete the iteration.
1619 /// `dst` should be assumed to be full in this case,
1620 /// as it does not contain enough free space to store the row of `self` at `resume_point`.
1621 ///
1622 /// If the entirety of `self` is processed, return `Break`.
1623 /// `dst` may or may not be full in this case, but is likely not full.
1624 ///
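/// # Example
///
/// A minimal sketch (marked `ignore`) of driving the resumption protocol,
/// where `fresh_page()`, `keep`, `row_size`, `visitor`, and `blob_store`
/// are illustrative stand-ins supplied by the caller:
///
/// ```ignore
/// let mut resume_from = PageOffset(0); // start at the first row
/// loop {
///     let mut dst = fresh_page();
///     // SAFETY: `visitor`, `row_size`, and `resume_from` are consistent with `src`.
///     match unsafe {
///         src.copy_filter_into(resume_from, &mut dst, row_size, visitor, blob_store, |p, o| keep(p, o))
///     } {
///         ControlFlow::Continue(next) => resume_from = next,
///         ControlFlow::Break(()) => break,
///     }
/// }
/// ```
///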
1625 /// # Safety
1626 ///
1627 /// The `var_len_visitor` must visit the same set of `VarLenRef`s in the row
1628 /// as the visitor provided to all other methods on `self` and `dst`.
1629 ///
1630 /// The `fixed_row_size` must be consistent with the `var_len_visitor`,
1631 /// and be equal to the value provided to all other methods on `self` and `dst`.
1632 ///
1633 /// The `starting_from` offset must point to a valid starting offset
1634 /// consistent with `fixed_row_size`.
1635 /// That is, it must not point into the middle of a row.
1636 pub unsafe fn copy_filter_into(
1637 &self,
1638 starting_from: PageOffset,
1639 dst: &mut Page,
1640 fixed_row_size: Size,
1641 var_len_visitor: &impl VarLenMembers,
1642 blob_store: &mut dyn BlobStore,
1643 mut filter: impl FnMut(&Page, PageOffset) -> bool,
1644 ) -> ControlFlow<(), PageOffset> {
1645 for row_offset in self
1646 .iter_fixed_len_from(fixed_row_size, starting_from)
1647 // Only copy rows satisfying the predicate `filter`.
1648 .filter(|o| filter(self, *o))
1649 {
1650 // SAFETY:
1651 // - `starting_from` points to a valid row and thus `row_offset` also does.
1652 // - `var_len_visitor` will visit the right `VarLenRef`s and is consistent with other calls.
1653 // - `fixed_row_size` is consistent with `var_len_visitor` and `self`.
1654 if !unsafe { self.copy_row_into(row_offset, dst, fixed_row_size, var_len_visitor, blob_store) } {
1655 // Target doesn't have enough space for row;
1656 // stop here and return the offset of the uncopied row
1657 // so a later call to `copy_filter_into` can start there.
1658 return ControlFlow::Continue(row_offset);
1659 }
1660 }
1661
1662 // The `for` loop completed.
1663 // We successfully copied the entire page of `self` into `dst`.
1664 // The caller doesn't need to resume from this offset.
1665 ControlFlow::Break(())
1666 }
1667
1668 /// Copies the row at `row_offset` from `self` into `dst`
1669 /// or returns `false` otherwise if `dst` has no space for the row.
1670 ///
1671 /// # Safety
1672 ///
1673 /// - `row_offset` must point to a valid row.
1674 ///
1675 /// - `var_len_visitor` must visit the same set of `VarLenRef`s in the row
1676 /// as the visitor provided to all other methods on `self` and `dst`.
1677 ///
1678 /// - `fixed_row_size` must be consistent with the `var_len_visitor`,
1679 /// and be equal to the value provided to all other methods on `self` and `dst`.
1680 unsafe fn copy_row_into(
1681 &self,
1682 row_offset: PageOffset,
1683 dst: &mut Page,
1684 fixed_row_size: Size,
1685 var_len_visitor: &impl VarLenMembers,
1686 blob_store: &mut dyn BlobStore,
1687 ) -> bool {
1688 // SAFETY: Caller promised that `row_offset` points to a valid row
1689 // consistent with `fixed_row_size` which was also
1690 // claimed to be consistent with `var_len_visitor` and `self`.
1691 let required_granules = unsafe { self.row_total_granules(row_offset, fixed_row_size, var_len_visitor) };
1692 if !dst.has_space_for_row(fixed_row_size, required_granules) {
1693 // Target doesn't have enough space for row.
1694 return false;
1695 };
1696
1697 let src_row = self.get_row_data(row_offset, fixed_row_size);
1698
1699 // Allocate for the fixed-len data.
1700 // SAFETY: forward our requirement on `fixed_row_size` to `alloc_fixed_len`.
1701 let inserted_offset = unsafe { dst.alloc_fixed_len(fixed_row_size) }
1702 .expect("Failed to allocate fixed-len row in dst page after checking for available space");
1703
1704 // Copy all fixed-len data. We'll overwrite the var-len parts next.
1705 let (mut dst_fixed, mut dst_var) = dst.split_fixed_var_mut();
1706 let dst_row = dst_fixed.get_row_mut(inserted_offset, fixed_row_size);
1707 dst_row.copy_from_slice(src_row);
1708
1709 // Copy var-len members into target.
1710 // Fixup `VarLenRef`s in `dst_row` to point to the copied var-len objects.
1711 //
1712 // SAFETY: `src_row` is valid because it came from `self.iter_fixed_len_from`.
1713 //
1714 // Forward our safety requirements re: `var_len_visitor` to `visit_var_len`.
1715 let src_vlr_iter = unsafe { var_len_visitor.visit_var_len(src_row) };
1716 // SAFETY: forward our requirement on `var_len_visitor` to `visit_var_len_mut`.
1717 let target_vlr_iter = unsafe { var_len_visitor.visit_var_len_mut(dst_row) };
1718 for (src_vlr, target_vlr_slot) in src_vlr_iter.zip(target_vlr_iter) {
1719 // SAFETY:
1720 //
1721 // - requirements of `visit_var_len` were met,
1722 // so we can assume that `src_vlr.first_granule` points to a valid granule or is NULL.
1723 //
1724 // - the call to `dst.has_space_for_row` above ensures
1725 // that the allocation will not fail part-way through.
1726 let target_vlr_fixup = unsafe { self.copy_var_len_into(*src_vlr, &mut dst_var, blob_store) }
1727 .expect("Failed to allocate var-len object in dst page after checking for available space");
1728
1729 *target_vlr_slot = target_vlr_fixup;
1730 }
1731
1732 true
1733 }
1734
1735 /// Copy a var-len object `src_vlr` from `self` into `dst_var`,
1736 /// and return the `VarLenRef` to the copy in `dst_var`.
1737 ///
1738 /// If the `src_vlr` is empty,
1739 /// i.e., has `first_granule.is_null()` and `length_in_bytes == 0`,
1740 /// this will return `VarLenRef::NULL`.
1741 ///
1742 /// # Safety
1743 ///
1744 /// - `src_vlr.first_granule` must point to a valid granule or be NULL.
1745 ///
1746 /// - To avoid leaving dangling uninitialized allocations in `dst_var`,
1747 /// `dst_var` must already be checked to have enough space to store `src_vlr`
1748 /// using `Self::has_space_for_row`.
1749 unsafe fn copy_var_len_into(
1750 &self,
1751 src_vlr: VarLenRef,
1752 dst_var: &mut VarView<'_>,
1753 blob_store: &mut dyn BlobStore,
1754 ) -> Result<VarLenRef, Error> {
1755 // SAFETY: Caller promised that `src_vlr.first_granule` points to a valid granule or is NULL.
1756 let mut iter = unsafe { self.iter_var_len_object(src_vlr.first_granule) };
1757
1758 // If the `src_vlr` is empty, don't copy anything, and return null.
1759 let Some(mut src_chunk) = iter.next() else {
1760 debug_assert!(src_vlr.length_in_bytes == 0);
1761 return Ok(VarLenRef::NULL);
1762 };
1763 let mut dst_chunk = dst_var.alloc_granule()?;
1764
1765 let copied_head = dst_chunk;
1766
1767 // Weird-looking iterator so we can put the next-pointer into `copied_chunk`.
1768 for next_src_chunk in iter {
1769 // Allocate space for the next granule so we can initialize it in the next iteration.
1770 let next_dst_chunk = dst_var.alloc_granule()?;
1771 let data = src_chunk.data();
1772 // Initialize `dst_chunk` with data and next-pointer.
1773 //
1774 // SAFETY:
1775 // 1. `dst_chunk` is properly aligned as it came from `alloc_granule` either
1776 // before the loop or in the previous iteration.
1777 // This also ensures that both are in bounds
1778 // of the page for `granule .. granule + VarLenGranule::SIZE`.
1779 //
1780 // 2. `next_dst_chunk` will be initialized
1781 // either in the next iteration or after the loop ends.
1782 //
1783 // 3. `dst_chunk` points to unused data as the space was allocated before the loop
1784 // or was `next_dst_chunk` in the previous iteration and hasn't been written to yet.
1785 unsafe { dst_var.write_chunk_to_granule(data, data.len(), dst_chunk, next_dst_chunk) };
1786 dst_chunk = next_dst_chunk;
1787 src_chunk = next_src_chunk;
1788 }
1789
1790 let data = src_chunk.data();
1791 // The last granule has null as next-pointer.
1792 //
1793 // SAFETY:
1794 // 1. `dst_chunk` is properly aligned as it came from `alloc_granule` either
1795 // before the loop or in the previous iteration.
1796 // This also ensures that both are in bounds
1797 // of the page for `granule .. granule + VarLenGranule::SIZE`.
1798 //
1799 // 2. `next` is NULL which is trivially init.
1800 //
1801 // 3. `dst_chunk` points to unused data as the space was allocated before the loop
1802 // or was `next_dst_chunk` in the previous iteration and hasn't been written to yet.
1803 unsafe { dst_var.write_chunk_to_granule(data, data.len(), dst_chunk, PageOffset::VAR_LEN_NULL) };
1804
1805 // For a large blob object,
1806 // notify the `blob_store` that we've taken a reference to the blob hash.
1807 if src_vlr.is_large_blob() {
1808 blob_store
1809 .clone_blob(&src_chunk.blob_hash())
1810 .expect("blob_store could not mark hash as used");
1811 }
1812
1813 Ok(VarLenRef {
1814 first_granule: copied_head,
1815 length_in_bytes: src_vlr.length_in_bytes,
1816 })
1817 }
1818
1819 /// Make `self` empty, removing all rows from it and resetting the high water marks to zero.
1820 ///
1821 /// This also clears the `unmodified_hash`.
1822 pub fn clear(&mut self) {
1823 self.header.clear();
1824 }
1825
1826 /// Zeroes every byte of row data in this page.
1827 ///
1828 /// # Safety
1829 ///
1830 /// Causes the page header to no longer match the contents, invalidating many assumptions.
1831 /// Should be called in conjunction with [`Self::clear`].
1832 pub unsafe fn zero_data(&mut self) {
1833 self.row_data.fill(0);
1834 }
1835
1836 /// Resets this page for reuse of its allocation.
1837 ///
1838 /// The reset page supports at most `max_rows_in_page` rows.
1839 pub fn reset_for(&mut self, max_rows_in_page: usize) {
1840 self.header.reset_for(max_rows_in_page);
1841
1842 // NOTE(centril): We previously zeroed pages when resetting.
1843 // This had an adverse performance impact.
1844 // The reason why we previously zeroed was for security under a multi-tenant setup
1845 // when exposing a module ABI that allows modules to memcpy whole pages over.
1846 // However, we have no such ABI for the time being, so we can soundly avoid zeroing.
1847 // If we ever decide to add such an ABI, we must start zeroing again.
1848 //
1849 // // SAFETY: We just reset the page header.
1850 // unsafe { self.zero_data() };
1851 }
1852
1853 /// Sets the header and the row data.
1854 ///
1855 /// # Safety
1856 ///
1857 /// The `header` and `row_data` must be consistent with each other.
1858 pub(super) unsafe fn set_raw(&mut self, header: PageHeader, row_data: RowData) {
1859 self.header = header;
1860 self.row_data = row_data;
1861 }
1862
1863 /// Returns the page header, for testing.
1864 #[cfg(test)]
1865 pub(super) fn page_header_for_test(&self) -> &PageHeader {
1866 &self.header
1867 }
1868
1869 /// Computes the content hash of this page.
1870 pub fn content_hash(&self) -> blake3::Hash {
1871 let mut hasher = blake3::Hasher::new();
1872
1873 // Hash the page contents.
1874 hasher.update(&self.row_data);
1875
1876 // Hash the `FixedHeader`. First, copy its fields, save for the bitset, into an array.
1877 let fixed = &self.header.fixed;
1878 let mut fixed_bytes = [0u8; 6];
1879 fixed_bytes[0..2].copy_from_slice(&fixed.next_free.next.0.to_le_bytes());
1880 fixed_bytes[2..4].copy_from_slice(&fixed.last.0.to_le_bytes());
1881 fixed_bytes[4..6].copy_from_slice(&fixed.num_rows.to_le_bytes());
1882 hasher.update(&fixed_bytes);
1883
1884 // Hash the fixed bit set.
1885 hasher.update(bytemuck::must_cast_slice(fixed.present_rows.storage()));
1886
1887 // Hash the `VarHeader`.
1888 hasher.update(bytemuck::bytes_of(&self.header.var));
1889
1890 // We're done.
1891 // Note that `unmodified_hash` itself must not be hashed to avoid a recursive dependency.
1892 hasher.finalize()
1893 }
1894
1895 /// Computes the content hash of this page and saves it to [`PageHeader::unmodified_hash`].
1896 pub fn save_content_hash(&mut self) {
1897 let hash = self.content_hash();
1898 self.header.unmodified_hash = Some(hash);
1899 }
1900
1901 /// Return the page's content hash, computing and saving it if it is not already stored.
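///
/// A minimal usage sketch (marked `ignore`) of the caching behavior:
///
/// ```ignore
/// let h1 = page.save_or_get_content_hash(); // computes and caches the hash
/// let h2 = page.save_or_get_content_hash(); // returns the cached hash
/// assert_eq!(h1, h2);
/// ```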
1902 pub fn save_or_get_content_hash(&mut self) -> blake3::Hash {
1903 self.unmodified_hash().copied().unwrap_or_else(|| {
1904 self.save_content_hash();
1905 self.header.unmodified_hash.unwrap()
1906 })
1907 }
1908
1909 /// Returns the stored unmodified hash, if any.
1910 pub fn unmodified_hash(&self) -> Option<&blake3::Hash> {
1911 self.header.unmodified_hash.as_ref()
1912 }
1913}
1914
1915/// An iterator over the `PageOffset`s of all present fixed-length rows in a [`Page`].
1916pub struct FixedLenRowsIter<'page> {
1917 /// Iterates the indices of the set bits in the page's `present_rows` bitset,
1918 /// i.e. the row indices of the fixed rows that are present,
1919 /// which are scaled by `fixed_row_size` to produce offsets.
1920 idx_iter: IterSet<'page>,
1921 /// The size of a row in bytes.
1922 fixed_row_size: Size,
1923}
1924
1925impl Iterator for FixedLenRowsIter<'_> {
1926 type Item = PageOffset;
1927
1928 fn next(&mut self) -> Option<Self::Item> {
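// Each set bit in the present-rows bitset is the index of a present fixed row;
// scale it by the fixed row size to turn it into a byte offset into the page.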
1929 self.idx_iter
1930 .next()
1931 .map(|idx| PageOffset(idx as u16 * self.fixed_row_size.0))
1932 }
1933}
1934
1935/// An iterator over the [`VarLenGranule`]s in a particular [`VarLenRef`] in `page`.
1936///
1937/// Constructing a `VarLenGranulesIter` should be considered unsafe
1938/// because the initial `next_granule` must either be `NULL` or point to a valid [`VarLenGranule`].
1939///
1940/// Iterating over [`VarLenRef::NULL`] is safe and will immediately return `None`.
1941#[derive(Clone, Copy)]
1942struct VarLenGranulesIter<'page> {
1943 /// The page to yield granules from.
1944 page: &'page Page,
1945 /// Location of the next granule in `page`.
1946 /// Must either be `NULL` or point to a valid granule.
1947 next_granule: PageOffset,
1948 // TODO(perf,bikeshedding): store length and implement `Iterator::size_hint`?
1949}
1950
1951impl<'page> Iterator for VarLenGranulesIter<'page> {
1952 type Item = &'page VarLenGranule;
1953
1954 fn next(&mut self) -> Option<Self::Item> {
1955 if self.next_granule.is_var_len_null() {
1956 return None;
1957 }
1958
1959 // SAFETY: By construction,
1960 // the initial `next_granule` was promised to either be `NULL` or point to a valid granule.
1961 // For a given granule, the same applies to its `.next()` granule.
1962 // At this point, we've excluded `NULL`,
1963 // so we know inductively that `next_granule` points to a valid granule, as required.
1964 let granule: &VarLenGranule = unsafe { get_ref(&self.page.row_data, self.next_granule) };
1965 self.next_granule = granule.header.next();
1966
1967 Some(granule)
1968 }
1969}
1970
1971#[cfg(test)]
1972pub(crate) mod tests {
1973 use super::*;
1974 use crate::{blob_store::NullBlobStore, page_pool::PagePool, var_len::AlignedVarLenOffsets};
1975 use proptest::{collection::vec, prelude::*};
1976 use spacetimedb_sats::bsatn;
1977 use spacetimedb_sats::layout::row_size_for_type;
1978
1979 fn u64_row_size() -> Size {
1980 let fixed_row_size = row_size_for_type::<u64>();
1981 assert_eq!(fixed_row_size.len(), 8);
1982 fixed_row_size
1983 }
1984
1985 const U64_VL_VISITOR: AlignedVarLenOffsets<'_> = AlignedVarLenOffsets::from_offsets(&[]);
1986 fn u64_var_len_visitor() -> &'static AlignedVarLenOffsets<'static> {
1987 &U64_VL_VISITOR
1988 }
1989
1990 fn insert_u64(page: &mut Page, val: u64) -> PageOffset {
1991 let val_slice = val.to_le_bytes();
1992 unsafe { page.insert_row(&val_slice, &[] as &[&[u8]], u64_var_len_visitor(), &mut NullBlobStore) }
1993 .expect("Failed to insert first row")
1994 }
1995
1996 fn insert_u64_drop(page: &mut Page, val: u64) {
1997 insert_u64(page, val);
1998 }
1999
2000 fn read_u64(page: &Page, offset: PageOffset) -> u64 {
2001 let row = page.get_row_data(offset, u64_row_size());
2002 u64::from_le_bytes(row.try_into().unwrap())
2003 }
2004
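/// Returns the page offset of the var-len high water mark
/// after `n` granules have been allocated from the end of the page.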
2005 fn data_sub_n_vlg(n: usize) -> usize {
2006 PageOffset::PAGE_END.idx() - (VarLenGranule::SIZE * n).len()
2007 }
2008
2009 pub(crate) fn hash_unmodified_save_get(page: &mut Page) -> blake3::Hash {
2010 assert_eq!(page.header.unmodified_hash, None);
2011 page.save_content_hash();
2012 page.header.unmodified_hash.unwrap()
2013 }
2014
2015 #[test]
2016 fn insert_one_u64() {
2017 let mut page = Page::new(u64_row_size());
2018
2019 // First the hash is not saved, so compute it.
2020 let hash = hash_unmodified_save_get(&mut page);
2021
2022 let val: u64 = 0xa5a5_a5a5_a5a5_a5a5;
2023
2024 let offset = insert_u64(&mut page, val);
2025
2026 assert_eq!(offset.idx(), 0);
2027
2028 let row_val = read_u64(&page, offset);
2029
2030 assert_eq!(row_val, val);
2031
2032 // The hash should have been cleared.
2033 assert_ne!(hash, hash_unmodified_save_get(&mut page));
2034 }
2035
2036 fn insert_while(
2037 page: &mut Page,
2038 mut next_val: u64,
2039 fixed_row_size: Size,
2040 vl_num: usize,
2041 mut insert: impl FnMut(&mut Page, u64),
2042 ) -> u64 {
2043 while page.has_space_for_row(fixed_row_size, vl_num) {
2044 insert(page, next_val);
2045 next_val += 1;
2046 }
2047 next_val
2048 }
2049
2050 #[test]
2051 fn fill_then_iter_fixed_len_u64() {
2052 let mut page = Page::new(u64_row_size());
2053
2054 let last_val = insert_while(&mut page, 0, u64_row_size(), 0, insert_u64_drop);
2055 assert_eq!(last_val, (PageOffset::PAGE_END / u64_row_size()) as u64);
2056
2057 for (row_idx, expected_val) in page.iter_fixed_len(u64_row_size()).zip(0..last_val) {
2058 let row_val = read_u64(&page, row_idx);
2059 assert_eq!(
2060 row_val, expected_val,
2061 "row_val {row_val:x} /= expected_val {expected_val:x}"
2062 );
2063 }
2064 }
2065
2066 #[test]
2067 fn fill_delete_iter_fixed_len_u64() {
2068 let mut page = Page::new(u64_row_size());
2069
2070 // First the hash is not saved, so compute it.
2071 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2072
2073 // Insert rows.
2074 let mut odds: Vec<PageOffset> = Vec::new();
2075 let last_val = insert_while(&mut page, 2, u64_row_size(), 0, |page, val| {
2076 let offset = insert_u64(page, val);
2077 if val % 2 == 1 {
2078 odds.push(offset);
2079 }
2080 });
2081
2082 // The hash should have been cleared.
2083 let hash_pre_del = hash_unmodified_save_get(&mut page);
2084 assert_ne!(hash_pre_ins, hash_pre_del);
2085
2086 // Delete rows.
2087 for row_offset in odds {
2088 unsafe { page.delete_row(row_offset, u64_row_size(), u64_var_len_visitor(), &mut NullBlobStore) };
2089 }
2090
2091 // The hash should have been cleared.
2092 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2093 assert_ne!(hash_pre_ins, hash_pre_iter);
2094 assert_ne!(hash_pre_del, hash_pre_iter);
2095
2096 // Iterate the rows.
2097 for (row_offset, expected_val) in page.iter_fixed_len(u64_row_size()).zip((2..last_val).step_by(2)) {
2098 let found_val = read_u64(&page, row_offset);
2099 assert_eq!(found_val, expected_val);
2100 }
2101
2102 // Hash is unchanged.
2103 assert_eq!(page.header.unmodified_hash, Some(hash_pre_iter));
2104 }
2105
2106 #[test]
2107 /// After deleting a fixed-length row and then inserting a new fixed-length row,
2108 /// the fixed-length high water mark must not change,
2109 /// i.e. we must re-use memory from the deleted row to store the new insertion.
2110 fn reuse_fixed_len_space() {
2111 let mut page = Page::new(u64_row_size());
2112
2113 // First the hash is not saved, so compute it.
2114 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2115
2116 // Insert two rows.
2117 let offset_0 = insert_u64(&mut page, 0xa5a5a5a5_a5a5a5a5);
2118 assert_eq!(offset_0.idx(), 0);
2119 let offset_1 = insert_u64(&mut page, 0xbeefbeef_beefbeef);
2120 assert_eq!(offset_1, u64_row_size());
2121
2122 assert_eq!(page.header.fixed.last, u64_row_size() * 2);
2123
2124 // Hash has been cleared after inserts.
2125 let hash_pre_del = hash_unmodified_save_get(&mut page);
2126 assert_ne!(hash_pre_ins, hash_pre_del);
2127
2128 // Delete first row.
2129 unsafe { page.delete_row(offset_0, u64_row_size(), u64_var_len_visitor(), &mut NullBlobStore) };
2130
2131 assert_eq!(page.header.fixed.last, u64_row_size() * 2);
2132
2133 // Hash has been cleared after deletes.
2134 let hash_pre_ins2 = hash_unmodified_save_get(&mut page);
2135 assert_ne!(hash_pre_ins, hash_pre_ins2);
2136 assert_ne!(hash_pre_del, hash_pre_ins2);
2137
2138 // Insert first row again, re-using memory.
2139 let offset_0_again = insert_u64(&mut page, 0xffffffff_ffffffff);
2140
2141 assert_eq!(offset_0_again.idx(), 0);
2142 assert_eq!(offset_0.idx(), offset_0_again.idx());
2143
2144 assert_eq!(page.header.fixed.last, u64_row_size() * 2);
2145
2146 // Hash has been cleared after last insert, despite re-using memory.
2147 let hash_post_ins2 = hash_unmodified_save_get(&mut page);
2148 assert_ne!(hash_pre_ins, hash_post_ins2);
2149 assert_ne!(hash_pre_del, hash_post_ins2);
2150 assert_ne!(hash_pre_ins2, hash_post_ins2);
2151 }
2152
2153 const STR_ROW_SIZE: Size = row_size_for_type::<VarLenRef>();
2154
2155 const _: () = assert!(STR_ROW_SIZE.len() == mem::size_of::<VarLenRef>());
2156
2157 const STR_VL_VISITOR: AlignedVarLenOffsets<'_> = AlignedVarLenOffsets::from_offsets(&[0]);
2158 fn str_var_len_visitor() -> &'static AlignedVarLenOffsets<'static> {
2159 &STR_VL_VISITOR
2160 }
2161
2162 fn insert_str(page: &mut Page, data: &[u8]) -> PageOffset {
2163 let fixed_len_data = [0u8; STR_ROW_SIZE.len()];
2164 unsafe { page.insert_row(&fixed_len_data, &[data], str_var_len_visitor(), &mut NullBlobStore) }
2165 .expect("Failed to insert row")
2166 }
2167
2168 fn read_str_ref(page: &Page, offset: PageOffset) -> VarLenRef {
2169 *unsafe { get_ref(&page.row_data, offset) }
2170 }
2171
2172 #[test]
2173 fn insert_empty_str() {
2174 let mut page = Page::new(STR_ROW_SIZE);
2175
2176 // First the hash is not saved, so compute it.
2177 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2178
2179 // Insert the empty string.
2180 let offset = insert_str(&mut page, &[]);
2181
2182 // No granules were used.
2183 let extracted = read_str_ref(&page, offset);
2184 let mut granules_iter = unsafe { page.iter_var_len_object(extracted.first_granule) };
2185 assert!(granules_iter.next().is_none());
2186 drop(granules_iter);
2187
2188 // Hash is cleared even though the string was empty.
2189 assert_ne!(hash_pre_ins, hash_unmodified_save_get(&mut page));
2190 }
2191
2192 proptest! {
2193 #[test]
2194 fn insert_one_short_str(data in vec(any::<u8>(), 1..VarLenGranule::DATA_SIZE)) {
2195 let mut page = Page::new(STR_ROW_SIZE);
2196
2197 // First the hash is not saved, so compute it.
2198 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2199
2200 // Insert the row.
2201 let offset = insert_str(&mut page, &data);
2202
2203 // Hash was cleared by the insert.
2204 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2205 assert_ne!(hash_pre_ins, hash_pre_iter);
2206
2207 // Confirm we inserted correctly.
2208 let extracted = read_str_ref(&page, offset);
2209 let mut data_iter = unsafe { page.iter_vlo_data(extracted.first_granule) };
2210 let (first, next) = (data_iter.next(), data_iter.next());
2211 assert_eq!(first, Some(&*data));
2212 assert_eq!(next, None);
2213
2214 // Iteration and reading did not change the hash.
2215 assert_eq!(hash_pre_iter, page.header.unmodified_hash.unwrap());
2216 }
2217
2218 #[test]
2219 fn insert_one_long_str(data in vec(any::<u8>(), (VarLenGranule::OBJECT_SIZE_BLOB_THRESHOLD / 2)..VarLenGranule::OBJECT_SIZE_BLOB_THRESHOLD)) {
2220 let mut page = Page::new(STR_ROW_SIZE);
2221
2222 // First the hash is not saved, so compute it.
2223 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2224
2225 // Insert the long string.
2226 let offset = insert_str(&mut page, &data);
2227
2228 // The hash was cleared, and the new one is different.
2229 let hash_post_ins = hash_unmodified_save_get(&mut page);
2230 assert_ne!(hash_pre_ins, hash_post_ins);
2231
2232 // Check that we inserted correctly.
2233 let extracted = read_str_ref(&page, offset);
2234
2235 let mut data_iter = unsafe { page.iter_vlo_data(extracted.first_granule) };
2236 let mut chunks_iter = data.chunks(VarLenGranule::DATA_SIZE);
2237
2238 for (i, (data, chunk)) in (&mut data_iter).zip(&mut chunks_iter).enumerate() {
2239 assert_eq!(
2240 data,
2241 chunk,
2242 "Chunk {i} does not match. Left is found, right is expected.",
2243 );
2244 }
2245
2246 // Both iterators must be finished, i.e. they must have the same length.
2247 assert!(data_iter.next().is_none());
2248 assert!(chunks_iter.next().is_none());
2249
2250 // Reading did not alter the hash.
2251 assert_eq!(hash_post_ins, page.header.unmodified_hash.unwrap());
2252 }
2253 }
2254
2255 #[test]
2256 fn reuse_var_len_space_no_fragmentation_concerns() {
2257 let data_0 = b"Hello, world!";
2258 let data_1 = b"How goes life?";
2259 let data_2 = b"Glad to hear it.";
2260
2261 let mut page = Page::new(STR_ROW_SIZE);
2262 let offset_0 = insert_str(&mut page, data_0);
2263 let offset_1 = insert_str(&mut page, data_1);
2264
2265 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(2));
2266
2267 assert_ne!(offset_0.idx(), offset_1.idx());
2268
2269 let var_len_0 = read_str_ref(&page, offset_0);
2270
2271 assert_eq!(var_len_0.length_in_bytes as usize, data_0.len());
2272 assert_eq!(var_len_0.first_granule.idx(), data_sub_n_vlg(1));
2273
2274 let var_len_1 = read_str_ref(&page, offset_1);
2275
2276 assert_eq!(var_len_1.length_in_bytes as usize, data_1.len());
2277 assert_eq!(var_len_1.first_granule.idx(), data_sub_n_vlg(2));
2278
2279 let hash_pre_del = hash_unmodified_save_get(&mut page);
2280
2281 unsafe { page.delete_row(offset_0, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2282
2283 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2284
2285 let offset_2 = insert_str(&mut page, data_2);
2286
2287 let hash_post_ins = hash_unmodified_save_get(&mut page);
2288 assert_ne!(hash_pre_del, hash_pre_ins);
2289 assert_ne!(hash_pre_del, hash_post_ins);
2290 assert_ne!(hash_pre_ins, hash_post_ins);
2291
2292 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(2));
2293
2294 assert_eq!(offset_0.idx(), offset_2.idx());
2295
2296 let var_len_2 = read_str_ref(&page, offset_2);
2297
2298 assert_eq!(var_len_2.length_in_bytes as usize, data_2.len());
2299 assert_eq!(var_len_2.first_granule.idx(), var_len_0.first_granule.idx());
2300 }
2301
2302 #[test]
2303 fn free_var_len_obj_multiple_granules() {
2304 let mut page = Page::new(STR_ROW_SIZE);
2305
2306 // Allocate a 4-granule var-len object.
2307 let data_0 = [0xa5u8].repeat(VarLenGranule::DATA_SIZE * 4);
2308 let offset_0 = insert_str(&mut page, &data_0);
2309
2310 let var_len_0 = read_str_ref(&page, offset_0);
2311
2312 // Read the addresses of its var-len granules.
2313 let granules_0 = unsafe { page.iter_var_len_object(var_len_0.first_granule) }
2314 .map(|granule| granule as *const VarLenGranule as usize)
2315 .collect::<Vec<_>>();
2316
2317 // Sanity checks: we have allocated 4 granules.
2318 assert_eq!(granules_0.len(), 4);
2319 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(4));
2320
2321 // Delete the row.
2322 unsafe { page.delete_row(offset_0, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2323
2324 // Allocate a new 4-granule var-len object.
2325 // This should use the same storage as the original row.
2326 let data_1 = [0xffu8].repeat(VarLenGranule::DATA_SIZE * 4);
2327 let offset_1 = insert_str(&mut page, &data_1);
2328
2329 let var_len_1 = read_str_ref(&page, offset_1);
2330
2331 // Read the addresses of the new allocation's var-len granules.
2332 let granules_1 = unsafe { page.iter_var_len_object(var_len_1.first_granule) }
2333 .map(|granule| granule as *const VarLenGranule as usize)
2334 .collect::<Vec<_>>();
2335
2336 // Sanity check: the new allocation is also 4 granules.
2337 assert_eq!(granules_1.len(), 4);
2338
2339 for granule in granules_1.iter().copied() {
2340 // The new var-len allocation must contain all the same granules by address
2341 // as the old var-len allocation.
2342 assert!(granules_0.iter().copied().any(|other_granule| other_granule == granule));
2343 }
2344
2345 // The var-len high water mark must not have moved.
2346 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(4));
2347 }
2348
2349 #[test]
2350 fn reuse_var_len_space_avoid_fragmentation() {
2351 let data_0 = &[0xa5u8];
2352 let data_1 = &[0xffu8];
2353 let data_2 = [0x11u8].repeat(VarLenGranule::DATA_SIZE + 1);
2354 let data_2 = data_2.as_ref();
2355
2356 let mut page = Page::new(STR_ROW_SIZE);
2357
2358 // First the hash is not saved, so compute it.
2359 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2360
2361 // Insert two string rows.
2362 let offset_0 = insert_str(&mut page, data_0);
2363 let _offset_1 = insert_str(&mut page, data_1);
2364
2365 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(2));
2366
2367 // Hash is cleared by inserting and the new one is different.
2368 let hash_pre_del = hash_unmodified_save_get(&mut page);
2369 assert_ne!(hash_pre_ins, hash_pre_del);
2370
2371 // Delete the first row.
2372 unsafe { page.delete_row(offset_0, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2373
2374 // Hash is cleared by deleting.
2375 let hash_post_del = hash_unmodified_save_get(&mut page);
2376 assert_ne!(hash_pre_ins, hash_post_del);
2377 assert_ne!(hash_pre_del, hash_post_del);
2378
2379 // Insert again, re-using memory.
2380 let offset_2 = insert_str(&mut page, data_2);
2381
2382 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(3));
2383
2384 // Hash is cleared by inserting again, even though we re-used memory.
2385 let hash_post_ins2 = hash_unmodified_save_get(&mut page);
2386 assert_ne!(hash_pre_ins, hash_post_ins2);
2387 assert_ne!(hash_pre_del, hash_post_ins2);
2388 assert_ne!(hash_post_del, hash_post_ins2);
2389
2390 // Check that we inserted correctly.
2391 let var_len_2 = read_str_ref(&page, offset_2);
2392
2393 let mut data_iter = unsafe { page.iter_vlo_data(var_len_2.first_granule) };
2394 let mut chunks_iter = data_2.chunks(VarLenGranule::DATA_SIZE);
2395
2396 for (i, (data, chunk)) in (&mut data_iter).zip(&mut chunks_iter).enumerate() {
2397 assert_eq!(
2398 data, chunk,
2399 "Chunk {i} does not match. Left is found, right is expected.",
2400 );
2401 }
2402
2403 // Both iterators must be finished, i.e. they must have the same length.
2404 assert!(data_iter.next().is_none());
2405 assert!(chunks_iter.next().is_none());
2406 }
2407
2408 fn check_u64_in_str(page: &Page, row_idx: PageOffset, expected_val: u64) {
2409 let vlr = read_str_ref(page, row_idx);
2410
2411 let mut var_len_iter = unsafe { page.iter_vlo_data(vlr.first_granule) };
2412 let data = var_len_iter.next().unwrap();
2413 assert!(var_len_iter.next().is_none());
2414 assert_eq!(data.len(), mem::size_of::<u64>());
2415
2416 let val = u64::from_le_bytes(data.try_into().unwrap());
2417 assert_eq!(val, expected_val);
2418 }
2419
2420 #[test]
2421 fn fill_then_iter_var_len_str() {
2422 let mut page = Page::new(STR_ROW_SIZE);
2423
2424 // First the hash is not saved, so compute it.
2425 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2426
2427 // Insert the strings.
2428 let last_val = insert_while(&mut page, 0, STR_ROW_SIZE, 1, |page, val| {
2429 insert_str(page, &val.to_le_bytes());
2430 });
2431
2432 // Hash is cleared by inserting and the new one is different.
2433 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2434 assert_ne!(hash_pre_ins, hash_pre_iter);
2435
2436 // Check that we inserted correctly.
2437 let size_per_row = STR_ROW_SIZE + VarLenGranule::SIZE;
2438
2439 assert_eq!(last_val, (PageOffset::PAGE_END / size_per_row) as u64);
2440
2441 for (row_idx, expected_val) in page.iter_fixed_len(STR_ROW_SIZE).zip(0..last_val) {
2442 check_u64_in_str(&page, row_idx, expected_val);
2443 }
2444
2445 // Reading does not alter the hash.
2446 assert_eq!(hash_pre_iter, page.header.unmodified_hash.unwrap());
2447 }
2448
2449 #[test]
2450 fn fill_delete_iter_var_len_str() {
2451 let mut page = Page::new(STR_ROW_SIZE);
2452
2453 // First the hash is not saved, so compute it.
2454 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2455
2456 // Insert the string rows.
2457 let mut odds = Vec::new();
2458 let last_val = insert_while(&mut page, 0, STR_ROW_SIZE, 1, |page, val| {
2459 let offset = insert_str(page, &val.to_le_bytes());
2460 if val % 2 == 1 {
2461 odds.push(offset);
2462 }
2463 });
2464
2465 let size_per_row = STR_ROW_SIZE + VarLenGranule::SIZE;
2466 let num_rows_inserted = (PageOffset::PAGE_END / size_per_row) as u64;
2467 assert_eq!(last_val, num_rows_inserted);
2468
2469 // Hash was cleared by inserting and is different now.
2470 let hash_pre_del = hash_unmodified_save_get(&mut page);
2471 assert_ne!(hash_pre_ins, hash_pre_del);
2472
2473 // Delete the rows.
2474 for row_offset in odds {
2475 unsafe { page.delete_row(row_offset, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2476 }
2477
2478 // Hash was cleared by deleting and is different now.
2479 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2480 assert_ne!(hash_pre_ins, hash_pre_iter);
2481 assert_ne!(hash_pre_del, hash_pre_iter);
2482
2483 // Check that we deleted correctly.
2484 let num_rows_retained = num_rows_inserted.div_ceil(2);
2485 let num_rows_removed = num_rows_inserted / 2;
2486
2487 assert_eq!(page.header.fixed.num_rows as u64, num_rows_retained);
2488
2489 assert_eq!(page.header.var.freelist_len as u64, num_rows_removed);
2490
2491 for (row_idx, expected_val) in page.iter_fixed_len(STR_ROW_SIZE).zip((0..last_val).step_by(2)) {
2492 check_u64_in_str(&page, row_idx, expected_val);
2493 }
2494
2495 // Reading did not alter the hash.
2496 assert_eq!(hash_pre_iter, page.header.unmodified_hash.unwrap());
2497 }
2498
2499 #[test]
2500 fn serde_round_trip_whole_page() {
2501 let pool = PagePool::new_for_test();
2502 let mut page = Page::new(u64_row_size());
2503
2504 // Construct an empty page, ser/de it, and assert that it's still empty.
2505 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2506 let ser_pre_ins = bsatn::to_vec(&page).unwrap();
2507 let de_pre_ins = pool.take_deserialize_from(&ser_pre_ins).unwrap();
2508 assert_eq!(de_pre_ins.content_hash(), hash_pre_ins);
2509 assert_eq!(de_pre_ins.header.fixed.num_rows, 0);
2510 assert!(de_pre_ins.header.fixed.present_rows == page.header.fixed.present_rows);
2511
2512 // Insert some rows into the page.
2513 let offsets = (0..64)
2514 .map(|val| insert_u64(&mut page, val))
2515 .collect::<Vec<PageOffset>>();
2516
2517 let hash_ins = hash_unmodified_save_get(&mut page);
2518
2519 // Ser/de the page and assert that it contains the same rows.
2520 let ser_ins = bsatn::to_vec(&page).unwrap();
2521 let de_ins = pool.take_deserialize_from(&ser_ins).unwrap();
2522 assert_eq!(de_ins.content_hash(), hash_ins);
2523 assert_eq!(de_ins.header.fixed.num_rows, 64);
2524 assert!(de_ins.header.fixed.present_rows == page.header.fixed.present_rows);
2525 assert_eq!(
2526 de_ins.iter_fixed_len(u64_row_size()).collect::<Vec<PageOffset>>(),
2527 offsets
2528 );
2529
2530 // Delete the even-numbered rows, leaving the odds.
2531 let offsets = offsets
2532 .into_iter()
2533 .enumerate()
2534 .filter_map(|(i, offset)| {
2535 if i % 2 == 0 {
2536 unsafe { page.delete_row(offset, u64_row_size(), u64_var_len_visitor(), &mut NullBlobStore) };
2537 None
2538 } else {
2539 Some(offset)
2540 }
2541 })
2542 .collect::<Vec<PageOffset>>();
2543
2544 // Ser/de the page again and assert that it contains only the odd-numbered rows.
2545 let hash_del = hash_unmodified_save_get(&mut page);
2546 let ser_del = bsatn::to_vec(&page).unwrap();
2547 let de_del = pool.take_deserialize_from(&ser_del).unwrap();
2548 assert_eq!(de_del.content_hash(), hash_del);
2549 assert_eq!(de_del.header.fixed.num_rows, 32);
2550 assert!(de_del.header.fixed.present_rows == page.header.fixed.present_rows);
2551 assert_eq!(
2552 de_del.iter_fixed_len(u64_row_size()).collect::<Vec<PageOffset>>(),
2553 offsets
2554 );
2555 }
2556}