spacetimedb_table/page.rs
1//! Provides a [`Page`] abstraction that stores rows
2//! and an associated header necessary for the page to work.
3//! Consult the documentation of this type for a list of operations
//! and a description of how pages work.
5//!
6//! A page can provide a split mutable view of its fixed section and its variable section.
7//! This is provided through [`Page::split_fixed_var_mut`] with view operations
8//! defined on [`FixedView`] and [`VarView`].
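//!
//! For example, a caller typically splits the page and then works with the two
//! views independently (a sketch; `page`, `offset`, and `size` are assumed to be
//! valid for this page's row type):
//!
//! ```ignore
//! let (mut fixed, mut var) = page.split_fixed_var_mut();
//! let row = fixed.get_row_mut(offset, size);
//! // ... mutate the fixed-len row, and use `var` for its var-len granules ...
//! ```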
9//!
10//! [ralfj_safe_valid]: https://www.ralfj.de/blog/2018/08/22/two-kinds-of-invariants.html
11//!
12//! Technical terms:
13//!
//! - `valid`, when referring to a type, granule, or row,
//! means, depending on the context, a memory location that holds a *safe* object.
16//! When "valid for writes" is used, the location must be properly aligned
17//! and none of its bytes may be uninit,
18//! but the value need not be valid at the type in question.
19//! "Valid for writes" is equivalent to valid-unconstrained.
20//!
21//! - `valid-unconstrained`, when referring to a memory location with a given type,
//! means that the location stores a byte pattern which Rust/LLVM's memory model recognizes as valid,
23//! and therefore must not contain any uninit,
24//! but the value is not required to be logically meaningful,
25//! and no code may depend on the data within it to uphold any invariants.
26//! E.g. an unallocated [`VarLenGranule`] within a page stores valid-unconstrained bytes,
27//! because the bytes are either 0 from the initial [`alloc_zeroed`] of the page,
28//! or contain stale data from a previously freed [`VarLenGranule`].
29//!
30//! - `unused` means that it is safe to overwrite a block of memory without cleaning up its previous value.
31//!
//! See the post [Two Kinds of Invariants: Safety and Validity][ralfj_safe_valid]
33//! for a discussion on safety and validity invariants.
34
35use super::{
36 blob_store::BlobStore,
37 fixed_bit_set::FixedBitSet,
38 indexes::{Byte, Bytes, PageOffset, Size, PAGE_HEADER_SIZE, PAGE_SIZE},
39 layout::MIN_ROW_SIZE,
40 var_len::{is_granule_offset_aligned, VarLenGranule, VarLenGranuleHeader, VarLenMembers, VarLenRef},
41};
42use crate::{fixed_bit_set::IterSet, indexes::max_rows_in_page, static_assert_size, table::BlobNumBytes, MemoryUsage};
43use core::{mem, ops::ControlFlow};
44use spacetimedb_lib::{de::Deserialize, ser::Serialize};
45use thiserror::Error;
46
47#[derive(Error, Debug, PartialEq, Eq)]
48pub enum Error {
49 #[error("Want to allocate a var-len object of {need} granules, but have only {have} granules available")]
50 InsufficientVarLenSpace { need: u16, have: u16 },
51 #[error("Want to allocate a fixed-len row of {} bytes, but the page is full", need.len())]
52 InsufficientFixedLenSpace { need: Size },
53}
54
55/// A cons-cell in a freelist either
56/// for an unused fixed-len cell or a variable-length granule.
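///
/// Illustration (not normative): the freelist behaves like a LIFO stack threaded
/// through the page's own bytes. After prepending offsets `A` and then `B`,
/// taking the head yields `B`, then `A`, then `None`:
///
/// ```text
/// next_free -> B -> A -> PAGE_END (the "None" sentinel)
/// ```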
57#[repr(C)] // Required for a stable ABI.
58#[derive(Clone, Copy, Debug, PartialEq, Eq, bytemuck::NoUninit, Serialize, Deserialize)]
59struct FreeCellRef {
60 /// The address of the next free cell in a freelist.
61 ///
62 /// The `PageOffset::PAGE_END` is used as a sentinel to signal "`None`".
63 next: PageOffset,
64}
65
66impl MemoryUsage for FreeCellRef {
67 fn heap_usage(&self) -> usize {
68 let Self { next } = self;
69 next.heap_usage()
70 }
71}
72
73impl FreeCellRef {
74 /// The sentinel for NULL cell references.
75 const NIL: Self = Self {
76 next: PageOffset::PAGE_END,
77 };
78
79 /// Replaces the cell reference with `offset`, returning the existing one.
80 #[inline]
81 fn replace(&mut self, offset: PageOffset) -> FreeCellRef {
82 let next = mem::replace(&mut self.next, offset);
83 Self { next }
84 }
85
86 /// Returns whether the cell reference is non-empty.
87 #[inline]
88 const fn has(&self) -> bool {
89 !self.next.is_at_end()
90 }
91
92 /// Take the first free cell in the freelist starting with `self`, if any,
93 /// and promote the second free cell as the freelist head.
94 ///
95 /// # Safety
96 ///
97 /// When `self.has()`, it must point to a valid `FreeCellRef`.
98 #[inline]
99 unsafe fn take_freelist_head(
100 self: &mut FreeCellRef,
101 row_data: &Bytes,
102 adjust_free: impl FnOnce(PageOffset) -> PageOffset,
103 ) -> Option<PageOffset> {
104 self.has().then(|| {
105 let head = adjust_free(self.next);
            // SAFETY: `self.has()` holds here, so per the caller contract `head` points to a valid `FreeCellRef`.
107 let next = unsafe { get_ref(row_data, head) };
108 self.replace(*next).next
109 })
110 }
111
112 /// Prepend `new_head` to the freelist starting with `self`.
113 ///
114 /// SAFETY: `new_head`, after adjustment, must be in bounds of `row_data`.
115 /// Moreover, it must be valid for writing a `FreeCellRef` to it,
116 /// which includes being properly aligned with respect to `row_data` for a `FreeCellRef`.
117 /// Additionally, `self` must contain a valid `FreeCellRef`.
118 #[inline]
119 unsafe fn prepend_freelist(
120 self: &mut FreeCellRef,
121 row_data: &mut Bytes,
122 new_head: PageOffset,
123 adjust_free: impl FnOnce(PageOffset) -> PageOffset,
124 ) {
125 let next = self.replace(new_head);
126 let new_head = adjust_free(new_head);
127 // SAFETY: Per caller contract, `new_head` is in bounds of `row_data`.
128 // Moreover, `new_head` points to an unused `FreeCellRef`, so we can write to it.
129 let next_slot: &mut FreeCellRef = unsafe { get_mut(row_data, new_head) };
130 *next_slot = next;
131 }
132}
133
134/// All the fixed size header information.
135#[repr(C)] // Required for a stable ABI.
136#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] // So we can dump and restore pages during snapshotting.
137struct FixedHeader {
138 /// A pointer to the head of the freelist which stores
139 /// all the unused (freed) fixed row cells.
140 /// These cells can be reused when inserting a new row.
141 next_free: FreeCellRef,
142
143 /// High water mark (HWM) for fixed-length rows.
144 /// Points one past the last-allocated (highest-indexed) fixed-length row,
145 /// so to allocate a fixed-length row from the gap,
146 /// post-increment this index.
147 // TODO(perf,future-work): determine how to lower the high water mark when freeing the topmost row.
148 last: PageOffset,
149
150 /// The number of rows currently in the page.
151 ///
152 /// N.B. this is not the same as `self.present_rows.len()`
153 /// as that counts both zero and one bits.
154 num_rows: u16,
155
156 // TODO(stable-module-abi): should this be inlined into the page?
157 /// For each fixed-length row slot, true if a row is stored there,
158 /// false if the slot is unallocated.
159 ///
160 /// Unallocated row slots store valid-unconstrained bytes, i.e. are never uninit.
161 present_rows: FixedBitSet,
162}
163
164impl MemoryUsage for FixedHeader {
165 fn heap_usage(&self) -> usize {
166 let Self {
167 next_free,
168 last,
169 num_rows,
170 present_rows,
171 } = self;
172 next_free.heap_usage() + last.heap_usage() + num_rows.heap_usage() + present_rows.heap_usage()
173 }
174}
175
176static_assert_size!(FixedHeader, 16);
177
178impl FixedHeader {
179 /// Returns a new `FixedHeader`
180 /// using the provided `fixed_row_size` to compute the `present_rows` bitvec.
181 #[inline]
182 fn new(fixed_row_size: Size) -> Self {
183 Self {
184 next_free: FreeCellRef::NIL,
185 // Points one after the last allocated fixed-length row, or `NULL` for an empty page.
186 last: PageOffset::VAR_LEN_NULL,
187 num_rows: 0,
188 present_rows: FixedBitSet::new(max_rows_in_page(fixed_row_size)),
189 }
190 }
191
192 /// Set the (fixed) row starting at `offset`
193 /// and lasting `fixed_row_size` as `present`.
194 #[inline]
195 fn set_row_present(&mut self, offset: PageOffset, fixed_row_size: Size) {
196 self.set_row_presence(offset, fixed_row_size, true);
197 self.num_rows += 1;
198 }
199
200 /// Sets whether the (fixed) row starting at `offset`
201 /// and lasting `fixed_row_size` is `present` or not.
202 #[inline]
203 fn set_row_presence(&mut self, offset: PageOffset, fixed_row_size: Size, present: bool) {
204 self.present_rows.set(offset / fixed_row_size, present);
205 }
206
207 /// Returns whether the (fixed) row starting at `offset`
208 /// and lasting `fixed_row_size` is present or not.
209 #[inline]
210 fn is_row_present(&self, offset: PageOffset, fixed_row_size: Size) -> bool {
211 self.present_rows.get(offset / fixed_row_size)
212 }
213
214 /// Resets the header information to its state
215 /// when it was first created in [`FixedHeader::new`].
216 ///
217 /// The header is only good for the original row size.
218 #[inline]
219 fn clear(&mut self) {
220 self.next_free = FreeCellRef::NIL;
221 self.last = PageOffset::VAR_LEN_NULL;
222 self.num_rows = 0;
223 self.present_rows.clear();
224 }
225}
226
227/// All the var-len header information.
228#[repr(C)] // Required for a stable ABI.
229#[derive(bytemuck::NoUninit, Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
230struct VarHeader {
231 /// A pointer to the head of the freelist which stores
232 /// all the unused (freed) var-len granules.
233 /// These cells can be reused when inserting a new row.
234 next_free: FreeCellRef,
235
236 /// The length of the freelist with its head referred to by `next_free`.
    /// Stored in units of var-len granules.
238 ///
239 /// This field is redundant,
240 /// as it can be recovered by traversing `next_free`.
241 /// However, traversing this linked-list is not cache friendly,
242 /// so we memoize the computation here.
243 freelist_len: u16,
244
245 /// High water mark (HWM) for var-len granules.
246 /// Points to the last-allocated (lowest-indexed) var-len granule,
247 /// so to allocate a var-len granule from the gap,
248 /// pre-decrement this index.
249 // TODO(perf,future-work): determine how to "lower" the high water mark when freeing the "top"-most granule.
250 first: PageOffset,
251
252 /// The number of granules currently used by rows within this page.
253 ///
254 /// [`Page::bytes_used_by_rows`] needs this information.
255 /// Stored here because otherwise counting it would require traversing all the present rows.
256 num_granules: u16,
257}
258
259impl MemoryUsage for VarHeader {
260 fn heap_usage(&self) -> usize {
261 let Self {
262 next_free,
263 freelist_len,
264 first,
265 num_granules,
266 } = self;
267 next_free.heap_usage() + freelist_len.heap_usage() + first.heap_usage() + num_granules.heap_usage()
268 }
269}
270
271static_assert_size!(VarHeader, 8);
272
273impl Default for VarHeader {
274 fn default() -> Self {
275 Self {
276 next_free: FreeCellRef::NIL,
277 freelist_len: 0,
278 first: PageOffset::PAGE_END,
279 num_granules: 0,
280 }
281 }
282}
283
284impl VarHeader {
285 /// Resets the header information to its state
286 /// when it was first created in [`VarHeader::default`].
287 fn clear(&mut self) {
288 *self = Self::default();
289 }
290}
291
292/// The metadata / header of a page that is necessary
293/// for modifying and interpreting the `row_data`.
294///
295/// This header info is split into a header for the fixed part
296/// and one for the variable part.
297/// The header is stored in the same heap allocation as the `row_data`
298/// as the whole [`Page`] is `Box`ed.
299#[repr(C)] // Required for a stable ABI.
300#[repr(align(64))] // Alignment must be same as `VarLenGranule::SIZE`.
301#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] // So we can dump and restore pages during snapshotting.
302struct PageHeader {
303 /// The header data relating to the fixed component of a row.
304 fixed: FixedHeader,
305 /// The header data relating to the var-len component of a row.
306 var: VarHeader,
307 /// The content-addressed hash of the page on disk,
308 /// if unmodified since the last snapshot,
309 /// and `None` otherwise.
310 ///
    /// This means that modifications to the page always set this field to `None`.
312 unmodified_hash: Option<blake3::Hash>,
313}
314
315impl MemoryUsage for PageHeader {
316 fn heap_usage(&self) -> usize {
317 let Self {
318 fixed,
319 var,
320 // MEMUSE: no allocation, ok to ignore
321 unmodified_hash: _,
322 } = self;
323 fixed.heap_usage() + var.heap_usage()
324 }
325}
326
327static_assert_size!(PageHeader, PAGE_HEADER_SIZE);
328
329impl PageHeader {
330 /// Returns a new `PageHeader` proper for fixed-len rows of `fixed_row_size`.
331 fn new(fixed_row_size: Size) -> Self {
332 Self {
333 fixed: FixedHeader::new(fixed_row_size),
334 var: VarHeader::default(),
335 unmodified_hash: None,
336 }
337 }
338
339 /// Resets the header information to its state
340 /// when it was first created in [`PageHeader::new`].
341 ///
342 /// The header is only good for the original row size.
343 fn clear(&mut self) {
344 self.fixed.clear();
345 self.var.clear();
346 self.unmodified_hash = None;
347 }
348}
349
350/// Fixed-length row portions must be at least large enough to store a `FreeCellRef`.
351const _MIN_ROW_SIZE_CAN_STORE_FCR: () = assert!(MIN_ROW_SIZE.len() >= mem::size_of::<FreeCellRef>());
352
/// [`VarLenGranule`]s must be at least as large as `MIN_ROW_SIZE`,
/// and therefore, by the assertion above, large enough to store a [`FreeCellRef`].
354const _VLG_CAN_STORE_FCR: () = assert!(VarLenGranule::SIZE.len() >= MIN_ROW_SIZE.len());
355
356/// Pointers properly aligned for a [`VarLenGranule`] must be properly aligned for [`FreeCellRef`].
357/// This is the case as the former's alignment is a multiple of the latter's alignment.
358const _VLG_ALIGN_MULTIPLE_OF_FCR: () = assert!(mem::align_of::<VarLenGranule>() % mem::align_of::<FreeCellRef>() == 0);
359
360/// A page of row data with an associated `header` and the raw `row_data` itself.
361///
362/// As a rough summary, the strategy employed by this page is:
363///
364/// - The fixed-len parts of rows grows left-to-right
365/// and starts from the beginning of the `row_data`
366/// until its high water mark (fixed HWM), i.e., `self.header.fixed.last`.
367///
368/// - The var-len parts of rows grows right-to-left
369/// and starts from the end of the `row_data`
370/// until its high water mark (variable HWM), i.e., `self.header.var.first`.
371///
372/// Each var-len object is stored in terms of a linked-list of chunks.
373/// Each chunk in this case is a [`VarLenGranule`] taking up 64 bytes where:
/// - a 2-byte header: 6 bits = length, 10 bits = next-cell-pointer
375/// - 62 bytes = the bytes of the object
376///
377/// - As new rows are added, the HWMs move appropriately.
378/// When the fixed and variable HWMs meet, the page is full.
379///
380/// - When rows are freed, a freelist strategy is used both for
381/// the fixed parts and each `VarLenGranule`.
382/// These freelists are then used first before using space from the gap.
383/// The head of these freelists are stored in `next_free`
384/// in the fixed and variable headers respectively.
385///
386/// - As the fixed parts of rows may store pointers into the var-length section,
387/// to ensure that these pointers aren't dangling,
388/// the page uses pointer fixups when adding to, deleting from, and copying the page.
389/// These fixups are handled by having callers provide `VarLenMembers`
390/// to find the var-len reference slots in the fixed parts.
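///
/// Illustrative layout of `row_data` (not to scale):
///
/// ```text
/// 0 .. fixed.last           | gap |           var.first .. PAGE_END
/// [ fixed-len rows --->              <--- var-len granules        ]
/// ```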
391#[repr(C)]
392// ^-- Required for a stable ABI.
393#[repr(align(64))]
394// ^-- Must have align at least that of `VarLenGranule`,
395// so that `row_data[PageOffset::PAGE_END - VarLenGranule::SIZE]` is an aligned pointer to `VarLenGranule`.
396// TODO(bikeshedding): consider raising the alignment. We may want this to be OS page (4096) aligned.
397#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] // So we can dump and restore pages during snapshotting.
398pub struct Page {
399 /// The header containing metadata on how to interpret and modify the `row_data`.
400 header: PageHeader,
401 /// The actual bytes stored in the page.
402 /// This contains row data, fixed and variable, and freelists.
403 row_data: [Byte; PageOffset::PAGE_END.idx()],
404}
405
406impl MemoryUsage for Page {
407 fn heap_usage(&self) -> usize {
408 self.header.heap_usage()
409 }
410}
411
412static_assert_size!(Page, PAGE_SIZE);
413
414/// A mutable view of the fixed-len section of a [`Page`].
415pub struct FixedView<'page> {
416 /// A mutable view of the fixed-len bytes.
417 fixed_row_data: &'page mut Bytes,
418 /// A mutable view of the fixed header.
419 header: &'page mut FixedHeader,
420}
421
422impl FixedView<'_> {
423 /// Returns a mutable view of the row from `start` lasting `fixed_row_size` number of bytes.
424 ///
425 /// This method is safe, but callers should take care that `start` and `fixed_row_size`
426 /// are correct for this page, and that `start` is aligned.
427 /// Callers should further ensure that mutations to the row leave the row bytes
428 /// in an expected state, i.e. initialized where required by the row type,
429 /// and with `VarLenRef`s that point to valid granules and with correct lengths.
430 pub fn get_row_mut(&mut self, start: PageOffset, fixed_row_size: Size) -> &mut Bytes {
431 &mut self.fixed_row_data[start.range(fixed_row_size)]
432 }
433
434 /// Returns a shared view of the row from `start` lasting `fixed_row_size` number of bytes.
435 fn get_row(&mut self, start: PageOffset, fixed_row_size: Size) -> &Bytes {
436 &self.fixed_row_data[start.range(fixed_row_size)]
437 }
438
439 /// Frees the row starting at `row_offset` and lasting `fixed_row_size` bytes.
440 ///
441 /// # Safety
442 ///
443 /// `range_move(0..fixed_row_size, row_offset)` must be in bounds of `row_data`.
444 /// Moreover, it must be valid for writing a `FreeCellRef` to it,
445 /// which includes being properly aligned with respect to `row_data` for a `FreeCellRef`.
446 pub unsafe fn free(&mut self, row_offset: PageOffset, fixed_row_size: Size) {
447 // TODO(perf,future-work): if `row` is at the HWM, return it to the gap.
448
449 // SAFETY: Per caller contract, `row_offset` must be in bounds of `row_data`.
450 // Moreover, it must be valid for writing a `FreeCellRef` to it,
451 // which includes being properly aligned with respect to `row_data` for a `FreeCellRef`.
452 // We also know that `self.header.next_free` contains a valid `FreeCellRef`.
453 unsafe {
454 self.header
455 .next_free
456 .prepend_freelist(self.fixed_row_data, row_offset, |x| x)
457 };
458 self.header.num_rows -= 1;
459 self.header.set_row_presence(row_offset, fixed_row_size, false);
460 }
461}
462
463/// A mutable view of the var-len section of a [`Page`].
464pub struct VarView<'page> {
465 /// A mutable view of the var-len bytes.
466 var_row_data: &'page mut Bytes,
467 /// A mutable view of the var-len header.
468 header: &'page mut VarHeader,
469 /// One past the end of the fixed-len section of the page.
470 last_fixed: PageOffset,
471}
472
473impl<'page> VarView<'page> {
474 /// Returns the number of granules required to store the data,
475 /// whether the page has enough space,
476 /// and whether the object needs to go in the blob store.
477 ///
478 /// If the third value is `true`, i.e., the object will go in the blob store,
479 /// the first value will always be `1`.
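    ///
    /// For illustration (not normative): a 100-byte object needs
    /// `ceil(100 / 62) = 2` granules, since each [`VarLenGranule`] holds up to
    /// 62 bytes of data; assuming 100 bytes is below the large-blob threshold,
    /// the third value would be `false`.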
480 fn has_enough_space_for(&self, len_in_bytes: usize) -> (usize, bool, bool) {
481 let (num_granules_req, in_blob) = VarLenGranule::bytes_to_granules(len_in_bytes);
482 let enough_space = num_granules_req <= self.num_granules_available();
483 (num_granules_req, enough_space, in_blob)
484 }
485
486 /// Returns the number of granules available for allocation.
487 fn num_granules_available(&self) -> usize {
488 self.header.freelist_len as usize
489 + VarLenGranule::space_to_granules(gap_remaining_size(self.header.first, self.last_fixed))
490 }
491
492 /// Provides an adjuster of offset in terms of `Page::row_data`
493 /// to work in terms of `VarView::var_row_data`.
494 ///
495 /// This has to be done due to `page.row_data.split_at_mut(last_fixed)`.
496 #[inline(always)]
497 fn adjuster(&self) -> impl FnOnce(PageOffset) -> PageOffset {
498 let lf = self.last_fixed;
499 move |offset| offset - lf
500 }
501
502 /// Allocates a linked-list of granules, in the var-len storage of the page,
503 /// for a var-len object of `obj_len` bytes.
504 ///
505 /// Returns a [`VarLenRef`] pointing to the head of that list,
506 /// and a boolean `in_blob` for whether the allocation is a `BlobHash`
507 /// and the object must be inserted into the large-blob store.
508 ///
509 /// The length of each granule is set, but no data is written to any granule.
510 /// Thus, the caller must proceed to write data to each granule for the claimed lengths.
511 ///
512 /// # Safety post-requirements
513 ///
514 /// The following are the safety *post-requirements* of calling this method.
515 /// That is, this method is safe to call,
516 /// but may leave the page in an inconsistent state
517 /// which must be rectified before other **unsafe methods** may be called.
518 ///
519 /// 1. When the returned `in_blob` holds, caller must ensure that,
520 /// before the granule's data is read from / assumed to be initialized,
521 /// the granule pointed to by the returned `vlr.first_granule`
522 /// has an initialized header and a data section initialized to at least
523 /// as many bytes as claimed by the header.
524 ///
525 /// 2. The caller must initialize each granule with data for the claimed length
526 /// of the granule's data.
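    ///
    /// For illustration (not normative): with `obj_len = 130` and 62-byte granule
    /// data sections, `130 = 62 + 62 + 6`, so 3 granules are allocated; the last
    /// granule in the list holds the trailing 6 bytes and the other two are full.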
527 pub fn alloc_for_len(&mut self, obj_len: usize) -> Result<(VarLenRef, bool), Error> {
528 // Safety post-requirements of `alloc_for_obj_common`:
529 // 1. caller promised they will be satisfied.
530 // 2a. already satisfied as the closure below returns all the summands of `obj_len`.
531 // 2b. caller promised in 2. that they will satisfy this.
532 self.alloc_for_obj_common(obj_len, |req_granules| {
533 let rem = obj_len % VarLenGranule::DATA_SIZE;
534 (0..req_granules).map(move |rev_idx| {
535 let len = if rev_idx == 0 && rem != 0 {
536 // The first allocated granule will be the last in the list.
537 // Thus, `rev_idx == 0` is the last element and might not take up a full granule.
538 rem
539 } else {
540 VarLenGranule::DATA_SIZE
541 };
542 // Caller will initialize the granule's data for `len` bytes.
543 (<&[u8]>::default(), len)
544 })
545 })
546 }
547
548 /// Returns an iterator over all offsets of the `VarLenGranule`s of the var-len object
549 /// that has its first granule at offset `first_granule`.
550 /// An empty iterator will be returned when `first_granule` is `NULL`.
551 ///
552 /// # Safety
553 ///
554 /// `first_granule` must be an offset to a granule or `NULL`.
555 /// The data of the granule need not be initialized.
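    ///
    /// A usage sketch (hypothetical `var_view` and `first_granule`, with an
    /// assumed source of object bytes):
    ///
    /// ```ignore
    /// // SAFETY: `first_granule` points to a granule of this object, or is NULL.
    /// let mut iter = unsafe { var_view.granule_offset_iter(first_granule) };
    /// while let Some(offset) = iter.next() {
    ///     // SAFETY: `offset` was yielded by the iterator, so it points to a valid granule,
    ///     // and `0 < VarLenGranule::DATA_SIZE`.
    ///     let data = unsafe { iter.get_mut_data(offset, 0) };
    ///     // ... copy the next chunk of the object into `data` ...
    /// }
    /// ```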
556 pub unsafe fn granule_offset_iter(&mut self, first_granule: PageOffset) -> GranuleOffsetIter<'_, 'page> {
557 GranuleOffsetIter {
558 next_granule: first_granule,
559 var_view: self,
560 }
561 }
562
563 /// Allocates and stores `slice` as a linked-list of granules
564 /// in the var-len storage of the page.
565 ///
566 /// Returns a [`VarLenRef`] pointing to the head of that list,
567 /// and a boolean `in_blob` for whether the allocation is a `BlobHash`
568 /// and the `slice` must be inserted into the large-blob store.
569 ///
570 /// # Safety post-requirements
571 ///
572 /// The following are the safety *post-requirements* of calling this method.
573 /// That is, this method is safe to call,
574 /// but may leave the page in an inconsistent state
575 /// which must be rectified before other **unsafe methods** may be called.
576 ///
577 /// 1. When the returned `in_blob` holds, caller must ensure that,
578 /// before the granule's data is read from / assumed to be initialized,
579 /// the granule pointed to by the returned `vlr.first_granule`
580 /// has an initialized header and a data section initialized to at least
581 /// as many bytes as claimed by the header.
582 pub fn alloc_for_slice(&mut self, slice: &[u8]) -> Result<(VarLenRef, bool), Error> {
583 let obj_len = slice.len();
584 // Safety post-requirement 2. of `alloc_for_obj_common` is already satisfied
585 // as `chunks(slice)` will return sub-slices where the sum is `obj_len`.
586 // Moreover, we initialize each granule already with the right data and length.
587 // The requirement 1. is forwarded to the caller.
588 let chunks = |_| VarLenGranule::chunks(slice).rev().map(|c| (c, c.len()));
589 self.alloc_for_obj_common(obj_len, chunks)
590 }
591
592 /// Allocates for `obj_len` bytes as a linked-list of granules
593 /// in the var-len storage of the page.
594 ///
595 /// For every granule in the aforementioned linked-list,
596 /// the caller must provide an element in the *reversed* iterator `chunks`,
597 /// and of pairs `(chunk, len)`.
598 /// To each granule `chunk` will be written and the granule will be of length `len`.
    /// The caller may provide a `chunk` whose length differs from `len`.
600 ///
601 /// Returns a [`VarLenRef`] pointing to the head of that list,
602 /// and a boolean `in_blob` for whether the allocation is a `BlobHash`
603 /// and the `slice` must be inserted into the large-blob store.
604 ///
605 /// # Safety post-requirements
606 ///
607 /// The following are the safety *post-requirements* of calling this method.
608 /// That is, this method is safe to call,
609 /// but may leave the page in an inconsistent state
610 /// which must be rectified before other **unsafe methods** may be called.
611 ///
612 /// 1. When the returned `in_blob` holds, caller must ensure that,
613 /// before the granule's data is read from / assumed to be initialized,
614 /// the granule pointed to by the returned `vlr.first_granule`
615 /// has an initialized header and a data section initialized to at least
616 /// as many bytes as claimed by the header.
617 ///
618 /// 2. Otherwise, when `in_blob` doesn't hold the safety post-requirements are:
619 ///
620 /// a. Let `cs = chunks(req_granules)` for the `req_granules` derived from `obj_len`.
621 /// Then, `obj_len == cs.map(|(_, len)| len).sum()`.
622 ///
623 /// b. For each `(_, len) ∈ cs`, caller must ensure that
624 /// the relevant granule is initialized with data for at least `len`
625 /// before the granule's data is read from / assumed to be initialized.
626 fn alloc_for_obj_common<'chunk, Cs: Iterator<Item = (&'chunk [u8], usize)>>(
627 &mut self,
628 obj_len: usize,
629 chunks: impl Copy + FnOnce(usize) -> Cs,
630 ) -> Result<(VarLenRef, bool), Error> {
631 // Check that we have sufficient space to allocate `obj_len` bytes in var-len data.
632 let (req_granules, enough_space, in_blob) = self.has_enough_space_for(obj_len);
633 if !enough_space {
634 return Err(Error::InsufficientVarLenSpace {
635 need: req_granules.try_into().unwrap_or(u16::MAX),
636 have: self.num_granules_available().try_into().unwrap_or(u16::MAX),
637 });
638 }
639
640 // For large blob objects, only reserve a granule.
641 // The caller promised that they will initialize it with a blob hash.
642 if in_blob {
643 let vlr = self.alloc_blob_hash()?;
644 return Ok((vlr, true));
645 };
646
647 // Write each `chunk` to var-len storage.
648 // To do this, we allocate granules for and store the chunks in reverse,
649 // starting with the end first.
650 // The offset to the previous granule in the iteration is kept to
651 // link it in as the next pointer in the current iteration.
652 let mut next = PageOffset::VAR_LEN_NULL;
653 debug_assert_eq!(obj_len, chunks(req_granules).map(|(_, len)| len).sum::<usize>());
654 for (chunk, len) in chunks(req_granules) {
655 // This should never error, since we already checked for available space.
656 let granule = self.alloc_granule()?;
657 // SAFETY:
658 // 1. `granule` is properly aligned as it came from `alloc_granule`
659 // and so is `next` as it's either NULL or was the previous `granule`.
660 // This also ensures that both are in bounds
            // of the page for `granule .. granule + VarLenGranule::SIZE`.
662 //
663 // 2. `next` is either NULL or was initialized in the previous loop iteration.
664 //
665 // 3. `granule` points to an unused slot as the space was just allocated.
666 unsafe { self.write_chunk_to_granule(chunk, len, granule, next) };
667 next = granule;
668 }
669
670 Ok((
671 VarLenRef {
672 first_granule: next,
673 length_in_bytes: obj_len as u16,
674 },
675 false,
676 ))
677 }
678
679 /// Allocates a granule for a large blob object
680 /// and returns a [`VarLenRef`] pointing to that granule.
681 ///
682 /// The granule is not initialized by this method, and contains valid-unconstrained bytes.
683 /// It is the caller's responsibility to initialize it with a [`BlobHash`](super::blob_hash::BlobHash).
684 #[cold]
685 fn alloc_blob_hash(&mut self) -> Result<VarLenRef, Error> {
686 // Var-len hashes are 32 bytes, which fits within a single granule.
687 self.alloc_granule().map(VarLenRef::large_blob)
688 }
689
690 /// Inserts `var_len_obj` into `blob_store`
691 /// and stores the blob hash in the granule pointed to by `vlr.first_granule`.
692 ///
693 /// This insertion will never fail.
694 ///
695 /// # Safety
696 ///
697 /// `vlr.first_granule` must point to an unused `VarLenGranule` in bounds of this page,
698 /// which must be valid for writes.
699 pub unsafe fn write_large_blob_hash_to_granule(
700 &mut self,
701 blob_store: &mut dyn BlobStore,
702 var_len_obj: &impl AsRef<[u8]>,
703 vlr: VarLenRef,
704 ) -> BlobNumBytes {
705 let hash = blob_store.insert_blob(var_len_obj.as_ref());
706
707 let granule = vlr.first_granule;
708 // SAFETY:
709 // 1. `granule` is properly aligned for `VarLenGranule` and is in bounds of the page.
710 // 2. The null granule is trivially initialized.
711 // 3. The caller promised that `granule` is safe to overwrite.
712 unsafe { self.write_chunk_to_granule(&hash.data, hash.data.len(), granule, PageOffset::VAR_LEN_NULL) };
713 var_len_obj.as_ref().len().into()
714 }
715
716 /// Write the `chunk` (data) to the [`VarLenGranule`] pointed to by `granule`,
717 /// set the granule's length to be `len`,
718 /// and set the next granule in the list to `next`.
719 ///
720 /// SAFETY:
721 ///
722 /// 1. Both `granule` and `next` must be properly aligned pointers to [`VarLenGranule`]s
723 /// and they must be in bounds of the page. However, neither need to point to init data.
724 ///
725 /// 2. The caller must initialize the granule pointed to by `next`
726 /// before the granule-list is read from (e.g., iterated on).
727 /// The null granule is considered trivially initialized.
728 ///
729 /// 3. The space pointed to by `granule` must be unused and valid for writes,
730 /// and will be overwritten here.
731 unsafe fn write_chunk_to_granule(&mut self, chunk: &[u8], len: usize, granule: PageOffset, next: PageOffset) {
732 let granule = self.adjuster()(granule);
733 // SAFETY: A `PageOffset` is always in bounds of the page.
734 let ptr: *mut VarLenGranule = unsafe { offset_to_ptr_mut(self.var_row_data, granule).cast() };
735
736 // TODO(centril,bikeshedding): check if creating the `VarLenGranule` first on stack
737 // and then writing to `ptr` would have any impact on perf.
738 // This would be nicer as it requires less `unsafe`.
739
        // We need to initialize the granule's header
        // without materializing a `&mut` as that is instant UB.
        // SAFETY: `ptr` isn't NULL as `&mut self.var_row_data` itself is a non-null pointer.
743 let header = unsafe { &raw mut (*ptr).header };
744
745 // SAFETY: `header` is valid for writes as only we have exclusive access.
746 // (1) The `ptr` was also promised as aligned
        // and `granule .. granule + 64 bytes` is in bounds of the page per caller contract.
748 // (2) Moreover, `next` will be an initialized granule per caller contract,
749 // so we can link it into the list without causing UB elsewhere.
750 // (3) It's also OK to write to `granule` as it's unused.
751 unsafe {
752 header.write(VarLenGranuleHeader::new(len as u8, next));
753 }
754
755 // SAFETY: We can treat any part of `row_data` as `.data`. Also (1) and (2).
756 let data = unsafe { &mut (*ptr).data };
757
758 // Copy the data into the granule.
759 data[0..chunk.len()].copy_from_slice(chunk);
760 }
761
762 /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`].
763 ///
764 /// The allocated storage is not initialized by this method,
765 /// and will be valid-unconstrained at [`VarLenGranule`].
766 ///
767 /// This offset will be properly aligned for `VarLenGranule` when converted to a pointer.
768 ///
769 /// Returns an error when there are neither free granules nor space in the gap left.
770 fn alloc_granule(&mut self) -> Result<PageOffset, Error> {
771 let granule = self
772 .alloc_from_freelist()
773 .or_else(|| self.alloc_from_gap())
774 .ok_or(Error::InsufficientVarLenSpace { need: 1, have: 0 })?;
775
776 debug_assert!(
777 is_granule_offset_aligned(granule),
778 "Allocated an unaligned var-len granule: {:x}",
779 granule,
780 );
781
782 self.header.num_granules += 1;
783
784 Ok(granule)
785 }
786
787 /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`]
788 /// taken from the freelist, if any.
789 #[inline]
790 fn alloc_from_freelist(&mut self) -> Option<PageOffset> {
        // SAFETY: `header.next_free` points to a valid `FreeCellRef` when the former `.has()`.
792 let free = unsafe {
793 self.header
794 .next_free
795 .take_freelist_head(self.var_row_data, |o| o - self.last_fixed)
796 }?;
797 self.header.freelist_len -= 1;
798 Some(free)
799 }
800
801 /// Allocate a [`VarLenGranule`] at the returned [`PageOffset`]
802 /// taken from the gap, if there is space left, or `None` if there is insufficient space.
803 #[inline]
804 fn alloc_from_gap(&mut self) -> Option<PageOffset> {
805 if gap_enough_size_for_row(self.header.first, self.last_fixed, VarLenGranule::SIZE) {
806 // `var.first` points *at* the lowest-indexed var-len granule,
807 // *not* before it, so pre-decrement.
808 self.header.first -= VarLenGranule::SIZE;
809 Some(self.header.first)
810 } else {
811 None
812 }
813 }
814
815 /// Free a single var-len granule pointed to at by `offset`.
816 ///
817 /// SAFETY: `offset` must point to a valid [`VarLenGranule`].
818 #[inline]
819 unsafe fn free_granule(&mut self, offset: PageOffset) {
820 // TODO(perf,future-work): if `chunk` is at the HWM, return it to the gap.
821 // Returning a single chunk to the gap is easy,
822 // but we want to return a whole "run" of sequential freed chunks,
        // which requires some bookkeeping (or an O(> n) linked list traversal).
824 self.header.freelist_len += 1;
825 self.header.num_granules -= 1;
826 let adjuster = self.adjuster();
827
828 // SAFETY: Per caller contract, `offset` is a valid `VarLenGranule`,
829 // and is therefore in bounds of the page row data.
830 // By `_VLG_CAN_STORE_FCR`, and as we won't be reading from the granule anymore,
831 // we know that this makes it valid for writing a `FreeCellRef` to it.
832 // Moreover, by `_VLG_ALIGN_MULTIPLE_OF_FCR`,
833 // the derived pointer is properly aligned (64) for a granule
834 // and as `64 % 2 == 0` the alignment of a granule works for a `FreeCellRef`.
835 // Finally, `self.header.next_free` contains a valid `FreeCellRef`.
836 unsafe {
837 self.header
838 .next_free
839 .prepend_freelist(self.var_row_data, offset, adjuster)
840 };
841 }
842
843 /// Returns a reference to the granule at `offset`.
844 ///
845 /// SAFETY: `offset` must point to a valid [`VarLenGranule`].
846 unsafe fn get_granule_ref(&self, offset: PageOffset) -> &VarLenGranule {
847 unsafe { get_ref(self.var_row_data, self.adjuster()(offset)) }
848 }
849
850 /// Frees the blob pointed to by the [`BlobHash`] stored in the granule at `offset`.
851 ///
852 /// Panics when `offset` is NULL.
853 ///
854 /// SAFETY: `offset` must point to a valid [`VarLenGranule`] or be NULL.
855 #[cold]
856 #[inline(never)]
857 unsafe fn free_blob(&self, offset: PageOffset, blob_store: &mut dyn BlobStore) -> BlobNumBytes {
858 assert!(!offset.is_var_len_null());
859
860 // SAFETY: Per caller contract + the assertion above,
861 // we know `offset` refers to a valid `VarLenGranule`.
862 let granule = unsafe { self.get_granule_ref(offset) };
863
        // Retrieve the blob hash stored in the granule.
865 let hash = granule.blob_hash();
866
867 // The size of `deleted_bytes` is calculated here instead of requesting it from `blob_store`.
868 // This is because the actual number of bytes deleted depends on the `blob_store`'s logic.
869 // We prefer to measure it from the datastore's point of view.
870 let blob_store_deleted_bytes = blob_store
871 .retrieve_blob(&hash)
872 .expect("failed to free var-len blob")
873 .len()
874 .into();
875
876 // Actually free the blob.
877 blob_store.free_blob(&hash).expect("failed to free var-len blob");
878
879 blob_store_deleted_bytes
880 }
881
882 /// Frees an entire var-len linked-list object.
883 ///
884 /// If the `var_len_obj` is a large blob,
885 /// the `VarLenGranule` which stores its blob hash will be freed from the page,
886 /// but the blob itself will not be freed from the blob store.
887 /// If used incorrectly, this may leak large blobs.
888 ///
    /// This behavior is used to roll back on failure in [`crate::bflatn::ser::write_av_to_page`],
890 /// where inserting large blobs is deferred until all allocations succeed.
891 /// Freeing a fully-inserted object should instead use [`Self::free_object`].
892 ///
893 /// # Safety
894 ///
895 /// `var_len_obj.first_granule` must point to a valid [`VarLenGranule`] or be NULL.
896 pub unsafe fn free_object_ignore_blob(&mut self, var_len_obj: VarLenRef) {
897 let mut next_granule = var_len_obj.first_granule;
898
899 while !next_granule.is_var_len_null() {
900 // SAFETY: Per caller contract, `first_granule` points to a valid granule or is NULL.
901 // We know however at this point that it isn't NULL so it is valid.
902 // Thus the successor is too a valid granule or NULL.
903 // However, again, at this point we know that the successor isn't NULL.
904 // It follows then by induction that any `next_granule` at this point is valid.
905 // Thus we have fulfilled the requirement that `next_granule` points to a valid granule.
906 let header = unsafe { self.get_granule_ref(next_granule) }.header;
907 // SAFETY: `next_granule` still points to a valid granule per above.
908 unsafe {
909 self.free_granule(next_granule);
910 }
911 next_granule = header.next();
912 }
913 }
914
915 /// Frees an entire var-len linked-list object.
916 ///
917 /// SAFETY: `var_len_obj.first_granule` must point to a valid [`VarLenGranule`] or be NULL.
918 unsafe fn free_object(&mut self, var_len_obj: VarLenRef, blob_store: &mut dyn BlobStore) -> BlobNumBytes {
919 let mut blob_store_deleted_bytes = BlobNumBytes::default();
920 // For large blob objects, extract the hash and tell `blob_store` to discard it.
921 if var_len_obj.is_large_blob() {
922 // SAFETY: `var_len_obj.first_granule` was promised to
923 // point to a valid [`VarLenGranule`] or be NULL, as required.
924 unsafe {
925 blob_store_deleted_bytes = self.free_blob(var_len_obj.first_granule, blob_store);
926 }
927 }
928
929 // SAFETY: `free_object_ignore_blob` has the same safety contract as this method.
930 unsafe {
931 self.free_object_ignore_blob(var_len_obj);
932 }
933
934 blob_store_deleted_bytes
935 }
936}
937
938/// An iterator yielding the offsets to the granules of a var-len object.
939pub struct GranuleOffsetIter<'vv, 'page> {
940 /// Our mutable view of the page.
941 var_view: &'vv mut VarView<'page>,
942 /// The offset, that will be yielded next, pointing to next granule.
943 next_granule: PageOffset,
944}
945
946impl GranuleOffsetIter<'_, '_> {
947 /// Returns a mutable view of, for the `granule` at `offset`, `granule.data[start..]`.
948 ///
949 /// # Safety
950 ///
951 /// - `offset` must point to a valid granule
952 /// - `start < VarLenGranule::DATA_SIZE`
953 pub unsafe fn get_mut_data(&mut self, offset: PageOffset, start: usize) -> &mut Bytes {
        // SAFETY: Caller promised that `offset` points to a valid granule.
955 let granule: &mut VarLenGranule = unsafe { get_mut(self.var_view.var_row_data, offset) };
956 // SAFETY: Caller promised `start < granule.data.len()`.
957 unsafe { granule.data.as_mut_slice().get_unchecked_mut(start..) }
958 }
959}
960
961impl Iterator for GranuleOffsetIter<'_, '_> {
962 type Item = PageOffset;
963 fn next(&mut self) -> Option<Self::Item> {
964 let adjust = self.var_view.adjuster();
965
966 if self.next_granule.is_var_len_null() {
967 return None;
968 }
969 let ret = adjust(self.next_granule);
970 // SAFETY: By construction,
971 // the initial `next_granule` was promised to either be `NULL` or point to a valid granule.
972 // For a given granule, the same applies to its `.next()` granule.
973 // At this point, we've excluded `NULL`,
974 // so we know inductively that `next_granule` points to a valid granule, as required.
975 let granule: &VarLenGranule = unsafe { get_ref(self.var_view.var_row_data, ret) };
976 self.next_granule = granule.header.next();
977
978 Some(ret)
979 }
980}
981
982/// Assert that `ptr` is sufficiently aligned to reference a value of `T`.
983///
984/// In release mode, this is a no-op.
985fn assert_alignment<T>(ptr: *const Byte) {
986 debug_assert_eq!(
987 ptr as usize % mem::align_of::<T>(),
988 0,
989 "Wanted a PageOffset with align 0x{:x} (for {}) but found 0x{:x}",
990 mem::align_of::<T>(),
991 std::any::type_name::<T>(),
992 ptr as usize,
993 );
994}
995
996/// Returns a reference to the [`T`] pointed to at by `offset`.
997///
998/// # Safety
999///
1000/// `offset` must point to a valid `T` in `row_data`.
1001#[inline]
1002pub unsafe fn get_ref<T>(row_data: &Bytes, offset: PageOffset) -> &T {
1003 // SAFETY: Caller promised that `offset` is in bounds of `row_data`.
1004 let ptr = unsafe { offset_to_ptr(row_data, offset) };
1005 assert_alignment::<T>(ptr);
1006 let ptr = ptr.cast::<T>();
1007 // SAFETY: Caller promised that `offset` points to a `T` in `row_data`.
1008 unsafe { &*ptr }
1009}
1010
1011/// Returns a mutable reference to the [`T`] pointed to at by `offset`.
1012///
1013/// # Safety
1014///
1015/// `offset` must point to a valid `T` in `row_data`.
1016#[inline]
1017unsafe fn get_mut<T>(row_data: &mut Bytes, offset: PageOffset) -> &mut T {
1018 // SAFETY: Caller promised that `offset` is in bounds of `row_data`.
1019 let ptr = unsafe { offset_to_ptr_mut(row_data, offset) };
1020 assert_alignment::<T>(ptr as *const Byte);
1021 let ptr = ptr.cast::<T>();
1022 // SAFETY: Caller promised that `offset` points to a `T` in `row_data`.
1023 unsafe { &mut *ptr }
1024}
1025
1026/// Returns a raw const pointer into the `row_data` at `offset` bytes.
1027///
1028/// # Safety
1029///
1030/// `offset` must be in bounds or one past end of `row_data`.
1031#[inline]
1032unsafe fn offset_to_ptr(row_data: &Bytes, offset: PageOffset) -> *const Byte {
1033 debug_assert!(offset.idx() <= row_data.len());
1034
1035 // SAFETY: per caller contract, `offset` is in bounds or one past end of `row_data`.
1036 unsafe { row_data.as_ptr().add(offset.idx()) }
1037}
1038
1039/// Returns a raw mutable pointer into the `row_data` at `offset` bytes.
1040///
1041/// SAFETY: `offset` must be in bounds or one past end of `row_data`.
1042#[inline]
1043unsafe fn offset_to_ptr_mut(row_data: &mut Bytes, offset: PageOffset) -> *mut Byte {
1044 debug_assert!(offset.idx() <= row_data.len());
1045
1046 // SAFETY: per caller contract, `offset` is in bounds or one past end of `row_data`.
1047 unsafe { row_data.as_mut_ptr().add(offset.idx()) }
1048}
1049
1050/// Returns the size of the gap,
1051/// assuming `first_var` is the high water mark (HWM) of the var-len section,
1052/// pointing *at* the granule with the lowest offset,
1053/// and `last_fixed` is the HWM of the fixed-len section,
1054/// pointing *one past the end* of the last fixed row.
1055#[inline]
1056fn gap_remaining_size(first_var: PageOffset, last_fixed: PageOffset) -> Size {
1057 // For illustration, suppose `row_data` is 10 bytes, i.e., `[Byte; 10]`.
1058 // Let's assume the following setup with a full page,
1059 // where capital letters are fixed rows and lower case are variable.
1060 //
    // [ A, B, C, D, E, f, g, h, i, j ]
    //                  ^
    //                  first_var
    //                  ^
    //                  last_fixed
1066 //
1067 // The high water mark `first_var` points *at* the granule with the lowest offset (`f`).
1068 // Whereas `last_fixed` points *one past the end* (`f`) of the last fixed row (`E`)
1069 //
1070 // This is the case we have to consider in terms of possible underflow.
1071 // As both HWMs would point at the same place,
1072 // the result would be `0`, and no underflow occurs.
1073 Size((first_var - last_fixed).0)
1074}
1075
1076/// Returns whether the remaining gap is large enough to host an object `fixed_row_size` large,
1077/// assuming `first_var` is the high water mark (HWM) of the var-len section,
1078/// pointing *at* the granule with the lowest offset,
1079/// and `last_fixed` is the HWM of the fixed-len section,
1080/// pointing *one past the end* of the last fixed row.
1081#[inline]
1082fn gap_enough_size_for_row(first_var: PageOffset, last_fixed: PageOffset, fixed_row_size: Size) -> bool {
1083 gap_remaining_size(first_var, last_fixed) >= fixed_row_size
1084}
1085
1086impl Page {
1087 /// Returns a new page allocated on the heap.
1088 ///
1089 /// The new page supports fixed rows of size `fixed_row_size`.
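    ///
    /// # Example
    ///
    /// A sketch, assuming a table whose fixed rows are 24 bytes wide
    /// (`Size` wraps a byte count):
    ///
    /// ```ignore
    /// let page = Page::new(Size(24));
    /// assert_eq!(page.num_rows(), 0);
    /// ```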
1090 pub fn new(fixed_row_size: Size) -> Box<Self> {
1091 // TODO(perf): mmap? allocator may do so already.
1092 // mmap may be more efficient as we save allocator metadata.
1093 use std::alloc::{alloc_zeroed, handle_alloc_error, Layout};
1094
1095 let layout = Layout::new::<Page>();
1096
1097 // Allocate with `alloc_zeroed` so that the bytes are initially 0, rather than uninit.
1098 // We will never write an uninit byte into the page except in the `PageHeader`,
1099 // so it is safe for `row_data` to have type `[u8; _]` rather than `[MaybeUninit<u8>; _]`.
1100 // `alloc_zeroed` may be more efficient than `alloc` + `memset`;
1101 // in particular, it may `mmap` pages directly from the OS, which are always zeroed for security reasons.
1102 // TODO: use Box::new_zeroed() once stabilized.
1103 // SAFETY: The layout's size is non-zero.
1104 let raw: *mut Page = unsafe { alloc_zeroed(layout) }.cast();
1105
1106 if raw.is_null() {
1107 handle_alloc_error(layout);
1108 }
1109
1110 // We need to initialize `Page::header`
1111 // without materializing a `&mut` as that is instant UB.
1112 // SAFETY: `raw` isn't NULL.
1113 let header = unsafe { &raw mut (*raw).header };
1114
1115 // SAFETY: `header` is valid for writes as only we have exclusive access.
1116 // The pointer is also aligned.
1117 unsafe { header.write(PageHeader::new(fixed_row_size)) };
1118
1119 // SAFETY: We used the global allocator with a layout for `Page`.
1120 // We have initialized the `header`,
1121 // and the `row_bytes` are initially 0 by `alloc_zeroed`,
1122 // making the pointee a `Page` valid for reads and writes.
1123 unsafe { Box::from_raw(raw) }
1124 }
1125
1126 /// Returns the number of rows stored in this page.
1127 ///
1128 /// This method runs in constant time.
1129 pub fn num_rows(&self) -> usize {
1130 self.header.fixed.num_rows as usize
1131 }
1132
1133 #[cfg(test)]
1134 /// Use this page's present rows bitvec to compute the number of present rows.
1135 ///
1136 /// This can be compared with [`Self::num_rows`] as a consistency check during tests.
1137 pub fn reconstruct_num_rows(&self) -> usize {
1138 // If we cared, we could rewrite this to `u64::count_ones` on each block of the bitset.
1139 // We do not care. This method is slow.
1140 self.header.fixed.present_rows.iter_set().count()
1141 }
1142
1143 /// Returns the number of var-len granules allocated in this page.
1144 ///
1145 /// This method runs in constant time.
1146 pub fn num_var_len_granules(&self) -> usize {
1147 self.header.var.num_granules as usize
1148 }
1149
1150 #[cfg(test)]
1151 /// # Safety
1152 ///
1153 /// - `var_len_visitor` must be a valid [`VarLenMembers`] visitor
1154 /// specialized to the type and layout of rows within this [`Page`].
1155 /// - `fixed_row_size` must be exactly the length in bytes of fixed rows in this page,
1156 /// which must further be the length of rows expected by the `var_len_visitor`.
1157 pub unsafe fn reconstruct_num_var_len_granules(
1158 &self,
1159 fixed_row_size: Size,
1160 var_len_visitor: &impl VarLenMembers,
1161 ) -> usize {
1162 self.iter_fixed_len(fixed_row_size)
1163 .flat_map(|row| unsafe {
1164 // Safety: `row` came out of `iter_fixed_len`,
1165 // which, due to caller requirements on `fixed_row_size`,
1166 // is giving us valid, aligned, initialized rows of the row type.
1167 var_len_visitor.visit_var_len(self.get_row_data(row, fixed_row_size))
1168 })
1169 .flat_map(|var_len_obj| unsafe {
1170 // Safety: We believe `row` to be valid
1171 // and `var_len_visitor` to be correctly visiting its var-len members.
1172 // Therefore, `var_len_obj` is a valid var-len object.
1173 self.iter_var_len_object(var_len_obj.first_granule)
1174 })
1175 .count()
1176 }
1177
1178 /// Returns the number of bytes used by rows stored in this page.
1179 ///
1180 /// This is necessarily an overestimate of live data bytes, as it includes:
1181 /// - Padding bytes within the fixed-length portion of the rows.
1182 /// - [`VarLenRef`] pointer-like portions of rows.
1183 /// - Unused trailing parts of partially-filled [`VarLenGranule`]s.
1184 /// - [`VarLenGranule`]s used to store [`BlobHash`]es.
1185 ///
1186 /// Note that large blobs themselves are not counted.
1187 /// The caller should obtain a count of the bytes used by large blobs
1188 /// from the [`super::blob_store::BlobStore`].
1189 ///
1190 /// This method runs in constant time.
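    ///
    /// For illustration (sizes assumed): a page of 24-byte fixed rows holding
    /// 10 rows that together use 3 var-len granules reports
    /// `10 * 24 + 3 * 64 = 432` bytes.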
1191 pub fn bytes_used_by_rows(&self, fixed_row_size: Size) -> usize {
1192 let fixed_row_bytes = self.num_rows() * fixed_row_size.len();
1193 let var_len_bytes = self.num_var_len_granules() * VarLenGranule::SIZE.len();
1194 fixed_row_bytes + var_len_bytes
1195 }
1196
1197 #[cfg(test)]
1198 /// # Safety
1199 ///
1200 /// - `var_len_visitor` must be a valid [`VarLenMembers`] visitor
1201 /// specialized to the type and layout of rows within this [`Page`].
1202 /// - `fixed_row_size` must be exactly the length in bytes of fixed rows in this page,
1203 /// which must further be the length of rows expected by the `var_len_visitor`.
1204 pub unsafe fn reconstruct_bytes_used_by_rows(
1205 &self,
1206 fixed_row_size: Size,
1207 var_len_visitor: &impl VarLenMembers,
1208 ) -> usize {
1209 let fixed_row_bytes = self.reconstruct_num_rows() * fixed_row_size.len();
1210 let var_len_bytes = unsafe { self.reconstruct_num_var_len_granules(fixed_row_size, var_len_visitor) }
1211 * VarLenGranule::SIZE.len();
1212 fixed_row_bytes + var_len_bytes
1213 }
1214
1215 /// Returns the range of row data starting at `offset` and lasting `size` bytes.
1216 pub fn get_row_data(&self, row: PageOffset, size: Size) -> &Bytes {
1217 &self.row_data[row.range(size)]
1218 }
1219
1220 /// Returns whether the row at `offset` is present or not.
1221 pub fn has_row_offset(&self, fixed_row_size: Size, offset: PageOffset) -> bool {
1222 // Check that the `offset` is properly aligned for a row of size `fixed_row_size`.
1223 // This cannot be `debug_assert!` as the caller could rely on this
1224 // reporting properly whether `offset` is at a row boundary or not.
1225 assert_eq!(offset.idx() % fixed_row_size.len(), 0);
1226
1227 self.header.fixed.is_row_present(offset, fixed_row_size)
1228 }
1229
1230 /// Returns split mutable views of this page over the fixed and variable sections.
1231 pub fn split_fixed_var_mut(&mut self) -> (FixedView<'_>, VarView<'_>) {
1232 // The fixed HWM (`fixed.last`) points *one past the end* of the fixed section
1233 // which is exactly what we want for `split_at_mut`.
1234 let last_fixed = self.header.fixed.last;
1235 let (fixed_row_data, var_row_data) = self.row_data.split_at_mut(last_fixed.idx());
1236
1237 // Construct the fixed-len view.
1238 let fixed = FixedView {
1239 fixed_row_data,
1240 header: &mut self.header.fixed,
1241 };
1242
1243 // Construct the var-len view.
1244 let var = VarView {
1245 var_row_data,
1246 header: &mut self.header.var,
1247 last_fixed,
1248 };
1249
1250 (fixed, var)
1251 }
1252
1253 /// Returns a mutable view of the row from `start` lasting `fixed_row_size` number of bytes.
1254 ///
1255 /// This method is safe, but callers should take care that `start` and `fixed_row_size`
1256 /// are correct for this page, and that `start` is aligned.
1257 /// Callers should further ensure that mutations to the row leave the row bytes
1258 /// in an expected state, i.e. initialized where required by the row type,
1259 /// and with `VarLenRef`s that point to valid granules and with correct lengths.
1260 ///
1261 /// This call will clear the unmodified hash
    /// as it is expected that the caller will alter the page.
1263 pub fn get_fixed_row_data_mut(&mut self, start: PageOffset, fixed_row_size: Size) -> &mut Bytes {
1264 self.header.unmodified_hash = None;
1265 &mut self.row_data[start.range(fixed_row_size)]
1266 }
1267
1268 /// Return the total required var-len granules to store `objects`.
1269 pub fn total_granules_required_for_objects(objects: &[impl AsRef<[u8]>]) -> usize {
1270 objects
1271 .iter()
1272 .map(|obj| VarLenGranule::bytes_to_granules(obj.as_ref().len()).0)
1273 .sum()
1274 }
1275
1276 /// Does the page have space to store a row,
1277 /// where the fixed size part is `fixed_row_size` bytes large,
1278 /// and the row has the given `var_len_objects`?
1279 pub fn has_space_for_row_with_objects(&self, fixed_row_size: Size, var_len_objects: &[impl AsRef<[u8]>]) -> bool {
1280 let num_granules_required = Self::total_granules_required_for_objects(var_len_objects);
1281 self.has_space_for_row(fixed_row_size, num_granules_required)
1282 }
1283
1284 /// Does the page have space to store a row,
1285 /// where the fixed size part is `fixed_row_size` bytes large,
1286 /// and the variable part requires `num_granules`.
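    ///
    /// For illustration (numbers assumed, not normative): with a 200-byte gap,
    /// a 24-byte fixed row, no free fixed cell, and 1 granule on the var-len
    /// freelist, the gap left for granules is `200 - 24 = 176` bytes, i.e.
    /// `176 / 64 = 2` whole granules, so a row needing up to `2 + 1 = 3`
    /// granules fits.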
1287 pub fn has_space_for_row(&self, fixed_row_size: Size, num_granules: usize) -> bool {
1288 // Determine the gap remaining after allocating for the fixed part.
1289 let gap_remaining = gap_remaining_size(self.header.var.first, self.header.fixed.last);
1290 let gap_avail_for_granules = if self.header.fixed.next_free.has() {
1291 // If we have a free fixed length block, then we can use the whole gap for var-len granules.
1292 gap_remaining
1293 } else {
1294 // If we need to grow the fixed-length store into the gap,
1295 if gap_remaining < fixed_row_size {
            // If the gap is too small for the fixed-length row, fail.
1297 return false;
1298 }
1299 // Otherwise, the space available in the gap for var-len granules
1300 // is the current gap size less the fixed-len row size.
1301 gap_remaining - fixed_row_size
1302 };
1303
1304 // Convert the gap size to granules.
1305 let gap_in_granules = VarLenGranule::space_to_granules(gap_avail_for_granules);
1306 // Account for granules available in the freelist.
1307 let needed_granules_after_freelist = num_granules.saturating_sub(self.header.var.freelist_len as usize);
1308
1309 gap_in_granules >= needed_granules_after_freelist
1310 }
1311
    /// Returns whether the page is full with respect to storing a fixed row with `fixed_row_size`
1313 /// and no variable component.
1314 pub fn is_full(&self, fixed_row_size: Size) -> bool {
1315 !self.has_space_for_row(fixed_row_size, 0)
1316 }
1317
    /// Will leave partially-allocated chunks if it fails prematurely,
1319 /// so always check `Self::has_space_for_row` before calling.
1320 ///
1321 /// This method is provided for testing the page store directly;
1322 /// higher-level codepaths are expected to use [`crate::bflatn::ser::write_av_to_page`],
1323 /// which performs similar operations to this method,
1324 /// but handles rollback on failure appropriately.
1325 ///
1326 /// This function will never fail if `Self::has_space_for_row` has returned true.
1327 ///
1328 /// # Safety
1329 ///
1330 /// - `var_len_visitor` is suitable for visiting var-len refs in `fixed_row`.
1331 ///
1332 /// - `fixed_row.len()` must be consistent with `var_len_visitor` and `self`.
1333 /// That is, `VarLenMembers` must be specialized for a row type with that length,
1334 /// and all past, present, and future fixed-length rows stored in this `Page`
1335 /// must also be of that length.
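    ///
    /// # Example
    ///
    /// A sketch of direct use in tests (hypothetical `page`, `fixed_row`,
    /// `var_len_objects`, `visitor`, and `blob_store`, all assumed consistent
    /// with the page's row type):
    ///
    /// ```ignore
    /// let fixed_row_size = Size(fixed_row.len() as u16);
    /// if page.has_space_for_row_with_objects(fixed_row_size, &var_len_objects) {
    ///     // SAFETY: `visitor` visits the var-len refs of this row type,
    ///     // and `fixed_row.len()` is the fixed row size used for this page.
    ///     let offset = unsafe { page.insert_row(fixed_row, &var_len_objects, &visitor, blob_store) }
    ///         .expect("`has_space_for_row_with_objects` returned true");
    /// }
    /// ```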
1336 pub unsafe fn insert_row(
1337 &mut self,
1338 fixed_row: &Bytes,
1339 var_len_objects: &[impl AsRef<[u8]>],
1340 var_len_visitor: &impl VarLenMembers,
1341 blob_store: &mut dyn BlobStore,
1342 ) -> Result<PageOffset, Error> {
1343 // Allocate the fixed-len row.
1344 let fixed_row_size = Size(fixed_row.len() as u16);
1345
1346 // SAFETY: Caller promised that `fixed_row.len()` uses the right `fixed_row_size`
1347 // and we trust that others have too.
1348 let fixed_len_offset = unsafe { self.alloc_fixed_len(fixed_row_size)? };
1349
1350 // Store the fixed-len row.
1351 let (mut fixed, mut var) = self.split_fixed_var_mut();
1352 let row = fixed.get_row_mut(fixed_len_offset, fixed_row_size);
1353 row.copy_from_slice(fixed_row);
1354
1355 // Store all var-len refs into their appropriate slots in the fixed-len row.
1356 // SAFETY:
1357 // - The `fixed_len_offset` given by `alloc_fixed_len` results in `row`
1358 // being properly aligned for the row type.
1359 // - Caller promised that `fixed_row.len()` matches the row type size exactly.
1360 // - `var_len_visitor` is suitable for `fixed_row`.
1361 let vlr_slot_iter = unsafe { var_len_visitor.visit_var_len_mut(row) };
1362 for (var_len_ref_slot, var_len_obj) in vlr_slot_iter.zip(var_len_objects) {
1363 let (var_len_ref, in_blob) = var.alloc_for_slice(var_len_obj.as_ref())?;
1364 if in_blob {
1365 // The blob store insertion will never fail.
1366 // SAFETY: `alloc_for_slice` always returns a pointer
1367 // to a `VarLenGranule` in bounds of this page.
1368 // As `in_blob` holds, it is also unused, as required.
1369 // We'll now make that granule valid.
1370 unsafe {
1371 var.write_large_blob_hash_to_granule(blob_store, var_len_obj, var_len_ref);
1372 }
1373 }
1374 *var_len_ref_slot = var_len_ref;
1375 }
1376
1377 Ok(fixed_len_offset)
1378 }
1379
1380 /// Allocates space for a fixed size row of `fixed_row_size` bytes.
1381 ///
1382 /// # Safety
1383 ///
1384 /// `fixed_row_size` must be equal to the value passed
1385 /// to all other methods ever invoked on `self`.
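///
/// # Example
///
/// A rough sketch (not compiled here; assumes every row in `page` is 8 bytes):
///
/// ```ignore
/// // SAFETY: `Size(8)` is the fixed row size used for every call on this page.
/// let offset = unsafe { page.alloc_fixed_len(Size(8)) }.expect("page is full");
/// let (mut fixed, _var) = page.split_fixed_var_mut();
/// fixed.get_row_mut(offset, Size(8)).copy_from_slice(&1u64.to_le_bytes());
/// ```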
1386 pub unsafe fn alloc_fixed_len(&mut self, fixed_row_size: Size) -> Result<PageOffset, Error> {
1387 self.alloc_fixed_len_from_freelist(fixed_row_size)
1388 .or_else(|| self.alloc_fixed_len_from_gap(fixed_row_size))
1389 .ok_or(Error::InsufficientFixedLenSpace { need: fixed_row_size })
1390 }
1391
1392 /// Allocates space for a fixed-size row of `fixed_row_size` bytes from the freelist, if possible.
1393 ///
1394 /// This call will clear the unmodified hash if the allocation succeeds.
1395 #[inline]
1396 fn alloc_fixed_len_from_freelist(&mut self, fixed_row_size: Size) -> Option<PageOffset> {
1397 let header = &mut self.header.fixed;
1398 // SAFETY: `header.next_free` points to a `FreeCellRef` when the former `.has()`.
1399 let free = unsafe { header.next_free.take_freelist_head(&self.row_data, |x| x) }?;
1400 header.set_row_present(free, fixed_row_size);
1401
1402 // We have modified the page, so clear the unmodified hash.
1403 self.header.unmodified_hash = None;
1404
1405 Some(free)
1406 }
1407
1408 /// Allocates space for a fixed-size row of `fixed_row_size` bytes from the gap, if possible.
1409 ///
1410 /// This call will clear the unmodified hash if the allocation succeeds.
1411 #[inline]
1412 fn alloc_fixed_len_from_gap(&mut self, fixed_row_size: Size) -> Option<PageOffset> {
1413 if gap_enough_size_for_row(self.header.var.first, self.header.fixed.last, fixed_row_size) {
1414 // We're modifying the page, so clear the unmodified hash.
1415 self.header.unmodified_hash = None;
1416
1417 // Enough space in the gap; move the high water mark and return the old HWM.
1418 // `fixed.last` points *after* the highest-indexed fixed-len row,
1419 // so post-increment.
1420 let ptr = self.header.fixed.last;
1421 self.header.fixed.last += fixed_row_size;
1422 self.header.fixed.set_row_present(ptr, fixed_row_size);
1423 Some(ptr)
1424 } else {
1425 // Not enough space in the gap for another row!
1426 None
1427 }
1428 }
1429
1430 /// Returns an iterator over all the [`PageOffset`]s of the fixed rows in this page
1431 /// beginning with `starting_from`.
1432 ///
1433 /// The rows are assumed to be `fixed_row_size` bytes long
1434 /// and `starting_from` is assumed to be at a valid starting `PageOffset` for a fixed row.
1435 ///
1436 /// NOTE: This method is not `unsafe` as it cannot trigger UB.
1437 /// However, when provided with garbage input, it will return garbage back.
1438 /// It is the caller's responsibility to ensure that `PageOffset`s derived from
1439 /// this iterator are valid when used to do anything `unsafe`.
1440 fn iter_fixed_len_from(&self, fixed_row_size: Size, starting_from: PageOffset) -> FixedLenRowsIter<'_> {
1441 let idx = starting_from / fixed_row_size;
1442 FixedLenRowsIter {
1443 idx_iter: self.header.fixed.present_rows.iter_set_from(idx),
1444 fixed_row_size,
1445 }
1446 }
1447
1448 /// Returns an iterator over all the [`PageOffset`]s of the fixed rows in this page.
1449 ///
1450 /// The rows are assumed to be `fixed_row_size` bytes long.
1451 ///
1452 /// NOTE: This method is not `unsafe` as it cannot trigger UB.
1453 /// However, when provided with garbage input, it will return garbage back.
1454 /// It is the caller's responsibility to ensure that `PageOffset`s derived from
1455 /// this iterator are valid when used to do anything `unsafe`.
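///
/// # Example
///
/// A rough sketch (not compiled here; assumes a `page` of 8-byte rows,
/// read back with the same helper the tests use):
///
/// ```ignore
/// for offset in page.iter_fixed_len(Size(8)) {
///     let bytes = page.get_row_data(offset, Size(8));
///     let val = u64::from_le_bytes(bytes.try_into().unwrap());
///     println!("row at {:?} = {val}", offset);
/// }
/// ```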
1456 pub fn iter_fixed_len(&self, fixed_row_size: Size) -> FixedLenRowsIter<'_> {
1457 FixedLenRowsIter {
1458 idx_iter: self.header.fixed.present_rows.iter_set(),
1459 fixed_row_size,
1460 }
1461 }
1462
1463 /// Returns an iterator over all the `VarLenGranule`s of the var-len object
1464 /// that has its first granule at offset `first_granule`.
1465 /// An empty iterator will be returned when `first_granule` is `NULL`.
1466 ///
1467 /// # Safety
1468 ///
1469 /// `first_granule` must be an offset to a valid granule or `NULL`.
1470 pub unsafe fn iter_var_len_object(
1471 &self,
1472 first_granule: PageOffset,
1473 ) -> impl Clone + Iterator<Item = &VarLenGranule> {
1474 VarLenGranulesIter {
1475 page: self,
1476 next_granule: first_granule,
1477 }
1478 }
1479
1480 /// Returns an iterator over the data of all the `VarLenGranule`s of the var-len object
1481 /// that has its first granule at offset `first_granule`.
1482 /// An empty iterator will be returned when `first_granule` is `NULL`.
1483 ///
1484 /// # Safety
1485 ///
1486 /// `first_granule` must be an offset to a valid granule or `NULL`.
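///
/// # Example
///
/// A rough sketch (not compiled here; assumes `vlr` is a `VarLenRef` read out of a live row
/// in this page, so its `first_granule` is valid or NULL):
///
/// ```ignore
/// // SAFETY: `vlr.first_granule` came from a live row in `page`.
/// let bytes: Vec<u8> = unsafe { page.iter_vlo_data(vlr.first_granule) }
///     .flat_map(|chunk| chunk.iter().copied())
///     .collect();
/// ```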
1487 pub unsafe fn iter_vlo_data(&self, first_granule: PageOffset) -> impl '_ + Clone + Iterator<Item = &[u8]> {
1488 // SAFETY: Caller and callee have the exact same safety requirements.
1489 unsafe { self.iter_var_len_object(first_granule) }.map(|g| g.data())
1490 }
1491
1492 /// Free a row, marking its fixed-len and var-len storage granules as available for re-use.
1493 ///
1494 /// This call will clear the unmodified hash.
1495 ///
1496 /// # Safety
1497 ///
1498 /// - `fixed_row` must point to a valid row in this page.
1499 ///
1500 /// - `fixed_row_size` must be the size in bytes of the fixed part
1501 /// of all past, present, and future rows in this page.
1502 ///
1503 /// - The `var_len_visitor` must visit the same set of `VarLenRef`s in the row
1504 /// as the visitor provided to `insert_row`.
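///
/// # Example
///
/// A rough sketch (not compiled here; deletes a previously inserted 8-byte row
/// with no var-len members):
///
/// ```ignore
/// let visitor = AlignedVarLenOffsets::from_offsets(&[]);
/// // SAFETY: `offset` was returned by `insert_row` on this page and not deleted since,
/// // and the visitor and row size match every other call on this page.
/// let freed_blob_bytes = unsafe { page.delete_row(offset, Size(8), &visitor, &mut NullBlobStore) };
/// ```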
1505 pub unsafe fn delete_row(
1506 &mut self,
1507 fixed_row: PageOffset,
1508 fixed_row_size: Size,
1509 var_len_visitor: &impl VarLenMembers,
1510 blob_store: &mut dyn BlobStore,
1511 ) -> BlobNumBytes {
1512 // We're modifying the page, so clear the unmodified hash.
1513 self.header.unmodified_hash = None;
1514
1515 let (mut fixed, mut var) = self.split_fixed_var_mut();
1516
1517 let mut blob_store_deleted_bytes = BlobNumBytes::default();
1518
1519 // Visit the var-len members of the fixed row and free them.
1520 let row = fixed.get_row(fixed_row, fixed_row_size);
1521 // SAFETY: `row` is derived from `fixed_row`, which is known by caller requirements to be valid.
1522 let var_len_refs = unsafe { var_len_visitor.visit_var_len(row) };
1523 for var_len_ref in var_len_refs {
1524 // SAFETY: A sound call to `visit_var_len` on a fully initialized valid row,
1525 // which we've justified that the above is,
1526 // returns an iterator, that will only yield `var_len_ref`s,
1527 // where `var_len_ref.first_granule` points to a valid `VarLenGranule` or is NULL.
1528 blob_store_deleted_bytes += unsafe { var.free_object(*var_len_ref, blob_store) }
1529 }
1530
1531 // SAFETY: Caller promised that `fixed_row` points to a valid row in the page.
1532 // Thus, `range_move(0..fixed_row_size, fixed_row)` is in bounds of `row_data`.
1533 // Moreover, this entails that it is valid for writing a `FreeCellRef`
1534 // to the beginning or entire range, as any row can at least hold a `FreeCellRef`
1535 // and will be properly aligned for it as well.
1536 unsafe {
1537 fixed.free(fixed_row, fixed_row_size);
1538 }
1539
1540 blob_store_deleted_bytes
1541 }
1542
1543 /// Returns the total number of granules used by the fixed row at `fixed_row_offset`
1544 /// and lasting `fixed_row_size` bytes where `var_len_visitor` is used to find
1545 /// the [`VarLenRef`]s in the fixed row.
1546 ///
1547 /// # Safety
1548 ///
1549 /// - `fixed_row_offset` must refer to a previously-allocated and initialized row in `self`,
1550 /// and must not have been de-allocated. In other words, the fixed row must be *valid*.
1551 ///
1552 /// - `fixed_row_size` and `var_len_visitor` must be consistent with each other
1553 /// and with all other calls to any methods on `self`.
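///
/// # Example
///
/// A rough sketch (not compiled here) of the typical use: sizing up a row before copying it
/// into another page. `page`, `dst`, `offset`, `row_size`, and `visitor` are assumed bindings
/// consistent with each other:
///
/// ```ignore
/// // SAFETY: `offset` refers to a live row, and `row_size`/`visitor` are the ones
/// // used for every other call on this page.
/// let granules = unsafe { page.row_total_granules(offset, row_size, &visitor) };
/// let fits = dst.has_space_for_row(row_size, granules);
/// ```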
1554 pub unsafe fn row_total_granules(
1555 &self,
1556 fixed_row_offset: PageOffset,
1557 fixed_row_size: Size,
1558 var_len_visitor: &impl VarLenMembers,
1559 ) -> usize {
1560 let fixed_row = self.get_row_data(fixed_row_offset, fixed_row_size);
1561 // SAFETY:
1562 // - Caller promised that `fixed_row_offset` is a valid row.
1563 // - Caller promised consistency of `var_len_visitor` wrt. `fixed_row_size` and this page.
1564 let vlr_iter = unsafe { var_len_visitor.visit_var_len(fixed_row) };
1565 vlr_iter.copied().map(|slot| slot.granules_used()).sum()
1566 }
1567
1568 /// Copy as many rows from `self` for which `filter` returns `true` into `dst` as will fit,
1569 /// starting from `starting_from`.
1570 ///
1571 /// If less than the entirety of `self` could be processed, return `Continue(resume_point)`,
1572 /// where `resume_point` is the `starting_from` argument of a subsequent call to `copy_filter_into`
1573 /// that will complete the iteration.
1574 /// `dst` should be assumed to be full in this case,
1575 /// as it does not contain enough free space to store the row of `self` at `resume_point`.
1576 ///
1577 /// If the entirety of `self` is processed, return `Break`.
1578 /// `dst` may or may not be full in this case, but is likely not full.
1579 ///
1580 /// # Safety
1581 ///
1582 /// The `var_len_visitor` must visit the same set of `VarLenRef`s in the row
1583 /// as the visitor provided to all other methods on `self` and `dst`.
1584 ///
1585 /// The `fixed_row_size` must be consistent with the `var_len_visitor`,
1586 /// and be equal to the value provided to all other methods on `self` and `dst`.
1587 ///
1588 /// The `starting_from` offset must point to a valid starting offset
1589 /// consistent with `fixed_row_size`.
1590 /// That is, it must not point into the middle of a row.
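///
/// # Example
///
/// A rough sketch (not compiled here) of draining a source page into fresh pages,
/// resuming where the previous destination filled up. `src`, `row_size`, `visitor`,
/// `blob_store`, and `pages` are assumed bindings:
///
/// ```ignore
/// let mut resume = PageOffset(0);
/// loop {
///     let mut dst = Page::new(row_size);
///     // SAFETY: `resume` starts at 0 and is otherwise only ever a value returned by
///     // `copy_filter_into`; `row_size` and `visitor` match all other calls on both pages.
///     match unsafe { src.copy_filter_into(resume, &mut dst, row_size, &visitor, blob_store, |_, _| true) } {
///         ControlFlow::Continue(next) => { pages.push(dst); resume = next; }
///         ControlFlow::Break(()) => { pages.push(dst); break; }
///     }
/// }
/// ```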
1591 pub unsafe fn copy_filter_into(
1592 &self,
1593 starting_from: PageOffset,
1594 dst: &mut Page,
1595 fixed_row_size: Size,
1596 var_len_visitor: &impl VarLenMembers,
1597 blob_store: &mut dyn BlobStore,
1598 mut filter: impl FnMut(&Page, PageOffset) -> bool,
1599 ) -> ControlFlow<(), PageOffset> {
1600 for row_offset in self
1601 .iter_fixed_len_from(fixed_row_size, starting_from)
1602 // Only copy rows satisfying the predicate `filter`.
1603 .filter(|o| filter(self, *o))
1604 {
1605 // SAFETY:
1606 // - `starting_from` points to a valid row and thus `row_offset` also does.
1607 // - `var_len_visitor` will visit the right `VarLenRef`s and is consistent with other calls.
1608 // - `fixed_row_size` is consistent with `var_len_visitor` and `self`.
1609 if !unsafe { self.copy_row_into(row_offset, dst, fixed_row_size, var_len_visitor, blob_store) } {
1610 // Target doesn't have enough space for row;
1611 // stop here and return the offset of the uncopied row
1612 // so a later call to `copy_filter_into` can start there.
1613 return ControlFlow::Continue(row_offset);
1614 }
1615 }
1616
1617 // The `for` loop completed.
1618 // We successfully processed the rest of `self`, copying every matching row into `dst`.
1619 // The caller doesn't need to resume from this offset.
1620 ControlFlow::Break(())
1621 }
1622
1623 /// Copies the row at `row_offset` from `self` into `dst`
1624 /// or returns `false` if `dst` has no space for the row.
1625 ///
1626 /// # Safety
1627 ///
1628 /// - `row_offset` offset must point to a valid row.
1629 ///
1630 /// - `var_len_visitor` must visit the same set of `VarLenRef`s in the row
1631 /// as the visitor provided to all other methods on `self` and `dst`.
1632 ///
1633 /// - `fixed_row_size` must be consistent with the `var_len_visitor`,
1634 /// and be equal to the value provided to all other methods on `self` and `dst`.
1635 unsafe fn copy_row_into(
1636 &self,
1637 row_offset: PageOffset,
1638 dst: &mut Page,
1639 fixed_row_size: Size,
1640 var_len_visitor: &impl VarLenMembers,
1641 blob_store: &mut dyn BlobStore,
1642 ) -> bool {
1643 // SAFETY: Caller promised that `row_offset` points to a valid row
1644 // consistent with `fixed_row_size` which was also
1645 // claimed to be consistent with `var_len_visitor` and `self`.
1646 let required_granules = unsafe { self.row_total_granules(row_offset, fixed_row_size, var_len_visitor) };
1647 if !dst.has_space_for_row(fixed_row_size, required_granules) {
1648 // Target doesn't have enough space for row.
1649 return false;
1650 };
1651
1652 let src_row = self.get_row_data(row_offset, fixed_row_size);
1653
1654 // Allocate for the fixed-len data.
1655 // SAFETY: forward our requirement on `fixed_row_size` to `alloc_fixed_len`.
1656 let inserted_offset = unsafe { dst.alloc_fixed_len(fixed_row_size) }
1657 .expect("Failed to allocate fixed-len row in dst page after checking for available space");
1658
1659 // Copy all fixed-len data. We'll overwrite the var-len parts next.
1660 let (mut dst_fixed, mut dst_var) = dst.split_fixed_var_mut();
1661 let dst_row = dst_fixed.get_row_mut(inserted_offset, fixed_row_size);
1662 dst_row.copy_from_slice(src_row);
1663
1664 // Copy var-len members into target.
1665 // Fixup `VarLenRef`s in `dst_row` to point to the copied var-len objects.
1666 //
1667 // SAFETY: `src_row` is valid because it came from `self.iter_fixed_len_from`.
1668 //
1669 // Forward our safety requirements re: `var_len_visitor` to `visit_var_len`.
1670 let src_vlr_iter = unsafe { var_len_visitor.visit_var_len(src_row) };
1671 // SAFETY: forward our requirement on `var_len_visitor` to `visit_var_len_mut`.
1672 let target_vlr_iter = unsafe { var_len_visitor.visit_var_len_mut(dst_row) };
1673 for (src_vlr, target_vlr_slot) in src_vlr_iter.zip(target_vlr_iter) {
1674 // SAFETY:
1675 //
1676 // - requirements of `visit_var_len` were met,
1677 // so we can assume that `src_vlr.first_granule` points to a valid granule or is NULL.
1678 //
1679 // - the call to `dst.has_space_for_row` above ensures
1680 // that the allocation will not fail part-way through.
1681 let target_vlr_fixup = unsafe { self.copy_var_len_into(*src_vlr, &mut dst_var, blob_store) }
1682 .expect("Failed to allocate var-len object in dst page after checking for available space");
1683
1684 *target_vlr_slot = target_vlr_fixup;
1685 }
1686
1687 true
1688 }
1689
1690 /// Copy a var-len object `src_vlr` from `self` into `dst_var`,
1691 /// and return the `VarLenRef` to the copy in `dst_var`.
1692 ///
1693 /// If the `src_vlr` is empty,
1694 /// i.e., has `first_granule.is_null()` and `length_in_bytes == 0`,
1695 /// this will return `VarLenRef::NULL`.
1696 ///
1697 /// # Safety
1698 ///
1699 /// - `src_vlr.first_granule` must point to a valid granule or be NULL.
1700 ///
1701 /// - To avoid leaving dangling uninitialized allocations in `dst_var`,
1702 /// `dst_var` must already be checked to have enough size to store `src_vlr`
1703 /// using `Self::has_space_for_row`.
1704 unsafe fn copy_var_len_into(
1705 &self,
1706 src_vlr: VarLenRef,
1707 dst_var: &mut VarView<'_>,
1708 blob_store: &mut dyn BlobStore,
1709 ) -> Result<VarLenRef, Error> {
1710 // SAFETY: Caller promised that `src_vlr.first_granule` points to a valid granule or is NULL.
1711 let mut iter = unsafe { self.iter_var_len_object(src_vlr.first_granule) };
1712
1713 // If the `src_vlr` is empty, don't copy anything, and return null.
1714 let Some(mut src_chunk) = iter.next() else {
1715 debug_assert!(src_vlr.length_in_bytes == 0);
1716 return Ok(VarLenRef::NULL);
1717 };
1718 let mut dst_chunk = dst_var.alloc_granule()?;
1719
1720 let copied_head = dst_chunk;
1721
1722 // Weird-looking iteration so that we know the next granule's offset when writing `dst_chunk`.
1723 for next_src_chunk in iter {
1724 // Allocate space for the next granule so we can initialize it in the next iteration.
1725 let next_dst_chunk = dst_var.alloc_granule()?;
1726 let data = src_chunk.data();
1727 // Initialize `dst_chunk` with data and next-pointer.
1728 //
1729 // SAFETY:
1730 // 1. `dst_chunk` is properly aligned as it came from `alloc_granule` either
1731 // before the loop or in the previous iteration.
1732 // This also ensures that both are in bounds
1733 // of the page for `granule..granule + VarLenGranule::SIZE`.
1734 //
1735 // 2. `next_dst_chunk` will be initialized
1736 // either in the next iteration or after the loop ends.
1737 //
1738 // 3. `dst_chunk` points to unused data as the space was allocated before the loop
1739 // or was `next_dst_chunk` in the previous iteration and hasn't been written to yet.
1740 unsafe { dst_var.write_chunk_to_granule(data, data.len(), dst_chunk, next_dst_chunk) };
1741 dst_chunk = next_dst_chunk;
1742 src_chunk = next_src_chunk;
1743 }
1744
1745 let data = src_chunk.data();
1746 // The last granule has null as next-pointer.
1747 //
1748 // SAFETY:
1749 // 1. `dst_chunk` is properly aligned as it came from `alloc_granule` either
1750 // before the loop or in the previous iteration.
1751 // This also ensures that both are in bounds
1752 // of the page for `granule..granule + VarLenGranule::SIZE`.
1753 //
1754 // 2. `next` is NULL which is trivially init.
1755 //
1756 // 3. `dst_chunk` points to unused data as the space was allocated before the loop
1757 // or was `next_dst_chunk` in the previous iteration and hasn't been written to yet.
1758 unsafe { dst_var.write_chunk_to_granule(data, data.len(), dst_chunk, PageOffset::VAR_LEN_NULL) };
1759
1760 // For a large blob object,
1761 // notify the `blob_store` that we've taken a reference to the blob hash.
1762 if src_vlr.is_large_blob() {
1763 blob_store
1764 .clone_blob(&src_chunk.blob_hash())
1765 .expect("blob_store could not mark hash as used");
1766 }
1767
1768 Ok(VarLenRef {
1769 first_granule: copied_head,
1770 length_in_bytes: src_vlr.length_in_bytes,
1771 })
1772 }
1773
1774 /// Make `self` empty, removing all rows from it and resetting the high water marks to zero.
1775 ///
1776 /// This also clears the `unmodified_hash`.
1777 pub fn clear(&mut self) {
1778 self.header.clear();
1779 }
1780
1781 /// Zeroes every byte of row data in this page.
1782 ///
1783 /// This is only used for benchmarks right now.
1784 ///
1785 /// # Safety
1786 ///
1787 /// Causes the page header to no longer match the contents, invalidating many assumptions.
1788 /// Should be called in conjunction with [`Self::clear`].
1789 #[doc(hidden)]
1790 pub unsafe fn zero_data(&mut self) {
1791 for byte in &mut self.row_data {
1792 *byte = 0;
1793 }
1794 }
1795
1796 /// Computes the content hash of this page.
1797 pub fn content_hash(&self) -> blake3::Hash {
1798 let mut hasher = blake3::Hasher::new();
1799
1800 // Hash the page contents.
1801 hasher.update(&self.row_data);
1802
1803 // Hash the `FixedHeader`: first copy the fixed part, minus the bitset, into an array.
1804 let fixed = &self.header.fixed;
1805 let mut fixed_bytes = [0u8; 6];
1806 fixed_bytes[0..2].copy_from_slice(&fixed.next_free.next.0.to_le_bytes());
1807 fixed_bytes[2..4].copy_from_slice(&fixed.last.0.to_le_bytes());
1808 fixed_bytes[4..6].copy_from_slice(&fixed.num_rows.to_le_bytes());
1809 hasher.update(&fixed_bytes);
1810
1811 // Hash the fixed bit set.
1812 hasher.update(bytemuck::must_cast_slice(fixed.present_rows.storage()));
1813
1814 // Hash the `VarHeader`.
1815 hasher.update(bytemuck::bytes_of(&self.header.var));
1816
1817 // We're done.
1818 // Note that `unmodified_hash` itself must not be hashed to avoid a recursive dependency.
1819 hasher.finalize()
1820 }
1821
1822 /// Computes the content hash of this page and saves it to [`PageHeader::unmodified_hash`].
1823 pub fn save_content_hash(&mut self) {
1824 let hash = self.content_hash();
1825 self.header.unmodified_hash = Some(hash);
1826 }
1827
1828 /// Return the page's content hash, computing and saving it if it is not already stored.
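///
/// # Example
///
/// A rough sketch (not compiled here) of the caching behavior:
///
/// ```ignore
/// let first = page.save_or_get_content_hash();  // computes and stores the hash
/// let second = page.save_or_get_content_hash(); // reuses the stored hash
/// assert_eq!(first, second);
/// // Any mutation (insert, delete, clear) clears the stored hash again.
/// ```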
1829 pub fn save_or_get_content_hash(&mut self) -> blake3::Hash {
1830 self.unmodified_hash().copied().unwrap_or_else(|| {
1831 self.save_content_hash();
1832 self.header.unmodified_hash.unwrap()
1833 })
1834 }
1835
1836 /// Returns the stored unmodified hash, if any.
1837 pub fn unmodified_hash(&self) -> Option<&blake3::Hash> {
1838 self.header.unmodified_hash.as_ref()
1839 }
1840}
1841
1842/// An iterator over the `PageOffset`s of all present fixed-length rows in a [`Page`].
1843pub struct FixedLenRowsIter<'page> {
1844 /// Iterator over the indices of the set bits in the page's `present_rows` bitset,
1845 /// i.e., the slots that currently hold a fixed-len row.
1846 /// Each yielded index is multiplied by `fixed_row_size` to produce a `PageOffset`.
1847 idx_iter: IterSet<'page>,
1848 /// The size of a row in bytes.
1849 fixed_row_size: Size,
1850}
1851
1852impl Iterator for FixedLenRowsIter<'_> {
1853 type Item = PageOffset;
1854
1855 fn next(&mut self) -> Option<Self::Item> {
1856 self.idx_iter
1857 .next()
1858 .map(|idx| PageOffset(idx as u16 * self.fixed_row_size.0))
1859 }
1860}
1861
1862/// An iterator over the [`VarLenGranule`]s in a particular [`VarLenRef`] in `page`.
1863///
1864/// Constructing a `VarLenGranulesIter` should be considered unsafe
1865/// because the initial `next_granule` must either be `NULL` or point to a valid [`VarLenGranule`].
1866///
1867/// Iterating over [`VarLenRef::NULL`] is safe and will immediately return `None`.
1868#[derive(Clone, Copy)]
1869struct VarLenGranulesIter<'page> {
1870 /// The page to yield granules from.
1871 page: &'page Page,
1872 /// Location of the next granule in `page`.
1873 /// Must either be `NULL` or point to a valid granule.
1874 next_granule: PageOffset,
1875 // TODO(perf,bikeshedding): store length and implement `Iterator::size_hint`?
1876}
1877
1878impl<'page> Iterator for VarLenGranulesIter<'page> {
1879 type Item = &'page VarLenGranule;
1880
1881 fn next(&mut self) -> Option<Self::Item> {
1882 if self.next_granule.is_var_len_null() {
1883 return None;
1884 }
1885
1886 // SAFETY: By construction,
1887 // the initial `next_granule` was promised to either be `NULL` or point to a valid granule.
1888 // For a given granule, the same applies to its `.next()` granule.
1889 // At this point, we've excluded `NULL`,
1890 // so we know inductively that `next_granule` points to a valid granule, as required.
1891 let granule: &VarLenGranule = unsafe { get_ref(&self.page.row_data, self.next_granule) };
1892 self.next_granule = granule.header.next();
1893
1894 Some(granule)
1895 }
1896}
1897
1898#[cfg(test)]
1899pub(crate) mod tests {
1900 use super::*;
1901 use crate::{blob_store::NullBlobStore, layout::row_size_for_type, var_len::AlignedVarLenOffsets};
1902 use proptest::{collection::vec, prelude::*};
1903 use spacetimedb_lib::bsatn;
1904
1905 fn u64_row_size() -> Size {
1906 let fixed_row_size = row_size_for_type::<u64>();
1907 assert_eq!(fixed_row_size.len(), 8);
1908 fixed_row_size
1909 }
1910
1911 const U64_VL_VISITOR: AlignedVarLenOffsets<'_> = AlignedVarLenOffsets::from_offsets(&[]);
1912 fn u64_var_len_visitor() -> &'static AlignedVarLenOffsets<'static> {
1913 &U64_VL_VISITOR
1914 }
1915
1916 fn insert_u64(page: &mut Page, val: u64) -> PageOffset {
1917 let val_slice = val.to_le_bytes();
1918 unsafe { page.insert_row(&val_slice, &[] as &[&[u8]], u64_var_len_visitor(), &mut NullBlobStore) }
1919 .expect("Failed to insert first row")
1920 }
1921
1922 fn insert_u64_drop(page: &mut Page, val: u64) {
1923 insert_u64(page, val);
1924 }
1925
1926 fn read_u64(page: &Page, offset: PageOffset) -> u64 {
1927 let row = page.get_row_data(offset, u64_row_size());
1928 u64::from_le_bytes(row.try_into().unwrap())
1929 }
1930
1931 fn data_sub_n_vlg(n: usize) -> usize {
1932 PageOffset::PAGE_END.idx() - (VarLenGranule::SIZE * n).len()
1933 }
1934
1935 pub(crate) fn hash_unmodified_save_get(page: &mut Page) -> blake3::Hash {
1936 assert_eq!(page.header.unmodified_hash, None);
1937 page.save_content_hash();
1938 page.header.unmodified_hash.unwrap()
1939 }
1940
1941 #[test]
1942 fn insert_one_u64() {
1943 let mut page = Page::new(u64_row_size());
1944
1945 // First the hash is not saved, so compute it.
1946 let hash = hash_unmodified_save_get(&mut page);
1947
1948 let val: u64 = 0xa5a5_a5a5_a5a5_a5a5;
1949
1950 let offset = insert_u64(&mut page, val);
1951
1952 assert_eq!(offset.idx(), 0);
1953
1954 let row_val = read_u64(&page, offset);
1955
1956 assert_eq!(row_val, val);
1957
1958 // The hash should have been cleared.
1959 assert_ne!(hash, hash_unmodified_save_get(&mut page));
1960 }
1961
1962 fn insert_while(
1963 page: &mut Page,
1964 mut next_val: u64,
1965 fixed_row_size: Size,
1966 vl_num: usize,
1967 mut insert: impl FnMut(&mut Page, u64),
1968 ) -> u64 {
1969 while page.has_space_for_row(fixed_row_size, vl_num) {
1970 insert(page, next_val);
1971 next_val += 1;
1972 }
1973 next_val
1974 }
1975
1976 #[test]
1977 fn fill_then_iter_fixed_len_u64() {
1978 let mut page = Page::new(u64_row_size());
1979
1980 let last_val = insert_while(&mut page, 0, u64_row_size(), 0, insert_u64_drop);
1981 assert_eq!(last_val, (PageOffset::PAGE_END / u64_row_size()) as u64);
1982
1983 for (row_idx, expected_val) in page.iter_fixed_len(u64_row_size()).zip(0..last_val) {
1984 let row_val = read_u64(&page, row_idx);
1985 assert_eq!(
1986 row_val, expected_val,
1987 "row_val {:x} /= expected_val {:x}",
1988 row_val, expected_val
1989 );
1990 }
1991 }
1992
1993 #[test]
1994 fn fill_delete_iter_fixed_len_u64() {
1995 let mut page = Page::new(u64_row_size());
1996
1997 // First the hash is not saved, so compute it.
1998 let hash_pre_ins = hash_unmodified_save_get(&mut page);
1999
2000 // Insert rows.
2001 let mut odds: Vec<PageOffset> = Vec::new();
2002 let last_val = insert_while(&mut page, 2, u64_row_size(), 0, |page, val| {
2003 let offset = insert_u64(page, val);
2004 if val % 2 == 1 {
2005 odds.push(offset);
2006 }
2007 });
2008
2009 // The hash should have been cleared.
2010 let hash_pre_del = hash_unmodified_save_get(&mut page);
2011 assert_ne!(hash_pre_ins, hash_pre_del);
2012
2013 // Delete rows.
2014 for row_offset in odds {
2015 unsafe { page.delete_row(row_offset, u64_row_size(), u64_var_len_visitor(), &mut NullBlobStore) };
2016 }
2017
2018 // The hash should have been cleared.
2019 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2020 assert_ne!(hash_pre_ins, hash_pre_iter);
2021 assert_ne!(hash_pre_del, hash_pre_iter);
2022
2023 // Iterate the rows.
2024 for (row_offset, expected_val) in page.iter_fixed_len(u64_row_size()).zip((2..last_val).step_by(2)) {
2025 let found_val = read_u64(&page, row_offset);
2026 assert_eq!(found_val, expected_val);
2027 }
2028
2029 // Hash is unchanged.
2030 assert_eq!(page.header.unmodified_hash, Some(hash_pre_iter));
2031 }
2032
2033 #[test]
2034 /// After deleting a fixed-length row and then inserting a new fixed-length row,
2035 /// the fixed-length high water mark must not change,
2036 /// i.e. we must re-use memory from the deleted row to store the new insertion.
2037 fn reuse_fixed_len_space() {
2038 let mut page = Page::new(u64_row_size());
2039
2040 // First the hash is not saved, so compute it.
2041 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2042
2043 // Insert two rows.
2044 let offset_0 = insert_u64(&mut page, 0xa5a5a5a5_a5a5a5a5);
2045 assert_eq!(offset_0.idx(), 0);
2046 let offset_1 = insert_u64(&mut page, 0xbeefbeef_beefbeef);
2047 assert_eq!(offset_1, u64_row_size());
2048
2049 assert_eq!(page.header.fixed.last, u64_row_size() * 2);
2050
2051 // Hash has been cleared after inserts.
2052 let hash_pre_del = hash_unmodified_save_get(&mut page);
2053 assert_ne!(hash_pre_ins, hash_pre_del);
2054
2055 // Delete first row.
2056 unsafe { page.delete_row(offset_0, u64_row_size(), u64_var_len_visitor(), &mut NullBlobStore) };
2057
2058 assert_eq!(page.header.fixed.last, u64_row_size() * 2);
2059
2060 // Hash has been cleared after deletes.
2061 let hash_pre_ins2 = hash_unmodified_save_get(&mut page);
2062 assert_ne!(hash_pre_ins, hash_pre_ins2);
2063 assert_ne!(hash_pre_del, hash_pre_ins2);
2064
2065 // Insert first row again, re-using memory.
2066 let offset_0_again = insert_u64(&mut page, 0xffffffff_ffffffff);
2067
2068 assert_eq!(offset_0_again.idx(), 0);
2069 assert_eq!(offset_0.idx(), offset_0_again.idx());
2070
2071 assert_eq!(page.header.fixed.last, u64_row_size() * 2);
2072
2073 // Hash has been cleared after last insert, despite re-using memory.
2074 let hash_post_ins2 = hash_unmodified_save_get(&mut page);
2075 assert_ne!(hash_pre_ins, hash_post_ins2);
2076 assert_ne!(hash_pre_del, hash_post_ins2);
2077 assert_ne!(hash_pre_ins2, hash_post_ins2);
2078 }
2079
2080 const STR_ROW_SIZE: Size = row_size_for_type::<VarLenRef>();
2081
2082 const _: () = assert!(STR_ROW_SIZE.len() == mem::size_of::<VarLenRef>());
2083
2084 const STR_VL_VISITOR: AlignedVarLenOffsets<'_> = AlignedVarLenOffsets::from_offsets(&[0]);
2085 fn str_var_len_visitor() -> &'static AlignedVarLenOffsets<'static> {
2086 &STR_VL_VISITOR
2087 }
2088
2089 fn insert_str(page: &mut Page, data: &[u8]) -> PageOffset {
2090 let fixed_len_data = [0u8; STR_ROW_SIZE.len()];
2091 unsafe { page.insert_row(&fixed_len_data, &[data], str_var_len_visitor(), &mut NullBlobStore) }
2092 .expect("Failed to insert row")
2093 }
2094
2095 fn read_str_ref(page: &Page, offset: PageOffset) -> VarLenRef {
2096 *unsafe { get_ref(&page.row_data, offset) }
2097 }
2098
2099 #[test]
2100 fn insert_empty_str() {
2101 let mut page = Page::new(STR_ROW_SIZE);
2102
2103 // First the hash is not saved, so compute it.
2104 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2105
2106 // Insert the empty string.
2107 let offset = insert_str(&mut page, &[]);
2108
2109 // No granules were used.
2110 let extracted = read_str_ref(&page, offset);
2111 let mut granules_iter = unsafe { page.iter_var_len_object(extracted.first_granule) };
2112 assert!(granules_iter.next().is_none());
2113 drop(granules_iter);
2114
2115 // Hash is cleared even though the string was empty.
2116 assert_ne!(hash_pre_ins, hash_unmodified_save_get(&mut page));
2117 }
2118
2119 proptest! {
2120 #[test]
2121 fn insert_one_short_str(data in vec(any::<u8>(), 1..VarLenGranule::DATA_SIZE)) {
2122 let mut page = Page::new(STR_ROW_SIZE);
2123
2124 // First the hash is not saved, so compute it.
2125 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2126
2127 // Insert the row.
2128 let offset = insert_str(&mut page, &data);
2129
2130 // Hash was cleared by the insert.
2131 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2132 assert_ne!(hash_pre_ins, hash_pre_iter);
2133
2134 // Confirm we inserted correctly.
2135 let extracted = read_str_ref(&page, offset);
2136 let mut data_iter = unsafe { page.iter_vlo_data(extracted.first_granule) };
2137 let (first, next) = (data_iter.next(), data_iter.next());
2138 assert_eq!(first, Some(&*data));
2139 assert_eq!(next, None);
2140
2141 // Iteration and reading did not change the hash.
2142 assert_eq!(hash_pre_iter, page.header.unmodified_hash.unwrap());
2143 }
2144
2145 #[test]
2146 fn insert_one_long_str(data in vec(any::<u8>(), (VarLenGranule::OBJECT_SIZE_BLOB_THRESHOLD / 2)..VarLenGranule::OBJECT_SIZE_BLOB_THRESHOLD)) {
2147 let mut page = Page::new(STR_ROW_SIZE);
2148
2149 // First the hash is not saved, so compute it.
2150 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2151
2152 // Insert the long string.
2153 let offset = insert_str(&mut page, &data);
2154
2155 // The hash was cleared, and the new one is different.
2156 let hash_post_ins = hash_unmodified_save_get(&mut page);
2157 assert_ne!(hash_pre_ins, hash_post_ins);
2158
2159 // Check that we inserted correctly.
2160 let extracted = read_str_ref(&page, offset);
2161
2162 let mut data_iter = unsafe { page.iter_vlo_data(extracted.first_granule) };
2163 let mut chunks_iter = data.chunks(VarLenGranule::DATA_SIZE);
2164
2165 for (i, (data, chunk)) in (&mut data_iter).zip(&mut chunks_iter).enumerate() {
2166 assert_eq!(
2167 data,
2168 chunk,
2169 "Chunk {} does not match. Left is found, right is expected.",
2170 i,
2171 );
2172 }
2173
2174 // Both iterators must be finished, i.e. they must have the same length.
2175 assert!(data_iter.next().is_none());
2176 assert!(chunks_iter.next().is_none());
2177
2178 // Reading did not alter the hash.
2179 assert_eq!(hash_post_ins, page.header.unmodified_hash.unwrap());
2180 }
2181 }
2182
2183 #[test]
2184 fn reuse_var_len_space_no_fragmentation_concerns() {
2185 let data_0 = b"Hello, world!";
2186 let data_1 = b"How goes life?";
2187 let data_2 = b"Glad to hear it.";
2188
2189 let mut page = Page::new(STR_ROW_SIZE);
2190 let offset_0 = insert_str(&mut page, data_0);
2191 let offset_1 = insert_str(&mut page, data_1);
2192
2193 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(2));
2194
2195 assert_ne!(offset_0.idx(), offset_1.idx());
2196
2197 let var_len_0 = read_str_ref(&page, offset_0);
2198
2199 assert_eq!(var_len_0.length_in_bytes as usize, data_0.len());
2200 assert_eq!(var_len_0.first_granule.idx(), data_sub_n_vlg(1));
2201
2202 let var_len_1 = read_str_ref(&page, offset_1);
2203
2204 assert_eq!(var_len_1.length_in_bytes as usize, data_1.len());
2205 assert_eq!(var_len_1.first_granule.idx(), data_sub_n_vlg(2));
2206
2207 let hash_pre_del = hash_unmodified_save_get(&mut page);
2208
2209 unsafe { page.delete_row(offset_0, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2210
2211 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2212
2213 let offset_2 = insert_str(&mut page, data_2);
2214
2215 let hash_post_ins = hash_unmodified_save_get(&mut page);
2216 assert_ne!(hash_pre_del, hash_pre_ins);
2217 assert_ne!(hash_pre_del, hash_post_ins);
2218 assert_ne!(hash_pre_ins, hash_post_ins);
2219
2220 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(2));
2221
2222 assert_eq!(offset_0.idx(), offset_2.idx());
2223
2224 let var_len_2 = read_str_ref(&page, offset_2);
2225
2226 assert_eq!(var_len_2.length_in_bytes as usize, data_2.len());
2227 assert_eq!(var_len_2.first_granule.idx(), var_len_0.first_granule.idx());
2228 }
2229
2230 #[test]
2231 fn free_var_len_obj_multiple_granules() {
2232 let mut page = Page::new(STR_ROW_SIZE);
2233
2234 // Allocate a 4-granule var-len object.
2235 let data_0 = [0xa5u8].repeat(VarLenGranule::DATA_SIZE * 4);
2236 let offset_0 = insert_str(&mut page, &data_0);
2237
2238 let var_len_0 = read_str_ref(&page, offset_0);
2239
2240 // Read the addresses of its var-len granules.
2241 let granules_0 = unsafe { page.iter_var_len_object(var_len_0.first_granule) }
2242 .map(|granule| granule as *const VarLenGranule as usize)
2243 .collect::<Vec<_>>();
2244
2245 // Sanity checks: we have allocated 4 granules.
2246 assert_eq!(granules_0.len(), 4);
2247 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(4));
2248
2249 // Delete the row.
2250 unsafe { page.delete_row(offset_0, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2251
2252 // Allocate a new 4-granule var-len object.
2253 // This should use the same storage as the original row.
2254 let data_1 = [0xffu8].repeat(VarLenGranule::DATA_SIZE * 4);
2255 let offset_1 = insert_str(&mut page, &data_1);
2256
2257 let var_len_1 = read_str_ref(&page, offset_1);
2258
2259 // Read the addresses of the new allocation's var-len granules.
2260 let granules_1 = unsafe { page.iter_var_len_object(var_len_1.first_granule) }
2261 .map(|granule| granule as *const VarLenGranule as usize)
2262 .collect::<Vec<_>>();
2263
2264 // Sanity check: the new allocation is also 4 granules.
2265 assert_eq!(granules_1.len(), 4);
2266
2267 for granule in granules_1.iter().copied() {
2268 // The new var-len allocation must contain all the same granules by address
2269 // as the old var-len allocation.
2270 assert!(granules_0.iter().copied().any(|other_granule| other_granule == granule));
2271 }
2272
2273 // The var-len high water mark must not have moved.
2274 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(4));
2275 }
2276
2277 #[test]
2278 fn reuse_var_len_space_avoid_fragmentation() {
2279 let data_0 = &[0xa5u8];
2280 let data_1 = &[0xffu8];
2281 let data_2 = [0x11u8].repeat(VarLenGranule::DATA_SIZE + 1);
2282 let data_2 = data_2.as_ref();
2283
2284 let mut page = Page::new(STR_ROW_SIZE);
2285
2286 // First the hash is not saved, so compute it.
2287 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2288
2289 // Insert two string rows.
2290 let offset_0 = insert_str(&mut page, data_0);
2291 let _offset_1 = insert_str(&mut page, data_1);
2292
2293 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(2));
2294
2295 // Hash is cleared by inserting and the new one is different.
2296 let hash_pre_del = hash_unmodified_save_get(&mut page);
2297 assert_ne!(hash_pre_ins, hash_pre_del);
2298
2299 // Delete the first row.
2300 unsafe { page.delete_row(offset_0, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2301
2302 // Hash is cleared by deleting.
2303 let hash_post_del = hash_unmodified_save_get(&mut page);
2304 assert_ne!(hash_pre_ins, hash_post_del);
2305 assert_ne!(hash_pre_del, hash_post_del);
2306
2307 // Insert again, re-using memory.
2308 let offset_2 = insert_str(&mut page, data_2);
2309
2310 assert_eq!(page.header.var.first.idx(), data_sub_n_vlg(3));
2311
2312 // Hash is cleared by inserting again, even though we re-used memory.
2313 let hash_post_ins2 = hash_unmodified_save_get(&mut page);
2314 assert_ne!(hash_pre_ins, hash_post_ins2);
2315 assert_ne!(hash_pre_del, hash_post_ins2);
2316 assert_ne!(hash_post_del, hash_post_ins2);
2317
2318 // Check that we inserted correctly.
2319 let var_len_2 = read_str_ref(&page, offset_2);
2320
2321 let mut data_iter = unsafe { page.iter_vlo_data(var_len_2.first_granule) };
2322 let mut chunks_iter = data_2.chunks(VarLenGranule::DATA_SIZE);
2323
2324 for (i, (data, chunk)) in (&mut data_iter).zip(&mut chunks_iter).enumerate() {
2325 assert_eq!(
2326 data, chunk,
2327 "Chunk {} does not match. Left is found, right is expected.",
2328 i,
2329 );
2330 }
2331
2332 // Both iterators must be finished, i.e. they must have the same length.
2333 assert!(data_iter.next().is_none());
2334 assert!(chunks_iter.next().is_none());
2335 }
2336
2337 fn check_u64_in_str(page: &Page, row_idx: PageOffset, expected_val: u64) {
2338 let vlr = read_str_ref(page, row_idx);
2339
2340 let mut var_len_iter = unsafe { page.iter_vlo_data(vlr.first_granule) };
2341 let data = var_len_iter.next().unwrap();
2342 assert!(var_len_iter.next().is_none());
2343 assert_eq!(data.len(), mem::size_of::<u64>());
2344
2345 let val = u64::from_le_bytes(data.try_into().unwrap());
2346 assert_eq!(val, expected_val);
2347 }
2348
2349 #[test]
2350 fn fill_then_iter_var_len_str() {
2351 let mut page = Page::new(STR_ROW_SIZE);
2352
2353 // First the hash is not saved, so compute it.
2354 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2355
2356 // Insert the strings.
2357 let last_val = insert_while(&mut page, 0, STR_ROW_SIZE, 1, |page, val| {
2358 insert_str(page, &val.to_le_bytes());
2359 });
2360
2361 // Hash is cleared by inserting and the new one is different.
2362 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2363 assert_ne!(hash_pre_ins, hash_pre_iter);
2364
2365 // Check that we inserted correctly.
2366 let size_per_row = STR_ROW_SIZE + VarLenGranule::SIZE;
2367
2368 assert_eq!(last_val, (PageOffset::PAGE_END / size_per_row) as u64);
2369
2370 for (row_idx, expected_val) in page.iter_fixed_len(STR_ROW_SIZE).zip(0..last_val) {
2371 check_u64_in_str(&page, row_idx, expected_val);
2372 }
2373
2374 // Reading does not alter the hash.
2375 assert_eq!(hash_pre_iter, page.header.unmodified_hash.unwrap());
2376 }
2377
2378 #[test]
2379 fn fill_delete_iter_var_len_str() {
2380 let mut page = Page::new(STR_ROW_SIZE);
2381
2382 // First the hash is not saved, so compute it.
2383 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2384
2385 // Insert the string rows.
2386 let mut odds = Vec::new();
2387 let last_val = insert_while(&mut page, 0, STR_ROW_SIZE, 1, |page, val| {
2388 let offset = insert_str(page, &val.to_le_bytes());
2389 if val % 2 == 1 {
2390 odds.push(offset);
2391 }
2392 });
2393
2394 let size_per_row = STR_ROW_SIZE + VarLenGranule::SIZE;
2395 let num_rows_inserted = (PageOffset::PAGE_END / size_per_row) as u64;
2396 assert_eq!(last_val, num_rows_inserted);
2397
2398 // Hash was cleared by inserting and is different now.
2399 let hash_pre_del = hash_unmodified_save_get(&mut page);
2400 assert_ne!(hash_pre_ins, hash_pre_del);
2401
2402 // Delete the rows.
2403 for row_offset in odds {
2404 unsafe { page.delete_row(row_offset, STR_ROW_SIZE, str_var_len_visitor(), &mut NullBlobStore) };
2405 }
2406
2407 // Hash was cleared by deleting and is different now.
2408 let hash_pre_iter = hash_unmodified_save_get(&mut page);
2409 assert_ne!(hash_pre_ins, hash_pre_iter);
2410 assert_ne!(hash_pre_del, hash_pre_iter);
2411
2412 // Check that we deleted correctly.
2413 let num_rows_retained = num_rows_inserted.div_ceil(2);
2414 let num_rows_removed = num_rows_inserted / 2;
2415
2416 assert_eq!(page.header.fixed.num_rows as u64, num_rows_retained);
2417
2418 assert_eq!(page.header.var.freelist_len as u64, num_rows_removed);
2419
2420 for (row_idx, expected_val) in page.iter_fixed_len(STR_ROW_SIZE).zip((0..last_val).step_by(2)) {
2421 check_u64_in_str(&page, row_idx, expected_val);
2422 }
2423
2424 // Reading did not alter the hash.
2425 assert_eq!(hash_pre_iter, page.header.unmodified_hash.unwrap());
2426 }
2427
2428 #[test]
2429 fn serde_round_trip_whole_page() {
2430 let mut page = Page::new(u64_row_size());
2431
2432 // Construct an empty page, ser/de it, and assert that it's still empty.
2433 let hash_pre_ins = hash_unmodified_save_get(&mut page);
2434 let ser_pre_ins = bsatn::to_vec(&page).unwrap();
2435 let de_pre_ins = bsatn::from_slice::<Box<Page>>(&ser_pre_ins).unwrap();
2436 assert_eq!(de_pre_ins.content_hash(), hash_pre_ins);
2437 assert_eq!(de_pre_ins.header.fixed.num_rows, 0);
2438 assert!(de_pre_ins.header.fixed.present_rows == page.header.fixed.present_rows);
2439
2440 // Insert some rows into the page.
2441 let offsets = (0..64)
2442 .map(|val| insert_u64(&mut page, val))
2443 .collect::<Vec<PageOffset>>();
2444
2445 let hash_ins = hash_unmodified_save_get(&mut page);
2446
2447 // Ser/de the page and assert that it contains the same rows.
2448 let ser_ins = bsatn::to_vec(&page).unwrap();
2449 let de_ins = bsatn::from_slice::<Box<Page>>(&ser_ins).unwrap();
2450 assert_eq!(de_ins.content_hash(), hash_ins);
2451 assert_eq!(de_ins.header.fixed.num_rows, 64);
2452 assert!(de_ins.header.fixed.present_rows == page.header.fixed.present_rows);
2453 assert_eq!(
2454 de_ins.iter_fixed_len(u64_row_size()).collect::<Vec<PageOffset>>(),
2455 offsets
2456 );
2457
2458 // Delete the even-numbered rows, leaving the odds.
2459 let offsets = offsets
2460 .into_iter()
2461 .enumerate()
2462 .filter_map(|(i, offset)| {
2463 if i % 2 == 0 {
2464 unsafe { page.delete_row(offset, u64_row_size(), u64_var_len_visitor(), &mut NullBlobStore) };
2465 None
2466 } else {
2467 Some(offset)
2468 }
2469 })
2470 .collect::<Vec<PageOffset>>();
2471
2472 // Ser/de the page again and assert that it contains only the odd-numbered rows.
2473 let hash_del = hash_unmodified_save_get(&mut page);
2474 let ser_del = bsatn::to_vec(&page).unwrap();
2475 let de_del = bsatn::from_slice::<Box<Page>>(&ser_del).unwrap();
2476 assert_eq!(de_del.content_hash(), hash_del);
2477 assert_eq!(de_del.header.fixed.num_rows, 32);
2478 assert!(de_del.header.fixed.present_rows == page.header.fixed.present_rows);
2479 assert_eq!(
2480 de_del.iter_fixed_len(u64_row_size()).collect::<Vec<PageOffset>>(),
2481 offsets
2482 );
2483 }
2484}