// simd_r_drive_entry_handle/entry_metadata.rs

use crate::constants::*;

/// Metadata structure for an append-only storage entry.
///
/// This structure stores metadata associated with each entry in the append-only storage.
/// It includes a hash of the key for quick lookups, an offset pointing to the previous
/// entry in the chain, and a checksum for integrity verification.
///
/// ## Entry Storage Layout
///
/// Aligned entry (non-tombstone):
///
/// | Offset Range   | Field              | Size (Bytes) | Description                       |
/// |----------------|--------------------|--------------|-----------------------------------|
/// | `P .. P+pad`   | Pre-Pad (optional) | `pad`        | Zero bytes to align payload start |
/// | `P+pad .. N`   | Payload            | `N-(P+pad)`  | Variable-length data              |
/// | `N .. N+8`     | Key Hash           | `8`          | 64-bit XXH3 key hash              |
/// | `N+8 .. N+16`  | Prev Offset        | `8`          | Absolute offset of previous tail  |
/// | `N+16 .. N+20` | Checksum           | `4`          | CRC32C of payload                 |
///
/// Where:
/// - `pad = (A - (prev_tail % A)) & (A - 1)`, `A = PAYLOAD_ALIGNMENT`.
/// - The next entry starts at `N + 20`.
///
/// Tombstone (deletion marker):
///
/// | Offset Range  | Field    | Size (Bytes) | Description            |
/// |---------------|----------|--------------|------------------------|
/// | `T .. T+1`    | Payload  | `1`          | Single byte `0x00`     |
/// | `T+1 .. T+21` | Metadata | `20`         | Key hash, prev, crc32c |
///
/// Notes:
/// - Using the previous tail in `Prev Offset` lets us insert pre-pad while
///   keeping chain traversal unambiguous.
/// - Readers compute `payload_start = prev_offset + prepad_len(prev_offset)`
///   and use the current metadata position as `payload_end`.
///
/// <img src="https://github.com/jzombie/rust-simd-r-drive/blob/main/assets/storage-layout.png" alt="Storage Layout" />
///
/// ## Notes
/// - The `prev_offset` forms a **backward-linked chain** for each key.
/// - The checksum is **not cryptographically secure** but serves as a quick integrity check.
/// - The first entry for a key has `prev_offset = 0`, indicating no previous version.
///
/// ## Rust representation
/// - `#[repr(C)]` fixes the field order to the declaration order below.
/// - `PartialEq`/`Eq` compare all three fields, which makes serialize/deserialize
///   round-trips directly assertable.
#[repr(C)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EntryMetadata {
    /// Hashed key used for lookup (8 bytes).
    pub key_hash: u64,
    /// Absolute offset forming the backward link to the previous entry (8 bytes).
    pub prev_offset: u64,
    /// Checksum bytes used for integrity verification (4 bytes).
    pub checksum: [u8; 4],
}

52impl EntryMetadata {
53    // TODO: Document
54    pub fn new(key_hash: u64, prev_offset: u64, checksum: [u8; 4]) -> Self {
55        Self {
56            key_hash,
57            prev_offset,
58            checksum,
59        }
60    }
61
62    /// Serializes the metadata into a byte array.
63    ///
64    /// Converts the `EntryMetadata` structure into a fixed-size array
65    /// for efficient storage. The serialized format ensures compatibility
66    /// with disk storage and memory-mapped access.
67    ///
68    /// # Format:
69    /// - Encodes the key hash, previous offset, and checksum into their respective byte ranges.
70    /// - Uses little-endian encoding for numeric values.
71    ///
72    /// # Returns:
73    /// - A byte array containing the serialized metadata.
74    #[inline]
75    pub fn serialize(&self) -> [u8; METADATA_SIZE] {
76        let mut buf = [0u8; METADATA_SIZE];
77
78        buf[KEY_HASH_RANGE].copy_from_slice(&self.key_hash.to_le_bytes());
79        buf[PREV_OFFSET_RANGE].copy_from_slice(&self.prev_offset.to_le_bytes());
80        buf[CHECKSUM_RANGE].copy_from_slice(&self.checksum);
81
82        buf
83    }
84
85    /// Deserializes a byte slice into an `EntryMetadata` instance.
86    ///
87    /// Reconstructs an `EntryMetadata` structure from a byte slice,
88    /// following the predefined binary format. Extracts the key hash,
89    /// previous offset, and checksum while ensuring correctness through
90    /// explicit range-based indexing.
91    ///
92    /// # Parameters:
93    /// - `data`: A byte slice containing the serialized metadata.
94    ///
95    /// # Returns:
96    /// - A reconstructed `EntryMetadata` instance.
97    ///
98    /// # Panics:
99    /// - If the provided `data` slice is too small.
100    #[inline]
101    pub fn deserialize(data: &[u8]) -> Self {
102        Self {
103            key_hash: u64::from_le_bytes(data[KEY_HASH_RANGE].try_into().unwrap()),
104            prev_offset: u64::from_le_bytes(data[PREV_OFFSET_RANGE].try_into().unwrap()),
105            // Use a `const`-safe way to construct a fixed-size array
106            checksum: {
107                let mut checksum = [0u8; CHECKSUM_LEN];
108                checksum.copy_from_slice(&data[CHECKSUM_RANGE]);
109                checksum
110            },
111        }
112    }
113}