// simd_r_drive/storage_engine/key_indexer.rs
1use crate::storage_engine::constants::*;
2use crate::storage_engine::digest::{Xxh3BuildHasher, compute_hash};
3use memmap2::Mmap;
4use simd_r_drive_entry_handle::EntryMetadata;
5use std::collections::hash_map::Entry;
6use std::collections::hash_map::Values;
7use std::collections::{HashMap, HashSet};
8
/// Number of high bits reserved for the collision-detection tag (16 bits).
///
/// This leaves 48 bits for actual file offset storage.
const TAG_BITS: u64 = 16;

/// Bitmask for extracting the lower 48-bit offset from a packed index value.
/// Evaluates to `0x0000_FFFF_FFFF_FFFF` (the low 48 bits set).
const OFFSET_MASK: u64 = (1u64 << (64 - TAG_BITS)) - 1;
16
/// `KeyIndexer` maps 64-bit key hashes to 48-bit file offsets, augmented with
/// a 16-bit tag for lightweight collision detection.
///
/// ## Purpose
/// While XXH3 is a high-quality 64-bit hash, collisions are still possible
/// in large-scale stores. This index:
///
/// - Detects such collisions via a 16-bit fingerprint (`tag`)
/// - Avoids storing full keys in memory
/// - Maintains constant memory footprint (`u64` per key)
///
/// ## Packed Value Format
/// Stored in a single `u64`:
///
/// ```text
/// [63 ......... 48][47 ....................... 0]
/// [   tag (16)    ][      file offset (48)     ]
/// ```
///
/// ## Collision Handling
/// During lookups, the stored tag is compared to a rederived one from the
/// key or key hash. If the tags do not match, the entry is rejected as a
/// potential collision.
///
/// ## Limitations
///
/// - **Max file size**: 2^48 bytes = **256 TiB**
///   - Any file larger than this will overflow the offset field and
///     corrupt the tag.
/// - **Tag uniqueness**: 2^16 = **65,536** distinct tags
///   - Probability of tag collision is 1 in 65,536 per conflicting key hash
///   - This is sufficient to distinguish over 4 billion keys with
///     ~50% collision probability (birthday bound)
///
/// ## Tradeoffs
/// This scheme offers a middle ground:
/// - Significantly improves safety over pure hash indexing
/// - No dynamic memory cost
/// - Small and predictable performance cost (~2 bit ops + 1 compare)
pub struct KeyIndexer {
    /// Index: key_hash → packed (tag | offset)
    index: HashMap<u64, u64, Xxh3BuildHasher>,
}
60
61impl KeyIndexer {
62 /// Returns a 16-bit tag from the upper bits of a key hash.
63 #[inline]
64 pub fn tag_from_hash(key_hash: u64) -> u16 {
65 (key_hash >> (64 - TAG_BITS)) as u16
66 }
67
68 /// Computes a tag directly from the raw key.
69 #[inline]
70 pub fn tag_from_key(key: &[u8]) -> u16 {
71 Self::tag_from_hash(compute_hash(key))
72 }
73
74 /// Combines a tag and offset into a packed 64-bit value.
75 ///
76 /// # Panics
77 /// If offset exceeds 48 bits (i.e. > 256 TiB), this will panic.
78 #[inline]
79 pub fn pack(tag: u16, offset: u64) -> u64 {
80 debug_assert!(
81 offset <= OFFSET_MASK,
82 "offset exceeds 48-bit range (tag would be corrupted)"
83 );
84 ((tag as u64) << (64 - TAG_BITS)) | offset
85 }
86
87 /// Extracts (tag, offset) from a packed value.
88 #[inline]
89 pub fn unpack(packed: u64) -> (u16, u64) {
90 let tag = (packed >> (64 - TAG_BITS)) as u16;
91 let offset = packed & OFFSET_MASK;
92 (tag, offset)
93 }
94
95 /// Scans the file backwards and builds a tag-aware hash index.
96 ///
97 /// The most recent version of each key hash is kept.
98 pub fn build(mmap: &Mmap, tail_offset: u64) -> Self {
99 let mut index = HashMap::with_hasher(Xxh3BuildHasher);
100 let mut seen = HashSet::with_hasher(Xxh3BuildHasher);
101 let mut cursor = tail_offset;
102
103 while cursor >= METADATA_SIZE as u64 {
104 let meta_off = cursor as usize - METADATA_SIZE;
105 let meta_bytes = &mmap[meta_off..meta_off + METADATA_SIZE];
106 let meta = EntryMetadata::deserialize(meta_bytes);
107
108 if seen.contains(&meta.key_hash) {
109 cursor = meta.prev_offset;
110 continue;
111 }
112
113 seen.insert(meta.key_hash);
114 let tag = Self::tag_from_hash(meta.key_hash);
115 index.insert(meta.key_hash, Self::pack(tag, meta_off as u64));
116
117 if meta.prev_offset == 0 {
118 break;
119 }
120 cursor = meta.prev_offset;
121 }
122
123 Self { index }
124 }
125
126 /// Inserts a new key hash and offset into the index, with collision detection.
127 ///
128 /// This method checks the tag of an existing entry before overwriting. If the
129 /// tags do not match, it returns an `Err`, indicating a hash collision.
130 ///
131 /// # Returns
132 /// - `Ok(Some(u64))`: On a successful update, returning the previous offset.
133 /// - `Ok(None)`: On a successful insert of a new key.
134 /// - `Err(&'static str)`: On a tag mismatch, indicating a hash collision.
135 pub fn insert(&mut self, key_hash: u64, new_offset: u64) -> Result<Option<u64>, &'static str> {
136 let new_tag = Self::tag_from_hash(key_hash);
137 let new_packed = Self::pack(new_tag, new_offset);
138
139 match self.index.entry(key_hash) {
140 // The key already exists in the index.
141 Entry::Occupied(mut entry) => {
142 let (stored_tag, _) = Self::unpack(*entry.get());
143
144 // VERIFY: The new tag must match the old one.
145 if new_tag != stored_tag {
146 // This is a hash collision with a different key! Reject the write.
147 return Err("Hash collision detected: tag mismatch on write");
148 }
149
150 // Tags match, it's a legitimate update.
151 let previous_packed = entry.insert(new_packed);
152 Ok(Some(Self::unpack(previous_packed).1))
153 }
154 // The key does not exist, safe to insert.
155 Entry::Vacant(entry) => {
156 entry.insert(new_packed);
157 Ok(None)
158 }
159 }
160 }
161
162 /// Gets the raw packed value.
163 #[inline]
164 pub fn get_packed(&self, key_hash: &u64) -> Option<&u64> {
165 self.index.get(key_hash)
166 }
167
168 /// Returns only the unpacked offset (ignores tag).
169 #[inline]
170 pub fn get_offset(&self, key_hash: &u64) -> Option<u64> {
171 self.index.get(key_hash).map(|&v| Self::unpack(v).1)
172 }
173
174 /// Removes a key and returns its offset.
175 #[inline]
176 pub fn remove(&mut self, key_hash: &u64) -> Option<u64> {
177 self.index.remove(key_hash).map(|v| Self::unpack(v).1)
178 }
179
180 #[inline]
181 pub fn len(&self) -> usize {
182 self.index.len()
183 }
184
185 #[inline]
186 pub fn is_empty(&self) -> bool {
187 self.index.is_empty()
188 }
189
190 /// Returns a memory-efficient iterator over the packed (tag|offset) values.
191 ///
192 /// This method is preferable to collecting all values into a `Vec` when the
193 /// index is large, as it avoids a large upfront memory allocation. The iterator
194 /// borrows the underlying index, so it must be used within the lifetime of the
195 /// `KeyIndexer`'s read lock.
196 ///
197 #[inline]
198 pub fn values(&self) -> Values<'_, u64, u64> {
199 self.index.values()
200 }
201}