// simd_r_drive/storage_engine/key_indexer.rs
1use crate::storage_engine::constants::*;
2use crate::storage_engine::digest::{Xxh3BuildHasher, compute_hash};
3use memmap2::Mmap;
4use simd_r_drive_entry_handle::EntryMetadata;
5use std::collections::hash_map::Entry;
6use std::collections::hash_map::Values;
7use std::collections::{HashMap, HashSet};
8
/// Number of high bits reserved for the collision-detection tag (16 bits).
///
/// This leaves 48 bits for actual file offset storage.
const TAG_BITS: u64 = 16;

/// Bitmask for extracting the lower 48-bit offset from a packed index value.
/// Evaluates to `0x0000_FFFF_FFFF_FFFF` (the low 48 bits set).
const OFFSET_MASK: u64 = (1u64 << (64 - TAG_BITS)) - 1;
16
/// `KeyIndexer` maps 64-bit key hashes to 48-bit file offsets, augmented with
/// a 16-bit tag for lightweight collision detection.
///
/// ## Purpose
/// While XXH3 is a high-quality 64-bit hash, collisions are still possible
/// in large-scale stores. This index:
///
/// - Detects such collisions via a 16-bit fingerprint (`tag`)
/// - Avoids storing full keys in memory
/// - Maintains constant memory footprint (`u64` per key)
///
/// ## Packed Value Format
/// Stored in a single `u64`:
///
/// ```text
/// [63 ......... 48][47 ....................... 0]
/// [   tag (16)    ][      file offset (48)     ]
/// ```
///
/// ## Collision Handling
/// During lookups, the stored tag is compared to a rederived one from the
/// key or key hash. If the tags do not match, the entry is rejected as a
/// potential collision.
///
/// ## Limitations
///
/// - **Max file size**: 2^48 bytes = **256 TiB**
///   - Any file larger than this will overflow the offset field and
///     corrupt the tag.
/// - **Tag uniqueness**: 2^16 = **65,536** distinct tags
///   - Probability of tag collision is 1 in 65,536 per conflicting key hash
///   - This is sufficient to distinguish over 4 billion keys with
///     ~50% collision probability (birthday bound)
///
/// ## Tradeoffs
/// This scheme offers a middle ground:
/// - Significantly improves safety over pure hash indexing
/// - No dynamic memory cost
/// - Small and predictable performance cost (~2 bit ops + 1 compare)
pub struct KeyIndexer {
    /// Index: key_hash → packed (tag | offset)
    index: HashMap<u64, u64, Xxh3BuildHasher>,
}
60
61impl KeyIndexer {
62 /// Returns a 16-bit tag from the upper bits of a key hash.
63 #[inline]
64 pub fn tag_from_hash(key_hash: u64) -> u16 {
65 (key_hash >> (64 - TAG_BITS)) as u16
66 }
67
68 /// Computes a tag directly from the raw key.
69 #[inline]
70 pub fn tag_from_key(key: &[u8]) -> u16 {
71 Self::tag_from_hash(compute_hash(key))
72 }
73
74 /// Combines a tag and offset into a packed 64-bit value.
75 ///
76 /// # Panics
77 /// If offset exceeds 48 bits (i.e. > 256 TiB), this will panic.
78 #[inline]
79 pub fn pack(tag: u16, offset: u64) -> u64 {
80 debug_assert!(
81 offset <= OFFSET_MASK,
82 "offset exceeds 48-bit range (tag would be corrupted)"
83 );
84 ((tag as u64) << (64 - TAG_BITS)) | offset
85 }
86
87 /// Extracts (tag, offset) from a packed value.
88 #[inline]
89 pub fn unpack(packed: u64) -> (u16, u64) {
90 let tag = (packed >> (64 - TAG_BITS)) as u16;
91 let offset = packed & OFFSET_MASK;
92 (tag, offset)
93 }
94
95 /// Scans the file backwards and builds a tag-aware hash index.
96 ///
97 /// The most recent version of each key hash is kept.
98 pub fn build(mmap: &Mmap, tail_offset: u64) -> Self {
99 let mut index = HashMap::with_hasher(Xxh3BuildHasher);
100 let mut seen = HashSet::with_hasher(Xxh3BuildHasher);
101 let mut cursor = tail_offset;
102
103 while cursor >= METADATA_SIZE as u64 {
104 let meta_off = cursor as usize - METADATA_SIZE;
105 let meta_bytes = &mmap[meta_off..meta_off + METADATA_SIZE];
106 let meta = EntryMetadata::deserialize(meta_bytes);
107
108 if seen.contains(&meta.key_hash) {
109 cursor = meta.prev_offset;
110 continue;
111 }
112
113 seen.insert(meta.key_hash);
114 let tag = Self::tag_from_hash(meta.key_hash);
115 index.insert(meta.key_hash, Self::pack(tag, meta_off as u64));
116
117 if meta.prev_offset == 0 {
118 break;
119 }
120 cursor = meta.prev_offset;
121 }
122
123 Self { index }
124 }
125
126 /// Inserts a new key hash and offset into the index, with collision detection.
127 ///
128 /// This method checks the tag of an existing entry before overwriting. If the
129 /// tags do not match, it returns an `Err`, indicating a hash collision.
130 ///
131 /// # Returns
132 /// - `Ok(Some(u64))`: On a successful update, returning the previous offset.
133 /// - `Ok(None)`: On a successful insert of a new key.
134 /// - `Err(&'static str)`: On a tag mismatch, indicating a hash collision.
135 pub fn insert(&mut self, key_hash: u64, new_offset: u64) -> Result<Option<u64>, &'static str> {
136 let new_tag = Self::tag_from_hash(key_hash);
137 let new_packed = Self::pack(new_tag, new_offset);
138
139 match self.index.entry(key_hash) {
140 // The key already exists in the index.
141 Entry::Occupied(mut entry) => {
142 let (stored_tag, _) = Self::unpack(*entry.get());
143
144 // VERIFY: The new tag must match the old one.
145 if new_tag != stored_tag {
146 // This is a hash collision with a different key! Reject the write.
147 return Err("Hash collision detected: tag mismatch on write");
148 }
149
150 // Tags match, it's a legitimate update.
151 let previous_packed = entry.insert(new_packed);
152 Ok(Some(Self::unpack(previous_packed).1))
153 }
154 // The key does not exist, safe to insert.
155 Entry::Vacant(entry) => {
156 entry.insert(new_packed);
157 Ok(None)
158 }
159 }
160 }
161
162 /// Gets the raw packed value.
163 #[inline]
164 pub fn get_packed(&self, key_hash: &u64) -> Option<&u64> {
165 self.index.get(key_hash)
166 }
167
168 /// Returns only the unpacked offset (ignores tag).
169 #[inline]
170 pub fn get_offset(&self, key_hash: &u64) -> Option<u64> {
171 self.index.get(key_hash).map(|&v| Self::unpack(v).1)
172 }
173
174 /// Removes a key and returns its offset.
175 #[inline]
176 pub fn remove(&mut self, key_hash: &u64) -> Option<u64> {
177 self.index.remove(key_hash).map(|v| Self::unpack(v).1)
178 }
179
180 #[inline]
181 pub fn len(&self) -> usize {
182 self.index.len()
183 }
184
185 #[inline]
186 pub fn is_empty(&self) -> bool {
187 self.index.is_empty()
188 }
189
190 /// Returns a memory-efficient iterator over the packed (tag|offset) values.
191 ///
192 /// This method is preferable to collecting all values into a `Vec` when the
193 /// index is large, as it avoids a large upfront memory allocation. The iterator
194 /// borrows the underlying index, so it must be used within the lifetime of the
195 /// `KeyIndexer`'s read lock.
196 ///
197 #[inline]
198 pub fn values(&self) -> Values<'_, u64, u64> {
199 self.index.values()
200 }
201}