hashmap_mem/
lib.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/piot/hashmap-mem
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5
6use fxhash::FxHasher64;
7use std::cmp::{max, min};
8use std::hash::Hasher;
9use std::mem::size_of;
10use std::ops::Not;
11use std::{ptr, slice};
12
13#[repr(u8)]
14#[derive(Copy, Clone, PartialEq, Eq, Debug)]
15pub enum BucketStatus {
16    Empty = 0, // Must be zero, do not change!
17    Tombstone = 1,
18    Occupied = 2,
19}
20
21#[repr(C)]
22#[derive(Copy, Clone, Debug)]
23pub struct MapHeader {
24    // Do not change the order of the fields!
25    pub capacity: u16,      // Do not change,
26    pub element_count: u16, // Do not change
27    pub key_size: u32,
28    pub value_size: u32,
29
30    pub value_offset: u32,
31    pub bucket_size: u32,
32
33    pub logical_limit: u16,
34    pub key_offset: u8,
35    pub padding_and_secret_code: u8,
36}
37
38pub struct MapInit {
39    pub key_size: u32,
40    pub key_alignment: u8,
41    pub value_size: u32,
42    pub value_alignment: u8,
43    pub capacity: u16,
44    pub logical_limit: u16,
45    pub total_size: u32,
46}
47
48#[derive(Clone, Copy, Debug)]
49pub struct BucketLayout {
50    pub bucket_size: u32,
51    pub key_offset: u8,
52    pub value_offset: u32,
53}
54
55const MAP_BUCKETS_OFFSET: usize = size_of::<MapHeader>();
56const MAX_PROBE_DISTANCE: usize = 32;
57
58#[inline]
59fn calculate_hash_bytes(key_bytes: &[u8]) -> u64 {
60    let mut hasher = FxHasher64::default();
61    hasher.write(key_bytes);
62    hasher.finish()
63}
64
65/// Calculate memory layout for a map bucket
66#[inline]
67#[must_use]
68pub fn calculate_bucket_layout(
69    key_size: u32,
70    key_alignment: u8,
71    value_size: u32,
72    value_alignment: u8,
73) -> BucketLayout {
74    let status_size: u32 = 1;
75    let mut current_offset = status_size;
76
77    // Align key
78    let key_align = u32::from(key_alignment);
79    let key_offset = (current_offset + key_align - 1) & !(key_align - 1);
80    current_offset = key_offset + key_size;
81
82    // Align value
83    let value_align = u32::from(value_alignment);
84    let value_offset = (current_offset + value_align - 1) & !(value_align - 1);
85    current_offset = value_offset + value_size;
86
87    // Calculate final bucket size with proper alignment
88    let bucket_content_alignment = max(key_align, value_align);
89    let bucket_size =
90        (current_offset + bucket_content_alignment - 1) & !(bucket_content_alignment - 1);
91
92    BucketLayout {
93        bucket_size,
94        key_offset: key_offset as u8,
95        value_offset,
96    }
97}
98
99#[must_use]
100pub const fn total_size(capacity: u16, bucket_size: u32) -> u32 {
101    (MAP_BUCKETS_OFFSET + capacity as usize * bucket_size as usize) as u32
102}
103
104#[must_use]
105pub fn layout(
106    key_size: u32,
107    key_alignment: u8,
108    value_size: u32,
109    value_alignment: u8,
110    logical_limit: u16,
111) -> (BucketLayout, MapInit) {
112    let capacity = logical_limit.next_power_of_two();
113    let bucket_layout =
114        calculate_bucket_layout(key_size, key_alignment, value_size, value_alignment);
115    (
116        bucket_layout,
117        MapInit {
118            key_size,
119            key_alignment,
120            value_size,
121            value_alignment,
122            capacity,
123            logical_limit,
124            total_size: total_size(capacity, bucket_layout.bucket_size),
125        },
126    )
127}
128
129pub const SECRET_CODE: u8 = 0x3d;
130
131/// Initialize a new hash map in pre-allocated memory
132///
133/// # Safety
134///
135/// - `map_base` must point to valid, properly aligned memory of sufficient size
136/// - The memory must remain valid for the lifetime of the map
137pub unsafe fn init(map_base: *mut u8, config: &MapInit) {
138    debug_assert!(
139        config.capacity.is_power_of_two(),
140        "Capacity must be a power of two"
141    );
142
143    let map_header = map_base.cast::<MapHeader>();
144    let layout = calculate_bucket_layout(
145        config.key_size,
146        config.key_alignment,
147        config.value_size,
148        config.value_alignment,
149    );
150
151    // Initialize header
152    unsafe {
153        ptr::write(
154            map_header,
155            MapHeader {
156                capacity: config.capacity,
157                logical_limit: config.logical_limit,
158                key_size: config.key_size,
159                value_size: config.value_size,
160                bucket_size: layout.bucket_size,
161                key_offset: layout.key_offset,
162                value_offset: layout.value_offset,
163                element_count: 0,
164                padding_and_secret_code: SECRET_CODE,
165            },
166        );
167    }
168
169    // Initialize buckets to empty
170    let buckets_start_ptr = unsafe { map_base.add(MAP_BUCKETS_OFFSET) };
171    let capacity = usize::from(config.capacity);
172    let bucket_size = layout.bucket_size as usize;
173
174    // Zero out all bucket status bytes (Empty = 0)
175    for i in 0..capacity {
176        unsafe {
177            ptr::write(
178                buckets_start_ptr.add(i * bucket_size),
179                BucketStatus::Empty as u8,
180            );
181        }
182    }
183}
184
185/// Fast key comparison helper
186// TODO: Check if the performance difference is significant
187#[inline]
188unsafe fn matches_key(a: *const u8, b: *const u8, len: usize) -> bool {
189    unsafe {
190        if len <= 16 {
191            match len {
192                0 => true,
193                _ => {
194                    for i in 0..len {
195                        if *a.add(i) != *b.add(i) {
196                            return false;
197                        }
198                    }
199                    true
200                }
201            }
202        } else {
203            slice::from_raw_parts(a, len) == slice::from_raw_parts(b, len)
204        }
205    }
206}
207
208/// Get or reserve an entry in the map
209///
210/// # Safety
211///
212/// - `base_ptr` must point to a valid initialized map
213/// - `key_ptr` must point to a valid key of the size specified in the map header
214///
215/// # Returns
216///
217/// Pointer to the value location, or null if the map is full
218#[inline]
219pub unsafe fn get_or_reserve_entry(base_ptr: *mut u8, key_ptr: *const u8) -> *mut u8 {
220    unsafe {
221        let header = &*base_ptr.cast::<MapHeader>();
222
223        // Validate parameters
224        let capacity = header.capacity as usize;
225        let key_size = header.key_size as usize;
226        let bucket_size = header.bucket_size as usize;
227        let key_offset = header.key_offset as usize;
228        let value_offset = header.value_offset as usize;
229
230        debug_assert_eq!(
231            header.padding_and_secret_code, SECRET_CODE,
232            "hashmap, secret code failed"
233        );
234        debug_assert_ne!(key_size, 0, "Key size cannot be zero");
235        debug_assert_ne!(capacity, 0, "Capacity cannot be zero");
236        debug_assert!(
237            capacity.is_power_of_two(),
238            "Capacity must be a power of two"
239        );
240
241        let buckets_ptr = base_ptr.add(MAP_BUCKETS_OFFSET);
242        let key_slice = slice::from_raw_parts(key_ptr, key_size);
243        let hash = calculate_hash_bytes(key_slice);
244
245        // Initial probe position
246        let mut index = hash as usize & (capacity - 1);
247
248        // Track first tombstone for potential reuse
249        let mut first_tombstone = None;
250        let probe_limit = min(capacity, MAX_PROBE_DISTANCE);
251
252        for _ in 0..probe_limit {
253            let bucket_ptr = buckets_ptr.add(index * bucket_size);
254            let status = *bucket_ptr;
255
256            match status {
257                status if status == BucketStatus::Empty as u8 => {
258                    // TODO: Maybe go back to BucketStatus as constants instead, this feel a bit awkward
259                    // Use tombstone if found, otherwise use current empty slot
260                    let insert_index = first_tombstone.unwrap_or(index);
261                    let target_bucket = buckets_ptr.add(insert_index * bucket_size);
262
263                    // Mark as occupied and copy key
264                    *target_bucket = BucketStatus::Occupied as u8;
265                    let target_key_ptr = target_bucket.add(key_offset);
266                    ptr::copy_nonoverlapping(key_ptr, target_key_ptr, key_size);
267
268                    // Update element count
269                    let header_mut = &mut *base_ptr.cast::<MapHeader>();
270                    header_mut.element_count += 1;
271
272                    return target_bucket.add(value_offset);
273                }
274                status if status == BucketStatus::Occupied as u8 => {
275                    // Check if keys match
276                    let existing_key_ptr = bucket_ptr.add(key_offset);
277                    if matches_key(existing_key_ptr, key_ptr, key_size) {
278                        return bucket_ptr.add(value_offset);
279                    }
280                }
281                status if status == BucketStatus::Tombstone as u8 => {
282                    // Remember first tombstone for potential reuse
283                    if first_tombstone.is_none() {
284                        first_tombstone = Some(index);
285                    }
286                }
287                _ => unreachable!(),
288            }
289
290            // Linear probing with wraparound using bitmask
291            index = (index + 1) & (capacity - 1);
292        }
293
294        // If we found a tombstone during probing, use it
295        if let Some(tombstone_index) = first_tombstone {
296            let target_bucket = buckets_ptr.add(tombstone_index * bucket_size);
297
298            // Mark as occupied and copy key
299            *target_bucket = BucketStatus::Occupied as u8;
300            let target_key_ptr = target_bucket.add(key_offset);
301            ptr::copy_nonoverlapping(key_ptr, target_key_ptr, key_size);
302
303            // Update element count
304            let header_mut = &mut *base_ptr.cast::<MapHeader>();
305            header_mut.element_count += 1;
306
307            return target_bucket.add(value_offset);
308        }
309
310        // Map is full or probe limit exceeded
311        ptr::null_mut()
312    }
313}
314
315/// Check if a key exists in the map
316///
317/// # Safety
318///
319/// - `base_ptr` must point to a valid initialized map
320/// - `key_ptr` must point to a valid key of the size specified in the map header
321#[inline]
322#[must_use]
323pub unsafe fn has(base_ptr: *const u8, key_ptr: *const u8) -> bool {
324    unsafe { lookup(base_ptr.cast_mut(), key_ptr).is_null().not() }
325}
326
327/// Lookup an existing entry in the map
328///
329/// # Safety
330///
331/// - `base_ptr` must point to a valid initialized map
332/// - `key_ptr` must point to a valid key of the size specified in the map header
333///
334/// # Returns
335///
336/// Pointer to the found value, or null if not found
337#[inline]
338pub unsafe fn lookup(base_ptr: *mut u8, key_ptr: *const u8) -> *mut u8 {
339    unsafe {
340        let header = &*base_ptr.cast::<MapHeader>();
341
342        let capacity = header.capacity as usize;
343        let key_size = header.key_size as usize;
344        let bucket_size = header.bucket_size as usize;
345        let key_offset = header.key_offset as usize;
346        let value_offset = header.value_offset as usize;
347
348        debug_assert_eq!(
349            header.padding_and_secret_code, SECRET_CODE,
350            "hashmap, secret code failed"
351        );
352        debug_assert_ne!(key_size, 0, "Key size cannot be zero");
353        debug_assert_ne!(capacity, 0, "Capacity cannot be zero");
354        debug_assert!(
355            capacity.is_power_of_two(),
356            "Capacity must be a power of two {capacity}"
357        );
358
359        let buckets_ptr = base_ptr.add(MAP_BUCKETS_OFFSET);
360        let key_slice = slice::from_raw_parts(key_ptr, key_size);
361        let hash = calculate_hash_bytes(key_slice);
362
363        // Initial probe position
364        let mut index = hash as usize & (capacity - 1);
365        let probe_limit = min(capacity, MAX_PROBE_DISTANCE);
366
367        for _ in 0..probe_limit {
368            let bucket_ptr = buckets_ptr.add(index * bucket_size);
369            let status = *bucket_ptr;
370
371            match status {
372                status if status == BucketStatus::Empty as u8 => {
373                    // TODO: Maybe go back to constant
374                    // Empty slot means the key is not in the map
375                    return ptr::null_mut();
376                }
377                status if status == BucketStatus::Occupied as u8 => {
378                    // Check if keys match
379                    let existing_key_ptr = bucket_ptr.add(key_offset);
380                    if matches_key(existing_key_ptr, key_ptr, key_size) {
381                        return bucket_ptr.add(value_offset);
382                    }
383                }
384                _ => {} // Continue probing for tombstones
385            }
386
387            index = (index + 1) & (capacity - 1);
388        }
389
390        // Key not found within probe limit
391        ptr::null_mut()
392    }
393}
394
395/// Remove an entry from the map
396///
397/// # Safety
398///
399/// - `base_ptr` must point to a valid initialized map
400/// - `key_ptr` must point to a valid key of the size specified in the map header
401///
402/// # Returns
403///
404/// `true` if the key was found and removed, `false` otherwise
405#[inline]
406pub unsafe fn remove(base_ptr: *mut u8, key_ptr: *const u8) -> bool {
407    unsafe {
408        let header = &*base_ptr.cast::<MapHeader>();
409
410        let capacity = header.capacity as usize;
411        let key_size = header.key_size as usize;
412        let bucket_size = header.bucket_size as usize;
413        let key_offset = header.key_offset as usize;
414
415        debug_assert_eq!(
416            header.padding_and_secret_code, SECRET_CODE,
417            "hashmap, secret code failed"
418        );
419        debug_assert_ne!(key_size, 0, "Key size cannot be zero");
420        debug_assert_ne!(capacity, 0, "Capacity cannot be zero");
421        debug_assert!(
422            capacity.is_power_of_two(),
423            "Capacity must be a power of two"
424        );
425
426        let buckets_ptr = base_ptr.add(MAP_BUCKETS_OFFSET);
427        let key_slice = slice::from_raw_parts(key_ptr, key_size);
428        let hash = calculate_hash_bytes(key_slice);
429
430        // Initial probe position
431        let mut index = hash as usize & (capacity - 1);
432        let probe_limit = min(capacity, MAX_PROBE_DISTANCE);
433
434        for _ in 0..probe_limit {
435            let bucket_ptr = buckets_ptr.add(index * bucket_size);
436            let status = *bucket_ptr;
437
438            match status {
439                status if status == BucketStatus::Empty as u8 => {
440                    // Empty slot means the key is not in the map
441                    return false;
442                }
443                status if status == BucketStatus::Occupied as u8 => {
444                    // Check if keys match
445                    let existing_key_ptr = bucket_ptr.add(key_offset);
446                    if matches_key(existing_key_ptr, key_ptr, key_size) {
447                        // Convert to tombstone
448                        *bucket_ptr = BucketStatus::Tombstone as u8;
449
450                        // Update count
451                        let header_mut = &mut *base_ptr.cast::<MapHeader>();
452                        header_mut.element_count -= 1;
453
454                        return true;
455                    }
456                }
457                _ => {} // Continue probing for tombstones
458            }
459
460            index = (index + 1) & (capacity - 1);
461        }
462
463        // Key not found within probe limit
464        false
465    }
466}
467
468/// Copy all entries from source map to target map
469///
470/// # Safety
471///
472/// - Both maps must be properly initialized with compatible layouts (capacity can differ)
473/// - Target map must have sufficient capacity
474///
475/// # Returns
476///
477/// `true` if the operation succeeded, `false` if the target has insufficient capacity
478#[inline]
479pub unsafe fn overwrite(target_base: *mut u8, source: *const u8) -> bool {
480    unsafe {
481        let target_header = &mut *target_base.cast::<MapHeader>();
482        let source_header = &*source.cast::<MapHeader>();
483        debug_assert_eq!(
484            target_header.padding_and_secret_code, SECRET_CODE,
485            "hashmap, secret code failed"
486        );
487        debug_assert_eq!(
488            source_header.padding_and_secret_code, SECRET_CODE,
489            "hashmap, secret code failed"
490        );
491        // Check if target has enough capacity
492        if target_header.logical_limit < source_header.element_count {
493            return false;
494        }
495
496        // Validate compatible layouts
497        debug_assert_eq!(
498            target_header.bucket_size, source_header.bucket_size,
499            "Incompatible bucket sizes"
500        );
501        debug_assert_eq!(
502            target_header.key_size, source_header.key_size,
503            "Incompatible key sizes"
504        );
505        debug_assert_eq!(
506            target_header.value_size, source_header.value_size,
507            "Incompatible value sizes"
508        );
509
510        let source_buckets_ptr = source.add(MAP_BUCKETS_OFFSET);
511        let bucket_size = source_header.bucket_size as usize;
512        let key_offset = source_header.key_offset as usize;
513        let value_offset = source_header.value_offset as usize;
514        let value_size = source_header.value_size as usize;
515
516        // Copy each occupied bucket
517        for i in 0..source_header.capacity as usize {
518            let source_bucket = source_buckets_ptr.add(i * bucket_size);
519
520            if *source_bucket == BucketStatus::Occupied as u8 {
521                let source_key_ptr = source_bucket.add(key_offset);
522                let source_value_ptr = source_bucket.add(value_offset);
523
524                let target_value_ptr = get_or_reserve_entry(target_base, source_key_ptr);
525
526                if target_value_ptr.is_null() {
527                    return false;
528                }
529
530                ptr::copy_nonoverlapping(source_value_ptr, target_value_ptr, value_size);
531            }
532        }
533
534        true
535    }
536}
537
538/// Find the next valid entry in the map
539///
540/// # Safety
541///
542/// - `base` must point to a valid initialized map
543///
544/// # Returns
545///
546/// Tuple of (`key_ptr`, `value_ptr`, index) of the next valid entry,
547/// or (null, null, 0xFFFF) if no more entries exist
548#[inline]
549pub unsafe fn find_next_valid_entry(base: *mut u8, start_index: u16) -> (*const u8, *mut u8, u16) {
550    unsafe {
551        let map_header = &*base.cast::<MapHeader>();
552        let bucket_size = map_header.bucket_size as usize;
553        let buckets_start = base.add(MAP_BUCKETS_OFFSET);
554        let key_offset = map_header.key_offset as usize;
555        let value_offset = map_header.value_offset as usize;
556        debug_assert_eq!(
557            map_header.padding_and_secret_code, SECRET_CODE,
558            "hashmap, secret code failed"
559        );
560
561        let mut index = start_index as usize;
562
563        while index < map_header.capacity as usize {
564            let entry_ptr = buckets_start.add(index * bucket_size);
565
566            // Properly use the enum instead of magic number
567            if *entry_ptr == BucketStatus::Occupied as u8 {
568                let key_addr = entry_ptr.add(key_offset);
569                let value_addr = entry_ptr.add(value_offset);
570
571                return (key_addr, value_addr, index as u16);
572            }
573
574            index += 1;
575        }
576
577        (ptr::null(), ptr::null_mut(), 0xFFFF)
578    }
579}