hashmap_mem/
lib.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/piot/hashmap-mem
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5
6use fxhash::FxHasher64;
7use std::cmp::{max, min};
8use std::hash::Hasher;
9use std::mem::size_of;
10use std::ops::Not;
11use std::{ptr, slice};
12
13#[repr(u8)]
14#[derive(Copy, Clone, PartialEq, Eq, Debug)]
15pub enum BucketStatus {
16    Empty = 0, // Must be zero, do not change!
17    Tombstone = 1,
18    Occupied = 2,
19}
20
21#[repr(C)]
22#[derive(Copy, Clone, Debug)]
23pub struct MapHeader {
24    // Do not change the order of the fields!
25    pub capacity: u16,      // Do not change,
26    pub element_count: u16, // Do not change
27    pub key_size: u32,
28    pub value_size: u32,
29
30    pub value_offset: u32,
31    pub bucket_size: u32,
32
33    pub logical_limit: u16,
34    pub key_offset: u8,
35    pub padding_and_secret_code: u8,
36}
37
38pub struct MapInit {
39    pub key_size: u32,
40    pub key_alignment: u8,
41    pub value_size: u32,
42    pub value_alignment: u8,
43    pub capacity: u16,
44    pub logical_limit: u16,
45    pub total_size: u32,
46}
47
48#[derive(Clone, Copy, Debug)]
49pub struct BucketLayout {
50    pub bucket_size: u32,
51    pub key_offset: u8,
52    pub value_offset: u32,
53}
54
55const MAP_BUCKETS_OFFSET: usize = size_of::<MapHeader>();
56const MAX_PROBE_DISTANCE: usize = 32;
57
58#[inline]
59fn calculate_hash_bytes(key_bytes: &[u8]) -> u64 {
60    let mut hasher = FxHasher64::default();
61    hasher.write(key_bytes);
62    hasher.finish()
63}
64
65#[inline]
66fn index_from_hash(hash: u64, capacity: u16) -> usize {
67    assert!(capacity.is_power_of_two());
68
69    // take the top 16 bits; then mask to the actual size
70    // FxHash have badly mixed lower bits
71    ((hash >> 48) as usize) & ((capacity as usize) - 1)
72}
73
74/// Calculate memory layout for a map bucket
75#[inline]
76#[must_use]
77pub fn calculate_bucket_layout(
78    key_size: u32,
79    key_alignment: u8,
80    value_size: u32,
81    value_alignment: u8,
82) -> BucketLayout {
83    let status_size: u32 = 1;
84    let mut current_offset = status_size;
85
86    // Align key
87    let key_align = u32::from(key_alignment);
88    let key_offset = (current_offset + key_align - 1) & !(key_align - 1);
89    current_offset = key_offset + key_size;
90
91    // Align value
92    let value_align = u32::from(value_alignment);
93    let value_offset = (current_offset + value_align - 1) & !(value_align - 1);
94    current_offset = value_offset + value_size;
95
96    // Calculate final bucket size with proper alignment
97    let bucket_content_alignment = max(key_align, value_align);
98    let bucket_size =
99        (current_offset + bucket_content_alignment - 1) & !(bucket_content_alignment - 1);
100
101    BucketLayout {
102        bucket_size,
103        key_offset: key_offset as u8,
104        value_offset,
105    }
106}
107
108#[must_use]
109pub const fn total_size(capacity: u16, bucket_size: u32) -> u32 {
110    (MAP_BUCKETS_OFFSET + capacity as usize * bucket_size as usize) as u32
111}
112
113#[must_use]
114pub fn layout(
115    key_size: u32,
116    key_alignment: u8,
117    value_size: u32,
118    value_alignment: u8,
119    logical_limit: u16,
120) -> (BucketLayout, MapInit) {
121    let capacity = logical_limit.next_power_of_two();
122    let bucket_layout =
123        calculate_bucket_layout(key_size, key_alignment, value_size, value_alignment);
124    (
125        bucket_layout,
126        MapInit {
127            key_size,
128            key_alignment,
129            value_size,
130            value_alignment,
131            capacity,
132            logical_limit,
133            total_size: total_size(capacity, bucket_layout.bucket_size),
134        },
135    )
136}
137
138pub const SECRET_CODE: u8 = 0x3d;
139
140/// Initialize a new hash map in pre-allocated memory
141///
142/// # Safety
143///
144/// - `map_base` must point to valid, properly aligned memory of sufficient size
145/// - The memory must remain valid for the lifetime of the map
146pub unsafe fn init(map_base: *mut u8, config: &MapInit) {
147    assert!(
148        config.capacity.is_power_of_two(),
149        "Capacity must be a power of two"
150    );
151
152    let map_header = map_base.cast::<MapHeader>();
153    let layout = calculate_bucket_layout(
154        config.key_size,
155        config.key_alignment,
156        config.value_size,
157        config.value_alignment,
158    );
159
160    // Initialize header
161    unsafe {
162        ptr::write(
163            map_header,
164            MapHeader {
165                capacity: config.capacity,
166                logical_limit: config.logical_limit,
167                key_size: config.key_size,
168                value_size: config.value_size,
169                bucket_size: layout.bucket_size,
170                key_offset: layout.key_offset,
171                value_offset: layout.value_offset,
172                element_count: 0,
173                padding_and_secret_code: SECRET_CODE,
174            },
175        );
176    }
177
178    // Initialize buckets to empty
179    let buckets_start_ptr = unsafe { map_base.add(MAP_BUCKETS_OFFSET) };
180    let capacity = usize::from(config.capacity);
181    let bucket_size = layout.bucket_size as usize;
182
183    // Zero out all bucket status bytes (Empty = 0)
184    for i in 0..capacity {
185        unsafe {
186            ptr::write(
187                buckets_start_ptr.add(i * bucket_size),
188                BucketStatus::Empty as u8,
189            );
190        }
191    }
192}
193
194/// Fast key comparison helper
195// TODO: Check if the performance difference is significant
196#[inline]
197unsafe fn matches_key(a: *const u8, b: *const u8, len: usize) -> bool {
198    unsafe {
199        if len <= 16 {
200            if len == 0 { true } else {
201                for i in 0..len {
202                    if *a.add(i) != *b.add(i) {
203                        return false;
204                    }
205                }
206                true
207            }
208        } else {
209            slice::from_raw_parts(a, len) == slice::from_raw_parts(b, len)
210        }
211    }
212}
213
214/// Get or reserve an entry in the map
215///
216/// # Safety
217///
218/// - `base_ptr` must point to a valid initialized map
219/// - `key_ptr` must point to a valid key of the size specified in the map header
220///
221/// # Returns
222///
223/// Pointer to the value location, or null if the map is full
224#[inline]
225pub unsafe fn get_or_reserve_entry(base_ptr: *mut u8, key_ptr: *const u8) -> *mut u8 {
226    unsafe {
227        let header = &*base_ptr.cast::<MapHeader>();
228
229        // Validate parameters
230        let capacity = header.capacity as usize;
231        let key_size = header.key_size as usize;
232        let bucket_size = header.bucket_size as usize;
233        let key_offset = header.key_offset as usize;
234        let value_offset = header.value_offset as usize;
235
236        assert_eq!(
237            header.padding_and_secret_code, SECRET_CODE,
238            "hashmap, secret code failed"
239        );
240        assert_ne!(key_size, 0, "Key size cannot be zero");
241        assert_ne!(capacity, 0, "Capacity cannot be zero");
242        assert!(
243            capacity.is_power_of_two(),
244            "Capacity must be a power of two"
245        );
246
247        let buckets_ptr = base_ptr.add(MAP_BUCKETS_OFFSET);
248        let key_slice = slice::from_raw_parts(key_ptr, key_size);
249        let hash = calculate_hash_bytes(key_slice);
250
251        // Initial probe position
252        let mut index = index_from_hash(hash, header.capacity);
253
254        // Track first tombstone for potential reuse
255        let mut first_tombstone = None;
256        let probe_limit = min(capacity, MAX_PROBE_DISTANCE);
257
258        for _ in 0..probe_limit {
259            let bucket_ptr = buckets_ptr.add(index * bucket_size);
260            let status = *bucket_ptr;
261
262            match status {
263                status if status == BucketStatus::Empty as u8 => {
264                    // TODO: Maybe go back to BucketStatus as constants instead, this feel a bit awkward
265                    // Use tombstone if found, otherwise use current empty slot
266                    let insert_index = first_tombstone.unwrap_or(index);
267                    let target_bucket = buckets_ptr.add(insert_index * bucket_size);
268
269                    // Mark as occupied and copy key
270                    *target_bucket = BucketStatus::Occupied as u8;
271                    let target_key_ptr = target_bucket.add(key_offset);
272                    ptr::copy_nonoverlapping(key_ptr, target_key_ptr, key_size);
273
274                    // Update element count
275                    let header_mut = &mut *base_ptr.cast::<MapHeader>();
276                    header_mut.element_count += 1;
277
278                    return target_bucket.add(value_offset);
279                }
280                status if status == BucketStatus::Occupied as u8 => {
281                    // Check if keys match
282                    let existing_key_ptr = bucket_ptr.add(key_offset);
283                    if matches_key(existing_key_ptr, key_ptr, key_size) {
284                        return bucket_ptr.add(value_offset);
285                    }
286                }
287                status if status == BucketStatus::Tombstone as u8 => {
288                    // Remember first tombstone for potential reuse
289                    if first_tombstone.is_none() {
290                        first_tombstone = Some(index);
291                    }
292                }
293                _ => unreachable!(),
294            }
295
296            // Linear probing with wraparound using bitmask
297            index = (index + 1) & (capacity - 1);
298        }
299
300        // If we found a tombstone during probing, use it
301        if let Some(tombstone_index) = first_tombstone {
302            let target_bucket = buckets_ptr.add(tombstone_index * bucket_size);
303
304            // Mark as occupied and copy key
305            *target_bucket = BucketStatus::Occupied as u8;
306            let target_key_ptr = target_bucket.add(key_offset);
307            ptr::copy_nonoverlapping(key_ptr, target_key_ptr, key_size);
308
309            // Update element count
310            let header_mut = &mut *base_ptr.cast::<MapHeader>();
311            header_mut.element_count += 1;
312
313            return target_bucket.add(value_offset);
314        }
315
316        // Map is full or probe limit exceeded
317        ptr::null_mut()
318    }
319}
320
321/// Check if a key exists in the map
322///
323/// # Safety
324///
325/// - `base_ptr` must point to a valid initialized map
326/// - `key_ptr` must point to a valid key of the size specified in the map header
327#[inline]
328#[must_use]
329pub unsafe fn has(base_ptr: *const u8, key_ptr: *const u8) -> bool {
330    unsafe { lookup(base_ptr.cast_mut(), key_ptr).is_null().not() }
331}
332
333/// Lookup an existing entry in the map
334///
335/// # Safety
336///
337/// - `base_ptr` must point to a valid initialized map
338/// - `key_ptr` must point to a valid key of the size specified in the map header
339///
340/// # Returns
341///
342/// Pointer to the found value, or null if not found
343#[inline]
344pub unsafe fn lookup(base_ptr: *mut u8, key_ptr: *const u8) -> *mut u8 {
345    unsafe {
346        let header = &*base_ptr.cast::<MapHeader>();
347
348        let capacity = header.capacity as usize;
349        let key_size = header.key_size as usize;
350        let bucket_size = header.bucket_size as usize;
351        let key_offset = header.key_offset as usize;
352        let value_offset = header.value_offset as usize;
353
354        assert_eq!(
355            header.padding_and_secret_code, SECRET_CODE,
356            "hashmap, secret code failed"
357        );
358        assert_ne!(key_size, 0, "Key size cannot be zero");
359        assert_ne!(capacity, 0, "Capacity cannot be zero");
360        assert!(
361            capacity.is_power_of_two(),
362            "Capacity must be a power of two {capacity}"
363        );
364
365        let buckets_ptr = base_ptr.add(MAP_BUCKETS_OFFSET);
366        let key_slice = slice::from_raw_parts(key_ptr, key_size);
367        let hash = calculate_hash_bytes(key_slice);
368
369        // Initial probe position
370        let mut index = index_from_hash(hash, header.capacity);
371        let probe_limit = min(capacity, MAX_PROBE_DISTANCE);
372
373        for _ in 0..probe_limit {
374            let bucket_ptr = buckets_ptr.add(index * bucket_size);
375            let status = *bucket_ptr;
376
377            match status {
378                status if status == BucketStatus::Empty as u8 => {
379                    // TODO: Maybe go back to constant
380                    // Empty slot means the key is not in the map
381                    return ptr::null_mut();
382                }
383                status if status == BucketStatus::Occupied as u8 => {
384                    // Check if keys match
385                    let existing_key_ptr = bucket_ptr.add(key_offset);
386                    if matches_key(existing_key_ptr, key_ptr, key_size) {
387                        return bucket_ptr.add(value_offset);
388                    }
389                }
390                _ => {} // Continue probing for tombstones
391            }
392
393            index = (index + 1) & (capacity - 1);
394        }
395
396        // Key not found within probe limit
397        ptr::null_mut()
398    }
399}
400
401/// Remove an entry from the map
402///
403/// # Safety
404///
405/// - `base_ptr` must point to a valid initialized map
406/// - `key_ptr` must point to a valid key of the size specified in the map header
407///
408/// # Returns
409///
410/// `true` if the key was found and removed, `false` otherwise
411#[inline]
412pub unsafe fn remove(base_ptr: *mut u8, key_ptr: *const u8) -> bool {
413    unsafe {
414        let header = &*base_ptr.cast::<MapHeader>();
415
416        let capacity = header.capacity as usize;
417        let key_size = header.key_size as usize;
418        let bucket_size = header.bucket_size as usize;
419        let key_offset = header.key_offset as usize;
420
421        assert_eq!(
422            header.padding_and_secret_code, SECRET_CODE,
423            "hashmap, secret code failed"
424        );
425        assert_ne!(key_size, 0, "Key size cannot be zero");
426        assert_ne!(capacity, 0, "Capacity cannot be zero");
427        assert!(
428            capacity.is_power_of_two(),
429            "Capacity must be a power of two"
430        );
431
432        let buckets_ptr = base_ptr.add(MAP_BUCKETS_OFFSET);
433        let key_slice = slice::from_raw_parts(key_ptr, key_size);
434        let hash = calculate_hash_bytes(key_slice);
435
436        // Initial probe position
437        let mut index = index_from_hash(hash, header.capacity);
438        let probe_limit = min(capacity, MAX_PROBE_DISTANCE);
439
440        for _ in 0..probe_limit {
441            let bucket_ptr = buckets_ptr.add(index * bucket_size);
442            let status = *bucket_ptr;
443
444            match status {
445                status if status == BucketStatus::Empty as u8 => {
446                    // Empty slot means the key is not in the map
447                    return false;
448                }
449                status if status == BucketStatus::Occupied as u8 => {
450                    // Check if keys match
451                    let existing_key_ptr = bucket_ptr.add(key_offset);
452                    if matches_key(existing_key_ptr, key_ptr, key_size) {
453                        // Convert to tombstone
454                        *bucket_ptr = BucketStatus::Tombstone as u8;
455
456                        // Update count
457                        let header_mut = &mut *base_ptr.cast::<MapHeader>();
458                        header_mut.element_count -= 1;
459
460                        return true;
461                    }
462                }
463                _ => {} // Continue probing for tombstones
464            }
465
466            index = (index + 1) & (capacity - 1);
467        }
468
469        // Key not found within probe limit
470        false
471    }
472}
473
474/// Copy all entries from source map to target map
475///
476/// # Safety
477///
478/// - Both maps must be properly initialized with compatible layouts (capacity can differ)
479/// - Target map must have sufficient capacity
480///
481/// # Returns
482///
483/// `true` if the operation succeeded, `false` if the target has insufficient capacity
484#[inline]
485pub unsafe fn overwrite(target_base: *mut u8, source: *const u8) -> bool {
486    unsafe {
487        let target_header = &mut *target_base.cast::<MapHeader>();
488        let source_header = &*source.cast::<MapHeader>();
489        assert_eq!(
490            target_header.padding_and_secret_code, SECRET_CODE,
491            "hashmap, secret code failed"
492        );
493        assert_eq!(
494            source_header.padding_and_secret_code, SECRET_CODE,
495            "hashmap, secret code failed"
496        );
497        // Check if target has enough capacity
498        if target_header.logical_limit < source_header.element_count {
499            return false;
500        }
501
502        // Validate compatible layouts
503        assert_eq!(
504            target_header.bucket_size, source_header.bucket_size,
505            "Incompatible bucket sizes"
506        );
507        assert_eq!(
508            target_header.key_size, source_header.key_size,
509            "Incompatible key sizes"
510        );
511        assert_eq!(
512            target_header.value_size, source_header.value_size,
513            "Incompatible value sizes"
514        );
515
516        let source_buckets_ptr = source.add(MAP_BUCKETS_OFFSET);
517        let bucket_size = source_header.bucket_size as usize;
518        let key_offset = source_header.key_offset as usize;
519        let value_offset = source_header.value_offset as usize;
520        let value_size = source_header.value_size as usize;
521
522        // Copy each occupied bucket
523        for i in 0..source_header.capacity as usize {
524            let source_bucket = source_buckets_ptr.add(i * bucket_size);
525
526            if *source_bucket == BucketStatus::Occupied as u8 {
527                let source_key_ptr = source_bucket.add(key_offset);
528                let source_value_ptr = source_bucket.add(value_offset);
529
530                let target_value_ptr = get_or_reserve_entry(target_base, source_key_ptr);
531
532                if target_value_ptr.is_null() {
533                    return false;
534                }
535
536                ptr::copy_nonoverlapping(source_value_ptr, target_value_ptr, value_size);
537            }
538        }
539
540        true
541    }
542}
543
544/// Find the next valid entry in the map
545///
546/// # Safety
547///
548/// - `base` must point to a valid initialized map
549///
550/// # Returns
551///
552/// Tuple of (`key_ptr`, `value_ptr`, index) of the next valid entry,
553/// or (null, null, 0xFFFF) if no more entries exist
554#[inline]
555pub unsafe fn find_next_valid_entry(base: *mut u8, start_index: u16) -> (*const u8, *mut u8, u16) {
556    unsafe {
557        let map_header = &*base.cast::<MapHeader>();
558        let bucket_size = map_header.bucket_size as usize;
559        let buckets_start = base.add(MAP_BUCKETS_OFFSET);
560        let key_offset = map_header.key_offset as usize;
561        let value_offset = map_header.value_offset as usize;
562        assert_eq!(
563            map_header.padding_and_secret_code, SECRET_CODE,
564            "hashmap, secret code failed"
565        );
566
567        let mut index = start_index as usize;
568
569        while index < map_header.capacity as usize {
570            let entry_ptr = buckets_start.add(index * bucket_size);
571
572            // Properly use the enum instead of magic number
573            if *entry_ptr == BucketStatus::Occupied as u8 {
574                let key_addr = entry_ptr.add(key_offset);
575                let value_addr = entry_ptr.add(value_offset);
576
577                return (key_addr, value_addr, index as u16);
578            }
579
580            index += 1;
581        }
582
583        (ptr::null(), ptr::null_mut(), 0xFFFF)
584    }
585}