opendiskmap/
disk_map.rs

1use std::hash::BuildHasher;
2use std::io;
3use std::marker::PhantomData;
4use std::path::Path;
5
6use rustc_hash::FxBuildHasher;
7
8use crate::byte_store::{MMapFile, VecStore};
9use crate::entry::Entry;
10use crate::fixed_buffers::FixedVec;
11use crate::storage::MapStorage;
12use crate::types::{BytesDecode, BytesEncode, Native, Str};
13use crate::{Buffers, ByteStore};
14
15// Type aliases for common use cases
16pub type U64StringMap<BS = VecStore> = DiskHashMap<Native<u64>, Str, BS>;
17pub type StringU64Map<BS = VecStore> = DiskHashMap<Str, Native<u64>, BS>;
18pub type StringStringMap<BS = VecStore> = DiskHashMap<Str, Str, BS>;
19
20/// Entry API for the HashMap, similar to std::collections::HashMap
21pub enum MapEntry<'a, K, V, BS, S = FxBuildHasher>
22where
23    BS: ByteStore,
24    S: BuildHasher + Default,
25{
26    Occupied(OccupiedEntry<'a, K, V, BS, S>),
27    Vacant(VacantEntry<'a, K, V, BS, S>),
28}
29
30impl<K, V, BS, S> MapEntry<'_, K, V, BS, S>
31where
32    BS: ByteStore,
33    S: BuildHasher + Default,
34{
35    /// Returns true if the entry is occupied
36    pub fn is_occupied(&self) -> bool {
37        matches!(self, MapEntry::Occupied(_))
38    }
39
40    /// Returns true if the entry is vacant
41    pub fn is_vacant(&self) -> bool {
42        matches!(self, MapEntry::Vacant(_))
43    }
44
45    pub fn key(&self) -> <K as BytesDecode<'_>>::DItem
46    where
47        K: for<'a> BytesDecode<'a>,
48    {
49        let k = match self {
50            MapEntry::Occupied(entry) => <K as BytesDecode>::bytes_decode(entry.key_bytes()),
51            MapEntry::Vacant(entry) => <K as BytesDecode>::bytes_decode(&entry.key),
52        };
53        k.expect("Failed to decode key")
54    }
55}
56
57/// A view into an occupied entry in the map
58pub struct OccupiedEntry<'a, K, V, BS, S = FxBuildHasher>
59where
60    BS: ByteStore,
61    S: BuildHasher + Default,
62{
63    map: &'a mut DiskHashMap<K, V, BS, S>,
64    slot_idx: usize,
65}
66
67/// A view into a vacant entry in the map
68pub struct VacantEntry<'a, K, V, BS, S = FxBuildHasher>
69where
70    BS: ByteStore,
71    S: BuildHasher + Default,
72{
73    map: &'a mut DiskHashMap<K, V, BS, S>,
74    key: Vec<u8>,
75    slot_idx: usize,
76}
77
78/// This is an open address hash map implementation with trait-based encoding/decoding.
79/// It takes any types that implement BytesEncode/BytesDecode as key and value.
80/// It is designed to be used with a backing store that implements `ByteStore` trait,
81/// allowing for flexible storage options (in-memory with VecStore or persistent with MMapFile).
82/// The `ByteStore` is not used directly; instead we rely on `Buffers`
83/// which is technically a `Vec<Box<[u8]>>` but backed by a `ByteStore` trait.
84pub struct DiskHashMap<K, V, BS, S = FxBuildHasher>
85where
86    BS: ByteStore,
87    S: BuildHasher + Default,
88{
89    entries: FixedVec<Entry, BS>,
90    keys: Buffers<BS>,
91    values: Buffers<BS>,
92    capacity: usize,
93    size: usize,
94    hasher: S,
95    _marker: PhantomData<(K, V)>,
96}
97
98impl<K, V> Default for DiskHashMap<K, V, VecStore, FxBuildHasher> {
99    fn default() -> Self {
100        Self::new()
101    }
102}
103
104impl<K, V, BS, S> DiskHashMap<K, V, BS, S>
105where
106    BS: ByteStore,
107    S: BuildHasher + Default,
108{
109    /// Creates a new HashMap with the given backing stores
110    pub fn with_stores(entry_store: BS, keys_store: BS, values_store: BS) -> Self {
111        let keys = Buffers::new(keys_store);
112        let values = Buffers::new(values_store);
113        let entries = FixedVec::new(entry_store);
114        let capacity = entries.capacity();
115
116        Self {
117            keys,
118            values,
119            entries,
120            capacity,
121            size: 0,
122            hasher: S::default(),
123            _marker: PhantomData,
124        }
125    }
126
127    /// Returns the number of key-value pairs in the map
128    pub fn len(&self) -> usize {
129        self.size
130    }
131
132    /// Returns true if the map contains no elements
133    pub fn is_empty(&self) -> bool {
134        self.size == 0
135    }
136
137    /// Returns the current capacity of the map
138    pub fn capacity(&self) -> usize {
139        self.capacity
140    }
141
142    /// Returns the load factor of the map (size / capacity)
143    pub fn load_factor(&self) -> f64 {
144        if self.capacity == 0 {
145            return f64::INFINITY;
146        }
147        self.size as f64 / self.capacity as f64
148    }
149
150    /// Check if resizing is needed based on load factor
151    fn should_resize(&self) -> bool {
152        if self.capacity == 0 {
153            return true;
154        }
155        // Resize when load factor exceeds 40% for better performance.
156        self.load_factor() > 0.4
157    }
158
159    // fn hash_key(&self, key: &[u8]) -> u64 {
160    //     self.hasher.hash_one(key)
161    // }
162
163    /// Find the slot index for a key
164    /// if the key is found, returns Some(index),
165    /// if the key is not found return the first empty slot index
166    fn find_slot(
167        &self,
168        key: &[u8],
169        mut eq_fn: impl FnMut(&[u8], &[u8]) -> bool,
170        hash_fn: impl Fn(&[u8]) -> u64,
171    ) -> Result<usize, usize> {
172        if self.capacity == 0 {
173            return Err(0);
174        }
175
176        let hash = hash_fn(key);
177        let mut index = hash as usize % self.capacity;
178
179        // Linear probing
180        for _ in 0..self.capacity {
181            let entry = &self.entries[index];
182            if entry.is_empty() {
183                // Empty slot, key not found
184                return Err(index);
185            }
186
187            if !entry.is_deleted() {
188                // Check if this is our key
189                if let Some(stored_key) = self.keys.get(entry.key_pos()) {
190                    if eq_fn(key, stored_key) {
191                        return Ok(index);
192                    }
193                }
194            }
195            // continue probing for deleted slots or non-matching keys
196            index = (index + 1) % self.capacity;
197        }
198
199        Err(self.capacity)
200    }
201
202    pub fn stats(&self) -> (u64, u64, u64) {
203        (
204            self.entries.store().stats(),
205            self.keys.store().stats(),
206            self.values.store().stats(),
207        )
208    }
209
210    /// Insert a new entry at the given slot index
211    fn insert_new_entry(&mut self, slot_idx: usize, key_bytes: &[u8], value_bytes: &[u8]) -> Entry {
212        let key_idx = self.keys.append(key_bytes);
213        let value_idx = self.values.append(value_bytes);
214
215        let entry = Entry::occupied_at_pos(key_idx, value_idx);
216        self.entries[slot_idx] = entry;
217        self.size += 1;
218        entry
219    }
220}
221
222impl<
223    K: for<'a> BytesEncode<'a>,
224    V: for<'a> BytesEncode<'a> + for<'a> BytesDecode<'a>,
225    BS: ByteStore,
226    S: BuildHasher + Default,
227> DiskHashMap<K, V, BS, S>
228{
229    fn grow(&mut self) -> Result<(), Box<dyn std::error::Error + Sync + Send>> {
230        let new_capacity = if self.capacity == 0 {
231            16
232        } else {
233            self.capacity * 2
234        };
235        let mut new_entries = self.entries.new_empty(new_capacity);
236        let actual_new_capacity = new_entries.capacity();
237
238        // Re-hash all existing entries into the new larger array
239        for i in 0..self.capacity {
240            let entry = self.entries[i];
241            if entry.is_occupied() {
242                let key_data = self
243                    .keys
244                    .get(entry.key_pos())
245                    .expect("key must exist for occupied entry");
246                let hash = <K as BytesEncode>::hash_alt(key_data, &self.hasher);
247                let mut index = hash as usize % actual_new_capacity;
248
249                // Linear probing in the new_entries array
250                loop {
251                    if new_entries[index].is_empty() {
252                        new_entries[index] = entry;
253                        break;
254                    }
255                    index = (index + 1) % actual_new_capacity;
256                }
257            }
258        }
259        self.entries = new_entries;
260        self.capacity = actual_new_capacity;
261        Ok(())
262    }
263
264    /// Insert a key-value pair into the map using the trait-based API
265    pub fn insert<'a>(
266        &'a mut self,
267        key: &'a <K as BytesEncode<'a>>::EItem,
268        value: &'a <V as BytesEncode<'a>>::EItem,
269    ) -> Result<Option<<V as BytesDecode<'a>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
270    {
271        if self.should_resize() {
272            self.grow()?;
273        }
274
275        let key_bytes = K::bytes_encode(key)?;
276        let value_bytes = V::bytes_encode(value)?;
277
278        self.insert_key_value_bytes(&key_bytes, &value_bytes)
279    }
280
281    /// Common insertion logic for key-value pairs using raw bytes
282    fn insert_key_value_bytes(
283        &mut self,
284        key_bytes: &[u8],
285        value_bytes: &[u8],
286    ) -> Result<Option<<V as BytesDecode<'_>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
287    {
288        match self.find_slot_inner(key_bytes) {
289            Err(slot_idx) => {
290                // Found an empty slot, insert new key-value pair
291                self.insert_new_entry(slot_idx, key_bytes, value_bytes);
292                Ok(None)
293            }
294            Ok(slot_idx) => {
295                // Key already exists, update value
296                self.update_existing_entry(slot_idx, value_bytes)
297            }
298        }
299    }
300
301    /// Update an existing entry at the given slot index
302    fn update_existing_entry(
303        &mut self,
304        slot_idx: usize,
305        value_bytes: &[u8],
306    ) -> Result<Option<<V as BytesDecode<'_>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
307    {
308        let entry = &mut self.entries[slot_idx];
309        let old_value_idx = entry.value_pos();
310        let new_value_idx = self.values.append(value_bytes);
311        entry.set_new_kv(entry.key_pos(), new_value_idx);
312
313        // Get the old value after the mutation
314        let old_value_bytes = self
315            .values
316            .get(old_value_idx)
317            .expect("value must exist for occupied entry");
318        let old_value = V::bytes_decode(old_value_bytes)?;
319
320        Ok(Some(old_value))
321    }
322
323    fn find_slot_inner(&self, key: &[u8]) -> Result<usize, usize> {
324        self.find_slot(
325            key,
326            |l, r| <K as BytesEncode>::eq_alt(l, r),
327            |k| <K as BytesEncode>::hash_alt(k, &self.hasher), // Use the same hash function as grow()
328        )
329    }
330
331    /// Get a value by key using the trait-based API
332    pub fn get<'a>(
333        &self,
334        key: &'a <K as BytesEncode<'a>>::EItem,
335    ) -> Result<Option<<V as BytesDecode<'_>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
336    {
337        if self.is_empty() {
338            return Ok(None);
339        }
340
341        let key_bytes = K::bytes_encode(key)?;
342        match self.find_slot_inner(&key_bytes) {
343            Ok(slot_idx) => {
344                let entry = &self.entries[slot_idx];
345                let value_bytes = self
346                    .values
347                    .get(entry.value_pos())
348                    .expect("value must exist for occupied entry");
349                let value = V::bytes_decode(value_bytes)?;
350                Ok(Some(value))
351            }
352            Err(_) => Ok(None),
353        }
354    }
355
356    /// Get an entry for the given key using trait-based API
357    pub fn entry<'a>(
358        &'a mut self,
359        key: &'a <K as BytesEncode<'a>>::EItem,
360    ) -> Result<MapEntry<'a, K, V, BS, S>, Box<dyn std::error::Error + Sync + Send>>
361    where
362        for<'b> K: BytesEncode<'b>,
363        for<'b> V: BytesDecode<'b>,
364    {
365        let key_bytes = K::bytes_encode(key)?;
366        Ok(self.entry_raw(key_bytes.as_ref()))
367    }
368
369    /// Get an entry for the given key, allowing for efficient insertion/access patterns
370    fn entry_raw<Q: AsRef<[u8]>>(&mut self, key: Q) -> MapEntry<'_, K, V, BS, S>
371    where
372        for<'a> K: BytesEncode<'a>,
373        for<'b> V: BytesDecode<'b>,
374    {
375        if self.should_resize() {
376            let _ = self.grow();
377        }
378
379        let key_bytes = key.as_ref();
380        match self.find_slot_inner(key_bytes) {
381            Ok(slot_idx) => MapEntry::Occupied(OccupiedEntry {
382                map: self,
383                slot_idx,
384            }),
385            Err(slot_idx) => MapEntry::Vacant(VacantEntry {
386                map: self,
387                key: key_bytes.to_vec(),
388                slot_idx,
389            }),
390        }
391    }
392}
393
394impl<K, V> DiskHashMap<K, V, VecStore, FxBuildHasher> {
395    /// Creates a new in-memory HashMap
396    pub fn new() -> Self {
397        Self::with_stores(VecStore::new(), VecStore::new(), VecStore::new())
398    }
399}
400
401impl<K, V, S> DiskHashMap<K, V, MMapFile, S>
402where
403    S: BuildHasher + Default,
404{
405    pub fn new_in(path: &Path) -> io::Result<Self> {
406        const DEFAULT_ENTRIES_CAP: usize = 16;
407        const DEFAULT_KV_CAP: usize = 1024;
408
409        let storage = MapStorage::new_in(
410            path,
411            DEFAULT_ENTRIES_CAP * std::mem::size_of::<Entry>(),
412            DEFAULT_KV_CAP,
413            DEFAULT_KV_CAP,
414        )?;
415
416        let keys = Buffers::new(storage.keys);
417        let values = Buffers::new(storage.values);
418        let entries = FixedVec::<Entry, _>::new(storage.entries);
419        let capacity = entries.capacity();
420
421        Ok(Self {
422            keys,
423            values,
424            entries,
425            capacity,
426            size: 0,
427            hasher: S::default(),
428            _marker: PhantomData,
429        })
430    }
431
432    /// Creates a new HashMap with specified capacities, rounding up to nearest power of 2
433    pub fn with_capacity(
434        path: impl AsRef<Path>,
435        num_entries: usize,
436        keys_bytes: usize,
437        values_bytes: usize,
438    ) -> io::Result<Self> {
439        let path = path.as_ref();
440
441        // Ensure none of the capacities are zero
442        if num_entries == 0 || keys_bytes == 0 || values_bytes == 0 {
443            return Err(io::Error::new(
444                io::ErrorKind::InvalidInput,
445                "Capacities must be greater than zero",
446            ));
447        }
448
449        // Round up to nearest power of 2
450        let entries_cap = num_entries.next_power_of_two();
451        let keys_cap = keys_bytes.next_power_of_two();
452        let values_cap = values_bytes.next_power_of_two();
453
454        let storage = MapStorage::new_in(
455            path,
456            entries_cap * std::mem::size_of::<Entry>(),
457            keys_cap,
458            values_cap,
459        )?;
460
461        let keys = Buffers::new(storage.keys);
462        let values = Buffers::new(storage.values);
463        let entries = FixedVec::<Entry, _>::new(storage.entries);
464        let capacity = entries.capacity();
465
466        Ok(Self {
467            keys,
468            values,
469            entries,
470            capacity,
471            size: 0,
472            hasher: S::default(),
473            _marker: PhantomData,
474        })
475    }
476
477    pub fn load_from(path: &Path) -> io::Result<Self> {
478        let storage = MapStorage::load_from(path)?;
479
480        let keys = Buffers::load(storage.keys);
481        let values = Buffers::load(storage.values);
482        let entries = FixedVec::<Entry, _>::new(storage.entries);
483        let capacity = entries.capacity();
484
485        let mut size = 0;
486        for i in 0..capacity {
487            if entries[i].is_occupied() {
488                size += 1;
489            }
490        }
491
492        Ok(Self {
493            keys,
494            values,
495            entries,
496            capacity,
497            size,
498            hasher: S::default(),
499            _marker: PhantomData,
500        })
501    }
502}
503
504impl<'a, K, V, BS, S> OccupiedEntry<'a, K, V, BS, S>
505where
506    BS: ByteStore,
507    S: BuildHasher + Default,
508{
509    /// Get a reference to the key in the entry
510    pub fn key_bytes(&self) -> &[u8] {
511        let entry = &self.map.entries[self.slot_idx];
512        self.map
513            .keys
514            .get(entry.key_pos())
515            .expect("key must exist for occupied entry")
516    }
517
518    /// Get a reference to the value in the entry
519    pub fn value_bytes(&self) -> &[u8] {
520        let entry = &self.map.entries[self.slot_idx];
521        self.map
522            .values
523            .get(entry.value_pos())
524            .expect("value must exist for occupied entry")
525    }
526}
527
528// Trait-based extensions for OccupiedEntry
529impl<'a, K, V, BS, S> OccupiedEntry<'a, K, V, BS, S>
530where
531    BS: ByteStore,
532    S: BuildHasher + Default,
533    K: for<'b> BytesEncode<'b>,
534    V: for<'b> BytesEncode<'b> + for<'b> BytesDecode<'b>,
535{
536    /// Get the value in the entry using the trait-based API
537    pub fn get(
538        &self,
539    ) -> Result<<V as BytesDecode<'_>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
540        let value_bytes = self.value_bytes();
541        V::bytes_decode(value_bytes)
542    }
543
544    /// Insert a new value into the entry, returning the old value
545    fn insert_bytes<V2: AsRef<[u8]>>(
546        self,
547        value: V2,
548    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Send + Sync>> {
549        self.map
550            .update_existing_entry(self.slot_idx, value.as_ref())
551            .map(|r| r.unwrap())
552    }
553
554    /// Insert the value into the vacant entry using the trait-based API
555    /// Returns the raw bytes since we can't return borrowed decoded value from a consuming method
556    pub fn insert(
557        self,
558        value: &'a <V as BytesEncode<'a>>::EItem,
559    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
560        let value_bytes = V::bytes_encode(value)?;
561        self.insert_bytes(value_bytes.as_ref())
562    }
563
564    /// Insert the value into the vacant entry using trait-based API if vacant
565    pub fn or_insert(
566        self,
567        value: &'a <V as BytesEncode<'a>>::EItem,
568    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
569        self.insert(value)
570    }
571
572    /// Insert the value returned by the closure if the entry is vacant using trait-based API
573    pub fn or_insert_with<F>(
574        self,
575        f: F,
576    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>>
577    where
578        F: FnOnce() -> &'a <V as BytesEncode<'a>>::EItem,
579    {
580        self.insert(f())
581    }
582}
583
584impl<'a, K, V, BS, S> VacantEntry<'a, K, V, BS, S>
585where
586    BS: ByteStore,
587    S: BuildHasher + Default,
588{
589    /// Insert the value into the vacant entry, returning a reference to the inserted value
590    fn insert_bytes<V2: AsRef<[u8]>>(self, value: V2) -> &'a [u8] {
591        let entry = self
592            .map
593            .insert_new_entry(self.slot_idx, &self.key, value.as_ref());
594        self.map
595            .values
596            .get(entry.value_pos())
597            .expect("value was just inserted")
598    }
599}
600
601// Trait-based extensions for VacantEntry
602impl<'a, K, V, BS, S> VacantEntry<'a, K, V, BS, S>
603where
604    BS: ByteStore,
605    S: BuildHasher + Default,
606    K: for<'b> BytesEncode<'b>,
607    V: for<'b> BytesEncode<'b> + for<'b> BytesDecode<'b>,
608{
609    /// Insert the value into the vacant entry using the trait-based API
610    /// Returns the raw bytes since we can't return borrowed decoded value from a consuming method
611    pub fn insert(
612        self,
613        value: &'a <V as BytesEncode<'a>>::EItem,
614    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
615        let value_bytes = V::bytes_encode(value)?;
616        let old_bytes = self.insert_bytes(value_bytes.as_ref());
617        V::bytes_decode(old_bytes)
618    }
619
620    /// Insert the value into the vacant entry using trait-based API if vacant
621    pub fn or_insert(
622        self,
623        value: &'a <V as BytesEncode<'a>>::EItem,
624    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
625        self.insert(value)
626    }
627
628    /// Insert the value returned by the closure if the entry is vacant using trait-based API
629    pub fn or_insert_with<F>(
630        self,
631        f: F,
632    ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>>
633    where
634        F: FnOnce() -> &'a <V as BytesEncode<'a>>::EItem,
635    {
636        self.insert(f())
637    }
638}
639
640#[cfg(test)]
641mod tests {
642    use super::*;
643    use crate::types::{Arch, Native, Str};
644    use crate::{Bytes, VecStore};
645    use proptest::prelude::*;
646    use rkyv::{Archive, Deserialize, Serialize};
647    use rustc_hash::FxBuildHasher;
648    use std::collections::HashMap as StdHashMap;
649    use tempfile::tempdir;
650
651    type BytesHM = DiskHashMap<Bytes, Bytes, VecStore, FxBuildHasher>;
652
653    // Legacy tests using raw byte API for backward compatibility
654    #[test]
655    fn test_insert_and_get_raw() {
656        let mut map: BytesHM = DiskHashMap::new();
657
658        // Insert a key-value pair using raw API
659        map.insert(b"hello", b"world").unwrap();
660
661        // Get the value using raw API
662        let value = map.get(b"hello").unwrap();
663        assert_eq!(value, Some(b"world".as_ref()));
664
665        // Test non-existent key
666        let value = map.get(b"not_found").unwrap();
667        assert_eq!(value, None);
668    }
669
670    #[test]
671    fn test_update_value_raw() {
672        let mut map: BytesHM = DiskHashMap::new();
673
674        // Insert a key-value pair
675        map.insert(b"key", b"value1").unwrap();
676        assert_eq!(map.get(b"key").unwrap(), Some(b"value1".as_ref()));
677
678        // Update the value
679        let old_value = map.insert(b"key", b"value2").unwrap();
680        assert_eq!(old_value, Some(b"value1".as_ref()));
681
682        // Get the updated value
683        let value = map.get(b"key").unwrap();
684        assert_eq!(value, Some(b"value2".as_ref()));
685        assert_eq!(map.len(), 1);
686    }
687
688    #[test]
689    fn test_multiple_entries_raw() {
690        let mut map: BytesHM = DiskHashMap::new();
691
692        // Insert multiple key-value pairs
693        map.insert(b"key1", b"value1").unwrap();
694        map.insert(b"key2", b"value2").unwrap();
695        map.insert(b"key3", b"value3").unwrap();
696
697        // Get values
698        assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
699        assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
700        assert_eq!(map.get(b"key3").unwrap(), Some(b"value3".as_ref()));
701    }
702
703    #[test]
704    fn test_empty_map() {
705        let map: BytesHM = DiskHashMap::new();
706
707        // Map should be empty
708        assert_eq!(map.len(), 0);
709        assert!(map.is_empty());
710
711        // Get on empty map
712        assert_eq!(map.get(b"key").unwrap(), None);
713    }
714
715    fn check_prop(hm: StdHashMap<Vec<u8>, Vec<u8>>) {
716        let mut map: BytesHM = DiskHashMap::new();
717
718        // Insert all key-value pairs from the StdHashMap
719        let mut already_inserted = vec![];
720        for (k, v) in hm.iter() {
721            map.insert(k.as_slice(), v.as_slice()).unwrap();
722            already_inserted.push((k.clone(), v.clone()));
723            for (k, v) in &already_inserted {
724                assert_eq!(map.get(k).unwrap(), Some(v.as_slice()), "key: {k:?}");
725
726                let entry = map.entry(k).unwrap();
727                assert!(entry.is_occupied());
728                assert_eq!(entry.key(), k);
729                match entry {
730                    MapEntry::Occupied(occupied) => {
731                        assert_eq!(occupied.value_bytes(), v);
732                    }
733                    MapEntry::Vacant(_) => panic!("Expected occupied entry"),
734                }
735            }
736        }
737
738        // Check the size of the map
739        assert_eq!(map.len(), hm.len());
740
741        // Check that all values can be retrieved
742        for (k, v) in hm.iter() {
743            assert_eq!(
744                map.get(k.as_slice()).unwrap(),
745                Some(v.as_slice()),
746                "key: {k:?}"
747            );
748        }
749    }
750
751    fn check_prop_native(hm: StdHashMap<u64, u64>) {
752        let mut map: DiskHashMap<Native<u64>, Native<u64>, VecStore, FxBuildHasher> =
753            DiskHashMap::new();
754
755        // Insert all key-value pairs from the StdHashMap
756        let mut already_inserted = vec![];
757        for (k, v) in hm.iter() {
758            map.insert(k, v).unwrap();
759            already_inserted.push((*k, *v));
760            for (k, v) in &already_inserted {
761                assert_eq!(map.get(k).unwrap(), Some(*v), "key: {k:?}");
762
763                let entry = map.entry(k).unwrap();
764                assert!(entry.is_occupied());
765                assert_eq!(entry.key(), *k);
766                match entry {
767                    MapEntry::Occupied(occupied) => {
768                        assert_eq!(occupied.get().unwrap(), *v);
769                    }
770                    MapEntry::Vacant(_) => panic!("Expected occupied entry"),
771                }
772            }
773        }
774
775        // Check the size of the map
776        assert_eq!(map.len(), hm.len());
777
778        // Check that all values can be retrieved
779        for (k, v) in hm.iter() {
780            assert_eq!(map.get(k).unwrap(), Some(*v), "key: {k:?}");
781        }
782    }
783
784    #[test]
785    fn it_s_a_hash_map() {
786        let small_hash_map_prop = proptest::collection::hash_map(
787            proptest::collection::vec(0u8..255, 1..32),
788            proptest::collection::vec(0u8..255, 1..32),
789            1..250,
790        );
791
792        proptest!(|(values in small_hash_map_prop)|{
793            check_prop(values);
794        });
795    }
796
797    #[test]
798    fn it_s_a_hash_map_native() {
799        let small_hash_map_prop = proptest::collection::hash_map(
800            proptest::num::u64::ANY,
801            proptest::num::u64::ANY,
802            1..250,
803        );
804
805        proptest!(|(values in small_hash_map_prop)|{
806            check_prop_native(values);
807        });
808    }
809
810    #[test]
811    fn it_s_a_hash_map_1() {
812        let mut expected = StdHashMap::new();
813        expected.insert(vec![225, 211, 10, 64, 102, 152], vec![173, 231, 92]);
814        expected.insert(vec![227, 209, 20, 158, 58, 22, 107, 62], vec![77]);
815        expected.insert(
816            vec![140, 134, 67, 127, 34, 190],
817            vec![144, 189, 239, 135, 30],
818        );
819        expected.insert(vec![206, 143, 221], vec![253, 107, 93, 29, 207]);
820        expected.insert(vec![182, 46, 63, 120], vec![110, 233, 124, 103]);
821        check_prop(expected);
822    }
823
824    #[test]
825    fn it_s_a_hash_map_2() {
826        let mut expected = StdHashMap::new();
827        let kvs = vec![
828            (vec![6], vec![0]),
829            (vec![214], vec![252]),
830            (vec![44], vec![0]),
831            (vec![113], vec![160]),
832            (vec![116], vec![15]),
833            (vec![67], vec![42]),
834            (vec![12], vec![0]),
835            (vec![191], vec![172]),
836            (vec![209], vec![119]),
837            (vec![11], vec![0]),
838            (vec![254], vec![104]),
839            (vec![121], vec![0]),
840            (vec![117], vec![174]),
841            (vec![38], vec![79]),
842            (vec![94], vec![66]),
843            (vec![16], vec![0]),
844            (vec![89], vec![167]),
845            (vec![112], vec![195]),
846            (vec![91], vec![18]),
847            (vec![23], vec![0]),
848            (vec![58], vec![0]),
849            (vec![32], vec![118]),
850            (vec![198], vec![47]),
851            (vec![18], vec![0]),
852            (vec![120], vec![0]),
853            (vec![0], vec![0]),
854            (vec![24], vec![0]),
855            (vec![7], vec![0]),
856            (vec![15], vec![0]),
857            (vec![22], vec![0]),
858            (vec![13], vec![0]),
859            (vec![102], vec![182]),
860            (vec![253], vec![68]),
861            (vec![139], vec![250]),
862            (vec![43], vec![0]),
863            (vec![14], vec![0]),
864            (vec![8], vec![0]),
865            (vec![88], vec![175]),
866            (vec![195], vec![150]),
867            (vec![41], vec![0]),
868            (vec![5], vec![46]),
869            (vec![10], vec![0]),
870            (vec![119], vec![0]),
871            (vec![239], vec![34]),
872            (vec![17], vec![0]),
873            (vec![42], vec![0]),
874            (vec![40], vec![213]),
875            (vec![1], vec![0]),
876            (vec![9], vec![0]),
877            (vec![140], vec![14]),
878            (vec![31], vec![51]),
879            (vec![57], vec![154]),
880            (vec![19], vec![102]),
881            (vec![238], vec![198]),
882            (vec![129], vec![15]),
883            (vec![141], vec![0]),
884            (vec![33], vec![0]),
885            (vec![95], vec![74]),
886            (vec![21], vec![162]),
887        ];
888
889        for (k, v) in kvs {
890            expected.insert(k, v);
891        }
892
893        check_prop(expected);
894    }
895
896    #[test]
897    fn test_persistence() {
898        let dir = tempdir().unwrap();
899        let path = dir.path();
900
901        type FileMap = DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>;
902
903        // 1. Create a new map and add some data
904        {
905            let mut map: FileMap = FileMap::new_in(path).unwrap();
906            map.insert(b"key1", b"value1").unwrap();
907            map.insert(b"key2", b"value2").unwrap();
908            assert_eq!(map.len(), 2);
909            assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
910            assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
911        } // map is dropped, files should be persisted
912
913        // 2. Load the map from disk
914        {
915            let map: FileMap = FileMap::load_from(path).unwrap();
916            assert_eq!(map.len(), 2);
917            assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
918            assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
919            assert_eq!(map.get(b"key3").unwrap(), None);
920        }
921
922        // 3. Load again, and add more data
923        {
924            let mut map: FileMap = FileMap::load_from(path).unwrap();
925            map.insert(b"key3", b"value3").unwrap();
926            assert_eq!(map.len(), 3);
927            assert_eq!(map.get(b"key3").unwrap(), Some(b"value3".as_ref()));
928        }
929
930        // 4. Load one more time to check the new data is there
931        {
932            let map: FileMap = FileMap::load_from(path).unwrap();
933            assert_eq!(map.len(), 3);
934            assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
935            assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
936            assert_eq!(map.get(b"key3").unwrap(), Some(b"value3".as_ref()));
937        }
938    }
939
940    #[test]
941    fn test_no_resize_with_preallocation() {
942        let mut entry_store = VecStore::new();
943        entry_store.grow(256 * std::mem::size_of::<Entry>());
944        let mut key_store = VecStore::new();
945        key_store.grow(20 * 1024);
946        let mut value_store = VecStore::new();
947        value_store.grow(20 * 1024);
948
949        // The stores have been resized once to pre-allocate space.
950        assert_eq!(entry_store.stats(), 1);
951        assert_eq!(key_store.stats(), 1);
952        assert_eq!(value_store.stats(), 1);
953
954        let mut map: DiskHashMap<Bytes, Bytes, _, FxBuildHasher> =
955            DiskHashMap::with_stores(entry_store, key_store, value_store);
956
957        let initial_stats = map.stats();
958        assert_eq!(initial_stats, (1, 1, 1));
959
960        // Insert 100 elements. Should not trigger any more resizes.
961        for i in 0..100 {
962            let s = i.to_string();
963            map.insert(s.clone().into_bytes().as_slice(), s.into_bytes().as_slice())
964                .unwrap();
965        }
966        assert_eq!(
967            map.stats(),
968            initial_stats,
969            "No resize should happen with pre-allocation"
970        );
971
972        // Insert more elements to trigger a resize of the entries container.
973        for i in 100..150 {
974            let s = i.to_string();
975            map.insert(s.clone().into_bytes().as_slice(), s.into_bytes().as_slice())
976                .unwrap();
977        }
978
979        let (entries_resizes, keys_resizes, values_resizes) = map.stats();
980        assert_eq!(
981            entries_resizes, 0,
982            "entries store is replaced, so stats are reset"
983        );
984        assert_eq!(
985            keys_resizes, initial_stats.1,
986            "keys store should not resize"
987        );
988        assert_eq!(
989            values_resizes, initial_stats.2,
990            "values store should not resize"
991        );
992    }
993
994    #[test]
995    fn test_entry_api_vacant() {
996        let mut map: BytesHM = DiskHashMap::new();
997
998        // Test vacant entry insertion
999        match map.entry_raw(b"key1") {
1000            MapEntry::Vacant(entry) => {
1001                let value_ref = entry.insert_bytes(b"value1");
1002                assert_eq!(value_ref, b"value1");
1003            }
1004            MapEntry::Occupied(_) => panic!("Expected vacant entry"),
1005        }
1006
1007        assert_eq!(map.len(), 1);
1008        assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
1009    }
1010
1011    #[test]
1012    fn test_entry_api_occupied() {
1013        let mut map: BytesHM = DiskHashMap::new();
1014
1015        // Insert initial value
1016        map.insert(b"key1", b"value1").unwrap();
1017
1018        // Test occupied entry access and update
1019        match map.entry(b"key1").unwrap() {
1020            MapEntry::Occupied(entry) => {
1021                assert_eq!(entry.value_bytes(), b"value1");
1022                let old_value = entry.insert(b"value2").unwrap();
1023                assert_eq!(old_value, b"value1");
1024            }
1025            MapEntry::Vacant(_) => panic!("Expected occupied entry"),
1026        }
1027
1028        assert_eq!(map.len(), 1);
1029        assert_eq!(map.get(b"key1").unwrap(), Some(b"value2".as_ref()));
1030    }
1031
1032    #[test]
1033    fn test_entry_api_or_insert() {
1034        let mut map: BytesHM = DiskHashMap::new();
1035
1036        // Test or_insert with vacant entry
1037        match map.entry(b"key1").unwrap() {
1038            MapEntry::Vacant(entry) => {
1039                let value_ref = entry.or_insert(b"value1").unwrap();
1040                assert_eq!(value_ref, b"value1");
1041            }
1042            MapEntry::Occupied(_) => panic!("Expected vacant entry"),
1043        }
1044
1045        // Test entry with existing key (should not create occupied entry in this test)
1046        assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
1047        assert_eq!(map.len(), 1);
1048    }
1049
1050    #[test]
1051    fn test_entry_api_or_insert_with() {
1052        let mut map: BytesHM = DiskHashMap::new();
1053
1054        // Test or_insert_with with vacant entry
1055        match map.entry(b"key1").unwrap() {
1056            MapEntry::Vacant(entry) => {
1057                let value_ref = entry.or_insert_with(|| b"computed_value").unwrap();
1058                assert_eq!(value_ref, b"computed_value");
1059            }
1060            MapEntry::Occupied(_) => panic!("Expected vacant entry"),
1061        }
1062
1063        assert_eq!(map.get(b"key1").unwrap(), Some(b"computed_value".as_ref()));
1064        assert_eq!(map.len(), 1);
1065    }
1066
1067    #[test]
1068    fn test_insert_returns_previous_value() {
1069        let mut map: BytesHM = DiskHashMap::new();
1070
1071        // First insert should return None
1072        let previous = map.insert(b"key1", b"value1").unwrap();
1073        assert_eq!(previous, None);
1074
1075        // Second insert should return previous value
1076        let previous = map.insert(b"key1", b"value2").unwrap();
1077        assert_eq!(previous, Some(b"value1".as_ref()));
1078
1079        // Verify current value
1080        assert_eq!(map.get(b"key1").unwrap(), Some(b"value2".as_ref()));
1081        assert_eq!(map.len(), 1);
1082    }
1083
1084    // New trait-based API tests
1085    #[test]
1086    fn test_native_u64_str_string() {
1087        let mut map: DiskHashMap<Native<u64>, Str, VecStore> = DiskHashMap::new();
1088
1089        // Insert a key-value pair
1090        let result = map.insert(&42, "hello");
1091        assert!(result.is_ok());
1092        assert_eq!(result.unwrap(), None);
1093
1094        // Get the value
1095        let result = map.get(&42u64);
1096        assert!(result.is_ok());
1097        assert_eq!(result.unwrap(), Some("hello"));
1098
1099        // Update the value
1100        let result = map.insert(&42u64, "world");
1101        assert!(result.is_ok());
1102        assert_eq!(result.unwrap(), Some("hello"));
1103
1104        // Verify updated value
1105        let result = map.get(&42u64);
1106        assert!(result.is_ok());
1107        assert_eq!(result.unwrap(), Some("world"));
1108
1109        assert_eq!(map.len(), 1);
1110    }
1111
1112    #[test]
1113    fn test_str_string_native_u32() {
1114        let mut map: DiskHashMap<Str, Native<u32>, VecStore> = DiskHashMap::new();
1115
1116        // Insert multiple pairs
1117        let key1 = "key1".to_string();
1118        let key2 = "key2".to_string();
1119
1120        let result = map.insert(&key1, &100u32);
1121        assert!(result.is_ok());
1122        assert_eq!(result.unwrap(), None);
1123
1124        let result = map.insert(&key2, &200u32);
1125        assert!(result.is_ok());
1126        assert_eq!(result.unwrap(), None);
1127
1128        // Verify both values
1129        let result = map.get(&key1);
1130        assert!(result.is_ok());
1131        assert_eq!(result.unwrap(), Some(100u32));
1132
1133        let result = map.get(&key2);
1134        assert!(result.is_ok());
1135        assert_eq!(result.unwrap(), Some(200u32));
1136
1137        assert_eq!(map.len(), 2);
1138    }
1139
1140    #[test]
1141    fn test_capacity_and_growth() {
1142        let mut map: DiskHashMap<Native<u8>, Native<u8>, VecStore> = DiskHashMap::new();
1143
1144        // Insert enough items to trigger growth
1145        for i in 0u8..20 {
1146            let result = map.insert(&i, &(i * 2));
1147            assert!(result.is_ok());
1148            assert_eq!(result.unwrap(), None);
1149        }
1150
1151        assert_eq!(map.len(), 20);
1152
1153        // Verify all values are still accessible
1154        for i in 0u8..20 {
1155            let result = map.get(&i);
1156            assert!(result.is_ok(), "Failed to get key {i}");
1157            assert_eq!(result.unwrap(), Some(i * 2));
1158        }
1159    }
1160
1161    #[test]
1162    fn test_convenience_methods() {
1163        // Test U64StringMap
1164        let mut map: DiskHashMap<Native<u64>, Str, VecStore> = U64StringMap::new();
1165
1166        let result = map.insert(&42, "hello");
1167        assert!(result.is_ok());
1168        assert_eq!(result.unwrap(), None);
1169
1170        let result = map.get(&42u64);
1171        assert!(result.is_ok());
1172        assert_eq!(result.unwrap(), Some("hello"));
1173
1174        // Test StringU64Map
1175        let mut map2: StringU64Map = StringU64Map::new();
1176
1177        let result = map2.insert("key", &100);
1178        assert!(result.is_ok());
1179        assert_eq!(result.unwrap(), None);
1180
1181        let result = map2.get("key");
1182        assert!(result.is_ok());
1183        assert_eq!(result.unwrap(), Some(100));
1184
1185        // Test StringStringMap
1186        let mut map3: StringStringMap = StringStringMap::new();
1187
1188        let result = map3.insert("key", "value");
1189        assert!(result.is_ok());
1190        assert_eq!(result.unwrap(), None);
1191
1192        let result = map3.get("key");
1193        assert!(result.is_ok());
1194        assert_eq!(result.unwrap(), Some("value"));
1195    }
1196
1197    /// A complex data structure that we want to store and retrieve efficiently
1198    #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
1199    pub struct UserProfile {
1200        pub id: u32,
1201        pub name: String,
1202        pub tags: Vec<String>,
1203        pub scores: Vec<f64>,
1204        pub metadata: Vec<(String, String)>,
1205    }
1206
1207    impl UserProfile {
1208        fn new(id: u32, name: &str) -> Self {
1209            Self {
1210                id,
1211                name: name.to_string(),
1212                tags: vec!["user".to_string(), "active".to_string()],
1213                scores: vec![85.5, 92.1, 78.3],
1214                metadata: vec![
1215                    ("created".to_string(), "2024-01-15".to_string()),
1216                    ("last_login".to_string(), "2024-01-20".to_string()),
1217                ],
1218            }
1219        }
1220
1221        /// Serialize this UserProfile to bytes using rkyv
1222        fn to_bytes(&self) -> Vec<u8> {
1223            rkyv::to_bytes::<rkyv::rancor::Error>(self)
1224                .unwrap()
1225                .to_vec()
1226        }
1227
1228        /// Deserialize from bytes without copying (zero-copy)
1229        fn from_bytes(
1230            bytes: &[u8],
1231        ) -> Result<&rkyv::Archived<UserProfile>, Box<dyn std::error::Error + Send + Sync>>
1232        {
1233            let archived = rkyv::access::<rkyv::Archived<UserProfile>, rkyv::rancor::Error>(bytes)
1234                .map_err(|e| format!("Validation failed: {e}"))?;
1235            Ok(archived)
1236        }
1237    }
1238
1239    #[test]
1240    fn simple_mmap_only_no_hash_map_rkyv_zerocopy() {
1241        let tmp_file = tempfile::NamedTempFile::new().expect("Failed to create temp dir");
1242
1243        // Create a UserProfile instance
1244        let check_alignment = |offset: usize| {
1245            let user = UserProfile::new(1, "Integration Test");
1246
1247            // Serialize to bytes using rkyv
1248            let user_bytes = user.to_bytes();
1249
1250            // Create a memory-mapped file and write the bytes
1251            let mut mmap_file = MMapFile::new(&tmp_file, 1024).expect("Failed to create mmap file");
1252            let aligned_range = offset..user_bytes.len() + offset;
1253            let items = &mut mmap_file.as_mut()[aligned_range.clone()];
1254            dbg!(align_of_val(items));
1255            dbg!(align_of_val(&user_bytes));
1256            items.copy_from_slice(&user_bytes);
1257
1258            // Read back the bytes from the mmap file
1259            let read_bytes = mmap_file.as_ref();
1260
1261            // Deserialize without copying (zero-copy)
1262            let archived_user = UserProfile::from_bytes(&read_bytes[aligned_range])
1263                .expect("Failed to deserialize UserProfile from bytes");
1264
1265            assert_eq!(archived_user.id, 1);
1266            assert_eq!(archived_user.name, "Integration Test");
1267        };
1268
1269        check_alignment(0);
1270        check_alignment(8); // Check with 8-byte alignment
1271        check_alignment(16); // Check with 16-byte alignment
1272        check_alignment(32); // Check with 16-byte alignment
1273        // check_alignment(1) this will fail, as it is not aligned
1274    }
1275
1276    #[test]
1277    fn archived_map() {
1278        let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1279
1280        let mut map: DiskHashMap<Native<u64>, Arch<UserProfile>, MMapFile, FxBuildHasher> =
1281            DiskHashMap::new_in(tempdir.path()).unwrap();
1282
1283        let user = UserProfile::new(1, "Integration Test");
1284
1285        map.insert(&3, &user)
1286            .expect("Failed to insert user profile into the map");
1287
1288        let user = map
1289            .get(&3)
1290            .expect("Failed to retrieve user profile from the map")
1291            .expect("User profile not found in the map");
1292
1293        assert_eq!(user.id, 1);
1294        assert_eq!(user.name, "Integration Test");
1295    }
1296
1297    #[test]
1298    fn test_with_capacity() {
1299        let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1300
1301        // Test with valid capacities
1302        let map_result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1303            DiskHashMap::with_capacity(tempdir.path(), 8, 512, 1024);
1304        assert!(map_result.is_ok());
1305
1306        let map = map_result.unwrap();
1307        // Capacity should be rounded up to power of 2: 8 -> 8 (already power of 2)
1308        assert_eq!(map.capacity(), 8);
1309
1310        // Test that we can actually use the map
1311        drop(map);
1312        let mut map: DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher> =
1313            DiskHashMap::load_from(tempdir.path()).unwrap();
1314
1315        map.insert(b"test_key", b"test_value").unwrap();
1316        assert_eq!(map.get(b"test_key").unwrap(), Some(b"test_value".as_ref()));
1317    }
1318
1319    #[test]
1320    fn test_with_capacity_rounds_up_to_power_of_2() {
1321        let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1322
1323        // Test with non-power-of-2 capacities
1324        let map: DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher> =
1325            DiskHashMap::with_capacity(tempdir.path(), 15, 300, 700).unwrap();
1326
1327        // 15 -> 16 (next power of 2)
1328        assert_eq!(map.capacity(), 16);
1329    }
1330
1331    #[test]
1332    fn test_with_capacity_zero_values_error() {
1333        let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1334
1335        // Test zero num_entries
1336        let result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1337            DiskHashMap::with_capacity(tempdir.path().join("zero_entries"), 0, 512, 1024);
1338        assert!(result.is_err());
1339        if let Err(err) = result {
1340            assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
1341        }
1342
1343        // Test zero keys_bytes
1344        let result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1345            DiskHashMap::with_capacity(tempdir.path().join("zero_keys"), 8, 0, 1024);
1346        assert!(result.is_err());
1347        if let Err(err) = result {
1348            assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
1349        }
1350
1351        // Test zero values_bytes
1352        let result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1353            DiskHashMap::with_capacity(tempdir.path().join("zero_values"), 8, 512, 0);
1354        assert!(result.is_err());
1355        if let Err(err) = result {
1356            assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
1357        }
1358    }
1359}