1use std::hash::BuildHasher;
2use std::io;
3use std::marker::PhantomData;
4use std::path::Path;
5
6use rustc_hash::FxBuildHasher;
7
8use crate::byte_store::{MMapFile, VecStore};
9use crate::entry::Entry;
10use crate::fixed_buffers::FixedVec;
11use crate::storage::MapStorage;
12use crate::types::{BytesDecode, BytesEncode, Native, Str};
13use crate::{Buffers, ByteStore};
14
15pub type U64StringMap<BS = VecStore> = DiskHashMap<Native<u64>, Str, BS>;
17pub type StringU64Map<BS = VecStore> = DiskHashMap<Str, Native<u64>, BS>;
18pub type StringStringMap<BS = VecStore> = DiskHashMap<Str, Str, BS>;
19
20pub enum MapEntry<'a, K, V, BS, S = FxBuildHasher>
22where
23 BS: ByteStore,
24 S: BuildHasher + Default,
25{
26 Occupied(OccupiedEntry<'a, K, V, BS, S>),
27 Vacant(VacantEntry<'a, K, V, BS, S>),
28}
29
30impl<K, V, BS, S> MapEntry<'_, K, V, BS, S>
31where
32 BS: ByteStore,
33 S: BuildHasher + Default,
34{
35 pub fn is_occupied(&self) -> bool {
37 matches!(self, MapEntry::Occupied(_))
38 }
39
40 pub fn is_vacant(&self) -> bool {
42 matches!(self, MapEntry::Vacant(_))
43 }
44
45 pub fn key(&self) -> <K as BytesDecode<'_>>::DItem
46 where
47 K: for<'a> BytesDecode<'a>,
48 {
49 let k = match self {
50 MapEntry::Occupied(entry) => <K as BytesDecode>::bytes_decode(entry.key_bytes()),
51 MapEntry::Vacant(entry) => <K as BytesDecode>::bytes_decode(&entry.key),
52 };
53 k.expect("Failed to decode key")
54 }
55}
56
57pub struct OccupiedEntry<'a, K, V, BS, S = FxBuildHasher>
59where
60 BS: ByteStore,
61 S: BuildHasher + Default,
62{
63 map: &'a mut DiskHashMap<K, V, BS, S>,
64 slot_idx: usize,
65}
66
67pub struct VacantEntry<'a, K, V, BS, S = FxBuildHasher>
69where
70 BS: ByteStore,
71 S: BuildHasher + Default,
72{
73 map: &'a mut DiskHashMap<K, V, BS, S>,
74 key: Vec<u8>,
75 slot_idx: usize,
76}
77
78pub struct DiskHashMap<K, V, BS, S = FxBuildHasher>
85where
86 BS: ByteStore,
87 S: BuildHasher + Default,
88{
89 entries: FixedVec<Entry, BS>,
90 keys: Buffers<BS>,
91 values: Buffers<BS>,
92 capacity: usize,
93 size: usize,
94 hasher: S,
95 _marker: PhantomData<(K, V)>,
96}
97
98impl<K, V> Default for DiskHashMap<K, V, VecStore, FxBuildHasher> {
99 fn default() -> Self {
100 Self::new()
101 }
102}
103
104impl<K, V, BS, S> DiskHashMap<K, V, BS, S>
105where
106 BS: ByteStore,
107 S: BuildHasher + Default,
108{
109 pub fn with_stores(entry_store: BS, keys_store: BS, values_store: BS) -> Self {
111 let keys = Buffers::new(keys_store);
112 let values = Buffers::new(values_store);
113 let entries = FixedVec::new(entry_store);
114 let capacity = entries.capacity();
115
116 Self {
117 keys,
118 values,
119 entries,
120 capacity,
121 size: 0,
122 hasher: S::default(),
123 _marker: PhantomData,
124 }
125 }
126
127 pub fn len(&self) -> usize {
129 self.size
130 }
131
132 pub fn is_empty(&self) -> bool {
134 self.size == 0
135 }
136
137 pub fn capacity(&self) -> usize {
139 self.capacity
140 }
141
142 pub fn load_factor(&self) -> f64 {
144 if self.capacity == 0 {
145 return f64::INFINITY;
146 }
147 self.size as f64 / self.capacity as f64
148 }
149
150 fn should_resize(&self) -> bool {
152 if self.capacity == 0 {
153 return true;
154 }
155 self.load_factor() > 0.4
157 }
158
159 fn find_slot(
167 &self,
168 key: &[u8],
169 mut eq_fn: impl FnMut(&[u8], &[u8]) -> bool,
170 hash_fn: impl Fn(&[u8]) -> u64,
171 ) -> Result<usize, usize> {
172 if self.capacity == 0 {
173 return Err(0);
174 }
175
176 let hash = hash_fn(key);
177 let mut index = hash as usize % self.capacity;
178
179 for _ in 0..self.capacity {
181 let entry = &self.entries[index];
182 if entry.is_empty() {
183 return Err(index);
185 }
186
187 if !entry.is_deleted() {
188 if let Some(stored_key) = self.keys.get(entry.key_pos()) {
190 if eq_fn(key, stored_key) {
191 return Ok(index);
192 }
193 }
194 }
195 index = (index + 1) % self.capacity;
197 }
198
199 Err(self.capacity)
200 }
201
202 pub fn stats(&self) -> (u64, u64, u64) {
203 (
204 self.entries.store().stats(),
205 self.keys.store().stats(),
206 self.values.store().stats(),
207 )
208 }
209
210 fn insert_new_entry(&mut self, slot_idx: usize, key_bytes: &[u8], value_bytes: &[u8]) -> Entry {
212 let key_idx = self.keys.append(key_bytes);
213 let value_idx = self.values.append(value_bytes);
214
215 let entry = Entry::occupied_at_pos(key_idx, value_idx);
216 self.entries[slot_idx] = entry;
217 self.size += 1;
218 entry
219 }
220}
221
222impl<
223 K: for<'a> BytesEncode<'a>,
224 V: for<'a> BytesEncode<'a> + for<'a> BytesDecode<'a>,
225 BS: ByteStore,
226 S: BuildHasher + Default,
227> DiskHashMap<K, V, BS, S>
228{
229 fn grow(&mut self) -> Result<(), Box<dyn std::error::Error + Sync + Send>> {
230 let new_capacity = if self.capacity == 0 {
231 16
232 } else {
233 self.capacity * 2
234 };
235 let mut new_entries = self.entries.new_empty(new_capacity);
236 let actual_new_capacity = new_entries.capacity();
237
238 for i in 0..self.capacity {
240 let entry = self.entries[i];
241 if entry.is_occupied() {
242 let key_data = self
243 .keys
244 .get(entry.key_pos())
245 .expect("key must exist for occupied entry");
246 let hash = <K as BytesEncode>::hash_alt(key_data, &self.hasher);
247 let mut index = hash as usize % actual_new_capacity;
248
249 loop {
251 if new_entries[index].is_empty() {
252 new_entries[index] = entry;
253 break;
254 }
255 index = (index + 1) % actual_new_capacity;
256 }
257 }
258 }
259 self.entries = new_entries;
260 self.capacity = actual_new_capacity;
261 Ok(())
262 }
263
264 pub fn insert<'a>(
266 &'a mut self,
267 key: &'a <K as BytesEncode<'a>>::EItem,
268 value: &'a <V as BytesEncode<'a>>::EItem,
269 ) -> Result<Option<<V as BytesDecode<'a>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
270 {
271 if self.should_resize() {
272 self.grow()?;
273 }
274
275 let key_bytes = K::bytes_encode(key)?;
276 let value_bytes = V::bytes_encode(value)?;
277
278 self.insert_key_value_bytes(&key_bytes, &value_bytes)
279 }
280
281 fn insert_key_value_bytes(
283 &mut self,
284 key_bytes: &[u8],
285 value_bytes: &[u8],
286 ) -> Result<Option<<V as BytesDecode<'_>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
287 {
288 match self.find_slot_inner(key_bytes) {
289 Err(slot_idx) => {
290 self.insert_new_entry(slot_idx, key_bytes, value_bytes);
292 Ok(None)
293 }
294 Ok(slot_idx) => {
295 self.update_existing_entry(slot_idx, value_bytes)
297 }
298 }
299 }
300
301 fn update_existing_entry(
303 &mut self,
304 slot_idx: usize,
305 value_bytes: &[u8],
306 ) -> Result<Option<<V as BytesDecode<'_>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
307 {
308 let entry = &mut self.entries[slot_idx];
309 let old_value_idx = entry.value_pos();
310 let new_value_idx = self.values.append(value_bytes);
311 entry.set_new_kv(entry.key_pos(), new_value_idx);
312
313 let old_value_bytes = self
315 .values
316 .get(old_value_idx)
317 .expect("value must exist for occupied entry");
318 let old_value = V::bytes_decode(old_value_bytes)?;
319
320 Ok(Some(old_value))
321 }
322
323 fn find_slot_inner(&self, key: &[u8]) -> Result<usize, usize> {
324 self.find_slot(
325 key,
326 |l, r| <K as BytesEncode>::eq_alt(l, r),
327 |k| <K as BytesEncode>::hash_alt(k, &self.hasher), )
329 }
330
331 pub fn get<'a>(
333 &self,
334 key: &'a <K as BytesEncode<'a>>::EItem,
335 ) -> Result<Option<<V as BytesDecode<'_>>::DItem>, Box<dyn std::error::Error + Sync + Send>>
336 {
337 if self.is_empty() {
338 return Ok(None);
339 }
340
341 let key_bytes = K::bytes_encode(key)?;
342 match self.find_slot_inner(&key_bytes) {
343 Ok(slot_idx) => {
344 let entry = &self.entries[slot_idx];
345 let value_bytes = self
346 .values
347 .get(entry.value_pos())
348 .expect("value must exist for occupied entry");
349 let value = V::bytes_decode(value_bytes)?;
350 Ok(Some(value))
351 }
352 Err(_) => Ok(None),
353 }
354 }
355
356 pub fn entry<'a>(
358 &'a mut self,
359 key: &'a <K as BytesEncode<'a>>::EItem,
360 ) -> Result<MapEntry<'a, K, V, BS, S>, Box<dyn std::error::Error + Sync + Send>>
361 where
362 for<'b> K: BytesEncode<'b>,
363 for<'b> V: BytesDecode<'b>,
364 {
365 let key_bytes = K::bytes_encode(key)?;
366 Ok(self.entry_raw(key_bytes.as_ref()))
367 }
368
369 fn entry_raw<Q: AsRef<[u8]>>(&mut self, key: Q) -> MapEntry<'_, K, V, BS, S>
371 where
372 for<'a> K: BytesEncode<'a>,
373 for<'b> V: BytesDecode<'b>,
374 {
375 if self.should_resize() {
376 let _ = self.grow();
377 }
378
379 let key_bytes = key.as_ref();
380 match self.find_slot_inner(key_bytes) {
381 Ok(slot_idx) => MapEntry::Occupied(OccupiedEntry {
382 map: self,
383 slot_idx,
384 }),
385 Err(slot_idx) => MapEntry::Vacant(VacantEntry {
386 map: self,
387 key: key_bytes.to_vec(),
388 slot_idx,
389 }),
390 }
391 }
392}
393
394impl<K, V> DiskHashMap<K, V, VecStore, FxBuildHasher> {
395 pub fn new() -> Self {
397 Self::with_stores(VecStore::new(), VecStore::new(), VecStore::new())
398 }
399}
400
401impl<K, V, S> DiskHashMap<K, V, MMapFile, S>
402where
403 S: BuildHasher + Default,
404{
405 pub fn new_in(path: &Path) -> io::Result<Self> {
406 const DEFAULT_ENTRIES_CAP: usize = 16;
407 const DEFAULT_KV_CAP: usize = 1024;
408
409 let storage = MapStorage::new_in(
410 path,
411 DEFAULT_ENTRIES_CAP * std::mem::size_of::<Entry>(),
412 DEFAULT_KV_CAP,
413 DEFAULT_KV_CAP,
414 )?;
415
416 let keys = Buffers::new(storage.keys);
417 let values = Buffers::new(storage.values);
418 let entries = FixedVec::<Entry, _>::new(storage.entries);
419 let capacity = entries.capacity();
420
421 Ok(Self {
422 keys,
423 values,
424 entries,
425 capacity,
426 size: 0,
427 hasher: S::default(),
428 _marker: PhantomData,
429 })
430 }
431
432 pub fn with_capacity(
434 path: impl AsRef<Path>,
435 num_entries: usize,
436 keys_bytes: usize,
437 values_bytes: usize,
438 ) -> io::Result<Self> {
439 let path = path.as_ref();
440
441 if num_entries == 0 || keys_bytes == 0 || values_bytes == 0 {
443 return Err(io::Error::new(
444 io::ErrorKind::InvalidInput,
445 "Capacities must be greater than zero",
446 ));
447 }
448
449 let entries_cap = num_entries.next_power_of_two();
451 let keys_cap = keys_bytes.next_power_of_two();
452 let values_cap = values_bytes.next_power_of_two();
453
454 let storage = MapStorage::new_in(
455 path,
456 entries_cap * std::mem::size_of::<Entry>(),
457 keys_cap,
458 values_cap,
459 )?;
460
461 let keys = Buffers::new(storage.keys);
462 let values = Buffers::new(storage.values);
463 let entries = FixedVec::<Entry, _>::new(storage.entries);
464 let capacity = entries.capacity();
465
466 Ok(Self {
467 keys,
468 values,
469 entries,
470 capacity,
471 size: 0,
472 hasher: S::default(),
473 _marker: PhantomData,
474 })
475 }
476
477 pub fn load_from(path: &Path) -> io::Result<Self> {
478 let storage = MapStorage::load_from(path)?;
479
480 let keys = Buffers::load(storage.keys);
481 let values = Buffers::load(storage.values);
482 let entries = FixedVec::<Entry, _>::new(storage.entries);
483 let capacity = entries.capacity();
484
485 let mut size = 0;
486 for i in 0..capacity {
487 if entries[i].is_occupied() {
488 size += 1;
489 }
490 }
491
492 Ok(Self {
493 keys,
494 values,
495 entries,
496 capacity,
497 size,
498 hasher: S::default(),
499 _marker: PhantomData,
500 })
501 }
502}
503
504impl<'a, K, V, BS, S> OccupiedEntry<'a, K, V, BS, S>
505where
506 BS: ByteStore,
507 S: BuildHasher + Default,
508{
509 pub fn key_bytes(&self) -> &[u8] {
511 let entry = &self.map.entries[self.slot_idx];
512 self.map
513 .keys
514 .get(entry.key_pos())
515 .expect("key must exist for occupied entry")
516 }
517
518 pub fn value_bytes(&self) -> &[u8] {
520 let entry = &self.map.entries[self.slot_idx];
521 self.map
522 .values
523 .get(entry.value_pos())
524 .expect("value must exist for occupied entry")
525 }
526}
527
528impl<'a, K, V, BS, S> OccupiedEntry<'a, K, V, BS, S>
530where
531 BS: ByteStore,
532 S: BuildHasher + Default,
533 K: for<'b> BytesEncode<'b>,
534 V: for<'b> BytesEncode<'b> + for<'b> BytesDecode<'b>,
535{
536 pub fn get(
538 &self,
539 ) -> Result<<V as BytesDecode<'_>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
540 let value_bytes = self.value_bytes();
541 V::bytes_decode(value_bytes)
542 }
543
544 fn insert_bytes<V2: AsRef<[u8]>>(
546 self,
547 value: V2,
548 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Send + Sync>> {
549 self.map
550 .update_existing_entry(self.slot_idx, value.as_ref())
551 .map(|r| r.unwrap())
552 }
553
554 pub fn insert(
557 self,
558 value: &'a <V as BytesEncode<'a>>::EItem,
559 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
560 let value_bytes = V::bytes_encode(value)?;
561 self.insert_bytes(value_bytes.as_ref())
562 }
563
564 pub fn or_insert(
566 self,
567 value: &'a <V as BytesEncode<'a>>::EItem,
568 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
569 self.insert(value)
570 }
571
572 pub fn or_insert_with<F>(
574 self,
575 f: F,
576 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>>
577 where
578 F: FnOnce() -> &'a <V as BytesEncode<'a>>::EItem,
579 {
580 self.insert(f())
581 }
582}
583
584impl<'a, K, V, BS, S> VacantEntry<'a, K, V, BS, S>
585where
586 BS: ByteStore,
587 S: BuildHasher + Default,
588{
589 fn insert_bytes<V2: AsRef<[u8]>>(self, value: V2) -> &'a [u8] {
591 let entry = self
592 .map
593 .insert_new_entry(self.slot_idx, &self.key, value.as_ref());
594 self.map
595 .values
596 .get(entry.value_pos())
597 .expect("value was just inserted")
598 }
599}
600
601impl<'a, K, V, BS, S> VacantEntry<'a, K, V, BS, S>
603where
604 BS: ByteStore,
605 S: BuildHasher + Default,
606 K: for<'b> BytesEncode<'b>,
607 V: for<'b> BytesEncode<'b> + for<'b> BytesDecode<'b>,
608{
609 pub fn insert(
612 self,
613 value: &'a <V as BytesEncode<'a>>::EItem,
614 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
615 let value_bytes = V::bytes_encode(value)?;
616 let old_bytes = self.insert_bytes(value_bytes.as_ref());
617 V::bytes_decode(old_bytes)
618 }
619
620 pub fn or_insert(
622 self,
623 value: &'a <V as BytesEncode<'a>>::EItem,
624 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>> {
625 self.insert(value)
626 }
627
628 pub fn or_insert_with<F>(
630 self,
631 f: F,
632 ) -> Result<<V as BytesDecode<'a>>::DItem, Box<dyn std::error::Error + Sync + Send>>
633 where
634 F: FnOnce() -> &'a <V as BytesEncode<'a>>::EItem,
635 {
636 self.insert(f())
637 }
638}
639
640#[cfg(test)]
641mod tests {
642 use super::*;
643 use crate::types::{Arch, Native, Str};
644 use crate::{Bytes, VecStore};
645 use proptest::prelude::*;
646 use rkyv::{Archive, Deserialize, Serialize};
647 use rustc_hash::FxBuildHasher;
648 use std::collections::HashMap as StdHashMap;
649 use tempfile::tempdir;
650
651 type BytesHM = DiskHashMap<Bytes, Bytes, VecStore, FxBuildHasher>;
652
653 #[test]
655 fn test_insert_and_get_raw() {
656 let mut map: BytesHM = DiskHashMap::new();
657
658 map.insert(b"hello", b"world").unwrap();
660
661 let value = map.get(b"hello").unwrap();
663 assert_eq!(value, Some(b"world".as_ref()));
664
665 let value = map.get(b"not_found").unwrap();
667 assert_eq!(value, None);
668 }
669
670 #[test]
671 fn test_update_value_raw() {
672 let mut map: BytesHM = DiskHashMap::new();
673
674 map.insert(b"key", b"value1").unwrap();
676 assert_eq!(map.get(b"key").unwrap(), Some(b"value1".as_ref()));
677
678 let old_value = map.insert(b"key", b"value2").unwrap();
680 assert_eq!(old_value, Some(b"value1".as_ref()));
681
682 let value = map.get(b"key").unwrap();
684 assert_eq!(value, Some(b"value2".as_ref()));
685 assert_eq!(map.len(), 1);
686 }
687
688 #[test]
689 fn test_multiple_entries_raw() {
690 let mut map: BytesHM = DiskHashMap::new();
691
692 map.insert(b"key1", b"value1").unwrap();
694 map.insert(b"key2", b"value2").unwrap();
695 map.insert(b"key3", b"value3").unwrap();
696
697 assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
699 assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
700 assert_eq!(map.get(b"key3").unwrap(), Some(b"value3".as_ref()));
701 }
702
703 #[test]
704 fn test_empty_map() {
705 let map: BytesHM = DiskHashMap::new();
706
707 assert_eq!(map.len(), 0);
709 assert!(map.is_empty());
710
711 assert_eq!(map.get(b"key").unwrap(), None);
713 }
714
715 fn check_prop(hm: StdHashMap<Vec<u8>, Vec<u8>>) {
716 let mut map: BytesHM = DiskHashMap::new();
717
718 let mut already_inserted = vec![];
720 for (k, v) in hm.iter() {
721 map.insert(k.as_slice(), v.as_slice()).unwrap();
722 already_inserted.push((k.clone(), v.clone()));
723 for (k, v) in &already_inserted {
724 assert_eq!(map.get(k).unwrap(), Some(v.as_slice()), "key: {k:?}");
725
726 let entry = map.entry(k).unwrap();
727 assert!(entry.is_occupied());
728 assert_eq!(entry.key(), k);
729 match entry {
730 MapEntry::Occupied(occupied) => {
731 assert_eq!(occupied.value_bytes(), v);
732 }
733 MapEntry::Vacant(_) => panic!("Expected occupied entry"),
734 }
735 }
736 }
737
738 assert_eq!(map.len(), hm.len());
740
741 for (k, v) in hm.iter() {
743 assert_eq!(
744 map.get(k.as_slice()).unwrap(),
745 Some(v.as_slice()),
746 "key: {k:?}"
747 );
748 }
749 }
750
751 fn check_prop_native(hm: StdHashMap<u64, u64>) {
752 let mut map: DiskHashMap<Native<u64>, Native<u64>, VecStore, FxBuildHasher> =
753 DiskHashMap::new();
754
755 let mut already_inserted = vec![];
757 for (k, v) in hm.iter() {
758 map.insert(k, v).unwrap();
759 already_inserted.push((*k, *v));
760 for (k, v) in &already_inserted {
761 assert_eq!(map.get(k).unwrap(), Some(*v), "key: {k:?}");
762
763 let entry = map.entry(k).unwrap();
764 assert!(entry.is_occupied());
765 assert_eq!(entry.key(), *k);
766 match entry {
767 MapEntry::Occupied(occupied) => {
768 assert_eq!(occupied.get().unwrap(), *v);
769 }
770 MapEntry::Vacant(_) => panic!("Expected occupied entry"),
771 }
772 }
773 }
774
775 assert_eq!(map.len(), hm.len());
777
778 for (k, v) in hm.iter() {
780 assert_eq!(map.get(k).unwrap(), Some(*v), "key: {k:?}");
781 }
782 }
783
784 #[test]
785 fn it_s_a_hash_map() {
786 let small_hash_map_prop = proptest::collection::hash_map(
787 proptest::collection::vec(0u8..255, 1..32),
788 proptest::collection::vec(0u8..255, 1..32),
789 1..250,
790 );
791
792 proptest!(|(values in small_hash_map_prop)|{
793 check_prop(values);
794 });
795 }
796
797 #[test]
798 fn it_s_a_hash_map_native() {
799 let small_hash_map_prop = proptest::collection::hash_map(
800 proptest::num::u64::ANY,
801 proptest::num::u64::ANY,
802 1..250,
803 );
804
805 proptest!(|(values in small_hash_map_prop)|{
806 check_prop_native(values);
807 });
808 }
809
810 #[test]
811 fn it_s_a_hash_map_1() {
812 let mut expected = StdHashMap::new();
813 expected.insert(vec![225, 211, 10, 64, 102, 152], vec![173, 231, 92]);
814 expected.insert(vec![227, 209, 20, 158, 58, 22, 107, 62], vec![77]);
815 expected.insert(
816 vec![140, 134, 67, 127, 34, 190],
817 vec![144, 189, 239, 135, 30],
818 );
819 expected.insert(vec![206, 143, 221], vec![253, 107, 93, 29, 207]);
820 expected.insert(vec![182, 46, 63, 120], vec![110, 233, 124, 103]);
821 check_prop(expected);
822 }
823
824 #[test]
825 fn it_s_a_hash_map_2() {
826 let mut expected = StdHashMap::new();
827 let kvs = vec![
828 (vec![6], vec![0]),
829 (vec![214], vec![252]),
830 (vec![44], vec![0]),
831 (vec![113], vec![160]),
832 (vec![116], vec![15]),
833 (vec![67], vec![42]),
834 (vec![12], vec![0]),
835 (vec![191], vec![172]),
836 (vec![209], vec![119]),
837 (vec![11], vec![0]),
838 (vec![254], vec![104]),
839 (vec![121], vec![0]),
840 (vec![117], vec![174]),
841 (vec![38], vec![79]),
842 (vec![94], vec![66]),
843 (vec![16], vec![0]),
844 (vec![89], vec![167]),
845 (vec![112], vec![195]),
846 (vec![91], vec![18]),
847 (vec![23], vec![0]),
848 (vec![58], vec![0]),
849 (vec![32], vec![118]),
850 (vec![198], vec![47]),
851 (vec![18], vec![0]),
852 (vec![120], vec![0]),
853 (vec![0], vec![0]),
854 (vec![24], vec![0]),
855 (vec![7], vec![0]),
856 (vec![15], vec![0]),
857 (vec![22], vec![0]),
858 (vec![13], vec![0]),
859 (vec![102], vec![182]),
860 (vec![253], vec![68]),
861 (vec![139], vec![250]),
862 (vec![43], vec![0]),
863 (vec![14], vec![0]),
864 (vec![8], vec![0]),
865 (vec![88], vec![175]),
866 (vec![195], vec![150]),
867 (vec![41], vec![0]),
868 (vec![5], vec![46]),
869 (vec![10], vec![0]),
870 (vec![119], vec![0]),
871 (vec![239], vec![34]),
872 (vec![17], vec![0]),
873 (vec![42], vec![0]),
874 (vec![40], vec![213]),
875 (vec![1], vec![0]),
876 (vec![9], vec![0]),
877 (vec![140], vec![14]),
878 (vec![31], vec![51]),
879 (vec![57], vec![154]),
880 (vec![19], vec![102]),
881 (vec![238], vec![198]),
882 (vec![129], vec![15]),
883 (vec![141], vec![0]),
884 (vec![33], vec![0]),
885 (vec![95], vec![74]),
886 (vec![21], vec![162]),
887 ];
888
889 for (k, v) in kvs {
890 expected.insert(k, v);
891 }
892
893 check_prop(expected);
894 }
895
896 #[test]
897 fn test_persistence() {
898 let dir = tempdir().unwrap();
899 let path = dir.path();
900
901 type FileMap = DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>;
902
903 {
905 let mut map: FileMap = FileMap::new_in(path).unwrap();
906 map.insert(b"key1", b"value1").unwrap();
907 map.insert(b"key2", b"value2").unwrap();
908 assert_eq!(map.len(), 2);
909 assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
910 assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
911 } {
915 let map: FileMap = FileMap::load_from(path).unwrap();
916 assert_eq!(map.len(), 2);
917 assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
918 assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
919 assert_eq!(map.get(b"key3").unwrap(), None);
920 }
921
922 {
924 let mut map: FileMap = FileMap::load_from(path).unwrap();
925 map.insert(b"key3", b"value3").unwrap();
926 assert_eq!(map.len(), 3);
927 assert_eq!(map.get(b"key3").unwrap(), Some(b"value3".as_ref()));
928 }
929
930 {
932 let map: FileMap = FileMap::load_from(path).unwrap();
933 assert_eq!(map.len(), 3);
934 assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
935 assert_eq!(map.get(b"key2").unwrap(), Some(b"value2".as_ref()));
936 assert_eq!(map.get(b"key3").unwrap(), Some(b"value3".as_ref()));
937 }
938 }
939
940 #[test]
941 fn test_no_resize_with_preallocation() {
942 let mut entry_store = VecStore::new();
943 entry_store.grow(256 * std::mem::size_of::<Entry>());
944 let mut key_store = VecStore::new();
945 key_store.grow(20 * 1024);
946 let mut value_store = VecStore::new();
947 value_store.grow(20 * 1024);
948
949 assert_eq!(entry_store.stats(), 1);
951 assert_eq!(key_store.stats(), 1);
952 assert_eq!(value_store.stats(), 1);
953
954 let mut map: DiskHashMap<Bytes, Bytes, _, FxBuildHasher> =
955 DiskHashMap::with_stores(entry_store, key_store, value_store);
956
957 let initial_stats = map.stats();
958 assert_eq!(initial_stats, (1, 1, 1));
959
960 for i in 0..100 {
962 let s = i.to_string();
963 map.insert(s.clone().into_bytes().as_slice(), s.into_bytes().as_slice())
964 .unwrap();
965 }
966 assert_eq!(
967 map.stats(),
968 initial_stats,
969 "No resize should happen with pre-allocation"
970 );
971
972 for i in 100..150 {
974 let s = i.to_string();
975 map.insert(s.clone().into_bytes().as_slice(), s.into_bytes().as_slice())
976 .unwrap();
977 }
978
979 let (entries_resizes, keys_resizes, values_resizes) = map.stats();
980 assert_eq!(
981 entries_resizes, 0,
982 "entries store is replaced, so stats are reset"
983 );
984 assert_eq!(
985 keys_resizes, initial_stats.1,
986 "keys store should not resize"
987 );
988 assert_eq!(
989 values_resizes, initial_stats.2,
990 "values store should not resize"
991 );
992 }
993
994 #[test]
995 fn test_entry_api_vacant() {
996 let mut map: BytesHM = DiskHashMap::new();
997
998 match map.entry_raw(b"key1") {
1000 MapEntry::Vacant(entry) => {
1001 let value_ref = entry.insert_bytes(b"value1");
1002 assert_eq!(value_ref, b"value1");
1003 }
1004 MapEntry::Occupied(_) => panic!("Expected vacant entry"),
1005 }
1006
1007 assert_eq!(map.len(), 1);
1008 assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
1009 }
1010
1011 #[test]
1012 fn test_entry_api_occupied() {
1013 let mut map: BytesHM = DiskHashMap::new();
1014
1015 map.insert(b"key1", b"value1").unwrap();
1017
1018 match map.entry(b"key1").unwrap() {
1020 MapEntry::Occupied(entry) => {
1021 assert_eq!(entry.value_bytes(), b"value1");
1022 let old_value = entry.insert(b"value2").unwrap();
1023 assert_eq!(old_value, b"value1");
1024 }
1025 MapEntry::Vacant(_) => panic!("Expected occupied entry"),
1026 }
1027
1028 assert_eq!(map.len(), 1);
1029 assert_eq!(map.get(b"key1").unwrap(), Some(b"value2".as_ref()));
1030 }
1031
1032 #[test]
1033 fn test_entry_api_or_insert() {
1034 let mut map: BytesHM = DiskHashMap::new();
1035
1036 match map.entry(b"key1").unwrap() {
1038 MapEntry::Vacant(entry) => {
1039 let value_ref = entry.or_insert(b"value1").unwrap();
1040 assert_eq!(value_ref, b"value1");
1041 }
1042 MapEntry::Occupied(_) => panic!("Expected vacant entry"),
1043 }
1044
1045 assert_eq!(map.get(b"key1").unwrap(), Some(b"value1".as_ref()));
1047 assert_eq!(map.len(), 1);
1048 }
1049
1050 #[test]
1051 fn test_entry_api_or_insert_with() {
1052 let mut map: BytesHM = DiskHashMap::new();
1053
1054 match map.entry(b"key1").unwrap() {
1056 MapEntry::Vacant(entry) => {
1057 let value_ref = entry.or_insert_with(|| b"computed_value").unwrap();
1058 assert_eq!(value_ref, b"computed_value");
1059 }
1060 MapEntry::Occupied(_) => panic!("Expected vacant entry"),
1061 }
1062
1063 assert_eq!(map.get(b"key1").unwrap(), Some(b"computed_value".as_ref()));
1064 assert_eq!(map.len(), 1);
1065 }
1066
1067 #[test]
1068 fn test_insert_returns_previous_value() {
1069 let mut map: BytesHM = DiskHashMap::new();
1070
1071 let previous = map.insert(b"key1", b"value1").unwrap();
1073 assert_eq!(previous, None);
1074
1075 let previous = map.insert(b"key1", b"value2").unwrap();
1077 assert_eq!(previous, Some(b"value1".as_ref()));
1078
1079 assert_eq!(map.get(b"key1").unwrap(), Some(b"value2".as_ref()));
1081 assert_eq!(map.len(), 1);
1082 }
1083
1084 #[test]
1086 fn test_native_u64_str_string() {
1087 let mut map: DiskHashMap<Native<u64>, Str, VecStore> = DiskHashMap::new();
1088
1089 let result = map.insert(&42, "hello");
1091 assert!(result.is_ok());
1092 assert_eq!(result.unwrap(), None);
1093
1094 let result = map.get(&42u64);
1096 assert!(result.is_ok());
1097 assert_eq!(result.unwrap(), Some("hello"));
1098
1099 let result = map.insert(&42u64, "world");
1101 assert!(result.is_ok());
1102 assert_eq!(result.unwrap(), Some("hello"));
1103
1104 let result = map.get(&42u64);
1106 assert!(result.is_ok());
1107 assert_eq!(result.unwrap(), Some("world"));
1108
1109 assert_eq!(map.len(), 1);
1110 }
1111
1112 #[test]
1113 fn test_str_string_native_u32() {
1114 let mut map: DiskHashMap<Str, Native<u32>, VecStore> = DiskHashMap::new();
1115
1116 let key1 = "key1".to_string();
1118 let key2 = "key2".to_string();
1119
1120 let result = map.insert(&key1, &100u32);
1121 assert!(result.is_ok());
1122 assert_eq!(result.unwrap(), None);
1123
1124 let result = map.insert(&key2, &200u32);
1125 assert!(result.is_ok());
1126 assert_eq!(result.unwrap(), None);
1127
1128 let result = map.get(&key1);
1130 assert!(result.is_ok());
1131 assert_eq!(result.unwrap(), Some(100u32));
1132
1133 let result = map.get(&key2);
1134 assert!(result.is_ok());
1135 assert_eq!(result.unwrap(), Some(200u32));
1136
1137 assert_eq!(map.len(), 2);
1138 }
1139
1140 #[test]
1141 fn test_capacity_and_growth() {
1142 let mut map: DiskHashMap<Native<u8>, Native<u8>, VecStore> = DiskHashMap::new();
1143
1144 for i in 0u8..20 {
1146 let result = map.insert(&i, &(i * 2));
1147 assert!(result.is_ok());
1148 assert_eq!(result.unwrap(), None);
1149 }
1150
1151 assert_eq!(map.len(), 20);
1152
1153 for i in 0u8..20 {
1155 let result = map.get(&i);
1156 assert!(result.is_ok(), "Failed to get key {i}");
1157 assert_eq!(result.unwrap(), Some(i * 2));
1158 }
1159 }
1160
1161 #[test]
1162 fn test_convenience_methods() {
1163 let mut map: DiskHashMap<Native<u64>, Str, VecStore> = U64StringMap::new();
1165
1166 let result = map.insert(&42, "hello");
1167 assert!(result.is_ok());
1168 assert_eq!(result.unwrap(), None);
1169
1170 let result = map.get(&42u64);
1171 assert!(result.is_ok());
1172 assert_eq!(result.unwrap(), Some("hello"));
1173
1174 let mut map2: StringU64Map = StringU64Map::new();
1176
1177 let result = map2.insert("key", &100);
1178 assert!(result.is_ok());
1179 assert_eq!(result.unwrap(), None);
1180
1181 let result = map2.get("key");
1182 assert!(result.is_ok());
1183 assert_eq!(result.unwrap(), Some(100));
1184
1185 let mut map3: StringStringMap = StringStringMap::new();
1187
1188 let result = map3.insert("key", "value");
1189 assert!(result.is_ok());
1190 assert_eq!(result.unwrap(), None);
1191
1192 let result = map3.get("key");
1193 assert!(result.is_ok());
1194 assert_eq!(result.unwrap(), Some("value"));
1195 }
1196
1197 #[derive(Archive, Deserialize, Serialize, Debug, Clone, PartialEq)]
1199 pub struct UserProfile {
1200 pub id: u32,
1201 pub name: String,
1202 pub tags: Vec<String>,
1203 pub scores: Vec<f64>,
1204 pub metadata: Vec<(String, String)>,
1205 }
1206
1207 impl UserProfile {
1208 fn new(id: u32, name: &str) -> Self {
1209 Self {
1210 id,
1211 name: name.to_string(),
1212 tags: vec!["user".to_string(), "active".to_string()],
1213 scores: vec![85.5, 92.1, 78.3],
1214 metadata: vec![
1215 ("created".to_string(), "2024-01-15".to_string()),
1216 ("last_login".to_string(), "2024-01-20".to_string()),
1217 ],
1218 }
1219 }
1220
1221 fn to_bytes(&self) -> Vec<u8> {
1223 rkyv::to_bytes::<rkyv::rancor::Error>(self)
1224 .unwrap()
1225 .to_vec()
1226 }
1227
1228 fn from_bytes(
1230 bytes: &[u8],
1231 ) -> Result<&rkyv::Archived<UserProfile>, Box<dyn std::error::Error + Send + Sync>>
1232 {
1233 let archived = rkyv::access::<rkyv::Archived<UserProfile>, rkyv::rancor::Error>(bytes)
1234 .map_err(|e| format!("Validation failed: {e}"))?;
1235 Ok(archived)
1236 }
1237 }
1238
1239 #[test]
1240 fn simple_mmap_only_no_hash_map_rkyv_zerocopy() {
1241 let tmp_file = tempfile::NamedTempFile::new().expect("Failed to create temp dir");
1242
1243 let check_alignment = |offset: usize| {
1245 let user = UserProfile::new(1, "Integration Test");
1246
1247 let user_bytes = user.to_bytes();
1249
1250 let mut mmap_file = MMapFile::new(&tmp_file, 1024).expect("Failed to create mmap file");
1252 let aligned_range = offset..user_bytes.len() + offset;
1253 let items = &mut mmap_file.as_mut()[aligned_range.clone()];
1254 dbg!(align_of_val(items));
1255 dbg!(align_of_val(&user_bytes));
1256 items.copy_from_slice(&user_bytes);
1257
1258 let read_bytes = mmap_file.as_ref();
1260
1261 let archived_user = UserProfile::from_bytes(&read_bytes[aligned_range])
1263 .expect("Failed to deserialize UserProfile from bytes");
1264
1265 assert_eq!(archived_user.id, 1);
1266 assert_eq!(archived_user.name, "Integration Test");
1267 };
1268
1269 check_alignment(0);
1270 check_alignment(8); check_alignment(16); check_alignment(32); }
1275
1276 #[test]
1277 fn archived_map() {
1278 let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1279
1280 let mut map: DiskHashMap<Native<u64>, Arch<UserProfile>, MMapFile, FxBuildHasher> =
1281 DiskHashMap::new_in(tempdir.path()).unwrap();
1282
1283 let user = UserProfile::new(1, "Integration Test");
1284
1285 map.insert(&3, &user)
1286 .expect("Failed to insert user profile into the map");
1287
1288 let user = map
1289 .get(&3)
1290 .expect("Failed to retrieve user profile from the map")
1291 .expect("User profile not found in the map");
1292
1293 assert_eq!(user.id, 1);
1294 assert_eq!(user.name, "Integration Test");
1295 }
1296
1297 #[test]
1298 fn test_with_capacity() {
1299 let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1300
1301 let map_result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1303 DiskHashMap::with_capacity(tempdir.path(), 8, 512, 1024);
1304 assert!(map_result.is_ok());
1305
1306 let map = map_result.unwrap();
1307 assert_eq!(map.capacity(), 8);
1309
1310 drop(map);
1312 let mut map: DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher> =
1313 DiskHashMap::load_from(tempdir.path()).unwrap();
1314
1315 map.insert(b"test_key", b"test_value").unwrap();
1316 assert_eq!(map.get(b"test_key").unwrap(), Some(b"test_value".as_ref()));
1317 }
1318
1319 #[test]
1320 fn test_with_capacity_rounds_up_to_power_of_2() {
1321 let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1322
1323 let map: DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher> =
1325 DiskHashMap::with_capacity(tempdir.path(), 15, 300, 700).unwrap();
1326
1327 assert_eq!(map.capacity(), 16);
1329 }
1330
1331 #[test]
1332 fn test_with_capacity_zero_values_error() {
1333 let tempdir = tempfile::tempdir().expect("Failed to create temp dir");
1334
1335 let result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1337 DiskHashMap::with_capacity(tempdir.path().join("zero_entries"), 0, 512, 1024);
1338 assert!(result.is_err());
1339 if let Err(err) = result {
1340 assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
1341 }
1342
1343 let result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1345 DiskHashMap::with_capacity(tempdir.path().join("zero_keys"), 8, 0, 1024);
1346 assert!(result.is_err());
1347 if let Err(err) = result {
1348 assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
1349 }
1350
1351 let result: Result<DiskHashMap<Bytes, Bytes, MMapFile, FxBuildHasher>, _> =
1353 DiskHashMap::with_capacity(tempdir.path().join("zero_values"), 8, 512, 0);
1354 assert!(result.is_err());
1355 if let Err(err) = result {
1356 assert_eq!(err.kind(), io::ErrorKind::InvalidInput);
1357 }
1358 }
1359}