massmap/
massmap.rs

1use serde::Deserialize;
2use std::borrow::Borrow;
3use std::hash::{BuildHasher, Hash};
4use std::io::{Error, ErrorKind, Result};
5use std::marker::PhantomData;
6
7use crate::{
8    MassMapBucketMeta, MassMapDefaultHashLoader, MassMapHashLoader, MassMapHeader, MassMapInfo,
9    MassMapMeta, MassMapReader,
10};
11
/// Untyped, immutable handle to a serialized massmap file.
///
/// A `MassMapInner` is created from a [`MassMapReader`] (typically a file) by
/// [`MassMapInner::load`], which reads the header, the [`MassMapMeta`] and the
/// per-bucket metadata table up front. It carries no key or value types of its
/// own; call [`MassMapInner::cast`] to obtain a typed [`MassMap`] view that can
/// perform lookups.
///
/// # Type Parameters
/// - `R`: reader that satisfies [`MassMapReader`].
/// - `H`: hash loader used to reconstruct the [`BuildHasher`](BuildHasher) from
///   the persisted [`MassMapHashConfig`](crate::MassMapHashConfig).
#[derive(Debug)]
pub struct MassMapInner<R: MassMapReader, H: MassMapHashLoader = MassMapDefaultHashLoader> {
    /// Header serialized at the start of the massmap file.
    pub header: MassMapHeader,
    /// Metadata describing the layout and hashing strategy of the backing file.
    pub meta: MassMapMeta,
    /// Metadata for each hash bucket in the map, indexed by bucket number.
    pub(crate) bucket_metas: Vec<MassMapBucketMeta>,
    /// Hasher factory produced by `H::load` from the persisted hash configuration.
    build_hasher: H::BuildHasher,
    /// Reader used to access the backing storage.
    pub(crate) reader: R,
}
37
/// Typed view over a [`MassMapInner`].
///
/// This wrapper carries the key/value types at compile time while sharing the
/// underlying storage and hashing configuration with other typed views.
///
/// # Type Parameters
/// - `K`: key type that bucket entries are deserialized into.
/// - `V`: value type that bucket entries are deserialized into.
/// - `R`: reader that satisfies [`MassMapReader`].
/// - `H`: hash loader, forwarded unchanged to the wrapped [`MassMapInner`].
pub struct MassMap<K, V, R: MassMapReader, H: MassMapHashLoader = MassMapDefaultHashLoader> {
    /// Untyped map that owns the header, metadata, hasher and reader.
    inner: MassMapInner<R, H>,
    /// Phantom data to associate key and value types.
    phantom_data: PhantomData<(K, V)>,
}
47
48impl<R: MassMapReader, H: MassMapHashLoader> MassMapInner<R, H> {
49    /// Constructs an untyped massmap from a [`MassMapReader`] implementation.
50    ///
51    /// The method validates the leading header (magic number, metadata offset and
52    /// length) and deserializes [`MassMapMeta`]. Any IO or deserialization errors
53    /// are forwarded to the caller.
54    pub fn load(reader: R) -> Result<Self> {
55        let header =
56            reader.read_exact_at(0, MassMapHeader::SIZE as u64, MassMapHeader::deserialize)?;
57
58        let (meta, bucket_metas): (MassMapMeta, Vec<MassMapBucketMeta>) =
59            reader.read_exact_at(header.meta_offset, header.meta_length, |data| {
60                rmp_serde::from_slice(data).map_err(|e| {
61                    Error::new(
62                        ErrorKind::InvalidData,
63                        format!("Failed to deserialize MassMapMeta: {}", e),
64                    )
65                })
66            })?;
67
68        let build_hasher = H::load(&meta.hash_config)?;
69        Ok(MassMapInner {
70            header,
71            meta,
72            bucket_metas,
73            build_hasher,
74            reader,
75        })
76    }
77
78    /// Returns the number of entries written into this map.
79    pub fn len(&self) -> u64 {
80        self.meta.entry_count
81    }
82
83    /// Returns `true` if the map contains no entries.
84    pub fn is_empty(&self) -> bool {
85        self.meta.entry_count == 0
86    }
87
88    /// Returns information about the map's structure and contents.
89    pub fn info(&self) -> MassMapInfo {
90        MassMapInfo {
91            header: self.header.clone(),
92            meta: self.meta.clone(),
93        }
94    }
95
96    /// Casts this untyped massmap into a typed view with the specified key and value types.
97    pub fn cast<K, V>(self) -> MassMap<K, V, R, H>
98    where
99        K: for<'de> Deserialize<'de> + Eq + Hash,
100        V: for<'de> Deserialize<'de> + Clone,
101    {
102        MassMap {
103            inner: self,
104            phantom_data: PhantomData,
105        }
106    }
107}
108
109impl<K, V, R: MassMapReader, H: MassMapHashLoader> MassMap<K, V, R, H>
110where
111    K: for<'de> Deserialize<'de> + Eq + Hash,
112    V: for<'de> Deserialize<'de> + Clone,
113{
114    /// Constructs a massmap from a [`MassMapReader`] implementation.
115    ///
116    /// The method validates the leading header (magic number, metadata offset and
117    /// length) and deserializes [`MassMapMeta`]. Any IO or deserialization errors
118    /// are forwarded to the caller.
119    pub fn load(reader: R) -> Result<Self> {
120        let inner = MassMapInner::load(reader)?;
121        Ok(MassMap {
122            inner,
123            phantom_data: PhantomData,
124        })
125    }
126
127    /// Returns the number of entries written into this map.
128    pub fn len(&self) -> u64 {
129        self.inner.len()
130    }
131
132    /// Returns `true` if the map contains no entries.
133    pub fn is_empty(&self) -> bool {
134        self.inner.is_empty()
135    }
136
137    /// Returns the number of buckets in the underlying massmap.
138    ///
139    /// This is mainly intended for testing and diagnostics.
140    pub fn bucket_count(&self) -> usize {
141        self.inner.bucket_metas.len()
142    }
143
144    /// Exposes a reference to the underlying immutable metadata.
145    ///
146    /// This is primarily intended for internal crate use (e.g. merging maps).
147    pub(crate) fn meta(&self) -> &MassMapMeta {
148        &self.inner.meta
149    }
150
151    /// Exposes a reference to the underlying bucket metadata array.
152    pub(crate) fn bucket_metas(&self) -> &[MassMapBucketMeta] {
153        &self.inner.bucket_metas
154    }
155
156    /// Exposes the underlying header for internal crate use.
157    pub(crate) fn header(&self) -> &MassMapHeader {
158        &self.inner.header
159    }
160
161    /// Exposes a reference to the underlying reader for internal crate use.
162    pub(crate) fn reader(&self) -> &R {
163        &self.inner.reader
164    }
165
166    /// Returns information about the map's structure and contents.
167    pub fn info(&self) -> MassMapInfo {
168        self.inner.info()
169    }
170
171    /// Attempts to deserialize the value associated with `k`.
172    ///
173    /// Keys are hashed using the stored seed and only the relevant bucket is
174    /// deserialized, minimizing IO when the entry is missing.
175    ///
176    /// # Errors
177    ///
178    /// Returns an error if the reader fails to provide the bucket or if the
179    /// serialized data cannot be deserialized into `(K, V)` pairs.
180    pub fn get<Q>(&self, k: &Q) -> Result<Option<V>>
181    where
182        K: Borrow<Q>,
183        Q: Eq + Hash + ?Sized,
184    {
185        let index = self.bucket_index(k);
186        let entries = self.get_bucket(index)?;
187        for (key, value) in entries.iter() {
188            if key.borrow() == k {
189                return Ok(Some(value.clone()));
190            }
191        }
192        Ok(None)
193    }
194
195    /// Performs multiple lookups in a single pass.
196    ///
197    /// The reader is asked to fetch each bucket sequentially; implementations
198    /// may override [`MassMapReader::batch_read_at`] to issue true scatter/gather
199    /// reads where available. Results preserve the order of `keys`.
200    ///
201    /// # Errors
202    ///
203    /// Returns an error under the same conditions as [`get`](Self::get).
204    pub fn batch_get<Q>(
205        &self,
206        keys: impl IntoIterator<Item = impl Borrow<Q>>,
207    ) -> Result<Vec<Option<V>>>
208    where
209        K: Borrow<Q>,
210        Q: Eq + Hash + ?Sized,
211    {
212        let iov = keys.into_iter().map(|key| {
213            let index = self.bucket_index(key.borrow());
214            let bucket = &self.inner.bucket_metas[index];
215            (key, bucket.offset, bucket.length as u64)
216        });
217
218        self.inner.reader.batch_read_at(iov, |expected, data| {
219            if data.is_empty() {
220                return Ok(None);
221            }
222
223            let entries: Vec<(K, V)> = rmp_serde::from_slice(data).map_err(|e| {
224                Error::new(
225                    ErrorKind::InvalidData,
226                    format!("Failed to deserialize bucket entries: {}", e),
227                )
228            })?;
229
230            for (key, value) in entries.iter() {
231                if key.borrow() == expected.borrow() {
232                    return Ok(Some(value.clone()));
233                }
234            }
235            Ok(None)
236        })
237    }
238
239    /// Creates an iterator that traverses all entries in the map by bucket order.
240    ///
241    /// The iterator reads each bucket sequentially from the backing storage,
242    /// deserializes all entries in the bucket, and yields them one at a time.
243    /// Each bucket is fully loaded into memory before any of its entries are
244    /// yielded. Iteration stops immediately if a bucket fails to deserialize.
245    ///
246    /// # Examples
247    ///
248    /// ```
249    /// use massmap::{MassMap, MassMapBuilder};
250    ///
251    /// # fn main() -> std::io::Result<()> {
252    /// let entries = [("a", 1), ("b", 2), ("c", 3)];
253    /// let file = std::fs::File::create("examples/iter_test.massmap")?;
254    /// MassMapBuilder::default().build(&file, entries.iter())?;
255    ///
256    /// let file = std::fs::File::open("examples/iter_test.massmap")?;
257    /// let map = MassMap::<String, i32, _>::load(file)?;
258    /// let all_entries: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>()?;
259    /// assert_eq!(all_entries.len(), 3);
260    /// # Ok(())
261    /// # }
262    /// ```
263    pub fn iter(&self) -> MassMapIter<'_, K, V, R, H> {
264        MassMapIter {
265            map: self,
266            bucket_index: 0,
267            current_entries: Vec::new().into_iter(),
268        }
269    }
270
271    /// Retrieves all entries in the specified bucket.
272    ///
273    /// This method is primarily intended for testing and debugging.
274    ///
275    /// # Errors
276    ///
277    /// Returns an error if the reader fails to provide the bucket or if the
278    /// serialized data cannot be deserialized into `(K, V)` pairs.
279    pub fn get_bucket(&self, index: usize) -> Result<Vec<(K, V)>> {
280        let bucket = &self.inner.bucket_metas[index];
281        if bucket.count == 0 {
282            return Ok(Vec::new());
283        }
284
285        self.inner
286            .reader
287            .read_exact_at(bucket.offset, bucket.length as u64, |data| {
288                let entries: Vec<(K, V)> = rmp_serde::from_slice(data).map_err(|e| {
289                    Error::new(
290                        ErrorKind::InvalidData,
291                        format!("Failed to deserialize bucket entries: {}", e),
292                    )
293                })?;
294                Ok(entries)
295            })
296    }
297
298    fn bucket_index<Q>(&self, k: &Q) -> usize
299    where
300        K: Borrow<Q>,
301        Q: Eq + Hash + ?Sized,
302    {
303        (self.inner.build_hasher.hash_one(k) % (self.inner.bucket_metas.len() as u64)) as usize
304    }
305}
306
/// Iterator over all entries in a [`MassMap`].
///
/// This iterator traverses buckets sequentially, loading each bucket fully into
/// memory before yielding its entries one by one. Items are returned as
/// `Result`s so that IO or deserialization failures propagate to the caller.
pub struct MassMapIter<'a, K, V, R: MassMapReader, H: MassMapHashLoader> {
    /// Map being traversed.
    map: &'a MassMap<K, V, R, H>,
    /// Number of the next bucket to load.
    bucket_index: usize,
    /// Entries of the most recently loaded bucket that have not been yielded yet.
    current_entries: std::vec::IntoIter<(K, V)>,
}
317
318impl<'a, K, V, R: MassMapReader, H: MassMapHashLoader> Iterator for MassMapIter<'a, K, V, R, H>
319where
320    K: for<'de> Deserialize<'de> + Eq + Hash,
321    V: for<'de> Deserialize<'de> + Clone,
322{
323    type Item = Result<(K, V)>;
324
325    fn next(&mut self) -> Option<Self::Item> {
326        loop {
327            // If we have entries in the current bucket, yield the next one
328            if let Some(entry) = self.current_entries.next() {
329                return Some(Ok(entry));
330            }
331
332            // Move to the next bucket
333            if self.bucket_index >= self.map.inner.bucket_metas.len() {
334                return None;
335            }
336
337            // Read and deserialize the bucket
338            let result = self.map.get_bucket(self.bucket_index);
339            self.bucket_index += 1;
340
341            match result {
342                Ok(entries) => {
343                    let vec: Vec<(K, V)> = entries;
344                    self.current_entries = vec.into_iter();
345                }
346                Err(e) => return Some(Err(e)),
347            }
348        }
349    }
350}
351
// Integration-style tests: each test builds a massmap file in a temporary
// directory with `MassMapBuilder`, reopens it, and exercises the read path.
#[cfg(test)]
mod tests {
    use crate::*;

    // Builds a five-entry string map and checks metadata, single lookups and
    // batched lookups (with both borrowed `&str` and owned `String` keys).
    #[test]
    fn test_basic() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries = vec![
            ("apple", 1),
            ("banana", 2),
            ("cherry", 3),
            ("date", 4),
            ("elderberry", 5),
        ];
        let builder = MassMapBuilder::default()
            .with_hash_seed(42)
            .with_bucket_count(8)
            .with_writer_buffer_size(8 << 20) // 8 MiB
            .with_field_names(true);
        let info = builder.build(&writer, entries.iter()).unwrap();
        assert_eq!(info.meta.entry_count, 5);

        let file = std::fs::File::open(&file).unwrap();
        // The metadata region is expected to end exactly at the end of the file.
        assert_eq!(
            info.header.meta_length + info.header.meta_offset,
            file.metadata().unwrap().len()
        );
        let map = MassMap::<String, i32, _>::load(file).unwrap();
        assert_eq!(info, map.info());
        assert_eq!(map.len(), 5);
        assert!(!map.is_empty());
        assert_eq!(map.bucket_count(), 8);
        // Per-bucket counts must add up to the total entry count.
        assert_eq!(
            map.inner.bucket_metas.iter().map(|b| b.count).sum::<u32>(),
            5
        );
        assert_eq!(map.get("apple").unwrap(), Some(1));
        assert_eq!(map.get("banana").unwrap(), Some(2));
        assert_eq!(map.get("steins").unwrap(), None);
        assert_eq!(map.get("gate").unwrap(), None);

        let keys = vec!["cherry", "date", "fig", "elderberry", "steins", "gate"];
        let results = map.batch_get::<str>(keys).unwrap();
        assert_eq!(results, vec![Some(3), Some(4), None, Some(5), None, None]);

        let keys = ["cherry", "date", "fig", "elderberry", "steins", "gate"].map(|s| s.to_string());
        let results = map.batch_get::<String>(&keys).unwrap();
        assert_eq!(results, vec![Some(3), Some(4), None, Some(5), None, None]);
    }

    // Builds a one-million-entry map with as many buckets as entries and spot
    // checks random present keys and keys just outside the populated range.
    #[test]
    fn test_1m() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap.bin");
        let writer = std::fs::File::create(&file).unwrap();
        const N: u64 = 1_000_000;
        let entries = (0..N).map(|i| (i, i));

        let builder = MassMapBuilder::default()
            .with_bucket_count(N as u64)
            .with_writer_buffer_size(8 << 20); // 8 MiB
        builder.build(&writer, entries).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        println!("massmap file size: {}", file.metadata().unwrap().len());

        let map = MassMap::<u64, u64, _>::load(file).unwrap();
        assert_eq!(map.len(), N as u64);
        assert_eq!(map.bucket_count(), N as usize);
        assert_eq!(
            map.inner
                .bucket_metas
                .iter()
                .map(|b| b.count as usize)
                .sum::<usize>(),
            N as usize
        );

        for _ in 0..10 {
            // A key inside 0..N must be found with its own value.
            let k = rand::random::<u64>() % N as u64;
            assert_eq!(map.get(&k).unwrap(), Some(k));

            // The same key shifted past N was never inserted.
            let k = k + N as u64;
            assert_eq!(map.get(&k).unwrap(), None);
        }
    }

    // Corrupts a valid file step by step and checks that every stage is
    // reported as an error. The steps are cumulative: each one operates on
    // the file as left behind by the previous step.
    #[test]
    fn test_invalid_data() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("massmap_invalid.bin");
        let writer = std::fs::File::create(&path).unwrap();
        const N: u64 = 1000;
        let entries = (0..N).map(|i| (i, i));

        let builder = MassMapBuilder::default()
            .with_bucket_count(1)
            .with_writer_buffer_size(8 << 20); // 8 MiB
        let info = builder.build(&writer, entries).unwrap();

        let file = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .open(&path)
            .unwrap();

        // Step 1: overwrite bytes at offset 24 (presumably the start of the
        // bucket data, right after the header; TODO confirm). The map still
        // loads, but lookups into the bucket must fail.
        {
            file.write_all_at(b"invalid data", 24).unwrap();
            let file = std::fs::File::open(&path).unwrap();
            let map = MassMap::<u64, u64, _>::load(file).unwrap();
            map.get(&0).unwrap_err();
            map.batch_get([0]).unwrap_err();
        }

        // Step 2: overwrite the start of the metadata region; loading must fail.
        {
            file.write_all_at(b"invalid data", info.header.meta_offset)
                .unwrap();
            let file = std::fs::File::open(&path).unwrap();
            assert!(MassMap::<u64, u64, _>::load(file).is_err());
        }

        // Step 3: shrink the file to 8 bytes short of the end of the metadata
        // region; loading must fail.
        {
            file.set_len(info.header.meta_offset + info.header.meta_length - 8)
                .unwrap();
            let file = std::fs::File::open(&path).unwrap();
            assert!(MassMap::<u64, u64, _>::load(file).is_err());
        }

        // Step 4: overwrite the start of the header; loading must fail.
        {
            file.write_all_at(b"invalid data", 0).unwrap();
            let file = std::fs::File::open(&path).unwrap();
            assert!(MassMap::<u64, u64, _>::load(file).is_err());
        }

        // Step 5: `File::create` truncates the file to zero length; loading
        // from the resulting handle must fail.
        {
            let file = std::fs::File::create(&path).unwrap();
            assert!(MassMap::<u64, u64, _>::load(file).is_err());
        }

        // Finally, building with a single bucket and a 16-byte bucket size
        // limit is expected to be rejected (presumably because the bucket
        // exceeds the limit; verify against the builder).
        let writer = std::fs::File::create(&path).unwrap();
        let builder = MassMapBuilder::default()
            .with_bucket_count(1)
            .with_writer_buffer_size(8 << 20)
            .with_bucket_size_limit(16);
        let entries = (0..N).map(|i| (i, i));
        builder.build(&writer, entries).unwrap_err();
    }

    // Iterating a small map must yield exactly the entries that were written.
    #[test]
    fn test_iterator_basic() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_iter.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries = vec![
            ("apple", 1),
            ("banana", 2),
            ("cherry", 3),
            ("date", 4),
            ("elderberry", 5),
        ];
        let builder = MassMapBuilder::default()
            .with_hash_seed(42)
            .with_bucket_count(8);
        builder.build(&writer, entries.iter()).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMap::<String, i32, _>::load(file).unwrap();

        // Collect all entries from the iterator
        let mut collected: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>().unwrap();
        assert_eq!(collected.len(), 5);

        // Iteration follows bucket order, so sort both sides before comparing
        collected.sort_by(|a, b| a.0.cmp(&b.0));
        let mut expected = entries
            .iter()
            .map(|(k, v)| (k.to_string(), *v))
            .collect::<Vec<_>>();
        expected.sort_by(|a, b| a.0.cmp(&b.0));
        assert_eq!(collected, expected);
    }

    // Iterating a map built from no entries yields nothing.
    #[test]
    fn test_iterator_empty() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_iter_empty.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries: Vec<(String, i32)> = vec![];
        let builder = MassMapBuilder::default().with_bucket_count(8);
        builder.build(&writer, entries.iter()).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMap::<String, i32, _>::load(file).unwrap();

        let collected: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>().unwrap();
        assert_eq!(collected.len(), 0);
    }

    // Iteration also works when every entry lives in one bucket.
    #[test]
    fn test_iterator_single_bucket() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_iter_single.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries = vec![("a", 1), ("b", 2), ("c", 3), ("d", 4), ("e", 5)];
        // Use 1 bucket to ensure all entries are in the same bucket
        let builder = MassMapBuilder::default().with_bucket_count(1);
        builder.build(&writer, entries.iter()).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMap::<String, i32, _>::load(file).unwrap();

        let collected: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>().unwrap();
        assert_eq!(collected.len(), 5);

        // All entries should be present
        let mut collected_sorted = collected.clone();
        collected_sorted.sort_by(|a, b| a.0.cmp(&b.0));
        let mut expected = entries
            .iter()
            .map(|(k, v)| (k.to_string(), *v))
            .collect::<Vec<_>>();
        expected.sort_by(|a, b| a.0.cmp(&b.0));
        assert_eq!(collected_sorted, expected);
    }

    // Iteration across 100 buckets must visit every one of the 1000 entries.
    #[test]
    fn test_iterator_many_buckets() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_iter_many.bin");
        let writer = std::fs::File::create(&file).unwrap();
        const N: u64 = 1000;
        let entries = (0..N).map(|i| (i, i * 2));
        // Use many buckets
        let builder = MassMapBuilder::default().with_bucket_count(100);
        builder.build(&writer, entries).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMap::<u64, u64, _>::load(file).unwrap();

        let collected: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>().unwrap();
        assert_eq!(collected.len(), N as usize);

        // Verify all entries are present
        let mut collected_sorted = collected.clone();
        collected_sorted.sort_by(|a, b| a.0.cmp(&b.0));
        for i in 0..N {
            assert_eq!(collected_sorted[i as usize], (i, i * 2));
        }
    }

    // Two independent iterators over the same map yield identical sequences.
    #[test]
    fn test_iterator_multiple_iterations() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_iter_multiple.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries = vec![("x", 10), ("y", 20), ("z", 30)];
        let builder = MassMapBuilder::default().with_bucket_count(4);
        builder.build(&writer, entries.iter()).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMap::<String, i32, _>::load(file).unwrap();

        // First iteration
        let collected1: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>().unwrap();
        assert_eq!(collected1.len(), 3);

        // Second iteration should yield the same results
        let collected2: Vec<_> = map.iter().collect::<std::io::Result<Vec<_>>>().unwrap();
        assert_eq!(collected2.len(), 3);
        assert_eq!(collected1, collected2);
    }

    // The iterator composes with standard adaptors such as `take` and `skip`.
    #[test]
    fn test_iterator_partial_iteration() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_iter_partial.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries = (0..100).map(|i| (i, i));
        let builder = MassMapBuilder::default().with_bucket_count(10);
        builder.build(&writer, entries).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMap::<u64, u64, _>::load(file).unwrap();

        // Take only the first 10 entries
        let partial: Vec<_> = map
            .iter()
            .take(10)
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();
        assert_eq!(partial.len(), 10);

        // Skip some and take more
        let skip_take: Vec<_> = map
            .iter()
            .skip(20)
            .take(5)
            .collect::<std::io::Result<Vec<_>>>()
            .unwrap();
        assert_eq!(skip_take.len(), 5);
    }

    // A bucket corrupted after the map was loaded must surface as an `Err`
    // item during iteration.
    #[test]
    fn test_iterator_invalid_bucket() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("massmap_iter_invalid.bin");
        let writer = std::fs::File::create(&path).unwrap();
        let entries = (0..100).map(|i| (i, i));

        let builder = MassMapBuilder::default()
            .with_bucket_count(10)
            .with_writer_buffer_size(8 << 20);
        builder.build(&writer, entries).unwrap();

        // Load the map while the file is still intact, so the bucket metadata
        // is valid; the corruption below only affects bucket payloads.
        let file = std::fs::File::open(&path).unwrap();
        let map = MassMap::<u64, u64, _>::load(file).unwrap();

        for bucket in &map.inner.bucket_metas {
            // NOTE(review): buckets at offset 24 are skipped; 24 is presumably
            // the start of the bucket data right after the header. Confirm why
            // that bucket is excluded.
            if bucket.offset != 24 && bucket.count > 0 {
                // Corrupt the first non-empty bucket that is not at offset 24
                let file = std::fs::OpenOptions::new()
                    .read(true)
                    .write(true)
                    .open(&path)
                    .unwrap();
                file.write_all_at(b"corrupted bucket", bucket.offset)
                    .unwrap();
                break;
            }
        }

        // The iterator should return an error when it tries to read the corrupted bucket
        let mut found_error = false;
        for result in map.iter() {
            if result.is_err() {
                found_error = true;
                break;
            }
        }
        assert!(found_error);
    }

    // An untyped `MassMapInner` can be cast to a typed view whose value type
    // (`i64`) differs from the one used when building (`i32` literals).
    #[test]
    fn test_massmap_cast() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("massmap_cast.bin");
        let writer = std::fs::File::create(&file).unwrap();
        let entries = vec![
            ("apple", 1),
            ("banana", 2),
            ("cherry", 3),
            ("date", 4),
            ("elderberry", 5),
        ];
        let builder = MassMapBuilder::default()
            .with_hash_seed(42)
            .with_bucket_count(8);
        builder.build(&writer, entries.iter()).unwrap();

        let file = std::fs::File::open(&file).unwrap();
        let map = MassMapInner::<_>::load(file).unwrap();

        let casted_map: MassMap<String, i64, _, _> = map.cast();
        assert_eq!(casted_map.get("apple").unwrap(), Some(1i64));
        assert_eq!(casted_map.get("banana").unwrap(), Some(2i64));
        assert_eq!(casted_map.get("steins").unwrap(), None);
    }
}