Skip to main content

forest/cid_collections/
hash_map.rs

1// Copyright 2019-2026 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4use super::{CidV1DagCborBlake2b256, MaybeCompactedCid, Uncompactable};
5use cid::Cid;
6#[cfg(doc)]
7use std::collections::HashMap;
8use std::collections::hash_map::{
9    Entry as StdEntry, IntoIter as StdIntoIter, OccupiedEntry as StdOccupiedEntry,
10    VacantEntry as StdVacantEntry,
11};
12
13/// A space-optimised hash map of [`Cid`]s, matching the API for [`std::collections::HashMap`].
14///
15/// We accept the implementation complexity of per-compaction-method `HashMap`s for
16/// the space savings, which are constant per-variant, rather than constant per-item.
17///
18/// This is dramatic for large maps!
19/// Using, e.g [`SmallCidNonEmptyVec`](super::SmallCidNonEmptyVec) will cost
20/// 25% more per-CID in the median case (32 B vs 40 B)
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct CidHashMap<V> {
23    compact: ahash::HashMap<CidV1DagCborBlake2b256, V>,
24    uncompact: ahash::HashMap<Uncompactable, V>,
25}
26
27impl<V> CidHashMap<V> {
28    /// Creates an empty `HashMap`.
29    ///
30    /// See also [`HashMap::new`].
31    pub fn new() -> Self {
32        Self::default()
33    }
34    /// Returns the number of elements in the map.
35    ///
36    /// See also [`HashMap::len`].
37    pub fn len(&self) -> usize {
38        let Self { compact, uncompact } = self;
39        compact.len() + uncompact.len()
40    }
41    /// How many values this map is guaranteed to hold without reallocating.
42    #[allow(dead_code)] // mirror of `total_capacity`, below
43    pub fn capacity_min(&self) -> usize {
44        let Self { compact, uncompact } = self;
45        std::cmp::min(compact.capacity(), uncompact.capacity())
46    }
47    /// Reflective of reserved capacity of this map.
48    pub fn total_capacity(&self) -> usize {
49        let Self { compact, uncompact } = self;
50        compact.capacity() + uncompact.capacity()
51    }
52    /// Returns `true` if the map contains a value for the specified key.
53    ///
54    /// See also [`HashMap::contains_key`].
55    pub fn contains_key(&self, key: &Cid) -> bool {
56        match MaybeCompactedCid::from(*key) {
57            MaybeCompactedCid::Compact(c) => self.compact.contains_key(&c),
58            MaybeCompactedCid::Uncompactable(u) => self.uncompact.contains_key(&u),
59        }
60    }
61    /// Returns a reference to the value corresponding to the key.
62    ///
63    /// See also [`HashMap::get`].
64    pub fn get(&self, key: &Cid) -> Option<&V> {
65        match MaybeCompactedCid::from(*key) {
66            MaybeCompactedCid::Compact(c) => self.compact.get(&c),
67            MaybeCompactedCid::Uncompactable(u) => self.uncompact.get(&u),
68        }
69    }
70    /// Inserts a key-value pair into the map.
71    ///
72    /// If the map did not have this key present, [`None`] is returned.
73    ///
74    /// If the map did have this key present, the value is updated, and the old
75    /// value is returned.
76    ///
77    /// See also [`HashMap::insert`].
78    pub fn insert(&mut self, key: Cid, value: V) -> Option<V> {
79        match MaybeCompactedCid::from(key) {
80            MaybeCompactedCid::Compact(c) => self.compact.insert(c, value),
81            MaybeCompactedCid::Uncompactable(u) => self.uncompact.insert(u, value),
82        }
83    }
84    /// Removes a key from the map, returning the value at the key if the key
85    /// was previously in the map.
86    ///
87    /// See also [`HashMap::remove`].
88    pub fn remove(&mut self, key: &Cid) -> Option<V> {
89        match MaybeCompactedCid::from(*key) {
90            MaybeCompactedCid::Compact(c) => self.compact.remove(&c),
91            MaybeCompactedCid::Uncompactable(u) => self.uncompact.remove(&u),
92        }
93    }
94    /// Returns `true` if the map is empty.
95    ///
96    /// See also [`HashMap::is_empty`].
97    #[allow(dead_code)]
98    pub fn is_empty(&self) -> bool {
99        self.compact.is_empty() && self.uncompact.is_empty()
100    }
101}
102
103///////////////
104// Entry API //
105///////////////
106
107impl<V> CidHashMap<V> {
108    /// Gets the given key's corresponding entry in the map for in-place manipulation.
109    ///
110    /// See also [`HashMap::entry`].
111    #[allow(dead_code)]
112    pub fn entry(&mut self, key: Cid) -> Entry<'_, V> {
113        match MaybeCompactedCid::from(key) {
114            MaybeCompactedCid::Compact(c) => match self.compact.entry(c) {
115                StdEntry::Occupied(o) => Entry::Occupied(OccupiedEntry {
116                    inner: OccupiedEntryInner::Compact(o),
117                }),
118                StdEntry::Vacant(v) => Entry::Vacant(VacantEntry {
119                    inner: VacantEntryInner::Compact(v),
120                }),
121            },
122            MaybeCompactedCid::Uncompactable(u) => match self.uncompact.entry(u) {
123                StdEntry::Occupied(o) => Entry::Occupied(OccupiedEntry {
124                    inner: OccupiedEntryInner::Uncompact(o),
125                }),
126                StdEntry::Vacant(v) => Entry::Vacant(VacantEntry {
127                    inner: VacantEntryInner::Uncompact(v),
128                }),
129            },
130        }
131    }
132}
133
134/// A view into a single entry in a map, which may either be vacant or occupied.
135///
136/// This `enum` is constructed using [`CidHashMap::entry`].
137#[allow(dead_code)]
138#[derive(Debug)]
139pub enum Entry<'a, V: 'a> {
140    /// An occupied entry.
141    Occupied(OccupiedEntry<'a, V>),
142    /// A vacant entry.
143    Vacant(VacantEntry<'a, V>),
144}
145
146/// A view into an occupied entry in a `HashMap`.
147/// It is part of the [`Entry`] enum.
148///
149/// See also [`std::collections::hash_map::OccupiedEntry`].
150#[allow(dead_code)]
151#[derive(Debug)]
152pub struct OccupiedEntry<'a, V> {
153    inner: OccupiedEntryInner<'a, V>,
154}
155
156impl<V> OccupiedEntry<'_, V> {
157    /// Gets a reference to the value in the entry.
158    ///
159    /// See also [`std::collections::hash_map::OccupiedEntry::get`].
160    #[allow(dead_code)]
161    pub fn get(&self) -> &V {
162        match &self.inner {
163            OccupiedEntryInner::Compact(c) => c.get(),
164            OccupiedEntryInner::Uncompact(u) => u.get(),
165        }
166    }
167}
168
169/// Hides compaction from users.
170#[allow(dead_code)]
171#[derive(Debug)]
172enum OccupiedEntryInner<'a, V> {
173    Compact(StdOccupiedEntry<'a, CidV1DagCborBlake2b256, V>),
174    Uncompact(StdOccupiedEntry<'a, Uncompactable, V>),
175}
176
177/// A view into a vacant entry in a `HashMap`.
178/// It is part of the [`Entry`] enum.
179///
180/// See also [`std::collections::hash_map::VacantEntry`].
181#[allow(dead_code)]
182#[derive(Debug)]
183pub struct VacantEntry<'a, V> {
184    inner: VacantEntryInner<'a, V>,
185}
186
187impl<'a, V> VacantEntry<'a, V> {
188    /// Sets the value of the entry with the `VacantEntry`'s key,
189    /// and returns a mutable reference to it.
190    ///
191    /// See also [`std::collections::hash_map::VacantEntry::insert`].
192    #[allow(dead_code)]
193    pub fn insert(self, value: V) -> &'a mut V {
194        match self.inner {
195            VacantEntryInner::Compact(c) => c.insert(value),
196            VacantEntryInner::Uncompact(u) => u.insert(value),
197        }
198    }
199}
200
201/// Hides compaction from users.
202#[allow(dead_code)]
203#[derive(Debug)]
204enum VacantEntryInner<'a, V> {
205    Compact(StdVacantEntry<'a, CidV1DagCborBlake2b256, V>),
206    Uncompact(StdVacantEntry<'a, Uncompactable, V>),
207}
208
209////////////////////
210// Collection Ops //
211////////////////////
212
213impl<V> Default for CidHashMap<V> {
214    fn default() -> Self {
215        Self {
216            compact: Default::default(),
217            uncompact: Default::default(),
218        }
219    }
220}
221
222impl<V> Extend<(Cid, V)> for CidHashMap<V> {
223    fn extend<T: IntoIterator<Item = (Cid, V)>>(&mut self, iter: T) {
224        for (cid, v) in iter {
225            match MaybeCompactedCid::from(cid) {
226                MaybeCompactedCid::Compact(compact) => {
227                    self.compact.insert(compact, v);
228                }
229                MaybeCompactedCid::Uncompactable(uncompact) => {
230                    self.uncompact.insert(uncompact, v);
231                }
232            };
233        }
234    }
235}
236
237impl<V> FromIterator<(Cid, V)> for CidHashMap<V> {
238    fn from_iter<T: IntoIterator<Item = (Cid, V)>>(iter: T) -> Self {
239        let mut this = Self::new();
240        this.extend(iter);
241        this
242    }
243}
244
245pub struct IntoIter<V> {
246    compact: StdIntoIter<CidV1DagCborBlake2b256, V>,
247    uncompact: StdIntoIter<Uncompactable, V>,
248}
249
250impl<V> Iterator for IntoIter<V> {
251    type Item = (Cid, V);
252
253    fn next(&mut self) -> Option<Self::Item> {
254        self.compact
255            .next()
256            .map(|(k, v)| (MaybeCompactedCid::Compact(k).into(), v))
257            .or_else(|| {
258                self.uncompact
259                    .next()
260                    .map(|(k, v)| (MaybeCompactedCid::Uncompactable(k).into(), v))
261            })
262    }
263    fn size_hint(&self) -> (usize, Option<usize>) {
264        join_size_hints(self.compact.size_hint(), self.uncompact.size_hint())
265    }
266}
267
268fn join_size_hints(
269    left: (usize, Option<usize>),
270    right: (usize, Option<usize>),
271) -> (usize, Option<usize>) {
272    let (l_lower, l_upper) = left;
273    let (r_lower, r_upper) = right;
274    let lower = l_lower.saturating_add(r_lower);
275    let upper = match (l_upper, r_upper) {
276        (Some(l), Some(r)) => l.checked_add(r),
277        _ => None,
278    };
279    (lower, upper)
280}
281
282impl<V> IntoIterator for CidHashMap<V> {
283    type Item = (Cid, V);
284
285    type IntoIter = IntoIter<V>;
286
287    fn into_iter(self) -> Self::IntoIter {
288        let Self { compact, uncompact } = self;
289        IntoIter {
290            compact: compact.into_iter(),
291            uncompact: uncompact.into_iter(),
292        }
293    }
294}
295
296//////////
297// Keys //
298//////////
299
300#[cfg(test)]
301use std::collections::hash_map::Keys as StdKeys;
302
303#[cfg(test)]
304impl<V> CidHashMap<V> {
305    /// An iterator visiting all keys in arbitrary order.
306    ///
307    /// In a notable departure from [`HashMap::keys`], the element type is [`Cid`], not [`&Cid`].
308    ///
309    pub fn keys(&self) -> Keys<'_, V> {
310        let Self { compact, uncompact } = self;
311        Keys {
312            compact: compact.keys(),
313            uncompact: uncompact.keys(),
314        }
315    }
316}
317
318/// An iterator over the keys of a `HashMap`.
319///
320/// See [`CidHashMap::keys`].
321#[cfg(test)]
322pub struct Keys<'a, V> {
323    compact: StdKeys<'a, CidV1DagCborBlake2b256, V>,
324    uncompact: StdKeys<'a, Uncompactable, V>,
325}
326
327#[cfg(test)]
328impl<V> Iterator for Keys<'_, V> {
329    type Item = Cid;
330
331    fn next(&mut self) -> Option<Self::Item> {
332        self.compact
333            .next()
334            .copied()
335            .map(MaybeCompactedCid::Compact)
336            .map(Into::into)
337            .or_else(|| {
338                self.uncompact
339                    .next()
340                    .copied()
341                    .map(MaybeCompactedCid::Uncompactable)
342                    .map(Into::into)
343            })
344    }
345
346    fn size_hint(&self) -> (usize, Option<usize>) {
347        join_size_hints(self.compact.size_hint(), self.uncompact.size_hint())
348    }
349}
350
351#[cfg(test)]
352mod tests {
353    use super::*;
354
355    use quickcheck::quickcheck;
356
357    #[derive(derive_quickcheck_arbitrary::Arbitrary, Clone, Debug)]
358    enum Operation {
359        ContainsKey(MaybeCompactedCid),
360        Get(MaybeCompactedCid),
361        Insert(MaybeCompactedCid, u8),
362        Entry { key: MaybeCompactedCid, value: u8 },
363    }
364
365    quickcheck! {
366        fn operations(operations: Vec<Operation>) -> () {
367            use Operation as Op;
368
369            let mut subject = CidHashMap::default();
370            let mut reference = ahash::HashMap::default();
371            for operation in operations {
372                match operation {
373                    Op::ContainsKey(key) => {
374                        let key = key.into();
375                        assert_eq!(
376                            subject.contains_key(&key),
377                            reference.contains_key(&key)
378                        )
379                    },
380                    Op::Get(key) => {
381                        let key = key.into();
382                        assert_eq!(
383                            subject.get(&key),
384                            reference.get(&key)
385                        )
386                    },
387                    Op::Insert(key, val) => {
388                        let key = key.into();
389                        assert_eq!(
390                            subject.insert(key, val),
391                            reference.insert(key, val)
392                        )
393                    },
394                    Op::Entry {
395                        key, value
396                    } => {
397                        let key = key.into();
398                        match (subject.entry(key), reference.entry(key)) {
399                            (Entry::Occupied(subj), StdEntry::Occupied(refr)) => assert_eq!(subj.get(), refr.get()),
400                            (Entry::Vacant(subj), StdEntry::Vacant(refr)) => assert_eq!(subj.insert(value), refr.insert(value)),
401                            (subj, refr) => panic!("{subj:?}, {refr:?}")
402                        }
403                    }
404                }
405            };
406            assert_eq!(reference, ahash::HashMap::from_iter(subject));
407        }
408
409        fn collect(pairs: Vec<(Cid, u8)>) -> () {
410            let refr = ahash::HashMap::from_iter(pairs.clone());
411            let via_subject = ahash::HashMap::from_iter(
412                CidHashMap::from_iter(pairs)
413            );
414            assert_eq!(refr, via_subject);
415        }
416    }
417}