pgm_extra/collections/
map.rs

1//! An owned map optimized for read-heavy workloads backed by PGM-Index.
2//!
3//! `Map` is a drop-in replacement for `BTreeMap` in read-heavy workloads
4//! where you build the map once and perform many lookups.
5
6use alloc::vec::Vec;
7use core::cmp::Ordering;
8use core::fmt;
9use core::hash::{Hash, Hasher};
10use core::ops::RangeBounds;
11
12use crate::error::Error;
13use crate::index::external;
14use crate::index::key::Indexable;
15use crate::util::range::range_to_indices;
16
17/// An owned map optimized for read-heavy workloads, backed by a PGM-index.
18///
19/// This is a BTreeMap-like container that owns its data and provides
20/// efficient lookups using a learned index. Mutations are supported but
21/// trigger O(n) index rebuilds; for frequent updates use [`crate::Dynamic`].
22///
23/// Works with any key type that implements [`Indexable`]:
24/// - Numeric types (u64, i32, etc.) are indexed directly
25/// - String/bytes types use prefix extraction
26///
27/// # Example
28///
29/// ```
30/// use pgm_extra::Map;
31///
32/// // Numeric keys
33/// let entries: Vec<(u64, &str)> = vec![(1, "one"), (2, "two"), (3, "three")];
34/// let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
35/// assert_eq!(map.get(&2), Some(&"two"));
36///
37/// // String keys
38/// let entries = vec![("apple", 1), ("banana", 2), ("cherry", 3)];
39/// let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
40/// assert_eq!(map.get(&"banana"), Some(&2));
41/// ```
42#[cfg_attr(
43    feature = "rkyv",
44    derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
45)]
46#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47#[cfg_attr(
48    feature = "serde",
49    serde(
50        bound = "K: serde::Serialize + serde::de::DeserializeOwned, V: serde::Serialize + serde::de::DeserializeOwned, K::Key: serde::Serialize + serde::de::DeserializeOwned"
51    )
52)]
53pub struct Map<K: Indexable, V> {
54    keys: Vec<K>,
55    values: Vec<V>,
56    index: Option<external::Static<K>>,
57    epsilon: usize,
58    epsilon_recursive: usize,
59}
60
61impl<K: Indexable + Ord, V> Map<K, V>
62where
63    K::Key: Ord,
64{
65    /// Create a map from pre-sorted, unique key-value pairs.
66    ///
67    /// # Panics
68    ///
69    /// Debug builds will panic if keys are not sorted or contain duplicates.
70    pub fn from_sorted_unique(
71        entries: Vec<(K, V)>,
72        epsilon: usize,
73        epsilon_recursive: usize,
74    ) -> Result<Self, Error> {
75        let (keys, values): (Vec<K>, Vec<V>) = entries.into_iter().unzip();
76
77        debug_assert!(
78            keys.windows(2).all(|w| w[0] < w[1]),
79            "keys must be sorted and unique"
80        );
81
82        let index = if keys.is_empty() {
83            None
84        } else {
85            Some(external::Static::new(&keys, epsilon, epsilon_recursive)?)
86        };
87        Ok(Self {
88            keys,
89            values,
90            index,
91            epsilon,
92            epsilon_recursive,
93        })
94    }
95
96    /// Create a map from an unsorted iterator of key-value pairs.
97    ///
98    /// If duplicate keys exist, the last value wins (like `BTreeMap::from_iter`).
99    pub fn build<I>(iter: I, epsilon: usize, epsilon_recursive: usize) -> Result<Self, Error>
100    where
101        I: IntoIterator<Item = (K, V)>,
102    {
103        let mut entries: Vec<(K, V)> = iter.into_iter().collect();
104        entries.sort_by(|a, b| a.0.cmp(&b.0));
105        entries.dedup_by(|a, b| a.0 == b.0);
106
107        Self::from_sorted_unique(entries, epsilon, epsilon_recursive)
108    }
109
110    pub fn empty(epsilon: usize, epsilon_recursive: usize) -> Self {
111        Self {
112            keys: Vec::new(),
113            values: Vec::new(),
114            index: None,
115            epsilon,
116            epsilon_recursive,
117        }
118    }
119
120    pub fn new(entries: Vec<(K, V)>) -> Result<Self, Error> {
121        Self::build(entries, 64, 4)
122    }
123
124    #[inline]
125    pub fn get(&self, key: &K) -> Option<&V> {
126        let index = self.index.as_ref()?;
127        let approx = index.search(key);
128
129        let lo = approx.lo;
130        let hi = approx.hi.min(self.keys.len());
131
132        for i in lo..hi {
133            match self.keys[i].cmp(key) {
134                Ordering::Equal => return Some(&self.values[i]),
135                Ordering::Greater => return None,
136                Ordering::Less => continue,
137            }
138        }
139        None
140    }
141
142    #[inline]
143    pub fn get_key_value(&self, key: &K) -> Option<(&K, &V)> {
144        let index = self.index.as_ref()?;
145        let approx = index.search(key);
146
147        let lo = approx.lo;
148        let hi = approx.hi.min(self.keys.len());
149
150        for i in lo..hi {
151            match self.keys[i].cmp(key) {
152                Ordering::Equal => return Some((&self.keys[i], &self.values[i])),
153                Ordering::Greater => return None,
154                Ordering::Less => continue,
155            }
156        }
157        None
158    }
159
160    #[inline]
161    pub fn contains_key(&self, key: &K) -> bool {
162        self.get(key).is_some()
163    }
164
165    /// Find the index of the first key >= the given key.
166    #[inline]
167    pub fn lower_bound(&self, key: &K) -> usize {
168        match &self.index {
169            Some(index) => index.lower_bound(&self.keys, key),
170            None => 0,
171        }
172    }
173
174    /// Find the index of the first key > the given key.
175    #[inline]
176    pub fn upper_bound(&self, key: &K) -> usize {
177        match &self.index {
178            Some(index) => index.upper_bound(&self.keys, key),
179            None => 0,
180        }
181    }
182
183    #[inline]
184    pub fn range<R>(&self, range: R) -> impl DoubleEndedIterator<Item = (&K, &V)>
185    where
186        R: RangeBounds<K>,
187    {
188        let (start, end) = range_to_indices(
189            range,
190            self.keys.len(),
191            |k| self.lower_bound(k),
192            |k| self.upper_bound(k),
193        );
194        self.keys[start..end]
195            .iter()
196            .zip(self.values[start..end].iter())
197    }
198
199    #[inline]
200    pub fn first_key_value(&self) -> Option<(&K, &V)> {
201        if self.keys.is_empty() {
202            None
203        } else {
204            Some((&self.keys[0], &self.values[0]))
205        }
206    }
207
208    #[inline]
209    pub fn last_key_value(&self) -> Option<(&K, &V)> {
210        if self.keys.is_empty() {
211            None
212        } else {
213            let last = self.keys.len() - 1;
214            Some((&self.keys[last], &self.values[last]))
215        }
216    }
217
218    #[inline]
219    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&K, &V)> + DoubleEndedIterator {
220        self.keys.iter().zip(self.values.iter())
221    }
222
223    #[inline]
224    pub fn keys(&self) -> impl ExactSizeIterator<Item = &K> + DoubleEndedIterator {
225        self.keys.iter()
226    }
227
228    #[inline]
229    pub fn values(&self) -> impl ExactSizeIterator<Item = &V> + DoubleEndedIterator {
230        self.values.iter()
231    }
232
233    #[inline]
234    pub fn values_mut(&mut self) -> impl ExactSizeIterator<Item = &mut V> + DoubleEndedIterator {
235        self.values.iter_mut()
236    }
237
238    #[inline]
239    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
240        let index = self.index.as_ref()?;
241        let approx = index.search(key);
242
243        let lo = approx.lo;
244        let hi = approx.hi.min(self.keys.len());
245
246        for i in lo..hi {
247            match self.keys[i].cmp(key) {
248                Ordering::Equal => return Some(&mut self.values[i]),
249                Ordering::Greater => return None,
250                Ordering::Less => continue,
251            }
252        }
253        None
254    }
255
256    #[inline]
257    pub fn len(&self) -> usize {
258        self.keys.len()
259    }
260
261    #[inline]
262    pub fn is_empty(&self) -> bool {
263        self.keys.is_empty()
264    }
265
266    /// Get the number of segments in the underlying index.
267    #[inline]
268    pub fn segments_count(&self) -> usize {
269        self.index.as_ref().map_or(0, |i| i.segments_count())
270    }
271
272    /// Get the height of the underlying index.
273    #[inline]
274    pub fn height(&self) -> usize {
275        self.index.as_ref().map_or(0, |i| i.height())
276    }
277
278    /// Get the epsilon value.
279    #[inline]
280    pub fn epsilon(&self) -> usize {
281        self.epsilon
282    }
283
284    /// Get the epsilon_recursive value.
285    #[inline]
286    pub fn epsilon_recursive(&self) -> usize {
287        self.epsilon_recursive
288    }
289
290    /// Approximate memory usage in bytes.
291    pub fn size_in_bytes(&self) -> usize {
292        self.index.as_ref().map_or(0, |i| i.size_in_bytes())
293            + self.keys.capacity() * core::mem::size_of::<K>()
294            + self.values.capacity() * core::mem::size_of::<V>()
295    }
296
297    /// Get a reference to the underlying keys slice.
298    #[inline]
299    pub fn keys_slice(&self) -> &[K] {
300        &self.keys
301    }
302
303    /// Get a reference to the underlying values slice.
304    #[inline]
305    pub fn values_slice(&self) -> &[V] {
306        &self.values
307    }
308
309    /// Consume the map and return the underlying key-value pairs.
310    #[inline]
311    pub fn into_vec(self) -> Vec<(K, V)> {
312        self.keys.into_iter().zip(self.values).collect()
313    }
314
315    /// Get a reference to the underlying index.
316    #[inline]
317    pub fn index(&self) -> Option<&external::Static<K>> {
318        self.index.as_ref()
319    }
320
321    /// Insert a key-value pair into the map.
322    ///
323    /// Returns the old value if the key already existed, or `None` if it was newly inserted.
324    ///
325    /// **Note**: This rebuilds the entire index, making it O(n) per insertion.
326    /// For bulk insertions, prefer collecting into a new map.
327    /// For frequent mutations, consider using `index::owned::Dynamic` directly.
328    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
329        if let Some(existing) = self.get_mut(&key) {
330            return Some(core::mem::replace(existing, value));
331        }
332
333        let mut entries: Vec<(K, V)> = core::mem::take(&mut self.keys)
334            .into_iter()
335            .zip(core::mem::take(&mut self.values))
336            .collect();
337        entries.push((key, value));
338        entries.sort_by(|a, b| a.0.cmp(&b.0));
339
340        if let Ok(new_map) = Self::from_sorted_unique(entries, self.epsilon, self.epsilon_recursive)
341        {
342            *self = new_map;
343        }
344        None
345    }
346}
347
348impl<K: Indexable + Ord, V> core::ops::Index<&K> for Map<K, V>
349where
350    K::Key: Ord,
351{
352    type Output = V;
353
354    /// Returns a reference to the value corresponding to the key.
355    ///
356    /// # Panics
357    ///
358    /// Panics if the key is not present in the map.
359    #[inline]
360    fn index(&self, key: &K) -> &Self::Output {
361        self.get(key).expect("key not found")
362    }
363}
364
365// Standard trait implementations
366
367impl<K: Indexable + Clone, V: Clone> Clone for Map<K, V>
368where
369    K::Key: Clone,
370{
371    fn clone(&self) -> Self {
372        Self {
373            keys: self.keys.clone(),
374            values: self.values.clone(),
375            index: self.index.clone(),
376            epsilon: self.epsilon,
377            epsilon_recursive: self.epsilon_recursive,
378        }
379    }
380}
381
382impl<K: Indexable + fmt::Debug, V: fmt::Debug> fmt::Debug for Map<K, V> {
383    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
384        f.debug_map()
385            .entries(self.keys.iter().zip(self.values.iter()))
386            .finish()
387    }
388}
389
390impl<K: Indexable + Ord + PartialEq, V: PartialEq> PartialEq for Map<K, V> {
391    fn eq(&self, other: &Self) -> bool {
392        self.keys == other.keys && self.values == other.values
393    }
394}
395
396impl<K: Indexable + Ord + Eq, V: Eq> Eq for Map<K, V> {}
397
398impl<K: Indexable + Hash, V: Hash> Hash for Map<K, V> {
399    fn hash<H: Hasher>(&self, state: &mut H) {
400        for (k, v) in self.keys.iter().zip(self.values.iter()) {
401            k.hash(state);
402            v.hash(state);
403        }
404    }
405}
406
407impl<K: Indexable + Ord, V> IntoIterator for Map<K, V>
408where
409    K::Key: Ord,
410{
411    type Item = (K, V);
412    type IntoIter = alloc::vec::IntoIter<(K, V)>;
413
414    fn into_iter(self) -> Self::IntoIter {
415        self.keys
416            .into_iter()
417            .zip(self.values)
418            .collect::<Vec<_>>()
419            .into_iter()
420    }
421}
422
423impl<'a, K: Indexable, V> IntoIterator for &'a Map<K, V> {
424    type Item = (&'a K, &'a V);
425    type IntoIter = core::iter::Zip<core::slice::Iter<'a, K>, core::slice::Iter<'a, V>>;
426
427    fn into_iter(self) -> Self::IntoIter {
428        self.keys.iter().zip(self.values.iter())
429    }
430}
431
432impl<K: Indexable + Ord, V> FromIterator<(K, V)> for Map<K, V>
433where
434    K::Key: Ord,
435{
436    /// Creates a Map from an iterator.
437    ///
438    /// Returns an empty map if the iterator is empty.
439    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
440        Self::build(iter, 64, 4).unwrap_or_else(|_| Self::empty(64, 4))
441    }
442}
443
444impl<K: Indexable + Ord, V> core::iter::Extend<(K, V)> for Map<K, V>
445where
446    K::Key: Ord,
447{
448    /// Extends the map with key-value pairs from an iterator.
449    ///
450    /// If duplicate keys exist, the last value wins.
451    ///
452    /// **Note**: This rebuilds the entire index, making it O(n) per call.
453    /// For bulk insertions, prefer collecting into a new map.
454    /// For frequent mutations, consider using `index::owned::Dynamic` directly.
455    fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
456        let mut entries: Vec<(K, V)> = core::mem::take(&mut self.keys)
457            .into_iter()
458            .zip(core::mem::take(&mut self.values))
459            .collect();
460        entries.extend(iter);
461        entries.sort_by(|a, b| a.0.cmp(&b.0));
462        entries.dedup_by(|a, b| a.0 == b.0);
463
464        match Self::from_sorted_unique(entries, self.epsilon, self.epsilon_recursive) {
465            Ok(new_map) => *self = new_map,
466            Err(_) => {
467                *self = Self::empty(self.epsilon, self.epsilon_recursive);
468            }
469        }
470    }
471}
472
473#[cfg(test)]
474mod tests {
475    use super::*;
476    use alloc::string::String;
477    use alloc::vec;
478
479    #[test]
480    fn test_map_numeric() {
481        let entries: Vec<(u64, i32)> = (0..1000).map(|i| (i, i as i32 * 2)).collect();
482        let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
483
484        assert_eq!(map.len(), 1000);
485        assert_eq!(map.get(&500), Some(&1000));
486        assert_eq!(map.get(&1001), None);
487    }
488
489    #[test]
490    fn test_map_string_keys() {
491        let entries = vec![("apple", 1), ("banana", 2), ("cherry", 3), ("date", 4)];
492        let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
493
494        assert_eq!(map.get(&"banana"), Some(&2));
495        assert_eq!(map.get(&"cherry"), Some(&3));
496        assert_eq!(map.get(&"elderberry"), None);
497    }
498
499    #[test]
500    fn test_map_owned_string_keys() {
501        let entries: Vec<(String, i32)> =
502            vec![("alpha".into(), 1), ("beta".into(), 2), ("gamma".into(), 3)];
503        let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
504
505        assert_eq!(map.get(&String::from("beta")), Some(&2));
506        assert!(map.contains_key(&String::from("alpha")));
507    }
508
509    #[test]
510    fn test_map_get_key_value() {
511        let entries: Vec<(u64, &str)> = vec![(1, "a"), (2, "b")];
512        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
513
514        assert_eq!(map.get_key_value(&1), Some((&1, &"a")));
515        assert_eq!(map.get_key_value(&3), None);
516    }
517
518    #[test]
519    fn test_map_build() {
520        let entries = vec![(5u64, "e"), (3, "c"), (1, "a"), (4, "d"), (2, "b")];
521        let map = Map::build(entries, 4, 2).unwrap();
522
523        assert_eq!(map.len(), 5);
524        assert_eq!(map.get(&1), Some(&"a"));
525        assert_eq!(map.get(&5), Some(&"e"));
526
527        let keys: Vec<_> = map.keys().copied().collect();
528        assert_eq!(keys, vec![1, 2, 3, 4, 5]);
529    }
530
531    #[test]
532    fn test_map_first_last() {
533        let entries: Vec<(u64, &str)> = vec![(10, "ten"), (20, "twenty"), (30, "thirty")];
534        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
535
536        assert_eq!(map.first_key_value(), Some((&10, &"ten")));
537        assert_eq!(map.last_key_value(), Some((&30, &"thirty")));
538    }
539
540    #[test]
541    fn test_map_iter() {
542        let entries: Vec<(u64, i32)> = vec![(1, 10), (2, 20), (3, 30)];
543        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
544
545        let collected: Vec<_> = map.iter().map(|(&k, &v)| (k, v)).collect();
546        assert_eq!(collected, vec![(1, 10), (2, 20), (3, 30)]);
547    }
548
549    #[test]
550    fn test_map_index_operator() {
551        let entries: Vec<(u64, &str)> = vec![(1, "one"), (2, "two")];
552        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
553
554        assert_eq!(map[&1], "one");
555        assert_eq!(map[&2], "two");
556    }
557
558    #[test]
559    fn test_map_collect() {
560        let map: Map<u64, i32> = vec![(1, 10), (2, 20), (3, 30)].into_iter().collect();
561        assert_eq!(map.len(), 3);
562        assert_eq!(map.get(&2), Some(&20));
563    }
564
565    #[test]
566    fn test_map_get_mut() {
567        let mut map: Map<u64, i32> = vec![(1, 10), (2, 20), (3, 30)].into_iter().collect();
568
569        if let Some(v) = map.get_mut(&2) {
570            *v = 200;
571        }
572        assert_eq!(map.get(&2), Some(&200));
573
574        assert!(map.get_mut(&999).is_none());
575    }
576
577    #[test]
578    fn test_map_values_mut() {
579        let mut map: Map<u64, i32> = vec![(1, 10), (2, 20), (3, 30)].into_iter().collect();
580
581        for v in map.values_mut() {
582            *v *= 10;
583        }
584
585        assert_eq!(map.get(&1), Some(&100));
586        assert_eq!(map.get(&2), Some(&200));
587        assert_eq!(map.get(&3), Some(&300));
588    }
589
590    #[test]
591    fn test_map_empty() {
592        let map: Map<u64, i32> = Map::empty(64, 4);
593        assert!(map.is_empty());
594        assert_eq!(map.len(), 0);
595        assert_eq!(map.get(&0), None);
596        assert_eq!(map.first_key_value(), None);
597        assert_eq!(map.last_key_value(), None);
598    }
599
600    #[test]
601    fn test_map_collect_empty() {
602        let map: Map<u64, i32> = core::iter::empty().collect();
603        assert!(map.is_empty());
604        assert_eq!(map.len(), 0);
605    }
606
607    #[test]
608    fn test_map_insert() {
609        let mut map = Map::build(vec![(1u64, "a"), (3, "c"), (5, "e")], 4, 2).unwrap();
610        assert_eq!(map.len(), 3);
611
612        assert_eq!(map.insert(2, "b"), None);
613        assert_eq!(map.len(), 4);
614        assert_eq!(map.get(&2), Some(&"b"));
615
616        assert_eq!(map.insert(2, "B"), Some("b"));
617        assert_eq!(map.len(), 4);
618        assert_eq!(map.get(&2), Some(&"B"));
619
620        assert_eq!(map.insert(4, "d"), None);
621        let keys: Vec<_> = map.keys().copied().collect();
622        assert_eq!(keys, vec![1, 2, 3, 4, 5]);
623    }
624
625    #[test]
626    fn test_map_insert_into_empty() {
627        let mut map: Map<u64, &str> = Map::empty(64, 4);
628        assert_eq!(map.insert(42, "forty-two"), None);
629        assert_eq!(map.len(), 1);
630        assert_eq!(map.get(&42), Some(&"forty-two"));
631    }
632
633    #[test]
634    fn test_map_extend() {
635        let mut map: Map<u64, i32> = vec![(1, 10), (2, 20)].into_iter().collect();
636        map.extend(vec![(3, 30), (4, 40)]);
637        assert_eq!(map.len(), 4);
638        assert_eq!(map.get(&3), Some(&30));
639        assert_eq!(map.get(&4), Some(&40));
640    }
641
642    #[test]
643    fn test_map_extend_empty() {
644        let mut map: Map<u64, i32> = Map::empty(64, 4);
645        map.extend(vec![(3, 30), (1, 10), (2, 20)]);
646        assert_eq!(map.len(), 3);
647        let keys: Vec<_> = map.keys().copied().collect();
648        assert_eq!(keys, vec![1, 2, 3]);
649    }
650}