pgm_extra/collections/
map.rs

1//! An owned map optimized for read-heavy workloads backed by PGM-Index.
2//!
3//! `Map` is a drop-in replacement for `BTreeMap` in read-heavy workloads
4//! where you build the map once and perform many lookups.
5
6use alloc::vec::Vec;
7use core::cmp::Ordering;
8use core::fmt;
9use core::hash::{Hash, Hasher};
10use core::ops::RangeBounds;
11
12use crate::error::Error;
13use crate::index::external;
14use crate::index::key::Indexable;
15use crate::util::range::range_to_indices;
16
17/// An owned map optimized for read-heavy workloads, backed by a PGM-index.
18///
19/// This is a BTreeMap-like container that owns its data and provides
20/// efficient lookups using a learned index. Mutations are supported but
21/// trigger O(n) index rebuilds; for frequent updates use [`crate::Dynamic`].
22///
23/// Works with any key type that implements [`Indexable`]:
24/// - Numeric types (u64, i32, etc.) are indexed directly
25/// - String/bytes types use prefix extraction
26///
27/// # Example
28///
29/// ```
30/// use pgm_extra::Map;
31///
32/// // Numeric keys
33/// let entries: Vec<(u64, &str)> = vec![(1, "one"), (2, "two"), (3, "three")];
34/// let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
35/// assert_eq!(map.get(&2), Some(&"two"));
36///
37/// // String keys
38/// let entries = vec![("apple", 1), ("banana", 2), ("cherry", 3)];
39/// let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
40/// assert_eq!(map.get(&"banana"), Some(&2));
41/// ```
42#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
43#[cfg_attr(
44    feature = "serde",
45    serde(
46        bound = "K: serde::Serialize + serde::de::DeserializeOwned, V: serde::Serialize + serde::de::DeserializeOwned, K::Key: serde::Serialize + serde::de::DeserializeOwned"
47    )
48)]
49pub struct Map<K: Indexable, V> {
50    keys: Vec<K>,
51    values: Vec<V>,
52    index: Option<external::Static<K>>,
53    epsilon: usize,
54    epsilon_recursive: usize,
55}
56
57impl<K: Indexable + Ord, V> Map<K, V>
58where
59    K::Key: Ord,
60{
61    /// Create a map from pre-sorted, unique key-value pairs.
62    ///
63    /// # Panics
64    ///
65    /// Debug builds will panic if keys are not sorted or contain duplicates.
66    pub fn from_sorted_unique(
67        entries: Vec<(K, V)>,
68        epsilon: usize,
69        epsilon_recursive: usize,
70    ) -> Result<Self, Error> {
71        let (keys, values): (Vec<K>, Vec<V>) = entries.into_iter().unzip();
72
73        debug_assert!(
74            keys.windows(2).all(|w| w[0] < w[1]),
75            "keys must be sorted and unique"
76        );
77
78        let index = if keys.is_empty() {
79            None
80        } else {
81            Some(external::Static::new(&keys, epsilon, epsilon_recursive)?)
82        };
83        Ok(Self {
84            keys,
85            values,
86            index,
87            epsilon,
88            epsilon_recursive,
89        })
90    }
91
92    /// Create a map from an unsorted iterator of key-value pairs.
93    ///
94    /// If duplicate keys exist, the last value wins (like `BTreeMap::from_iter`).
95    pub fn build<I>(iter: I, epsilon: usize, epsilon_recursive: usize) -> Result<Self, Error>
96    where
97        I: IntoIterator<Item = (K, V)>,
98    {
99        let mut entries: Vec<(K, V)> = iter.into_iter().collect();
100        entries.sort_by(|a, b| a.0.cmp(&b.0));
101        entries.dedup_by(|a, b| a.0 == b.0);
102
103        Self::from_sorted_unique(entries, epsilon, epsilon_recursive)
104    }
105
106    pub fn empty(epsilon: usize, epsilon_recursive: usize) -> Self {
107        Self {
108            keys: Vec::new(),
109            values: Vec::new(),
110            index: None,
111            epsilon,
112            epsilon_recursive,
113        }
114    }
115
116    pub fn new(entries: Vec<(K, V)>) -> Result<Self, Error> {
117        Self::build(entries, 64, 4)
118    }
119
120    #[inline]
121    pub fn get(&self, key: &K) -> Option<&V> {
122        let index = self.index.as_ref()?;
123        let approx = index.search(key);
124
125        let lo = approx.lo;
126        let hi = approx.hi.min(self.keys.len());
127
128        for i in lo..hi {
129            match self.keys[i].cmp(key) {
130                Ordering::Equal => return Some(&self.values[i]),
131                Ordering::Greater => return None,
132                Ordering::Less => continue,
133            }
134        }
135        None
136    }
137
138    #[inline]
139    pub fn get_key_value(&self, key: &K) -> Option<(&K, &V)> {
140        let index = self.index.as_ref()?;
141        let approx = index.search(key);
142
143        let lo = approx.lo;
144        let hi = approx.hi.min(self.keys.len());
145
146        for i in lo..hi {
147            match self.keys[i].cmp(key) {
148                Ordering::Equal => return Some((&self.keys[i], &self.values[i])),
149                Ordering::Greater => return None,
150                Ordering::Less => continue,
151            }
152        }
153        None
154    }
155
156    #[inline]
157    pub fn contains_key(&self, key: &K) -> bool {
158        self.get(key).is_some()
159    }
160
161    /// Find the index of the first key >= the given key.
162    #[inline]
163    pub fn lower_bound(&self, key: &K) -> usize {
164        match &self.index {
165            Some(index) => index.lower_bound(&self.keys, key),
166            None => 0,
167        }
168    }
169
170    /// Find the index of the first key > the given key.
171    #[inline]
172    pub fn upper_bound(&self, key: &K) -> usize {
173        match &self.index {
174            Some(index) => index.upper_bound(&self.keys, key),
175            None => 0,
176        }
177    }
178
179    #[inline]
180    pub fn range<R>(&self, range: R) -> impl DoubleEndedIterator<Item = (&K, &V)>
181    where
182        R: RangeBounds<K>,
183    {
184        let (start, end) = range_to_indices(
185            range,
186            self.keys.len(),
187            |k| self.lower_bound(k),
188            |k| self.upper_bound(k),
189        );
190        self.keys[start..end]
191            .iter()
192            .zip(self.values[start..end].iter())
193    }
194
195    #[inline]
196    pub fn first_key_value(&self) -> Option<(&K, &V)> {
197        if self.keys.is_empty() {
198            None
199        } else {
200            Some((&self.keys[0], &self.values[0]))
201        }
202    }
203
204    #[inline]
205    pub fn last_key_value(&self) -> Option<(&K, &V)> {
206        if self.keys.is_empty() {
207            None
208        } else {
209            let last = self.keys.len() - 1;
210            Some((&self.keys[last], &self.values[last]))
211        }
212    }
213
214    #[inline]
215    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&K, &V)> + DoubleEndedIterator {
216        self.keys.iter().zip(self.values.iter())
217    }
218
219    #[inline]
220    pub fn keys(&self) -> impl ExactSizeIterator<Item = &K> + DoubleEndedIterator {
221        self.keys.iter()
222    }
223
224    #[inline]
225    pub fn values(&self) -> impl ExactSizeIterator<Item = &V> + DoubleEndedIterator {
226        self.values.iter()
227    }
228
229    #[inline]
230    pub fn values_mut(&mut self) -> impl ExactSizeIterator<Item = &mut V> + DoubleEndedIterator {
231        self.values.iter_mut()
232    }
233
234    #[inline]
235    pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
236        let index = self.index.as_ref()?;
237        let approx = index.search(key);
238
239        let lo = approx.lo;
240        let hi = approx.hi.min(self.keys.len());
241
242        for i in lo..hi {
243            match self.keys[i].cmp(key) {
244                Ordering::Equal => return Some(&mut self.values[i]),
245                Ordering::Greater => return None,
246                Ordering::Less => continue,
247            }
248        }
249        None
250    }
251
252    #[inline]
253    pub fn len(&self) -> usize {
254        self.keys.len()
255    }
256
257    #[inline]
258    pub fn is_empty(&self) -> bool {
259        self.keys.is_empty()
260    }
261
262    /// Get the number of segments in the underlying index.
263    #[inline]
264    pub fn segments_count(&self) -> usize {
265        self.index.as_ref().map_or(0, |i| i.segments_count())
266    }
267
268    /// Get the height of the underlying index.
269    #[inline]
270    pub fn height(&self) -> usize {
271        self.index.as_ref().map_or(0, |i| i.height())
272    }
273
274    /// Get the epsilon value.
275    #[inline]
276    pub fn epsilon(&self) -> usize {
277        self.epsilon
278    }
279
280    /// Get the epsilon_recursive value.
281    #[inline]
282    pub fn epsilon_recursive(&self) -> usize {
283        self.epsilon_recursive
284    }
285
286    /// Approximate memory usage in bytes.
287    pub fn size_in_bytes(&self) -> usize {
288        self.index.as_ref().map_or(0, |i| i.size_in_bytes())
289            + self.keys.capacity() * core::mem::size_of::<K>()
290            + self.values.capacity() * core::mem::size_of::<V>()
291    }
292
293    /// Get a reference to the underlying keys slice.
294    #[inline]
295    pub fn keys_slice(&self) -> &[K] {
296        &self.keys
297    }
298
299    /// Get a reference to the underlying values slice.
300    #[inline]
301    pub fn values_slice(&self) -> &[V] {
302        &self.values
303    }
304
305    /// Consume the map and return the underlying key-value pairs.
306    #[inline]
307    pub fn into_vec(self) -> Vec<(K, V)> {
308        self.keys.into_iter().zip(self.values).collect()
309    }
310
311    /// Get a reference to the underlying index.
312    #[inline]
313    pub fn index(&self) -> Option<&external::Static<K>> {
314        self.index.as_ref()
315    }
316
317    /// Insert a key-value pair into the map.
318    ///
319    /// Returns the old value if the key already existed, or `None` if it was newly inserted.
320    ///
321    /// **Note**: This rebuilds the entire index, making it O(n) per insertion.
322    /// For bulk insertions, prefer collecting into a new map.
323    /// For frequent mutations, consider using `index::owned::Dynamic` directly.
324    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
325        if let Some(existing) = self.get_mut(&key) {
326            return Some(core::mem::replace(existing, value));
327        }
328
329        let mut entries: Vec<(K, V)> = core::mem::take(&mut self.keys)
330            .into_iter()
331            .zip(core::mem::take(&mut self.values))
332            .collect();
333        entries.push((key, value));
334        entries.sort_by(|a, b| a.0.cmp(&b.0));
335
336        if let Ok(new_map) = Self::from_sorted_unique(entries, self.epsilon, self.epsilon_recursive)
337        {
338            *self = new_map;
339        }
340        None
341    }
342}
343
344impl<K: Indexable + Ord, V> core::ops::Index<&K> for Map<K, V>
345where
346    K::Key: Ord,
347{
348    type Output = V;
349
350    /// Returns a reference to the value corresponding to the key.
351    ///
352    /// # Panics
353    ///
354    /// Panics if the key is not present in the map.
355    #[inline]
356    fn index(&self, key: &K) -> &Self::Output {
357        self.get(key).expect("key not found")
358    }
359}
360
361// Standard trait implementations
362
363impl<K: Indexable + Clone, V: Clone> Clone for Map<K, V>
364where
365    K::Key: Clone,
366{
367    fn clone(&self) -> Self {
368        Self {
369            keys: self.keys.clone(),
370            values: self.values.clone(),
371            index: self.index.clone(),
372            epsilon: self.epsilon,
373            epsilon_recursive: self.epsilon_recursive,
374        }
375    }
376}
377
378impl<K: Indexable + fmt::Debug, V: fmt::Debug> fmt::Debug for Map<K, V> {
379    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380        f.debug_map()
381            .entries(self.keys.iter().zip(self.values.iter()))
382            .finish()
383    }
384}
385
386impl<K: Indexable + Ord + PartialEq, V: PartialEq> PartialEq for Map<K, V> {
387    fn eq(&self, other: &Self) -> bool {
388        self.keys == other.keys && self.values == other.values
389    }
390}
391
392impl<K: Indexable + Ord + Eq, V: Eq> Eq for Map<K, V> {}
393
394impl<K: Indexable + Hash, V: Hash> Hash for Map<K, V> {
395    fn hash<H: Hasher>(&self, state: &mut H) {
396        for (k, v) in self.keys.iter().zip(self.values.iter()) {
397            k.hash(state);
398            v.hash(state);
399        }
400    }
401}
402
403impl<K: Indexable + Ord, V> IntoIterator for Map<K, V>
404where
405    K::Key: Ord,
406{
407    type Item = (K, V);
408    type IntoIter = alloc::vec::IntoIter<(K, V)>;
409
410    fn into_iter(self) -> Self::IntoIter {
411        self.keys
412            .into_iter()
413            .zip(self.values)
414            .collect::<Vec<_>>()
415            .into_iter()
416    }
417}
418
419impl<'a, K: Indexable, V> IntoIterator for &'a Map<K, V> {
420    type Item = (&'a K, &'a V);
421    type IntoIter = core::iter::Zip<core::slice::Iter<'a, K>, core::slice::Iter<'a, V>>;
422
423    fn into_iter(self) -> Self::IntoIter {
424        self.keys.iter().zip(self.values.iter())
425    }
426}
427
428impl<K: Indexable + Ord, V> FromIterator<(K, V)> for Map<K, V>
429where
430    K::Key: Ord,
431{
432    /// Creates a Map from an iterator.
433    ///
434    /// Returns an empty map if the iterator is empty.
435    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
436        Self::build(iter, 64, 4).unwrap_or_else(|_| Self::empty(64, 4))
437    }
438}
439
440impl<K: Indexable + Ord, V> core::iter::Extend<(K, V)> for Map<K, V>
441where
442    K::Key: Ord,
443{
444    /// Extends the map with key-value pairs from an iterator.
445    ///
446    /// If duplicate keys exist, the last value wins.
447    ///
448    /// **Note**: This rebuilds the entire index, making it O(n) per call.
449    /// For bulk insertions, prefer collecting into a new map.
450    /// For frequent mutations, consider using `index::owned::Dynamic` directly.
451    fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
452        let mut entries: Vec<(K, V)> = core::mem::take(&mut self.keys)
453            .into_iter()
454            .zip(core::mem::take(&mut self.values))
455            .collect();
456        entries.extend(iter);
457        entries.sort_by(|a, b| a.0.cmp(&b.0));
458        entries.dedup_by(|a, b| a.0 == b.0);
459
460        match Self::from_sorted_unique(entries, self.epsilon, self.epsilon_recursive) {
461            Ok(new_map) => *self = new_map,
462            Err(_) => {
463                *self = Self::empty(self.epsilon, self.epsilon_recursive);
464            }
465        }
466    }
467}
468
469#[cfg(test)]
470mod tests {
471    use super::*;
472    use alloc::string::String;
473    use alloc::vec;
474
475    #[test]
476    fn test_map_numeric() {
477        let entries: Vec<(u64, i32)> = (0..1000).map(|i| (i, i as i32 * 2)).collect();
478        let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
479
480        assert_eq!(map.len(), 1000);
481        assert_eq!(map.get(&500), Some(&1000));
482        assert_eq!(map.get(&1001), None);
483    }
484
485    #[test]
486    fn test_map_string_keys() {
487        let entries = vec![("apple", 1), ("banana", 2), ("cherry", 3), ("date", 4)];
488        let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
489
490        assert_eq!(map.get(&"banana"), Some(&2));
491        assert_eq!(map.get(&"cherry"), Some(&3));
492        assert_eq!(map.get(&"elderberry"), None);
493    }
494
495    #[test]
496    fn test_map_owned_string_keys() {
497        let entries: Vec<(String, i32)> =
498            vec![("alpha".into(), 1), ("beta".into(), 2), ("gamma".into(), 3)];
499        let map = Map::from_sorted_unique(entries, 64, 4).unwrap();
500
501        assert_eq!(map.get(&String::from("beta")), Some(&2));
502        assert!(map.contains_key(&String::from("alpha")));
503    }
504
505    #[test]
506    fn test_map_get_key_value() {
507        let entries: Vec<(u64, &str)> = vec![(1, "a"), (2, "b")];
508        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
509
510        assert_eq!(map.get_key_value(&1), Some((&1, &"a")));
511        assert_eq!(map.get_key_value(&3), None);
512    }
513
514    #[test]
515    fn test_map_build() {
516        let entries = vec![(5u64, "e"), (3, "c"), (1, "a"), (4, "d"), (2, "b")];
517        let map = Map::build(entries, 4, 2).unwrap();
518
519        assert_eq!(map.len(), 5);
520        assert_eq!(map.get(&1), Some(&"a"));
521        assert_eq!(map.get(&5), Some(&"e"));
522
523        let keys: Vec<_> = map.keys().copied().collect();
524        assert_eq!(keys, vec![1, 2, 3, 4, 5]);
525    }
526
527    #[test]
528    fn test_map_first_last() {
529        let entries: Vec<(u64, &str)> = vec![(10, "ten"), (20, "twenty"), (30, "thirty")];
530        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
531
532        assert_eq!(map.first_key_value(), Some((&10, &"ten")));
533        assert_eq!(map.last_key_value(), Some((&30, &"thirty")));
534    }
535
536    #[test]
537    fn test_map_iter() {
538        let entries: Vec<(u64, i32)> = vec![(1, 10), (2, 20), (3, 30)];
539        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
540
541        let collected: Vec<_> = map.iter().map(|(&k, &v)| (k, v)).collect();
542        assert_eq!(collected, vec![(1, 10), (2, 20), (3, 30)]);
543    }
544
545    #[test]
546    fn test_map_index_operator() {
547        let entries: Vec<(u64, &str)> = vec![(1, "one"), (2, "two")];
548        let map = Map::from_sorted_unique(entries, 4, 2).unwrap();
549
550        assert_eq!(map[&1], "one");
551        assert_eq!(map[&2], "two");
552    }
553
554    #[test]
555    fn test_map_collect() {
556        let map: Map<u64, i32> = vec![(1, 10), (2, 20), (3, 30)].into_iter().collect();
557        assert_eq!(map.len(), 3);
558        assert_eq!(map.get(&2), Some(&20));
559    }
560
561    #[test]
562    fn test_map_get_mut() {
563        let mut map: Map<u64, i32> = vec![(1, 10), (2, 20), (3, 30)].into_iter().collect();
564
565        if let Some(v) = map.get_mut(&2) {
566            *v = 200;
567        }
568        assert_eq!(map.get(&2), Some(&200));
569
570        assert!(map.get_mut(&999).is_none());
571    }
572
573    #[test]
574    fn test_map_values_mut() {
575        let mut map: Map<u64, i32> = vec![(1, 10), (2, 20), (3, 30)].into_iter().collect();
576
577        for v in map.values_mut() {
578            *v *= 10;
579        }
580
581        assert_eq!(map.get(&1), Some(&100));
582        assert_eq!(map.get(&2), Some(&200));
583        assert_eq!(map.get(&3), Some(&300));
584    }
585
586    #[test]
587    fn test_map_empty() {
588        let map: Map<u64, i32> = Map::empty(64, 4);
589        assert!(map.is_empty());
590        assert_eq!(map.len(), 0);
591        assert_eq!(map.get(&0), None);
592        assert_eq!(map.first_key_value(), None);
593        assert_eq!(map.last_key_value(), None);
594    }
595
596    #[test]
597    fn test_map_collect_empty() {
598        let map: Map<u64, i32> = core::iter::empty().collect();
599        assert!(map.is_empty());
600        assert_eq!(map.len(), 0);
601    }
602
603    #[test]
604    fn test_map_insert() {
605        let mut map = Map::build(vec![(1u64, "a"), (3, "c"), (5, "e")], 4, 2).unwrap();
606        assert_eq!(map.len(), 3);
607
608        assert_eq!(map.insert(2, "b"), None);
609        assert_eq!(map.len(), 4);
610        assert_eq!(map.get(&2), Some(&"b"));
611
612        assert_eq!(map.insert(2, "B"), Some("b"));
613        assert_eq!(map.len(), 4);
614        assert_eq!(map.get(&2), Some(&"B"));
615
616        assert_eq!(map.insert(4, "d"), None);
617        let keys: Vec<_> = map.keys().copied().collect();
618        assert_eq!(keys, vec![1, 2, 3, 4, 5]);
619    }
620
621    #[test]
622    fn test_map_insert_into_empty() {
623        let mut map: Map<u64, &str> = Map::empty(64, 4);
624        assert_eq!(map.insert(42, "forty-two"), None);
625        assert_eq!(map.len(), 1);
626        assert_eq!(map.get(&42), Some(&"forty-two"));
627    }
628
629    #[test]
630    fn test_map_extend() {
631        let mut map: Map<u64, i32> = vec![(1, 10), (2, 20)].into_iter().collect();
632        map.extend(vec![(3, 30), (4, 40)]);
633        assert_eq!(map.len(), 4);
634        assert_eq!(map.get(&3), Some(&30));
635        assert_eq!(map.get(&4), Some(&40));
636    }
637
638    #[test]
639    fn test_map_extend_empty() {
640        let mut map: Map<u64, i32> = Map::empty(64, 4);
641        map.extend(vec![(3, 30), (1, 10), (2, 20)]);
642        assert_eq!(map.len(), 3);
643        let keys: Vec<_> = map.keys().copied().collect();
644        assert_eq!(keys, vec![1, 2, 3]);
645    }
646}