Skip to main content

libdd_trace_utils/span/
vec_map.rs

1// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4//! This module defines a associative map datastructure for spans data (meta, metrics, etc.) backed
5//! by a vector. Spans are mostly allocated and constructed, and more rarely read or mutated.
6//! [VecMap] is thus optimized for insertion (which is just `Vec::push`), without any hashing
7//! involved. Fetching and removing a value is, on the other hand, linear time in the size of the
8//! map. However, since meta and metrics are expected to be typically small (20ish elements or
9//! less), linear scan is usually still competitive with hashmap's `get`.
10
11use serde::ser::{Serialize, Serializer};
12use std::borrow::Borrow;
13use std::collections::HashSet;
14use std::hash::Hash;
15
16/// A Vec-backed map that provides HashMap-like lookup by key.
17///
18/// # Duplicates
19///
20/// Duplicates are tolerated: [VecMap::insert] always appends, and [VecMap::get]/[VecMap::get_mut]
21/// return the *last* matching entry so that later writes shadow earlier ones. This optimizes for
22/// fast insertion and construction (that might happen on the client's application hot path),
23/// avoiding a linear scan on each insert, or a potential full re-hashing with a hashmap.
24/// Additionally, while overriding a metric or a meta definitively happens, it's assumed to be rare
25/// enough so such that the size penalty of duplication is expected to be reasonable.
26///
27/// **Important**: note that only [VecMap::get] and [VecMap::get_mut] are duplicate-aware, so to
28/// speak. [VecMap::len], [VecMap::iter], and others just delegates to the underlying `Vec`, and
29/// won't deduplicate.
30///
31/// Explicit deduplication is currently being done on-demand by [VecMap::dedup]. An internal flag is
32/// used to avoid undue deduplication (see [VecMap::dedup]). `VecMap` is automatically deduped
33/// before serialization.
34///
35/// In the future, we could trigger deduplication on other events, for example at insertion if the
36/// size is bigger than a threshold (and we haven't deduped for `x` operations).
37///
38/// # Ordering
39///
40/// As this is a map, iteration order is not defined nor guaranteed. In practice, iteration follows
41/// insertion order, but [Self::dedup] will reverse the underlying vector.
42#[derive(Clone, Debug, PartialEq)]
43pub struct VecMap<K, V> {
44    data: Vec<(K, V)>,
45    /// Deduped is a flag that is set after entry deduplication. It is dirtied (set to `false`)
46    /// when any modification that could create duplicates is performed (`deduped == false`
47    /// doesn't imply there are actual duplicates, just than there might be). This is useful to
48    /// avoid performing deduplication several times in a row, for example in the export
49    /// pipeline.
50    deduped: bool,
51}
52
53impl<K, V> Default for VecMap<K, V> {
54    fn default() -> Self {
55        Self {
56            data: Default::default(),
57            deduped: false,
58        }
59    }
60}
61
62impl<K, V> VecMap<K, V> {
63    #[must_use]
64    #[inline]
65    pub fn new() -> Self {
66        Self::default()
67    }
68
69    /// Dirty the `dedup` flag after a mutation that could introduce duplicates.
70    fn dirty(&mut self) {
71        self.deduped = false;
72    }
73
74    #[must_use]
75    #[inline]
76    pub fn with_capacity(capacity: usize) -> Self {
77        VecMap {
78            data: Vec::with_capacity(capacity),
79            deduped: false,
80        }
81    }
82
83    #[inline]
84    pub fn insert(&mut self, key: K, value: V) {
85        self.data.push((key, value));
86        self.dirty();
87    }
88
89    #[inline]
90    pub fn get<Q>(&self, key: &Q) -> Option<&V>
91    where
92        K: Borrow<Q>,
93        Q: ?Sized + PartialEq,
94    {
95        self.data
96            .iter()
97            .rev()
98            .find(|(k, _)| k.borrow() == key)
99            .map(|(_, v)| v)
100    }
101
102    #[inline]
103    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
104    where
105        K: Borrow<Q>,
106        Q: ?Sized + PartialEq,
107    {
108        self.data
109            .iter_mut()
110            .rev()
111            .find(|(k, _)| (*k).borrow() == key)
112            .map(|(_, v)| v)
113    }
114
115    #[inline]
116    pub fn contains_key<Q>(&self, key: &Q) -> bool
117    where
118        K: Borrow<Q>,
119        Q: ?Sized + PartialEq,
120    {
121        self.data.iter().any(|(k, _)| k.borrow() == key)
122    }
123
124    /// Remove all entries matching this key from the map. This method uses [Vec::retain], and is
125    /// thus potentially costly (like any removal in a vector-like datastructure).
126    // Note: we might implement a tombstone or option-based deletion later, if removal is a bit too
127    // costly.
128    #[inline]
129    pub fn remove_slow<Q>(&mut self, key: &Q)
130    where
131        K: Borrow<Q>,
132        Q: ?Sized + PartialEq,
133    {
134        self.data.retain(|(k, _)| k.borrow() != key);
135    }
136
137    /// Iterate over the element, including duplicate entries.
138    #[inline]
139    pub fn iter(&self) -> std::slice::Iter<'_, (K, V)> {
140        self.data.iter()
141    }
142
143    /// Iterate mutably over the elements, including duplicate entries.
144    #[inline]
145    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, (K, V)> {
146        self.dirty();
147        self.data.iter_mut()
148    }
149
150    /// Return the length of the underlying vector, thus including duplicate entries.
151    #[inline]
152    pub fn len(&self) -> usize {
153        self.data.len()
154    }
155
156    #[inline]
157    pub fn is_empty(&self) -> bool {
158        self.data.is_empty()
159    }
160
161    /// Return `true` if the map hasn't been extended since the last call to [Self::dedup],
162    /// guaranteeing that the underlying vector doesn't have any duplicate key.
163    ///
164    /// If `is_deduped` returns `false`, the map may have duplicate keys.
165    #[inline]
166    pub fn is_deduped(&self) -> bool {
167        self.deduped
168    }
169}
170
171impl<K: Hash + Eq + Clone, V> VecMap<K, V> {
172    /// Remove entries with a duplicate key, only keeping the last one. After this, a flag is set
173    /// internally, such that as long as the map isn't extended or mutably iterated, the next
174    /// [Self::dedup] doesn't perform the work again.
175    pub fn dedup(&mut self) {
176        if self.deduped {
177            return;
178        }
179
180        // Since we're going to shuffle elements around, it's not easy to keep references to keys in
181        // the deduping set. The simplest is to clone them.
182        let mut seen = HashSet::with_capacity(self.len());
183
184        self.data.reverse();
185        self.data.retain(|(k, _)| seen.insert(k.clone()));
186        self.deduped = true;
187    }
188}
189
190impl<K, V> From<Vec<(K, V)>> for VecMap<K, V> {
191    fn from(data: Vec<(K, V)>) -> Self {
192        Self {
193            data,
194            deduped: false,
195        }
196    }
197}
198
199impl<K, V> From<VecMap<K, V>> for Vec<(K, V)> {
200    fn from(value: VecMap<K, V>) -> Self {
201        value.data
202    }
203}
204
205impl<K, V> FromIterator<(K, V)> for VecMap<K, V> {
206    fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> Self {
207        Self {
208            data: iter.into_iter().collect(),
209            deduped: false,
210        }
211    }
212}
213
214impl<K, V> IntoIterator for VecMap<K, V> {
215    type Item = (K, V);
216    type IntoIter = std::vec::IntoIter<(K, V)>;
217
218    fn into_iter(self) -> Self::IntoIter {
219        self.data.into_iter()
220    }
221}
222
223impl<'a, K, V> IntoIterator for &'a VecMap<K, V> {
224    type Item = &'a (K, V);
225    type IntoIter = std::slice::Iter<'a, (K, V)>;
226
227    fn into_iter(self) -> Self::IntoIter {
228        self.data.iter()
229    }
230}
231
232impl<'a, K, V> IntoIterator for &'a mut VecMap<K, V> {
233    type Item = &'a mut (K, V);
234    type IntoIter = std::slice::IterMut<'a, (K, V)>;
235
236    fn into_iter(self) -> Self::IntoIter {
237        // Since we iterate on keys as well, they can modified, and introduce duplicates.
238        self.dirty();
239        self.data.iter_mut()
240    }
241}
242
243impl<K, V> Extend<(K, V)> for VecMap<K, V> {
244    fn extend<I: IntoIterator<Item = (K, V)>>(&mut self, iter: I) {
245        self.dirty();
246        self.data.extend(iter);
247    }
248}
249
250impl<K: Serialize + Eq + Hash, V: Serialize> Serialize for VecMap<K, V> {
251    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
252        use serde::ser::SerializeMap;
253        use std::collections::HashMap;
254
255        // We pre-compute the deduped map. If deduplication were done on the fly during
256        // serialization, we couldn't provide a length up front to the serializer, and the current
257        // one (rmp) will allocate an intermediate buffer defensively.
258        if self.deduped {
259            let mut map_ser = serializer.serialize_map(Some(self.len()))?;
260
261            for (k, v) in self {
262                map_ser.serialize_entry(k, v)?;
263            }
264
265            map_ser.end()
266        } else {
267            // Note: using `dedup` would need an additional `clone()` of the whole map here. We can
268            // use references instead.
269            self.data
270                .iter()
271                .map(|(k, v)| (k, v))
272                // Since the iterator is sized, `collect()` should pre-allocate with the right
273                // capacity in one shot.
274                .collect::<HashMap<&K, &V>>()
275                .serialize(serializer)
276        }
277    }
278}
279
280#[cfg(test)]
281mod tests {
282    use super::*;
283
284    #[test]
285    fn get_returns_last_inserted() {
286        let mut m = VecMap::new();
287        m.insert("a", 1);
288        m.insert("a", 2);
289        assert_eq!(m.get("a"), Some(&2));
290    }
291
292    #[test]
293    fn get_mut_returns_last_inserted() {
294        let mut m = VecMap::new();
295        m.insert("a", 1);
296        m.insert("a", 2);
297        *m.get_mut("a").unwrap() = 42;
298        assert_eq!(m.get("a"), Some(&42));
299        // First entry unchanged
300        assert_eq!(m.iter().next().unwrap().1, 1);
301    }
302
303    #[test]
304    fn remove_removes_all_occurrences() {
305        let mut m = VecMap::new();
306        m.insert("a", 1);
307        m.insert("b", 2);
308        m.insert("a", 3);
309        m.remove_slow("a");
310        assert_eq!(m.get("a"), None);
311        assert!(!m.contains_key("a"));
312        assert_eq!(m.len(), 1);
313    }
314
315    #[test]
316    fn contains_key_works() {
317        let mut m = VecMap::new();
318        assert!(!m.contains_key("x"));
319        m.insert("x", 10);
320        assert!(m.contains_key("x"));
321    }
322
323    #[test]
324    fn from_iterator() {
325        let m: VecMap<&str, i32> = vec![("a", 1), ("b", 2)].into_iter().collect();
326        assert_eq!(m.len(), 2);
327        assert_eq!(m.get("b"), Some(&2));
328    }
329
330    #[test]
331    fn into_iter_consuming() {
332        let mut m = VecMap::new();
333        m.insert("a", 1);
334        m.insert("b", 2);
335        let pairs: Vec<_> = m.into_iter().collect();
336        assert_eq!(pairs, vec![("a", 1), ("b", 2)]);
337    }
338
339    #[test]
340    fn is_deduped_false_initially() {
341        let m: VecMap<&str, i32> = VecMap::new();
342        assert!(!m.is_deduped());
343    }
344
345    #[test]
346    fn is_deduped_false_after_from() {
347        let m: VecMap<&str, i32> = vec![("a", 1)].into();
348        assert!(!m.is_deduped());
349    }
350
351    #[test]
352    fn is_deduped_false_after_collect() {
353        let m: VecMap<&str, i32> = vec![("a", 1)].into_iter().collect();
354        assert!(!m.is_deduped());
355    }
356
357    #[test]
358    fn dedup_sets_flag() {
359        let mut m = VecMap::new();
360        m.insert("a", 1);
361        assert!(!m.is_deduped());
362        m.dedup();
363        assert!(m.is_deduped());
364    }
365
366    #[test]
367    fn dedup_on_empty_map() {
368        let mut m: VecMap<String, i32> = VecMap::new();
369        m.dedup();
370        assert!(m.is_deduped());
371        assert!(m.is_empty());
372    }
373
374    #[test]
375    fn dedup_no_duplicates() {
376        let mut m = VecMap::new();
377        m.insert("a", 1);
378        m.insert("b", 2);
379        m.insert("c", 3);
380        m.dedup();
381        assert_eq!(m.len(), 3);
382        assert_eq!(m.get("a"), Some(&1));
383        assert_eq!(m.get("b"), Some(&2));
384        assert_eq!(m.get("c"), Some(&3));
385    }
386
387    #[test]
388    fn dedup_keeps_last_value() {
389        let mut m = VecMap::new();
390        m.insert("a", 1);
391        m.insert("b", 10);
392        m.insert("a", 2);
393        m.insert("a", 3);
394        m.insert("b", 20);
395        m.dedup();
396        assert_eq!(m.len(), 2);
397        assert_eq!(m.get("a"), Some(&3));
398        assert_eq!(m.get("b"), Some(&20));
399    }
400
401    #[test]
402    fn dedup_is_idempotent() {
403        let mut m = VecMap::new();
404        m.insert("a", 1);
405        m.insert("a", 2);
406        m.dedup();
407        assert!(m.is_deduped());
408        assert_eq!(m.len(), 1);
409        m.dedup();
410        assert!(m.is_deduped());
411        assert_eq!(m.len(), 1);
412        assert_eq!(m.get("a"), Some(&2));
413    }
414
415    #[test]
416    fn insert_dirties_dedup_flag() {
417        let mut m = VecMap::new();
418        m.insert("a", 1);
419        m.dedup();
420        assert!(m.is_deduped());
421
422        m.insert("b", 2);
423        assert!(!m.is_deduped());
424    }
425
426    #[test]
427    fn extend_dirties_dedup_flag() {
428        let mut m = VecMap::new();
429        m.insert("a", 1);
430        m.dedup();
431        assert!(m.is_deduped());
432
433        m.extend(vec![("b", 2)]);
434        assert!(!m.is_deduped());
435    }
436
437    #[test]
438    fn iter_mut_dirties_dedup_flag() {
439        let mut m = VecMap::new();
440        m.insert("a", 1);
441        m.dedup();
442        assert!(m.is_deduped());
443
444        for (_, v) in m.iter_mut() {
445            *v += 1;
446        }
447
448        assert!(!m.is_deduped());
449    }
450
451    #[test]
452    fn serialize_deduplicates_keeping_last() {
453        let mut m = VecMap::new();
454        m.insert("a", 0);
455        m.insert("b", 0);
456        m.insert("b", 1);
457        m.insert("a", 1);
458        m.insert("a", 3);
459        m.insert("b", 2);
460
461        let serialized: serde_json::Value = serde_json::to_value(&m).unwrap();
462        let obj = serialized.as_object().unwrap();
463
464        assert_eq!(obj.len(), 2);
465        assert_eq!(obj.get("a").unwrap(), 3);
466        assert_eq!(obj.get("b").unwrap(), 2);
467    }
468}