libdd_profiling/collections/identifiable/
mod.rs

1// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4mod string_id;
5
6use anyhow::Context;
7use std::hash::{BuildHasherDefault, Hash};
8use std::num::NonZeroU32;
9
/// An [`indexmap::IndexMap`] whose hasher is built from
/// [`rustc_hash::FxHasher`] via [`BuildHasherDefault`].
pub type FxIndexMap<K, V> = indexmap::IndexMap<K, V, BuildHasherDefault<rustc_hash::FxHasher>>;
/// An [`indexmap::IndexSet`] whose hasher is built from
/// [`rustc_hash::FxHasher`] via [`BuildHasherDefault`]. The [`Dedup`]
/// implementation for this type turns the index reported by `insert_full`
/// into the item's Id.
pub type FxIndexSet<K> = indexmap::IndexSet<K, BuildHasherDefault<rustc_hash::FxHasher>>;
12
13pub use string_id::*;
14
/// An identifier for an item held in a collection. Implementors must be
/// `Copy`, `Eq` and `Hash`.
pub trait Id: Copy + Eq + Hash {
    /// The raw representation exposed by [`Id::to_raw_id`].
    type RawId;

    /// Builds an Id from a `usize` offset. The conversion is expected to be
    /// lossless except at the edges of the Id's representable range.
    ///
    /// # Panics
    /// Panics if the offset cannot be represented in the Id, for instance
    /// when it does not fit in the underlying integer type. This is expected
    /// to be ultra-rare (more than `u32::MAX - 1` items created?!).
    fn from_offset(inner: usize) -> Self;

    /// Returns the raw representation of this Id.
    fn to_raw_id(&self) -> Self::RawId;

    /// By-value counterpart of [`Id::to_raw_id`]; unless overridden it simply
    /// forwards to that method.
    fn into_raw_id(self) -> Self::RawId {
        Self::to_raw_id(&self)
    }
}
32
/// A value that can be deduplicated by a [`Dedup`] collection and is referred
/// to afterwards through its associated [`Id`].
pub trait Item: Eq + Hash {
    /// The Id associated with this Item, e.g. Function -> FunctionId.
    type Id: Id;
}
37
/// Creates a non-zero, 32-bit unsigned id from the offset. It's guaranteed to
/// be the offset + 1, with guards to not overflow the size of u32.
///
/// This is useful because many pprof collections do not allow an item with an
/// id of zero, even if it's the first item in the collection.
///
/// Returns `None` when `offset + 1` cannot be represented as a `u32`, i.e.
/// when `offset >= u32::MAX`.
#[inline]
pub fn small_non_zero_pprof_id(offset: usize) -> Option<NonZeroU32> {
    let small = u32::try_from(offset).ok()?;
    // `checked_add(1)` rejects the overflow case, and the safe
    // `NonZeroU32::new` constructor covers the non-zero requirement, so no
    // `unsafe` block is needed here.
    small.checked_add(1).and_then(NonZeroU32::new)
}
50
/// A collection that deduplicates items of type `T` and hands back the
/// [`Item::Id`] associated with each one.
pub trait Dedup<T: Item> {
    /// Deduplicate the Item and return its associated Id.
    /// # Panics
    /// Panics if the number of items overflows the storage capabilities of
    /// the associated Id type.
    fn dedup(&mut self, item: T) -> <T as Item>::Id;

    /// Fallible counterpart of [`Dedup::dedup`].
    ///
    /// NOTE(review): the `FxIndexSet` implementation in this file reports a
    /// failed memory reservation as an error, but still builds the Id with
    /// [`Id::from_offset`], which is documented to panic when the offset does
    /// not fit.
    fn try_dedup(&mut self, item: T) -> anyhow::Result<<T as Item>::Id>;

    /// Deduplicate the Item, and check if the generated Id is valid.
    fn checked_dedup(&mut self, item: T) -> anyhow::Result<<T as Item>::Id>;
}
63
64impl<T: Item> Dedup<T> for FxIndexSet<T> {
65    fn dedup(&mut self, item: T) -> <T as Item>::Id {
66        let (id, _) = self.insert_full(item);
67        <T as Item>::Id::from_offset(id)
68    }
69
70    fn try_dedup(&mut self, item: T) -> anyhow::Result<<T as Item>::Id> {
71        self.try_reserve(1)?;
72        let (id, _) = self.insert_full(item);
73        // todo: add Id::try_from_offset
74        Ok(<T as Item>::Id::from_offset(id))
75    }
76
77    /// In incident 35390 (JIRA PROF-11456) we observed invalid location_ids being present in
78    /// emitted profiles. It's not likely that the incorrect ids are coming from the underlying
79    /// collection, but we're doing extra checks here so that if we see incorrect ids again,
80    /// we are 100% sure they were not introduced at this stage.
81    fn checked_dedup(&mut self, item: T) -> anyhow::Result<<T as Item>::Id> {
82        self.try_reserve(1)
83            .context("failed to reserve memory for deduplicating an item")?;
84        let (id, _) = self.insert_full(item);
85
86        anyhow::ensure!(
87            id < self.len(),
88            "out of bounds id generated {:?}, len was {:?}",
89            id,
90            self.len()
91        );
92        small_non_zero_pprof_id(id).with_context(|| format!("invalid id generated {id:?}"))?;
93
94        Ok(<T as Item>::Id::from_offset(id))
95    }
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Offsets below `u32::MAX` map to `offset + 1`; anything larger has no
    /// valid id.
    #[test]
    fn test_small_non_zero_pprof_id() {
        let representable = [
            (0_usize, 1_u32),
            (1, 2),
            ((u32::MAX - 1) as usize, u32::MAX),
        ];
        for (offset, expected) in representable {
            assert_eq!(NonZeroU32::new(expected), small_non_zero_pprof_id(offset));
        }

        for offset in [u32::MAX as usize, usize::MAX] {
            assert_eq!(None, small_non_zero_pprof_id(offset));
        }
    }
}