libdd_profiling/profiles/collections/
slice_set.rs

1// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use super::{SetError, ThinSlice};
5use core::hash::{BuildHasher, Hash};
6use core::hint::unreachable_unchecked;
7use hashbrown::HashTable;
8use libdd_alloc::{ChainAllocator, VirtualAllocator};
9
10use super::SetHasher as Hasher;
11
12/// Holds unique slices and provides handles to fetch them later.
13pub struct SliceSet<T: Copy + Hash + Eq + 'static> {
14    /// The bytes of each slice stored in `slices` are allocated here.
15    pub(crate) arena: ChainAllocator<VirtualAllocator>,
16
17    /// The unordered hash set of unique slices.
18    /// The static lifetimes are a lie; they are tied to the `arena`, which is
19    /// only moved if the slice set is moved.
20    /// References to the underlying slices should generally not be handed,
21    /// but if they are, they should be bound to the slice set's lifetime.
22    pub(crate) slices: HashTable<ThinSlice<'static, T>>,
23}
24
25impl<T: Copy + Hash + Eq + 'static> SliceSet<T> {
26    const SIZE_HINT: usize = 1024 * 1024;
27
28    pub fn try_with_capacity(capacity: usize) -> Result<Self, SetError> {
29        let arena = ChainAllocator::new_in(Self::SIZE_HINT, VirtualAllocator {});
30
31        let mut slices = HashTable::new();
32        // SAFETY: we just made the empty hash table, so there's nothing that
33        // needs to be rehashed.
34        slices.try_reserve(capacity, |_| unsafe { unreachable_unchecked() })?;
35
36        Ok(SliceSet { arena, slices })
37    }
38
39    /// # Safety
40    ///
41    /// The slice must not already exist within the set.
42    pub unsafe fn insert_unique_uncontended(
43        &mut self,
44        slice: &[T],
45    ) -> Result<ThinSlice<'static, T>, SetError> {
46        let hash = Hasher::default().hash_one(slice);
47        self.insert_unique_uncontended_with_hash(hash, slice)
48    }
49
50    /// # Safety
51    ///  1. The hash must be the same as if the slice was re-hashed with the hasher the slice set
52    ///     would use.
53    ///  2. The slice must not already exist within the set.
54    #[inline(never)]
55    pub unsafe fn insert_unique_uncontended_with_hash(
56        &mut self,
57        hash: u64,
58        slice: &[T],
59    ) -> Result<ThinSlice<'static, T>, SetError> {
60        let obj = ThinSlice::try_allocate_for(slice, &self.arena)?;
61        let uninit = unsafe { &mut *obj.as_ptr() };
62        let new_slice = ThinSlice::try_from_slice_in(slice, uninit)?;
63
64        self.slices
65            .try_reserve(1, |thin| Hasher::default().hash_one(thin.as_slice()))?;
66
67        // Add it to the set. The memory was previously reserved.
68        // SAFETY: The try_reserve above means any necessary re-hashing has
69        // already been done, so the hash closure cannot be called.
70        self.slices
71            .insert_unique(hash, new_slice, |_| unsafe { unreachable_unchecked() });
72
73        Ok(new_slice)
74    }
75
76    /// Adds the slice to the slice set if it isn't present already, and
77    /// returns a handle to the slice that can be used to retrieve it later.
78    pub fn try_insert(&mut self, slice: &[T]) -> Result<ThinSlice<'static, T>, SetError>
79    where
80        T: Hash,
81    {
82        let hash = Hasher::default().hash_one(slice);
83
84        // SAFETY: the slice's hash is correct, we use the same hasher as
85        // SliceSet uses.
86        if let Some(id) = unsafe { self.find_with_hash(hash, slice) } {
87            return Ok(id);
88        }
89
90        // SAFETY: we just checked above that the slice isn't in the set.
91        unsafe { self.insert_unique_uncontended(slice) }
92    }
93
94    /// # Safety
95    /// The hash must be the same as if the slice was re-hashed with the
96    /// hasher the slice set would use.
97    #[inline(never)]
98    pub(crate) unsafe fn find_with_hash(
99        &self,
100        hash: u64,
101        slice: &[T],
102    ) -> Option<ThinSlice<'static, T>>
103    where
104        T: PartialEq,
105    {
106        let interned_slice = self
107            .slices
108            .find(hash, |thin_slice| thin_slice.as_slice() == slice)?;
109        Some(*interned_slice)
110    }
111
112    /// Returns an iterator over all slices in the set.
113    pub fn iter(&self) -> impl Iterator<Item = ThinSlice<'_, T>> + '_ {
114        self.slices.iter().copied()
115    }
116
117    /// Returns the number of slices in the set.
118    pub fn len(&self) -> usize {
119        self.slices.len()
120    }
121
122    /// Returns true if the set is empty.
123    pub fn is_empty(&self) -> bool {
124        self.slices.is_empty()
125    }
126
127    /// Returns the capacity of the hash table.
128    pub fn capacity(&self) -> usize {
129        self.slices.capacity()
130    }
131}