libdd_profiling/profiles/collections/
set.rs

1// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2// SPDX-License-Identifier: Apache-2.0
3
4use super::SetError;
5use super::SetHasher as Hasher;
6use core::hint::unreachable_unchecked;
7use core::{fmt, mem, ptr};
8use hashbrown::HashTable;
9use libdd_alloc::{Allocator, ChainAllocator, VirtualAllocator};
10use std::ffi::c_void;
11use std::hash::{BuildHasher, Hash};
12
13pub const SET_MIN_CAPACITY: usize = 14;
14
/// An opaque handle to an element stored in a [`Set`].
///
/// Internally this is a pointer into the owning set's arena; per the
/// contract documented on [`SetId::from_raw`], it is only meaningful while
/// that set is still alive.
#[repr(transparent)]
#[derive(Debug, Eq, Hash, PartialEq)]
pub struct SetId<T>(pub(crate) ptr::NonNull<T>);
18
19impl<T> SetId<T> {
20    /// Cast to another type. Although this is safe, using the result is not
21    /// necessarily safe.
22    #[inline]
23    #[must_use]
24    pub fn cast<U>(self) -> SetId<U> {
25        SetId(self.0.cast())
26    }
27
28    pub fn into_raw(self) -> ptr::NonNull<c_void> {
29        self.0.cast()
30    }
31
32    /// Re-creates a [`SetId`] from calling [`SetId::into_raw`].
33    ///
34    /// # Safety
35    ///
36    /// The set it belongs to must still be alive, and the repr should be
37    /// unchanged since it was created by [`SetId::into_raw`].
38    pub unsafe fn from_raw(raw: ptr::NonNull<c_void>) -> Self {
39        Self(raw.cast::<T>())
40    }
41}
42
43// This is different from derive(Clone), because derive(Clone) will be Clone
44// only if T is Clone, and that's not true here--the type is always Clone.
45impl<T> Clone for SetId<T> {
46    fn clone(&self) -> Self {
47        *self
48    }
49}
50impl<T> Copy for SetId<T> {}
51
52impl<T: Hash + Eq + 'static> fmt::Debug for Set<T> {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        f.debug_struct("Set").field("table", &self.table).finish()
55    }
56}
57
/// A hash set whose elements are stored in an arena.
///
/// Elements are allocated out of `arena` and referenced by pointer from
/// `table`, so a [`SetId`] stays stable across further insertions. No
/// removal API is provided by this module.
pub struct Set<T: Hash + Eq + 'static> {
    // Owns the memory backing every element; released when the set drops.
    pub(crate) arena: ChainAllocator<VirtualAllocator>,
    // Stores pointers to the elements living in `arena`, keyed by hash.
    pub(crate) table: HashTable<ptr::NonNull<T>>,
}
62
63impl<T: Eq + Hash + 'static> Set<T> {
64    const SIZE_HINT: usize = 1024 * 1024;
65
66    pub fn try_new() -> Result<Self, SetError> {
67        Self::try_with_capacity(SET_MIN_CAPACITY)
68    }
69
70    #[inline]
71    pub(crate) fn allocate_one(&mut self, value: T) -> Result<ptr::NonNull<T>, SetError> {
72        let layout = core::alloc::Layout::new::<T>();
73        // Allocate raw bytes for a single `T`
74        let obj = self.arena.allocate(layout)?; // Result<NonNull<[u8]>, AllocError>
75        let raw_slice_ptr: *mut [u8] = obj.as_ptr();
76        let raw = raw_slice_ptr as *mut u8 as *mut T;
77
78        // SAFETY: `raw` points to allocated, properly aligned memory for `T`.
79        unsafe { ptr::write(raw, value) };
80
81        // SAFETY: cannot be null as it was just allocated.
82        Ok(unsafe { ptr::NonNull::new_unchecked(raw) })
83    }
84
85    pub fn try_insert(&mut self, value: T) -> Result<SetId<T>, SetError> {
86        let hash = Hasher::default().hash_one(&value);
87        // SAFETY: hash computed by this set's hasher for value.
88        if let Some(existing) = unsafe { self.find_with_hash(hash, &value) } {
89            return Ok(existing);
90        }
91        // SAFETY: hash computed by this set's hasher, uniqueness is enforced
92        // by a prior find.
93        unsafe { self.insert_unique_uncontended_with_hash(hash, value) }
94    }
95
96    pub fn len(&self) -> usize {
97        self.table.len()
98    }
99
100    pub fn is_empty(&self) -> bool {
101        self.table.is_empty()
102    }
103    pub fn capacity(&self) -> usize {
104        self.table.capacity()
105    }
106
107    /// Returns the `SetId` for `value` if it exists in the set, without inserting.
108    ///
109    /// This is primarily intended for tests and debugging. In typical usage
110    /// you should prefer `try_insert` which handles both existence checks and
111    /// insertion atomically in the intended access pattern.
112    pub fn find(&self, value: &T) -> Option<SetId<T>> {
113        let hash = Hasher::default().hash_one(value);
114        // SAFETY: `hash` was computed using this set's hasher over `&value`.
115        unsafe { self.find_with_hash(hash, value) }
116    }
117
118    /// Returns a shared reference to the value for a given `SetId`.
119    ///
120    /// # Safety
121    /// - The `id` must have been obtained from this exact `Set<T>` instance (or remain valid for
122    ///   it). Using an id from another set, or after the backing arena is torn down, is undefined
123    ///   behavior.
124    /// # Safety
125    /// - `id` must have been obtained from this exact `Set<T>` instance and still refer to a live
126    ///   element in its arena.
127    pub unsafe fn get(&self, id: SetId<T>) -> &T {
128        // SAFETY: Caller guarantees the `SetId` belongs to this set and points
129        // to a live, properly aligned `T` in the arena.
130        unsafe { id.0.as_ref() }
131    }
132}
133
134impl<T: Hash + Eq + 'static> Drop for Set<T> {
135    fn drop(&mut self) {
136        if mem::needs_drop::<T>() {
137            for nn in self.table.iter() {
138                // SAFETY: Elements in the table were allocated and initialized
139                // via `allocate_one` and remain valid for the lifetime of this
140                // set (arena-backed). We only drop if `T` requires dropping.
141                unsafe { ptr::drop_in_place(nn.as_ptr()) };
142            }
143        }
144    }
145}
146
impl<T: Hash + Eq + 'static> Set<T> {
    /// Creates a set whose table can hold at least `capacity` elements
    /// before growing, backed by a fresh virtual-memory arena.
    fn try_with_capacity(capacity: usize) -> Result<Self, SetError> {
        let arena = ChainAllocator::new_in(Self::SIZE_HINT, VirtualAllocator {});
        let mut table = HashTable::new();

        // SAFETY: new empty table cannot require rehash, callback unreachable.
        table.try_reserve(capacity, |_| unsafe { unreachable_unchecked() })?;
        Ok(Self { arena, table })
    }

    /// Looks up `key` with a pre-computed hash, returning its id if present.
    ///
    /// # Safety
    ///
    /// `hash` must have been computed by this set's hasher ([`Hasher`]) over
    /// a value equal to `key`, as done by the callers in `try_insert` and
    /// `find`.
    unsafe fn find_with_hash(&self, hash: u64, key: &T) -> Option<SetId<T>> {
        let found = self
            .table
            // SAFETY: NonNull<T> inside table points to live, properly aligned Ts.
            .find(hash, |nn| unsafe { nn.as_ref() == key })?;
        Some(SetId(*found))
    }

    /// Inserts `value` assuming it is not already present.
    ///
    /// # Safety
    ///
    /// - `hash` must have been computed by this set's hasher over `value`.
    /// - `value` must not already be in the set (callers enforce this with a
    ///   prior `find_with_hash`); otherwise a duplicate entry is inserted.
    unsafe fn insert_unique_uncontended_with_hash(
        &mut self,
        hash: u64,
        value: T,
    ) -> Result<SetId<T>, SetError> {
        // Reserve table space BEFORE allocating the new value so we don't
        // need to drop it on reserve failure.
        // SAFETY: NonNull<T> entries are valid; closure only hashes existing entries.
        self.table
            .try_reserve(1, |nnv| Hasher::default().hash_one(unsafe { nnv.as_ref() }))?;
        let nn = self.allocate_one(value)?;
        // SAFETY: reserve above guarantees no rehash occurs; closure unreachable.
        self.table
            .insert_unique(hash, nn, |_| unsafe { unreachable_unchecked() });
        Ok(SetId(nn))
    }
}
182
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;
    use std::collections::HashSet as StdHashSet;
    use std::sync::{Arc, Weak};

    proptest! {
        #![proptest_config(ProptestConfig {
            // Miri runs much slower, so use far fewer cases under it.
            cases: if cfg!(miri) { 4 } else { 64 },
            .. ProptestConfig::default()
        })]

        // Inserting arbitrary u64s must mirror std's HashSet: same final
        // length, and every value is findable and readable via its id.
        #[test]
        fn proptest_matches_std_hashset(values in proptest::collection::vec(any::<u64>(), 0..if cfg!(miri) { 32 } else { 512 })) {
            let mut set = Set::<u64>::try_new().unwrap();
            let mut shadow = StdHashSet::<u64>::new();

            for v in &values {
                shadow.insert(*v);
                let _ = set.try_insert(*v).unwrap();
            }

            prop_assert_eq!(set.len(), shadow.len());

            for &v in &shadow {
                let id = set.find(&v).unwrap();
                // SAFETY: id just obtained from this set
                let fetched = unsafe { set.get(id) };
                prop_assert_eq!(*fetched, v);
            }
        }
    }

    // Dropping the set must run element destructors: after `drop(set)`,
    // every Weak created from the inserted Arcs can no longer upgrade,
    // proving each Arc's strong count reached zero.
    #[test]
    fn set_drops_elements_on_drop() {
        let mut set = Set::<Arc<u64>>::try_new().unwrap();
        let mut weaks: Vec<Weak<u64>> = Vec::new();

        let total = if cfg!(miri) { 8 } else { 64 };
        for i in 0..total {
            let arc = Arc::new(i as u64);
            weaks.push(Arc::downgrade(&arc));
            // Transfer ownership into the set
            let _ = set.try_insert(arc).unwrap();
        }

        drop(set);

        for (idx, w) in weaks.iter().enumerate() {
            assert!(w.upgrade().is_none(), "weak at {idx} still alive");
        }
    }
}