libdd_profiling/profiles/collections/
string_set.rs1use super::slice_set::SliceSet;
5use super::SetError;
6use super::ThinStr;
7use std::ffi::c_void;
8use std::hash::BuildHasher;
9use std::ops::Deref;
10use std::ptr::NonNull;
11
12use super::SetHasher as Hasher;
13
14#[repr(transparent)]
24#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
25pub struct StringRef(pub ThinStr<'static>);
26
27impl StringRef {
28 pub fn into_raw(self) -> NonNull<c_void> {
29 self.0.into_raw()
30 }
31
32 pub unsafe fn from_raw(this: NonNull<c_void>) -> Self {
39 Self(ThinStr::from_raw(this))
40 }
41}
42
43impl From<&StringRef> for StringRef {
44 fn from(value: &StringRef) -> Self {
45 *value
46 }
47}
48
49impl Default for StringRef {
50 fn default() -> Self {
51 Self::EMPTY
52 }
53}
54
55impl StringRef {
56 pub const EMPTY: StringRef = StringRef(ThinStr::new());
57 pub const END_TIMESTAMP_NS: StringRef = StringRef(ThinStr::end_timestamp_ns());
58 pub const LOCAL_ROOT_SPAN_ID: StringRef = StringRef(ThinStr::local_root_span_id());
59 pub const TRACE_ENDPOINT: StringRef = StringRef(ThinStr::trace_endpoint());
60 pub const SPAN_ID: StringRef = StringRef(ThinStr::span_id());
61}
62
63pub const WELL_KNOWN_STRING_REFS: [StringRef; 5] = [
64 StringRef::EMPTY,
65 StringRef::END_TIMESTAMP_NS,
66 StringRef::LOCAL_ROOT_SPAN_ID,
67 StringRef::TRACE_ENDPOINT,
68 StringRef::SPAN_ID,
69];
70
71pub struct UnsyncStringSet(SliceSet<u8>);
74
75impl UnsyncStringSet {
76 pub fn try_with_capacity(capacity: usize) -> Result<Self, SetError> {
77 let mut set = Self(SliceSet::try_with_capacity(capacity)?);
78 let strings = &mut set.0.slices;
79 for id in WELL_KNOWN_STRING_REFS {
80 let hash = Hasher::default().hash_one(id.0.deref().as_bytes());
81 strings.insert_unique(hash, id.0.into(), |t| Hasher::default().hash_one(t.deref()));
82 }
83
84 Ok(set)
85 }
86
87 pub fn try_new() -> Result<Self, SetError> {
91 Self::try_with_capacity(28)
92 }
93
94 unsafe fn find_with_hash(&self, hash: u64, str: &str) -> Option<StringRef> {
95 let interned_str = self.0.slices.find(hash, |thin_slice| {
96 let slice_str = unsafe { std::str::from_utf8_unchecked(thin_slice.as_slice()) };
98 slice_str == str
99 })?;
100 Some(StringRef((*interned_str).into()))
101 }
102
103 pub unsafe fn insert_unique_uncontended(&mut self, str: &str) -> Result<StringRef, SetError> {
108 let hash = Hasher::default().hash_one(str.as_bytes());
109 self.insert_unique_uncontended_with_hash(hash, str)
110 }
111
112 pub unsafe fn insert_unique_uncontended_with_hash(
120 &mut self,
121 hash: u64,
122 str: &str,
123 ) -> Result<StringRef, SetError> {
124 let new_slice = self
125 .0
126 .insert_unique_uncontended_with_hash(hash, str.as_bytes())?;
127 Ok(StringRef(new_slice.into()))
128 }
129
130 pub fn try_insert(&mut self, str: &str) -> Result<StringRef, SetError> {
133 let hash = Hasher::default().hash_one(str.as_bytes());
134 unsafe { self.try_insert_with_hash(hash, str) }
135 }
136
137 pub unsafe fn try_insert_with_hash(
144 &mut self,
145 hash: u64,
146 str: &str,
147 ) -> Result<StringRef, SetError> {
148 if let Some(id) = self.find_with_hash(hash, str) {
151 return Ok(id);
152 }
153
154 self.insert_unique_uncontended_with_hash(hash, str)
156 }
157
158 pub fn string_ids(&self) -> impl Iterator<Item = StringRef> + '_ {
160 self.0.slices.iter().map(|slice| StringRef((*slice).into()))
161 }
162
163 pub fn len(&self) -> usize {
164 self.0.len()
165 }
166
167 pub fn is_empty(&self) -> bool {
168 self.0.is_empty()
169 }
170
171 pub fn capacity(&self) -> usize {
172 self.0.capacity()
173 }
174
175 pub unsafe fn get_string(&self, id: StringRef) -> &str {
179 unsafe { core::mem::transmute::<&str, &str>(id.0.deref()) }
182 }
183}
184
185#[cfg(test)]
186mod tests {
187 use super::*;
188
189 #[test]
190 fn test_string_set_basic_operations() {
191 let mut set = UnsyncStringSet::try_new().unwrap();
192
193 let id1 = set.try_insert("hello").unwrap();
195 let id2 = set.try_insert("world").unwrap();
196 let id3 = set.try_insert("hello").unwrap(); assert_eq!(&*id1.0, &*id3.0);
200 assert_ne!(&*id1.0, &*id2.0);
201
202 unsafe {
204 assert_eq!(set.get_string(id1), "hello");
205 assert_eq!(set.get_string(id2), "world");
206 assert_eq!(set.get_string(id3), "hello");
207 }
208 }
209
210 #[test]
211 fn test_string_lengths_and_alignment() {
212 let mut set = UnsyncStringSet::try_new().unwrap();
213
214 let test_strings = [
216 "", "a", "ab", "abc", "abcd", "abcdefg", "abcdefgh", "abcdefghijklmno", "abcdefghijklmnop", "abcdefghijklmnopqrstuvwxyz123456789", ];
227
228 let mut ids = Vec::new();
229 for s in &test_strings {
230 let id = set.try_insert(s).unwrap();
231 ids.push(id);
232 }
233
234 for (id, expected) in ids.iter().zip(&test_strings) {
236 unsafe {
237 assert_eq!(set.get_string(*id), *expected);
238 }
239 }
240 }
241
242 #[test]
243 fn test_unicode_strings() {
244 let mut set = UnsyncStringSet::try_new().unwrap();
245
246 let unicode_strings = [
247 "café", "🦀", "こんにちは", "Здравствуй", "🔥💯✨", "a\u{0000}b", "line1\nline2", "tab\there", ];
256
257 let mut ids = Vec::new();
258 for s in &unicode_strings {
259 let id = set.try_insert(s).unwrap();
260 ids.push(id);
261 }
262
263 for (id, expected) in ids.iter().zip(&unicode_strings) {
265 unsafe {
266 assert_eq!(set.get_string(*id), *expected);
267 }
268 }
269 }
270
271 #[test]
272 fn test_capacity_and_growth() {
273 let mut set = UnsyncStringSet::try_with_capacity(1).unwrap();
275
276 let test_strings: Vec<String> = (0..50).map(|i| format!("growth_test_{}", i)).collect();
278
279 let mut ids = Vec::new();
280 for s in &test_strings {
281 let id = set.try_insert(s).unwrap();
282 ids.push(id);
283 }
284
285 for (id, expected) in ids.iter().zip(&test_strings) {
287 unsafe {
288 assert_eq!(set.get_string(*id), expected);
289 }
290 }
291 }
292
293 #[test]
294 #[cfg_attr(miri, ignore)]
295 fn test_large_strings() {
296 let mut set = UnsyncStringSet::try_new().unwrap();
297
298 let large_string = "x".repeat(1024);
300 let id1 = set.try_insert(&large_string).unwrap();
301
302 unsafe {
303 assert_eq!(set.get_string(id1), large_string);
304 }
305
306 let very_large_string = "y".repeat(65536);
308 let id2 = set.try_insert(&very_large_string).unwrap();
309
310 unsafe {
311 assert_eq!(set.get_string(id2), very_large_string);
312 assert_eq!(set.get_string(id1), large_string);
314 }
315
316 let huge_string = "z".repeat(2 * 1024 * 1024 + 1000); let id3 = set.try_insert(&huge_string).unwrap();
319
320 unsafe {
321 assert_eq!(set.get_string(id3), huge_string);
322 assert_eq!(set.get_string(id1), large_string);
324 assert_eq!(set.get_string(id2), very_large_string);
325 }
326 }
327
328 #[test]
329 fn test_many_small_strings() {
330 const NUM_STRINGS: usize = if cfg!(miri) { 100 } else { 1000 };
331 let mut set = UnsyncStringSet::try_new().unwrap();
332
333 let mut ids = Vec::with_capacity(NUM_STRINGS);
335 let mut expected = Vec::with_capacity(NUM_STRINGS);
336
337 for i in 0..NUM_STRINGS {
338 let s = format!("{}", i);
339 let id = set.try_insert(&s).unwrap();
340 ids.push(id);
341 expected.push(s);
342 }
343
344 for (id, expected_str) in ids.iter().zip(&expected) {
346 unsafe {
347 assert_eq!(set.get_string(*id), expected_str);
348 }
349 }
350 }
351}