1use super::common::{Interned, RawInterned, UnsafeLock};
2use bumpalo::Bump;
3use hashbrown::{HashTable, hash_table::Entry};
4use std::{
5 alloc::Layout,
6 hash::{BuildHasher, Hash},
7 ptr::{self, NonNull},
8 slice,
9};
10
11pub trait Dropless {
60 fn as_byte_ptr(&self) -> NonNull<u8> {
62 NonNull::from_ref(self).cast::<u8>()
63 }
64
65 fn layout(&self) -> Layout {
67 Layout::for_value(self)
68 }
69
70 unsafe fn from_bytes(bytes: &[u8]) -> &Self;
80
81 unsafe fn hash<S: BuildHasher>(build_hasher: &S, bytes: &[u8]) -> u64;
91
92 unsafe fn eq(a: &[u8], b: &[u8]) -> bool;
102}
103
104macro_rules! simple_impl_dropless_fn {
105 (from_bytes) => {
106 unsafe fn from_bytes(bytes: &[u8]) -> &Self {
107 let ptr = bytes.as_ptr().cast::<Self>();
108 unsafe { ptr.as_ref().unwrap_unchecked() }
109 }
110 };
111 (hash) => {
112 unsafe fn hash<S: BuildHasher>(build_hasher: &S, bytes: &[u8]) -> u64 {
113 let this = unsafe { Self::from_bytes(bytes) };
114 build_hasher.hash_one(this)
115 }
116 };
117 (eq) => {
118 unsafe fn eq(a: &[u8], b: &[u8]) -> bool {
119 unsafe { Self::from_bytes(a) == Self::from_bytes(b) }
120 }
121 };
122}
123
124macro_rules! impl_dropless {
125 ($($ty:ty)*) => {
126 $(
127 impl Dropless for $ty {
128 simple_impl_dropless_fn!(from_bytes);
129 simple_impl_dropless_fn!(hash);
130 simple_impl_dropless_fn!(eq);
131 }
132 )*
133 };
134}
135
136impl_dropless!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize bool char);
137
138impl<T: Dropless + Hash + Eq> Dropless for [T] {
139 unsafe fn from_bytes(bytes: &[u8]) -> &Self {
140 let ptr = bytes.as_ptr().cast::<T>();
141 let stride = Layout::new::<T>().pad_to_align().size();
142 let num_elems = bytes.len() / stride;
143 unsafe { slice::from_raw_parts(ptr, num_elems) }
144 }
145 simple_impl_dropless_fn!(hash);
146 simple_impl_dropless_fn!(eq);
147}
148
149impl<T: Dropless + Hash + Eq, const N: usize> Dropless for [T; N] {
150 simple_impl_dropless_fn!(from_bytes);
151 simple_impl_dropless_fn!(hash);
152 simple_impl_dropless_fn!(eq);
153}
154
155impl Dropless for str {
156 unsafe fn from_bytes(bytes: &[u8]) -> &Self {
157 unsafe { str::from_utf8_unchecked(bytes) }
158 }
159 simple_impl_dropless_fn!(hash);
160 simple_impl_dropless_fn!(eq);
161}
162
163impl<T: Dropless + Hash + Eq> Dropless for Option<T> {
164 simple_impl_dropless_fn!(from_bytes);
165 simple_impl_dropless_fn!(hash);
166 simple_impl_dropless_fn!(eq);
167}
168
169impl<T: Dropless + Hash + Eq, E: Dropless + Hash + Eq> Dropless for Result<T, E> {
170 simple_impl_dropless_fn!(from_bytes);
171 simple_impl_dropless_fn!(hash);
172 simple_impl_dropless_fn!(eq);
173}
174
175#[derive(Debug)]
212pub struct DroplessInterner<S = fxhash::FxBuildHasher> {
213 inner: UnsafeLock<DroplessInternSet<S>>,
214}
215
216impl DroplessInterner {
217 pub fn new() -> Self {
218 Self::default()
219 }
220}
221
222impl<S: BuildHasher> DroplessInterner<S> {
223 pub fn with_hasher(hash_builder: S) -> Self {
224 let inner = unsafe { UnsafeLock::new(DroplessInternSet::with_hasher(hash_builder)) };
226 Self { inner }
227 }
228
229 pub fn len(&self) -> usize {
231 self.with_inner(|set| set.len())
232 }
233
234 pub fn is_empty(&self) -> bool {
236 self.with_inner(|set| set.is_empty())
237 }
238
239 pub fn intern<K: Dropless + ?Sized>(&self, value: &K) -> Interned<'_, K> {
269 self.with_inner(|set| set.intern(value))
270 }
271
272 pub fn get<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
291 self.with_inner(|set| set.get(value))
292 }
293
294 pub fn clear(&mut self) {
299 self.with_inner(|set| set.clear())
300 }
301
302 fn with_inner<'this, F, R>(&'this self, f: F) -> R
303 where
304 F: FnOnce(&'this mut DroplessInternSet<S>) -> R,
305 R: 'this,
306 {
307 unsafe {
309 let set = self.inner.lock().as_mut();
310 let ret = f(set);
311 self.inner.unlock();
312 ret
313 }
314 }
315}
316
317impl<S: Default> Default for DroplessInterner<S> {
318 fn default() -> Self {
319 let inner = unsafe { UnsafeLock::new(DroplessInternSet::default()) };
321 Self { inner }
322 }
323}
324
325#[derive(Debug)]
329pub struct DroplessInternSet<S = fxhash::FxBuildHasher> {
330 bump: Bump,
331 set: HashTable<DynInternEntry<S>>,
332 hash_builder: S,
333}
334
335impl DroplessInternSet {
336 pub fn new() -> Self {
337 Self::default()
338 }
339}
340
341impl<S: BuildHasher> DroplessInternSet<S> {
342 pub fn with_hasher(hash_builder: S) -> Self {
343 Self {
344 bump: Bump::new(),
345 set: HashTable::new(),
346 hash_builder,
347 }
348 }
349
350 pub fn intern<K: Dropless + ?Sized>(&mut self, value: &K) -> Interned<'_, K> {
351 let src = value.as_byte_ptr();
352 let layout = value.layout();
353 unsafe {
354 let bytes = slice::from_raw_parts(src.as_ptr(), layout.size());
355 let hash = <K as Dropless>::hash(&self.hash_builder, bytes);
356 let eq = Self::table_eq::<K>(bytes, layout.align());
357 let hasher = Self::table_hasher(&self.hash_builder);
358 let ref_ = match self.set.entry(hash, eq, hasher) {
359 Entry::Occupied(entry) => Self::ref_from_entry(entry.get()),
360 Entry::Vacant(entry) => {
361 let dst = self.bump.alloc_layout(layout);
363 ptr::copy_nonoverlapping(src.as_ptr(), dst.as_ptr(), layout.size());
364
365 let occupied = entry.insert(DynInternEntry {
366 data: RawInterned(dst).cast(),
367 layout,
368 hash: <K as Dropless>::hash,
369 });
370 Self::ref_from_entry(occupied.get())
371 }
372 };
373 Interned::unique(ref_)
374 }
375 }
376
377 pub fn get<K: Dropless + ?Sized>(&self, value: &K) -> Option<Interned<'_, K>> {
390 let ptr = value.as_byte_ptr();
391 let layout = value.layout();
392 unsafe {
393 let bytes = slice::from_raw_parts(ptr.as_ptr(), layout.size());
394 let hash = <K as Dropless>::hash(&self.hash_builder, bytes);
395 let eq = Self::table_eq::<K>(bytes, layout.align());
396 let entry = self.set.find(hash, eq)?;
397 let ref_ = Self::ref_from_entry(entry);
398 Some(Interned::unique(ref_))
399 }
400 }
401
402 pub fn len(&self) -> usize {
403 self.set.len()
404 }
405
406 pub fn is_empty(&self) -> bool {
407 self.len() == 0
408 }
409
410 pub fn clear(&mut self) {
411 self.bump.reset();
412 self.set.clear();
413 }
414
415 fn table_eq<K: Dropless + ?Sized>(
417 key: &[u8],
418 align: usize,
419 ) -> impl FnMut(&DynInternEntry<S>) -> bool {
420 move |entry: &DynInternEntry<S>| unsafe {
421 if align == entry.layout.align() {
422 let entry_bytes =
423 slice::from_raw_parts(entry.data.cast::<u8>().as_ptr(), entry.layout.size());
424 <K as Dropless>::eq(key, entry_bytes)
425 } else {
426 false
427 }
428 }
429 }
430
431 fn table_hasher(hash_builder: &S) -> impl Fn(&DynInternEntry<S>) -> u64 {
433 |entry: &DynInternEntry<S>| unsafe {
434 let entry_bytes =
435 slice::from_raw_parts(entry.data.cast::<u8>().as_ptr(), entry.layout.size());
436 (entry.hash)(hash_builder, entry_bytes)
438 }
439 }
440
441 unsafe fn ref_from_entry<'a, K: Dropless + ?Sized>(entry: &DynInternEntry<S>) -> &'a K {
442 unsafe {
443 let bytes =
444 slice::from_raw_parts(entry.data.cast::<u8>().as_ptr(), entry.layout.size());
445 K::from_bytes(bytes)
446 }
447 }
448}
449
450impl<S: Default> Default for DroplessInternSet<S> {
451 fn default() -> Self {
452 Self {
453 bump: Bump::new(),
454 set: HashTable::new(),
455 hash_builder: Default::default(),
456 }
457 }
458}
459
460#[derive(Debug)]
461struct DynInternEntry<S> {
462 data: RawInterned,
463 layout: Layout,
464 hash: unsafe fn(&S, &[u8]) -> u64,
466}
467
468#[cfg(test)]
469mod tests {
470 use super::*;
471 use crate::common;
472
473 #[test]
474 fn test_dropless_interner() {
475 test_dropless_interner_int();
476 test_dropless_interner_str();
477 test_dropless_interner_bytes();
478 test_dropless_interner_mixed();
479 test_dropless_interner_many();
480 test_dropless_interner_alignment_handling();
481 }
482
483 fn test_dropless_interner_int() {
484 let interner = DroplessInterner::new();
485
486 let a = interner.intern(&0_u32).erased_raw();
487 let b = interner.intern(&0_u32).erased_raw();
488 let c = interner.intern(&1_u32).erased_raw();
489 let d = interner.intern(&1_i32).erased_raw();
491
492 let groups: [&[RawInterned]; _] = [&[a, b], &[c, d]];
493 common::assert_group_addr_eq(&groups);
494 }
495
496 fn test_dropless_interner_str() {
497 let interner = DroplessInterner::new();
498
499 let a = interner.intern("apple").erased_raw();
500 let b = interner.intern(*Box::new("apple")).erased_raw();
501 let c = interner.intern(&*String::from("apple")).erased_raw();
502 let d = interner.intern("banana").erased_raw();
503
504 let groups: [&[RawInterned]; _] = [&[a, b, c], &[d]];
505 common::assert_group_addr_eq(&groups);
506 }
507
508 fn test_dropless_interner_bytes() {
509 let interner = DroplessInterner::new();
510
511 let a = interner.intern(&[0, 1]).erased_raw();
512 let boxed: Box<[i32]> = Box::new([0, 1]);
513 let b = interner.intern(&*boxed).erased_raw();
514 let c = interner.intern(&*vec![0, 1]).erased_raw();
515 let d = interner.intern(&[2, 3]).erased_raw();
516
517 let groups: [&[RawInterned]; _] = [&[a, b, c], &[d]];
518 common::assert_group_addr_eq(&groups);
519 }
520
521 fn test_dropless_interner_mixed() {
522 let interner = DroplessInterner::new();
523
524 interner.intern(&1_u32);
525 interner.intern(&[2, 3]);
526 interner.intern("apple");
527 assert_eq!(interner.get("apple").as_deref(), Some(&"apple"));
528 assert!(interner.get("banana").is_none());
529 assert_eq!(interner.get(&1_u32).as_deref(), Some(&&1_u32));
530 assert!(interner.get(&2_u32).is_none());
531 assert_eq!(interner.get(&[2, 3]).as_deref(), Some(&&[2, 3]));
532 assert!(interner.get(&[2]).is_none());
533 }
534
535 fn test_dropless_interner_many() {
536 let interner = DroplessInterner::new();
537
538 let mut interned_int = Vec::new();
539 let mut interned_str = Vec::new();
540 let mut interned_bytes = Vec::new();
541
542 const N: usize = 1000;
543
544 let strs = (0..N).map(|i| (i * 10_000).to_string()).collect::<Vec<_>>();
545
546 for i in 0..N {
548 let int = &(i as u32); let str_ = &*strs[i]; let bytes = &[i as u16]; interned_int.push(interner.intern(int).erased_raw());
552 interned_str.push(interner.intern(str_).erased_raw());
553 interned_bytes.push(interner.intern(bytes).erased_raw());
554 }
555
556 interned_int.sort_unstable();
558 interned_int.dedup();
559 interned_str.sort_unstable();
560 interned_str.dedup();
561 interned_bytes.sort_unstable();
562 interned_bytes.dedup();
563 assert_eq!(interned_int.len(), N);
564 assert_eq!(interned_str.len(), N);
565 assert_eq!(interned_bytes.len(), N);
566 let whole = interned_int
567 .iter()
568 .chain(&interned_str)
569 .chain(&interned_bytes)
570 .cloned()
571 .collect::<fxhash::FxHashSet<_>>();
572 assert_eq!(whole.len(), N * 3);
573
574 for i in 0..N {
576 let int = &(i as u32); let str_ = &*strs[i]; let bytes = &[i as u16]; assert_eq!(*interner.get(int).unwrap(), int);
580 assert_eq!(*interner.get(str_).unwrap(), str_);
581 assert_eq!(*interner.get(bytes).unwrap(), bytes);
582 }
583 }
584
585 #[rustfmt::skip]
586 fn test_dropless_interner_alignment_handling() {
587 #[derive(PartialEq, Eq, Hash, Debug)] #[repr(C, align(4))] struct T4 (u16, [u8; 2]);
588 #[derive(PartialEq, Eq, Hash, Debug)] #[repr(C, align(8))] struct T8 (u16, [u8; 6]);
589 #[derive(PartialEq, Eq, Hash, Debug)] #[repr(C, align(16))] struct T16 (u16, [u8; 14]);
590
591 macro_rules! impl_for_first_2bytes {
592 () => {
593 unsafe fn hash<S: BuildHasher>(hash_builder: &S, bytes: &[u8]) -> u64 {
594 hash_builder.hash_one(&bytes[0..2])
595 }
596 unsafe fn eq(a: &[u8], b: &[u8]) -> bool {
597 a[0..2] == b[0..2]
598 }
599 };
600 }
601
602 impl Dropless for T4 { simple_impl_dropless_fn!(from_bytes); impl_for_first_2bytes!(); }
603 impl Dropless for T8 { simple_impl_dropless_fn!(from_bytes); impl_for_first_2bytes!(); }
604 impl Dropless for T16 { simple_impl_dropless_fn!(from_bytes); impl_for_first_2bytes!(); }
605
606 let interner = DroplessInterner::new();
607
608 let mut interned_4 = Vec::new();
609 let mut interned_8 = Vec::new();
610 let mut interned_16 = Vec::new();
611
612 const N: usize = 1000;
613
614 for i in 0..N {
619 let t4 = T4(i as u16, [4; _]);
620 let t8 = T8(i as u16, [8; _]);
621 let t16 = T16(i as u16, [16; _]);
622 interned_4.push(interner.intern(&t4).erased_raw());
623 interned_8.push(interner.intern(&t8).erased_raw());
624 interned_16.push(interner.intern(&t16).erased_raw());
625 }
626
627 for i in 0..N {
628 let t4 = T4(i as u16, [4; _]);
629 let t8 = T8(i as u16, [8; _]);
630 let t16 = T16(i as u16, [16; _]);
631 unsafe {
632 assert_eq!(*<Interned<'_, T4>>::from_erased_raw(interned_4[i]), &t4);
633 assert_eq!(*<Interned<'_, T8>>::from_erased_raw(interned_8[i]), &t8);
634 assert_eq!(*<Interned<'_, T16>>::from_erased_raw(interned_16[i]), &t16);
635 }
636 }
637 }
638}