1use std::alloc::{alloc, dealloc, Layout, LayoutError};
4use std::borrow::Borrow;
5use std::cmp::Ordering;
6use std::fmt::{self, Debug, Formatter};
7use std::hash::Hash;
8use std::ops::Deref;
9use std::ptr::{copy_nonoverlapping, NonNull};
10use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
11
12use dashmap::{DashSet, SharedValue};
13use lazy_static::lazy_static;
14
15use crate::thin::{ThinMut, ThinMutExt, ThinRef, ThinRefExt};
16
17use super::value::{IValue, TypeTag};
18
/// Heap header that precedes the bytes of every interned string.
///
/// The allocation layout is `[Header][UTF-8 bytes…]` (see `IString::layout`),
/// so a pointer to the header also locates the string payload.
#[repr(C)]
#[repr(align(4))]
struct Header {
    // Number of live `IString` handles sharing this allocation.
    rc: AtomicUsize,
    // Byte length, split into 32 low bits + 16 high bits: a 48-bit maximum
    // length while keeping the header compact (see `HeaderRef::len`).
    len_lower: u32,
    len_upper: u16,
    // Which `STRING_CACHE` shard owns this entry; stored so `drop_impl`
    // can find the right shard without re-hashing the string.
    shard_index: u16,
}
28
/// Read-only accessors over a `Header` and the string payload that follows it.
trait HeaderRef<'a>: ThinRefExt<'a, Header> {
    /// Reassembles the 48-bit byte length from its lower-32/upper-16 halves.
    fn len(&self) -> usize {
        (u64::from(self.len_lower) | (u64::from(self.len_upper) << 32)) as usize
    }
    /// Index of the `STRING_CACHE` shard this string was inserted into.
    fn shard_index(&self) -> usize {
        self.shard_index as usize
    }
    /// Pointer to the UTF-8 payload laid out immediately after the header.
    fn str_ptr(&self) -> *const u8 {
        // SAFETY: `add(1)` on a `*const Header` steps exactly past the
        // header; per `IString::layout` the payload starts there.
        unsafe { self.ptr().add(1).cast() }
    }
    /// The payload as a byte slice of `len()` bytes.
    fn bytes(&self) -> &'a [u8] {
        // SAFETY: `len()` bytes were written at `str_ptr()` when the
        // allocation was created and are never mutated afterwards.
        unsafe { std::slice::from_raw_parts(self.str_ptr(), self.len()) }
    }
    /// The payload as a string slice.
    fn str(&self) -> &'a str {
        // SAFETY: the payload was copied from a `&str`, so it is valid UTF-8.
        unsafe { std::str::from_utf8_unchecked(self.bytes()) }
    }
}
49
/// Mutable access to the payload area; only used while initializing a
/// freshly allocated header (see `IString::alloc`).
trait HeaderMut<'a>: ThinMutExt<'a, Header> {
    /// Mutable pointer to the UTF-8 payload directly after the header.
    fn str_ptr_mut(mut self) -> *mut u8 {
        // SAFETY: same layout reasoning as `HeaderRef::str_ptr`.
        unsafe { self.ptr_mut().add(1).cast() }
    }
}
56
// Blanket impls: any thin (mutable) reference to a `Header` automatically
// gains the accessor methods above.
impl<'a, T: ThinRefExt<'a, Header>> HeaderRef<'a> for T {}
impl<'a, T: ThinMutExt<'a, Header>> HeaderMut<'a> for T {}
59
lazy_static! {
    // Global intern table: one entry per distinct live string. Entries are
    // non-owning (`WeakIString`); the reference count lives in each string's
    // `Header`, and `drop_impl` removes the entry when it reaches zero.
    static ref STRING_CACHE: DashSet<WeakIString> = DashSet::new();
}
63
// Eagerly initialize the cache at program start in test builds or when the
// `ctor` feature is enabled — presumably so the cache's one-time allocation
// does not occur inside allocation-counting `mockalloc` tests; TODO confirm.
#[cfg(any(test, feature = "ctor"))]
#[ctor::ctor]
fn ctor_init_cache() {
    lazy_static::initialize(&STRING_CACHE);
}
71
/// Forces initialization of the global string cache.
#[doc(hidden)]
pub fn init_cache() {
    lazy_static::initialize(&STRING_CACHE);
}
76
/// Cache entry: a non-owning pointer to an interned string's header.
/// Does not itself contribute to the reference count.
struct WeakIString {
    ptr: NonNull<Header>,
}
80
// SAFETY: a `WeakIString` points at an immutable heap allocation whose only
// mutable field (`rc`) is atomic, so sharing/sending it across threads is sound.
unsafe impl Send for WeakIString {}
unsafe impl Sync for WeakIString {}
// Equality and hashing delegate to the string contents (via
// `Deref<Target = str>`) so the cache can be probed with a plain `&str`.
impl PartialEq for WeakIString {
    fn eq(&self, other: &Self) -> bool {
        **self == **other
    }
}
impl Eq for WeakIString {}
impl Hash for WeakIString {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        (**self).hash(state);
    }
}
94
95impl Deref for WeakIString {
96 type Target = str;
97 fn deref(&self) -> &str {
98 self.borrow()
99 }
100}
101
impl Borrow<str> for WeakIString {
    // Reads the string slice straight out of the pointed-to allocation.
    fn borrow(&self) -> &str {
        self.header().str()
    }
}
impl WeakIString {
    /// Views the pointed-to header.
    fn header(&self) -> ThinRef<Header> {
        // SAFETY: `ptr` always points at a live, initialized `Header`; cache
        // entries are removed before their backing allocation is freed.
        unsafe { ThinRef::new(self.ptr.as_ptr()) }
    }
    /// Converts this weak cache entry into a strong `IString` by bumping the
    /// reference count.
    fn upgrade(&self) -> IString {
        unsafe {
            // Relaxed ordering: both call sites (`IString::intern`) run
            // while holding the shard's write lock, which synchronizes
            // publication of the entry.
            self.ptr.as_ref().rc.fetch_add(1, AtomicOrdering::Relaxed);
            IString(IValue::new_ptr(
                self.ptr.as_ptr().cast::<u8>(),
                TypeTag::StringOrNull,
            ))
        }
    }
}
122
/// An interned, immutable, reference-counted UTF-8 string.
///
/// `#[repr(transparent)]` over `IValue`, so the enclosing value machinery
/// can reinterpret it freely.
#[repr(transparent)]
#[derive(Clone)]
pub struct IString(pub(crate) IValue);
140
// Crate macro wiring `IString` into the `IValue` conversion/accessor API.
value_subtype_impls!(IString, into_string, as_string, as_string_mut);
142
// Shared backing for every empty `IString`. Its `rc` is never touched:
// `clone_impl` and `drop_impl` both special-case the empty string.
static EMPTY_HEADER: Header = Header {
    len_lower: 0,
    len_upper: 0,
    shard_index: 0,
    rc: AtomicUsize::new(0),
};
149
150impl IString {
151 fn layout(len: usize) -> Result<Layout, LayoutError> {
152 Ok(Layout::new::<Header>()
153 .extend(Layout::array::<u8>(len)?)?
154 .0
155 .pad_to_align())
156 }
157
158 fn alloc(s: &str, shard_index: usize) -> *mut Header {
159 assert!((s.len() as u64) < (1 << 48));
160 assert!(shard_index < (1 << 16));
161 unsafe {
162 let ptr = alloc(Self::layout(s.len()).unwrap()).cast::<Header>();
163 ptr.write(Header {
164 len_lower: s.len() as u32,
165 len_upper: ((s.len() as u64) >> 32) as u16,
166 shard_index: shard_index as u16,
167 rc: AtomicUsize::new(0),
168 });
169 let hd = ThinMut::new(ptr);
170 copy_nonoverlapping(s.as_ptr(), hd.str_ptr_mut(), s.len());
171 ptr
172 }
173 }
174
175 fn dealloc(ptr: *mut Header) {
176 unsafe {
177 let hd = ThinRef::new(ptr);
178 let layout = Self::layout(hd.len()).unwrap();
179 dealloc(ptr.cast::<u8>(), layout);
180 }
181 }
182
183 #[must_use]
185 pub fn intern(s: &str) -> Self {
186 if s.is_empty() {
187 return Self::new();
188 }
189 let cache = &*STRING_CACHE;
190 let shard_index = cache.determine_map(s);
191
192 let shard = unsafe { cache.shards().get_unchecked(shard_index) };
194 let mut guard = shard.write();
195 if let Some((k, _)) = guard.get_key_value(s) {
196 k.upgrade()
197 } else {
198 let k = unsafe {
199 WeakIString {
200 ptr: NonNull::new_unchecked(Self::alloc(s, shard_index)),
201 }
202 };
203 let res = k.upgrade();
204 guard.insert(k, SharedValue::new(()));
205 res
206 }
207 }
208
209 fn header(&self) -> ThinRef<Header> {
210 unsafe { ThinRef::new(self.0.ptr().cast()) }
211 }
212
213 #[must_use]
215 pub fn len(&self) -> usize {
216 self.header().len()
217 }
218
219 #[must_use]
221 pub fn is_empty(&self) -> bool {
222 self.len() == 0
223 }
224
225 #[must_use]
227 pub fn as_str(&self) -> &str {
228 self.header().str()
229 }
230
231 #[must_use]
233 pub fn as_bytes(&self) -> &[u8] {
234 self.header().bytes()
235 }
236
237 #[must_use]
239 pub fn new() -> Self {
240 unsafe { IString(IValue::new_ref(&EMPTY_HEADER, TypeTag::StringOrNull)) }
241 }
242
243 pub(crate) fn clone_impl(&self) -> IValue {
244 if self.is_empty() {
245 Self::new().0
246 } else {
247 self.header().rc.fetch_add(1, AtomicOrdering::Relaxed);
248 unsafe { self.0.raw_copy() }
249 }
250 }
251 pub(crate) fn drop_impl(&mut self) {
252 if !self.is_empty() {
253 let hd = self.header();
254
255 let mut rc = hd.rc.load(AtomicOrdering::Relaxed);
258 while rc > 1 {
259 match hd.rc.compare_exchange_weak(
260 rc,
261 rc - 1,
262 AtomicOrdering::Relaxed,
263 AtomicOrdering::Relaxed,
264 ) {
265 Ok(_) => return,
266 Err(new_rc) => rc = new_rc,
267 }
268 }
269
270 let cache = &*STRING_CACHE;
272 let shard = unsafe { cache.shards().get_unchecked(hd.shard_index()) };
274 let mut guard = shard.write();
275 if hd.rc.fetch_sub(1, AtomicOrdering::Relaxed) == 1 {
276 assert!(guard.remove(hd.str()).is_some());
278
279 if guard.len() * 3 < guard.capacity() || guard.is_empty() {
284 guard.shrink_to_fit();
285 }
286 drop(guard);
287
288 Self::dealloc(unsafe { self.0.ptr().cast() });
289 }
290 }
291 }
292}
293
294impl Deref for IString {
295 type Target = str;
296 fn deref(&self) -> &str {
297 self.as_str()
298 }
299}
300
301impl Borrow<str> for IString {
302 fn borrow(&self) -> &str {
303 self.as_str()
304 }
305}
306
// All string-like inputs intern by content; owned `String`s are not reused,
// since the interned copy lives in its own header-prefixed allocation.
impl From<&str> for IString {
    fn from(other: &str) -> Self {
        Self::intern(other)
    }
}

impl From<&mut str> for IString {
    fn from(other: &mut str) -> Self {
        Self::intern(other)
    }
}

impl From<String> for IString {
    fn from(other: String) -> Self {
        Self::intern(other.as_str())
    }
}

impl From<&String> for IString {
    fn from(other: &String) -> Self {
        Self::intern(other.as_str())
    }
}

impl From<&mut String> for IString {
    fn from(other: &mut String) -> Self {
        Self::intern(other.as_str())
    }
}
336
337impl From<IString> for String {
338 fn from(other: IString) -> Self {
339 other.as_str().into()
340 }
341}
342
impl PartialEq for IString {
    fn eq(&self, other: &Self) -> bool {
        // Interning guarantees at most one live allocation per distinct
        // string (and all empty strings share `EMPTY_HEADER`), so comparing
        // the raw value representations is equivalent to comparing contents.
        self.0.raw_eq(&other.0)
    }
}
348
// Content comparisons against plain string types go through `as_str`.
impl PartialEq<str> for IString {
    fn eq(&self, other: &str) -> bool {
        self.as_str() == other
    }
}

impl PartialEq<IString> for str {
    fn eq(&self, other: &IString) -> bool {
        self == other.as_str()
    }
}

impl PartialEq<String> for IString {
    fn eq(&self, other: &String) -> bool {
        self.as_str() == other
    }
}

impl PartialEq<IString> for String {
    fn eq(&self, other: &IString) -> bool {
        self == other.as_str()
    }
}
372
373impl Default for IString {
374 fn default() -> Self {
375 Self::new()
376 }
377}
378
impl Eq for IString {}
impl Ord for IString {
    fn cmp(&self, other: &Self) -> Ordering {
        // Fast path: interned pointers are equal iff contents are equal,
        // so equality needs no byte comparison.
        if self == other {
            Ordering::Equal
        } else {
            // Distinct allocations: fall back to lexicographic comparison.
            self.as_str().cmp(other.as_str())
        }
    }
}
389impl PartialOrd for IString {
390 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
391 Some(self.cmp(other))
392 }
393}
impl Hash for IString {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Hashes the raw `IValue` representation rather than the bytes.
        // NOTE(review): `IString` also implements `Borrow<str>`; for keyed
        // map lookups by `&str` to be correct, `raw_hash` must agree with
        // `str::hash` — confirm against `IValue::raw_hash`.
        self.0.raw_hash(state);
    }
}
399
400impl Debug for IString {
401 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
402 Debug::fmt(self.as_str(), f)
403 }
404}
405
#[cfg(test)]
mod tests {
    use super::*;

    #[mockalloc::test]
    fn can_intern() {
        let x = IString::intern("foo");
        let y = IString::intern("bar");
        let z = IString::intern("foo");

        // Interning equal contents must return the same allocation;
        // distinct contents must not.
        assert_eq!(x.as_ptr(), z.as_ptr());
        assert_ne!(x.as_ptr(), y.as_ptr());
        assert_eq!(x.as_str(), "foo");
        assert_eq!(y.as_str(), "bar");
    }

    #[mockalloc::test]
    fn default_interns_string() {
        // The empty string is backed by the static EMPTY_HEADER, so
        // `intern("")` and `new()` share a pointer and never allocate.
        let x = IString::intern("");
        let y = IString::new();
        let z = IString::intern("foo");

        assert_eq!(x.as_ptr(), y.as_ptr());
        assert_ne!(x.as_ptr(), z.as_ptr());
    }
}