1use crate::dynamic_set::{dynamic_set, Entry};
11use crate::static_sets::StaticAtomSet;
12use debug_unreachable::debug_unreachable;
13
14use std::borrow::Cow;
15use std::cmp::Ordering::{self, Equal};
16use std::fmt;
17use std::hash::{Hash, Hasher};
18use std::marker::PhantomData;
19use std::mem;
20use std::num::NonZeroU64;
21use std::ops;
22use std::slice;
23use std::str;
24use std::sync::atomic::Ordering::SeqCst;
25
// Representation tags stored in the low two bits of an atom's word.
const DYNAMIC_TAG: u8 = 0b_00; // word is a pointer to an interned Entry
const INLINE_TAG: u8 = 0b_01; // non-zero, so a packed inline word is never 0
const STATIC_TAG: u8 = 0b_10; // upper bits hold an index into the static set
const TAG_MASK: u64 = 0b_11;

// For inline atoms, the byte length is kept in bits 4..8 of the low byte.
const LEN_OFFSET: u64 = 4;
const LEN_MASK: u64 = 0xF0;

// An inline atom stores its string in the 7 bytes that remain after the
// tag/length byte.
const MAX_INLINE_LEN: usize = 7;
// A static atom keeps its set index in the upper 32 bits of the word.
const STATIC_SHIFT_BITS: usize = 32;
35
/// A handle to an interned string.
///
/// The entire atom is a single tagged 64-bit word; the low two bits select the
/// representation (`DYNAMIC_TAG` pointer, `INLINE_TAG` packed bytes, or
/// `STATIC_TAG` index — see the constants above). `NonZeroU64` lets
/// `Option<Atom>` occupy the same size as `Atom` (niche optimization).
///
/// NOTE(review): the derived `PartialEq` compares the raw words only; for
/// dynamic atoms this presumes the dynamic set interns each string at most
/// once — verify against `dynamic_set`.
#[derive(PartialEq, Eq)]
pub struct Atom<Static> {
    // dynamic: pointer to `Entry`; inline: tag/len byte plus up to 7 string
    // bytes; static: set index shifted left by STATIC_SHIFT_BITS.
    unsafe_data: NonZeroU64,
    // Marks which static set this atom belongs to without storing one.
    phantom: PhantomData<Static>,
}
84
impl<Static> Atom<Static> {
    /// Builds an atom referring to entry `n` of the static set.
    ///
    /// Hidden: intended for generated `atom!()`-style code, not end users.
    #[inline(always)]
    #[doc(hidden)]
    pub const fn pack_static(n: u32) -> Self {
        Self {
            unsafe_data: unsafe {
                // SAFETY: STATIC_TAG is non-zero, so the packed word is non-zero.
                NonZeroU64::new_unchecked((STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS))
            },
            phantom: PhantomData,
        }
    }

    /// Builds an atom whose string bytes are stored inline in the word.
    ///
    /// `n` carries the string bytes and `len` the byte length
    /// (at most MAX_INLINE_LEN). Hidden: intended for generated code.
    #[inline(always)]
    #[doc(hidden)]
    pub const fn pack_inline(mut n: u64, len: u8) -> Self {
        if cfg!(target_endian = "big") {
            // Rearrange the payload so it lands in the same memory bytes that
            // `inline_atom_slice` reads on a big-endian target.
            // NOTE(review): assumes the caller supplies `n` with string bytes
            // in little-endian byte order — confirm against the codegen that
            // invokes this.
            n = n.to_le() << 8;
        }

        let data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET) | n;
        Self {
            // SAFETY: INLINE_TAG is non-zero, so `data` is non-zero.
            unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
            phantom: PhantomData,
        }
    }

    /// Returns the representation tag held in the low two bits of the word.
    fn tag(&self) -> u8 {
        (self.unsafe_data.get() & TAG_MASK) as u8
    }
}
125
126impl<Static: StaticAtomSet> Atom<Static> {
127 #[doc(hidden)]
129 pub fn unsafe_data(&self) -> u64 {
130 self.unsafe_data.get()
131 }
132
133 #[doc(hidden)]
135 pub fn is_static(&self) -> bool {
136 self.tag() == STATIC_TAG
137 }
138
139 #[doc(hidden)]
141 pub fn is_dynamic(&self) -> bool {
142 self.tag() == DYNAMIC_TAG
143 }
144
145 #[doc(hidden)]
147 pub fn is_inline(&self) -> bool {
148 self.tag() == INLINE_TAG
149 }
150
151 fn static_index(&self) -> u64 {
152 self.unsafe_data.get() >> STATIC_SHIFT_BITS
153 }
154
155 pub fn get_hash(&self) -> u32 {
157 match self.tag() {
158 DYNAMIC_TAG => {
159 let entry = self.unsafe_data.get() as *const Entry;
160 unsafe { (*entry).hash }
161 }
162 STATIC_TAG => Static::get().hashes[self.static_index() as usize],
163 INLINE_TAG => {
164 let data = self.unsafe_data.get();
165 ((data >> 32) ^ data) as u32
167 }
168 _ => unsafe { debug_unreachable!() },
169 }
170 }
171
172 pub fn try_static(string_to_add: &str) -> Option<Self> {
173 Self::try_static_internal(string_to_add).ok()
174 }
175
176 fn try_static_internal(string_to_add: &str) -> Result<Self, phf_shared::Hashes> {
177 let static_set = Static::get();
178 let hash = phf_shared::hash(&*string_to_add, &static_set.key);
179 let index = phf_shared::get_index(&hash, static_set.disps, static_set.atoms.len());
180
181 if static_set.atoms[index as usize] == string_to_add {
182 Ok(Self::pack_static(index))
183 } else {
184 Err(hash)
185 }
186 }
187}
188
189impl<Static: StaticAtomSet> Default for Atom<Static> {
190 #[inline]
191 fn default() -> Self {
192 Atom::pack_static(Static::empty_string_index())
193 }
194}
195
196impl<Static: StaticAtomSet> Hash for Atom<Static> {
197 #[inline]
198 fn hash<H>(&self, state: &mut H)
199 where
200 H: Hasher,
201 {
202 state.write_u32(self.get_hash())
203 }
204}
205
impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
    /// Interns a string, choosing the cheapest representation: empty string
    /// -> the static empty-string atom; at most MAX_INLINE_LEN bytes ->
    /// packed inline; otherwise a static-set hit or a dynamic-set entry.
    fn from(string_to_add: Cow<'a, str>) -> Self {
        let len = string_to_add.len();
        if len == 0 {
            Self::pack_static(Static::empty_string_index())
        } else if len <= MAX_INLINE_LEN {
            // Tag and length go in the low byte; string bytes fill the rest.
            let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
            {
                let dest = inline_atom_slice_mut(&mut data);
                dest[..len].copy_from_slice(string_to_add.as_bytes());
            }
            Atom {
                // SAFETY: INLINE_TAG is non-zero, so `data` is non-zero.
                unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
                phantom: PhantomData,
            }
        } else {
            // Try the static set first; on a miss, reuse part of the phf hash
            // (`hash.g`) for insertion into the process-wide dynamic set.
            Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
                let ptr: std::ptr::NonNull<Entry> = dynamic_set().insert(string_to_add, hash.g);
                let data = ptr.as_ptr() as u64;
                // Entry pointers must leave the two tag bits zero
                // (== DYNAMIC_TAG) for the tagged word to be decodable.
                debug_assert!(0 == data & TAG_MASK);
                Atom {
                    // SAFETY: `ptr` is NonNull, so `data` is non-zero.
                    unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
                    phantom: PhantomData,
                }
            })
        }
    }
}
236
impl<Static: StaticAtomSet> Clone for Atom<Static> {
    #[inline(always)]
    fn clone(&self) -> Self {
        // Only dynamic atoms are reference-counted; inline and static atoms
        // are plain word copies.
        if self.tag() == DYNAMIC_TAG {
            let entry = self.unsafe_data.get() as *const Entry;
            // Bump the interned entry's refcount; paired with the decrement
            // in Drop.
            unsafe { &*entry }.ref_count.fetch_add(1, SeqCst);
        }
        // NonZeroU64 and PhantomData are Copy, so this duplicates the word.
        Atom { ..*self }
    }
}
247
impl<Static> Drop for Atom<Static> {
    #[inline]
    fn drop(&mut self) {
        // Inline and static atoms own nothing; only dynamic atoms hold a
        // reference count.
        if self.tag() == DYNAMIC_TAG {
            let entry = self.unsafe_data.get() as *const Entry;
            // fetch_sub returns the previous value, so 1 means this was the
            // last reference: unintern the entry.
            if unsafe { &*entry }.ref_count.fetch_sub(1, SeqCst) == 1 {
                drop_slow(self)
            }
        }

        // Kept out of line so the common (non-final-drop) path stays small.
        fn drop_slow<Static>(this: &mut Atom<Static>) {
            dynamic_set().remove(this.unsafe_data.get() as *mut Entry);
        }
    }
}
264
impl<Static: StaticAtomSet> ops::Deref for Atom<Static> {
    type Target = str;

    /// Borrows the atom's string from whichever representation backs it.
    #[inline]
    fn deref(&self) -> &str {
        unsafe {
            match self.tag() {
                DYNAMIC_TAG => {
                    // The word is a pointer to the interned Entry; borrow its
                    // owned string.
                    let entry = self.unsafe_data.get() as *const Entry;
                    &(*entry).string
                }
                INLINE_TAG => {
                    // Length lives in bits 4..8 of the word; the bytes live in
                    // the remaining 7 bytes (see inline_atom_slice).
                    let len = (self.unsafe_data() & LEN_MASK) >> LEN_OFFSET;
                    debug_assert!(len as usize <= MAX_INLINE_LEN);
                    let src = inline_atom_slice(&self.unsafe_data);
                    // SAFETY: inline atoms are only built from valid UTF-8 of
                    // length `len`, so the prefix is in-bounds and valid UTF-8.
                    str::from_utf8_unchecked(src.get_unchecked(..(len as usize)))
                }
                STATIC_TAG => Static::get().atoms[self.static_index() as usize],
                _ => debug_unreachable!(),
            }
        }
    }
}
288
289impl<Static: StaticAtomSet> fmt::Debug for Atom<Static> {
290 #[inline]
291 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
292 let ty_str = unsafe {
293 match self.tag() {
294 DYNAMIC_TAG => "dynamic",
295 INLINE_TAG => "inline",
296 STATIC_TAG => "static",
297 _ => debug_unreachable!(),
298 }
299 };
300
301 write!(f, "Atom('{}' type={})", &*self, ty_str)
302 }
303}
304
305impl<Static: StaticAtomSet> PartialOrd for Atom<Static> {
306 #[inline]
307 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
308 if self.unsafe_data == other.unsafe_data {
309 return Some(Equal);
310 }
311 self.as_ref().partial_cmp(other.as_ref())
312 }
313}
314
315impl<Static: StaticAtomSet> Ord for Atom<Static> {
316 #[inline]
317 fn cmp(&self, other: &Self) -> Ordering {
318 if self.unsafe_data == other.unsafe_data {
319 return Equal;
320 }
321 self.as_ref().cmp(other.as_ref())
322 }
323}
324
325impl<Static: StaticAtomSet> Atom<Static> {
329 fn from_mutated_str<F: FnOnce(&mut str)>(s: &str, f: F) -> Self {
330 let mut buffer = mem::MaybeUninit::<[u8; 64]>::uninit();
331 let buffer = unsafe { &mut *buffer.as_mut_ptr() };
332
333 if let Some(buffer_prefix) = buffer.get_mut(..s.len()) {
334 buffer_prefix.copy_from_slice(s.as_bytes());
335 let as_str = unsafe { ::std::str::from_utf8_unchecked_mut(buffer_prefix) };
336 f(as_str);
337 Atom::from(&*as_str)
338 } else {
339 let mut string = s.to_owned();
340 f(&mut string);
341 Atom::from(string)
342 }
343 }
344
345 pub fn to_ascii_uppercase(&self) -> Self {
349 for (i, b) in self.bytes().enumerate() {
350 if let b'a'..=b'z' = b {
351 return Atom::from_mutated_str(self, |s| s[i..].make_ascii_uppercase());
352 }
353 }
354 self.clone()
355 }
356
357 pub fn to_ascii_lowercase(&self) -> Self {
361 for (i, b) in self.bytes().enumerate() {
362 if let b'A'..=b'Z' = b {
363 return Atom::from_mutated_str(self, |s| s[i..].make_ascii_lowercase());
364 }
365 }
366 self.clone()
367 }
368
369 pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
373 (self == other) || self.eq_str_ignore_ascii_case(&**other)
374 }
375
376 pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
380 (&**self).eq_ignore_ascii_case(other)
381 }
382}
383
/// Borrows the 7 payload bytes of an inline atom's word.
///
/// The tag/length byte is the least-significant byte of the `u64`; in memory
/// that is the first byte on little-endian and the last on big-endian, so the
/// payload is the remaining 7 bytes (== MAX_INLINE_LEN), skipping accordingly.
#[inline(always)]
fn inline_atom_slice(x: &NonZeroU64) -> &[u8] {
    let base = (x as *const NonZeroU64).cast::<u8>();
    let payload = if cfg!(target_endian = "little") {
        // SAFETY: one byte past the start is still inside the 8-byte word.
        unsafe { base.add(1) }
    } else {
        base
    };
    // SAFETY: `payload .. payload + 7` lies entirely within the u64 that `x`
    // borrows, and the returned slice inherits `x`'s borrow lifetime.
    unsafe { slice::from_raw_parts(payload, 7) }
}
395
/// Mutably borrows the 7 payload bytes of an inline atom's word, leaving the
/// tag/length byte untouched.
///
/// Mirrors `inline_atom_slice`: the tag/length byte is the least-significant
/// byte of the `u64`, first in memory on little-endian, last on big-endian.
#[inline(always)]
fn inline_atom_slice_mut(x: &mut u64) -> &mut [u8] {
    let base = (x as *mut u64).cast::<u8>();
    let payload = if cfg!(target_endian = "little") {
        // SAFETY: one byte past the start is still inside the 8-byte word.
        unsafe { base.add(1) }
    } else {
        base
    };
    // SAFETY: `payload .. payload + 7` lies entirely within the u64 that `x`
    // exclusively borrows, so the returned slice is unique and in-bounds.
    unsafe { slice::from_raw_parts_mut(payload, 7) }
}