Skip to main content

mago_atom/
lib.rs

1#![allow(clippy::too_many_arguments)]
2
3//! A high-performance, globally-interned string library for the Mago ecosystem.
4//!
5//! This crate provides `Atom`, a canonical string type that guarantees any given
6//! string is stored in memory only once. It acts as a wrapper for the `ustr` crate and adds
7//! highly-optimized constructors for common string manipulations like lowercasing,
8//! concatenation, and number formatting.
9//!
10//! The key feature is the ability to perform these operations without heap allocations
11//! for common cases by using stack-allocated buffers, making this crate ideal for
12//! performance-critical code.
13//!
14//! # Usage
15//!
16//! ```
17//! use mago_atom::*;
18//!
19//! // Create an Atom. This is a cheap lookup in a global cache.
20//! let s1 = atom("Hello");
21//!
22//! // Use an optimized, zero-heap-allocation constructor.
23//! let s2 = ascii_lowercase_atom("Hello");
24//!
25//! assert_eq!(s2.as_str(), "hello");
26//!
27//! // Use the specialized, high-performance map.
28//! let mut map = AtomMap::default();
29//! map.insert(s1, 123);
30//! ```
31
32#[cfg(target_arch = "aarch64")]
33use std::arch::aarch64::vandq_u8;
34#[cfg(target_arch = "aarch64")]
35use std::arch::aarch64::vceqq_u8;
36#[cfg(target_arch = "aarch64")]
37use std::arch::aarch64::vcgeq_u8;
38#[cfg(target_arch = "aarch64")]
39use std::arch::aarch64::vcleq_u8;
40#[cfg(target_arch = "aarch64")]
41use std::arch::aarch64::vdupq_n_u8;
42#[cfg(target_arch = "aarch64")]
43use std::arch::aarch64::vld1q_u8;
44#[cfg(target_arch = "aarch64")]
45use std::arch::aarch64::vminvq_u8;
46#[cfg(target_arch = "aarch64")]
47use std::arch::aarch64::vorrq_u8;
48#[cfg(target_arch = "x86_64")]
49use std::arch::x86_64::*;
50use std::collections::HashMap;
51use std::collections::HashSet;
52use std::hash::BuildHasherDefault;
53
54use ustr::IdentityHasher;
55
56pub use ustr::Ustr as Atom;
57pub use ustr::ustr as atom;
58
/// A high-performance `HashMap` using `Atom` as the key.
///
/// This map is significantly faster than a standard `HashMap` because it uses the
/// `Atom`'s pre-computed hash instead of hashing the string content on every lookup.
///
/// The hasher is `BuildHasherDefault<IdentityHasher>` rather than the standard
/// `RandomState`, so `HashMap::new()` is not available through this alias. Construct
/// a map with `AtomMap::default()` (or one of the `with_hasher` constructors).
pub type AtomMap<V> = HashMap<Atom, V, BuildHasherDefault<IdentityHasher>>;
64
/// A high-performance `HashSet` using `Atom` as the key.
///
/// This set is significantly faster than a standard `HashSet` because it uses the
/// `Atom`'s pre-computed hash.
///
/// The hasher is `BuildHasherDefault<IdentityHasher>` rather than the standard
/// `RandomState`, so `HashSet::new()` is not available through this alias. Construct
/// a set with `AtomSet::default()` (or one of the `with_hasher` constructors).
pub type AtomSet = HashSet<Atom, BuildHasherDefault<IdentityHasher>>;
70
/// The maximum size in bytes for a string to be processed on the stack.
///
/// The lowercasing and concatenation constructors in this file build their result in a
/// `[u8; STACK_BUF_SIZE]` scratch array when it fits, and fall back to a heap-allocated
/// `String` when the result would be longer than this.
const STACK_BUF_SIZE: usize = 256;
73
74thread_local! {
75    static EMPTY_ATOM: Atom = atom("");
76}
77
78/// Returns the canonical `Atom` for an empty string.
79///
80/// This is a very cheap operation.
81#[inline]
82#[must_use]
83pub fn empty_atom() -> Atom {
84    EMPTY_ATOM.with(|&atom| atom)
85}
86
/// Concatenates between 2 and 12 string slices into a single `Atom`.
///
/// The macro dispatches on the number of arguments to the matching
/// `concat_atomN` function (`concat_atom2` through `concat_atom12`). Each argument
/// is passed as `&$arg`, so it is only borrowed and may be anything that coerces
/// to `&str` from a reference, such as a `&str` or a `String`. A trailing comma is
/// accepted.
///
/// # Examples
///
/// ```
/// use mago_atom::concat_atom;
///
/// let name = String::from("bar");
/// let joined = concat_atom!("foo", "::", name);
///
/// assert_eq!(joined.as_str(), "foo::bar");
/// ```
///
/// # Compile errors
///
/// Invoking the macro with 0, 1, or more than 12 arguments fails to compile with
/// the message "concat_atom! macro supports between 2 and 12 arguments only".
#[macro_export]
macro_rules! concat_atom {
    ($s1:expr, $s2:expr $(,)?) => {
        $crate::concat_atom2(&$s1, &$s2)
    };
    ($s1:expr, $s2:expr, $s3:expr $(,)?) => {
        $crate::concat_atom3(&$s1, &$s2, &$s3)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr $(,)?) => {
        $crate::concat_atom4(&$s1, &$s2, &$s3, &$s4)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr $(,)?) => {
        $crate::concat_atom5(&$s1, &$s2, &$s3, &$s4, &$s5)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr $(,)?) => {
        $crate::concat_atom6(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr $(,)?) => {
        $crate::concat_atom7(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr $(,)?) => {
        $crate::concat_atom8(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr $(,)?) => {
        $crate::concat_atom9(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr $(,)?) => {
        $crate::concat_atom10(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr $(,)?) => {
        $crate::concat_atom11(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr, $s12:expr $(,)?) => {
        $crate::concat_atom12(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11, &$s12)
    };
    // Catch-all for every unsupported arity. `*` (rather than `+`) lets an empty
    // invocation reach this arm too, so it reports the same message as the
    // one-argument and 13+-argument cases.
    ($($arg:expr),* $(,)?) => {
        compile_error!("concat_atom! macro supports between 2 and 12 arguments only")
    };
}
135
136/// Creates an `Atom` from a constant name, lowercasing only the namespace part.
137///
138/// This function is optimized to avoid heap allocations for constant names up to
139/// `STACK_BUF_SIZE` bytes by building the new string on the stack. For names
140/// longer than the buffer, it falls back to a heap allocation.
141#[inline]
142#[must_use]
143pub fn ascii_lowercase_constant_name_atom(name: &str) -> Atom {
144    if let Some(last_slash_idx) = name.rfind('\\') {
145        let (namespace, const_name) = name.split_at(last_slash_idx);
146        let const_name = &const_name[1..];
147
148        if name.len() > STACK_BUF_SIZE {
149            let mut lowercased_namespace = namespace.to_ascii_lowercase();
150            lowercased_namespace.push('\\');
151            lowercased_namespace.push_str(const_name);
152            return atom(&lowercased_namespace);
153        }
154
155        let mut stack_buf = [0u8; STACK_BUF_SIZE];
156        let mut index = 0;
157
158        for byte in namespace.bytes() {
159            stack_buf[index] = byte.to_ascii_lowercase();
160            index += 1;
161        }
162
163        stack_buf[index] = b'\\';
164        index += 1;
165
166        let const_bytes = const_name.as_bytes();
167        stack_buf[index..index + const_bytes.len()].copy_from_slice(const_bytes);
168        index += const_bytes.len();
169
170        atom(
171            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
172            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
173        )
174    } else {
175        atom(name)
176    }
177}
178
179/// Creates an `Atom` from a lowercased version of a string slice.
180///
181/// This function is highly optimized. It performs a fast scan, and if the string
182/// is already lowercase, it returns an `Atom` without any new allocations.
183/// Otherwise, it builds the lowercase version on the stack for strings up to
184/// `STACK_BUF_SIZE` bytes.
185#[inline]
186#[must_use]
187pub fn ascii_lowercase_atom(s: &str) -> Atom {
188    let bytes = s.as_bytes();
189
190    // Fast path: single pass to check if already lowercase ASCII
191    // This combines the is_ascii() and any(is_ascii_uppercase) checks into one iteration
192    let mut needs_lowercasing = false;
193    let mut is_ascii = true;
194    for &b in bytes {
195        if b > 127 {
196            is_ascii = false;
197            break;
198        }
199        if b.is_ascii_uppercase() {
200            needs_lowercasing = true;
201        }
202    }
203
204    // If it's ASCII and already lowercase, return as-is
205    if is_ascii && !needs_lowercasing {
206        return atom(s);
207    }
208
209    // Fast path for ASCII-only strings: use simple byte manipulation
210    if is_ascii && s.len() <= STACK_BUF_SIZE {
211        let mut stack_buf = [0u8; STACK_BUF_SIZE];
212        for (i, &b) in bytes.iter().enumerate() {
213            stack_buf[i] = b.to_ascii_lowercase();
214        }
215        return atom(
216            // SAFETY: ASCII lowercase of ASCII bytes is valid UTF-8
217            unsafe { std::str::from_utf8_unchecked(&stack_buf[..s.len()]) },
218        );
219    }
220
221    atom(&s.to_lowercase())
222}
223
/// Checks if `haystack` starts with `prefix`, ignoring ASCII case.
///
/// Only the ASCII letters `A`-`Z` / `a`-`z` are folded. Every other byte,
/// including the bytes of non-ASCII characters, must match exactly, which is the
/// same rule as `eq_ignore_ascii_case` on byte slices. An empty `prefix` always
/// matches.
///
/// This function uses SIMD instructions (AVX2 on `x86_64`, NEON on aarch64)
/// when available and beneficial for the input size: prefixes of at least 32
/// bytes on `x86_64` with AVX2 detected at runtime, and of at least 16 bytes on
/// aarch64. Shorter prefixes are compared with the scalar routine.
///
/// # Examples
///
/// ```
/// use mago_atom::starts_with_ignore_case;
///
/// assert!(starts_with_ignore_case("HelloWorld", "hello"));
/// assert!(starts_with_ignore_case("FOOBAR", "FooBar"));
/// assert!(starts_with_ignore_case("test", "TEST"));
/// assert!(!starts_with_ignore_case("hello", "world"));
/// assert!(!starts_with_ignore_case("hi", "hello"));
///
/// // Long prefixes are folded the same way as short ones.
/// assert!(starts_with_ignore_case(
///     "APP\\HTTP\\CONTROLLERS\\ADMIN\\USERCONTROLLER::index",
///     "app\\http\\controllers\\admin\\usercontroller",
/// ));
/// ```
#[inline]
#[must_use]
pub fn starts_with_ignore_case(haystack: &str, prefix: &str) -> bool {
    /// Compares the first `len` bytes of both strings 32 bytes at a time with AVX2,
    /// finishing any remainder with the scalar comparison.
    ///
    /// # Safety
    ///
    /// The CPU must support AVX2, and both `haystack` and `prefix` must be at
    /// least `len` bytes long.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn starts_with_avx2(haystack: &str, prefix: &str, len: usize) -> bool {
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            // `_mm256_cmpgt_epi8` is a strict, signed comparison, so `A..=Z` is
            // expressed with exclusive bounds. Bytes >= 0x80 are negative as `i8`
            // and therefore never classified as letters.
            let before_upper_a = _mm256_set1_epi8((b'A' - 1) as i8);
            let after_upper_z = _mm256_set1_epi8((b'Z' + 1) as i8);
            let case_bit = _mm256_set1_epi8(0x20);

            let mut i = 0;
            while i + 32 <= len {
                let h = _mm256_loadu_si256(haystack_bytes.as_ptr().add(i) as *const __m256i);
                let p = _mm256_loadu_si256(prefix_bytes.as_ptr().add(i) as *const __m256i);

                // Lowercase the haystack chunk: set bit 0x20 on uppercase letters only.
                let h_is_upper =
                    _mm256_and_si256(_mm256_cmpgt_epi8(h, before_upper_a), _mm256_cmpgt_epi8(after_upper_z, h));
                let h_lower = _mm256_or_si256(h, _mm256_and_si256(h_is_upper, case_bit));

                // Lowercase the prefix chunk the same way.
                let p_is_upper =
                    _mm256_and_si256(_mm256_cmpgt_epi8(p, before_upper_a), _mm256_cmpgt_epi8(after_upper_z, p));
                let p_lower = _mm256_or_si256(p, _mm256_and_si256(p_is_upper, case_bit));

                // One mask bit per byte; all 32 set (== -1) means the chunks are equal.
                let eq = _mm256_cmpeq_epi8(h_lower, p_lower);
                let mask = _mm256_movemask_epi8(eq);
                if mask != -1i32 {
                    return false;
                }

                i += 32;
            }

            // Handle remaining bytes
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    /// Compares the first `len` bytes of both strings 16 bytes at a time with NEON,
    /// finishing any remainder with the scalar comparison.
    ///
    /// # Safety
    ///
    /// Both `haystack` and `prefix` must be at least `len` bytes long.
    #[cfg(target_arch = "aarch64")]
    #[target_feature(enable = "neon")]
    unsafe fn starts_with_neon(haystack: &str, prefix: &str, len: usize) -> bool {
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            let upper_a = vdupq_n_u8(b'A');
            let upper_z = vdupq_n_u8(b'Z');
            let case_bit = vdupq_n_u8(0x20);

            let mut i = 0;
            while i + 16 <= len {
                let h = vld1q_u8(haystack_bytes.as_ptr().add(i));
                let p = vld1q_u8(prefix_bytes.as_ptr().add(i));

                // Lowercase the haystack chunk: set bit 0x20 on uppercase letters only.
                let h_is_upper = vandq_u8(vcgeq_u8(h, upper_a), vcleq_u8(h, upper_z));
                let h_lower = vorrq_u8(h, vandq_u8(h_is_upper, case_bit));

                // Lowercase the prefix chunk the same way.
                let p_is_upper = vandq_u8(vcgeq_u8(p, upper_a), vcleq_u8(p, upper_z));
                let p_lower = vorrq_u8(p, vandq_u8(p_is_upper, case_bit));

                // Each equal lane is 0xFF; a minimum below 0xFF means some lane differed.
                let eq = vceqq_u8(h_lower, p_lower);
                let min = vminvq_u8(eq);
                if min != 0xFF {
                    return false;
                }

                i += 16;
            }

            // Handle remaining bytes
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    let len = prefix.len();
    if haystack.len() < len {
        return false;
    }

    #[cfg(target_arch = "x86_64")]
    {
        if len >= 32 && std::is_x86_feature_detected!("avx2") {
            // SAFETY: we've checked that AVX2 is available and haystack.len() >= len
            return unsafe { starts_with_avx2(haystack, prefix, len) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if len >= 16 {
            // SAFETY: NEON is always available on aarch64 and haystack.len() >= len
            return unsafe { starts_with_neon(haystack, prefix, len) };
        }
    }

    haystack.as_bytes()[..len].eq_ignore_ascii_case(prefix.as_bytes())
}
352
/// Generates one public `fn name(n: T) -> Atom` per `name(T)` entry.
///
/// Each generated function formats the integer with an `itoa::Buffer` held in a
/// local variable and interns the resulting text.
macro_rules! integer_to_atom_fns {
    ( $( $name:ident($int:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($int),
                "` value with zero heap allocations."
            )]
            #[inline]
            #[must_use]
            pub fn $name(n: $int) -> Atom {
                atom(itoa::Buffer::new().format(n))
            }
        )+
    };
}
371
/// Generates one public `fn name(n: T) -> Atom` per `name(T)` entry.
///
/// Each generated function formats the float with a `ryu::Buffer` held in a
/// local variable and interns the resulting text.
macro_rules! float_to_atom_fns {
    ( $( $name:ident($float:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($float),
                "` value with zero heap allocations."
            )]
            #[inline]
            #[must_use]
            pub fn $name(n: $float) -> Atom {
                atom(ryu::Buffer::new().format(n))
            }
        )+
    };
}
390
/// Generates the fixed-arity `concat_atomN` functions.
///
/// Each entry has the form `name(N, p1, p2, ...)`: `name` is the function to
/// generate, `N` is only used in its documentation, and the identifiers become
/// its `&str` parameters, concatenated in the order given.
macro_rules! concat_fns {
    ( $( $func_name:ident($n:literal, $($s:ident),+) ),+ $(,)?) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` as a result of concatenating ",
                stringify!($n),
                " string slices."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name($($s: &str),+) -> Atom {
                let parts = [$($s),+];
                let total_len: usize = parts.iter().map(|part| part.len()).sum();

                // Too long for the stack buffer: assemble on the heap instead.
                if total_len > STACK_BUF_SIZE {
                    let mut joined = String::with_capacity(total_len);
                    for part in parts.iter() {
                        joined.push_str(part);
                    }

                    return atom(&joined);
                }

                let mut buffer = [0u8; STACK_BUF_SIZE];
                let mut cursor = 0;
                for part in parts.iter() {
                    let end = cursor + part.len();
                    buffer[cursor..end].copy_from_slice(part.as_bytes());
                    cursor = end;
                }

                // SAFETY: the filled region is the back-to-back bytes of the input
                // `&str` values, and a concatenation of valid UTF-8 is valid UTF-8.
                atom(unsafe { std::str::from_utf8_unchecked(&buffer[..cursor]) })
            }
        )+
    };
}
422
// Generate the `<type>_atom` constructors for the signed and unsigned integer types.
integer_to_atom_fns!(
    i8_atom(i8),
    i16_atom(i16),
    i32_atom(i32),
    i64_atom(i64),
    i128_atom(i128),
    isize_atom(isize),
    u8_atom(u8),
    u16_atom(u16),
    u32_atom(u32),
    u64_atom(u64),
    u128_atom(u128),
    usize_atom(usize),
);

// Generate the `f32_atom` and `f64_atom` constructors.
float_to_atom_fns!(f32_atom(f32), f64_atom(f64),);

// Generate `concat_atom2` through `concat_atom12`, the functions that the
// `concat_atom!` macro dispatches to. The leading number is used only in the
// generated documentation.
concat_fns!(
    concat_atom2(2, s1, s2),
    concat_atom3(3, s1, s2, s3),
    concat_atom4(4, s1, s2, s3, s4),
    concat_atom5(5, s1, s2, s3, s4, s5),
    concat_atom6(6, s1, s2, s3, s4, s5, s6),
    concat_atom7(7, s1, s2, s3, s4, s5, s6, s7),
    concat_atom8(8, s1, s2, s3, s4, s5, s6, s7, s8),
    concat_atom9(9, s1, s2, s3, s4, s5, s6, s7, s8, s9),
    concat_atom10(10, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10),
    concat_atom11(11, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11),
    concat_atom12(12, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12),
);
453);