Skip to main content

mago_atom/
lib.rs

1#![allow(clippy::too_many_arguments)]
2
3//! A high-performance, globally-interned string library for the Mago ecosystem.
4//!
5//! This crate provides `Atom`, a canonical string type that guarantees any given
6//! string is stored in memory only once. It acts as a wrapper for the `ustr` crate and adds
7//! highly-optimized constructors for common string manipulations like lowercasing,
8//! concatenation, and number formatting.
9//!
10//! The key feature is the ability to perform these operations without heap allocations
11//! for common cases by using stack-allocated buffers, making this crate ideal for
12//! performance-critical code.
13//!
14//! # Usage
15//!
16//! ```
17//! use mago_atom::*;
18//!
19//! // Create an Atom. This is a cheap lookup in a global cache.
20//! let s1 = atom("Hello");
21//!
22//! // Use an optimized, zero-heap-allocation constructor.
23//! let s2 = ascii_lowercase_atom("Hello");
24//!
25//! assert_eq!(s2.as_str(), "hello");
26//!
27//! // Use the specialized, high-performance map.
28//! let mut map = AtomMap::default();
29//! map.insert(s1, 123);
30//! ```
31
32#[cfg(target_arch = "aarch64")]
33use std::arch::aarch64::vandq_u8;
34#[cfg(target_arch = "aarch64")]
35use std::arch::aarch64::vceqq_u8;
36#[cfg(target_arch = "aarch64")]
37use std::arch::aarch64::vcgeq_u8;
38#[cfg(target_arch = "aarch64")]
39use std::arch::aarch64::vcleq_u8;
40#[cfg(target_arch = "aarch64")]
41use std::arch::aarch64::vdupq_n_u8;
42#[cfg(target_arch = "aarch64")]
43use std::arch::aarch64::vld1q_u8;
44#[cfg(target_arch = "aarch64")]
45use std::arch::aarch64::vminvq_u8;
46#[cfg(target_arch = "aarch64")]
47use std::arch::aarch64::vorrq_u8;
48#[cfg(target_arch = "x86_64")]
49use std::arch::x86_64::*;
50use std::collections::HashMap;
51use std::collections::HashSet;
52use std::hash::BuildHasherDefault;
53
54use ustr::IdentityHasher;
55
56pub use ustr::Ustr as Atom;
57pub use ustr::ustr as atom;
58
/// A high-performance `HashMap` using `Atom` as the key.
///
/// This map is significantly faster than a standard `HashMap` because it uses the
/// `Atom`'s pre-computed hash instead of hashing the string content on every lookup.
///
/// The hasher is `BuildHasherDefault<IdentityHasher>` rather than the standard
/// `RandomState`, so create instances with `AtomMap::default()` (or one of the
/// `with_hasher` constructors); `HashMap::new()` is only defined for `RandomState`.
pub type AtomMap<V> = HashMap<Atom, V, BuildHasherDefault<IdentityHasher>>;
64
/// A high-performance `HashSet` using `Atom` as the key.
///
/// This set is significantly faster than a standard `HashSet` because it uses the
/// `Atom`'s pre-computed hash.
///
/// The hasher is `BuildHasherDefault<IdentityHasher>` rather than the standard
/// `RandomState`, so create instances with `AtomSet::default()` (or one of the
/// `with_hasher` constructors); `HashSet::new()` is only defined for `RandomState`.
pub type AtomSet = HashSet<Atom, BuildHasherDefault<IdentityHasher>>;
70
/// The maximum size in bytes for a string to be processed on the stack.
///
/// The constructors in this file build their result in a `[u8; STACK_BUF_SIZE]`
/// array when it fits, and fall back to a heap-allocated `String` otherwise.
const STACK_BUF_SIZE: usize = 256;
73
thread_local! {
    // Per-thread cached handle for the interned empty string. `thread_local!` statics
    // are initialized lazily, so `atom("")` runs at most once per thread, on first use.
    static EMPTY_ATOM: Atom = atom("");
}
77
78/// Returns the canonical `Atom` for an empty string.
79///
80/// This is a very cheap operation.
81#[inline]
82#[must_use]
83pub fn empty_atom() -> Atom {
84    EMPTY_ATOM.with(|&atom| atom)
85}
86
/// A macro to concatenate between 2 and 12 string slices into a single `Atom`.
///
/// This macro dispatches to the specialized `concat_atomN` function matching the
/// number of arguments provided, making it highly performant for a known number of inputs.
/// Those functions use a stack-allocated buffer to avoid hitting the heap when the
/// combined length fits.
///
/// Every argument is passed as `&$arg`, so each argument expression must yield
/// something whose borrow coerces to `&str` (for example a `&str` or a `String`).
///
/// # Examples
///
/// ```
/// use mago_atom::concat_atom;
///
/// let joined = concat_atom!("Hello", ", ", "world");
/// assert_eq!(joined.as_str(), "Hello, world");
/// ```
///
/// # Compile-time errors
///
/// Compilation fails if the macro is called with 0, 1, or more than 12 arguments:
/// 1 or 13+ arguments reach the final `compile_error!` arm, and 0 arguments match
/// no arm at all.
#[macro_export]
macro_rules! concat_atom {
    ($s1:expr, $s2:expr $(,)?) => {
        $crate::concat_atom2(&$s1, &$s2)
    };
    ($s1:expr, $s2:expr, $s3:expr $(,)?) => {
        $crate::concat_atom3(&$s1, &$s2, &$s3)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr $(,)?) => {
        $crate::concat_atom4(&$s1, &$s2, &$s3, &$s4)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr $(,)?) => {
        $crate::concat_atom5(&$s1, &$s2, &$s3, &$s4, &$s5)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr $(,)?) => {
        $crate::concat_atom6(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr $(,)?) => {
        $crate::concat_atom7(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr $(,)?) => {
        $crate::concat_atom8(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr $(,)?) => {
        $crate::concat_atom9(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr $(,)?) => {
        $crate::concat_atom10(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr $(,)?) => {
        $crate::concat_atom11(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr, $s12:expr $(,)?) => {
        $crate::concat_atom12(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11, &$s12)
    };
    // Catch-all: any other non-empty argument list (a single argument, or 13 and more)
    // is rejected with an explicit message instead of a generic "no rules expected" error.
    ($($arg:expr),+ $(,)?) => {
        compile_error!("concat_atom! macro supports between 2 and 12 arguments only")
    };
}
135
136/// Creates an `Atom` from a constant name, lowercasing only the namespace part.
137///
138/// This function is optimized to avoid heap allocations for constant names up to
139/// `STACK_BUF_SIZE` bytes by building the new string on the stack. For names
140/// longer than the buffer, it falls back to a heap allocation.
141#[inline]
142#[must_use]
143pub fn ascii_lowercase_constant_name_atom(name: &str) -> Atom {
144    if let Some(last_slash_idx) = name.rfind('\\') {
145        let (namespace, const_name) = name.split_at(last_slash_idx);
146        let const_name = &const_name[1..];
147
148        if name.len() > STACK_BUF_SIZE {
149            let mut lowercased_namespace = namespace.to_ascii_lowercase();
150            lowercased_namespace.push('\\');
151            lowercased_namespace.push_str(const_name);
152            return atom(&lowercased_namespace);
153        }
154
155        let mut stack_buf = [0u8; STACK_BUF_SIZE];
156        let mut index = 0;
157
158        for byte in namespace.bytes() {
159            stack_buf[index] = byte.to_ascii_lowercase();
160            index += 1;
161        }
162
163        stack_buf[index] = b'\\';
164        index += 1;
165
166        let const_bytes = const_name.as_bytes();
167        stack_buf[index..index + const_bytes.len()].copy_from_slice(const_bytes);
168        index += const_bytes.len();
169
170        atom(
171            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
172            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
173        )
174    } else {
175        atom(name)
176    }
177}
178
179/// Creates an `Atom` from a lowercased version of a string slice.
180///
181/// This function is highly optimized. It performs a fast scan, and if the string
182/// is already lowercase, it returns an `Atom` without any new allocations.
183/// Otherwise, it builds the lowercase version on the stack for strings up to
184/// `STACK_BUF_SIZE` bytes.
185#[inline]
186#[must_use]
187pub fn ascii_lowercase_atom(s: &str) -> Atom {
188    let bytes = s.as_bytes();
189
190    // Fast path: single pass to check if already lowercase ASCII
191    // This combines the is_ascii() and any(is_ascii_uppercase) checks into one iteration
192    let mut needs_lowercasing = false;
193    let mut is_ascii = true;
194    for &b in bytes {
195        if b > 127 {
196            is_ascii = false;
197            break;
198        }
199        if b.is_ascii_uppercase() {
200            needs_lowercasing = true;
201        }
202    }
203
204    // If it's ASCII and already lowercase, return as-is
205    if is_ascii && !needs_lowercasing {
206        return atom(s);
207    }
208
209    // Fast path for ASCII-only strings: use simple byte manipulation
210    if is_ascii && s.len() <= STACK_BUF_SIZE {
211        let mut stack_buf = [0u8; STACK_BUF_SIZE];
212        for (i, &b) in bytes.iter().enumerate() {
213            stack_buf[i] = b.to_ascii_lowercase();
214        }
215        return atom(
216            // SAFETY: ASCII lowercase of ASCII bytes is valid UTF-8
217            unsafe { std::str::from_utf8_unchecked(&stack_buf[..s.len()]) },
218        );
219    }
220
221    // Non-ASCII path: handle Unicode lowercasing
222    if s.len() <= STACK_BUF_SIZE {
223        let mut stack_buf = [0u8; STACK_BUF_SIZE];
224        let mut index = 0;
225
226        for c in s.chars() {
227            for lower_c in c.to_lowercase() {
228                let mut char_buf = [0u8; 4];
229                let encoded = lower_c.encode_utf8(&mut char_buf).as_bytes();
230
231                if index + encoded.len() > STACK_BUF_SIZE {
232                    return atom(&s.to_lowercase());
233                }
234
235                stack_buf[index..index + encoded.len()].copy_from_slice(encoded);
236                index += encoded.len();
237            }
238        }
239
240        return atom(
241            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
242            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
243        );
244    }
245
246    atom(&s.to_lowercase())
247}
248
/// Checks if `haystack` starts with `prefix`, ignoring ASCII case.
///
/// Only the ASCII letters `A`-`Z` / `a`-`z` are treated as case-insensitive; every
/// other byte (including the bytes of non-ASCII characters) must match exactly.
/// The comparison is done on raw bytes, so `prefix` does not have to end on a
/// character boundary of `haystack`.
///
/// This function uses SIMD instructions (AVX2 on `x86_64`, NEON on aarch64)
/// when available and beneficial for the input size. All code paths return the
/// same result as a byte-wise `eq_ignore_ascii_case` over the first `prefix.len()`
/// bytes.
///
/// # Examples
///
/// ```
/// use mago_atom::starts_with_ignore_case;
///
/// assert!(starts_with_ignore_case("HelloWorld", "hello"));
/// assert!(starts_with_ignore_case("FOOBAR", "FooBar"));
/// assert!(starts_with_ignore_case("test", "TEST"));
/// assert!(!starts_with_ignore_case("hello", "world"));
/// assert!(!starts_with_ignore_case("hi", "hello"));
/// ```
#[inline]
#[must_use]
pub fn starts_with_ignore_case(haystack: &str, prefix: &str) -> bool {
    /// AVX2 implementation, comparing 32 bytes per iteration.
    ///
    /// # Safety
    ///
    /// The caller must ensure that AVX2 is available on the running CPU and that
    /// `len <= haystack.len()` and `len <= prefix.len()`.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn starts_with_avx2(haystack: &str, prefix: &str, len: usize) -> bool {
        // SAFETY: the caller guarantees AVX2 support; every 32-byte load starts at
        // offset `i` with `i + 32 <= len`, and `len` does not exceed either slice.
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            // `_mm256_cmpgt_epi8` is a strict, signed comparison, so the inclusive range
            // 'A'..='Z' is tested as `'A' - 1 < byte && byte < 'Z' + 1`. Bytes >= 0x80
            // are negative when viewed as `i8` and therefore never classified as letters.
            let below_upper_a = _mm256_set1_epi8((b'A' - 1) as i8);
            let above_upper_z = _mm256_set1_epi8((b'Z' + 1) as i8);
            let case_bit = _mm256_set1_epi8(0x20);

            let mut i = 0;
            while i + 32 <= len {
                let h = _mm256_loadu_si256(haystack_bytes.as_ptr().add(i).cast::<__m256i>());
                let p = _mm256_loadu_si256(prefix_bytes.as_ptr().add(i).cast::<__m256i>());

                // Lowercase the haystack chunk: set bit 0x20 on uppercase ASCII letters only.
                let h_is_upper =
                    _mm256_and_si256(_mm256_cmpgt_epi8(h, below_upper_a), _mm256_cmpgt_epi8(above_upper_z, h));
                let h_lower = _mm256_or_si256(h, _mm256_and_si256(h_is_upper, case_bit));

                // Lowercase the prefix chunk the same way.
                let p_is_upper =
                    _mm256_and_si256(_mm256_cmpgt_epi8(p, below_upper_a), _mm256_cmpgt_epi8(above_upper_z, p));
                let p_lower = _mm256_or_si256(p, _mm256_and_si256(p_is_upper, case_bit));

                // All 32 lanes must be equal, i.e. all 32 mask bits set (-1 as i32).
                let eq = _mm256_cmpeq_epi8(h_lower, p_lower);
                if _mm256_movemask_epi8(eq) != -1i32 {
                    return false;
                }

                i += 32;
            }

            // Handle the remaining (< 32) bytes with the scalar comparison.
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    /// NEON implementation, comparing 16 bytes per iteration.
    ///
    /// # Safety
    ///
    /// The caller must ensure that NEON is available and that
    /// `len <= haystack.len()` and `len <= prefix.len()`.
    #[cfg(target_arch = "aarch64")]
    #[target_feature(enable = "neon")]
    unsafe fn starts_with_neon(haystack: &str, prefix: &str, len: usize) -> bool {
        // SAFETY: the caller guarantees NEON support; every 16-byte load starts at
        // offset `i` with `i + 16 <= len`, and `len` does not exceed either slice.
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            let upper_a = vdupq_n_u8(b'A');
            let upper_z = vdupq_n_u8(b'Z');
            let case_bit = vdupq_n_u8(0x20);

            let mut i = 0;
            while i + 16 <= len {
                let h = vld1q_u8(haystack_bytes.as_ptr().add(i));
                let p = vld1q_u8(prefix_bytes.as_ptr().add(i));

                // Lowercase the haystack chunk: set bit 0x20 on uppercase ASCII letters only.
                let h_ge_a = vcgeq_u8(h, upper_a);
                let h_le_z = vcleq_u8(h, upper_z);
                let h_is_upper = vandq_u8(h_ge_a, h_le_z);
                let h_lower = vorrq_u8(h, vandq_u8(h_is_upper, case_bit));

                // Lowercase the prefix chunk the same way.
                let p_ge_a = vcgeq_u8(p, upper_a);
                let p_le_z = vcleq_u8(p, upper_z);
                let p_is_upper = vandq_u8(p_ge_a, p_le_z);
                let p_lower = vorrq_u8(p, vandq_u8(p_is_upper, case_bit));

                // Equal lanes are 0xFF; the minimum is 0xFF only if every lane matched.
                let eq = vceqq_u8(h_lower, p_lower);
                if vminvq_u8(eq) != 0xFF {
                    return false;
                }

                i += 16;
            }

            // Handle the remaining (< 16) bytes with the scalar comparison.
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    let len = prefix.len();
    if haystack.len() < len {
        return false;
    }

    #[cfg(target_arch = "x86_64")]
    {
        // Below one full vector the SIMD loop would not run at all.
        if len >= 32 && std::is_x86_feature_detected!("avx2") {
            // SAFETY: we've checked that AVX2 is available and haystack.len() >= len
            return unsafe { starts_with_avx2(haystack, prefix, len) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if len >= 16 {
            // SAFETY: NEON is always available on aarch64 and haystack.len() >= len
            return unsafe { starts_with_neon(haystack, prefix, len) };
        }
    }

    haystack.as_bytes()[..len].eq_ignore_ascii_case(prefix.as_bytes())
}
377
/// Generates one public `fn name(n: T) -> Atom` per `name(T)` entry, for integer types.
///
/// Each generated function formats the number through an `itoa::Buffer` and interns
/// the resulting text.
macro_rules! integer_to_atom_fns {
    ( $( $func_name:ident($num_type:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($num_type),
                "` value with zero heap allocations."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name(n: $num_type) -> Atom {
                let mut digits = itoa::Buffer::new();

                atom(digits.format(n))
            }
        )+
    };
}
396
/// Generates one public `fn name(n: T) -> Atom` per `name(T)` entry, for float types.
///
/// Each generated function formats the number through a `ryu::Buffer` and interns
/// the resulting text.
macro_rules! float_to_atom_fns {
    ( $( $func_name:ident($num_type:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($num_type),
                "` value with zero heap allocations."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name(n: $num_type) -> Atom {
                let mut text = ryu::Buffer::new();

                atom(text.format(n))
            }
        )+
    };
}
415
/// Generates the fixed-arity `concat_atomN` functions.
///
/// Each entry has the form `name(N, s1, s2, ...)`: `name` is the function to define,
/// `N` is only used in the generated documentation, and the identifiers become the
/// function's `&str` parameters, concatenated in that order.
macro_rules! concat_fns {
    ( $( $func_name:ident($n:literal, $($s:ident),+) ),+ $(,)?) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` as a result of concatenating ",
                stringify!($n),
                " string slices."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name($($s: &str),+) -> Atom {
                let parts = [$($s),+];
                let total_len: usize = parts.iter().map(|part| part.len()).sum();

                // Too long for the stack buffer: assemble the result on the heap instead.
                if total_len > STACK_BUF_SIZE {
                    let mut joined = String::with_capacity(total_len);
                    for part in parts {
                        joined.push_str(part);
                    }
                    return atom(&joined);
                }

                let mut buffer = [0u8; STACK_BUF_SIZE];
                let mut cursor = 0;
                for part in parts {
                    let end = cursor + part.len();
                    buffer[cursor..end].copy_from_slice(part.as_bytes());
                    cursor = end;
                }

                // SAFETY: the first `cursor` bytes are whole `&str` values copied back to
                // back, and a concatenation of valid UTF-8 strings is valid UTF-8.
                atom(unsafe { std::str::from_utf8_unchecked(&buffer[..cursor]) })
            }
        )+
    };
}
447
// Generate functions for integer types
integer_to_atom_fns!(
    i8_atom(i8),
    i16_atom(i16),
    i32_atom(i32),
    i64_atom(i64),
    i128_atom(i128),
    isize_atom(isize),
    u8_atom(u8),
    u16_atom(u16),
    u32_atom(u32),
    u64_atom(u64),
    u128_atom(u128),
    usize_atom(usize),
);

// Generate functions for floating-point types
float_to_atom_fns!(f32_atom(f32), f64_atom(f64),);

// Generate the fixed-arity concatenation functions that `concat_atom!` dispatches to
// (one per supported argument count, 2 through 12)
concat_fns!(
    concat_atom2(2, s1, s2),
    concat_atom3(3, s1, s2, s3),
    concat_atom4(4, s1, s2, s3, s4),
    concat_atom5(5, s1, s2, s3, s4, s5),
    concat_atom6(6, s1, s2, s3, s4, s5, s6),
    concat_atom7(7, s1, s2, s3, s4, s5, s6, s7),
    concat_atom8(8, s1, s2, s3, s4, s5, s6, s7, s8),
    concat_atom9(9, s1, s2, s3, s4, s5, s6, s7, s8, s9),
    concat_atom10(10, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10),
    concat_atom11(11, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11),
    concat_atom12(12, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12),
);