Skip to main content

mago_atom/
lib.rs

1#![allow(clippy::too_many_arguments)]
2
3//! A high-performance, globally-interned string library for the Mago ecosystem.
4//!
5//! This crate provides `Atom`, a canonical string type that guarantees any given
6//! string is stored in memory only once. It acts as a wrapper for the `ustr` crate and adds
7//! highly-optimized constructors for common string manipulations like lowercasing,
8//! concatenation, and number formatting.
9//!
10//! The key feature is the ability to perform these operations without heap allocations
11//! for common cases by using stack-allocated buffers, making this crate ideal for
12//! performance-critical code.
13//!
14//! # Usage
15//!
16//! ```
17//! use mago_atom::*;
18//!
19//! // Create an Atom. This is a cheap lookup in a global cache.
20//! let s1 = atom("Hello");
21//!
22//! // Use an optimized, zero-heap-allocation constructor.
23//! let s2 = ascii_lowercase_atom("Hello");
24//!
25//! assert_eq!(s2.as_str(), "hello");
26//!
27//! // Use the specialized, high-performance map.
28//! let mut map = AtomMap::default();
29//! map.insert(s1, 123);
30//! ```
31
32#[cfg(target_arch = "aarch64")]
33use std::arch::aarch64::vandq_u8;
34#[cfg(target_arch = "aarch64")]
35use std::arch::aarch64::vceqq_u8;
36#[cfg(target_arch = "aarch64")]
37use std::arch::aarch64::vcgeq_u8;
38#[cfg(target_arch = "aarch64")]
39use std::arch::aarch64::vcleq_u8;
40#[cfg(target_arch = "aarch64")]
41use std::arch::aarch64::vdupq_n_u8;
42#[cfg(target_arch = "aarch64")]
43use std::arch::aarch64::vld1q_u8;
44#[cfg(target_arch = "aarch64")]
45use std::arch::aarch64::vminvq_u8;
46#[cfg(target_arch = "aarch64")]
47use std::arch::aarch64::vorrq_u8;
48#[cfg(target_arch = "x86_64")]
49use std::arch::x86_64::*;
50use std::collections::HashMap;
51use std::collections::HashSet;
52use std::hash::BuildHasherDefault;
53
54use ustr::IdentityHasher;
55
56pub use ustr::Ustr as Atom;
57pub use ustr::ustr as atom;
58
/// A high-performance `HashMap` using `Atom` as the key.
///
/// This map is significantly faster than a standard `HashMap` because it uses the
/// `Atom`'s pre-computed hash instead of hashing the string content on every lookup.
///
/// The hasher is `ustr`'s `IdentityHasher` wrapped in `BuildHasherDefault`, so an empty
/// map is created with `AtomMap::default()` rather than `HashMap::new()`.
pub type AtomMap<V> = HashMap<Atom, V, BuildHasherDefault<IdentityHasher>>;
64
/// A high-performance `HashSet` using `Atom` as the key.
///
/// This set is significantly faster than a standard `HashSet` because it uses the
/// `Atom`'s pre-computed hash.
///
/// The hasher is `ustr`'s `IdentityHasher` wrapped in `BuildHasherDefault`, so an empty
/// set is created with `AtomSet::default()` rather than `HashSet::new()`.
pub type AtomSet = HashSet<Atom, BuildHasherDefault<IdentityHasher>>;
70
/// The maximum size in bytes for a string to be processed on the stack.
///
/// The lowercasing and concatenation constructors in this file build their result in a
/// `[u8; STACK_BUF_SIZE]` array when it fits, and fall back to a heap-allocated `String`
/// for anything longer.
const STACK_BUF_SIZE: usize = 256;
73
74/// Returns the canonical `Atom` for an empty string.
75///
76/// This is a very cheap operation.
77#[inline]
78#[must_use]
79pub fn empty_atom() -> Atom {
80    atom("")
81}
82
/// Concatenates between 2 and 12 string slices into a single `Atom`.
///
/// The macro dispatches on the number of arguments to the matching `concat_atomN`
/// function (`concat_atom2` … `concat_atom12`). Each argument is passed by reference
/// (`&$arg`), so any expression whose reference coerces to `&str` is accepted.
///
/// The `concat_atomN` functions assemble the result in a 256-byte stack buffer when the
/// combined length fits, and fall back to a heap-allocated `String` otherwise.
///
/// A trailing comma is allowed.
///
/// # Examples
///
/// ```
/// use mago_atom::concat_atom;
///
/// let name = concat_atom!("Foo", "\\", "bar");
/// assert_eq!(name.as_str(), "Foo\\bar");
/// ```
///
/// # Compile errors
///
/// Invoking the macro with 0, 1, or more than 12 arguments fails to compile with the
/// message "concat_atom! macro supports between 2 and 12 arguments only".
#[macro_export]
macro_rules! concat_atom {
    ($s1:expr, $s2:expr $(,)?) => {
        $crate::concat_atom2(&$s1, &$s2)
    };
    ($s1:expr, $s2:expr, $s3:expr $(,)?) => {
        $crate::concat_atom3(&$s1, &$s2, &$s3)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr $(,)?) => {
        $crate::concat_atom4(&$s1, &$s2, &$s3, &$s4)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr $(,)?) => {
        $crate::concat_atom5(&$s1, &$s2, &$s3, &$s4, &$s5)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr $(,)?) => {
        $crate::concat_atom6(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr $(,)?) => {
        $crate::concat_atom7(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr $(,)?) => {
        $crate::concat_atom8(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr $(,)?) => {
        $crate::concat_atom9(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr $(,)?) => {
        $crate::concat_atom10(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr $(,)?) => {
        $crate::concat_atom11(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr, $s12:expr $(,)?) => {
        $crate::concat_atom12(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11, &$s12)
    };
    // Catch-all for every unsupported arity. `*` (rather than `+`) lets the zero-argument
    // invocation reach this arm too, so it reports the same dedicated message instead of
    // a generic "unexpected end of macro invocation" error.
    ($($arg:expr),* $(,)?) => {
        compile_error!("concat_atom! macro supports between 2 and 12 arguments only")
    };
}
131
132/// Creates an `Atom` from a constant name, lowercasing only the namespace part.
133///
134/// This function is optimized to avoid heap allocations for constant names up to
135/// `STACK_BUF_SIZE` bytes by building the new string on the stack. For names
136/// longer than the buffer, it falls back to a heap allocation.
137#[inline]
138#[must_use]
139pub fn ascii_lowercase_constant_name_atom(name: &str) -> Atom {
140    if let Some(last_slash_idx) = name.rfind('\\') {
141        let (namespace, const_name) = name.split_at(last_slash_idx);
142        let const_name = &const_name[1..];
143
144        if name.len() > STACK_BUF_SIZE {
145            let mut lowercased_namespace = namespace.to_ascii_lowercase();
146            lowercased_namespace.push('\\');
147            lowercased_namespace.push_str(const_name);
148            return atom(&lowercased_namespace);
149        }
150
151        let mut stack_buf = [0u8; STACK_BUF_SIZE];
152        let mut index = 0;
153
154        for byte in namespace.bytes() {
155            stack_buf[index] = byte.to_ascii_lowercase();
156            index += 1;
157        }
158
159        stack_buf[index] = b'\\';
160        index += 1;
161
162        let const_bytes = const_name.as_bytes();
163        stack_buf[index..index + const_bytes.len()].copy_from_slice(const_bytes);
164        index += const_bytes.len();
165
166        atom(
167            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
168            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
169        )
170    } else {
171        atom(name)
172    }
173}
174
175/// Creates an `Atom` from a lowercased version of a string slice.
176///
177/// This function is highly optimized. It performs a fast scan, and if the string
178/// is already lowercase, it returns an `Atom` without any new allocations.
179/// Otherwise, it builds the lowercase version on the stack for strings up to
180/// `STACK_BUF_SIZE` bytes.
181#[inline]
182#[must_use]
183pub fn ascii_lowercase_atom(s: &str) -> Atom {
184    let bytes = s.as_bytes();
185
186    // Fast path: single pass to check if already lowercase ASCII
187    // This combines the is_ascii() and any(is_ascii_uppercase) checks into one iteration
188    let mut needs_lowercasing = false;
189    let mut is_ascii = true;
190    for &b in bytes {
191        if b > 127 {
192            is_ascii = false;
193            break;
194        }
195        if b.is_ascii_uppercase() {
196            needs_lowercasing = true;
197        }
198    }
199
200    // If it's ASCII and already lowercase, return as-is
201    if is_ascii && !needs_lowercasing {
202        return atom(s);
203    }
204
205    // Fast path for ASCII-only strings: use simple byte manipulation
206    if is_ascii && s.len() <= STACK_BUF_SIZE {
207        let mut stack_buf = [0u8; STACK_BUF_SIZE];
208        for (i, &b) in bytes.iter().enumerate() {
209            stack_buf[i] = b.to_ascii_lowercase();
210        }
211        return atom(
212            // SAFETY: ASCII lowercase of ASCII bytes is valid UTF-8
213            unsafe { std::str::from_utf8_unchecked(&stack_buf[..s.len()]) },
214        );
215    }
216
217    // Non-ASCII path: handle Unicode lowercasing
218    if s.len() <= STACK_BUF_SIZE {
219        let mut stack_buf = [0u8; STACK_BUF_SIZE];
220        let mut index = 0;
221
222        for c in s.chars() {
223            for lower_c in c.to_lowercase() {
224                let mut char_buf = [0u8; 4];
225                let encoded = lower_c.encode_utf8(&mut char_buf).as_bytes();
226
227                if index + encoded.len() > STACK_BUF_SIZE {
228                    return atom(&s.to_lowercase());
229                }
230
231                stack_buf[index..index + encoded.len()].copy_from_slice(encoded);
232                index += encoded.len();
233            }
234        }
235
236        return atom(
237            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
238            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
239        );
240    }
241
242    atom(&s.to_lowercase())
243}
244
/// Checks if `haystack` starts with `prefix`, ignoring ASCII case.
///
/// Only the ASCII letters `A`-`Z` / `a`-`z` are treated case-insensitively; every other
/// byte (including the bytes of non-ASCII characters) must match exactly.
///
/// This function uses SIMD instructions when available and beneficial for the input
/// size: AVX2 on `x86_64` (detected at run time) for prefixes of at least 32 bytes, and
/// NEON on `aarch64` for prefixes of at least 16 bytes. Shorter prefixes, and the tail
/// left over after the last full SIMD chunk, are compared with
/// `eq_ignore_ascii_case`.
///
/// Returns `false` when `haystack` is shorter than `prefix`, and `true` when `prefix`
/// is empty.
///
/// # Examples
///
/// ```
/// use mago_atom::starts_with_ignore_case;
///
/// assert!(starts_with_ignore_case("HelloWorld", "hello"));
/// assert!(starts_with_ignore_case("FOOBAR", "FooBar"));
/// assert!(starts_with_ignore_case("test", "TEST"));
/// assert!(!starts_with_ignore_case("hello", "world"));
/// assert!(!starts_with_ignore_case("hi", "hello"));
///
/// // Prefixes long enough for the SIMD paths behave the same way.
/// assert!(starts_with_ignore_case(
///     "THE QUICK BROWN FOX JUMPS OVER THE LAZY DOG!",
///     "the quick brown fox jumps over the lazy dog",
/// ));
/// ```
#[inline]
#[must_use]
pub fn starts_with_ignore_case(haystack: &str, prefix: &str) -> bool {
    // AVX2 implementation, 32 bytes per iteration.
    //
    // Safety contract: the CPU must support AVX2, and both `haystack` and `prefix` must
    // be at least `len` bytes long.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn starts_with_avx2(haystack: &str, prefix: &str, len: usize) -> bool {
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            // `_mm256_cmpgt_epi8` is a signed, strict comparison, so the uppercase range
            // is expressed as `'A' - 1 < b < 'Z' + 1`. Bytes >= 0x80 are negative as
            // `i8` and therefore never classified as uppercase.
            let below_upper_a = _mm256_set1_epi8((b'A' - 1) as i8);
            let above_upper_z = _mm256_set1_epi8((b'Z' + 1) as i8);
            let case_bit = _mm256_set1_epi8(0x20);

            let mut i = 0;
            while i + 32 <= len {
                let h = _mm256_loadu_si256(haystack_bytes.as_ptr().add(i) as *const __m256i);
                let p = _mm256_loadu_si256(prefix_bytes.as_ptr().add(i) as *const __m256i);

                // Convert haystack chunk to lowercase: set bit 0x20 on `A`-`Z` only.
                let h_is_upper =
                    _mm256_and_si256(_mm256_cmpgt_epi8(h, below_upper_a), _mm256_cmpgt_epi8(above_upper_z, h));
                let h_lower = _mm256_or_si256(h, _mm256_and_si256(h_is_upper, case_bit));

                // Convert prefix chunk to lowercase the same way.
                let p_is_upper =
                    _mm256_and_si256(_mm256_cmpgt_epi8(p, below_upper_a), _mm256_cmpgt_epi8(above_upper_z, p));
                let p_lower = _mm256_or_si256(p, _mm256_and_si256(p_is_upper, case_bit));

                // All 32 lanes equal <=> all 32 mask bits set <=> mask == -1.
                let eq = _mm256_cmpeq_epi8(h_lower, p_lower);
                let mask = _mm256_movemask_epi8(eq);
                if mask != -1i32 {
                    return false;
                }

                i += 32;
            }

            // Handle remaining bytes
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    // NEON implementation, 16 bytes per iteration.
    //
    // Safety contract: both `haystack` and `prefix` must be at least `len` bytes long.
    #[cfg(target_arch = "aarch64")]
    #[target_feature(enable = "neon")]
    unsafe fn starts_with_neon(haystack: &str, prefix: &str, len: usize) -> bool {
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            let upper_a = vdupq_n_u8(b'A');
            let upper_z = vdupq_n_u8(b'Z');
            let case_bit = vdupq_n_u8(0x20);

            let mut i = 0;
            while i + 16 <= len {
                let h = vld1q_u8(haystack_bytes.as_ptr().add(i));
                let p = vld1q_u8(prefix_bytes.as_ptr().add(i));

                // Convert haystack chunk to lowercase: set bit 0x20 on `A`-`Z` only.
                let h_is_upper = vandq_u8(vcgeq_u8(h, upper_a), vcleq_u8(h, upper_z));
                let h_lower = vorrq_u8(h, vandq_u8(h_is_upper, case_bit));

                // Convert prefix chunk to lowercase the same way.
                let p_is_upper = vandq_u8(vcgeq_u8(p, upper_a), vcleq_u8(p, upper_z));
                let p_lower = vorrq_u8(p, vandq_u8(p_is_upper, case_bit));

                // Equal lanes are 0xFF; the minimum is 0xFF only if every lane matched.
                let eq = vceqq_u8(h_lower, p_lower);
                let min = vminvq_u8(eq);
                if min != 0xFF {
                    return false;
                }

                i += 16;
            }

            // Handle remaining bytes
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    let len = prefix.len();
    if haystack.len() < len {
        return false;
    }

    #[cfg(target_arch = "x86_64")]
    {
        if len >= 32 && std::is_x86_feature_detected!("avx2") {
            // SAFETY: we've checked that AVX2 is available and haystack.len() >= len
            return unsafe { starts_with_avx2(haystack, prefix, len) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if len >= 16 {
            // SAFETY: NEON is always available on aarch64 and haystack.len() >= len
            return unsafe { starts_with_neon(haystack, prefix, len) };
        }
    }

    haystack.as_bytes()[..len].eq_ignore_ascii_case(prefix.as_bytes())
}
373
/// Generates one public `fn name(n: T) -> Atom` per `name(T)` entry, for integer types.
///
/// Each generated function formats the number through an `itoa::Buffer` and interns the
/// resulting text with `atom`.
macro_rules! integer_to_atom_fns {
    ( $( $func_name:ident($num_type:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($num_type),
                "` value, formatting it through an `itoa::Buffer` rather than an intermediate `String`."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name(n: $num_type) -> Atom {
                let mut digits = itoa::Buffer::new();

                atom(digits.format(n))
            }
        )+
    };
}
392
/// Generates one public `fn name(n: T) -> Atom` per `name(T)` entry, for float types.
///
/// Each generated function formats the number through a `ryu::Buffer` and interns the
/// resulting text with `atom`.
macro_rules! float_to_atom_fns {
    ( $( $func_name:ident($num_type:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($num_type),
                "` value, formatting it through a `ryu::Buffer` rather than an intermediate `String`."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name(n: $num_type) -> Atom {
                let mut text = ryu::Buffer::new();

                atom(text.format(n))
            }
        )+
    };
}
411
/// Generates the fixed-arity `concat_atomN` functions.
///
/// Each `name(N, s1, ..., sN)` entry expands to `pub fn name(s1: &str, ..., sN: &str) -> Atom`.
/// The literal `N` is only used in the generated documentation.
macro_rules! concat_fns {
    ( $( $func_name:ident($n:literal, $($s:ident),+) ),+ $(,)?) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` as a result of concatenating ",
                stringify!($n),
                " string slices."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name($($s: &str),+) -> Atom {
                let total_len = 0 $(+ $s.len())+;

                // Too long for the stack buffer: concatenate on the heap instead.
                if total_len > STACK_BUF_SIZE {
                    let mut joined = String::with_capacity(total_len);
                    $( joined.push_str($s); )+
                    return atom(&joined);
                }

                // Copy every piece back to back into the stack buffer.
                let mut stack_buf = [0u8; STACK_BUF_SIZE];
                let mut cursor = 0;
                $(
                    let end = cursor + $s.len();
                    stack_buf[cursor..end].copy_from_slice($s.as_bytes());
                    cursor = end;
                )+

                // SAFETY: `stack_buf[..cursor]` is the byte-wise concatenation of complete
                // `&str` values, and concatenating valid UTF-8 yields valid UTF-8.
                atom(unsafe { std::str::from_utf8_unchecked(&stack_buf[..cursor]) })
            }
        )+
    };
}
443
// Generate functions for integer types.
//
// Each `name(type)` entry below expands, through `integer_to_atom_fns!`, to a public
// `fn name(n: type) -> Atom`.
integer_to_atom_fns!(
    i8_atom(i8),
    i16_atom(i16),
    i32_atom(i32),
    i64_atom(i64),
    i128_atom(i128),
    isize_atom(isize),
    u8_atom(u8),
    u16_atom(u16),
    u32_atom(u32),
    u64_atom(u64),
    u128_atom(u128),
    usize_atom(usize),
);
459
// Generate functions for float types: `f32_atom(n: f32)` and `f64_atom(n: f64)`.
float_to_atom_fns!(f32_atom(f32), f64_atom(f64),);
461
// Generate the fixed-arity concatenation functions `concat_atom2` through `concat_atom12`
// that the `concat_atom!` macro dispatches to. The leading number of each entry is the
// arity shown in the generated documentation; the identifiers are the parameter names.
concat_fns!(
    concat_atom2(2, s1, s2),
    concat_atom3(3, s1, s2, s3),
    concat_atom4(4, s1, s2, s3, s4),
    concat_atom5(5, s1, s2, s3, s4, s5),
    concat_atom6(6, s1, s2, s3, s4, s5, s6),
    concat_atom7(7, s1, s2, s3, s4, s5, s6, s7),
    concat_atom8(8, s1, s2, s3, s4, s5, s6, s7, s8),
    concat_atom9(9, s1, s2, s3, s4, s5, s6, s7, s8, s9),
    concat_atom10(10, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10),
    concat_atom11(11, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11),
    concat_atom12(12, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12),
);