mago_atom/
lib.rs

1#![allow(clippy::too_many_arguments)]
2
3//! A high-performance, globally-interned string library for the Mago ecosystem.
4//!
5//! This crate provides `Atom`, a canonical string type that guarantees any given
6//! string is stored in memory only once. It acts as a wrapper for the `ustr` crate and adds
7//! highly-optimized constructors for common string manipulations like lowercasing,
8//! concatenation, and number formatting.
9//!
10//! The key feature is the ability to perform these operations without heap allocations
11//! for common cases by using stack-allocated buffers, making this crate ideal for
12//! performance-critical code.
13//!
14//! # Usage
15//!
16//! ```
17//! use mago_atom::*;
18//!
19//! // Create an Atom. This is a cheap lookup in a global cache.
20//! let s1 = atom("Hello");
21//!
22//! // Use an optimized, zero-heap-allocation constructor.
23//! let s2 = ascii_lowercase_atom("Hello");
24//!
25//! assert_eq!(s2.as_str(), "hello");
26//!
27//! // Use the specialized, high-performance map.
28//! let mut map = AtomMap::default();
29//! map.insert(s1, 123);
30//! ```
31
32use std::collections::HashMap;
33use std::collections::HashSet;
34use std::hash::BuildHasherDefault;
35
36use ustr::IdentityHasher;
37
38pub use ustr::Ustr as Atom;
39pub use ustr::ustr as atom;
40
41/// A high-performance `HashMap` using `Atom` as the key.
42///
43/// This map is significantly faster than a standard `HashMap` because it uses the
44/// `Atom`'s pre-computed hash instead of hashing the string content on every lookup.
45pub type AtomMap<V> = HashMap<Atom, V, BuildHasherDefault<IdentityHasher>>;
46
47/// A high-performance `HashSet` using `Atom` as the key.
48///
49/// This set is significantly faster than a standard `HashSet` because it uses the
50/// `Atom`'s pre-computed hash.
51pub type AtomSet = HashSet<Atom, BuildHasherDefault<IdentityHasher>>;
52
/// Capacity, in bytes, of the stack buffers used to assemble strings before they
/// are interned. Results that would not fit are built on the heap instead.
const STACK_BUF_SIZE: usize = 256;
55
56/// Returns the canonical `Atom` for an empty string.
57///
58/// This is a very cheap operation.
59#[inline]
60#[must_use]
61pub fn empty_atom() -> Atom {
62    atom("")
63}
64
/// Concatenates between 2 and 12 string slices into a single `Atom`.
///
/// The macro dispatches on the number of arguments to the matching
/// `concat_atomN` function (`concat_atom2` through `concat_atom12`). Each argument
/// is passed as `&$arg`, so both `&str` and `String` values can be supplied
/// directly. A trailing comma is accepted.
///
/// The `concat_atomN` functions assemble the result in a stack buffer when the
/// combined length fits and fall back to a heap-allocated `String` otherwise.
///
/// # Compile errors
///
/// Calling the macro with 0, 1, or more than 12 arguments is rejected at compile
/// time with a dedicated `compile_error!` message; nothing is deferred to runtime.
#[macro_export]
macro_rules! concat_atom {
    ($s1:expr, $s2:expr $(,)?) => {
        $crate::concat_atom2(&$s1, &$s2)
    };
    ($s1:expr, $s2:expr, $s3:expr $(,)?) => {
        $crate::concat_atom3(&$s1, &$s2, &$s3)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr $(,)?) => {
        $crate::concat_atom4(&$s1, &$s2, &$s3, &$s4)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr $(,)?) => {
        $crate::concat_atom5(&$s1, &$s2, &$s3, &$s4, &$s5)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr $(,)?) => {
        $crate::concat_atom6(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr $(,)?) => {
        $crate::concat_atom7(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr $(,)?) => {
        $crate::concat_atom8(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr $(,)?) => {
        $crate::concat_atom9(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr $(,)?) => {
        $crate::concat_atom10(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr $(,)?) => {
        $crate::concat_atom11(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr, $s12:expr $(,)?) => {
        $crate::concat_atom12(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11, &$s12)
    };
    // Catch-all for every unsupported arity. The repetition is `*` rather than `+`
    // so that a call with no arguments also lands here and reports the message
    // below instead of a generic macro-matching error.
    ($($arg:expr),* $(,)?) => {
        compile_error!("concat_atom! macro supports between 2 and 12 arguments only")
    };
}
113
114/// Creates an `Atom` from a constant name, lowercasing only the namespace part.
115///
116/// This function is optimized to avoid heap allocations for constant names up to
117/// `STACK_BUF_SIZE` bytes by building the new string on the stack. For names
118/// longer than the buffer, it falls back to a heap allocation.
119#[inline]
120#[must_use]
121pub fn ascii_lowercase_constant_name_atom(name: &str) -> Atom {
122    if let Some(last_slash_idx) = name.rfind('\\') {
123        let (namespace, const_name) = name.split_at(last_slash_idx);
124        let const_name = &const_name[1..];
125
126        if name.len() > STACK_BUF_SIZE {
127            let mut lowercased_namespace = namespace.to_ascii_lowercase();
128            lowercased_namespace.push('\\');
129            lowercased_namespace.push_str(const_name);
130            return atom(&lowercased_namespace);
131        }
132
133        let mut stack_buf = [0u8; STACK_BUF_SIZE];
134        let mut index = 0;
135
136        for byte in namespace.bytes() {
137            stack_buf[index] = byte.to_ascii_lowercase();
138            index += 1;
139        }
140
141        stack_buf[index] = b'\\';
142        index += 1;
143
144        let const_bytes = const_name.as_bytes();
145        stack_buf[index..index + const_bytes.len()].copy_from_slice(const_bytes);
146        index += const_bytes.len();
147
148        atom(
149            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
150            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
151        )
152    } else {
153        atom(name)
154    }
155}
156
157/// Creates an `Atom` from a lowercased version of a string slice.
158///
159/// This function is highly optimized. It performs a fast scan, and if the string
160/// is already lowercase, it returns an `Atom` without any new allocations.
161/// Otherwise, it builds the lowercase version on the stack for strings up to
162/// `STACK_BUF_SIZE` bytes.
163#[inline]
164#[must_use]
165pub fn ascii_lowercase_atom(s: &str) -> Atom {
166    if s.is_ascii() && !s.bytes().any(|b| b.is_ascii_uppercase()) {
167        return atom(s);
168    }
169
170    if s.len() <= STACK_BUF_SIZE {
171        let mut stack_buf = [0u8; STACK_BUF_SIZE];
172        let mut index = 0;
173
174        for c in s.chars() {
175            for lower_c in c.to_lowercase() {
176                let mut char_buf = [0u8; 4];
177                let bytes = lower_c.encode_utf8(&mut char_buf).as_bytes();
178
179                if index + bytes.len() > STACK_BUF_SIZE {
180                    return atom(&s.to_lowercase());
181                }
182
183                stack_buf[index..index + bytes.len()].copy_from_slice(bytes);
184                index += bytes.len();
185            }
186        }
187
188        return atom(
189            // SAFETY: We only write valid UTF-8 bytes into the stack buffer.
190            unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
191        );
192    }
193
194    atom(&s.to_lowercase())
195}
196
/// Generates one public `fn name(n: T) -> Atom` for every `name(T)` entry, where
/// `T` is an integer type. Each generated function formats the number with
/// `itoa` and interns the resulting text.
macro_rules! integer_to_atom_fns {
    ( $( $func_name:ident($num_type:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($num_type),
                "` value with zero heap allocations."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name(n: $num_type) -> Atom {
                // The temporary `itoa::Buffer` only has to outlive the `atom` call.
                atom(itoa::Buffer::new().format(n))
            }
        )+
    };
}
215
/// Generates one public `fn name(n: T) -> Atom` for every `name(T)` entry, where
/// `T` is a floating-point type. Each generated function formats the number with
/// `ryu` and interns the resulting text.
macro_rules! float_to_atom_fns {
    ( $( $func_name:ident($num_type:ty) ),+ $(,)? ) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` from a `",
                stringify!($num_type),
                "` value with zero heap allocations."
            )]
            #[inline]
            #[must_use]
            pub fn $func_name(n: $num_type) -> Atom {
                // The temporary `ryu::Buffer` only has to outlive the `atom` call.
                atom(ryu::Buffer::new().format(n))
            }
        )+
    };
}
234
/// Generates the fixed-arity `concat_atomN` functions.
///
/// Each entry has the form `name(N, s1, ..., sN)`: `name` is the function to
/// generate, `N` is only used in its documentation, and the identifiers become
/// its `&str` parameters, concatenated in the order given.
macro_rules! concat_fns {
    ( $( $func_name:ident($n:literal, $($s:ident),+) ),+ $(,)?) => {
        $(
            #[doc = concat!(
                "Creates an `Atom` as a result of concatenating ",
                stringify!($n),
                " string slices."
            )]
            #[inline]
            #[must_use]
            // The cursor update after the final slice is never read.
            #[allow(unused_assignments)]
            pub fn $func_name($($s: &str),+) -> Atom {
                let combined_len = 0 $(+ $s.len())+;

                // Too long for the stack buffer: build the result on the heap.
                if combined_len > STACK_BUF_SIZE {
                    let mut joined = String::with_capacity(combined_len);
                    $( joined.push_str($s); )+
                    return atom(&joined);
                }

                let mut scratch = [0u8; STACK_BUF_SIZE];
                let mut cursor = 0;
                $(
                    let end = cursor + $s.len();
                    scratch[cursor..end].copy_from_slice($s.as_bytes());
                    cursor = end;
                )+

                // SAFETY: the first `combined_len` bytes are the back-to-back
                // contents of valid `&str` values, which is itself valid UTF-8.
                atom(unsafe { std::str::from_utf8_unchecked(&scratch[..combined_len]) })
            }
        )+
    };
}
266
267// Generate functions for integer types
268integer_to_atom_fns!(
269    i8_atom(i8),
270    i16_atom(i16),
271    i32_atom(i32),
272    i64_atom(i64),
273    i128_atom(i128),
274    isize_atom(isize),
275    u8_atom(u8),
276    u16_atom(u16),
277    u32_atom(u32),
278    u64_atom(u64),
279    u128_atom(u128),
280    usize_atom(usize),
281);
282
283float_to_atom_fns!(f32_atom(f32), f64_atom(f64),);
284
285concat_fns!(
286    concat_atom2(2, s1, s2),
287    concat_atom3(3, s1, s2, s3),
288    concat_atom4(4, s1, s2, s3, s4),
289    concat_atom5(5, s1, s2, s3, s4, s5),
290    concat_atom6(6, s1, s2, s3, s4, s5, s6),
291    concat_atom7(7, s1, s2, s3, s4, s5, s6, s7),
292    concat_atom8(8, s1, s2, s3, s4, s5, s6, s7, s8),
293    concat_atom9(9, s1, s2, s3, s4, s5, s6, s7, s8, s9),
294    concat_atom10(10, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10),
295    concat_atom11(11, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11),
296    concat_atom12(12, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12),
297);