1#![allow(clippy::too_many_arguments)]
2
3#[cfg(target_arch = "aarch64")]
33use std::arch::aarch64::vandq_u8;
34#[cfg(target_arch = "aarch64")]
35use std::arch::aarch64::vceqq_u8;
36#[cfg(target_arch = "aarch64")]
37use std::arch::aarch64::vcgeq_u8;
38#[cfg(target_arch = "aarch64")]
39use std::arch::aarch64::vcleq_u8;
40#[cfg(target_arch = "aarch64")]
41use std::arch::aarch64::vdupq_n_u8;
42#[cfg(target_arch = "aarch64")]
43use std::arch::aarch64::vld1q_u8;
44#[cfg(target_arch = "aarch64")]
45use std::arch::aarch64::vminvq_u8;
46#[cfg(target_arch = "aarch64")]
47use std::arch::aarch64::vorrq_u8;
48#[cfg(target_arch = "x86_64")]
49use std::arch::x86_64::*;
50use std::collections::HashMap;
51use std::collections::HashSet;
52use std::hash::BuildHasherDefault;
53
54use ustr::IdentityHasher;
55
56pub use ustr::Ustr as Atom;
57pub use ustr::ustr as atom;
58
/// A [`HashMap`] keyed by [`Atom`] whose hasher is built from `ustr`'s [`IdentityHasher`]
/// (via [`BuildHasherDefault`]) rather than the standard library's default hasher.
pub type AtomMap<V> = HashMap<Atom, V, BuildHasherDefault<IdentityHasher>>;
64
/// A [`HashSet`] of [`Atom`]s whose hasher is built from `ustr`'s [`IdentityHasher`]
/// (via [`BuildHasherDefault`]) rather than the standard library's default hasher.
pub type AtomSet = HashSet<Atom, BuildHasherDefault<IdentityHasher>>;
70
/// Capacity, in bytes, of the fixed-size `[u8; STACK_BUF_SIZE]` scratch arrays that the
/// functions in this file fill before passing the result to `atom`. Inputs that do not fit
/// are assembled in a heap-allocated `String` instead.
const STACK_BUF_SIZE: usize = 256;
73
74#[inline]
78#[must_use]
79pub fn empty_atom() -> Atom {
80 atom("")
81}
82
/// Concatenates between 2 and 12 string-like expressions into a single `Atom`.
///
/// Each arm forwards to the fixed-arity function `$crate::concat_atomN` that matches the
/// number of arguments, passing every argument by reference (`&$arg`). A trailing comma is
/// accepted. An invocation with one argument, or with more than twelve, falls through to
/// the final arm and is rejected at compile time with an explanatory `compile_error!`.
#[macro_export]
macro_rules! concat_atom {
    // One arm per supported arity; each borrows its arguments and calls `concat_atomN`.
    ($s1:expr, $s2:expr $(,)?) => {
        $crate::concat_atom2(&$s1, &$s2)
    };
    ($s1:expr, $s2:expr, $s3:expr $(,)?) => {
        $crate::concat_atom3(&$s1, &$s2, &$s3)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr $(,)?) => {
        $crate::concat_atom4(&$s1, &$s2, &$s3, &$s4)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr $(,)?) => {
        $crate::concat_atom5(&$s1, &$s2, &$s3, &$s4, &$s5)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr $(,)?) => {
        $crate::concat_atom6(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr $(,)?) => {
        $crate::concat_atom7(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr $(,)?) => {
        $crate::concat_atom8(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr $(,)?) => {
        $crate::concat_atom9(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr $(,)?) => {
        $crate::concat_atom10(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr $(,)?) => {
        $crate::concat_atom11(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11)
    };
    ($s1:expr, $s2:expr, $s3:expr, $s4:expr, $s5:expr, $s6:expr, $s7:expr, $s8:expr, $s9:expr, $s10:expr, $s11:expr, $s12:expr $(,)?) => {
        $crate::concat_atom12(&$s1, &$s2, &$s3, &$s4, &$s5, &$s6, &$s7, &$s8, &$s9, &$s10, &$s11, &$s12)
    };
    // Catch-all for any other non-empty argument list: fail the build with a clear message.
    ($($arg:expr),+ $(,)?) => {
        compile_error!("concat_atom! macro supports between 2 and 12 arguments only")
    };
}
131
132#[inline]
138#[must_use]
139pub fn ascii_lowercase_constant_name_atom(name: &str) -> Atom {
140 if let Some(last_slash_idx) = name.rfind('\\') {
141 let (namespace, const_name) = name.split_at(last_slash_idx);
142 let const_name = &const_name[1..];
143
144 if name.len() > STACK_BUF_SIZE {
145 let mut lowercased_namespace = namespace.to_ascii_lowercase();
146 lowercased_namespace.push('\\');
147 lowercased_namespace.push_str(const_name);
148 return atom(&lowercased_namespace);
149 }
150
151 let mut stack_buf = [0u8; STACK_BUF_SIZE];
152 let mut index = 0;
153
154 for byte in namespace.bytes() {
155 stack_buf[index] = byte.to_ascii_lowercase();
156 index += 1;
157 }
158
159 stack_buf[index] = b'\\';
160 index += 1;
161
162 let const_bytes = const_name.as_bytes();
163 stack_buf[index..index + const_bytes.len()].copy_from_slice(const_bytes);
164 index += const_bytes.len();
165
166 atom(
167 unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
169 )
170 } else {
171 atom(name)
172 }
173}
174
175#[inline]
182#[must_use]
183pub fn ascii_lowercase_atom(s: &str) -> Atom {
184 let bytes = s.as_bytes();
185
186 let mut needs_lowercasing = false;
189 let mut is_ascii = true;
190 for &b in bytes {
191 if b > 127 {
192 is_ascii = false;
193 break;
194 }
195 if b.is_ascii_uppercase() {
196 needs_lowercasing = true;
197 }
198 }
199
200 if is_ascii && !needs_lowercasing {
202 return atom(s);
203 }
204
205 if is_ascii && s.len() <= STACK_BUF_SIZE {
207 let mut stack_buf = [0u8; STACK_BUF_SIZE];
208 for (i, &b) in bytes.iter().enumerate() {
209 stack_buf[i] = b.to_ascii_lowercase();
210 }
211 return atom(
212 unsafe { std::str::from_utf8_unchecked(&stack_buf[..s.len()]) },
214 );
215 }
216
217 if s.len() <= STACK_BUF_SIZE {
219 let mut stack_buf = [0u8; STACK_BUF_SIZE];
220 let mut index = 0;
221
222 for c in s.chars() {
223 for lower_c in c.to_lowercase() {
224 let mut char_buf = [0u8; 4];
225 let encoded = lower_c.encode_utf8(&mut char_buf).as_bytes();
226
227 if index + encoded.len() > STACK_BUF_SIZE {
228 return atom(&s.to_lowercase());
229 }
230
231 stack_buf[index..index + encoded.len()].copy_from_slice(encoded);
232 index += encoded.len();
233 }
234 }
235
236 return atom(
237 unsafe { std::str::from_utf8_unchecked(&stack_buf[..index]) },
239 );
240 }
241
242 atom(&s.to_lowercase())
243}
244
/// Reports whether `haystack` begins with `prefix`, ignoring ASCII case.
///
/// Only the ASCII letters `A`-`Z` / `a`-`z` are folded; every other byte, including the
/// bytes of multi-byte UTF-8 sequences, must match exactly. The result is the same as
/// `haystack.as_bytes()[..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())` whenever
/// `haystack` is at least as long as `prefix`, and `false` otherwise.
///
/// On x86_64 a prefix of 32 bytes or more is compared 32 bytes at a time with AVX2 when the
/// CPU reports support at run time; on aarch64 a prefix of 16 bytes or more is compared
/// 16 bytes at a time with NEON. Shorter prefixes, and the tail left over after the last
/// full vector, use the scalar comparison.
#[inline]
#[must_use]
pub fn starts_with_ignore_case(haystack: &str, prefix: &str) -> bool {
    /// AVX2 kernel.
    ///
    /// # Safety
    ///
    /// The CPU must support AVX2, and `len` must not exceed the length of either string.
    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx2")]
    unsafe fn starts_with_avx2(haystack: &str, prefix: &str, len: usize) -> bool {
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            // `_mm256_cmpgt_epi8` is a *signed* strict comparison, so the uppercase range
            // is expressed as `'A' - 1 < b < 'Z' + 1`. Bytes >= 0x80 are negative as `i8`
            // and therefore never classified as letters.
            let below_upper_a = _mm256_set1_epi8(b'A' as i8 - 1);
            let above_upper_z = _mm256_set1_epi8(b'Z' as i8 + 1);
            let case_bit = _mm256_set1_epi8(0x20);

            let mut i = 0;
            while i + 32 <= len {
                // In bounds: `i + 32 <= len` and `len` is within both slices.
                let h = _mm256_loadu_si256(haystack_bytes.as_ptr().add(i) as *const __m256i);
                let p = _mm256_loadu_si256(prefix_bytes.as_ptr().add(i) as *const __m256i);

                // Lowercase: set bit 0x20 on bytes that are uppercase ASCII letters.
                let h_is_upper = _mm256_and_si256(
                    _mm256_cmpgt_epi8(h, below_upper_a),
                    _mm256_cmpgt_epi8(above_upper_z, h),
                );
                let h_lower = _mm256_or_si256(h, _mm256_and_si256(h_is_upper, case_bit));

                let p_is_upper = _mm256_and_si256(
                    _mm256_cmpgt_epi8(p, below_upper_a),
                    _mm256_cmpgt_epi8(above_upper_z, p),
                );
                let p_lower = _mm256_or_si256(p, _mm256_and_si256(p_is_upper, case_bit));

                // All 32 lanes equal <=> all 32 mask bits set <=> mask == -1.
                let eq = _mm256_cmpeq_epi8(h_lower, p_lower);
                let mask = _mm256_movemask_epi8(eq);
                if mask != -1i32 {
                    return false;
                }

                i += 32;
            }

            // Fewer than 32 bytes remain: finish with the scalar comparison.
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    /// NEON kernel.
    ///
    /// # Safety
    ///
    /// The CPU must support NEON, and `len` must not exceed the length of either string.
    #[cfg(target_arch = "aarch64")]
    #[target_feature(enable = "neon")]
    unsafe fn starts_with_neon(haystack: &str, prefix: &str, len: usize) -> bool {
        unsafe {
            let haystack_bytes = haystack.as_bytes();
            let prefix_bytes = prefix.as_bytes();

            let upper_a = vdupq_n_u8(b'A');
            let upper_z = vdupq_n_u8(b'Z');
            let case_bit = vdupq_n_u8(0x20);

            let mut i = 0;
            while i + 16 <= len {
                // In bounds: `i + 16 <= len` and `len` is within both slices.
                let h = vld1q_u8(haystack_bytes.as_ptr().add(i));
                let p = vld1q_u8(prefix_bytes.as_ptr().add(i));

                // Lowercase: set bit 0x20 on bytes in `b'A'..=b'Z'` (unsigned compares).
                let h_is_upper = vandq_u8(vcgeq_u8(h, upper_a), vcleq_u8(h, upper_z));
                let h_lower = vorrq_u8(h, vandq_u8(h_is_upper, case_bit));

                let p_is_upper = vandq_u8(vcgeq_u8(p, upper_a), vcleq_u8(p, upper_z));
                let p_lower = vorrq_u8(p, vandq_u8(p_is_upper, case_bit));

                // Equal lanes are 0xFF; the minimum is 0xFF only if every lane matched.
                let eq = vceqq_u8(h_lower, p_lower);
                let min = vminvq_u8(eq);
                if min != 0xFF {
                    return false;
                }

                i += 16;
            }

            // Fewer than 16 bytes remain: finish with the scalar comparison.
            haystack_bytes[i..len].eq_ignore_ascii_case(&prefix_bytes[i..len])
        }
    }

    let len = prefix.len();
    if haystack.len() < len {
        return false;
    }

    #[cfg(target_arch = "x86_64")]
    {
        if len >= 32 && std::is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was just detected, and `len == prefix.len()` is no
            // larger than `haystack.len()` (checked above).
            return unsafe { starts_with_avx2(haystack, prefix, len) };
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        if len >= 16 {
            // SAFETY: `len == prefix.len()` is no larger than `haystack.len()` (checked
            // above). NOTE(review): NEON availability is assumed here, exactly as in the
            // previous version; there is no run-time detection.
            return unsafe { starts_with_neon(haystack, prefix, len) };
        }
    }

    haystack.as_bytes()[..len].eq_ignore_ascii_case(prefix.as_bytes())
}
373
/// Generates one public `fn name(n: Type) -> Atom` per `name(Type)` pair.
///
/// Each generated function formats `n` with an `itoa::Buffer` and interns the resulting
/// text with `atom`.
macro_rules! integer_to_atom_fns {
    ( $( $name:ident($int:ty) ),+ $(,)? ) => {
        $(
            #[doc = "Creates an `Atom` from a `"]
            #[doc = stringify!($int)]
            #[doc = "` value with zero heap allocations."]
            #[inline]
            #[must_use]
            pub fn $name(n: $int) -> Atom {
                // The formatted digits borrow from the temporary buffer, which lives until
                // the end of this expression, i.e. until `atom` has returned.
                atom(itoa::Buffer::new().format(n))
            }
        )+
    };
}
392
/// Generates one public `fn name(n: Type) -> Atom` per `name(Type)` pair.
///
/// Each generated function formats `n` with a `ryu::Buffer` and interns the resulting text
/// with `atom`.
macro_rules! float_to_atom_fns {
    ( $( $name:ident($float:ty) ),+ $(,)? ) => {
        $(
            #[doc = "Creates an `Atom` from a `"]
            #[doc = stringify!($float)]
            #[doc = "` value with zero heap allocations."]
            #[inline]
            #[must_use]
            pub fn $name(n: $float) -> Atom {
                // The formatted text borrows from the temporary buffer, which lives until
                // the end of this expression, i.e. until `atom` has returned.
                atom(ryu::Buffer::new().format(n))
            }
        )+
    };
}
411
/// Generates fixed-arity concatenation functions.
///
/// Each `name(N, s1, ..., sN)` entry expands to `pub fn name(s1: &str, ..., sN: &str) -> Atom`
/// that joins its arguments in order and interns the result. `N` is only used in the
/// generated documentation. When the combined length is at most `STACK_BUF_SIZE` bytes the
/// pieces are joined in a stack array; otherwise they are joined in a `String` allocated
/// with exactly the required capacity.
macro_rules! concat_fns {
    ( $( $name:ident($n:literal, $($s:ident),+) ),+ $(,)?) => {
        $(
            #[doc = "Creates an `Atom` as a result of concatenating "]
            #[doc = stringify!($n)]
            #[doc = " string slices."]
            #[inline]
            #[must_use]
            pub fn $name($($s: &str),+) -> Atom {
                let parts = [$($s),+];
                let total: usize = parts.iter().map(|part| part.len()).sum();

                if total > STACK_BUF_SIZE {
                    // Too large for the scratch array: join on the heap instead.
                    let mut joined = String::with_capacity(total);
                    for part in parts.iter() {
                        joined.push_str(part);
                    }

                    return atom(&joined);
                }

                let mut scratch = [0u8; STACK_BUF_SIZE];
                let mut cursor = 0;
                for part in parts.iter() {
                    let end = cursor + part.len();
                    scratch[cursor..end].copy_from_slice(part.as_bytes());
                    cursor = end;
                }

                // SAFETY: the first `cursor` bytes are whole `&str` values laid end to end,
                // and a concatenation of valid UTF-8 strings is valid UTF-8.
                atom(unsafe { std::str::from_utf8_unchecked(&scratch[..cursor]) })
            }
        )+
    };
}
443
// Instantiate one `*_atom` conversion function for each signed and unsigned integer type
// listed below, via `integer_to_atom_fns!`.
integer_to_atom_fns!(
    i8_atom(i8),
    i16_atom(i16),
    i32_atom(i32),
    i64_atom(i64),
    i128_atom(i128),
    isize_atom(isize),
    u8_atom(u8),
    u16_atom(u16),
    u32_atom(u32),
    u64_atom(u64),
    u128_atom(u128),
    usize_atom(usize),
);
459
// Instantiate `f32_atom` and `f64_atom` via `float_to_atom_fns!`.
float_to_atom_fns!(f32_atom(f32), f64_atom(f64),);
461
// Instantiate `concat_atom2` through `concat_atom12`, the fixed-arity functions that the
// `concat_atom!` macro dispatches to. The leading literal is the arity shown in the
// generated documentation; the identifiers become the parameter names.
concat_fns!(
    concat_atom2(2, s1, s2),
    concat_atom3(3, s1, s2, s3),
    concat_atom4(4, s1, s2, s3, s4),
    concat_atom5(5, s1, s2, s3, s4, s5),
    concat_atom6(6, s1, s2, s3, s4, s5, s6),
    concat_atom7(7, s1, s2, s3, s4, s5, s6, s7),
    concat_atom8(8, s1, s2, s3, s4, s5, s6, s7, s8),
    concat_atom9(9, s1, s2, s3, s4, s5, s6, s7, s8, s9),
    concat_atom10(10, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10),
    concat_atom11(11, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11),
    concat_atom12(12, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12),
);