use std::sync::atomic::{AtomicU64, Ordering};
use unicode_ident::{is_xid_continue, is_xid_start};
/// Running total of calls to `is_perl_identifier_start` (which
/// `is_perl_identifier_continue` also invokes) since process start or the last
/// `reset_unicode_stats`.
static UNICODE_CHAR_CHECKS: AtomicU64 = AtomicU64::new(0);
/// Running total of characters that `is_perl_identifier_start` accepted only
/// because they fell into one of its emoji/symbol code-point ranges.
static UNICODE_EMOJI_HITS: AtomicU64 = AtomicU64::new(0);
/// Returns a snapshot of the Unicode classification counters.
///
/// The tuple is `(character checks, emoji hits)`. Each counter is read with
/// `Ordering::Relaxed` in a separate load, so under concurrent updates the two
/// values are not guaranteed to come from the same instant.
// Diagnostic helper; it may have no callers in regular builds.
#[allow(dead_code)]
pub fn get_unicode_stats() -> (u64, u64) {
    let char_checks = UNICODE_CHAR_CHECKS.load(Ordering::Relaxed);
    let emoji_hits = UNICODE_EMOJI_HITS.load(Ordering::Relaxed);
    (char_checks, emoji_hits)
}
/// Sets both Unicode classification counters back to zero.
///
/// The stores use `Ordering::Relaxed` and are performed one after the other
/// (character checks first, then emoji hits); the pair is not reset atomically.
// Diagnostic helper; it may have no callers in regular builds.
#[allow(dead_code)]
pub fn reset_unicode_stats() {
    for counter in [&UNICODE_CHAR_CHECKS, &UNICODE_EMOJI_HITS] {
        counter.store(0, Ordering::Relaxed);
    }
}
/// Reports whether `ch` may begin an identifier.
///
/// A character qualifies when it is `_`, when `is_xid_start` accepts it, or
/// when it lies in one of the emoji/symbol blocks listed below.
///
/// Side effects: every call bumps `UNICODE_CHAR_CHECKS`; a character accepted
/// solely through the emoji/symbol ranges additionally bumps
/// `UNICODE_EMOJI_HITS`.
pub fn is_perl_identifier_start(ch: char) -> bool {
    UNICODE_CHAR_CHECKS.fetch_add(1, Ordering::Relaxed);

    match ch {
        '_' => true,
        // Checked before the emoji arm so XID_Start characters never count as emoji hits.
        c if is_xid_start(c) => true,
        // U+2600–U+26FF Miscellaneous Symbols, U+2700–U+27BF Dingbats
        '\u{2600}'..='\u{26FF}'
        | '\u{2700}'..='\u{27BF}'
        // U+1F000–U+1F02F Mahjong Tiles, U+1F0A0–U+1F0FF Playing Cards
        | '\u{1F000}'..='\u{1F02F}'
        | '\u{1F0A0}'..='\u{1F0FF}'
        // U+1F100–U+1F2FF Enclosed Alphanumeric / Ideographic Supplements
        | '\u{1F100}'..='\u{1F1FF}'
        | '\u{1F200}'..='\u{1F2FF}'
        // U+1F300–U+1F6FF pictographs, emoticons, transport and map symbols
        | '\u{1F300}'..='\u{1F6FF}'
        // U+1F700–U+1F8FF Alchemical Symbols, Geometric Shapes Extended,
        // Supplemental Arrows-C
        | '\u{1F700}'..='\u{1F77F}'
        | '\u{1F780}'..='\u{1F7FF}'
        | '\u{1F800}'..='\u{1F8FF}'
        // U+1F900–U+1F9FF Supplemental Symbols and Pictographs
        | '\u{1F900}'..='\u{1F9FF}'
        // U+1FA00–U+1FAFF Chess Symbols, Symbols and Pictographs Extended-A
        | '\u{1FA00}'..='\u{1FA6F}'
        | '\u{1FA70}'..='\u{1FAFF}' => {
            UNICODE_EMOJI_HITS.fetch_add(1, Ordering::Relaxed);
            true
        }
        _ => false,
    }
}
/// Reports whether `ch` may appear after the first character of an identifier.
///
/// Everything accepted by `is_perl_identifier_start` or `is_xid_continue`
/// qualifies, plus a few extra characters: the apostrophe and the code points
/// that glue emoji sequences together.
///
/// `is_perl_identifier_start` is always consulted first, so its counter side
/// effects apply to every call of this function as well.
pub fn is_perl_identifier_continue(ch: char) -> bool {
    if is_perl_identifier_start(ch) || is_xid_continue(ch) {
        return true;
    }

    matches!(
        ch,
        // Apostrophe — presumably for Perl's legacy `'` package separator.
        '\''
        // Zero-width non-joiner / zero-width joiner.
        | '\u{200C}'
        | '\u{200D}'
        // Variation selectors VS1–VS16.
        | '\u{FE00}'..='\u{FE0F}'
        // Emoji skin-tone modifiers.
        | '\u{1F3FB}'..='\u{1F3FF}'
    )
}
/// Summarises how "Unicode-heavy" a piece of text is.
///
/// Returns `(char_count, emoji_count, complex_char_count)` where
///
/// * `char_count` is the number of `char`s (Unicode scalar values) in `text`;
/// * `emoji_count` is the number of characters inside the emoji/symbol
///   code-point ranges that `is_perl_identifier_start` accepts as emoji, so
///   that both functions agree on what counts as an emoji;
/// * `complex_char_count` is the number of characters whose UTF-8 encoding
///   needs more than two bytes (i.e. code points at or above U+0800).
///
/// An empty string yields `(0, 0, 0)`.
// Diagnostic helper; it may have no callers in regular builds.
#[allow(dead_code)]
pub fn analyze_unicode_complexity(text: &str) -> (usize, usize, usize) {
    let mut char_count: usize = 0;
    let mut emoji_count: usize = 0;
    let mut complex_char_count: usize = 0;

    for ch in text.chars() {
        char_count += 1;

        // Union of the emoji ranges used by `is_perl_identifier_start`, with
        // adjacent blocks merged: U+2600–27BF, U+1F000–1F02F and the
        // contiguous run U+1F0A0–1FAFF.
        let is_emoji = matches!(
            ch,
            '\u{2600}'..='\u{27BF}' | '\u{1F000}'..='\u{1F02F}' | '\u{1F0A0}'..='\u{1FAFF}'
        );
        if is_emoji {
            emoji_count += 1;
        }

        // Anything above U+FFFF encodes to four bytes, so the byte-length
        // test alone covers supplementary-plane characters too.
        if ch.len_utf8() > 2 {
            complex_char_count += 1;
        }
    }

    (char_count, emoji_count, complex_char_count)
}