/// Classification that `consume_cluster` assigns to a recognized run of
/// `char`s.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Cluster {
/// Keycap sequence, regional-indicator pair, or emoji sequence.
/// NOTE(review): presumably laid out as a double-width cell; confirm against
/// the callers that measure text.
Wide,
/// A modifier code point (joiner, variation selector, keycap mark, skin tone)
/// that appears without a preceding emoji base.
ZeroWidth,
}
/// Reports whether `ch` lies in one of the code-point ranges this module
/// treats as East Asian wide text.
///
/// The check is a coarse, hard-coded range table rather than a per-character
/// lookup, so every code point inside a listed range counts as wide.
pub(crate) fn is_cjk_wide_char(ch: char) -> bool {
    // Inclusive `(first, last)` code-point bounds, in ascending order.
    const WIDE_RANGES: [(u32, u32); 11] = [
        (0x1100, 0x11FF),   // Hangul Jamo
        (0x2E80, 0xA4CF),   // CJK Radicals Supplement through Yi Radicals
        (0xA960, 0xA97F),   // Hangul Jamo Extended-A
        (0xAC00, 0xD7FF),   // Hangul Syllables and Hangul Jamo Extended-B
        (0xF900, 0xFAFF),   // CJK Compatibility Ideographs
        (0xFE10, 0xFE1F),   // Vertical Forms
        (0xFE30, 0xFE4F),   // CJK Compatibility Forms
        (0xFF01, 0xFF60),   // Fullwidth forms
        (0xFFE0, 0xFFE6),   // Fullwidth signs
        (0x20000, 0x2FA1F), // CJK Extension B through Compatibility Ideographs Supplement
        (0x30000, 0x323AF), // CJK Extensions G and H
    ];

    let code_point = u32::from(ch);
    WIDE_RANGES
        .iter()
        .any(|&(first, last)| (first..=last).contains(&code_point))
}
/// Reports whether `ch` falls in one of the two code-point ranges this module
/// treats as emoji bases: U+2600..=U+27BF (Miscellaneous Symbols and Dingbats)
/// or U+1F000..=U+1FAFF (the supplementary-plane pictograph blocks).
pub(crate) fn is_emoji_wide_char(ch: char) -> bool {
    let in_symbols_and_dingbats = ('\u{2600}'..='\u{27bf}').contains(&ch);
    let in_pictograph_blocks = ('\u{1f000}'..='\u{1faff}').contains(&ch);
    in_symbols_and_dingbats || in_pictograph_blocks
}
/// Reports whether `ch` is a code point this module treats as modifying a
/// neighbouring emoji rather than standing on its own: the zero-width joiner
/// (U+200D), the combining enclosing keycap (U+20E3), a variation selector
/// (U+FE00..=U+FE0F), or a skin-tone modifier (U+1F3FB..=U+1F3FF).
pub(crate) fn is_emoji_modifier_char(ch: char) -> bool {
    let code_point = u32::from(ch);

    let is_joiner_or_keycap = code_point == 0x200D || code_point == 0x20E3;
    let is_variation_selector = (0xFE00..=0xFE0F).contains(&code_point);
    let is_skin_tone = (0x1F3FB..=0x1F3FF).contains(&code_point);

    is_joiner_or_keycap || is_variation_selector || is_skin_tone
}
/// Reports whether `ch` is a regional indicator symbol
/// (U+1F1E6..=U+1F1FF), the code points that pair up to form flag emoji.
pub(crate) fn is_regional_indicator(ch: char) -> bool {
    const FIRST: u32 = 0x1F1E6;
    const LAST: u32 = 0x1F1FF;
    (FIRST..=LAST).contains(&u32::from(ch))
}
/// Reports whether `ch` can open a keycap sequence: an ASCII digit, `#`, or
/// `*`.
pub(crate) fn is_keycap_starter(ch: char) -> bool {
    matches!(ch, '0'..='9' | '#' | '*')
}
/// Tries to recognize an emoji-related cluster starting at `chars[idx]`.
///
/// On success returns the cluster kind together with the index one past the
/// last `char` that belongs to the cluster. Candidates are tried in this
/// order:
///
/// 1. Keycap sequence: a keycap starter, an optional U+FE0F, then U+20E3.
/// 2. Two consecutive regional indicators.
/// 3. An emoji base followed by any mix of modifier code points and
///    "U+200D + emoji base" joins.
/// 4. A modifier code point with no preceding base, reported as
///    [`Cluster::ZeroWidth`].
///
/// Returns `None` when `idx` is out of bounds or the character at `idx`
/// starts none of the above (plain ASCII and CJK text, for example).
pub(crate) fn consume_cluster(chars: &[char], idx: usize) -> Option<(Cluster, usize)> {
    const ZERO_WIDTH_JOINER: char = '\u{200d}';
    const VARIATION_SELECTOR_16: char = '\u{fe0f}';
    const ENCLOSING_KEYCAP: char = '\u{20e3}';

    // `lead` is the character at `idx`; `rest` is everything after it.
    let (&lead, rest) = chars.get(idx..)?.split_first()?;

    if is_keycap_starter(lead) {
        // Number of chars in the keycap sequence, counting `lead` itself.
        let keycap_len = match rest {
            [VARIATION_SELECTOR_16, ENCLOSING_KEYCAP, ..] => Some(3),
            [ENCLOSING_KEYCAP, ..] => Some(2),
            _ => None,
        };
        if let Some(len) = keycap_len {
            return Some((Cluster::Wide, idx + len));
        }
    }

    let followed_by_indicator = rest.first().copied().is_some_and(is_regional_indicator);
    if is_regional_indicator(lead) && followed_by_indicator {
        return Some((Cluster::Wide, idx + 2));
    }

    if is_emoji_wide_char(lead) {
        // Peel cluster members off the front of `unconsumed` until something
        // that cannot extend the cluster shows up.
        let mut unconsumed = rest;
        loop {
            unconsumed = match unconsumed {
                // A joiner only glues when an emoji base follows it.
                [ZERO_WIDTH_JOINER, joined, tail @ ..] if is_emoji_wide_char(*joined) => tail,
                // Any other modifier, including a dangling joiner, is absorbed.
                [modifier, tail @ ..] if is_emoji_modifier_char(*modifier) => tail,
                _ => break,
            };
        }
        return Some((Cluster::Wide, chars.len() - unconsumed.len()));
    }

    is_emoji_modifier_char(lead).then_some((Cluster::ZeroWidth, idx + 1))
}
#[cfg(all(test, feature = "mermaid_engine_internal_tests"))]
mod tests {
    use super::*;

    /// Runs `consume_cluster` at index 0 of `text` and returns the outcome
    /// together with the number of `char`s in `text`.
    fn first_cluster(text: &str) -> (Option<(Cluster, usize)>, usize) {
        let scalars: Vec<char> = text.chars().collect();
        (consume_cluster(&scalars, 0), scalars.len())
    }

    #[test]
    fn keycap_sequence_is_wide() {
        let (found, total) = first_cluster("1\u{fe0f}\u{20e3}");
        assert_eq!(found, Some((Cluster::Wide, total)));
    }

    #[test]
    fn keycap_without_vs_is_wide() {
        let (found, total) = first_cluster("1\u{20e3}");
        assert_eq!(found, Some((Cluster::Wide, total)));
    }

    #[test]
    fn regional_indicator_pair_is_wide() {
        let (found, _) = first_cluster("\u{1f1e8}\u{1f1f3}");
        assert_eq!(found, Some((Cluster::Wide, 2)));
    }

    #[test]
    fn zwj_family_is_single_wide_cluster() {
        let (found, total) =
            first_cluster("\u{1f468}\u{200d}\u{1f469}\u{200d}\u{1f467}\u{200d}\u{1f466}");
        assert_eq!(found, Some((Cluster::Wide, total)));
    }

    #[test]
    fn skin_tone_modifier_extends_cluster() {
        let (found, total) = first_cluster("\u{1f44d}\u{1f3fd}");
        assert_eq!(found, Some((Cluster::Wide, total)));
    }

    #[test]
    fn standalone_zwj_is_zero_width() {
        let (found, _) = first_cluster("\u{200d}");
        assert_eq!(found, Some((Cluster::ZeroWidth, 1)));
    }

    #[test]
    fn standalone_variation_selector_is_zero_width() {
        let (found, _) = first_cluster("\u{fe0f}");
        assert_eq!(found, Some((Cluster::ZeroWidth, 1)));
    }

    #[test]
    fn ascii_returns_none() {
        let (found, _) = first_cluster("a");
        assert_eq!(found, None);
    }

    #[test]
    fn cjk_returns_none() {
        let (found, _) = first_cluster("中");
        assert_eq!(found, None);
    }
}