use std::num::NonZeroUsize;
use std::sync::{LazyLock, Mutex};
use crate::sync::lock_recover;
use lru::LruCache;
use unicode_width::UnicodeWidthChar;
/// Minimum length in bytes (`str::len`) a string must have before [`cell_len`]
/// consults the width cache; shorter strings are always measured directly.
const CACHE_MIN_LEN: usize = 8;
/// Shared LRU cache mapping a string to its previously computed cell width.
///
/// The cache is built lazily on first access, holds at most 1024 entries, and
/// is wrapped in a `Mutex` so it can be reached through a shared `static`.
static CELL_LEN_CACHE: LazyLock<Mutex<LruCache<String, usize>>> =
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(1024).expect("non-zero"))));
/// Returns how many terminal cells a single character occupies.
///
/// The value comes from `UnicodeWidthChar::width`; when that reports no width
/// for the character (`None`), the character is counted as zero cells.
#[must_use]
pub fn get_character_cell_size(c: char) -> usize {
    // `usize::default()` is 0, so a missing width collapses to zero cells.
    c.width().unwrap_or_default()
}
#[inline]
fn compute_cell_width(text: &str) -> usize {
text.chars().map(get_character_cell_size).sum()
}
/// Returns the display width of `text` in terminal cells.
///
/// Strings shorter than `CACHE_MIN_LEN` bytes are measured directly. Longer
/// strings are first looked up in the shared LRU cache; on a miss the width is
/// computed and then stored so later calls with the same text can reuse it.
#[must_use]
pub fn cell_len(text: &str) -> usize {
    if text.len() < CACHE_MIN_LEN {
        return compute_cell_width(text);
    }

    // Copy any hit out of the cache inside its own scope so the guard is
    // released before the (lock-free) measuring step below.
    let cached = {
        let mut cache = lock_recover(&CELL_LEN_CACHE);
        cache.get(text).copied()
    };

    cached.unwrap_or_else(|| {
        let width = compute_cell_width(text);
        // Re-acquire the lock only for the insertion itself.
        lock_recover(&CELL_LEN_CACHE).put(text.to_owned(), width);
        width
    })
}
/// Returns the display width of `text` in terminal cells by measuring it
/// directly.
///
/// Unlike [`cell_len`], this delegates straight to the measuring helper and
/// never reads from or writes to the width cache.
#[must_use]
pub fn cell_len_uncached(text: &str) -> usize {
compute_cell_width(text)
}
/// Pads or truncates `text` so that it occupies exactly `total` cells.
///
/// * Text that already has the requested width is returned as an owned copy.
/// * Narrower text is right-padded with spaces.
/// * Wider text is cut after the last character that still fits. If the cut
///   leaves the result short of `total` (the next character was wider than
///   the remaining room), the gap is filled with spaces.
#[must_use]
pub fn set_cell_size(text: &str, total: usize) -> String {
    use std::cmp::Ordering;

    let current = cell_len(text);
    match current.cmp(&total) {
        Ordering::Equal => text.to_owned(),
        Ordering::Less => {
            let mut padded = String::from(text);
            padded.push_str(&" ".repeat(total - current));
            padded
        }
        Ordering::Greater => {
            let (mut clipped, clipped_width) = truncate_to_width(text, total);
            if clipped_width < total {
                // A wide character straddled the boundary; fill the leftover cells.
                clipped.push_str(&" ".repeat(total - clipped_width));
            }
            clipped
        }
    }
}
/// Keeps the longest prefix of `text` whose cell width does not exceed
/// `max_width`.
///
/// Returns the prefix as an owned string together with the number of cells it
/// actually occupies, which can be less than `max_width` when the next
/// character would not fit.
fn truncate_to_width(text: &str, max_width: usize) -> (String, usize) {
    let mut used = 0usize;
    let kept: String = text
        .chars()
        .take_while(|&ch| {
            let next = used + get_character_cell_size(ch);
            let fits = next <= max_width;
            if fits {
                used = next;
            }
            fits
        })
        .collect();
    (kept, used)
}
/// Splits `text` in two at a cell-width boundary.
///
/// The first slice is the longest prefix whose cell width does not exceed
/// `max_size`; the second slice is everything after it. The split always
/// falls on a character boundary, so a wide character that does not fit goes
/// entirely into the second slice.
#[must_use]
pub fn chop_cells(text: &str, max_size: usize) -> (&str, &str) {
    let mut used = 0usize;
    // Byte offset of the first character that would overflow `max_size`,
    // or the end of the string when everything fits.
    let cut = text
        .char_indices()
        .find(|&(_, ch)| {
            let next = used + get_character_cell_size(ch);
            if next > max_size {
                true
            } else {
                used = next;
                false
            }
        })
        .map_or(text.len(), |(offset, _)| offset);
    text.split_at(cut)
}
/// Maps every character of `text` to the cell column where it starts.
///
/// Returns one `(byte_index, cell_position)` pair per character, in order.
/// `byte_index` is the character's byte offset in `text`; `cell_position` is
/// the total width of all characters before it.
#[must_use]
pub fn cell_positions(text: &str) -> Vec<(usize, usize)> {
    let mut next_cell = 0usize;
    text.char_indices()
        .map(|(offset, ch)| {
            let start = next_cell;
            next_cell += get_character_cell_size(ch);
            (offset, start)
        })
        .collect()
}
/// Converts a cell column into a byte offset within `text`.
///
/// Returns the byte index of the first character that starts at or after
/// `cell_pos`. If no such character exists but the whole string is at least
/// `cell_pos` cells wide, the length of `text` is returned. `None` means
/// `cell_pos` lies beyond the end of the text.
#[must_use]
pub fn cell_to_byte_index(text: &str, cell_pos: usize) -> Option<usize> {
    let mut cells_seen = 0usize;
    let within = text.char_indices().find_map(|(offset, ch)| {
        if cells_seen >= cell_pos {
            Some(offset)
        } else {
            cells_seen += get_character_cell_size(ch);
            None
        }
    });
    // No character starts at or past the target: the position is valid only
    // if the accumulated width of the full string reaches it.
    within.or_else(|| (cells_seen >= cell_pos).then_some(text.len()))
}
#[must_use]
pub fn has_wide_chars(text: &str) -> bool {
text.chars().any(|c| get_character_cell_size(c) > 1)
}
// Unit tests for the cell-width helpers: per-character widths, cached and
// uncached string measurement, padding/truncation, splitting, and the
// byte-offset <-> cell-column mappings.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ascii_width() {
        assert_eq!(cell_len("hello"), 5);
        assert_eq!(cell_len("Hello, World!"), 13);
    }

    #[test]
    fn test_character_width() {
        assert_eq!(get_character_cell_size('a'), 1);
        assert_eq!(get_character_cell_size(' '), 1);
    }

    #[test]
    fn test_cjk_width() {
        // Every CJK ideograph is two cells wide.
        assert_eq!(cell_len("日本語"), 6);
        assert_eq!(cell_len("中文"), 4);
    }

    #[test]
    fn test_mixed_width() {
        // 5 ASCII cells + 2 wide characters * 2 cells.
        assert_eq!(cell_len("Hello日本"), 9);
    }

    #[test]
    fn test_set_cell_size_pad() {
        let result = set_cell_size("hi", 5);
        // Two letters followed by three padding spaces.
        assert_eq!(result, "hi   ");
        assert_eq!(cell_len(&result), 5);
    }

    #[test]
    fn test_set_cell_size_truncate() {
        let result = set_cell_size("hello world", 5);
        assert_eq!(result, "hello");
        assert_eq!(cell_len(&result), 5);
    }

    #[test]
    fn test_set_cell_size_exact() {
        let result = set_cell_size("hello", 5);
        assert_eq!(result, "hello");
    }

    #[test]
    fn test_chop_cells() {
        let (left, right) = chop_cells("hello world", 5);
        assert_eq!(left, "hello");
        assert_eq!(right, " world");
    }

    #[test]
    fn test_chop_cells_cjk() {
        // Only one wide character fits into three cells.
        let (left, right) = chop_cells("日本語", 3);
        assert_eq!(cell_len(left), 2);
        assert_eq!(left, "日");
        assert_eq!(right, "本語");
    }

    #[test]
    fn test_cell_positions() {
        let positions = cell_positions("aあb");
        assert_eq!(positions[0], (0, 0));
        assert_eq!(positions[1], (1, 1));
        // The wide character takes 3 bytes and 2 cells.
        assert_eq!(positions[2], (4, 3));
    }

    #[test]
    fn test_has_wide_chars() {
        assert!(!has_wide_chars("hello"));
        assert!(has_wide_chars("hello日本"));
        assert!(has_wide_chars("日本語"));
    }

    #[test]
    fn test_control_characters() {
        assert_eq!(get_character_cell_size('\0'), 0);
        assert_eq!(get_character_cell_size('\x1b'), 0);
    }

    #[test]
    fn test_spec_basic_width_concept() {
        // Printable ASCII is always a single cell.
        for c in ' '..='~' {
            assert_eq!(
                get_character_cell_size(c),
                1,
                "ASCII '{c}' should be 1 cell"
            );
        }

        let cjk_chars = ['日', '本', '語', '中', '文', '한', '국', '어'];
        for c in cjk_chars {
            assert_eq!(get_character_cell_size(c), 2, "CJK '{c}' should be 2 cells");
        }

        // Control characters take no cells.
        assert_eq!(get_character_cell_size('\x00'), 0);
        assert_eq!(get_character_cell_size('\x01'), 0);
        assert_eq!(get_character_cell_size('\x1f'), 0);
    }

    #[test]
    fn test_spec_cell_width_ranges() {
        // Combining diacritical marks.
        assert_eq!(get_character_cell_size('\u{0300}'), 0);
        assert_eq!(get_character_cell_size('\u{0301}'), 0);
        // Hangul Jamo.
        assert_eq!(get_character_cell_size('\u{1100}'), 2);
        // Ideographic space.
        assert_eq!(get_character_cell_size('\u{3000}'), 2);
        // CJK Unified Ideographs.
        assert_eq!(get_character_cell_size('\u{4E00}'), 2);
        assert_eq!(get_character_cell_size('\u{9FCC}'), 2);
    }

    #[test]
    fn test_spec_ascii_fast_path() {
        assert_eq!(get_character_cell_size(' '), 1);
        assert_eq!(get_character_cell_size('~'), 1);
        assert_eq!(get_character_cell_size('A'), 1);
        assert_eq!(get_character_cell_size('z'), 1);
        assert_eq!(get_character_cell_size('0'), 1);
        assert_eq!(get_character_cell_size('!'), 1);
        // Latin-1 supplement characters are single-width as well.
        assert_eq!(get_character_cell_size('\u{00A0}'), 1);
        assert_eq!(get_character_cell_size('é'), 1);
        assert_eq!(get_character_cell_size('ñ'), 1);
    }

    #[test]
    fn test_spec_cell_len_algorithm() {
        assert_eq!(cell_len("hello"), 5);
        assert_eq!(cell_len(""), 0);
        assert_eq!(cell_len("日本語"), 6);
        assert_eq!(cell_len("中文测试"), 8);
        assert_eq!(cell_len("Hello日本"), 9);
        assert_eq!(cell_len("a中b"), 4);
    }

    #[test]
    fn test_spec_set_cell_size_operations() {
        assert_eq!(set_cell_size("hello", 5), "hello");

        let padded = set_cell_size("hi", 5);
        // Two letters followed by three padding spaces.
        assert_eq!(padded, "hi   ");
        assert_eq!(cell_len(&padded), 5);

        let truncated = set_cell_size("hello world", 5);
        assert_eq!(truncated, "hello");
        assert_eq!(cell_len(&truncated), 5);

        // Two wide characters (4 cells) plus one padding space.
        let cjk_trunc = set_cell_size("日本語", 5);
        assert_eq!(cell_len(&cjk_trunc), 5);
        assert!(cjk_trunc.starts_with("日本"));

        let mixed = set_cell_size("Hello日本", 7);
        assert_eq!(cell_len(&mixed), 7);
    }

    #[test]
    fn test_spec_chop_cells_operations() {
        let (left, right) = chop_cells("hello world", 5);
        assert_eq!(left, "hello");
        assert_eq!(right, " world");

        let (left, right) = chop_cells("日本語", 3);
        assert_eq!(left, "日");
        assert_eq!(right, "本語");
        assert_eq!(cell_len(left), 2);

        let (left, right) = chop_cells("日本語", 4);
        assert_eq!(left, "日本");
        assert_eq!(right, "語");

        let (left, right) = chop_cells("hello", 0);
        assert_eq!(left, "");
        assert_eq!(right, "hello");
    }

    #[test]
    fn test_spec_cell_positions_mapping() {
        let pos = cell_positions("abc");
        assert_eq!(pos, vec![(0, 0), (1, 1), (2, 2)]);

        let pos = cell_positions("a日b");
        assert_eq!(pos[0], (0, 0));
        assert_eq!(pos[1], (1, 1));
        assert_eq!(pos[2], (4, 3));
    }

    #[test]
    fn test_spec_cell_to_byte_index() {
        assert_eq!(cell_to_byte_index("hello", 0), Some(0));
        assert_eq!(cell_to_byte_index("hello", 3), Some(3));
        assert_eq!(cell_to_byte_index("hello", 5), Some(5));
        assert_eq!(cell_to_byte_index("hello", 10), None);

        let s = "a日b";
        assert_eq!(cell_to_byte_index(s, 0), Some(0));
        assert_eq!(cell_to_byte_index(s, 1), Some(1));
        assert_eq!(cell_to_byte_index(s, 3), Some(4));
    }

    #[test]
    fn test_spec_has_wide_chars() {
        assert!(!has_wide_chars("hello world"));
        assert!(!has_wide_chars("Hello, World! 123"));
        assert!(!has_wide_chars(""));
        assert!(has_wide_chars("日"));
        assert!(has_wide_chars("Hello日本"));
        assert!(has_wide_chars("a中b文c"));
    }

    #[test]
    fn test_spec_empty_string_handling() {
        assert_eq!(cell_len(""), 0);
        // An empty string padded to five cells is five spaces.
        assert_eq!(set_cell_size("", 5), "     ");

        let (left, right) = chop_cells("", 5);
        assert_eq!(left, "");
        assert_eq!(right, "");

        assert!(cell_positions("").is_empty());
    }

    #[test]
    fn test_spec_fullwidth_punctuation() {
        // Escapes are used so the fullwidth forms cannot be confused with
        // (or normalized to) their single-cell ASCII counterparts.
        // U+FF01 FULLWIDTH EXCLAMATION MARK
        assert_eq!(get_character_cell_size('\u{FF01}'), 2);
        // U+FF21 FULLWIDTH LATIN CAPITAL LETTER A
        assert_eq!(get_character_cell_size('\u{FF21}'), 2);
        assert_eq!(cell_len("\u{FF01}\u{FF21}"), 4);
    }

    #[test]
    fn test_cell_len_caching() {
        // Below the cache threshold: measured directly on every call.
        let short = "hello";
        assert_eq!(cell_len(short), 5);
        assert_eq!(cell_len(short), 5);

        // Above the threshold: the second call is served from the cache.
        let long = "Hello, this is a longer string for testing";
        let width1 = cell_len(long);
        let width2 = cell_len(long);
        assert_eq!(width1, width2);
        assert_eq!(width1, 42);
        assert_eq!(cell_len_uncached(long), 42);

        let cjk_long = "日本語テスト文字列";
        let cjk_width = cell_len(cjk_long);
        assert_eq!(cjk_width, 18);
        assert_eq!(cell_len(cjk_long), cjk_width);
    }

    #[test]
    fn test_unicode_combining_characters() {
        assert_eq!(get_character_cell_size('\u{0301}'), 0);
        assert_eq!(get_character_cell_size('\u{0300}'), 0);
        assert_eq!(get_character_cell_size('\u{0308}'), 0);

        let decomposed_e_acute = "e\u{0301}";
        assert_eq!(cell_len(decomposed_e_acute), 1);

        let multi_combining = "o\u{0302}\u{0303}";
        assert_eq!(cell_len(multi_combining), 1);

        let stacked = "a\u{0300}\u{0301}\u{0302}";
        assert_eq!(cell_len(stacked), 1);
    }

    #[test]
    fn test_unicode_precomposed_vs_decomposed() {
        // Escapes pin the precomposed code points (U+00E9, U+00D1) so the
        // comparison cannot silently become decomposed-vs-decomposed.
        let precomposed = "\u{00E9}";
        assert_eq!(cell_len(precomposed), 1);
        let decomposed = "e\u{0301}";
        assert_eq!(cell_len(decomposed), 1);
        assert_eq!(cell_len(precomposed), cell_len(decomposed));

        let precomposed_n = "\u{00D1}";
        let decomposed_n = "N\u{0303}";
        assert_eq!(cell_len(precomposed_n), 1);
        assert_eq!(cell_len(decomposed_n), 1);
    }

    #[test]
    fn test_unicode_zero_width_chars() {
        assert_eq!(get_character_cell_size('\u{200B}'), 0);
        assert_eq!(get_character_cell_size('\u{200C}'), 0);
        assert_eq!(get_character_cell_size('\u{200D}'), 0);
        assert_eq!(get_character_cell_size('\u{2060}'), 0);
        assert_eq!(get_character_cell_size('\u{00AD}'), 0);

        let with_zwj = "a\u{200D}b\u{200D}c";
        assert_eq!(cell_len(with_zwj), 3);
    }

    #[test]
    fn test_unicode_zwj_sequences() {
        assert_eq!(get_character_cell_size('\u{200D}'), 0);

        // Widths are summed per code point, so only a lower bound is asserted.
        let family = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F466}";
        let width = cell_len(family);
        assert!(width >= 2, "Family emoji should have some width: {width}");
    }

    #[test]
    fn test_unicode_emoji_skin_tones() {
        let wave = "\u{1F44B}";
        let wave_width = cell_len(wave);
        assert!(
            wave_width >= 1,
            "Wave emoji should have width: {wave_width}"
        );

        let wave_light = "\u{1F44B}\u{1F3FB}";
        let wave_light_width = cell_len(wave_light);
        assert!(
            wave_light_width >= wave_width,
            "Wave with skin tone should be >= base: {wave_light_width}"
        );

        let thumbs_up = "\u{1F44D}";
        let thumbs_dark = "\u{1F44D}\u{1F3FF}";
        assert!(cell_len(thumbs_up) >= 1);
        assert!(cell_len(thumbs_dark) >= cell_len(thumbs_up));
    }

    #[test]
    fn test_unicode_variation_selectors() {
        assert_eq!(get_character_cell_size('\u{FE0E}'), 0);
        assert_eq!(get_character_cell_size('\u{FE0F}'), 0);

        let heart_text = "\u{2764}\u{FE0E}";
        let heart_emoji = "\u{2764}\u{FE0F}";
        assert!(cell_len(heart_text) >= 1);
        assert!(cell_len(heart_emoji) >= 1);
    }

    #[test]
    fn test_unicode_rtl_text() {
        let arabic = "مرحبا";
        let arabic_width = cell_len(arabic);
        assert!(arabic_width >= 1, "Arabic text should have width");

        let hebrew = "שלום";
        let hebrew_width = cell_len(hebrew);
        assert!(hebrew_width >= 1, "Hebrew text should have width");

        let mixed = "Hello مرحبا World";
        let mixed_width = cell_len(mixed);
        assert!(mixed_width >= 12, "Mixed text should include all chars");

        // Directional marks occupy no cells.
        assert_eq!(get_character_cell_size('\u{200F}'), 0);
        assert_eq!(get_character_cell_size('\u{200E}'), 0);
    }

    #[test]
    fn test_unicode_arabic_tashkeel() {
        assert_eq!(get_character_cell_size('\u{064E}'), 0);
        assert_eq!(get_character_cell_size('\u{0650}'), 0);
        assert_eq!(get_character_cell_size('\u{064F}'), 0);
        assert_eq!(get_character_cell_size('\u{0651}'), 0);

        let ba_with_fatha = "\u{0628}\u{064E}";
        assert!(cell_len(ba_with_fatha) >= 1);
    }

    #[test]
    fn test_unicode_emoji_width() {
        let smile = "😀";
        assert!(cell_len(smile) >= 1, "Smile emoji should have width");

        let heart = "❤";
        assert!(cell_len(heart) >= 1, "Heart should have width");

        let flag = "🇺🇸";
        assert!(cell_len(flag) >= 2, "Flag should have width");

        let emoji_string = "😀🎉🚀";
        assert!(cell_len(emoji_string) >= 3, "Multiple emoji should sum");
    }

    #[test]
    fn test_unicode_supplementary_planes() {
        let math_a = "\u{1D400}";
        assert!(cell_len(math_a) >= 1);

        let g_clef = "\u{1D11E}";
        assert!(cell_len(g_clef) >= 1);

        let emoji_smp = "\u{1F600}";
        assert!(cell_len(emoji_smp) >= 1);

        let linear_b = "\u{10000}";
        assert!(cell_len(linear_b) >= 1);
    }

    #[test]
    fn test_unicode_special_whitespace() {
        // Control whitespace takes no cells.
        assert_eq!(get_character_cell_size('\t'), 0);
        assert_eq!(get_character_cell_size('\n'), 0);
        assert_eq!(get_character_cell_size('\r'), 0);
        // Typographic spaces are single-width.
        assert_eq!(get_character_cell_size('\u{2002}'), 1);
        assert_eq!(get_character_cell_size('\u{2003}'), 1);
        assert_eq!(get_character_cell_size('\u{2007}'), 1);
        assert_eq!(get_character_cell_size('\u{202F}'), 1);
        // Ideographic space is double-width.
        assert_eq!(get_character_cell_size('\u{3000}'), 2);
    }

    #[test]
    fn test_unicode_private_use_area() {
        let pua_char = '\u{E000}';
        let width = get_character_cell_size(pua_char);
        assert!(width <= 2, "PUA char should have reasonable width: {width}");

        let pua_char2 = '\u{F000}';
        let width2 = get_character_cell_size(pua_char2);
        assert!(
            width2 <= 2,
            "PUA char should have reasonable width: {width2}"
        );
    }

    #[test]
    fn test_unicode_hangul() {
        let hangul = "한글";
        assert_eq!(cell_len(hangul), 4);

        assert_eq!(get_character_cell_size('\u{1100}'), 2);

        let korean = "안녕하세요";
        assert_eq!(cell_len(korean), 10);
    }

    #[test]
    fn test_unicode_thai() {
        // Thai tone mark (combining).
        assert_eq!(get_character_cell_size('\u{0E48}'), 0);

        let thai = "ภาษาไทย";
        let width = cell_len(thai);
        assert!(width >= 1, "Thai text should have width");
    }

    #[test]
    fn test_unicode_string_operations() {
        let combining = "e\u{0301}";
        let sized = set_cell_size(combining, 5);
        assert_eq!(cell_len(&sized), 5);

        let emoji_str = "Hello 😀 World";
        let (left, right) = chop_cells(emoji_str, 6);
        assert_eq!(left, "Hello ");
        assert!(right.starts_with("😀"));

        let mixed = "a日\u{0301}b";
        let positions = cell_positions(mixed);
        assert!(!positions.is_empty());

        assert!(has_wide_chars("Hello 😀"));
    }

    #[test]
    fn test_unicode_long_combining_sequences() {
        let mut long_combining = String::from("a");
        for _ in 0..10 {
            long_combining.push('\u{0301}');
        }
        // Ten zero-width marks stacked on one base letter still take one cell.
        assert_eq!(cell_len(&long_combining), 1);
    }

    #[test]
    fn test_unicode_grapheme_awareness() {
        let flag = "🇯🇵";
        let flag_width = cell_len(flag);
        assert!(
            flag_width >= 2,
            "Flag should have width from regional indicators"
        );

        let keycap = "1\u{20E3}";
        let keycap_width = cell_len(keycap);
        assert!(keycap_width >= 1);
    }
}