use std::borrow::Cow;
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthChar;
/// Iterates over the grapheme clusters of `text`.
///
/// Crate-internal convenience wrapper around
/// [`UnicodeSegmentation::graphemes`]; each yielded item is a sub-slice of
/// `text`.
pub(crate) fn graphemes(text: &str) -> impl Iterator<Item = &str> {
    // The second argument of `UnicodeSegmentation::graphemes` selects
    // extended (rather than legacy) grapheme clusters.
    let extended = true;
    UnicodeSegmentation::graphemes(text, extended)
}
/// Returns the number of terminal cells needed to display `text`.
///
/// Widths are summed per `char`:
/// * printable ASCII (`0x20..=0x7e`) counts as one cell,
/// * every other ASCII character (control characters and DEL) counts as zero,
/// * anything else is looked up through `cached_char_width`.
///
/// Strings made up solely of printable ASCII take a fast path where the cell
/// count equals the byte length.
pub fn cell_len(text: &str) -> usize {
    let is_printable_ascii = |b: u8| matches!(b, 0x20..=0x7e);
    if text.bytes().all(is_printable_ascii) {
        // One byte == one char == one cell.
        return text.len();
    }

    let mut cells = 0usize;
    for c in text.chars() {
        cells += match c {
            ' '..='~' => 1,
            // Remaining ASCII: C0 control characters and DEL.
            _ if c.is_ascii() => 0,
            _ => cached_char_width(c),
        };
    }
    cells
}
/// Returns the number of terminal cells occupied by a single character.
///
/// Printable ASCII is one cell wide, all other ASCII characters (control
/// characters and DEL) are zero cells wide, and non-ASCII characters are
/// resolved through `cached_char_width`.
pub fn get_character_cell_size(c: char) -> usize {
    match c {
        ' '..='~' => 1,
        // Remaining ASCII: C0 control characters and DEL.
        _ if c.is_ascii() => 0,
        _ => cached_char_width(c),
    }
}
/// Pads or crops `text` so that it occupies exactly `total` terminal cells.
///
/// * If `text` already measures `total` cells it is returned borrowed.
/// * If it is narrower, spaces are appended on the right.
/// * If it is wider, it is cut on grapheme-cluster boundaries; when the next
///   cluster would overshoot `total`, the remaining cells are filled with
///   spaces instead.
///
/// A new `String` is only allocated when padding is needed or when non-ASCII
/// text has to be cropped; every other case returns a borrowed slice.
pub fn set_cell_size(text: &str, total: usize) -> Cow<'_, str> {
    use std::cmp::Ordering;

    // For printable ASCII the byte length *is* the cell width.
    let printable_ascii = text.bytes().all(|b| (0x20..0x7f).contains(&b));
    let width = if printable_ascii {
        text.len()
    } else {
        cell_len(text)
    };

    match width.cmp(&total) {
        Ordering::Equal => return Cow::Borrowed(text),
        Ordering::Less => {
            let padding = total - width;
            let mut padded = String::with_capacity(text.len() + padding);
            padded.push_str(text);
            padded.extend(std::iter::repeat_n(' ', padding));
            return Cow::Owned(padded);
        }
        Ordering::Greater => {}
    }

    // From here on the text is strictly wider than `total`.
    if total == 0 {
        return Cow::Borrowed("");
    }
    if printable_ascii {
        // Every char is a single byte, so byte slicing cannot split a char.
        return Cow::Borrowed(&text[..total]);
    }

    let mut cropped = String::with_capacity(text.len());
    let mut used = 0;
    for cluster in graphemes(text) {
        let cluster_cells = cell_len(cluster);
        if used + cluster_cells > total {
            // The cluster does not fit: fill whatever is left (possibly
            // nothing) with spaces and stop.
            cropped.extend(std::iter::repeat_n(' ', total - used));
            break;
        }
        cropped.push_str(cluster);
        used += cluster_cells;
    }
    Cow::Owned(cropped)
}
/// Splits `text` into consecutive pieces that are each at most `width` cells
/// wide.
///
/// Pieces are cut on grapheme-cluster boundaries. A cluster that is itself
/// wider than `width` is still emitted, on a piece of its own. A `width` of
/// zero, or empty input, yields an empty vector.
pub fn chop_cells(text: &str, width: usize) -> Vec<String> {
    let mut chunks: Vec<String> = Vec::new();
    if width == 0 {
        return chunks;
    }

    let mut pending = String::new();
    let mut pending_cells = 0;
    for cluster in UnicodeSegmentation::graphemes(text, true) {
        let cluster_cells = cell_len(cluster);
        if pending_cells + cluster_cells > width {
            // The cluster does not fit: flush what we have and start over.
            if !pending.is_empty() {
                chunks.push(std::mem::take(&mut pending));
            }
            pending_cells = 0;
        }
        pending.push_str(cluster);
        pending_cells += cluster_cells;
    }

    if !pending.is_empty() {
        chunks.push(pending);
    }
    chunks
}
/// Reports whether every character of `text` is exactly one cell wide.
///
/// Returns `true` for the empty string. Any zero-width character (for example
/// an ASCII control character) or wide character makes the result `false`.
pub fn is_single_cell_widths(text: &str) -> bool {
    for c in text.chars() {
        if get_character_cell_size(c) != 1 {
            return false;
        }
    }
    true
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn cell_len_cache_hit_on_second_call() {
let cjk_str = "γγγ³"; let first = cell_len(cjk_str);
assert_eq!(first, 6, "cell_len('γγγ³') must be 6");
let cached = super::cache_hit_count_for('γ');
assert!(
cached.is_some(),
"after cell_len('γγγ³'), char 'γ' must be in the LRU cache \
(cache was not populated β Opt 3 not yet implemented)"
);
assert_eq!(cached.unwrap(), 2u8, "cached width of 'γ' must be 2");
let second = cell_len(cjk_str);
assert_eq!(second, 6, "repeated cell_len must return same value");
let emoji_str = "π©";
let emoji_len = cell_len(emoji_str);
assert_eq!(emoji_len, 2, "cell_len('π©') must be 2");
let cached_emoji = super::cache_hit_count_for('π©');
assert!(
cached_emoji.is_some(),
"after cell_len('π©'), the emoji must be in the LRU cache"
);
assert_eq!(cached_emoji.unwrap(), 2u8, "cached width of 'π©' must be 2");
}
#[test]
fn test_get_character_cell_size() {
assert_eq!(get_character_cell_size('\0'), 0);
let x01_width = get_character_cell_size('\x01');
let x1f_width = get_character_cell_size('\x1f');
assert!(
x01_width <= 1,
"\\x01 width should be 0 or 1, got {}",
x01_width
);
assert!(
x1f_width <= 1,
"\\x1f width should be 0 or 1, got {}",
x1f_width
);
assert_eq!(get_character_cell_size('a'), 1);
assert_eq!(get_character_cell_size('A'), 1);
assert_eq!(get_character_cell_size('0'), 1);
assert_eq!(get_character_cell_size(' '), 1);
assert_eq!(get_character_cell_size('π©'), 2);
assert_eq!(get_character_cell_size('π½'), 2);
assert_eq!(get_character_cell_size('γ'), 2);
assert_eq!(get_character_cell_size('γ'), 2);
assert_eq!(get_character_cell_size('γ'), 2);
assert_eq!(get_character_cell_size('γ³'), 2);
}
#[test]
fn test_cell_len() {
assert_eq!(cell_len(""), 0);
assert_eq!(cell_len("abc"), 3);
assert_eq!(cell_len("hello world"), 11);
assert_eq!(cell_len("π©"), 2);
assert_eq!(cell_len("π½π½"), 4);
assert_eq!(cell_len("γγγ³"), 6); assert_eq!(cell_len("γ"), 2);
assert_eq!(cell_len("γγγγ¨γ"), 10);
assert_eq!(cell_len("aγb"), 4);
let x01_len = cell_len("\x01");
assert!(x01_len <= 1, "Expected \\x01 width 0 or 1, got {}", x01_len);
let x1f_len = cell_len("\x1f");
assert!(x1f_len <= 1, "Expected \\x1f width 0 or 1, got {}", x1f_len);
let a_x01_b_len = cell_len("a\x01b");
assert!(
(2..=3).contains(&a_x01_b_len),
"Expected a\\x01b width 2-3, got {}",
a_x01_b_len
);
assert_eq!(cell_len("βββ¬β"), 4);
assert_eq!(cell_len("β ββ"), 4);
}
#[test]
fn test_set_cell_size_exact_match() {
assert_eq!(set_cell_size("foo", 3), "foo");
assert_eq!(set_cell_size("π½π½", 4), "π½π½");
}
#[test]
fn test_set_cell_size_padding() {
assert_eq!(set_cell_size("foo", 4), "foo ");
assert_eq!(set_cell_size("foo", 5), "foo ");
assert_eq!(set_cell_size("π½π½", 5), "π½π½ ");
assert_eq!(set_cell_size("a", 10), "a ");
}
#[test]
fn test_set_cell_size_cropping() {
assert_eq!(set_cell_size("foo", 0), "");
assert_eq!(set_cell_size("foo", 1), "f");
assert_eq!(set_cell_size("foo", 2), "fo");
assert_eq!(set_cell_size("abcdefgh", 5), "abcde");
}
#[test]
fn test_set_cell_size_crop_double_width() {
assert_eq!(set_cell_size("π½π½", 4), "π½π½");
assert_eq!(set_cell_size("π½π½", 2), "π½");
assert_eq!(set_cell_size("π½π½", 3), "π½ ");
assert_eq!(set_cell_size("π½π½", 1), " ");
let result = set_cell_size("γγγγ¨γ", 6);
assert_eq!(
result,
"γγγ",
"Expected 'γγγ' (6 cells), got '{}' ({} cells)",
result,
cell_len(&result)
);
assert_eq!(set_cell_size("γγγγ¨γ", 5), "γγ "); assert_eq!(set_cell_size("γγγγ¨γ", 4), "γγ");
assert_eq!(set_cell_size("γγγγ¨γ", 3), "γ ");
}
#[test]
fn test_set_cell_size_mixed_width() {
assert_eq!(set_cell_size("aπ½b", 4), "aπ½b");
assert_eq!(set_cell_size("aπ½b", 3), "aπ½");
assert_eq!(set_cell_size("aπ½b", 2), "a ");
assert_eq!(set_cell_size("aγb", 4), "aγb");
assert_eq!(set_cell_size("aγb", 3), "aγ");
assert_eq!(set_cell_size("aγb", 2), "a ");
}
#[test]
fn test_chop_cells_single_width() {
assert_eq!(
chop_cells("abcdefghijk", 3),
vec!["abc", "def", "ghi", "jk"]
);
assert_eq!(chop_cells("hello", 3), vec!["hel", "lo"]);
assert_eq!(chop_cells("abc", 3), vec!["abc"]);
assert_eq!(chop_cells("abc", 10), vec!["abc"]);
}
#[test]
fn test_chop_cells_double_width() {
assert_eq!(
chop_cells("γγγγ¨γ", 3),
vec!["γ", "γ", "γ", "γ¨", "γ"]
);
assert_eq!(chop_cells("γγγγ¨γ", 4), vec!["γγ", "γγ¨", "γ"]);
assert_eq!(chop_cells("γγγγ¨γ", 6), vec!["γγγ", "γ¨γ"]);
assert_eq!(chop_cells("π½π½π½", 4), vec!["π½π½", "π½"]);
assert_eq!(chop_cells("π½π½π½", 5), vec!["π½π½", "π½"]); }
#[test]
fn test_chop_cells_mixed_width() {
let text = "γ1γ234γ5γ¨6γ78";
let result = chop_cells(text, 3);
assert_eq!(result, vec!["γ1", "γ2", "34", "γ5", "γ¨6", "γ7", "8"]);
}
#[test]
fn test_chop_cells_empty() {
assert_eq!(chop_cells("", 3), Vec::<String>::new());
assert_eq!(chop_cells("abc", 0), Vec::<String>::new());
}
#[test]
fn test_is_single_cell_widths() {
assert!(is_single_cell_widths("hello world"));
assert!(is_single_cell_widths("abc123"));
assert!(is_single_cell_widths("The quick brown fox"));
assert!(is_single_cell_widths("βββ¬ββ ββ"));
assert!(is_single_cell_widths("βββΌββ€"));
assert!(is_single_cell_widths(""));
assert!(!is_single_cell_widths("π©"));
assert!(!is_single_cell_widths("π½"));
assert!(!is_single_cell_widths("hello π©"));
assert!(!is_single_cell_widths("γγγ³"));
assert!(!is_single_cell_widths("γγγγ¨γ"));
assert!(!is_single_cell_widths("hello γ"));
assert!(!is_single_cell_widths("\x01"));
assert!(!is_single_cell_widths("a\x01b"));
}
#[test]
fn test_long_strings() {
let long_ascii = "a".repeat(600);
assert_eq!(cell_len(&long_ascii), 600);
assert_eq!(set_cell_size(&long_ascii, 500).len(), 500);
assert!(is_single_cell_widths(&long_ascii));
let long_cjk = "γ".repeat(300);
assert_eq!(cell_len(&long_cjk), 600); assert!(!is_single_cell_widths(&long_cjk));
}
#[test]
fn test_edge_cases() {
assert_eq!(cell_len("a"), 1);
assert_eq!(set_cell_size("a", 1), "a");
assert_eq!(chop_cells("a", 1), vec!["a"]);
let nul_a_len = cell_len("\x00a");
assert!(
(1..=2).contains(&nul_a_len),
"Expected \\x00a width 1-2, got {}",
nul_a_len
);
assert_eq!(cell_len(" "), 3);
assert_eq!(set_cell_size(" ", 5), " ");
let tab_width = get_character_cell_size('\t');
let newline_width = get_character_cell_size('\n');
assert!(
tab_width <= 4,
"Tab width should be <= 4, got {}",
tab_width
);
assert!(
newline_width <= 1,
"Newline width should be <= 1, got {}",
newline_width
);
}
}
use lru::LruCache;
use std::cell::RefCell;
use std::num::NonZeroUsize;
thread_local! {
    /// Per-thread LRU cache mapping a character to its display width in
    /// cells, holding at most 1024 entries.
    static CHAR_WIDTH_CACHE: RefCell<LruCache<char, u8>> = {
        // 1024 is non-zero, so the `unwrap` cannot fail.
        let capacity = NonZeroUsize::new(1024).unwrap();
        RefCell::new(LruCache::new(capacity))
    };
}
/// Looks up the display width of `c`, memoising the answer in
/// `CHAR_WIDTH_CACHE`.
///
/// On a miss the width comes from `UnicodeWidthChar::width`; characters for
/// which that returns `None` are stored and reported as zero cells wide.
#[inline]
fn cached_char_width(c: char) -> usize {
    CHAR_WIDTH_CACHE.with(|cell| {
        let mut lru = cell.borrow_mut();
        let width = match lru.get(&c).copied() {
            Some(hit) => hit,
            None => {
                let computed = c.width().unwrap_or(0) as u8;
                lru.put(c, computed);
                computed
            }
        };
        usize::from(width)
    })
}
/// Test-only probe into `CHAR_WIDTH_CACHE`.
///
/// Despite the name, this returns the *cached width* of `c` (or `None` when
/// `c` has not been cached on this thread), not a hit counter. It uses `peek`
/// and only takes a shared borrow of the cache.
#[cfg(test)]
pub(crate) fn cache_hit_count_for(c: char) -> Option<u8> {
    CHAR_WIDTH_CACHE.with(|cell| {
        let lru = cell.borrow();
        lru.peek(&c).copied()
    })
}
#[cfg(test)]
mod tests_v1_4_width_fixes {
    //! Regression tests pinning `cell_len` on multi-codepoint sequences.
    //!
    //! All fixtures use `\u{...}` escapes so the exact code points are
    //! visible and cannot be damaged by an encoding round-trip.

    use super::cell_len;

    /// MAN + ZWJ + WOMAN + ZWJ + GIRL: three wide emoji joined by two
    /// zero-width joiners, measured per code point as 2 + 0 + 2 + 0 + 2.
    #[test]
    fn family_zwj_emoji_is_6_cells_terminal_reality() {
        let s = "\u{1F468}\u{200d}\u{1F469}\u{200d}\u{1F467}";
        assert_eq!(s.chars().count(), 5);
        assert_eq!(cell_len(s), 6);
    }

    /// Two regional-indicator symbols (U+1F1FA, U+1F1F8) forming a flag.
    #[test]
    fn flag_emoji_is_2_cells_not_2_codepoints_misread_as_1_each() {
        let s = "\u{1F1FA}\u{1F1F8}";
        assert_eq!(cell_len(s), 2);
    }

    /// A combining acute accent adds a code point but no cells.
    #[test]
    fn combining_acute_zero_width() {
        let s = "cafe\u{0301}";
        assert_eq!(s.chars().count(), 5);
        assert_eq!(cell_len(s), 4);
    }

    /// Printable-ASCII input keeps its byte length as its width.
    #[test]
    fn ascii_fast_path_unchanged() {
        assert_eq!(cell_len("hello"), 5);
        assert_eq!(cell_len(""), 0);
    }
}