use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
/// Returns the display width of `s` in terminal cells.
///
/// The string is split on tab characters. Every tab contributes a fixed four
/// cells (it is not expanded to the next tab stop), and each tab-free segment
/// is measured with [`UnicodeWidthStr::width`].
pub fn display_width(s: &str) -> usize {
    // Flat cell cost charged for every tab character.
    const TAB_CELLS: usize = 4;

    let mut total = 0;
    for (index, segment) in s.split('\t').enumerate() {
        // Every segment after the first is preceded by exactly one tab.
        if index > 0 {
            total += TAB_CELLS;
        }
        total += UnicodeWidthStr::width(segment);
    }
    total
}
/// Returns the display width of a single `char` in terminal cells.
///
/// A tab counts as four cells. Any other character uses
/// `unicode_width::UnicodeWidthChar::width`, and characters for which that
/// returns `None` are reported as width `0`.
pub fn char_display_width(c: char) -> usize {
    match c {
        '\t' => 4,
        other => unicode_width::UnicodeWidthChar::width(other).unwrap_or(0),
    }
}
/// Shortens `s` so that it fits within `max_cells` display cells.
///
/// * `max_cells == 0` yields an empty string.
/// * A string whose [`display_width`] already fits is returned unchanged.
/// * Otherwise whole grapheme clusters are kept from the front for as long as
///   they fit in `max_cells - 1` cells, and a single ellipsis (U+2026) is
///   appended. With `max_cells == 1` the result is just the ellipsis.
pub fn truncate_to_width(s: &str, max_cells: usize) -> String {
    // Guard clauses, checked in this order: no room at all, already fits,
    // room for the ellipsis only.
    match max_cells {
        0 => return String::new(),
        _ if display_width(s) <= max_cells => return s.to_string(),
        1 => return "\u{2026}".to_string(),
        _ => {}
    }

    // One cell is reserved for the trailing ellipsis.
    let budget = max_cells - 1;
    let mut used: usize = 0;
    let mut keep_bytes: usize = 0;
    for cluster in s.graphemes(true) {
        let cells = grapheme_display_width(cluster);
        if used + cells > budget {
            break;
        }
        used += cells;
        // Clusters are contiguous from the start of `s`, so the kept text is
        // always the prefix `s[..keep_bytes]`.
        keep_bytes += cluster.len();
    }

    let mut truncated = String::with_capacity(keep_bytes + '\u{2026}'.len_utf8());
    truncated.push_str(&s[..keep_bytes]);
    truncated.push('\u{2026}');
    truncated
}
pub fn next_grapheme_boundary(s: &str, byte_offset: usize) -> Option<usize> {
if byte_offset >= s.len() {
return None;
}
if let Some((i, _)) = s[byte_offset..].grapheme_indices(true).nth(1) {
return Some(byte_offset + i);
}
Some(s.len())
}
pub fn prev_grapheme_boundary(s: &str, byte_offset: usize) -> Option<usize> {
if byte_offset == 0 {
return None;
}
let prefix = &s[..byte_offset];
let mut last_start = 0;
for (i, _) in prefix.grapheme_indices(true) {
last_start = i;
}
Some(last_start)
}
pub fn grapheme_at(s: &str, byte_offset: usize) -> &str {
if byte_offset >= s.len() {
return "";
}
s[byte_offset..].graphemes(true).next().unwrap_or("")
}
/// Converts a byte offset into `s` to a display column, i.e. the
/// [`display_width`] of everything before that offset.
///
/// Offsets past the end of `s` are clamped to `s.len()`.
///
/// # Panics
///
/// Panics if `byte_offset` lies inside `s` but not on a `char` boundary.
pub fn byte_offset_to_display_col(s: &str, byte_offset: usize) -> usize {
    let end = if byte_offset > s.len() {
        s.len()
    } else {
        byte_offset
    };
    let prefix = &s[..end];
    display_width(prefix)
}
/// Converts a display column to the byte offset of the grapheme cluster that
/// covers it.
///
/// Cluster widths are accumulated from the start of `s`; the result is the
/// start of the first cluster whose right edge lies beyond `target_col`. A
/// column that falls in the middle of a wide cluster therefore snaps back to
/// that cluster's start. If `target_col` is at or past the total width,
/// `s.len()` is returned.
pub fn display_col_to_byte_offset(s: &str, target_col: usize) -> usize {
    // Running right edge (in cells) of the clusters examined so far.
    let mut right_edge: usize = 0;
    s.grapheme_indices(true)
        .find(|&(_, cluster)| {
            right_edge += grapheme_display_width(cluster);
            right_edge > target_col
        })
        .map_or(s.len(), |(start, _)| start)
}
pub fn word_boundary_left(s: &str, byte_offset: usize) -> usize {
if byte_offset == 0 {
return 0;
}
let prefix = &s[..byte_offset];
let graphemes: Vec<(usize, &str)> = prefix.grapheme_indices(true).collect();
if graphemes.is_empty() {
return 0;
}
let mut idx = graphemes.len() - 1;
while idx > 0 && graphemes[idx].1.chars().all(|c| c.is_whitespace()) {
idx -= 1;
}
while idx > 0 && !graphemes[idx - 1].1.chars().all(|c| c.is_whitespace()) {
idx -= 1;
}
graphemes[idx].0
}
pub fn word_boundary_right(s: &str, byte_offset: usize) -> usize {
if byte_offset >= s.len() {
return s.len();
}
let suffix = &s[byte_offset..];
let graphemes: Vec<(usize, &str)> = suffix.grapheme_indices(true).collect();
if graphemes.is_empty() {
return s.len();
}
let mut idx = 0;
while idx < graphemes.len() && !graphemes[idx].1.chars().all(|c| c.is_whitespace()) {
idx += 1;
}
while idx < graphemes.len() && graphemes[idx].1.chars().all(|c| c.is_whitespace()) {
idx += 1;
}
if idx < graphemes.len() {
byte_offset + graphemes[idx].0
} else {
s.len()
}
}
/// Returns the display width of one grapheme cluster in terminal cells.
///
/// A cluster that is exactly a tab counts as four cells; anything else is
/// measured with [`UnicodeWidthStr::width`].
fn grapheme_display_width(g: &str) -> usize {
    match g {
        "\t" => 4,
        other => UnicodeWidthStr::width(other),
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for the display-width, truncation, grapheme-navigation and
    // word-boundary helpers defined in the parent module.
    use super::*;

    // ---- display_width -------------------------------------------------

    #[test]
    fn display_width_ascii() {
        assert_eq!(display_width("hello"), 5);
    }

    #[test]
    fn display_width_cjk() {
        assert_eq!(display_width("你好"), 4);
    }

    #[test]
    fn display_width_emoji() {
        assert_eq!(display_width("🎉"), 2);
    }

    #[test]
    fn display_width_mixed() {
        assert_eq!(display_width("hello你好"), 9);
    }

    #[test]
    fn display_width_combining() {
        // The combining acute accent adds no cells of its own.
        assert_eq!(display_width("cafe\u{0301}"), 4);
    }

    #[test]
    fn display_width_zero_width_space() {
        assert_eq!(display_width("a\u{200B}b"), 2);
    }

    #[test]
    fn display_width_fullwidth() {
        // U+FF28 / U+FF49 are the fullwidth forms of 'H' and 'i'. They are
        // written as escapes so the literal cannot be mistaken for (or
        // normalised into) ASCII "Hi", which would only be two cells wide.
        assert_eq!(display_width("\u{FF28}\u{FF49}"), 4);
    }

    #[test]
    fn display_width_tab() {
        // Every tab is a flat four cells.
        assert_eq!(display_width("\thello"), 9);
        assert_eq!(display_width("a\tb"), 6);
    }

    #[test]
    fn display_width_empty() {
        assert_eq!(display_width(""), 0);
    }

    #[test]
    fn display_width_box_drawing() {
        assert_eq!(display_width("─│┌┐└┘"), 6);
    }

    // ---- truncate_to_width ---------------------------------------------

    #[test]
    fn truncate_no_truncation_needed() {
        assert_eq!(truncate_to_width("hi", 10), "hi");
    }

    #[test]
    fn truncate_exact_fit() {
        assert_eq!(truncate_to_width("hello", 5), "hello");
    }

    #[test]
    fn truncate_ascii() {
        assert_eq!(truncate_to_width("hello world", 8), "hello w\u{2026}");
    }

    #[test]
    fn truncate_cjk_boundary() {
        assert_eq!(truncate_to_width("你好世界", 5), "你好\u{2026}");
    }

    #[test]
    fn truncate_cjk_off_by_one() {
        // A wide character that would straddle the budget must be dropped
        // rather than overflow the requested width.
        let result = truncate_to_width("你好世界", 4);
        assert!(display_width(&result) <= 4);
        assert!(result.ends_with('\u{2026}'));
    }

    #[test]
    fn truncate_emoji() {
        assert_eq!(truncate_to_width("🎉🚀💫", 4), "🎉\u{2026}");
    }

    #[test]
    fn truncate_zero() {
        assert_eq!(truncate_to_width("hello", 0), "");
    }

    #[test]
    fn truncate_one() {
        assert_eq!(truncate_to_width("hello", 1), "\u{2026}");
    }

    // ---- grapheme navigation -------------------------------------------

    #[test]
    fn next_grapheme_ascii() {
        assert_eq!(next_grapheme_boundary("hello", 0), Some(1));
        assert_eq!(next_grapheme_boundary("hello", 4), Some(5));
        assert_eq!(next_grapheme_boundary("hello", 5), None);
    }

    #[test]
    fn prev_grapheme_ascii() {
        assert_eq!(prev_grapheme_boundary("hello", 5), Some(4));
        assert_eq!(prev_grapheme_boundary("hello", 1), Some(0));
        assert_eq!(prev_grapheme_boundary("hello", 0), None);
    }

    #[test]
    fn next_grapheme_emoji() {
        // The emoji occupies bytes 1..5.
        let s = "a🎉b";
        assert_eq!(next_grapheme_boundary(s, 0), Some(1));
        assert_eq!(next_grapheme_boundary(s, 1), Some(5));
        assert_eq!(next_grapheme_boundary(s, 5), Some(6));
    }

    #[test]
    fn grapheme_combining() {
        // 'e' plus the combining accent form one cluster spanning bytes 3..6.
        let s = "cafe\u{0301}!";
        assert_eq!(next_grapheme_boundary(s, 3), Some(6));
        assert_eq!(prev_grapheme_boundary(s, 6), Some(3));
    }

    #[test]
    fn grapheme_zwj() {
        // A ZWJ-joined emoji sequence is a single cluster.
        let family = "👨\u{200D}👩\u{200D}👧";
        let next = next_grapheme_boundary(family, 0);
        assert_eq!(next, Some(family.len()));
    }

    #[test]
    fn grapheme_at_tests() {
        assert_eq!(grapheme_at("hello", 0), "h");
        assert_eq!(grapheme_at("a🎉b", 1), "🎉");
        assert_eq!(grapheme_at("cafe\u{0301}!", 3), "e\u{0301}");
        assert_eq!(grapheme_at("hello", 5), "");
    }

    // ---- byte offset <-> display column --------------------------------

    #[test]
    fn byte_to_display_col_ascii() {
        assert_eq!(byte_offset_to_display_col("hello", 3), 3);
    }

    #[test]
    fn byte_to_display_col_cjk() {
        assert_eq!(byte_offset_to_display_col("你好", 3), 2);
        assert_eq!(byte_offset_to_display_col("你好", 6), 4);
    }

    #[test]
    fn display_col_to_byte_ascii() {
        assert_eq!(display_col_to_byte_offset("hello", 3), 3);
    }

    #[test]
    fn display_col_to_byte_cjk() {
        assert_eq!(display_col_to_byte_offset("你好", 2), 3);
        assert_eq!(display_col_to_byte_offset("你好", 4), 6);
    }

    #[test]
    fn display_col_to_byte_snaps() {
        // Column 1 is the right half of the first wide character, so the
        // result snaps back to that character's start.
        assert_eq!(display_col_to_byte_offset("你好", 1), 0);
    }

    #[test]
    fn display_col_to_byte_beyond() {
        assert_eq!(display_col_to_byte_offset("hi", 10), 2);
    }

    // ---- word boundaries -----------------------------------------------

    #[test]
    fn word_boundary_left_ascii() {
        let s = "hello world";
        assert_eq!(word_boundary_left(s, 11), 6);
        assert_eq!(word_boundary_left(s, 6), 0);
        assert_eq!(word_boundary_left(s, 0), 0);
    }

    #[test]
    fn word_boundary_right_ascii() {
        let s = "hello world";
        assert_eq!(word_boundary_right(s, 0), 6);
        assert_eq!(word_boundary_right(s, 6), 11);
        assert_eq!(word_boundary_right(s, 11), 11);
    }

    #[test]
    fn word_boundary_left_cjk() {
        let s = "hello 你好";
        let end = s.len();
        assert_eq!(word_boundary_left(s, end), 6);
    }

    #[test]
    fn word_boundary_right_cjk() {
        let s = "hello 你好";
        assert_eq!(word_boundary_right(s, 0), 6);
    }

    // ---- char_display_width --------------------------------------------

    #[test]
    fn char_display_width_tests() {
        assert_eq!(char_display_width('a'), 1);
        assert_eq!(char_display_width('你'), 2);
        assert_eq!(char_display_width('\t'), 4);
        assert_eq!(char_display_width('🎉'), 2);
    }
}