/// Returns the display width of `s` in columns.
///
/// Every character that [`is_wide_char`] reports as wide contributes two
/// columns; every other character (including control and combining
/// characters) contributes one. An empty string has width `0`.
pub fn get_column_width(s: &str) -> usize {
    let mut columns = 0usize;
    for ch in s.chars() {
        columns += if is_wide_char(ch) { 2 } else { 1 };
    }
    columns
}
/// Returns `true` when `c` should be counted as two display columns.
///
/// The check is a fixed table of code-point ranges covering CJK ideographs,
/// CJK punctuation, kana, Hangul and the *fullwidth* forms.
///
/// Only the fullwidth parts of the "Halfwidth and Fullwidth Forms" block
/// (U+FF01–U+FF60 and U+FFE0–U+FFE6) are treated as wide. The halfwidth
/// katakana, Hangul and symbol forms in the same block (U+FF61–U+FFDC,
/// U+FFE8–U+FFEE) occupy a single column and therefore return `false`.
pub fn is_wide_char(c: char) -> bool {
    matches!(
        u32::from(c),
        0x1100..=0x11FF            // Hangul Jamo
            | 0x3000..=0x303F      // CJK Symbols and Punctuation
            | 0x3040..=0x309F      // Hiragana
            | 0x30A0..=0x30FF      // Katakana
            | 0x3400..=0x4DBF      // CJK Unified Ideographs Extension A
            | 0x4E00..=0x9FFF      // CJK Unified Ideographs
            | 0xAC00..=0xD7AF      // Hangul Syllables
            | 0xFF01..=0xFF60      // Fullwidth ASCII variants and brackets
            | 0xFFE0..=0xFFE6      // Fullwidth currency and sign variants
            | 0x20000..=0x2A6DF    // CJK Unified Ideographs Extension B
            | 0x2A700..=0x2B73F    // CJK Unified Ideographs Extension C
            | 0x2B740..=0x2B81F    // CJK Unified Ideographs Extension D
    )
}
/// Splits `text` into lines that fit within `max_width` display columns.
///
/// Widths are measured in columns (wide characters count as two), using
/// [`get_column_width`] and [`is_wide_char`]. Text that already fits is
/// returned unchanged as a single line.
///
/// Otherwise the text is consumed character by character. Whenever the next
/// character would not fit on the current line, the line is cut just after
/// the break character reported by `find_break_point`; whatever follows that
/// point is carried over to the next line. If no break character is
/// reported, the line is cut where it stands.
///
/// No characters are dropped or inserted: concatenating the returned lines
/// reproduces `text`. A returned line is wider than `max_width` only when a
/// single character is itself wider than `max_width`.
pub fn break_text_at_width(text: &str, max_width: usize) -> Vec<String> {
    /// Characters after which a line may be broken.
    const BREAK_AFTER: &[char] = &[
        '。', '、', '】', '》', ' ',
        // ASCII punctuation.
        ',', ';', ':', '!', '?', ')',
        // Fullwidth counterparts of the ASCII punctuation above, written as
        // escapes because they are easy to confuse with the ASCII forms:
        // comma, semicolon, colon, exclamation mark, question mark and
        // right parenthesis.
        '\u{FF0C}', '\u{FF1B}', '\u{FF1A}', '\u{FF01}', '\u{FF1F}', '\u{FF09}',
    ];

    if get_column_width(text) <= max_width {
        return vec![text.to_string()];
    }

    let mut lines = Vec::new();
    let mut current = String::new();
    let mut current_width = 0usize;

    for c in text.chars() {
        let char_width = if is_wide_char(c) { 2 } else { 1 };

        // Make room *before* appending, so a two-column character can never
        // push a line one column past the limit. The `is_empty` guard lets a
        // character that is wider than `max_width` still be emitted (alone).
        while !current.is_empty() && current_width + char_width > max_width {
            match find_break_point(&current, BREAK_AFTER) {
                Some(break_pos) => {
                    // Keep everything up to and including the break
                    // character; the tail starts the next line.
                    let carried = current.split_off(break_pos);
                    lines.push(std::mem::replace(&mut current, carried));
                    current_width = get_column_width(&current);
                }
                None => {
                    lines.push(std::mem::take(&mut current));
                    current_width = 0;
                }
            }
        }

        current.push(c);
        current_width += char_width;
    }

    if !current.is_empty() {
        lines.push(current);
    }
    lines
}
/// Locates the last break character near the end of `text`.
///
/// Only the tail of the string is examined: at most the final 30% of its
/// characters (by character count, not bytes or columns), and never more
/// than its second half. The tail is scanned from the end backwards.
///
/// Returns the byte offset just past the break character that was found,
/// which is always a valid position for splitting `text`, or `None` when the
/// examined tail contains none of `break_chars`.
fn find_break_point(text: &str, break_chars: &[char]) -> Option<usize> {
    let total = text.chars().count();
    // Character index of the first candidate position.
    let first_candidate = total.saturating_sub(total * 30 / 100).max(total / 2);

    text.char_indices()
        .rev()
        .take(total - first_candidate)
        .find(|(_, ch)| break_chars.contains(ch))
        .map(|(offset, ch)| offset + ch.len_utf8())
}
#[cfg(test)]
mod tests {
    use super::*;

    // Fullwidth characters are written as `\u{...}` escapes throughout so
    // they cannot be mistaken for (or silently replaced by) their ASCII
    // look-alikes.

    #[test]
    fn test_column_width_ascii() {
        assert_eq!(get_column_width("hello"), 5);
        assert_eq!(get_column_width("// comment"), 10);
    }

    #[test]
    fn test_column_width_cjk() {
        assert_eq!(get_column_width("中文"), 4);
        // Three ASCII columns plus two wide characters.
        assert_eq!(get_column_width("// 中文"), 7);
    }

    #[test]
    fn test_column_width_japanese() {
        assert_eq!(get_column_width("あいう"), 6);
        assert_eq!(get_column_width("アイウ"), 6);
    }

    #[test]
    fn test_column_width_korean() {
        assert_eq!(get_column_width("한글"), 4);
    }

    #[test]
    fn test_column_width_fullwidth() {
        // Fullwidth latin capitals A, B, C.
        assert_eq!(get_column_width("\u{FF21}\u{FF22}\u{FF23}"), 6);
    }

    #[test]
    fn test_column_width_mixed() {
        assert_eq!(get_column_width("Hello中文"), 9);
    }

    #[test]
    fn test_column_width_empty() {
        assert_eq!(get_column_width(""), 0);
    }

    #[test]
    fn test_is_wide_char() {
        assert!(is_wide_char('中'));
        // Fullwidth comma.
        assert!(is_wide_char('\u{FF0C}'));
        assert!(!is_wide_char('a'));
        assert!(!is_wide_char(' '));
    }

    #[test]
    fn test_is_wide_char_cjk_ranges() {
        assert!(is_wide_char('中'));
        assert!(is_wide_char('文'));
        assert!(is_wide_char('。'));
        assert!(is_wide_char('、'));
    }

    #[test]
    fn test_is_wide_char_japanese() {
        assert!(is_wide_char('あ'));
        assert!(is_wide_char('ア'));
    }

    #[test]
    fn test_is_wide_char_korean() {
        assert!(is_wide_char('한'));
    }

    #[test]
    fn test_break_text_short() {
        let result = break_text_at_width("short text", 100);
        assert_eq!(result, vec!["short text"]);
    }

    #[test]
    fn test_break_text_at_space() {
        let result = break_text_at_width("hello world this is a test", 15);
        assert!(result.len() > 1);
        for part in &result {
            assert!(get_column_width(part) <= 15);
        }
    }

    #[test]
    fn test_break_text_chinese_punctuation() {
        let result = break_text_at_width("这是一个测试,需要换行。", 15);
        assert!(result.len() > 1);
    }

    #[test]
    fn test_break_text_no_break_point() {
        let result = break_text_at_width("aaaaaaaaaaaaaaaaaaaa", 10);
        assert!(!result.is_empty());
    }
}