use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};
/// Returns the number of terminal columns `s` occupies once ANSI CSI escape
/// sequences (`ESC [ ... final-byte`) are discounted.
///
/// Every other character contributes its Unicode display width; characters
/// for which no width is reported contribute nothing. A sequence that is
/// never terminated by a final byte swallows the rest of the string.
pub fn visible_width(s: &str) -> usize {
    let raw = s.as_bytes();
    let mut columns = 0usize;
    // Byte offset at which measuring resumes after an escape sequence.
    let mut resume_at = 0usize;

    for (idx, ch) in s.char_indices() {
        if idx < resume_at {
            // Still inside an escape sequence that was recognised earlier.
            continue;
        }
        if ch == '\u{1b}' && raw.get(idx + 1) == Some(&b'[') {
            // Skip the parameter bytes up to and including the final byte
            // (0x40..=0x7e); with no final byte the sequence runs to the end.
            let params_start = idx + 2;
            resume_at = raw[params_start..]
                .iter()
                .position(|byte| (0x40..=0x7e).contains(byte))
                .map_or(raw.len(), |offset| params_start + offset + 1);
            continue;
        }
        columns += ch.width().unwrap_or(0);
    }

    columns
}
/// Wraps `text` into display rows no wider than `max_width` columns.
///
/// * `max_width == 0` disables wrapping: the result is simply `text.lines()`.
/// * Any other width is raised to at least 2 before wrapping.
/// * `continuation_indent` is prepended to every row after the first row of a
///   logical line; it is dropped entirely when it is at least as wide as the
///   effective width.
///
/// Logical lines are separated by `'\n'`; one trailing `'\r'` per line is
/// removed, and an empty logical line yields one empty row.
pub fn soft_wrap(text: &str, max_width: usize, continuation_indent: &str) -> Vec<String> {
    if max_width == 0 {
        return text.lines().map(String::from).collect();
    }

    let width = max_width.max(2);
    // An indent that would leave no room for content is discarded.
    let indent = if UnicodeWidthStr::width(continuation_indent) >= width {
        ""
    } else {
        continuation_indent
    };
    let indent_w = UnicodeWidthStr::width(indent);

    let mut rows: Vec<String> = Vec::new();
    for segment in text.split('\n') {
        match segment.strip_suffix('\r').unwrap_or(segment) {
            "" => rows.push(String::new()),
            logical => wrap_logical_line(logical, width, indent, indent_w, &mut rows),
        }
    }
    rows
}
/// Wraps one logical line (no embedded newlines) and appends the resulting
/// rows to `out`.
///
/// * `max_width` - column budget of the first row.
/// * `cont_indent` - text prepended to every row after the first.
/// * `cont_w` - display width of `cont_indent`; continuation rows get
///   `max_width - cont_w` columns of content (never fewer than 1 when a row is
///   started fresh).
///
/// Rows break between tokens (a token is a word plus the whitespace before
/// it). Whitespace that would start a continuation row is dropped, and a word
/// wider than a whole row is hard-broken by `break_long_token`.
///
/// Every call appends at least one row, so a logical line never disappears
/// from the output.
fn wrap_logical_line(
    line: &str,
    max_width: usize,
    cont_indent: &str,
    cont_w: usize,
    out: &mut Vec<String>,
) {
    let mut current = String::new();
    let mut current_w = 0usize;
    let mut is_first_row = true;
    // Content budget of a freshly started continuation row.
    let continuation_budget = max_width.saturating_sub(cont_w).max(1);

    // Emits `current` as a finished row (indented unless it is the first row
    // of this logical line) and leaves `current` empty.
    let push_row = |out: &mut Vec<String>, current: &mut String, is_first: &mut bool| {
        if *is_first {
            out.push(std::mem::take(current));
            *is_first = false;
        } else {
            let mut row = String::with_capacity(cont_indent.len() + current.len());
            row.push_str(cont_indent);
            row.push_str(current);
            out.push(row);
            current.clear();
        }
    };

    for token in tokenize(line) {
        let budget = if is_first_row {
            max_width
        } else {
            max_width.saturating_sub(cont_w)
        };
        let tok_w = visible_width(token.text);
        let ws_w = visible_width(token.leading_ws);

        if current.is_empty() {
            if is_first_row && ws_w + tok_w <= budget {
                // Leading whitespace is only kept on the first row.
                current.push_str(token.leading_ws);
                current.push_str(token.text);
                current_w = ws_w + tok_w;
            } else if tok_w <= budget {
                current.push_str(token.text);
                current_w = tok_w;
            } else {
                break_long_token(
                    token.text,
                    budget,
                    continuation_budget,
                    &mut current,
                    &mut current_w,
                    out,
                    cont_indent,
                    &mut is_first_row,
                );
            }
            continue;
        }

        if current_w + ws_w + tok_w <= budget {
            current.push_str(token.leading_ws);
            current.push_str(token.text);
            current_w += ws_w + tok_w;
        } else {
            // The token does not fit: finish this row and start a continuation
            // row without the separating whitespace.
            push_row(out, &mut current, &mut is_first_row);
            current_w = 0;
            if tok_w <= continuation_budget {
                current.push_str(token.text);
                current_w = tok_w;
            } else {
                break_long_token(
                    token.text,
                    continuation_budget,
                    continuation_budget,
                    &mut current,
                    &mut current_w,
                    out,
                    cont_indent,
                    &mut is_first_row,
                );
            }
        }
    }

    // Flush the pending row. `is_first_row` (rather than `out.is_empty()`,
    // which also reflects rows of earlier logical lines) guarantees that a
    // line whose content was entirely dropped, e.g. over-wide whitespace,
    // still yields one (empty) row.
    if !current.is_empty() || is_first_row {
        push_row(out, &mut current, &mut is_first_row);
    }
}
/// One wrap unit produced by `tokenize`: a word together with the whitespace
/// that precedes it. Both fields borrow from the line being tokenized.
struct Token<'a> {
/// Run of spaces/tabs immediately before `text`; empty when there is none.
leading_ws: &'a str,
/// The run of non-space, non-tab bytes; empty only for a token that holds
/// whitespace at the end of the line.
text: &'a str,
}
/// Splits `line` into [`Token`]s, each pairing a run of spaces/tabs with the
/// word that follows it.
///
/// Only `' '` and `'\t'` count as separators. Whitespace at the end of the
/// line is returned as a final token whose `text` is empty, so concatenating
/// `leading_ws` and `text` of all tokens reproduces `line`.
fn tokenize(line: &str) -> Vec<Token<'_>> {
    let raw = line.as_bytes();
    let total = raw.len();
    let is_blank = |byte: u8| matches!(byte, b' ' | b'\t');

    let mut tokens = Vec::new();
    let mut pos = 0;
    while pos < total {
        // Leading whitespace run.
        let ws_len = raw[pos..].iter().take_while(|&&b| is_blank(b)).count();
        let word_start = pos + ws_len;

        // Word run, advanced one whole UTF-8 character at a time.
        let mut word_end = word_start;
        while word_end < total && !is_blank(raw[word_end]) {
            word_end = (word_end + utf8_char_len(raw[word_end])).min(total);
        }

        // `pos < total` guarantees at least one of the two runs is non-empty,
        // so every iteration yields a token (the word is empty only when the
        // whitespace reached the end of the line).
        tokens.push(Token {
            leading_ws: &line[pos..word_start],
            text: &line[word_start..word_end],
        });
        pos = word_end;
    }
    tokens
}
/// Maps a byte to the number of bytes to advance when it is treated as the
/// first byte of a UTF-8 encoded character.
///
/// Bytes below `0xC0` (ASCII and continuation bytes) advance by one; the
/// remaining ranges map to 2, 3 and 4 respectively.
fn utf8_char_len(first_byte: u8) -> usize {
    match first_byte {
        0x00..=0xBF => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xFF => 4,
    }
}
/// Hard-breaks `token`, a word too wide for a single row, character by
/// character, pushing every completed row to `out` and leaving the unfinished
/// tail in `current`.
///
/// * `first_budget` - columns still available on the row being filled.
/// * `continuation_budget` - columns available on each row started afterwards.
/// * `current` / `current_w` - text and visible width of the row being
///   filled; both are updated in place.
/// * `cont_indent` - prefix for every emitted row except the first row of the
///   logical line.
/// * `is_first_row` - whether the row being filled is the first row of the
///   logical line; cleared once that row has been emitted.
///
/// ANSI CSI escape sequences (`ESC [ ... final-byte`) are copied through
/// without consuming any budget. A row is only emitted when it already has
/// visible content: a glyph that is wider than the whole budget is placed on
/// the current row (overflowing it) instead of being preceded by an empty,
/// indent-only row.
// The wrapping state is threaded through as separate `&mut` parameters, hence
// the long argument list.
#[allow(clippy::too_many_arguments)]
fn break_long_token(
    token: &str,
    first_budget: usize,
    continuation_budget: usize,
    current: &mut String,
    current_w: &mut usize,
    out: &mut Vec<String>,
    cont_indent: &str,
    is_first_row: &mut bool,
) {
    let bytes = token.as_bytes();
    let mut remaining_budget = first_budget;
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == 0x1b && bytes.get(i + 1) == Some(&b'[') {
            // Copy the whole escape sequence, up to and including its final
            // byte (0x40..=0x7e), or to the end of the token if unterminated.
            let params_start = i + 2;
            let end = bytes[params_start..]
                .iter()
                .position(|b| (0x40..=0x7e).contains(b))
                .map_or(bytes.len(), |offset| params_start + offset + 1);
            current.push_str(&token[i..end]);
            i = end;
            continue;
        }

        let ch = token[i..].chars().next().unwrap_or('\u{FFFD}');
        let cw = ch.width().unwrap_or(0);

        // Start a new row when the glyph does not fit, but never flush a row
        // that has no visible content yet: doing so would emit a blank row
        // and the glyph still would not fit on the next one.
        if cw > remaining_budget && *current_w > 0 {
            if *is_first_row {
                out.push(std::mem::take(current));
                *is_first_row = false;
            } else {
                let mut row = String::with_capacity(cont_indent.len() + current.len());
                row.push_str(cont_indent);
                row.push_str(current);
                out.push(row);
                current.clear();
            }
            *current_w = 0;
            remaining_budget = continuation_budget;
        }

        current.push(ch);
        *current_w += cw;
        remaining_budget = remaining_budget.saturating_sub(cw);
        i += ch.len_utf8();
    }
}
/// Unit tests for the soft-wrapping helpers in this file.
#[cfg(test)]
mod tests {
    use super::*;

    /// Text that already fits is returned as a single, untouched row.
    #[test]
    fn short_line_returns_unchanged() {
        let out = soft_wrap("hello world", 80, "");
        assert_eq!(out, vec!["hello world"]);
    }

    /// Rows break between words rather than inside them.
    #[test]
    fn wraps_on_word_boundary_not_midword() {
        let out = soft_wrap("the quick brown fox jumps", 12, "");
        for line in &out {
            for word in line.split_whitespace() {
                assert!(word.len() <= 12, "word {word:?} fits its row");
            }
        }
        assert_eq!(out[0], "the quick");
    }

    /// Explicit newlines always start a new row.
    #[test]
    fn preserves_hard_newlines() {
        let out = soft_wrap("line one\nline two", 80, "");
        assert_eq!(out, vec!["line one", "line two"]);
    }

    /// Every row after the first carries the continuation indent.
    #[test]
    fn applies_continuation_indent() {
        let out = soft_wrap("aaa bbb ccc ddd", 7, " ");
        assert_eq!(out[0], "aaa bbb");
        for line in &out[1..] {
            assert!(line.starts_with(" "));
        }
    }

    /// A word longer than the width is split into width-sized chunks.
    #[test]
    fn hard_breaks_unbreakable_long_token() {
        let out = soft_wrap("aaaaaaaaaaaaaaaaaa", 5, "");
        assert_eq!(out.len(), 4);
        assert!(out.iter().all(|l| l.len() <= 5));
    }

    /// Empty input still produces one (empty) row.
    #[test]
    fn empty_input_returns_one_empty_row() {
        let out = soft_wrap("", 80, "");
        assert_eq!(out, vec![""]);
    }

    /// A width of zero disables wrapping.
    #[test]
    fn zero_width_returns_unwrapped_lines() {
        let out = soft_wrap("anything goes", 0, "");
        assert_eq!(out, vec!["anything goes"]);
    }

    /// Double-width glyphs are measured in columns, not in characters.
    #[test]
    fn respects_display_width_for_cjk() {
        let out = soft_wrap("中文测试abc", 6, "");
        for line in &out {
            assert!(
                UnicodeWidthStr::width(line.as_str()) <= 6,
                "row {line:?} width = {} <= 6",
                UnicodeWidthStr::width(line.as_str()),
            );
        }
    }

    /// An indent at least as wide as the row is dropped instead of leaving no
    /// room for content. The indent is built with `repeat` so its width is
    /// unambiguous and really exceeds the requested width.
    #[test]
    fn indent_wider_than_width_degrades_gracefully() {
        let wide_indent = " ".repeat(8);
        let out = soft_wrap("aaa bbb ccc", 4, &wide_indent);
        assert!(!out.is_empty());
        assert_eq!(out, vec!["aaa", "bbb", "ccc"]);
    }

    /// Leading whitespace of the first row is kept when everything fits.
    #[test]
    fn preserves_leading_whitespace_on_first_row() {
        let out = soft_wrap(" ▶ hello world", 80, "");
        assert_eq!(out[0], " ▶ hello world");
    }

    /// CRLF input wraps exactly like LF input and leaks no `\r`.
    #[test]
    fn strips_carriage_returns_from_crlf_input() {
        let out_lf = soft_wrap("first\nsecond", 80, "");
        let out_crlf = soft_wrap("first\r\nsecond", 80, "");
        assert_eq!(out_lf, out_crlf);
        for row in &out_crlf {
            assert!(!row.contains('\r'), "row {row:?} should have no CR");
        }
    }

    /// Widths below 2 are raised to 2, so a double-width glyph always fits.
    #[test]
    fn wide_glyph_respects_max_width_at_floor() {
        let out = soft_wrap("中文", 1, "");
        for line in &out {
            assert!(
                UnicodeWidthStr::width(line.as_str()) <= 2,
                "row {line:?} should not exceed effective floor of 2"
            );
        }
    }

    /// A line consisting only of whitespace is preserved.
    #[test]
    fn preserves_leading_whitespace_only_line() {
        let out = soft_wrap(" ", 80, "");
        assert_eq!(out[0], " ");
    }

    /// SGR escape sequences do not count towards the visible width.
    #[test]
    fn visible_width_skips_ansi_sgr() {
        let plain = "- // ng_max = ceil(32 / 8) = 4";
        let styled = format!("\x1b[48;5;52m{}\x1b[49m", plain);
        let pw = visible_width(plain);
        let sw = visible_width(&styled);
        assert_eq!(pw, sw, "SGR escapes must not contribute to width");
    }

    /// A row padded to the exact width must not wrap just because it embeds
    /// SGR escape sequences.
    #[test]
    fn ansi_padded_row_does_not_overwrap() {
        let inner = 60;
        let visible_content: String = format!("-text{}", " ".repeat(inner - 5));
        assert_eq!(visible_width(&visible_content), inner);
        let row = format!("│ \x1b[48;5;52m{}\x1b[49m │", visible_content);
        let out = soft_wrap(&row, inner + 4, "");
        assert_eq!(
            out.len(),
            1,
            "row with embedded SGR must not split: got {} rows",
            out.len()
        );
    }
}