use std::iter;
use crate::writer::ZWSP;
/// Returns the display width of `text`, which does not have to be valid UTF-8.
///
/// The input is processed one UTF-8 chunk at a time: the valid part of every
/// chunk is measured with `str_width`, and every chunk that carries invalid
/// bytes contributes a fixed width of 2 on top of that.
pub fn width(text: &[u8]) -> usize {
    text.utf8_chunks()
        .map(|chunk| {
            // A run of invalid bytes always counts as 2 columns, no matter how
            // many bytes it holds (presumably mirroring how such bytes are
            // rendered elsewhere; confirm against the writer).
            let invalid_width = if chunk.invalid().is_empty() { 0 } else { 2 };
            str_width(chunk.valid()) + invalid_width
        })
        .sum()
}
/// Returns the display width of a single character.
///
/// The character is encoded into a stack buffer and measured with the same
/// rules `str_width` applies to whole strings.
pub fn char_width(c: char) -> usize {
    // Four bytes is the longest UTF-8 encoding of any `char`.
    let mut scratch = [0u8; 4];
    let encoded: &str = c.encode_utf8(&mut scratch);
    str_width(encoded)
}
/// Returns the display width of a valid UTF-8 string.
///
/// Every character counts as one column, except:
/// * `ZWSP`, which is always zero-width;
/// * an ESC (`\x1b`) and every character after it up to and including the
///   next `m`, which are all zero-width.
fn str_width(s: &str) -> usize {
    let mut in_escape = false;
    let mut columns = 0;
    for ch in s.chars() {
        match ch {
            // Never visible, and does not affect the escape state.
            ZWSP => {}
            // Start (or restart) of an escape sequence.
            '\x1b' => in_escape = true,
            // `m` terminates the escape sequence currently being skipped.
            'm' if in_escape => in_escape = false,
            // Any other character inside an escape sequence is invisible.
            _ if in_escape => {}
            _ => columns += 1,
        }
    }
    columns
}
/// Word-wrapping configuration for a single paragraph of raw bytes.
///
/// Built with [`Wrapper::new`], optionally customised with
/// [`Wrapper::with_newline`], and consumed by [`Wrapper::wrap`].
#[derive(Debug, Clone)]
pub struct Wrapper<'a> {
    /// The text to wrap.
    paragraph: &'a [u8],
    /// Maximum width a line may reach before a line break is inserted.
    max_width: usize,
    /// Bytes emitted at every inserted line break.
    newline: &'a [u8],
    /// Width already occupied on a line right after `newline` was emitted.
    newline_width: usize,
}
impl<'a> Wrapper<'a> {
    /// Creates a wrapper for `paragraph` that breaks lines wider than
    /// `max_width`, using a plain `"\n"` (width 0) as the line break.
    pub fn new(paragraph: &'a [u8], max_width: usize) -> Self {
        Self {
            paragraph,
            max_width,
            newline: b"\n",
            newline_width: 0,
        }
    }

    /// Replaces the line break with `newline`, e.g. a line feed followed by
    /// indentation. `newline_width` is the width the new line already has
    /// after `newline` was written.
    pub fn with_newline(mut self, newline: &'a [u8], newline_width: usize) -> Self {
        self.newline = newline;
        self.newline_width = newline_width;
        self
    }

    /// Returns an iterator over the pieces of the wrapped paragraph.
    ///
    /// The pieces are the words of the paragraph (each with its trailing
    /// spaces) interleaved with the configured newline wherever a break is
    /// inserted. The word right before a break has its trailing spaces
    /// trimmed. Concatenating all pieces gives the wrapped text.
    pub fn wrap(self) -> impl Iterator<Item = &'a [u8]> {
        let mut pending = words(self.paragraph).peekable();
        // Width of the line built so far.
        let mut column = 0;
        // Set when the previous piece ended a line and more words follow.
        let mut break_pending = false;

        iter::from_fn(move || -> Option<&'a [u8]> {
            if break_pending {
                break_pending = false;
                return Some(self.newline);
            }

            let word = pending.next()?;
            let lookahead: &[u8] = pending.peek().copied().unwrap_or_default();

            let word_width = self::width(word);
            let trimmed = word.trim_end_spaces();

            // Would the line still fit if the following word were appended?
            let next_fits =
                column + word_width + self::width(lookahead.trim_end_spaces()) <= self.max_width;
            // Do not break while the visible content does not yet reach past
            // the width a fresh line would start with.
            let within_newline_width = column + self::width(trimmed) <= self.newline_width;

            if next_fits || within_newline_width {
                column += word_width;
                return Some(word);
            }

            // End the line after this word; only emit a newline if there is
            // something left to put on the next line.
            break_pending = pending.peek().is_some();
            column = self.newline_width;
            Some(trimmed)
        })
    }
}
/// Extension methods for byte strings used by the wrapping code.
trait BytesExt {
    /// Returns `self` with its trailing spaces removed.
    fn trim_end_spaces(&self) -> &Self;
}
impl BytesExt for [u8] {
    /// Strips every trailing ASCII space and every trailing UTF-8 encoded
    /// `ZWSP` from the slice, in any order and any number. Other bytes are
    /// left untouched.
    fn trim_end_spaces(&self) -> &Self {
        // UTF-8 encoding of `ZWSP`, computed at compile time.
        const ZWSP_B: [u8; ZWSP.len_utf8()] = {
            let mut buf = [0; ZWSP.len_utf8()];
            ZWSP.encode_utf8(&mut buf);
            buf
        };

        let mut rest = self;
        loop {
            rest = match (rest.strip_suffix(b" "), rest.strip_suffix(&ZWSP_B)) {
                (Some(shorter), _) | (None, Some(shorter)) => shorter,
                // Neither kind of space is left at the end.
                (None, None) => return rest,
            };
        }
    }
}
/// Splits `buf` into words, keeping the trailing spaces attached to each word.
///
/// A word ends where a run of spaces (ASCII space or `ZWSP`) is followed by a
/// non-space, so concatenating all yielded slices reproduces `buf` exactly.
/// Leading spaces form a word of their own. Invalid UTF-8 bytes are treated
/// as ordinary non-space content.
fn words(buf: &[u8]) -> impl Iterator<Item = &[u8]> {
    // Byte offset in `buf` at which the next word begins.
    let mut start = 0;
    let mut was_space = false;
    iter::from_fn(move || {
        // Offset of the current chunk relative to `start`. Indices produced
        // below are relative to their chunk, so this must be added to get a
        // position in `buf` once more than one chunk has been scanned (which
        // happens whenever the remaining input contains invalid bytes).
        let mut chunk_offset = 0;
        for chunk in buf[start..].utf8_chunks() {
            let valid_len = chunk.valid().len();
            let spaces = chunk
                .valid()
                .char_indices()
                .map(|(i, ch)| (i, ch == ' ' || ch == ZWSP))
                .chain(
                    // Invalid bytes are never spaces.
                    chunk
                        .invalid()
                        .iter()
                        .enumerate()
                        .map(|(i, _)| (valid_len + i, false)),
                );
            for (i, is_space) in spaces {
                if was_space && !is_space {
                    // First non-space after a run of spaces: the word ends
                    // right before it.
                    was_space = false;
                    let end = start + chunk_offset + i;
                    let word = &buf[start..end];
                    start = end;
                    return Some(word);
                }
                was_space = is_space;
            }
            chunk_offset += valid_len + chunk.invalid().len();
        }
        // No further boundary: whatever is left is the final word.
        if start < buf.len() {
            let word = &buf[start..];
            start = buf.len();
            return Some(word);
        }
        None
    })
}
/// Unit tests for width calculation, trimming, word splitting and wrapping.
#[cfg(test)]
mod tests {
    use crate::writer::{BOLD, NBSP, RESET};

    use super::*;

    #[test]
    fn test_width() {
        assert_eq!(width(b""), 0);
        // Multi-byte characters count as a single column.
        assert_eq!(width("ü".as_bytes()), 1);
        assert_eq!(width("μ".as_bytes()), 1);
        assert_eq!(width("π".as_bytes()), 1);
        // One valid character plus one run of invalid bytes (width 2).
        assert_eq!(width(b"b\xff"), 3);
        // Styling escape sequences are invisible.
        assert_eq!(width(format!("{BOLD}BOLD{RESET}").as_bytes()), 4);
    }

    #[test]
    fn test_trim() {
        assert_eq!(b"".trim_end_spaces(), b"");
        assert_eq!(b"\n ".trim_end_spaces(), b"\n");
        assert_eq!(b"abc ".trim_end_spaces(), b"abc");
        // A non-breaking space is not trimmed.
        assert_eq!(
            format!("abc{NBSP}").as_bytes().trim_end_spaces(),
            format!("abc{NBSP}").as_bytes()
        );
        assert_eq!(format!("abc{ZWSP}").as_bytes().trim_end_spaces(), b"abc");
    }

    #[test]
    fn test_words() {
        assert_eq!(
            words(b" abc def gh a\xffb ").collect::<Vec<_>>(),
            vec![&b" "[..], b"abc ", b"def ", b"gh ", b"a\xffb "],
        );
        assert_eq!(
            words(format!("abc{ZWSP}123").as_bytes()).collect::<Vec<_>>(),
            vec![format!("abc{ZWSP}").as_bytes(), b"123"],
        );
    }

    #[test]
    fn test_wrap() {
        /// Wraps `text` at width 10 using a newline followed by `indent`
        /// spaces and compares the concatenated output with `wrapped`.
        fn check_indent(indent: usize, text: &[u8], wrapped: &[u8]) {
            let mut newline = vec![b'\n'];
            newline.resize(indent + 1, b' ');
            let actual = Wrapper::new(text, 10)
                .with_newline(&newline, indent)
                .wrap()
                .flatten()
                .copied()
                .collect::<Vec<_>>();
            assert_eq!(
                actual,
                wrapped,
                r#"expected "{}", got "{}""#,
                wrapped.escape_ascii(),
                actual.escape_ascii()
            );
        }

        /// Same as `check_indent` without any indentation.
        fn check(text: &[u8], wrapped: &[u8]) {
            check_indent(0, text, wrapped);
        }

        check(b"12345 7890 abc def", b"12345 7890\nabc def");
        check(b"1234567890 bla", b"1234567890\nbla");
        check(b"12345678901234567890", b"12345678901234567890");
        check(b"1 2 3 4 5 longtext", b"1 2 3 4 5\nlongtext");
        // Continuation lines start with the 4-space indent built by
        // `check_indent`.
        check_indent(
            4,
            b"123 456 789 abcdef",
            b"123 456\n    789\n    abcdef",
        );
        check_indent(
            4,
            b"1234 toolongtofitononeline",
            b"1234 toolongtofitononeline",
        );
        // A non-breaking space never allows a line break.
        check(
            format!("1234567890 1234567890{NBSP}abcd").as_bytes(),
            format!("1234567890\n1234567890{NBSP}abcd").as_bytes(),
        );
        // A zero-width space allows a break and is dropped at the line end.
        check(
            format!("1234567890{ZWSP}1234567890{NBSP}abcd").as_bytes(),
            format!("1234567890\n1234567890{NBSP}abcd").as_bytes(),
        );
    }
}