use regex::Regex;
use std::sync::LazyLock;
use crate::cells::{cell_len, chop_cells};
/// Pattern for one "word" chunk: optional leading whitespace, a run of one or
/// more non-whitespace characters, then any trailing whitespace.
///
/// The regex is compiled on first access and reused afterwards. The `unwrap`
/// is on a fixed pattern literal, so it does not depend on runtime input.
static RE_WORD: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\s*\S+\s*").unwrap());
/// Splits `text` into consecutive word chunks.
///
/// Every element is `(start, end, chunk)`, where `chunk` is the slice of
/// `text` matched by [`RE_WORD`] (the word together with the whitespace
/// around it) and `start`/`end` are the chunk's boundaries counted in
/// characters rather than bytes.
///
/// Text that contains no non-whitespace character produces an empty vector,
/// because the pattern requires at least one such character per match.
pub fn words(text: &str) -> Vec<(usize, usize, &str)> {
    // Regex matches report byte offsets; this table converts them to char offsets.
    let char_index_of = build_byte_to_char_map(text);
    RE_WORD
        .find_iter(text)
        .map(|found| {
            (
                char_index_of[found.start()],
                char_index_of[found.end()],
                found.as_str(),
            )
        })
        .collect()
}
/// Builds a lookup table from byte offsets in `text` to character indices.
///
/// The table has `text.len() + 1` entries. The entry at the first byte of
/// each character holds that character's index, and the final entry (at
/// `text.len()`) holds the total number of characters. Entries at offsets
/// that fall inside a multi-byte character are left at `0` and should not be
/// relied upon.
fn build_byte_to_char_map(text: &str) -> Vec<usize> {
    let total_bytes = text.len();
    let mut lookup = vec![0usize; total_bytes + 1];
    let mut chars_seen = 0usize;
    for (ordinal, (offset, _)) in text.char_indices().enumerate() {
        lookup[offset] = ordinal;
        chars_seen = ordinal + 1;
    }
    // One-past-the-end entry so an exclusive end offset can be looked up too.
    lookup[total_bytes] = chars_seen;
    lookup
}
/// Computes the character offsets at which `text` should be broken so that
/// each resulting line fits within `width` cells.
///
/// Widths are measured with `cell_len`; the trailing whitespace of a word is
/// ignored when deciding whether the word still fits on the current line.
///
/// * `width` - maximum line width in cells; `0` yields no breaks at all.
/// * `fold` - when `true`, a word wider than `width` is split with
///   `chop_cells` and a break is recorded before every piece (except a piece
///   at offset 0). When `false`, such a word is only moved to a new line and
///   left over-long.
///
/// Returns the break offsets in ascending order. Offset `0` is never emitted.
pub fn divide_line(text: &str, width: usize, fold: bool) -> Vec<usize> {
    let mut breaks: Vec<usize> = Vec::new();
    if width == 0 {
        return breaks;
    }

    // Cells already occupied on the line currently being filled.
    let mut used_cells: usize = 0;

    for (word_start, _word_end, chunk) in words(text) {
        let visible_cells = cell_len(chunk.trim_end());

        // Case 1: the word fits in what is left of the current line.
        if visible_cells <= width.saturating_sub(used_cells) {
            used_cells += cell_len(chunk);
            continue;
        }

        // Case 2: it does not fit here, but it fits on a fresh line.
        if visible_cells <= width {
            if used_cells > 0 && word_start > 0 {
                breaks.push(word_start);
            }
            used_cells = cell_len(chunk);
            continue;
        }

        // Case 3: wider than any line, folding disabled; start a new line
        // and let the word overflow.
        if !fold {
            if word_start > 0 {
                breaks.push(word_start);
            }
            used_cells = cell_len(chunk);
            continue;
        }

        // Case 4: wider than any line, folding enabled; break before each piece.
        let pieces = chop_cells(chunk, width);
        if let Some((final_piece, leading_pieces)) = pieces.split_last() {
            let mut piece_start = word_start;
            for piece in leading_pieces {
                if piece_start > 0 {
                    breaks.push(piece_start);
                }
                piece_start += piece.chars().count();
            }
            if piece_start > 0 {
                breaks.push(piece_start);
            }
            // Only the last piece shares its line with whatever follows.
            used_cells = cell_len(final_piece);
        }
    }

    breaks
}
/// Unit tests for `words`, `divide_line` and `build_byte_to_char_map`.
///
/// All offsets asserted here are character offsets, not byte offsets.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- words -----------------------------------------------------------

    #[test]
    fn test_words_basic() {
        let result = words("foo bar baz");
        assert_eq!(
            result,
            vec![(0, 4, "foo "), (4, 8, "bar "), (8, 11, "baz")]
        );
    }

    #[test]
    fn test_words_leading_whitespace() {
        // Two leading spaces: they belong to the first chunk.
        let result = words("  hello world");
        assert_eq!(result, vec![(0, 8, "  hello "), (8, 13, "world")]);
    }

    #[test]
    fn test_words_trailing_whitespace() {
        // Two trailing spaces: they belong to the last chunk.
        let result = words("hello world  ");
        assert_eq!(result, vec![(0, 6, "hello "), (6, 13, "world  ")]);
    }

    #[test]
    fn test_words_single_word() {
        let result = words("hello");
        assert_eq!(result, vec![(0, 5, "hello")]);
    }

    #[test]
    fn test_words_empty_string() {
        let result = words("");
        assert_eq!(result, Vec::<(usize, usize, &str)>::new());
    }

    #[test]
    fn test_words_only_whitespace() {
        // No non-whitespace character means no chunk at all.
        let result = words("   ");
        assert_eq!(result, Vec::<(usize, usize, &str)>::new());
    }

    #[test]
    fn test_words_multiple_spaces_between() {
        // The whole run of three spaces is attached to the preceding word.
        let result = words("foo   bar");
        assert_eq!(result, vec![(0, 6, "foo   "), (6, 9, "bar")]);
    }

    #[test]
    fn test_words_cjk() {
        // Multi-byte characters still count as one offset each.
        let result = words("あ い");
        assert_eq!(result, vec![(0, 2, "あ "), (2, 3, "い")]);
    }

    #[test]
    fn test_words_no_trailing_space() {
        let result = words("abracadabra");
        assert_eq!(result, vec![(0, 11, "abracadabra")]);
    }

    // ---- divide_line: plain wrapping ------------------------------------

    #[test]
    fn test_divide_line_simple_width_3() {
        assert_eq!(divide_line("foo bar baz", 3, true), vec![4, 8]);
    }

    #[test]
    fn test_divide_line_simple_width_4() {
        assert_eq!(divide_line("foo bar baz", 4, true), vec![4, 8]);
    }

    #[test]
    fn test_divide_line_simple_width_7() {
        assert_eq!(divide_line("foo bar baz", 7, true), vec![8]);
    }

    #[test]
    fn test_divide_line_fits_on_one_line() {
        assert_eq!(divide_line("foo bar baz", 20, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_exact_fit() {
        assert_eq!(divide_line("foo bar baz", 11, true), Vec::<usize>::new());
    }

    // ---- divide_line: folding over-long words ---------------------------

    #[test]
    fn test_divide_line_fold_long_word() {
        assert_eq!(divide_line("abracadabra", 4, true), vec![4, 8]);
    }

    #[test]
    fn test_divide_line_fold_long_word_after_short() {
        assert_eq!(divide_line("XX 12345678912", 4, true), vec![3, 7, 11]);
    }

    #[test]
    fn test_divide_line_fold_single_char_width() {
        assert_eq!(divide_line("abcd", 1, true), vec![1, 2, 3]);
    }

    #[test]
    fn test_divide_line_no_fold() {
        // A leading over-long word is never broken when folding is off.
        assert_eq!(divide_line("abracadabra", 4, false), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_no_fold_long_word_after_short() {
        assert_eq!(divide_line("XX 12345678912", 4, false), vec![3]);
    }

    // ---- divide_line: wide (CJK) characters -----------------------------

    #[test]
    fn test_divide_line_cjk_width_4() {
        assert_eq!(divide_line("ああああ", 4, true), vec![2]);
    }

    #[test]
    fn test_divide_line_cjk_with_ascii() {
        assert_eq!(divide_line("aあ bい", 3, true), vec![3]);
    }

    #[test]
    fn test_divide_line_cjk_fold() {
        assert_eq!(divide_line("ああああああ", 5, true), vec![2, 4]);
    }

    // ---- divide_line: edge cases ----------------------------------------

    #[test]
    fn test_divide_line_empty_string() {
        assert_eq!(divide_line("", 10, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_width_zero() {
        assert_eq!(divide_line("hello", 0, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_single_word_fits() {
        assert_eq!(divide_line("hello", 10, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_single_char() {
        assert_eq!(divide_line("a", 1, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_all_spaces_yields_nothing() {
        assert_eq!(divide_line("   ", 3, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_word_exactly_width() {
        assert_eq!(divide_line("abcd", 4, true), Vec::<usize>::new());
    }

    #[test]
    fn test_divide_line_two_words_each_exactly_width() {
        assert_eq!(divide_line("abcd efgh", 4, true), vec![5]);
    }

    #[test]
    fn test_divide_line_word_with_leading_spaces() {
        // The first chunk "   hello " is wider than the line, so it is folded
        // after five characters; "world" then starts a new line at offset 9.
        assert_eq!(divide_line("   hello world", 5, true), vec![5, 9]);
    }

    #[test]
    fn test_divide_line_many_short_words() {
        assert_eq!(divide_line("a b c d e", 1, true), vec![2, 4, 6, 8]);
    }

    // ---- build_byte_to_char_map -----------------------------------------

    #[test]
    fn test_byte_to_char_map_ascii() {
        let map = build_byte_to_char_map("abc");
        assert_eq!(map, vec![0, 1, 2, 3]);
    }

    #[test]
    fn test_byte_to_char_map_multibyte() {
        let map = build_byte_to_char_map("あ");
        assert_eq!(map[0], 0);
        assert_eq!(map[3], 1);
        assert_eq!(map.len(), 4);
    }

    #[test]
    fn test_byte_to_char_map_mixed() {
        // Only offsets on character boundaries are checked.
        let map = build_byte_to_char_map("aあb");
        assert_eq!(map[0], 0);
        assert_eq!(map[1], 1);
        assert_eq!(map[4], 2);
        assert_eq!(map[5], 3);
    }

    #[test]
    fn test_byte_to_char_map_empty() {
        let map = build_byte_to_char_map("");
        assert_eq!(map, vec![0]);
    }
}