/// Options consumed by [`preprocess`] when normalising text.
#[derive(Debug, Clone)]
pub struct TextConfig {
    /// Spacing between tab stops, in columns. Passed straight to [`expand_tabs`],
    /// which treats `0` as "remove tabs".
    pub tab_width: u8,
}

impl TextConfig {
    /// Creates a configuration with the given tab-stop spacing.
    pub fn new(tab_width: u8) -> Self {
        TextConfig { tab_width }
    }
}

impl Default for TextConfig {
    /// Returns a configuration with a tab stop every 8 columns.
    fn default() -> Self {
        TextConfig::new(8)
    }
}
/// Replaces every tab in `text` with spaces up to the next tab stop.
///
/// Tab stops sit at multiples of `tab_width`. The column counter advances by one
/// per `char` (not per display cell) and restarts at zero after each `'\n'`.
/// A `tab_width` of `0` deletes all tabs instead of expanding them.
pub fn expand_tabs(text: &str, tab_width: u8) -> String {
    let stop = usize::from(tab_width);
    if stop == 0 {
        return text.replace('\t', "");
    }

    let mut out = String::with_capacity(text.len());
    let mut col = 0usize;
    for ch in text.chars() {
        match ch {
            '\t' => {
                // Pad to the next multiple of `stop`; a tab that already sits on a
                // stop still advances a full `stop` columns.
                let pad = stop - col % stop;
                out.extend(std::iter::repeat(' ').take(pad));
                col += pad;
            }
            '\n' => {
                out.push('\n');
                col = 0;
            }
            other => {
                out.push(other);
                col += 1;
            }
        }
    }
    out
}
/// Returns `text` with control characters removed, keeping only tab and newline.
///
/// Dropped characters are the C0 controls (U+0000–U+001F, which includes `'\r'`),
/// DEL (U+007F) and the C1 controls (U+0080–U+009F). Everything else is copied
/// through unchanged.
pub fn filter_control_chars(text: &str) -> String {
    text.chars()
        // `char::is_control` matches the Unicode `Cc` category, i.e. exactly the
        // C0, DEL and C1 ranges listed above.
        .filter(|&ch| matches!(ch, '\t' | '\n') || !ch.is_control())
        .collect()
}
/// Runs the full clean-up pipeline on `text`.
///
/// Tabs are expanded first using `config.tab_width`, then the result is passed
/// through [`filter_control_chars`].
pub fn preprocess(text: &str, config: &TextConfig) -> String {
    filter_control_chars(&expand_tabs(text, config.tab_width))
}
/// Returns the longest prefix of `text` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// If `text` already fits, it is returned unchanged. Otherwise the cut point is
/// moved backwards from `max_bytes` until it no longer splits a character.
pub fn truncate_to_char_boundary(text: &str, max_bytes: usize) -> &str {
    if max_bytes >= text.len() {
        return text;
    }
    // Index 0 is always a boundary, so the search always finds a cut point.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| text.is_char_boundary(idx))
        .unwrap_or(0);
    &text[..cut]
}
pub fn string_display_width(text: &str) -> u16 {
use unicode_width::UnicodeWidthStr;
let width = UnicodeWidthStr::width(text);
if width > u16::MAX as usize {
u16::MAX
} else {
width as u16
}
}
pub fn truncate_to_display_width(text: &str, max_width: usize) -> &str {
use unicode_width::UnicodeWidthChar;
let mut width = 0usize;
for (byte_idx, ch) in text.char_indices() {
let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
if width + ch_width > max_width {
return &text[..byte_idx];
}
width += ch_width;
}
text
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a run of `n` ASCII spaces so that expected tab-expansion output
    /// states its padding width explicitly instead of relying on a literal whose
    /// whitespace is easy to miscount or mangle.
    fn spaces(n: usize) -> String {
        " ".repeat(n)
    }

    // ---- truncate_to_char_boundary ----

    #[test]
    fn truncate_at_char_boundary_ascii() {
        let text = "Hello World";
        assert_eq!(truncate_to_char_boundary(text, 5), "Hello");
    }

    #[test]
    fn truncate_at_char_boundary_emoji() {
        // "Hello " is 6 bytes and the emoji occupies bytes 6..10, so a limit of 7
        // lands inside the emoji and must back off to 6.
        let text = "Hello \u{1F600} World";
        let result = truncate_to_char_boundary(text, 7);
        assert_eq!(result, "Hello ");
    }

    #[test]
    fn truncate_at_char_boundary_cjk() {
        // Four 3-byte characters; a limit of 7 falls inside the third one.
        let text = "\u{4F60}\u{597D}\u{4E16}\u{754C}";
        let result = truncate_to_char_boundary(text, 7);
        assert_eq!(result, "\u{4F60}\u{597D}");
    }

    #[test]
    fn truncate_at_char_boundary_empty() {
        assert_eq!(truncate_to_char_boundary("", 5), "");
    }

    #[test]
    fn truncate_at_char_boundary_zero_limit() {
        assert_eq!(truncate_to_char_boundary("Hello", 0), "");
    }

    #[test]
    fn truncate_at_char_boundary_larger_limit() {
        let text = "Hi";
        assert_eq!(truncate_to_char_boundary(text, 100), "Hi");
    }

    // ---- string_display_width ----

    #[test]
    fn display_width_ascii() {
        assert_eq!(string_display_width("Hello"), 5);
    }

    #[test]
    fn display_width_emoji() {
        assert_eq!(string_display_width("\u{1F600}"), 2);
    }

    #[test]
    fn display_width_cjk() {
        assert_eq!(string_display_width("\u{4F60}\u{597D}"), 4);
    }

    #[test]
    fn display_width_empty() {
        assert_eq!(string_display_width(""), 0);
    }

    #[test]
    fn display_width_mixed() {
        // "Hi " contributes 3 columns, the emoji 2.
        assert_eq!(string_display_width("Hi \u{1F600}"), 5);
    }

    // ---- truncate_to_display_width ----

    #[test]
    fn truncate_to_display_width_ascii() {
        assert_eq!(truncate_to_display_width("Hello World", 5), "Hello");
    }

    #[test]
    fn truncate_to_display_width_cjk() {
        // Each character is 2 columns wide; the third would bring the total to 6.
        let text = "\u{4F60}\u{597D}\u{4E16}";
        assert_eq!(truncate_to_display_width(text, 5), "\u{4F60}\u{597D}");
    }

    #[test]
    fn truncate_to_display_width_emoji() {
        assert_eq!(truncate_to_display_width("Hi \u{1F600}", 4), "Hi ");
    }

    // ---- expand_tabs ----

    #[test]
    fn expand_tabs_single_tab_at_position_zero() {
        let result = expand_tabs("\t", 8);
        assert_eq!(result, spaces(8));
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn expand_tabs_after_three_chars() {
        // Column 3 -> next stop at 8, so 5 spaces of padding.
        let result = expand_tabs("abc\t", 8);
        assert_eq!(result, format!("abc{}", spaces(5)));
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn expand_tabs_after_eight_chars() {
        // Already on a stop: the tab advances a full 8 columns.
        let result = expand_tabs("abcdefgh\t", 8);
        assert_eq!(result, format!("abcdefgh{}", spaces(8)));
        assert_eq!(result.len(), 16);
    }

    #[test]
    fn expand_tabs_no_tabs_unchanged() {
        let result = expand_tabs("hello world", 8);
        assert_eq!(result, "hello world");
    }

    #[test]
    fn expand_tabs_custom_width_four() {
        let result = expand_tabs("\t", 4);
        assert_eq!(result, spaces(4));
        assert_eq!(result.len(), 4);

        let result2 = expand_tabs("ab\t", 4);
        assert_eq!(result2, format!("ab{}", spaces(2)));
        assert_eq!(result2.len(), 4);
    }

    #[test]
    fn expand_tabs_multiple_tabs() {
        let result = expand_tabs("\t\t", 4);
        assert_eq!(result, spaces(8));
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn expand_tabs_with_newline_resets_column() {
        // The column restarts after '\n', so the tab expands to a full 4 spaces.
        let result = expand_tabs("abc\n\t", 4);
        assert_eq!(result, format!("abc\n{}", spaces(4)));
    }

    // ---- filter_control_chars ----

    #[test]
    fn filter_control_chars_removes_null() {
        let result = filter_control_chars("hello\x00world");
        assert_eq!(result, "helloworld");
    }

    #[test]
    fn filter_control_chars_removes_bell() {
        let result = filter_control_chars("hello\x07world");
        assert_eq!(result, "helloworld");
    }

    #[test]
    fn filter_control_chars_preserves_tab_and_newline() {
        let result = filter_control_chars("hello\tworld\n");
        assert_eq!(result, "hello\tworld\n");
    }

    #[test]
    fn filter_control_chars_clean_text_unchanged() {
        let result = filter_control_chars("Hello, World! 123");
        assert_eq!(result, "Hello, World! 123");
    }

    #[test]
    fn filter_control_chars_removes_c1_range() {
        let text = format!("hello{}world", '\u{0085}');
        let result = filter_control_chars(&text);
        assert_eq!(result, "helloworld");
    }

    // ---- preprocess / TextConfig ----

    #[test]
    fn preprocess_combines_tab_expansion_and_filtering() {
        // 'a' at column 0, tab pads to column 4 (3 spaces), BEL is stripped.
        let config = TextConfig::new(4);
        let result = preprocess("a\tb\x07c", &config);
        assert_eq!(result, format!("a{}bc", spaces(3)));
    }

    #[test]
    fn empty_string_handling() {
        assert_eq!(expand_tabs("", 8), "");
        assert_eq!(filter_control_chars(""), "");
        let config = TextConfig::default();
        assert_eq!(preprocess("", &config), "");
    }

    #[test]
    fn text_config_default_tab_width_eight() {
        let config = TextConfig::default();
        assert_eq!(config.tab_width, 8);
    }
}