#![forbid(missing_docs, unsafe_code)]
#![warn(clippy::arithmetic_side_effects)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(
feature = "std",
doc = r##"
Make sure a string is displayed in exactly the requested number of columns by
combining padding and truncating.
```rust
use unicode_truncate::UnicodeTruncateStr;
use unicode_truncate::Alignment;
use unicode_width::UnicodeWidthStr;
let str = "你好吗".unicode_pad(5, Alignment::Left, true);
assert_eq!(str, "你好 ");
assert_eq!(str.width(), 5);
```
"##
)]
use itertools::{merge_join_by, Either};
use unicode_segmentation::UnicodeSegmentation;
use unicode_width::UnicodeWidthStr;
/// Horizontal alignment used when truncating or padding a string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// Keep the start of the string when truncating; pad with spaces on the right.
    Left,
    /// Keep the middle of the string when truncating; split the padding between both sides
    /// (any odd column goes to the right).
    Center,
    /// Keep the end of the string when truncating; pad with spaces on the left.
    Right,
}
/// Truncation and padding of strings measured in displayed columns instead of bytes or chars.
///
/// Every truncating method returns the kept slice together with its displayed width.
pub trait UnicodeTruncateStr {
    /// Truncates from the end: keeps the longest prefix that fits into `max_width` columns.
    ///
    /// Returns the kept slice and its displayed width.
    fn unicode_truncate(&self, max_width: usize) -> (&str, usize);

    /// Truncates from the start: keeps the longest suffix that fits into `max_width` columns.
    ///
    /// Returns the kept slice and its displayed width.
    fn unicode_truncate_start(&self, max_width: usize) -> (&str, usize);

    /// Truncates from both ends: keeps a middle part that fits into `max_width` columns.
    ///
    /// Returns the kept slice and its displayed width.
    fn unicode_truncate_centered(&self, max_width: usize) -> (&str, usize);

    /// Truncates to `max_width` columns, keeping the part of the string selected by `align`.
    ///
    /// * [`Alignment::Left`] forwards to [`unicode_truncate`](Self::unicode_truncate),
    /// * [`Alignment::Center`] forwards to
    ///   [`unicode_truncate_centered`](Self::unicode_truncate_centered),
    /// * [`Alignment::Right`] forwards to
    ///   [`unicode_truncate_start`](Self::unicode_truncate_start).
    #[inline]
    fn unicode_truncate_aligned(&self, max_width: usize, align: Alignment) -> (&str, usize) {
        match align {
            Alignment::Right => self.unicode_truncate_start(max_width),
            Alignment::Center => self.unicode_truncate_centered(max_width),
            Alignment::Left => self.unicode_truncate(max_width),
        }
    }

    /// Pads the string with spaces up to `target_width` columns, placing the padding according
    /// to `align`.
    ///
    /// When `truncate` is `true`, a string wider than `target_width` is first cut down from the
    /// end; when it is `false`, such a string is returned unchanged. The result borrows from
    /// `self` whenever no padding has to be added.
    #[cfg(feature = "std")]
    fn unicode_pad(
        &self,
        target_width: usize,
        align: Alignment,
        truncate: bool,
    ) -> std::borrow::Cow<'_, str>;
}
impl UnicodeTruncateStr for str {
/// Keeps the longest prefix of `self` whose displayed width does not exceed `max_width`.
///
/// Cuts only happen on grapheme boundaries. Zero-width graphemes that directly follow the
/// last kept grapheme are kept as well, since they do not add any width.
///
/// Returns the prefix and its displayed width.
#[inline]
fn unicode_truncate(&self, max_width: usize) -> (&str, usize) {
    // Candidate cut positions: the start of every grapheme, plus the end of the string as a
    // trailing sentinel that has no width of its own.
    let cut_points = self
        .grapheme_indices(true)
        .map(|(offset, cluster)| (offset, cluster.width()))
        .chain(core::iter::once((self.len(), 0)));

    let mut end = 0;
    let mut kept_width = 0;
    // Width of everything in front of the candidate currently being looked at.
    let mut width_before: usize = 0;
    for (offset, cluster_width) in cut_points {
        // Stop on (practically impossible) overflow instead of wrapping around.
        let width_after = match width_before.checked_add(cluster_width) {
            Some(total) => total,
            None => break,
        };
        if width_before > max_width {
            break;
        }
        end = offset;
        kept_width = width_before;
        width_before = width_after;
    }

    // `end` is either 0, a grapheme start or `self.len()`, hence always a char boundary.
    let prefix = self.get(..end).unwrap();
    debug_assert_eq!(prefix.width(), kept_width);
    (prefix, kept_width)
}
/// Keeps the longest suffix of `self` whose displayed width does not exceed `max_width`.
///
/// Cuts only happen on grapheme boundaries. The walk goes backwards from the end and stops
/// at the first grapheme that would push the width over the limit.
///
/// Returns the suffix and its displayed width.
#[inline]
fn unicode_truncate_start(&self, max_width: usize) -> (&str, usize) {
    // Nothing is kept until at least one grapheme from the end fits.
    let mut start = self.len();
    let mut kept_width: usize = 0;

    for (offset, cluster) in self.grapheme_indices(true).rev() {
        // Stop on (practically impossible) overflow instead of wrapping around.
        let widened = match kept_width.checked_add(cluster.width()) {
            Some(total) => total,
            None => break,
        };
        if widened > max_width {
            break;
        }
        start = offset;
        kept_width = widened;
    }

    // `start` is either `self.len()` or a grapheme start, hence always a char boundary.
    let suffix = self.get(start..).unwrap();
    debug_assert_eq!(suffix.width(), kept_width);
    (suffix, kept_width)
}
/// Keeps a middle part of `self` whose displayed width does not exceed `max_width`.
///
/// Graphemes are removed alternately from the start and from the end (always from the side
/// that has lost less width so far, the end on a tie) until at least
/// `self.width() - max_width` columns are gone. Cuts only happen on grapheme boundaries, so
/// the result can be narrower than `max_width`.
///
/// Returns the kept slice and its displayed width.
#[inline]
fn unicode_truncate_centered(&self, max_width: usize) -> (&str, usize) {
    if max_width == 0 {
        return ("", 0);
    }

    let original_width = self.width();
    if original_width <= max_width {
        return (self, original_width);
    }

    // At least this many columns have to be removed. The subtraction cannot underflow since
    // `original_width > max_width` at this point.
    let min_removal_width = original_width.checked_sub(max_width).unwrap();

    // Both scans below skip straight to roughly half of the required removal so the merge
    // does not have to walk the outer parts step by step. The margin of 10 columns keeps the
    // skip short of the exact half.
    // NOTE(review): 10 is presumably an upper bound for the width of a single grapheme —
    // confirm; wider graphemes may lead to a less centered result.
    let less_than_half = min_removal_width.saturating_sub(10) / 2;

    // (byte index of a grapheme, width removed when cutting in front of that grapheme),
    // i.e. the width of the current grapheme itself is NOT included.
    let from_start = self
        .grapheme_indices(true)
        .map(|(byte_index, grapheme)| (byte_index, grapheme.width()))
        .scan(
            (0usize, 0usize),
            |(sum, prev_width), (byte_index, grapheme_width)| {
                *sum = sum.checked_add(*prev_width)?;
                *prev_width = grapheme_width;
                Some((byte_index, *sum))
            },
        )
        .skip_while(|&(_, removed)| removed < less_than_half);

    // (byte index of a grapheme, width removed when cutting in front of that grapheme and
    // dropping everything from there to the end), i.e. the current grapheme IS included.
    let from_end = self
        .grapheme_indices(true)
        .map(|(byte_index, grapheme)| (byte_index, grapheme.width()))
        .rev()
        .scan(0usize, |sum, (byte_index, grapheme_width)| {
            *sum = sum.checked_add(grapheme_width)?;
            Some((byte_index, *sum))
        })
        .skip_while(|&(_, removed)| removed < less_than_half);

    let (start_index, end_index, removed_width) = merge_join_by(
        from_start,
        from_end,
        // Advance the side that has removed less so far; on a tie advance the end.
        |&(_, start_removed), &(_, end_removed)| start_removed < end_removed,
    )
    // Remember the latest cut of each side and report the combined removal after every step.
    .scan(
        // Until a step from the end has been taken nothing is trimmed there, so the end cut
        // has to start at `self.len()`. Starting at 0 would produce the inverted range
        // `start_index..0` (and a panic below) whenever trimming the start alone already
        // removes enough, e.g. for "a你" truncated to 2 columns.
        (0usize, 0usize, 0usize, self.len()),
        |(start_removed, end_removed, start_index, end_index), position| {
            match position {
                Either::Left((idx, removed)) => {
                    *start_index = idx;
                    *start_removed = removed;
                }
                Either::Right((idx, removed)) => {
                    *end_index = idx;
                    *end_removed = removed;
                }
            }
            // Both parts are widths of disjoint pieces of the same string, so the sum is
            // expected to stay within `usize`.
            let total_removed = start_removed.checked_add(*end_removed).unwrap();
            Some((*start_index, *end_index, total_removed))
        },
    )
    .find(|&(_, _, removed)| removed >= min_removal_width)
    // Not expected to happen; removing everything is the sane fallback.
    .unwrap_or((0, 0, original_width));

    // Both indices come from `grapheme_indices` (or are 0 / `self.len()`), hence they are
    // char boundaries.
    let result = self.get(start_index..end_index).unwrap();
    let result_width = original_width.checked_sub(removed_width).unwrap();
    debug_assert_eq!(result.width(), result_width);
    (result, result_width)
}
/// Pads `self` with spaces to `target_width` columns, optionally truncating it first.
///
/// * With `truncate == false`, a string that is already at least `target_width` columns
///   wide is returned unchanged.
/// * Otherwise the string is cut from the end to at most `target_width` columns and the
///   missing columns are filled with spaces: on the right for [`Alignment::Left`], on the
///   left for [`Alignment::Right`] and on both sides for [`Alignment::Center`] (the right
///   side gets the extra column of an odd difference).
///
/// The result borrows from `self` whenever no padding has to be added.
///
/// # Panics
///
/// Panics if the byte length of the padded string does not fit into `usize`.
#[cfg(feature = "std")]
#[inline]
fn unicode_pad(
    &self,
    target_width: usize,
    align: Alignment,
    truncate: bool,
) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;

    let keep_untouched = !truncate && self.width() >= target_width;
    if keep_untouched {
        return Cow::Borrowed(self);
    }

    let (kept, kept_width) = self.unicode_truncate(target_width);
    if kept_width == target_width {
        return Cow::Borrowed(kept);
    }

    // The text is narrower than requested (possibly only after truncation): fill the rest.
    let missing = target_width.saturating_sub(kept_width);
    let left_pad = match align {
        Alignment::Left => 0,
        Alignment::Center => missing / 2,
        Alignment::Right => missing,
    };
    let right_pad = missing.saturating_sub(left_pad);
    debug_assert_eq!(missing, left_pad.saturating_add(right_pad));

    // Every padding column is a single-byte space.
    let capacity = kept
        .len()
        .checked_add(missing)
        .expect("Padded result should fit in a new String");
    let mut padded = String::with_capacity(capacity);
    padded.extend(std::iter::repeat(' ').take(left_pad));
    padded.push_str(kept);
    padded.extend(std::iter::repeat(' ').take(right_pad));
    Cow::Owned(padded)
}
}
#[cfg(test)]
mod tests {
use super::*;
mod truncate_end {
use super::*;
#[test]
fn empty() {
assert_eq!("".unicode_truncate(4), ("", 0));
}
#[test]
fn zero_width() {
assert_eq!("ab".unicode_truncate(0), ("", 0));
assert_eq!("你好".unicode_truncate(0), ("", 0));
}
#[test]
fn less_than_limit() {
assert_eq!("abc".unicode_truncate(4), ("abc", 3));
assert_eq!("你".unicode_truncate(4), ("你", 2));
}
#[test]
fn at_boundary() {
assert_eq!("boundary".unicode_truncate(5), ("bound", 5));
assert_eq!("你好吗".unicode_truncate(4), ("你好", 4));
}
#[test]
fn not_boundary() {
assert_eq!("你好吗".unicode_truncate(3), ("你", 2));
assert_eq!("你好吗".unicode_truncate(1), ("", 0));
}
#[test]
fn zero_width_char_in_middle() {
assert_eq!("y\u{0306}es".unicode_truncate(2), ("y\u{0306}e", 2));
}
#[test]
fn keep_zero_width_char_at_boundary() {
assert_eq!(
"y\u{0306}ey\u{0306}s".unicode_truncate(3),
("y\u{0306}ey\u{0306}", 3)
);
}
#[test]
fn family_stays_together() {
let input = "123👨👩👧👦456";
assert_eq!("👨👩👧👦".width(), 2);
assert_eq!(input.unicode_truncate(4), ("123", 3));
assert_eq!(input.unicode_truncate(5), ("123👨👩👧👦", 5));
assert_eq!(input.unicode_truncate(6), ("123👨👩👧👦4", 6));
assert_eq!(input.unicode_truncate(20), (input, 8));
}
}
mod truncate_start {
use super::*;
#[test]
fn empty() {
assert_eq!("".unicode_truncate_start(4), ("", 0));
}
#[test]
fn zero_width() {
assert_eq!("ab".unicode_truncate_start(0), ("", 0));
assert_eq!("你好".unicode_truncate_start(0), ("", 0));
}
#[test]
fn less_than_limit() {
assert_eq!("abc".unicode_truncate_start(4), ("abc", 3));
assert_eq!("你".unicode_truncate_start(4), ("你", 2));
}
#[test]
fn at_boundary() {
assert_eq!("boundary".unicode_truncate_start(5), ("ndary", 5));
assert_eq!("你好吗".unicode_truncate_start(4), ("好吗", 4));
}
#[test]
fn not_boundary() {
assert_eq!("你好吗".unicode_truncate_start(3), ("吗", 2));
assert_eq!("你好吗".unicode_truncate_start(1), ("", 0));
}
#[test]
fn zero_width_char_in_middle() {
assert_eq!(
"y\u{0306}ey\u{0306}s".unicode_truncate_start(2),
("y\u{0306}s", 2)
);
}
#[test]
fn remove_zero_width_char_at_boundary() {
assert_eq!("y\u{0306}es".unicode_truncate_start(2), ("es", 2));
}
#[test]
fn family_stays_together() {
let input = "123👨👩👧👦456";
assert_eq!("👨👩👧👦".width(), 2);
assert_eq!(input.unicode_truncate_start(4), ("456", 3));
assert_eq!(input.unicode_truncate_start(5), ("👨👩👧👦456", 5));
assert_eq!(input.unicode_truncate_start(6), ("3👨👩👧👦456", 6));
assert_eq!(input.unicode_truncate_start(20), (input, 8));
}
}
mod truncate_centered {
use super::*;
#[test]
fn empty() {
assert_eq!("".unicode_truncate_centered(4), ("", 0));
}
#[test]
fn zero_width() {
assert_eq!("ab".unicode_truncate_centered(0), ("", 0));
assert_eq!("你好".unicode_truncate_centered(0), ("", 0));
}
#[test]
fn less_than_limit() {
assert_eq!("abc".unicode_truncate_centered(4), ("abc", 3));
assert_eq!("你".unicode_truncate_centered(4), ("你", 2));
}
#[test]
fn truncate_exactly_one() {
assert_eq!("abcd".unicode_truncate_centered(3), ("abc", 3));
}
#[test]
fn at_boundary() {
assert_eq!(
"boundaryboundary".unicode_truncate_centered(5),
("arybo", 5)
);
assert_eq!(
"你好吗你好吗你好吗".unicode_truncate_centered(4),
("你好", 4)
);
}
#[test]
fn not_boundary() {
assert_eq!("你好吗你好吗".unicode_truncate_centered(3), ("吗", 2));
assert_eq!("你好吗你好吗".unicode_truncate_centered(1), ("", 0));
}
#[test]
fn zero_width_char_in_middle() {
assert_eq!(
"yy\u{0306}es".unicode_truncate_centered(2),
("y\u{0306}e", 2)
);
}
#[test]
fn zero_width_char_at_boundary() {
assert_eq!(
"y\u{0306}ea\u{0306}b\u{0306}y\u{0306}ea\u{0306}b\u{0306}"
.unicode_truncate_centered(2),
("b\u{0306}y\u{0306}", 2)
);
assert_eq!(
"ay\u{0306}ea\u{0306}b\u{0306}y\u{0306}ea\u{0306}b\u{0306}"
.unicode_truncate_centered(2),
("a\u{0306}b\u{0306}", 2)
);
assert_eq!(
"y\u{0306}ea\u{0306}b\u{0306}y\u{0306}ea\u{0306}b\u{0306}a"
.unicode_truncate_centered(2),
("b\u{0306}y\u{0306}", 2)
);
}
#[test]
fn control_char() {
use unicode_width::UnicodeWidthChar;
assert_eq!("\u{0019}".width(), 1);
assert_eq!('\u{0019}'.width(), None);
assert_eq!("\u{0019}".unicode_truncate(2), ("\u{0019}", 1));
}
#[test]
fn family_stays_together() {
let input = "123👨👩👧👦456";
assert_eq!("👨👩👧👦".width(), 2);
assert_eq!(input.unicode_truncate_centered(1), ("", 0));
assert_eq!(input.unicode_truncate_centered(2), ("👨👩👧👦", 2));
assert_eq!(input.unicode_truncate_centered(4), ("3👨👩👧👦4", 4));
assert_eq!(input.unicode_truncate_centered(6), ("23👨👩👧👦45", 6));
assert_eq!(input.unicode_truncate_centered(20), (input, 8));
}
}
#[test]
fn truncate_aligned() {
    // With room for a single column, each alignment keeps a different char of "abc".
    let cases = [
        (Alignment::Left, "a"),
        (Alignment::Center, "b"),
        (Alignment::Right, "c"),
    ];
    for &(align, kept) in cases.iter() {
        assert_eq!("abc".unicode_truncate_aligned(1, align), (kept, 1));
    }
}
#[cfg(feature = "std")]
mod pad {
    use super::*;

    /// Asserts the string produced by padding (and optionally truncating) `input`.
    fn check(input: &str, target_width: usize, align: Alignment, truncate: bool, expected: &str) {
        assert_eq!(input.unicode_pad(target_width, align, truncate), expected);
    }

    #[test]
    fn zero_width() {
        check("你好", 0, Alignment::Left, true, "");
        check("你好", 0, Alignment::Left, false, "你好");
    }

    #[test]
    fn less_than_limit() {
        // "你" covers 2 of the 4 requested columns, so exactly TWO spaces are appended. They
        // are written as escapes to keep the number of spaces unambiguous.
        let padded = "你\u{20}\u{20}";
        assert_eq!(padded.width(), 4);
        check("你", 4, Alignment::Left, true, padded);
        check("你", 4, Alignment::Left, false, padded);
    }

    #[test]
    fn width_at_boundary() {
        check("你好吗", 4, Alignment::Left, true, "你好");
        check("你好吗", 4, Alignment::Left, false, "你好吗");
    }

    #[test]
    fn width_not_boundary() {
        // Only "你" (2 columns) fits into 3 columns; one space fills the remaining column.
        check("你好吗", 3, Alignment::Left, true, "你\u{20}");
        // Nothing fits into a single column; the result is padding only.
        check("你好吗", 1, Alignment::Left, true, "\u{20}");
        check("你好吗", 3, Alignment::Left, false, "你好吗");
        // A single padding column goes to the right when centering.
        check("你好吗", 3, Alignment::Center, true, "你\u{20}");
        check("你好吗", 3, Alignment::Right, true, "\u{20}你");
    }
}
}