/// Returns the longest prefix of `s` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// If `s` already fits within `max_bytes` it is returned unchanged. When
/// `max_bytes` lands inside a multi-byte character, the cut moves back to the
/// start of that character, so the result may be shorter than `max_bytes`.
pub fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Scan backwards from the byte budget to the nearest boundary. Index 0 is
    // always a boundary, so the fallback is never actually reached.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
/// Returns the longest suffix of `s` that is at most `max_bytes` bytes long
/// and starts on a UTF-8 character boundary.
///
/// If `s` already fits within `max_bytes` it is returned unchanged. When the
/// ideal start offset lands inside a multi-byte character, the start moves
/// forward to the next character, so the result may be shorter than
/// `max_bytes`.
pub fn truncate_at_char_boundary_from_end(s: &str, max_bytes: usize) -> &str {
    let total = s.len();
    if max_bytes >= total {
        return s;
    }
    // Scan forwards from the ideal start offset to the nearest boundary. If
    // none exists before the end of the string, the suffix is empty.
    let ideal_start = total - max_bytes;
    let cut = (ideal_start..total)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(total);
    &s[cut..]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// U+4E16, which encodes to three bytes in UTF-8.
    const THREE_BYTE: &str = "\u{4e16}";
    /// U+1F600, which encodes to four bytes in UTF-8.
    const FOUR_BYTE: &str = "\u{1f600}";

    // ---- truncate_at_char_boundary (keeps the front of the string) ----

    #[test]
    fn truncate_ascii_short_passthrough() {
        // The budget exceeds the input length, so nothing is removed.
        let got = truncate_at_char_boundary("abc", 10);
        assert_eq!(got, "abc");
    }

    #[test]
    fn truncate_ascii_exact_cut() {
        // Every ASCII byte is a boundary, so the cut lands exactly on the budget.
        let got = truncate_at_char_boundary("abcdef", 3);
        assert_eq!(got, "abc");
    }

    #[test]
    fn truncate_at_char_boundary_zero_yields_empty() {
        let got = truncate_at_char_boundary("anything", 0);
        assert_eq!(got, "");
    }

    #[test]
    fn truncate_three_byte_char_does_not_panic() {
        // 67 * 3 = 201 bytes; 197 is not a multiple of 3, so the cut snaps
        // back to 195 bytes (65 whole characters).
        let input = THREE_BYTE.repeat(67);
        let got = truncate_at_char_boundary(&input, 197);
        assert_eq!(got.len(), 195);
        assert_eq!(got, THREE_BYTE.repeat(65));
        assert!(std::str::from_utf8(got.as_bytes()).is_ok());
    }

    #[test]
    fn truncate_four_byte_emoji_does_not_panic() {
        // 51 * 4 = 204 bytes; 197 snaps back to 196 bytes (49 whole characters).
        let input = FOUR_BYTE.repeat(51);
        let got = truncate_at_char_boundary(&input, 197);
        assert_eq!(got.len(), 196);
        assert!(std::str::from_utf8(got.as_bytes()).is_ok());
    }

    #[test]
    fn truncate_mid_two_byte_sequence() {
        // "café" is 5 bytes; byte offset 4 falls inside the two-byte 'é'.
        let got = truncate_at_char_boundary("café", 4);
        assert_eq!(got, "caf");
    }

    // ---- truncate_at_char_boundary_from_end (keeps the back of the string) ----

    #[test]
    fn truncate_from_end_ascii() {
        let got = truncate_at_char_boundary_from_end("abcdef", 3);
        assert_eq!(got, "def");
    }

    #[test]
    fn truncate_from_end_short_passthrough() {
        // The budget exceeds the input length, so nothing is removed.
        let got = truncate_at_char_boundary_from_end("abc", 10);
        assert_eq!(got, "abc");
    }

    #[test]
    fn truncate_from_end_three_byte_char() {
        // 13 * 3 = 39 bytes; a 27-byte budget starts at offset 12, already a
        // boundary, so exactly 9 characters remain.
        let input = THREE_BYTE.repeat(13);
        let got = truncate_at_char_boundary_from_end(&input, 27);
        assert_eq!(got.len(), 27);
        assert_eq!(got, THREE_BYTE.repeat(9));
    }

    #[test]
    fn truncate_from_end_snaps_up_inside_codepoint() {
        // A 28-byte budget starts at offset 11, inside a character; the start
        // moves forward to offset 12, leaving 27 bytes.
        let input = THREE_BYTE.repeat(13);
        let got = truncate_at_char_boundary_from_end(&input, 28);
        assert_eq!(got.len(), 27);
        assert_eq!(got, THREE_BYTE.repeat(9));
    }
}