Skip to main content

matrixcode_core/
truncate.rs

//! Truncation utilities for safe string handling.
//!
//! Provides functions that shorten strings by byte or character count while
//! never splitting a multi-byte UTF-8 character.
4
/// Locate the nearest UTF-8 character boundary at or below a byte offset.
///
/// `max` is first clamped to `s.len()`. The search then walks backwards from
/// that offset and returns the largest byte index `<= max` at which `s` can be
/// sliced without splitting a character. Index `0` is always a boundary, so
/// the result is `0` when no larger index qualifies.
pub fn find_boundary(s: &str, max: usize) -> usize {
    let limit = s.len().min(max);
    (0..=limit)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
15
16/// Truncate string to max bytes, respecting UTF-8 boundaries.
17/// Does not add any suffix.
18pub fn truncate_bytes(s: &str, max: usize) -> &str {
19    if s.len() <= max {
20        return s;
21    }
22    let end = find_boundary(s, max);
23    &s[..end]
24}
25
26/// Truncate string to max bytes with "..." suffix.
27/// Respects UTF-8 boundaries.
28pub fn truncate_with_suffix(s: &str, max: usize) -> String {
29    if s.len() <= max {
30        return s.to_string();
31    }
32    let suffix = "...";
33    let suffix_len = suffix.len();
34    let end = find_boundary(s, max.saturating_sub(suffix_len));
35    format!("{}{}", &s[..end], suffix)
36}
37
/// Truncate a string to at most `max_chars` characters (not bytes).
///
/// Intended for display, where the number of characters matters more than the
/// byte length. Returns `s` unchanged (as an owned `String`) when it has
/// `max_chars` characters or fewer. Otherwise the first `max_chars - 3`
/// characters are kept and `"..."` is appended, so the result is exactly
/// `max_chars` characters long.
///
/// When `max_chars` is smaller than the three-character suffix there is no
/// room for `"..."`. The suffix is then omitted and the first `max_chars`
/// characters are returned, so the result never exceeds `max_chars`.
pub fn truncate_chars(s: &str, max_chars: usize) -> String {
    const SUFFIX: &str = "...";
    // The suffix is pure ASCII, so its byte length equals its character count.
    const SUFFIX_CHARS: usize = SUFFIX.len();

    // Probe for a character at index `max_chars` instead of counting the whole
    // string: if none exists, `s` already fits and no truncation is needed.
    if s.char_indices().nth(max_chars).is_none() {
        return s.to_string();
    }

    // Reserve room for the suffix when it fits; otherwise drop it entirely so
    // the character cap is still honoured.
    let (keep, suffix) = match max_chars.checked_sub(SUFFIX_CHARS) {
        Some(keep) => (keep, SUFFIX),
        None => (max_chars, ""),
    };

    // Byte offset where character number `keep` starts. `keep <= max_chars`
    // and a character at `max_chars` exists, so the lookup succeeds; the
    // fallback to `s.len()` only keeps the expression total.
    let end = s.char_indices().nth(keep).map_or(s.len(), |(idx, _)| idx);

    let mut out = String::with_capacity(end + suffix.len());
    out.push_str(&s[..end]);
    out.push_str(suffix);
    out
}
49
50/// Truncate string and modify in place (for String types).
51pub fn truncate_string_in_place(s: &mut String, max: usize) {
52    if s.len() <= max {
53        return;
54    }
55    let end = find_boundary(s, max);
56    s.truncate(end);
57}
58
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample in which every character occupies three bytes.
    const CJK: &str = "你好世界";

    #[test]
    fn test_find_boundary_ascii() {
        let text = "hello world";
        // (requested max, expected boundary); a max past the end clamps to the length.
        for &(max, expected) in &[(5, 5), (100, 11)] {
            assert_eq!(find_boundary(text, max), expected, "max = {}", max);
        }
    }

    #[test]
    fn test_find_boundary_multibyte() {
        // (requested max, expected boundary)
        let cases = [
            (4, 3), // inside '好' -> back to the end of '你'
            (6, 6), // exactly at the start of '世'
            (7, 6), // inside '世' -> back to 6
        ];
        for &(max, expected) in &cases {
            assert_eq!(find_boundary(CJK, max), expected, "max = {}", max);
        }
    }

    #[test]
    fn test_truncate_bytes() {
        let text = "hello";
        for &(max, expected) in &[(10, "hello"), (3, "hel")] {
            assert_eq!(truncate_bytes(text, max), expected, "max = {}", max);
        }
    }

    #[test]
    fn test_truncate_bytes_chinese() {
        // A limit of 5 falls inside the second character, so only the first
        // one (3 bytes) survives.
        for &(max, expected) in &[(100, "你好世界"), (5, "你")] {
            assert_eq!(truncate_bytes(CJK, max), expected, "max = {}", max);
        }
    }

    #[test]
    fn test_truncate_with_suffix() {
        let text = "hello world";
        let cases = [(100, "hello world"), (8, "hello..."), (5, "he...")];
        for &(max, expected) in &cases {
            assert_eq!(truncate_with_suffix(text, max), expected, "max = {}", max);
        }
    }

    #[test]
    fn test_truncate_chars() {
        let text = "你好世界hello";
        let cases = [
            (10, "你好世界hello"), // fits, returned unchanged
            (4, "你..."),          // 4 - 3 = 1 char kept
            (6, "你好世..."),      // 6 - 3 = 3 chars kept
            (7, "你好世界..."),    // 7 - 3 = 4 chars kept
        ];
        for &(max_chars, expected) in &cases {
            assert_eq!(
                truncate_chars(text, max_chars),
                expected,
                "max_chars = {}",
                max_chars
            );
        }
    }

    #[test]
    fn test_truncate_in_place() {
        let mut ascii = String::from("hello world");
        truncate_string_in_place(&mut ascii, 5);
        assert_eq!(ascii, "hello");

        // 5 bytes would split the second character, so only 3 bytes remain.
        let mut multibyte = String::from(CJK);
        truncate_string_in_place(&mut multibyte, 5);
        assert_eq!(multibyte, "你");
    }
}