Skip to main content

codemem_core/
utils.rs

/// Shortens `s` so that at most `max` bytes of the original text are kept.
///
/// If `s` already fits within `max` bytes it is returned as an owned copy,
/// unchanged. Otherwise the text is cut at the largest UTF-8 character
/// boundary that does not exceed `max`, and `"..."` is appended to signal the
/// cut. The ellipsis is not counted against `max`, so a truncated result may
/// be up to `max + 3` bytes long.
pub fn truncate(s: &str, max: usize) -> String {
    // Guard clause: nothing to cut when the input already fits.
    if s.len() <= max {
        return s.to_owned();
    }

    // Scan downwards from `max` for the closest char boundary. Offset 0 is
    // always a boundary, so the fallback is never actually taken.
    let cut = (0..=max)
        .rev()
        .find(|&offset| s.is_char_boundary(offset))
        .unwrap_or(0);

    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&s[..cut]);
    shortened.push_str("...");
    shortened
}
14
#[cfg(test)]
mod tests {
    use super::truncate;

    /// Input shorter than the limit comes back untouched.
    #[test]
    fn short_string_unchanged() {
        let out = truncate("hi", 10);
        assert_eq!(out, "hi");
    }

    /// Input whose length equals the limit is not truncated.
    #[test]
    fn exact_length_unchanged() {
        let out = truncate("hello", 5);
        assert_eq!(out, "hello");
    }

    /// Input longer than the limit is cut and marked with an ellipsis.
    #[test]
    fn long_string_truncated_with_ellipsis() {
        let out = truncate("hello world", 5);
        assert_eq!(out, "hello...");
    }

    /// The empty string always fits, so no ellipsis is added.
    #[test]
    fn empty_string() {
        let out = truncate("", 5);
        assert_eq!(out, "");
    }

    /// A zero limit keeps none of the text; only the ellipsis remains.
    #[test]
    fn zero_max() {
        let out = truncate("abc", 0);
        assert_eq!(out, "...");
    }

    /// A limit that lands inside a two-byte character backs up before it.
    #[test]
    fn multibyte_utf8_safe() {
        // 'h' occupies byte 0 and 'é' occupies bytes 1..3, so offset 2 falls
        // in the middle of 'é'. The cut retreats to offset 1, keeping "h".
        let out = truncate("héllo", 2);
        assert!(out.ends_with("..."));
        assert_eq!(out, "h...");
    }

    /// A limit that lands inside a three-byte character backs up before it.
    #[test]
    fn multibyte_cjk() {
        // '日' occupies bytes 0..3 and '本' bytes 3..6, so offset 4 is inside
        // '本'. The cut retreats to offset 3, keeping only '日'.
        let out = truncate("日本語テスト", 4);
        assert!(out.ends_with("..."));
        assert_eq!(out, "日...");
    }
}