Skip to main content

zeph_common/
text.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! String utility functions for Unicode-safe text manipulation.
5
/// Truncate `s` to at most `max_bytes` bytes, preserving UTF-8 char boundaries.
///
/// Returns an owned `String`. If `s` already fits within `max_bytes`, the
/// result is an unchanged copy. Otherwise the cut point is the largest char
/// boundary that is `<= max_bytes`, so a multi-byte character is never split;
/// the result may therefore be shorter than `max_bytes`.
///
/// A `max_bytes` of zero always yields an empty string.
#[must_use]
pub fn truncate_to_bytes(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_owned();
    }
    // Here `max_bytes < s.len()`, so every probed index is in range. A UTF-8
    // scalar is at most 4 bytes, so this steps back at most 3 times, and it
    // always terminates because index 0 is a char boundary.
    let mut end = max_bytes;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].to_owned()
}
28
/// Return the longest prefix of `s` that occupies no more than `max_bytes` bytes.
///
/// The result borrows from `s`; nothing is allocated. When `max_bytes` falls
/// inside a multi-byte character, the cut moves back to the nearest preceding
/// UTF-8 char boundary, so the prefix may be shorter than `max_bytes`.
#[must_use]
pub fn truncate_to_bytes_ref(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Probe candidate cut points from `max_bytes` downwards; index 0 is always
    // a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
44
/// Return the prefix of `s` holding its first `max_chars` Unicode scalar values.
///
/// The result is a subslice of `s` (or the empty literal when `max_chars` is
/// zero). If `s` has `max_chars` scalar values or fewer, it is returned whole.
/// No ellipsis or other marker is appended.
#[must_use]
pub fn truncate_chars(s: &str, max_chars: usize) -> &str {
    if max_chars == 0 {
        return "";
    }
    // The byte offset of the char at position `max_chars` is exactly where the
    // kept prefix ends; if there is no such char, the whole string fits.
    s.char_indices()
        .nth(max_chars)
        .map_or(s, |(cut, _)| &s[..cut])
}
58
/// Truncate a string to its first `max_chars` Unicode scalar values, marking
/// the cut with an ellipsis.
///
/// - If `max_chars` is zero, returns an empty string (no ellipsis).
/// - If `s` has `max_chars` scalar values or fewer, returns an unchanged copy.
/// - Otherwise keeps the first `max_chars` scalar values and appends the
///   Unicode ellipsis character `…` (U+2026).
///
/// Note that the ellipsis is added on top of the kept characters, so a
/// truncated result contains `max_chars + 1` scalar values.
#[must_use]
pub fn truncate_to_chars(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: char = '\u{2026}';

    if max_chars == 0 {
        return String::new();
    }
    // A single pass: the byte offset of the char at position `max_chars` is
    // where the kept prefix ends. `None` means the string has at most
    // `max_chars` chars and needs no truncation.
    match s.char_indices().nth(max_chars) {
        None => s.to_owned(),
        Some((cut, _)) => {
            // Size the buffer once for the prefix plus the ellipsis.
            let mut out = String::with_capacity(cut + ELLIPSIS.len_utf8());
            out.push_str(&s[..cut]);
            out.push(ELLIPSIS);
            out
        }
    }
}
77
#[cfg(test)]
mod tests {
    //! Unit tests for the truncation helpers, grouped by function.

    use super::*;

    // truncate_to_bytes tests
    #[test]
    fn bytes_short_unchanged() {
        assert_eq!(truncate_to_bytes("hello", 10), "hello");
    }

    #[test]
    fn bytes_exact_unchanged() {
        assert_eq!(truncate_to_bytes("hello", 5), "hello");
    }

    #[test]
    fn bytes_truncates_at_boundary() {
        let s = "hello world";
        assert_eq!(truncate_to_bytes(s, 5), "hello");
    }

    #[test]
    fn bytes_unicode_boundary() {
        // "é" is 2 bytes in UTF-8, so "hé" is exactly 3 bytes.
        let s = "héllo";
        assert_eq!(truncate_to_bytes(s, 3), "hé");
    }

    #[test]
    fn bytes_limit_inside_char_backs_off() {
        // A limit of 2 lands in the middle of "é"; the whole char is dropped.
        assert_eq!(truncate_to_bytes("héllo", 2), "h");
    }

    #[test]
    fn bytes_four_byte_char() {
        // Each emoji is 4 bytes; 7 bytes only has room for the first one.
        assert_eq!(truncate_to_bytes("😀😁", 7), "😀");
        assert_eq!(truncate_to_bytes("😀", 3), "");
    }

    #[test]
    fn bytes_zero_returns_empty() {
        assert_eq!(truncate_to_bytes("hello", 0), "");
    }

    #[test]
    fn bytes_empty_input() {
        assert_eq!(truncate_to_bytes("", 0), "");
        assert_eq!(truncate_to_bytes("", 4), "");
    }

    // truncate_to_bytes_ref tests
    #[test]
    fn bytes_ref_short_unchanged() {
        assert_eq!(truncate_to_bytes_ref("hello", 10), "hello");
    }

    #[test]
    fn bytes_ref_exact_unchanged() {
        assert_eq!(truncate_to_bytes_ref("hello", 5), "hello");
    }

    #[test]
    fn bytes_ref_truncates_at_boundary() {
        assert_eq!(truncate_to_bytes_ref("hello world", 5), "hello");
    }

    #[test]
    fn bytes_ref_unicode_boundary() {
        let s = "héllo";
        assert_eq!(truncate_to_bytes_ref(s, 2), "h");
    }

    #[test]
    fn bytes_ref_four_byte_char() {
        assert_eq!(truncate_to_bytes_ref("😀😁", 7), "😀");
    }

    #[test]
    fn bytes_ref_zero_returns_empty() {
        assert_eq!(truncate_to_bytes_ref("hello", 0), "");
    }

    #[test]
    fn bytes_ref_backs_off_to_start() {
        // The only char does not fit, so the walk-back reaches index 0.
        assert_eq!(truncate_to_bytes_ref("é", 1), "");
    }

    // truncate_chars tests
    #[test]
    fn chars_short_unchanged() {
        assert_eq!(truncate_chars("hello", 10), "hello");
    }

    #[test]
    fn chars_exact_unchanged() {
        assert_eq!(truncate_chars("hello", 5), "hello");
    }

    #[test]
    fn chars_truncates_by_char() {
        assert_eq!(truncate_chars("hello world", 5), "hello");
    }

    #[test]
    fn chars_zero_returns_empty() {
        assert_eq!(truncate_chars("hello", 0), "");
    }

    #[test]
    fn chars_empty_input() {
        assert_eq!(truncate_chars("", 3), "");
    }

    #[test]
    fn chars_unicode_by_char() {
        let s = "😀😁😂😃😄extra";
        assert_eq!(truncate_chars(s, 5), "😀😁😂😃😄");
    }

    // truncate_to_chars tests
    #[test]
    fn to_chars_short_unchanged() {
        assert_eq!(truncate_to_chars("hello", 10), "hello");
    }

    #[test]
    fn to_chars_exact_unchanged() {
        assert_eq!(truncate_to_chars("hello", 5), "hello");
    }

    #[test]
    fn to_chars_appends_ellipsis() {
        assert_eq!(truncate_to_chars("hello world", 5), "hello\u{2026}");
    }

    #[test]
    fn to_chars_zero_returns_empty() {
        assert_eq!(truncate_to_chars("hello", 0), "");
    }

    #[test]
    fn to_chars_empty_input() {
        assert_eq!(truncate_to_chars("", 3), "");
    }

    #[test]
    fn to_chars_unicode() {
        let s = "😀😁😂😃😄extra";
        assert_eq!(truncate_to_chars(s, 5), "😀😁😂😃😄\u{2026}");
    }

    #[test]
    fn to_chars_exact_unicode_has_no_ellipsis() {
        assert_eq!(truncate_to_chars("😀😁", 2), "😀😁");
    }

    #[test]
    fn to_chars_truncated_len_includes_ellipsis() {
        // The ellipsis is added on top of the kept chars.
        assert_eq!(truncate_to_chars("hello world", 5).chars().count(), 6);
    }
}