/// Returns an owned copy of `s` truncated to at most `max_bytes` bytes.
///
/// The cut never lands inside a multi-byte UTF-8 sequence: when `max_bytes`
/// falls in the middle of a character, that whole character is dropped, so
/// the result can be shorter than `max_bytes`. A string that already fits is
/// returned unchanged.
#[must_use]
pub fn truncate_to_bytes(s: &str, max_bytes: usize) -> String {
    if s.len() <= max_bytes {
        return s.to_owned();
    }
    // `max_bytes < s.len()` here, so every probed index is in range. A UTF-8
    // character is at most four bytes long, so this backs up at most three
    // positions, and index 0 is always a boundary, which terminates the loop.
    let mut end = max_bytes;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s[..end].to_owned()
}
28
/// Returns the longest prefix of `s` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// No allocation takes place: the result borrows from `s`. When `s` already
/// fits within `max_bytes`, `s` itself is returned.
#[must_use]
pub fn truncate_to_bytes_ref(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Search downwards from the byte limit for the nearest character
    // boundary. Index 0 always qualifies, so the fallback is never reached.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
44
/// Estimates a token count for `text` at one token per four characters.
///
/// Characters are counted as Unicode scalar values (`char`s), not bytes, and
/// the division rounds down, so any input shorter than four characters
/// yields `0`.
#[must_use]
pub fn estimate_tokens(text: &str) -> usize {
    const CHARS_PER_TOKEN: usize = 4;
    let char_total = text.chars().count();
    char_total / CHARS_PER_TOKEN
}
54
/// Returns the prefix of `s` containing at most `max_chars` characters.
///
/// Characters are Unicode scalar values (`char`s), so the slice always ends
/// on a valid UTF-8 boundary. The result borrows from `s`; when `s` has
/// `max_chars` characters or fewer it is returned whole, and a limit of `0`
/// yields the empty string.
#[must_use]
pub fn truncate_chars(s: &str, max_chars: usize) -> &str {
    if max_chars == 0 {
        return "";
    }
    // The byte offset of the character at position `max_chars`, if there is
    // one, is exactly where the retained prefix ends.
    s.char_indices()
        .nth(max_chars)
        .map_or(s, |(cut, _)| &s[..cut])
}
68
/// Returns an owned copy of `s` limited to `max_chars` characters, appending
/// an ellipsis (`U+2026`) when anything was cut off.
///
/// Characters are Unicode scalar values (`char`s). A string with `max_chars`
/// characters or fewer is returned unchanged. A truncated result holds the
/// first `max_chars` characters followed by the ellipsis, so it is
/// `max_chars + 1` characters long. A limit of `0` always yields the empty
/// string, with no ellipsis.
#[must_use]
pub fn truncate_to_chars(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: char = '\u{2026}';

    if max_chars == 0 {
        return String::new();
    }
    // One traversal finds the byte offset of the first character that does
    // not fit; `None` means the whole string is within the limit.
    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => {
            // Reserve the exact final size so the string is built with a
            // single allocation.
            let mut truncated = String::with_capacity(cut + ELLIPSIS.len_utf8());
            truncated.push_str(&s[..cut]);
            truncated.push(ELLIPSIS);
            truncated
        }
        None => s.to_owned(),
    }
}
87
#[cfg(test)]
mod tests {
    use super::*;

    // ---- truncate_to_bytes -------------------------------------------------

    #[test]
    fn bytes_short_unchanged() {
        let got = truncate_to_bytes("hello", 10);
        assert_eq!(got, "hello");
    }

    #[test]
    fn bytes_exact_unchanged() {
        let got = truncate_to_bytes("hello", 5);
        assert_eq!(got, "hello");
    }

    #[test]
    fn bytes_truncates_at_boundary() {
        let got = truncate_to_bytes("hello world", 5);
        assert_eq!(got, "hello");
    }

    #[test]
    fn bytes_unicode_boundary() {
        // 'h' is one byte and 'é' is two, so a three-byte budget fits both.
        let got = truncate_to_bytes("héllo", 3);
        assert_eq!(got, "hé");
    }

    #[test]
    fn bytes_zero_returns_empty() {
        let got = truncate_to_bytes("hello", 0);
        assert_eq!(got, "");
    }

    // ---- truncate_to_bytes_ref ---------------------------------------------

    #[test]
    fn bytes_ref_short_unchanged() {
        let got = truncate_to_bytes_ref("hello", 10);
        assert_eq!(got, "hello");
    }

    #[test]
    fn bytes_ref_truncates_at_boundary() {
        let got = truncate_to_bytes_ref("hello world", 5);
        assert_eq!(got, "hello");
    }

    #[test]
    fn bytes_ref_unicode_boundary() {
        // Byte index 2 falls inside the two-byte 'é', so the cut backs up to
        // just after 'h'.
        let got = truncate_to_bytes_ref("héllo", 2);
        assert_eq!(got, "h");
    }

    // ---- truncate_chars ----------------------------------------------------

    #[test]
    fn chars_short_unchanged() {
        let got = truncate_chars("hello", 10);
        assert_eq!(got, "hello");
    }

    #[test]
    fn chars_exact_unchanged() {
        let got = truncate_chars("hello", 5);
        assert_eq!(got, "hello");
    }

    #[test]
    fn chars_truncates_by_char() {
        let got = truncate_chars("hello world", 5);
        assert_eq!(got, "hello");
    }

    #[test]
    fn chars_zero_returns_empty() {
        let got = truncate_chars("hello", 0);
        assert_eq!(got, "");
    }

    #[test]
    fn chars_unicode_by_char() {
        // Each emoji is a single `char`, so five of them survive a limit of 5.
        let input = "😀😁😂😃😄extra";
        let got = truncate_chars(input, 5);
        assert_eq!(got, "😀😁😂😃😄");
    }

    // ---- truncate_to_chars -------------------------------------------------

    #[test]
    fn to_chars_short_unchanged() {
        let got = truncate_to_chars("hello", 10);
        assert_eq!(got, "hello");
    }

    #[test]
    fn to_chars_exact_unchanged() {
        let got = truncate_to_chars("hello", 5);
        assert_eq!(got, "hello");
    }

    #[test]
    fn to_chars_appends_ellipsis() {
        let got = truncate_to_chars("hello world", 5);
        assert_eq!(got, "hello\u{2026}");
    }

    #[test]
    fn to_chars_zero_returns_empty() {
        let got = truncate_to_chars("hello", 0);
        assert_eq!(got, "");
    }

    #[test]
    fn to_chars_unicode() {
        // Truncation counts `char`s, not bytes, and appends U+2026.
        let input = "😀😁😂😃😄extra";
        let got = truncate_to_chars(input, 5);
        assert_eq!(got, "😀😁😂😃😄\u{2026}");
    }
}