/// Formats a token count as a short, human-readable string.
///
/// * Counts below 1 000 are printed verbatim (`"999"`).
/// * Counts from 1 000 up are shown in thousands with one decimal place and a
///   `k` suffix (`"1.5k"`).
/// * Counts from 1 000 000 up are shown in millions with one decimal place and
///   an `M` suffix (`"2.5M"`).
///
/// Counts just below one million whose one-decimal rendering would round up to
/// `1000.0` are promoted to `"1.0M"` rather than being shown as `"1000.0k"`.
#[must_use]
// The `u64 -> f64` cast may drop low-order bits for very large counts; only one
// decimal place of the scaled value is displayed, so that loss is tolerated.
#[allow(clippy::cast_precision_loss)]
pub fn format_tokens(n: u64) -> String {
    if n >= 1_000_000 {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        let thousands = format!("{:.1}", n as f64 / 1_000.0);
        if thousands == "1000.0" {
            // Rounding to one decimal carried the value into the next unit.
            "1.0M".to_owned()
        } else {
            format!("{thousands}k")
        }
    } else {
        n.to_string()
    }
}
33
/// Returns an owned copy of `s` limited to at most `max_bytes` bytes of UTF-8.
///
/// When `s` already fits it is copied unchanged. Otherwise the copy ends after
/// the last whole character that still fits in the budget, so a multi-byte
/// character is never split.
#[must_use]
pub fn truncate_to_bytes(s: &str, max_bytes: usize) -> String {
    if max_bytes >= s.len() {
        return s.to_owned();
    }
    // Walk the end offset of every character and remember the last one that
    // is still within the byte budget.
    let cut = s
        .char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
        .take_while(|&stop| stop <= max_bytes)
        .last()
        .unwrap_or(0);
    s[..cut].to_owned()
}
56
/// Returns a prefix of `s` that is at most `max_bytes` bytes long, borrowing
/// from the input instead of allocating.
///
/// If `max_bytes` falls inside a multi-byte character, the cut moves back to
/// the nearest earlier character boundary.
#[must_use]
pub fn truncate_to_bytes_ref(s: &str, max_bytes: usize) -> &str {
    if max_bytes >= s.len() {
        return s;
    }
    // Search downward from the budget for the first valid boundary; offset 0
    // is always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
72
/// Gives a rough token estimate for `text`: the number of Unicode scalar
/// values divided by four, rounded down.
///
/// Inputs shorter than four characters therefore estimate to zero.
#[must_use]
pub fn estimate_tokens(text: &str) -> usize {
    // Heuristic divisor: one token per four characters.
    const CHARS_PER_TOKEN: usize = 4;
    let char_total = text.chars().count();
    char_total / CHARS_PER_TOKEN
}
82
/// Returns the prefix of `s` containing at most `max_chars` characters
/// (Unicode scalar values), borrowing from the input.
///
/// A limit of zero yields the empty string; a string with no more than
/// `max_chars` characters is returned whole.
#[must_use]
pub fn truncate_chars(s: &str, max_chars: usize) -> &str {
    match max_chars {
        0 => "",
        limit => s
            .char_indices()
            .nth(limit)
            // The byte offset of the first character past the limit is where
            // the kept prefix ends; no such character means nothing to cut.
            .map_or(s, |(cut, _)| &s[..cut]),
    }
}
96
/// Returns an owned copy of `s` limited to `max_chars` characters, appending a
/// horizontal ellipsis (`U+2026`) when anything was cut off.
///
/// A limit of zero returns an empty string with no ellipsis. A string with no
/// more than `max_chars` characters is copied unchanged.
#[must_use]
pub fn truncate_to_chars(s: &str, max_chars: usize) -> String {
    const ELLIPSIS: char = '\u{2026}';

    if max_chars == 0 {
        return String::new();
    }
    match s.char_indices().nth(max_chars) {
        // There is no character past the limit: the whole string fits.
        None => s.to_owned(),
        // `cut` is the byte offset where the first excess character starts.
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + ELLIPSIS.len_utf8());
            shortened.push_str(&s[..cut]);
            shortened.push(ELLIPSIS);
            shortened
        }
    }
}
115
/// Escapes the five characters that have predefined entities in XML so the
/// result can be embedded in element content or attribute values.
///
/// | input | output   |
/// |-------|----------|
/// | `&`   | `&amp;`  |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
/// | `"`   | `&quot;` |
/// | `'`   | `&apos;` |
///
/// Every other character is copied through unchanged.
#[must_use]
pub fn xml_escape(s: &str) -> String {
    // The input length is a lower bound on the output length; the buffer grows
    // only when something actually needs escaping.
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            other => out.push(other),
        }
    }
    out
}
146
#[cfg(test)]
mod tests {
    use super::*;

    // ---- truncate_to_bytes ----

    /// A budget larger than the input leaves the text untouched.
    #[test]
    fn bytes_short_unchanged() {
        let out = truncate_to_bytes("hello", 10);
        assert_eq!(out, "hello");
    }

    /// A budget equal to the input length leaves the text untouched.
    #[test]
    fn bytes_exact_unchanged() {
        let out = truncate_to_bytes("hello", 5);
        assert_eq!(out, "hello");
    }

    /// ASCII input is cut at exactly the requested byte count.
    #[test]
    fn bytes_truncates_at_boundary() {
        let input = "hello world";
        let out = truncate_to_bytes(input, 5);
        assert_eq!(out, "hello");
    }

    /// A budget ending right after a multi-byte character keeps that character.
    #[test]
    fn bytes_unicode_boundary() {
        let input = "héllo";
        let out = truncate_to_bytes(input, 3);
        assert_eq!(out, "hé");
    }

    /// A zero budget produces an empty string.
    #[test]
    fn bytes_zero_returns_empty() {
        let out = truncate_to_bytes("hello", 0);
        assert_eq!(out, "");
    }

    // ---- truncate_to_bytes_ref ----

    /// A budget larger than the input returns the whole slice.
    #[test]
    fn bytes_ref_short_unchanged() {
        let out = truncate_to_bytes_ref("hello", 10);
        assert_eq!(out, "hello");
    }

    /// ASCII input is cut at exactly the requested byte count.
    #[test]
    fn bytes_ref_truncates_at_boundary() {
        let out = truncate_to_bytes_ref("hello world", 5);
        assert_eq!(out, "hello");
    }

    /// A budget landing inside a multi-byte character backs off before it.
    #[test]
    fn bytes_ref_unicode_boundary() {
        let input = "héllo";
        let out = truncate_to_bytes_ref(input, 2);
        assert_eq!(out, "h");
    }

    // ---- truncate_chars ----

    /// A limit larger than the character count returns the whole slice.
    #[test]
    fn chars_short_unchanged() {
        let out = truncate_chars("hello", 10);
        assert_eq!(out, "hello");
    }

    /// A limit equal to the character count returns the whole slice.
    #[test]
    fn chars_exact_unchanged() {
        let out = truncate_chars("hello", 5);
        assert_eq!(out, "hello");
    }

    /// Longer input is cut after the requested number of characters.
    #[test]
    fn chars_truncates_by_char() {
        let out = truncate_chars("hello world", 5);
        assert_eq!(out, "hello");
    }

    /// A zero limit produces an empty slice.
    #[test]
    fn chars_zero_returns_empty() {
        let out = truncate_chars("hello", 0);
        assert_eq!(out, "");
    }

    /// The limit counts characters, not bytes, for multi-byte input.
    #[test]
    fn chars_unicode_by_char() {
        let input = "😀😁😂😃😄extra";
        let out = truncate_chars(input, 5);
        assert_eq!(out, "😀😁😂😃😄");
    }

    // ---- truncate_to_chars ----

    /// A limit larger than the character count adds no ellipsis.
    #[test]
    fn to_chars_short_unchanged() {
        let out = truncate_to_chars("hello", 10);
        assert_eq!(out, "hello");
    }

    /// A limit equal to the character count adds no ellipsis.
    #[test]
    fn to_chars_exact_unchanged() {
        let out = truncate_to_chars("hello", 5);
        assert_eq!(out, "hello");
    }

    /// Truncated output ends with a single ellipsis character.
    #[test]
    fn to_chars_appends_ellipsis() {
        let out = truncate_to_chars("hello world", 5);
        assert_eq!(out, "hello\u{2026}");
    }

    /// A zero limit produces an empty string without an ellipsis.
    #[test]
    fn to_chars_zero_returns_empty() {
        let out = truncate_to_chars("hello", 0);
        assert_eq!(out, "");
    }

    /// The limit counts characters, not bytes, before the ellipsis is added.
    #[test]
    fn to_chars_unicode() {
        let input = "😀😁😂😃😄extra";
        let out = truncate_to_chars(input, 5);
        assert_eq!(out, "😀😁😂😃😄\u{2026}");
    }
}