1#[inline(always)]
11pub fn truncate_bytes(s: &mut String, max_bytes: usize) {
12 if s.len() <= max_bytes {
13 return;
14 }
15 truncate_bytes_cold(s, max_bytes)
16}
17
18#[inline(always)]
25pub fn truncate_chars(s: &mut String, max_chars: usize) {
26 if s.len() <= max_chars {
27 return;
28 }
29 truncate_chars_cold(s, max_chars)
30}
31
/// Slow path of `truncate_bytes`: cuts `s` at the last character boundary at
/// or before byte offset `max_bytes`.
///
/// The caller only invokes this when `s.len() > max_bytes`.
#[inline(never)]
#[cold]
fn truncate_bytes_cold(s: &mut String, max_bytes: usize) {
    // A UTF-8 character is at most four bytes long, so among the four offsets
    // `max_bytes - 3 ..= max_bytes` at least one is a character boundary.
    // `saturating_sub` clamps the window at offset 0, which is always one.
    let window_start = max_bytes.saturating_sub(3);
    let boundary = (window_start..=max_bytes)
        .rev()
        .find(|&offset| s.is_char_boundary(offset));

    if let Some(cut) = boundary {
        s.truncate(cut);
    }
}
44
/// Slow path of `truncate_chars`: keeps the first `max_chars` characters of
/// `s` and drops the rest. Leaves `s` untouched when it has no more than
/// `max_chars` characters.
///
/// Characters are counted by counting bytes that are *not* UTF-8 continuation
/// bytes (`0b10xx_xxxx`); each character contributes exactly one such byte.
/// Whole 8-byte words are counted at once with bit tricks, and a byte-wise
/// scan then pins down the exact cut position.
#[inline(never)]
#[cold]
fn truncate_chars_cold(s: &mut String, max_chars: usize) {
    /// Bit 7 of every byte in a 64-bit word.
    const HIGH_BITS: u64 = 0x8080_8080_8080_8080;
    /// Number of bytes processed per word.
    const WORD: usize = 8;

    let bytes = s.as_bytes();
    let mut offset = 0usize;
    let mut chars_seen = 0usize;

    // Word phase: skip ahead eight bytes at a time while the running count
    // stays within the limit.
    for chunk in bytes.chunks_exact(WORD) {
        let mut raw = [0u8; WORD];
        raw.copy_from_slice(chunk);
        // Byte order inside the word is irrelevant: only a population count
        // is taken.
        let word = u64::from_ne_bytes(raw);

        // `word << 1` moves bit 6 of each byte into bit 7 (the bit leaking in
        // from the neighbouring byte lands in bit 0 and is masked away), so
        // a byte's high bit survives only when bit 7 is set and bit 6 is
        // clear, i.e. the byte is a continuation byte.
        let continuation_mask = word & !(word << 1) & HIGH_BITS;
        let char_starts = WORD - continuation_mask.count_ones() as usize;

        if chars_seen + char_starts > max_chars {
            // The cut lies inside this word; locate it byte by byte below.
            break;
        }
        chars_seen += char_starts;
        offset += WORD;
    }

    // Byte phase: the first character that must go is the one whose start is
    // the `remaining`-th (zero-based) character start from `offset` onwards.
    // `chars_seen <= max_chars` holds here because the word phase never
    // commits a word that would exceed the limit.
    let remaining = max_chars - chars_seen;
    let cut = bytes[offset..]
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| (byte & 0b1100_0000) != 0b1000_0000)
        .nth(remaining)
        .map(|(rel, _)| offset + rel);

    // Truncate only after the scan, once the borrow of `s`'s bytes has ended.
    if let Some(cut) = cut {
        s.truncate(cut);
    }
}
89
#[cfg(test)]
mod tests {
    use proptest::{prop_assert, prop_assert_eq, proptest};

    use super::*;

    /// Runs `truncate_bytes` on an owned copy of `s` and returns the result.
    fn tb(s: &str, max_bytes: usize) -> String {
        let mut s = s.to_owned();
        truncate_bytes(&mut s, max_bytes);
        s
    }

    /// Runs `truncate_chars` on an owned copy of `s` and returns the result.
    fn tc(s: &str, max_chars: usize) -> String {
        let mut s = s.to_owned();
        truncate_chars(&mut s, max_chars);
        s
    }

    #[test]
    fn test_truncate_bytes() {
        // Inputs that already fit are returned unchanged.
        assert_eq!(tb("", 10), "");
        assert_eq!(tb("hello", 10), "hello");
        assert_eq!(tb("hello", 5), "hello");

        // Plain ASCII is cut exactly at the limit.
        assert_eq!(tb("hello world", 5), "hello");

        // U+1F600 occupies bytes 1..5, so a limit of 3 falls inside it and
        // the whole character is dropped.
        assert_eq!(tb("a\u{1F600}b", 3), "a");

        // Three-byte characters: a limit of 7 rounds down to the boundary at
        // 6, and a limit of 6 is already on a boundary.
        assert_eq!(tb("日本語", 7), "日本");
        assert_eq!(tb("日本語", 6), "日本");
    }

    #[test]
    fn test_truncate_chars() {
        // Inputs that already fit are returned unchanged.
        assert_eq!(tc("", 10), "");
        assert_eq!(tc("hello", 10), "hello");
        assert_eq!(tc("hello", 5), "hello");

        // Plain ASCII: one character per byte.
        assert_eq!(tc("hello world", 5), "hello");

        // Multi-byte characters count as one character each.
        assert_eq!(tc("a\u{1F600}b\u{1F600}c", 3), "a\u{1F600}b");

        assert_eq!(tc("日本語テスト", 3), "日本語");

        // A limit of zero empties the string.
        assert_eq!(tc("hello", 0), "");
    }

    /// Truncating an already-truncated string with the same limit changes
    /// nothing.
    #[test]
    fn test_truncate_idempotent() {
        proptest!(|(s: String, n in 0usize..=512)| {
            let bytes_once = tb(&s, n);
            let bytes_twice = tb(&bytes_once, n);
            prop_assert_eq!(bytes_once, bytes_twice);

            let chars_once = tc(&s, n);
            let chars_twice = tc(&chars_once, n);
            prop_assert_eq!(chars_once, chars_twice);
        });
    }

    /// For the same limit `n`: original length >= length after truncating to
    /// `n` characters >= length after truncating to `n` bytes.
    #[test]
    fn test_truncate_length_ordering() {
        proptest!(|(s: String, n in 0usize..=512)| {
            let original_len = s.len();
            let chars_len = tc(&s, n).len();
            let bytes_len = tb(&s, n).len();

            prop_assert!(original_len >= chars_len);
            prop_assert!(chars_len >= bytes_len);
        });
    }

    /// Appending one character and then truncating back to the original
    /// byte length / character count recovers the original string.
    #[test]
    fn test_truncate_prefix_recovery() {
        proptest!(|(s: String, ascii in 0u8..=0x7f, c: char)| {
            let mut with_ascii = s.clone();
            with_ascii.push(char::from(ascii));
            let bytes_recovered = tb(&with_ascii, s.len());
            prop_assert_eq!(bytes_recovered, s.clone());

            let mut with_char = s.clone();
            with_char.push(c);
            let chars_recovered = tc(&with_char, s.chars().count());
            prop_assert_eq!(chars_recovered, s);
        });
    }
}