/// Settings that control how text is preprocessed.
///
/// `PartialEq`/`Eq` are derived so configurations can be compared directly
/// (for example in `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextConfig {
    /// Distance between tab stops, in columns. `preprocess` hands this value to
    /// `expand_tabs`, where a width of `0` removes tabs instead of expanding them.
    pub tab_width: u8,
}

impl Default for TextConfig {
    /// Returns a configuration with a tab stop every 8 columns.
    fn default() -> Self {
        Self::new(8)
    }
}

impl TextConfig {
    /// Creates a configuration with the given tab width.
    ///
    /// Declared `const` so a configuration can also be built in constant contexts.
    pub const fn new(tab_width: u8) -> Self {
        Self { tab_width }
    }
}
22
/// Replaces every tab in `text` with spaces reaching the next tab stop.
///
/// Tab stops sit at multiples of `tab_width`. The column counter advances by one for each
/// `char` and restarts at zero after every `'\n'`. When `tab_width` is `0`, tabs are removed
/// and nothing is inserted in their place.
pub fn expand_tabs(text: &str, tab_width: u8) -> String {
    let stop = usize::from(tab_width);
    if stop == 0 {
        // No tab stops exist, so a tab cannot be expanded; drop it.
        return text.replace('\t', "");
    }

    let mut out = String::with_capacity(text.len());
    let mut col = 0usize;

    for ch in text.chars() {
        match ch {
            '\t' => {
                // Always at least one space: a tab sitting exactly on a stop jumps a full width.
                let pad = stop - col % stop;
                out.extend((0..pad).map(|_| ' '));
                col += pad;
            }
            '\n' => {
                out.push('\n');
                col = 0;
            }
            other => {
                out.push(other);
                col += 1;
            }
        }
    }

    out
}
56
/// Returns a copy of `text` with control characters removed, keeping tabs and newlines.
///
/// Dropped are the ASCII control characters other than `'\t'` and `'\n'`, and the code
/// points U+0080 through U+009F. Every other character is copied through unchanged.
pub fn filter_control_chars(text: &str) -> String {
    // Decides whether a single character survives the filter.
    fn is_kept(ch: char) -> bool {
        match ch {
            '\t' | '\n' => true,
            '\u{80}'..='\u{9F}' => false,
            _ => !ch.is_ascii_control(),
        }
    }

    let mut out = String::with_capacity(text.len());
    out.extend(text.chars().filter(|&ch| is_kept(ch)));
    out
}
87
88pub fn preprocess(text: &str, config: &TextConfig) -> String {
93 let expanded = expand_tabs(text, config.tab_width);
94 filter_control_chars(&expanded)
95}
96
/// Returns the longest prefix of `text` that is at most `max_bytes` bytes long and ends on a
/// UTF-8 character boundary.
///
/// The whole string is returned when it already fits within `max_bytes`.
pub fn truncate_to_char_boundary(text: &str, max_bytes: usize) -> &str {
    if max_bytes >= text.len() {
        return text;
    }

    // Scan backwards from the byte limit; offset 0 is always a boundary, so the search
    // cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0);

    &text[..cut]
}
113
114pub fn string_display_width(text: &str) -> u16 {
119 use unicode_width::UnicodeWidthStr;
120 let width = UnicodeWidthStr::width(text);
121 if width > u16::MAX as usize {
122 u16::MAX
123 } else {
124 width as u16
125 }
126}
127
128pub fn truncate_to_display_width(text: &str, max_width: usize) -> &str {
134 use unicode_width::UnicodeWidthChar;
135 let mut width = 0usize;
136 for (byte_idx, ch) in text.char_indices() {
137 let ch_width = UnicodeWidthChar::width(ch).unwrap_or(0);
138 if width + ch_width > max_width {
139 return &text[..byte_idx];
140 }
141 width += ch_width;
142 }
143 text
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a run of `n` spaces so expected tab padding is stated by count rather than by
    /// a whitespace-only literal that is easy to miscount or mangle.
    fn spaces(n: usize) -> String {
        " ".repeat(n)
    }

    // ---- truncate_to_char_boundary ----

    #[test]
    fn truncate_at_char_boundary_ascii() {
        let text = "Hello World";
        assert_eq!(truncate_to_char_boundary(text, 5), "Hello");
    }

    #[test]
    fn truncate_at_char_boundary_emoji() {
        // The emoji occupies bytes 6..10, so a limit of 7 must back up to byte 6.
        let text = "Hello \u{1F600} World";
        let result = truncate_to_char_boundary(text, 7);
        assert_eq!(result, "Hello ");
    }

    #[test]
    fn truncate_at_char_boundary_cjk() {
        // Each character is 3 bytes; a limit of 7 keeps the first two (6 bytes).
        let text = "\u{4F60}\u{597D}\u{4E16}\u{754C}";
        let result = truncate_to_char_boundary(text, 7);
        assert_eq!(result, "\u{4F60}\u{597D}");
    }

    #[test]
    fn truncate_at_char_boundary_empty() {
        assert_eq!(truncate_to_char_boundary("", 5), "");
    }

    #[test]
    fn truncate_at_char_boundary_zero_limit() {
        assert_eq!(truncate_to_char_boundary("Hello", 0), "");
    }

    #[test]
    fn truncate_at_char_boundary_larger_limit() {
        let text = "Hi";
        assert_eq!(truncate_to_char_boundary(text, 100), "Hi");
    }

    // ---- string_display_width ----

    #[test]
    fn display_width_ascii() {
        assert_eq!(string_display_width("Hello"), 5);
    }

    #[test]
    fn display_width_emoji() {
        assert_eq!(string_display_width("\u{1F600}"), 2);
    }

    #[test]
    fn display_width_cjk() {
        assert_eq!(string_display_width("\u{4F60}\u{597D}"), 4);
    }

    #[test]
    fn display_width_empty() {
        assert_eq!(string_display_width(""), 0);
    }

    #[test]
    fn display_width_mixed() {
        assert_eq!(string_display_width("Hi \u{1F600}"), 5);
    }

    // ---- truncate_to_display_width ----

    #[test]
    fn truncate_to_display_width_ascii() {
        assert_eq!(truncate_to_display_width("Hello World", 5), "Hello");
    }

    #[test]
    fn truncate_to_display_width_cjk() {
        // Three double-width characters; only two fit within five columns.
        let text = "\u{4F60}\u{597D}\u{4E16}";
        assert_eq!(truncate_to_display_width(text, 5), "\u{4F60}\u{597D}");
    }

    #[test]
    fn truncate_to_display_width_emoji() {
        assert_eq!(truncate_to_display_width("Hi \u{1F600}", 4), "Hi ");
    }

    // ---- expand_tabs ----

    #[test]
    fn expand_tabs_single_tab_at_position_zero() {
        let result = expand_tabs("\t", 8);
        assert_eq!(result, spaces(8));
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn expand_tabs_after_three_chars() {
        let result = expand_tabs("abc\t", 8);
        assert_eq!(result, format!("abc{}", spaces(5)));
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn expand_tabs_after_eight_chars() {
        // A tab sitting exactly on a tab stop advances a full tab width.
        let result = expand_tabs("abcdefgh\t", 8);
        assert_eq!(result, format!("abcdefgh{}", spaces(8)));
        assert_eq!(result.len(), 16);
    }

    #[test]
    fn expand_tabs_no_tabs_unchanged() {
        let result = expand_tabs("hello world", 8);
        assert_eq!(result, "hello world");
    }

    #[test]
    fn expand_tabs_custom_width_four() {
        let result = expand_tabs("\t", 4);
        assert_eq!(result, spaces(4));
        assert_eq!(result.len(), 4);

        let result2 = expand_tabs("ab\t", 4);
        assert_eq!(result2, format!("ab{}", spaces(2)));
        assert_eq!(result2.len(), 4);
    }

    #[test]
    fn expand_tabs_multiple_tabs() {
        let result = expand_tabs("\t\t", 4);
        assert_eq!(result, spaces(8));
        assert_eq!(result.len(), 8);
    }

    #[test]
    fn expand_tabs_with_newline_resets_column() {
        let result = expand_tabs("abc\n\t", 4);
        assert_eq!(result, format!("abc\n{}", spaces(4)));
    }

    #[test]
    fn expand_tabs_zero_width_removes_tabs() {
        assert_eq!(expand_tabs("a\tb\tc", 0), "abc");
    }

    // ---- filter_control_chars ----

    #[test]
    fn filter_control_chars_removes_null() {
        let result = filter_control_chars("hello\x00world");
        assert_eq!(result, "helloworld");
    }

    #[test]
    fn filter_control_chars_removes_bell() {
        let result = filter_control_chars("hello\x07world");
        assert_eq!(result, "helloworld");
    }

    #[test]
    fn filter_control_chars_preserves_tab_and_newline() {
        let result = filter_control_chars("hello\tworld\n");
        assert_eq!(result, "hello\tworld\n");
    }

    #[test]
    fn filter_control_chars_clean_text_unchanged() {
        let result = filter_control_chars("Hello, World! 123");
        assert_eq!(result, "Hello, World! 123");
    }

    #[test]
    fn filter_control_chars_removes_c1_range() {
        let text = format!("hello{}world", '\u{0085}');
        let result = filter_control_chars(&text);
        assert_eq!(result, "helloworld");
    }

    // ---- preprocess / TextConfig ----

    #[test]
    fn preprocess_combines_tab_expansion_and_filtering() {
        let config = TextConfig::new(4);
        let result = preprocess("a\tb\x07c", &config);
        // The tab after "a" pads to column 4; the bell character is dropped.
        assert_eq!(result, format!("a{}bc", spaces(3)));
    }

    #[test]
    fn empty_string_handling() {
        assert_eq!(expand_tabs("", 8), "");
        assert_eq!(filter_control_chars(""), "");
        let config = TextConfig::default();
        assert_eq!(preprocess("", &config), "");
    }

    #[test]
    fn text_config_default_tab_width_eight() {
        let config = TextConfig::default();
        assert_eq!(config.tab_width, 8);
    }
}