television_utils/strings.rs
1use lazy_static::lazy_static;
2
/// Finds the first UTF-8 character boundary located at or after `start`.
///
/// An index that already sits on a character boundary is returned unchanged.
/// An index at or past the end of the string is clamped to `s.len()`.
///
/// # Examples
/// ```
/// use television_utils::strings::next_char_boundary;
///
/// let s = "Hello, World!";
/// assert_eq!(next_char_boundary(s, 0), 0);
/// assert_eq!(next_char_boundary(s, 1), 1);
/// assert_eq!(next_char_boundary(s, 13), 13);
/// assert_eq!(next_char_boundary(s, 30), 13);
///
/// let s = "👋🌍!";
/// assert_eq!(next_char_boundary(s, 0), 0);
/// assert_eq!(next_char_boundary(s, 1), 4);
/// assert_eq!(next_char_boundary(s, 4), 4);
/// assert_eq!(next_char_boundary(s, 7), 8);
/// assert_eq!(next_char_boundary(s, 8), 8);
/// ```
pub fn next_char_boundary(s: &str, start: usize) -> usize {
    let end = s.len();
    // `end` itself is always a boundary, so the search only comes up empty
    // when `start` lies beyond the string; that case clamps to `end` too.
    (start..=end)
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(end)
}
36
/// Returns the index of the previous character boundary in the given string.
///
/// If the given index is already a character boundary, it is returned as is.
/// If the given index is out of bounds, the length of the string is returned
/// (the end of the string is the closest boundary below any such index).
///
/// # Examples
/// ```
/// use television_utils::strings::prev_char_boundary;
///
/// let s = "Hello, World!";
/// assert_eq!(prev_char_boundary(s, 0), 0);
/// assert_eq!(prev_char_boundary(s, 1), 1);
/// assert_eq!(prev_char_boundary(s, 5), 5);
/// assert_eq!(prev_char_boundary(s, 30), 13);
///
/// let s = "👋🌍!";
/// assert_eq!(prev_char_boundary(s, 0), 0);
/// assert_eq!(prev_char_boundary(s, 4), 4);
/// assert_eq!(prev_char_boundary(s, 6), 4);
/// ```
pub fn prev_char_boundary(s: &str, start: usize) -> usize {
    // Clamp before searching: no index past the end is a boundary, so walking
    // down byte by byte from a huge `start` would take `start - s.len()` steps.
    let upper = start.min(s.len());
    // Index 0 is always a boundary, so the search cannot actually fail.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
63
64/// Returns a slice of the given string that starts and ends at character boundaries.
65///
66/// If the given start index is greater than the end index, or if either index is out of bounds,
67/// an empty string is returned.
68///
69/// # Examples
70/// ```
71/// use television_utils::strings::slice_at_char_boundaries;
72///
73/// let s = "Hello, World!";
74/// assert_eq!(slice_at_char_boundaries(s, 0, 0), "");
75/// assert_eq!(slice_at_char_boundaries(s, 0, 1), "H");
76///
77/// let s = "ππ!";
78/// assert_eq!(slice_at_char_boundaries(s, 0, 0), "");
79/// assert_eq!(slice_at_char_boundaries(s, 0, 2), "π");
80/// assert_eq!(slice_at_char_boundaries(s, 0, 5), "ππ");
81/// ```
82pub fn slice_at_char_boundaries(
83 s: &str,
84 start_byte_index: usize,
85 end_byte_index: usize,
86) -> &str {
87 if start_byte_index > end_byte_index
88 || start_byte_index > s.len()
89 || end_byte_index > s.len()
90 {
91 return EMPTY_STRING;
92 }
93 &s[prev_char_boundary(s, start_byte_index)
94 ..next_char_boundary(s, end_byte_index)]
95}
96
97/// Returns a slice of the given string that starts at the beginning and ends at a character
98/// boundary.
99///
100/// If the given index is out of bounds, the whole string is returned.
101/// If the given index is already a character boundary, the string up to that index is returned.
102///
103/// # Examples
104/// ```
105/// use television_utils::strings::slice_up_to_char_boundary;
106///
107/// let s = "Hello, World!";
108/// assert_eq!(slice_up_to_char_boundary(s, 0), "");
109/// assert_eq!(slice_up_to_char_boundary(s, 1), "H");
110/// assert_eq!(slice_up_to_char_boundary(s, 13), "Hello, World!");
111///
112/// let s = "π\nπ!";
113/// assert_eq!(slice_up_to_char_boundary(s, 0), "");
114/// assert_eq!(slice_up_to_char_boundary(s, 1), "π");
115/// assert_eq!(slice_up_to_char_boundary(s, 4), "π");
116/// assert_eq!(slice_up_to_char_boundary(s, 7), "π\nπ");
117/// ```
118pub fn slice_up_to_char_boundary(s: &str, byte_index: usize) -> &str {
119 &s[..next_char_boundary(s, byte_index)]
120}
121
/// Tries to decode a single UTF-8 character from the start of `input`.
///
/// On success, returns the decoded character together with the number of bytes it occupies
/// (between 1 and 4). Returns `None` when `input` is empty or does not start with a complete,
/// valid UTF-8 sequence.
///
/// # Examples
/// ```
/// use television_utils::strings::try_parse_utf8_char;
///
/// let input = b"Hello, World!";
/// let (chr, n) = try_parse_utf8_char(input).unwrap();
/// assert_eq!(chr, 'H');
/// assert_eq!(n, 1);
///
/// let input = b"\xF0\x9F\x91\x8B\xF0\x9F\x8C\x8D!";
/// let (chr, n) = try_parse_utf8_char(input).unwrap();
/// assert_eq!(chr, '👋');
/// assert_eq!(n, 4);
/// ```
pub fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
    // A character is encoded on 1 to 4 bytes: try each width, shortest first,
    // and keep the first prefix that is valid UTF-8 on its own.
    (1..=4).find_map(|width| {
        let prefix = input.get(..width)?;
        let decoded = std::str::from_utf8(prefix).ok()?;
        decoded.chars().next().map(|chr| (chr, width))
    })
}
153
154lazy_static! {
155 /// The Unicode symbol to use for non-printable characters.
156 static ref NULL_SYMBOL: char = char::from_u32(0x2400).unwrap();
157}
158
/// The empty string slice.
pub const EMPTY_STRING: &str = "";
/// Default number of spaces a tab character is expanded to.
pub const TAB_WIDTH: usize = 4;

/// Horizontal tab (U+0009).
const TAB_CHARACTER: char = '\u{0009}';
/// Line feed (U+000A).
const LINE_FEED_CHARACTER: char = '\n';
/// Delete (U+007F), first character of the upper control range.
const DELETE_CHARACTER: char = '\u{007F}';
/// Byte order mark (U+FEFF).
const BOM_CHARACTER: char = '\u{FEFF}';
/// Null (U+0000), first ASCII control character.
const NULL_CHARACTER: char = '\0';
/// Unit separator (U+001F), last ASCII control character below space.
const UNIT_SEPARATOR_CHARACTER: char = '\x1F';
/// Application program command (U+009F), last character of the upper control range.
const APPLICATION_PROGRAM_COMMAND_CHARACTER: char = '\u{009F}';
169
/// Inclusive range of characters.
type CharRange = std::ops::RangeInclusive<char>;

// Code point ranges of the Nerd Font glyph sets that are kept as-is when
// replacing non-printable characters.
const NF_RANGE_DEVICONS: CharRange = '\u{e700}'..='\u{e8ef}';
const NF_RANGE_SETI: CharRange = '\u{e5fa}'..='\u{e6b7}';
const NF_RANGE_FONT_AWESOME: CharRange = '\u{ed00}'..='\u{f2ff}';
const NF_RANGE_FONT_AWESOME_EXT: CharRange = '\u{e200}'..='\u{e2a9}';
const NF_RANGE_MATERIAL: CharRange = '\u{f0001}'..='\u{f1af0}';
const NF_RANGE_WEATHER: CharRange = '\u{e300}'..='\u{e3e3}';
const NF_RANGE_OCTICONS_1: CharRange = '\u{f400}'..='\u{f533}';
const NF_RANGE_OCTICONS_2: CharRange = '\u{2665}'..='\u{26a1}';
const NF_RANGE_POWERLINE_1: CharRange = '\u{e0a0}'..='\u{e0a2}';
const NF_RANGE_POWERLINE_2: CharRange = '\u{e0b0}'..='\u{e0b3}';

/// Every Nerd Font range declared above, for membership tests over all of them.
const ALL_NF_RANGES: [&CharRange; 10] = [
    &NF_RANGE_DEVICONS,
    &NF_RANGE_SETI,
    &NF_RANGE_FONT_AWESOME,
    &NF_RANGE_FONT_AWESOME_EXT,
    &NF_RANGE_MATERIAL,
    &NF_RANGE_WEATHER,
    &NF_RANGE_OCTICONS_1,
    &NF_RANGE_OCTICONS_2,
    &NF_RANGE_POWERLINE_1,
    &NF_RANGE_POWERLINE_2,
];
202
/// Options controlling how `replace_non_printable` rewrites its input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplaceNonPrintableConfig {
    /// Whether tab characters are expanded into spaces.
    pub replace_tab: bool,
    /// Number of spaces written for each tab when `replace_tab` is set.
    pub tab_width: usize,
    /// Whether line feed characters are dropped from the output.
    pub replace_line_feed: bool,
    /// Whether control characters and the byte order mark are replaced by the NULL symbol.
    pub replace_control_characters: bool,
}
209
210impl ReplaceNonPrintableConfig {
211 pub fn tab_width(&mut self, tab_width: usize) -> &mut Self {
212 self.tab_width = tab_width;
213 self
214 }
215}
216
217impl Default for ReplaceNonPrintableConfig {
218 fn default() -> Self {
219 Self {
220 replace_tab: true,
221 tab_width: TAB_WIDTH,
222 replace_line_feed: true,
223 replace_control_characters: true,
224 }
225 }
226}
227
228#[allow(clippy::missing_panics_doc)]
229/// Replaces non-printable characters in the given byte slice with default printable characters.
230///
231/// The tab width is used to determine how many spaces to replace a tab character with.
232/// The default printable character for non-printable characters is the Unicode symbol for NULL.
233///
234/// The function returns a tuple containing the processed string and a vector of offsets introduced
235/// by the transformation.
236///
237/// # Examples
238/// ```
239/// use television_utils::strings::{replace_non_printable, ReplaceNonPrintableConfig};
240///
241/// let input = b"Hello, World!";
242/// let (output, offsets) = replace_non_printable(input, &ReplaceNonPrintableConfig::default());
243/// assert_eq!(output, "Hello, World!");
244/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,0,0,0,0,0,0]);
245///
246/// let input = b"Hello,\tWorld!";
247/// let (output, offsets) = replace_non_printable(input, &ReplaceNonPrintableConfig::default().tab_width(4));
248/// assert_eq!(output, "Hello, World!");
249/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,3,3,3,3,3,3]);
250///
251/// let input = b"Hello,\nWorld!";
252/// let (output, offsets) = replace_non_printable(input, &ReplaceNonPrintableConfig::default());
253/// assert_eq!(output, "Hello,World!");
254/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1]);
255/// ```
256pub fn replace_non_printable(
257 input: &[u8],
258 config: &ReplaceNonPrintableConfig,
259) -> (String, Vec<i16>) {
260 let mut output = String::with_capacity(input.len());
261 let mut offsets = Vec::new();
262 let mut cumulative_offset: i16 = 0;
263
264 let mut idx = 0;
265 let len = input.len();
266 while idx < len {
267 offsets.push(cumulative_offset);
268 if let Some((chr, skip_ahead)) = try_parse_utf8_char(&input[idx..]) {
269 idx += skip_ahead;
270
271 match chr {
272 // tab
273 TAB_CHARACTER if config.replace_tab => {
274 output.push_str(&" ".repeat(config.tab_width));
275 cumulative_offset +=
276 i16::try_from(config.tab_width).unwrap() - 1;
277 }
278 // line feed
279 LINE_FEED_CHARACTER if config.replace_line_feed => {
280 cumulative_offset -= 1;
281 }
282
283 // ASCII control characters from 0x00 to 0x1F
284 // + control characters from \u{007F} to \u{009F}
285 // + BOM
286 NULL_CHARACTER..=UNIT_SEPARATOR_CHARACTER
287 | DELETE_CHARACTER..=APPLICATION_PROGRAM_COMMAND_CHARACTER
288 | BOM_CHARACTER
289 if config.replace_control_characters =>
290 {
291 output.push(*NULL_SYMBOL);
292 }
293 // CJK Unified Ideographs
294 c if ('\u{4E00}'..='\u{9FFF}').contains(&c) => {
295 output.push(c);
296 }
297 // Nerd fonts
298 c if ALL_NF_RANGES.iter().any(|r| r.contains(&c)) => {
299 output.push(c);
300 }
301 // Unicode characters above 0x0700 seem unstable with ratatui
302 c if c > '\u{0700}' => {
303 output.push(*NULL_SYMBOL);
304 }
305 // everything else
306 c => output.push(c),
307 }
308 } else {
309 output.push(*NULL_SYMBOL);
310 idx += 1;
311 }
312 }
313
314 (output, offsets)
315}
316
/// Proportion of printable ASCII bytes used as the threshold for considering a buffer to be
/// printable ASCII.
///
/// It is meant to be compared against the proportion measured on a sample of a file's contents
/// to decide whether that file is likely to be a text file.
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;
322
/// Returns the proportion of printable ASCII characters in the given buffer.
///
/// A byte counts as printable when it lies between the space character (0x20) and `~` (0x7E),
/// both included. Tabs, line feeds and other control bytes are therefore not counted.
///
/// An empty buffer yields `0.0` rather than the `NaN` a plain division would produce.
///
/// This really is a cheap way to determine if a buffer is likely to be a text file.
///
/// # Examples
/// ```
/// use television_utils::strings::proportion_of_printable_ascii_characters;
///
/// let buffer = b"Hello, World!";
/// let proportion = proportion_of_printable_ascii_characters(buffer);
/// assert_eq!(proportion, 1.0);
///
/// let buffer = b"Hello, World!\x00";
/// let proportion = proportion_of_printable_ascii_characters(buffer);
/// assert_eq!(proportion, 0.9285714);
///
/// let buffer = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F";
/// let proportion = proportion_of_printable_ascii_characters(buffer);
/// assert_eq!(proportion, 0.0);
///
/// assert_eq!(proportion_of_printable_ascii_characters(b""), 0.0);
/// ```
pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
    // Avoid 0 / 0: nothing printable was observed in an empty buffer.
    if buffer.is_empty() {
        return 0.0;
    }

    let printable = buffer
        .iter()
        .filter(|&&byte| matches!(byte, b' '..=b'~'))
        .count();
    printable as f32 / buffer.len() as f32
}
352
353const MAX_LINE_LENGTH: usize = 300;
354
355/// Preprocesses a line of text for display.
356///
357/// This function trims the line, replaces non-printable characters, and truncates the line if it
358/// is too long.
359///
360/// # Examples
361/// ```
362/// use television_utils::strings::preprocess_line;
363///
364/// let line = "Hello, World!";
365/// let (processed, offsets) = preprocess_line(line);
366/// assert_eq!(processed, "Hello, World!");
367/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,0,0,0,0,0,0]);
368///
369/// let line = "\x00World\x7F!";
370/// let (processed, offsets) = preprocess_line(line);
371/// assert_eq!(processed, "βWorldβ!");
372/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,0]);
373///
374/// let line = "a".repeat(400);
375/// let (processed, offsets) = preprocess_line(&line);
376/// assert_eq!(processed.len(), 300);
377/// assert_eq!(offsets, vec![0; 300]);
378/// ```
379pub fn preprocess_line(line: &str) -> (String, Vec<i16>) {
380 replace_non_printable(
381 {
382 if line.len() > MAX_LINE_LENGTH {
383 slice_up_to_char_boundary(line, MAX_LINE_LENGTH)
384 } else {
385 line
386 }
387 }
388 .as_bytes(),
389 &ReplaceNonPrintableConfig::default(),
390 )
391}
392
393/// Make a matched string printable while preserving match ranges in the process.
394///
395/// This function preprocesses the matched string and returns a printable version of it along with
396/// the match ranges adjusted to the new string.
397///
398/// # Examples
399/// ```
400/// use television_utils::strings::make_matched_string_printable;
401///
402/// let matched_string = "Hello, World!";
403/// let match_ranges = vec![(0, 1), (7, 8)];
404/// let match_ranges = Some(match_ranges.as_slice());
405/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
406/// assert_eq!(printable, "Hello, World!");
407/// assert_eq!(match_indices, vec![(0, 1), (7, 8)]);
408///
409/// let matched_string = "Hello,\tWorld!";
410/// let match_ranges = vec![(0, 1), (7, 8)];
411/// let match_ranges = Some(match_ranges.as_slice());
412/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
413/// assert_eq!(printable, "Hello, World!");
414/// assert_eq!(match_indices, vec![(0, 1), (10, 11)]);
415///
416/// let matched_string = "Hello,\nWorld!";
417/// let match_ranges = vec![(0, 1), (7, 8)];
418/// let match_ranges = Some(match_ranges.as_slice());
419/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
420/// assert_eq!(printable, "Hello,World!");
421/// assert_eq!(match_indices, vec![(0, 1), (6, 7)]);
422///
423/// let matched_string = "Hello, World!";
424/// let (printable, match_indices) = make_matched_string_printable(matched_string, None);
425/// assert_eq!(printable, "Hello, World!");
426/// assert_eq!(match_indices, vec![]);
427///
428/// let matched_string = "build.rs";
429/// let match_ranges = vec![(0, 1), (7, 8)];
430/// let match_ranges = Some(match_ranges.as_slice());
431/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
432/// assert_eq!(printable, "build.rs");
433/// assert_eq!(match_indices, vec![(0, 1), (7, 8)]);
434///
435/// let matched_string = "a\tb";
436/// let match_ranges = vec![(0, 1), (2, 3)];
437/// let match_ranges = Some(match_ranges.as_slice());
438/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
439/// assert_eq!(printable, "a b");
440/// assert_eq!(match_indices, vec![(0, 1), (5, 6)]);
441///
442/// let matched_string = "a\tbcd".repeat(65);
443/// let match_ranges = vec![(0, 1), (310, 311)];
444/// let match_ranges = Some(match_ranges.as_slice());
445/// let (printable, match_indices) = make_matched_string_printable(&matched_string, match_ranges);
446/// assert_eq!(printable.len(), 480);
447/// assert_eq!(match_indices, vec![(0, 1)]);
448/// ```
449///
450/// # Panics
451/// This will panic if the length of the printable string or the match indices don't fit into a
452/// `u32`.
453pub fn make_matched_string_printable(
454 matched_string: &str,
455 match_ranges: Option<&[(u32, u32)]>,
456) -> (String, Vec<(u32, u32)>) {
457 let (printable, transformation_offsets) = preprocess_line(matched_string);
458 let mut match_indices = Vec::new();
459
460 if let Some(ranges) = match_ranges {
461 for (start, end) in ranges.iter().take_while(|(start, _)| {
462 *start < u32::try_from(transformation_offsets.len()).unwrap()
463 }) {
464 let new_start = i64::from(*start)
465 + i64::from(transformation_offsets[*start as usize]);
466 let new_end = i64::from(*end)
467 + i64::from(
468 // Use the last offset if the end index is out of bounds
469 // (this will be the case when the match range includes the last character)
470 transformation_offsets[(*end as usize)
471 .min(transformation_offsets.len() - 1)],
472 );
473 match_indices.push((
474 u32::try_from(new_start).unwrap(),
475 u32::try_from(new_end).unwrap(),
476 ));
477 }
478 }
479
480 (printable, match_indices)
481}
482
483/// Shrink a string to a maximum length, adding an ellipsis in the middle.
484///
485/// If the string is shorter than the maximum length, it is returned as is.
486/// If the string is longer than the maximum length, it is shortened and an ellipsis is added in
487/// the middle.
488///
489/// # Examples
490/// ```
491/// use television_utils::strings::shrink_with_ellipsis;
492///
493/// let s = "Hello, World!";
494/// assert_eq!(shrink_with_ellipsis(s, 13), "Hello, World!");
495/// assert_eq!(shrink_with_ellipsis(s, 6), "Hβ¦!");
496/// ```
497pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
498 if s.len() <= max_length {
499 return s.to_string();
500 }
501
502 let half_max_length = (max_length / 2).saturating_sub(2);
503 let first_half = slice_up_to_char_boundary(s, half_max_length);
504 let second_half =
505 slice_at_char_boundaries(s, s.len() - half_max_length, s.len());
506 format!("{first_half}β¦{second_half}")
507}
508
#[cfg(test)]
mod tests {
    use super::*;

    fn test_next_char_boundary(input: &str, start: usize, expected: usize) {
        let actual = next_char_boundary(input, start);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_next_char_boundary_ascii() {
        test_next_char_boundary("Hello, World!", 0, 0);
        test_next_char_boundary("Hello, World!", 1, 1);
        test_next_char_boundary("Hello, World!", 13, 13);
        test_next_char_boundary("Hello, World!", 30, 13);
    }

    #[test]
    fn test_next_char_boundary_emoji() {
        // Each emoji is encoded on 4 bytes.
        test_next_char_boundary("👋🌍!", 0, 0);
        test_next_char_boundary("👋🌍!", 1, 4);
        test_next_char_boundary("👋🌍!", 4, 4);
        test_next_char_boundary("👋🌍!", 8, 8);
        test_next_char_boundary("👋🌍!", 7, 8);
    }

    fn test_previous_char_boundary(
        input: &str,
        start: usize,
        expected: usize,
    ) {
        let actual = prev_char_boundary(input, start);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_previous_char_boundary_ascii() {
        test_previous_char_boundary("Hello, World!", 0, 0);
        test_previous_char_boundary("Hello, World!", 1, 1);
        test_previous_char_boundary("Hello, World!", 5, 5);
    }

    #[test]
    fn test_previous_char_boundary_emoji() {
        test_previous_char_boundary("👋🌍!", 0, 0);
        test_previous_char_boundary("👋🌍!", 4, 4);
        test_previous_char_boundary("👋🌍!", 6, 4);
        test_previous_char_boundary("👋🌍!", 8, 8);
    }

    fn test_slice_at_char_boundaries(
        input: &str,
        start: usize,
        end: usize,
        expected: &str,
    ) {
        let actual = slice_at_char_boundaries(input, start, end);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_slice_at_char_boundaries_ascii() {
        test_slice_at_char_boundaries("Hello, World!", 0, 0, "");
        test_slice_at_char_boundaries("Hello, World!", 0, 1, "H");
        test_slice_at_char_boundaries("Hello, World!", 0, 13, "Hello, World!");
        test_slice_at_char_boundaries("Hello, World!", 0, 30, "");
    }

    #[test]
    fn test_slice_at_char_boundaries_emoji() {
        test_slice_at_char_boundaries("👋🌍!", 0, 0, "");
        test_slice_at_char_boundaries("👋🌍!", 0, 4, "👋");
        test_slice_at_char_boundaries("👋🌍!", 0, 8, "👋🌍");
        test_slice_at_char_boundaries("👋🌍!", 0, 7, "👋🌍");
        test_slice_at_char_boundaries("👋🌍!", 0, 9, "👋🌍!");
    }

    // Runs `replace_non_printable` with the default configuration and a tab width of 2.
    fn test_replace_non_printable(input: &str, expected: &str) {
        let (actual, _offset) = replace_non_printable(
            input.as_bytes(),
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_replace_non_printable_ascii() {
        test_replace_non_printable("Hello, World!", "Hello, World!");
    }

    #[test]
    fn test_replace_non_printable_tab() {
        // Each tab becomes two spaces; the trailing line feed is dropped.
        test_replace_non_printable("Hello\tWorld!", "Hello  World!");
        test_replace_non_printable("\t-- AND\n", "  -- AND");
    }

    #[test]
    fn test_replace_non_printable_line_feed() {
        test_replace_non_printable("Hello\nWorld!", "HelloWorld!");
    }

    #[test]
    fn test_replace_non_printable_null() {
        test_replace_non_printable("Hello\x00World!", "Hello␀World!");
        test_replace_non_printable("Hello World!\0", "Hello World!␀");
    }

    #[test]
    fn test_replace_non_printable_delete() {
        test_replace_non_printable("Hello\x7FWorld!", "Hello␀World!");
    }

    #[test]
    fn test_replace_non_printable_bom() {
        test_replace_non_printable("Hello\u{FEFF}World!", "Hello␀World!");
    }

    #[test]
    fn test_replace_non_printable_start_txt() {
        // NOTE(review): the literals of this test were mis-encoded. They are reconstructed as
        // two accented letters followed by a START OF TEXT control character (U+0002), written
        // as an escape so that it stays visible. Confirm against the upstream test.
        test_replace_non_printable("Àì\u{2}", "Àì␀");
    }

    #[test]
    fn test_replace_non_printable_range_tab() {
        let input = b"Hello,\tWorld!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default(),
        );
        assert_eq!(output, "Hello,    World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3]);
    }

    #[test]
    fn test_replace_non_printable_range_line_feed() {
        let input = b"Hello,\nWorld!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(output, "Hello,World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1]);
    }

    #[test]
    fn test_replace_non_printable_no_range_changes() {
        let input = b"Hello,\x00World!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(output, "Hello,␀World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

        let input = b"Hello,\x7FWorld!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(output, "Hello,␀World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    fn test_proportion_of_printable_ascii_characters(
        input: &str,
        expected: f32,
    ) {
        let actual =
            proportion_of_printable_ascii_characters(input.as_bytes());
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_proportion_of_printable_ascii_characters_ascii() {
        test_proportion_of_printable_ascii_characters("Hello, World!", 1.0);
        test_proportion_of_printable_ascii_characters(
            "Hello, World!\x00",
            0.9285714,
        );
        test_proportion_of_printable_ascii_characters(
            "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
            0.0,
        );
    }

    fn test_preprocess_line(input: &str, expected: &str) {
        let (actual, _offset) = preprocess_line(input);
        assert_eq!(actual, expected, "input: {:?}", input);
    }

    #[test]
    fn test_preprocess_line_cases() {
        test_preprocess_line("Hello, World!", "Hello, World!");
        test_preprocess_line("Hello, World!\n", "Hello, World!");
        test_preprocess_line("Hello, World!\x00", "Hello, World!␀");
        test_preprocess_line("Hello, World!\x7F", "Hello, World!␀");
        test_preprocess_line("Hello, World!\u{FEFF}", "Hello, World!␀");
        test_preprocess_line(&"a".repeat(400), &"a".repeat(300));
    }
}