television_utils/
strings.rs

1use lazy_static::lazy_static;
2
/// Finds the closest character boundary located at or after `start`.
///
/// An index that already sits on a character boundary is returned unchanged.
/// An index at or past the end of the string yields the length of the string.
///
/// # Examples
/// ```
/// use television_utils::strings::next_char_boundary;
///
/// let s = "Hello, World!";
/// assert_eq!(next_char_boundary(s, 0), 0);
/// assert_eq!(next_char_boundary(s, 1), 1);
/// assert_eq!(next_char_boundary(s, 13), 13);
/// assert_eq!(next_char_boundary(s, 30), 13);
///
/// let s = "👋🌍!";
/// assert_eq!(next_char_boundary(s, 0), 0);
/// assert_eq!(next_char_boundary(s, 1), 4);
/// assert_eq!(next_char_boundary(s, 4), 4);
/// assert_eq!(next_char_boundary(s, 7), 8);
/// assert_eq!(next_char_boundary(s, 8), 8);
/// ```
pub fn next_char_boundary(s: &str, start: usize) -> usize {
    let end = s.len();
    // The candidate range is empty when `start` is out of bounds, in which case the end of
    // the string (always a boundary) is the answer.
    (start..end)
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(end)
}
36
/// Returns the index of the previous character boundary in the given string.
///
/// If the given index is already a character boundary, it is returned as is.
/// If the given index is out of bounds, the length of the string is returned (the end of a
/// string is always a character boundary).
///
/// # Examples
/// ```
/// use television_utils::strings::prev_char_boundary;
///
/// let s = "Hello, World!";
/// assert_eq!(prev_char_boundary(s, 0), 0);
/// assert_eq!(prev_char_boundary(s, 1), 1);
/// assert_eq!(prev_char_boundary(s, 5), 5);
/// assert_eq!(prev_char_boundary(s, 30), 13);
///
/// let s = "👋🌍!";
/// assert_eq!(prev_char_boundary(s, 0), 0);
/// assert_eq!(prev_char_boundary(s, 4), 4);
/// assert_eq!(prev_char_boundary(s, 6), 4);
/// ```
pub fn prev_char_boundary(s: &str, start: usize) -> usize {
    // Clamp before scanning: every index past the end resolves to `s.len()`, so there is no
    // point in walking down to it one step at a time (which is unbounded work for huge indices).
    let mut i = start.min(s.len());
    // Index 0 is always a boundary, so this loop terminates; the `i > 0` guard is kept purely
    // as a defensive measure against underflow.
    while i > 0 && !s.is_char_boundary(i) {
        i -= 1;
    }
    i
}
63
64/// Returns a slice of the given string that starts and ends at character boundaries.
65///
66/// If the given start index is greater than the end index, or if either index is out of bounds,
67/// an empty string is returned.
68///
69/// # Examples
70/// ```
71/// use television_utils::strings::slice_at_char_boundaries;
72///
73/// let s = "Hello, World!";
74/// assert_eq!(slice_at_char_boundaries(s, 0, 0), "");
75/// assert_eq!(slice_at_char_boundaries(s, 0, 1), "H");
76///
77/// let s = "πŸ‘‹πŸŒ!";
78/// assert_eq!(slice_at_char_boundaries(s, 0, 0), "");
79/// assert_eq!(slice_at_char_boundaries(s, 0, 2), "πŸ‘‹");
80/// assert_eq!(slice_at_char_boundaries(s, 0, 5), "πŸ‘‹πŸŒ");
81/// ```
82pub fn slice_at_char_boundaries(
83    s: &str,
84    start_byte_index: usize,
85    end_byte_index: usize,
86) -> &str {
87    if start_byte_index > end_byte_index
88        || start_byte_index > s.len()
89        || end_byte_index > s.len()
90    {
91        return EMPTY_STRING;
92    }
93    &s[prev_char_boundary(s, start_byte_index)
94        ..next_char_boundary(s, end_byte_index)]
95}
96
97/// Returns a slice of the given string that starts at the beginning and ends at a character
98/// boundary.
99///
100/// If the given index is out of bounds, the whole string is returned.
101/// If the given index is already a character boundary, the string up to that index is returned.
102///
103/// # Examples
104/// ```
105/// use television_utils::strings::slice_up_to_char_boundary;
106///
107/// let s = "Hello, World!";
108/// assert_eq!(slice_up_to_char_boundary(s, 0), "");
109/// assert_eq!(slice_up_to_char_boundary(s, 1), "H");
110/// assert_eq!(slice_up_to_char_boundary(s, 13), "Hello, World!");
111///
112/// let s = "πŸ‘‹\n🌍!";
113/// assert_eq!(slice_up_to_char_boundary(s, 0), "");
114/// assert_eq!(slice_up_to_char_boundary(s, 1), "πŸ‘‹");
115/// assert_eq!(slice_up_to_char_boundary(s, 4), "πŸ‘‹");
116/// assert_eq!(slice_up_to_char_boundary(s, 7), "πŸ‘‹\n🌍");
117/// ```
118pub fn slice_up_to_char_boundary(s: &str, byte_index: usize) -> &str {
119    &s[..next_char_boundary(s, byte_index)]
120}
121
/// Tries to decode a single UTF-8 character from the start of the given byte slice.
///
/// On success, the decoded character is returned together with the number of bytes it occupies
/// in `input`. `None` is returned when `input` is empty or when none of its 1- to 4-byte
/// prefixes is valid UTF-8.
///
/// # Examples
/// ```
/// use television_utils::strings::try_parse_utf8_char;
///
/// let input = b"Hello, World!";
/// let (chr, n) = try_parse_utf8_char(input).unwrap();
/// assert_eq!(chr, 'H');
/// assert_eq!(n, 1);
///
/// let input = b"\xF0\x9F\x91\x8B\xF0\x9F\x8C\x8D!";
/// let (chr, n) = try_parse_utf8_char(input).unwrap();
/// assert_eq!(chr, '👋');
/// assert_eq!(n, 4);
/// ```
pub fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
    // A UTF-8 encoded character spans 1 to 4 bytes: try every width, shortest first, and stop
    // at the first prefix that decodes successfully.
    (1..=4).find_map(|width| {
        let prefix = input.get(..width)?;
        let decoded = std::str::from_utf8(prefix).ok()?;
        decoded.chars().next().map(|chr| (chr, width))
    })
}
153
lazy_static! {
    /// The Unicode symbol to use for non-printable characters.
    ///
    /// This is U+2400 (rendered as `␀`). The `unwrap` cannot fail because 0x2400 is a valid
    /// Unicode scalar value.
    static ref NULL_SYMBOL: char = char::from_u32(0x2400).unwrap();
}
158
/// An empty string slice, returned by `slice_at_char_boundaries` for invalid ranges.
pub const EMPTY_STRING: &str = "";
/// The tab width used by `ReplaceNonPrintableConfig::default()`.
pub const TAB_WIDTH: usize = 4;

// Characters (and range bounds) that `replace_non_printable` matches on.

// Expanded to spaces when tab replacement is enabled.
const TAB_CHARACTER: char = '\t';
// Dropped from the output when line feed replacement is enabled.
const LINE_FEED_CHARACTER: char = '\x0A';
// Lower bound of the `DELETE..=APPLICATION_PROGRAM_COMMAND` control character range.
const DELETE_CHARACTER: char = '\x7F';
// Byte order mark, handled like a control character.
const BOM_CHARACTER: char = '\u{FEFF}';
// Lower bound of the `NULL..=UNIT_SEPARATOR` ASCII control character range.
const NULL_CHARACTER: char = '\x00';
// Upper bound of the `NULL..=UNIT_SEPARATOR` ASCII control character range.
const UNIT_SEPARATOR_CHARACTER: char = '\u{001F}';
// Upper bound of the `DELETE..=APPLICATION_PROGRAM_COMMAND` control character range.
const APPLICATION_PROGRAM_COMMAND_CHARACTER: char = '\u{009F}';
169
// Code point ranges of Nerd Fonts glyph sets, one constant per set (as named).
// NOTE(review): the bounds are presumably taken from the Nerd Fonts glyph tables — verify
// against the upstream list when updating them.
const NF_RANGE_DEVICONS: std::ops::RangeInclusive<char> =
    '\u{e700}'..='\u{e8ef}';
const NF_RANGE_SETI: std::ops::RangeInclusive<char> = '\u{e5fa}'..='\u{e6b7}';
const NF_RANGE_FONT_AWESOME: std::ops::RangeInclusive<char> =
    '\u{ed00}'..='\u{f2ff}';
const NF_RANGE_FONT_AWESOME_EXT: std::ops::RangeInclusive<char> =
    '\u{e200}'..='\u{e2a9}';
const NF_RANGE_MATERIAL: std::ops::RangeInclusive<char> =
    '\u{f0001}'..='\u{f1af0}';
const NF_RANGE_WEATHER: std::ops::RangeInclusive<char> =
    '\u{e300}'..='\u{e3e3}';
const NF_RANGE_OCTICONS_1: std::ops::RangeInclusive<char> =
    '\u{f400}'..='\u{f533}';
const NF_RANGE_OCTICONS_2: std::ops::RangeInclusive<char> =
    '\u{2665}'..='\u{26a1}';
const NF_RANGE_POWERLINE_1: std::ops::RangeInclusive<char> =
    '\u{e0a0}'..='\u{e0a2}';
const NF_RANGE_POWERLINE_2: std::ops::RangeInclusive<char> =
    '\u{e0b0}'..='\u{e0b3}';

// Every range above, gathered so that membership can be tested with a single `any` call.
// Characters inside one of these ranges are passed through unchanged by
// `replace_non_printable`.
const ALL_NF_RANGES: [&std::ops::RangeInclusive<char>; 10] = [
    &NF_RANGE_DEVICONS,
    &NF_RANGE_SETI,
    &NF_RANGE_FONT_AWESOME,
    &NF_RANGE_FONT_AWESOME_EXT,
    &NF_RANGE_MATERIAL,
    &NF_RANGE_WEATHER,
    &NF_RANGE_OCTICONS_1,
    &NF_RANGE_OCTICONS_2,
    &NF_RANGE_POWERLINE_1,
    &NF_RANGE_POWERLINE_2,
];
202
/// Options controlling which characters `replace_non_printable` rewrites.
pub struct ReplaceNonPrintableConfig {
    /// Whether tab characters are expanded to `tab_width` spaces.
    pub replace_tab: bool,
    /// Number of spaces emitted for each expanded tab.
    pub tab_width: usize,
    /// Whether line feed characters are removed from the output.
    pub replace_line_feed: bool,
    /// Whether control characters and the BOM are replaced with the NULL symbol.
    pub replace_control_characters: bool,
}

impl ReplaceNonPrintableConfig {
    /// Sets the tab width and returns `self` so that calls can be chained.
    pub fn tab_width(&mut self, tab_width: usize) -> &mut Self {
        self.tab_width = tab_width;
        self
    }
}

impl Default for ReplaceNonPrintableConfig {
    /// Enables every replacement and uses `TAB_WIDTH` as the tab width.
    fn default() -> Self {
        Self {
            replace_tab: true,
            tab_width: TAB_WIDTH,
            replace_line_feed: true,
            replace_control_characters: true,
        }
    }
}
227
228#[allow(clippy::missing_panics_doc)]
229/// Replaces non-printable characters in the given byte slice with default printable characters.
230///
231/// The tab width is used to determine how many spaces to replace a tab character with.
232/// The default printable character for non-printable characters is the Unicode symbol for NULL.
233///
234/// The function returns a tuple containing the processed string and a vector of offsets introduced
235/// by the transformation.
236///
237/// # Examples
238/// ```
239/// use television_utils::strings::{replace_non_printable, ReplaceNonPrintableConfig};
240///
241/// let input = b"Hello, World!";
242/// let (output, offsets) = replace_non_printable(input, &ReplaceNonPrintableConfig::default());
243/// assert_eq!(output, "Hello, World!");
244/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,0,0,0,0,0,0]);
245///
246/// let input = b"Hello,\tWorld!";
247/// let (output, offsets) = replace_non_printable(input, &ReplaceNonPrintableConfig::default().tab_width(4));
248/// assert_eq!(output, "Hello,    World!");
249/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,3,3,3,3,3,3]);
250///
251/// let input = b"Hello,\nWorld!";
252/// let (output, offsets) = replace_non_printable(input, &ReplaceNonPrintableConfig::default());
253/// assert_eq!(output, "Hello,World!");
254/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1]);
255/// ```
256pub fn replace_non_printable(
257    input: &[u8],
258    config: &ReplaceNonPrintableConfig,
259) -> (String, Vec<i16>) {
260    let mut output = String::with_capacity(input.len());
261    let mut offsets = Vec::new();
262    let mut cumulative_offset: i16 = 0;
263
264    let mut idx = 0;
265    let len = input.len();
266    while idx < len {
267        offsets.push(cumulative_offset);
268        if let Some((chr, skip_ahead)) = try_parse_utf8_char(&input[idx..]) {
269            idx += skip_ahead;
270
271            match chr {
272                // tab
273                TAB_CHARACTER if config.replace_tab => {
274                    output.push_str(&" ".repeat(config.tab_width));
275                    cumulative_offset +=
276                        i16::try_from(config.tab_width).unwrap() - 1;
277                }
278                // line feed
279                LINE_FEED_CHARACTER if config.replace_line_feed => {
280                    cumulative_offset -= 1;
281                }
282
283                // ASCII control characters from 0x00 to 0x1F
284                // + control characters from \u{007F} to \u{009F}
285                // + BOM
286                NULL_CHARACTER..=UNIT_SEPARATOR_CHARACTER
287                | DELETE_CHARACTER..=APPLICATION_PROGRAM_COMMAND_CHARACTER
288                | BOM_CHARACTER
289                    if config.replace_control_characters =>
290                {
291                    output.push(*NULL_SYMBOL);
292                }
293                // CJK Unified Ideographs
294                c if ('\u{4E00}'..='\u{9FFF}').contains(&c) => {
295                    output.push(c);
296                }
297                // Nerd fonts
298                c if ALL_NF_RANGES.iter().any(|r| r.contains(&c)) => {
299                    output.push(c);
300                }
301                // Unicode characters above 0x0700 seem unstable with ratatui
302                c if c > '\u{0700}' => {
303                    output.push(*NULL_SYMBOL);
304                }
305                // everything else
306                c => output.push(c),
307            }
308        } else {
309            output.push(*NULL_SYMBOL);
310            idx += 1;
311        }
312    }
313
314    (output, offsets)
315}
316
/// The threshold for considering a buffer to be printable ASCII.
///
/// This is used to determine whether a file is likely to be a text file
/// based on a sample of its contents.
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;

/// Returns the proportion of printable ASCII characters in the given buffer.
///
/// A byte counts as printable when it lies between the space character (0x20) and the tilde
/// (0x7E), both included. An empty buffer contains no printable characters and yields `0.0`
/// (rather than the `NaN` a naive `0 / 0` division would produce).
///
/// This really is a cheap way to determine if a buffer is likely to be a text file.
///
/// # Examples
/// ```
/// use television_utils::strings::proportion_of_printable_ascii_characters;
///
/// let buffer = b"Hello, World!";
/// let proportion = proportion_of_printable_ascii_characters(buffer);
/// assert_eq!(proportion, 1.0);
///
/// let buffer = b"Hello, World!\x00";
/// let proportion = proportion_of_printable_ascii_characters(buffer);
/// assert_eq!(proportion, 0.9285714);
///
/// let buffer = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F";
/// let proportion = proportion_of_printable_ascii_characters(buffer);
/// assert_eq!(proportion, 0.0);
///
/// let proportion = proportion_of_printable_ascii_characters(b"");
/// assert_eq!(proportion, 0.0);
/// ```
pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
    // Guard against the 0 / 0 division, which would return NaN and make every comparison
    // against `PRINTABLE_ASCII_THRESHOLD` evaluate to false.
    if buffer.is_empty() {
        return 0.0;
    }
    let printable = buffer
        .iter()
        .filter(|&&byte| (b' '..=b'~').contains(&byte))
        .count();
    printable as f32 / buffer.len() as f32
}
352
353const MAX_LINE_LENGTH: usize = 300;
354
355/// Preprocesses a line of text for display.
356///
357/// This function trims the line, replaces non-printable characters, and truncates the line if it
358/// is too long.
359///
360/// # Examples
361/// ```
362/// use television_utils::strings::preprocess_line;
363///
364/// let line = "Hello, World!";
365/// let (processed, offsets) = preprocess_line(line);
366/// assert_eq!(processed, "Hello, World!");
367/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,0,0,0,0,0,0]);
368///
369/// let line = "\x00World\x7F!";
370/// let (processed, offsets) = preprocess_line(line);
371/// assert_eq!(processed, "␀World␀!");
372/// assert_eq!(offsets, vec![0,0,0,0,0,0,0,0]);
373///
374/// let line = "a".repeat(400);
375/// let (processed, offsets) = preprocess_line(&line);
376/// assert_eq!(processed.len(), 300);
377/// assert_eq!(offsets, vec![0; 300]);
378/// ```
379pub fn preprocess_line(line: &str) -> (String, Vec<i16>) {
380    replace_non_printable(
381        {
382            if line.len() > MAX_LINE_LENGTH {
383                slice_up_to_char_boundary(line, MAX_LINE_LENGTH)
384            } else {
385                line
386            }
387        }
388        .as_bytes(),
389        &ReplaceNonPrintableConfig::default(),
390    )
391}
392
393/// Make a matched string printable while preserving match ranges in the process.
394///
395/// This function preprocesses the matched string and returns a printable version of it along with
396/// the match ranges adjusted to the new string.
397///
398/// # Examples
399/// ```
400/// use television_utils::strings::make_matched_string_printable;
401///
402/// let matched_string = "Hello, World!";
403/// let match_ranges = vec![(0, 1), (7, 8)];
404/// let match_ranges = Some(match_ranges.as_slice());
405/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
406/// assert_eq!(printable, "Hello, World!");
407/// assert_eq!(match_indices, vec![(0, 1), (7, 8)]);
408///
409/// let matched_string = "Hello,\tWorld!";
410/// let match_ranges = vec![(0, 1), (7, 8)];
411/// let match_ranges = Some(match_ranges.as_slice());
412/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
413/// assert_eq!(printable, "Hello,    World!");
414/// assert_eq!(match_indices, vec![(0, 1), (10, 11)]);
415///
416/// let matched_string = "Hello,\nWorld!";
417/// let match_ranges = vec![(0, 1), (7, 8)];
418/// let match_ranges = Some(match_ranges.as_slice());
419/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
420/// assert_eq!(printable, "Hello,World!");
421/// assert_eq!(match_indices, vec![(0, 1), (6, 7)]);
422///
423/// let matched_string = "Hello, World!";
424/// let (printable, match_indices) = make_matched_string_printable(matched_string, None);
425/// assert_eq!(printable, "Hello, World!");
426/// assert_eq!(match_indices, vec![]);
427///
428/// let matched_string = "build.rs";
429/// let match_ranges = vec![(0, 1), (7, 8)];
430/// let match_ranges = Some(match_ranges.as_slice());
431/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
432/// assert_eq!(printable, "build.rs");
433/// assert_eq!(match_indices, vec![(0, 1), (7, 8)]);
434///
435/// let matched_string = "a\tb";
436/// let match_ranges = vec![(0, 1), (2, 3)];
437/// let match_ranges = Some(match_ranges.as_slice());
438/// let (printable, match_indices) = make_matched_string_printable(matched_string, match_ranges);
439/// assert_eq!(printable, "a    b");
440/// assert_eq!(match_indices, vec![(0, 1), (5, 6)]);
441///
442/// let matched_string = "a\tbcd".repeat(65);
443/// let match_ranges = vec![(0, 1), (310, 311)];
444/// let match_ranges = Some(match_ranges.as_slice());
445/// let (printable, match_indices) = make_matched_string_printable(&matched_string, match_ranges);
446/// assert_eq!(printable.len(), 480);
447/// assert_eq!(match_indices, vec![(0, 1)]);
448/// ```
449///
450/// # Panics
451/// This will panic if the length of the printable string or the match indices don't fit into a
452/// `u32`.
453pub fn make_matched_string_printable(
454    matched_string: &str,
455    match_ranges: Option<&[(u32, u32)]>,
456) -> (String, Vec<(u32, u32)>) {
457    let (printable, transformation_offsets) = preprocess_line(matched_string);
458    let mut match_indices = Vec::new();
459
460    if let Some(ranges) = match_ranges {
461        for (start, end) in ranges.iter().take_while(|(start, _)| {
462            *start < u32::try_from(transformation_offsets.len()).unwrap()
463        }) {
464            let new_start = i64::from(*start)
465                + i64::from(transformation_offsets[*start as usize]);
466            let new_end = i64::from(*end)
467                + i64::from(
468                    // Use the last offset if the end index is out of bounds
469                    // (this will be the case when the match range includes the last character)
470                    transformation_offsets[(*end as usize)
471                        .min(transformation_offsets.len() - 1)],
472                );
473            match_indices.push((
474                u32::try_from(new_start).unwrap(),
475                u32::try_from(new_end).unwrap(),
476            ));
477        }
478    }
479
480    (printable, match_indices)
481}
482
/// Shrink a string to a maximum length, adding an ellipsis in the middle.
///
/// Lengths are measured in bytes. If the string is not longer than the maximum length, it is
/// returned as is. Otherwise, its beginning and its end are kept and joined with an ellipsis
/// (`…`).
///
/// Both kept parts are cut on character boundaries, rounding *inward*: a multi-byte character
/// that does not fit entirely in its half is dropped instead of being included whole, so the
/// result never grows past `max_length` because of boundary adjustments. The only case where
/// the result exceeds `max_length` bytes is a `max_length` below 3, for which the bare
/// (3-byte) ellipsis is returned.
///
/// # Examples
/// ```
/// use television_utils::strings::shrink_with_ellipsis;
///
/// let s = "Hello, World!";
/// assert_eq!(shrink_with_ellipsis(s, 13), "Hello, World!");
/// assert_eq!(shrink_with_ellipsis(s, 6), "H…!");
/// ```
pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
    if s.len() <= max_length {
        return s.to_string();
    }

    // Budget for each half: two bytes are given up per half to make room for the ellipsis.
    let half_max_length = (max_length / 2).saturating_sub(2);

    // `half_max_length < s.len()` holds here, so both scans stay in bounds and the
    // subtraction below cannot underflow. Index 0 and `s.len()` are always character
    // boundaries, hence the fallbacks are never actually needed.
    let head_end = (0..=half_max_length)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);
    let tail_start = (s.len() - half_max_length..=s.len())
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(s.len());

    let first_half = &s[..head_end];
    let second_half = &s[tail_start..];
    format!("{first_half}…{second_half}")
}
508
// Unit tests for the string helpers of this module.
//
// Byte offsets in the emoji cases assume 4-byte characters: "👋🌍!" is 9 bytes long.
#[cfg(test)]
mod tests {
    use super::*;

    fn test_next_char_boundary(input: &str, start: usize, expected: usize) {
        let actual = next_char_boundary(input, start);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_next_char_boundary_ascii() {
        test_next_char_boundary("Hello, World!", 0, 0);
        test_next_char_boundary("Hello, World!", 1, 1);
        test_next_char_boundary("Hello, World!", 13, 13);
        test_next_char_boundary("Hello, World!", 30, 13);
    }

    #[test]
    fn test_next_char_boundary_emoji() {
        test_next_char_boundary("👋🌍!", 0, 0);
        test_next_char_boundary("👋🌍!", 1, 4);
        test_next_char_boundary("👋🌍!", 4, 4);
        test_next_char_boundary("👋🌍!", 8, 8);
        test_next_char_boundary("👋🌍!", 7, 8);
    }

    fn test_previous_char_boundary(
        input: &str,
        start: usize,
        expected: usize,
    ) {
        let actual = prev_char_boundary(input, start);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_previous_char_boundary_ascii() {
        test_previous_char_boundary("Hello, World!", 0, 0);
        test_previous_char_boundary("Hello, World!", 1, 1);
        test_previous_char_boundary("Hello, World!", 5, 5);
    }

    #[test]
    fn test_previous_char_boundary_emoji() {
        test_previous_char_boundary("👋🌍!", 0, 0);
        test_previous_char_boundary("👋🌍!", 4, 4);
        test_previous_char_boundary("👋🌍!", 6, 4);
        test_previous_char_boundary("👋🌍!", 8, 8);
    }

    fn test_slice_at_char_boundaries(
        input: &str,
        start: usize,
        end: usize,
        expected: &str,
    ) {
        let actual = slice_at_char_boundaries(input, start, end);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_slice_at_char_boundaries_ascii() {
        test_slice_at_char_boundaries("Hello, World!", 0, 0, "");
        test_slice_at_char_boundaries("Hello, World!", 0, 1, "H");
        test_slice_at_char_boundaries("Hello, World!", 0, 13, "Hello, World!");
        test_slice_at_char_boundaries("Hello, World!", 0, 30, "");
    }

    #[test]
    fn test_slice_at_char_boundaries_emoji() {
        test_slice_at_char_boundaries("👋🌍!", 0, 0, "");
        test_slice_at_char_boundaries("👋🌍!", 0, 4, "👋");
        test_slice_at_char_boundaries("👋🌍!", 0, 8, "👋🌍");
        test_slice_at_char_boundaries("👋🌍!", 0, 7, "👋🌍");
        test_slice_at_char_boundaries("👋🌍!", 0, 9, "👋🌍!");
    }

    #[test]
    fn test_try_parse_utf8_char_cases() {
        assert_eq!(try_parse_utf8_char(b"Hello"), Some(('H', 1)));
        assert_eq!(try_parse_utf8_char("👋🌍!".as_bytes()), Some(('👋', 4)));
        assert_eq!(try_parse_utf8_char(b""), None);
        assert_eq!(try_parse_utf8_char(b"\xFF"), None);
    }

    fn test_replace_non_printable(input: &str, expected: &str) {
        let (actual, _offset) = replace_non_printable(
            input.as_bytes(),
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_replace_non_printable_ascii() {
        test_replace_non_printable("Hello, World!", "Hello, World!");
    }

    #[test]
    fn test_replace_non_printable_tab() {
        test_replace_non_printable("Hello\tWorld!", "Hello  World!");
        // Leading tab and trailing line feed, spelled with escapes so that they stay visible.
        test_replace_non_printable("\t-- AND\n", "  -- AND");
    }

    #[test]
    fn test_replace_non_printable_line_feed() {
        test_replace_non_printable("Hello\nWorld!", "HelloWorld!");
    }

    #[test]
    fn test_replace_non_printable_null() {
        test_replace_non_printable("Hello\x00World!", "Hello␀World!");
        test_replace_non_printable("Hello World!\0", "Hello World!␀");
    }

    #[test]
    fn test_replace_non_printable_delete() {
        test_replace_non_printable("Hello\x7FWorld!", "Hello␀World!");
    }

    #[test]
    fn test_replace_non_printable_bom() {
        test_replace_non_printable("Hello\u{FEFF}World!", "Hello␀World!");
    }

    #[test]
    fn test_replace_non_printable_start_txt() {
        // The control character is written as an explicit escape so that it cannot be lost.
        // NOTE(review): U+0002 (START OF TEXT) is inferred from the test name — any C0
        // control character exercises the same code path.
        test_replace_non_printable("Àì\u{2}", "Àì␀");
    }

    #[test]
    fn test_replace_non_printable_range_tab() {
        let input = b"Hello,\tWorld!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default(),
        );
        assert_eq!(output, "Hello,    World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3]);
    }

    #[test]
    fn test_replace_non_printable_range_line_feed() {
        let input = b"Hello,\nWorld!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(output, "Hello,World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1]);
    }

    #[test]
    fn test_replace_non_printable_no_range_changes() {
        let input = b"Hello,\x00World!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(output, "Hello,␀World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);

        let input = b"Hello,\x7FWorld!";
        let (output, offsets) = replace_non_printable(
            input,
            &ReplaceNonPrintableConfig::default().tab_width(2),
        );
        assert_eq!(output, "Hello,␀World!");
        assert_eq!(offsets, vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    fn test_proportion_of_printable_ascii_characters(
        input: &str,
        expected: f32,
    ) {
        let actual =
            proportion_of_printable_ascii_characters(input.as_bytes());
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_proportion_of_printable_ascii_characters_ascii() {
        test_proportion_of_printable_ascii_characters("Hello, World!", 1.0);
        test_proportion_of_printable_ascii_characters(
            "Hello, World!\x00",
            0.9285714,
        );
        test_proportion_of_printable_ascii_characters(
            "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
            0.0,
        );
    }

    fn test_preprocess_line(input: &str, expected: &str) {
        let (actual, _offset) = preprocess_line(input);
        assert_eq!(actual, expected, "input: {:?}", input);
    }

    #[test]
    fn test_preprocess_line_cases() {
        test_preprocess_line("Hello, World!", "Hello, World!");
        test_preprocess_line("Hello, World!\n", "Hello, World!");
        test_preprocess_line("Hello, World!\x00", "Hello, World!␀");
        test_preprocess_line("Hello, World!\x7F", "Hello, World!␀");
        test_preprocess_line("Hello, World!\u{FEFF}", "Hello, World!␀");
        test_preprocess_line(&"a".repeat(400), &"a".repeat(300));
    }

    #[test]
    fn test_make_matched_string_printable_tab() {
        let ranges = [(0, 1), (2, 3)];
        let (printable, match_indices) =
            make_matched_string_printable("a\tb", Some(&ranges));
        assert_eq!(printable, "a    b");
        assert_eq!(match_indices, vec![(0, 1), (5, 6)]);
    }

    #[test]
    fn test_shrink_with_ellipsis_ascii() {
        assert_eq!(shrink_with_ellipsis("Hello, World!", 13), "Hello, World!");
        assert_eq!(shrink_with_ellipsis("Hello, World!", 6), "H…!");
    }
}