Skip to main content

binocular/
text.rs

/// The empty string slice, usable wherever a `&'static str` placeholder is needed.
pub const EMPTY_STRING: &str = "";

/// Number of columns a tab character occupies by default.
pub const TAB_WIDTH: usize = 4;
3
/// Truncates `s` to at most `max_chars` characters (Unicode scalar values).
///
/// Returns the (possibly shortened) prefix of `s` together with a flag that is
/// `true` only when something was actually cut off.
pub fn truncate_str_chars(s: &str, max_chars: usize) -> (&str, bool) {
    // Fast path: a string of at most `max_chars` bytes cannot contain more
    // than `max_chars` characters, so the character scan is skipped.
    if s.len() > max_chars {
        // The byte offset of the character at index `max_chars` (if there is
        // one) is exactly where the kept prefix ends.
        if let Some((cut, _)) = s.char_indices().nth(max_chars) {
            return (&s[..cut], true);
        }
    }
    (s, false)
}
13
/// U+2400 SYMBOL FOR NULL, the visible placeholder glyph.
const NULL_SYMBOL: char = '\u{2400}';

/// Horizontal tab (U+0009).
const TAB_CHARACTER: char = '\t';
/// Line feed (U+000A).
const LINE_FEED_CHARACTER: char = '\n';
/// Carriage return (U+000D).
const CARRIAGE_RETURN_CHARACTER: char = '\r';

/// First code point of the C0 control range (U+0000).
const NULL_CHARACTER: char = '\0';
/// Last code point of the C0 control range (U+001F).
const UNIT_SEPARATOR_CHARACTER: char = '\u{1F}';
/// DELETE (U+007F).
const DELETE_CHARACTER: char = '\u{7F}';
/// Last code point of the C1 control range (U+009F).
const APPLICATION_PROGRAM_COMMAND_CHARACTER: char = '\u{9F}';
/// Byte order mark / zero width no-break space (U+FEFF).
const BOM_CHARACTER: char = '\u{FEFF}';
23
/// Options controlling how non-printable characters are rewritten.
///
/// The setters on this type take and return `&mut Self`, so they can be
/// chained on a mutable value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplaceNonPrintableConfig {
    /// Whether tab characters are expanded into spaces.
    pub replace_tab: bool,
    /// Number of spaces a tab is expanded to when `replace_tab` is set.
    pub tab_width: usize,
    /// Whether line feeds are removed instead of being kept.
    pub replace_line_feed: bool,
    /// Whether control characters are swapped for a visible placeholder.
    pub replace_control_characters: bool,
}

impl ReplaceNonPrintableConfig {
    /// Sets the number of spaces a tab expands to.
    pub fn tab_width(&mut self, tab_width: usize) -> &mut Self {
        self.tab_width = tab_width;
        self
    }

    /// Turns off line feed replacement.
    pub fn keep_line_feed(&mut self) -> &mut Self {
        self.replace_line_feed = false;
        self
    }

    /// Turns off control character replacement.
    pub fn keep_control_characters(&mut self) -> &mut Self {
        self.replace_control_characters = false;
        self
    }
}
47
48impl Default for ReplaceNonPrintableConfig {
49    fn default() -> Self {
50        Self {
51            replace_tab: true,
52            tab_width: TAB_WIDTH,
53            replace_line_feed: true,
54            replace_control_characters: true,
55        }
56    }
57}
58
/// Returns the first UTF-8 character boundary in `s` at or after byte index
/// `start`.
///
/// Yields `s.len()` when `start` is at or past the end of the string, or when
/// no boundary exists before the end.
pub fn next_char_boundary(s: &str, start: usize) -> usize {
    let end = s.len();
    // An empty range (start >= end) falls straight through to `end`.
    (start..end)
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(end)
}
70
/// Returns the last UTF-8 character boundary in `s` at or before byte index
/// `start`.
///
/// An index past the end of the string resolves to `s.len()`.
pub fn prev_char_boundary(s: &str, start: usize) -> usize {
    // Index 0 is always a boundary, so the search cannot come up empty; the
    // fallback only exists to avoid an `unwrap`.
    (0..=start)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0)
}
78
79pub fn slice_at_char_boundaries(s: &str, start_byte_index: usize, end_byte_index: usize) -> &str {
80    if start_byte_index > end_byte_index || start_byte_index > s.len() || end_byte_index > s.len() {
81        return EMPTY_STRING;
82    }
83    &s[prev_char_boundary(s, start_byte_index)..next_char_boundary(s, end_byte_index)]
84}
85
86pub fn slice_up_to_char_boundary(s: &str, byte_index: usize) -> &str {
87    &s[..next_char_boundary(s, byte_index)]
88}
89
/// Decodes a single UTF-8 character from the start of `input`.
///
/// Prefixes of 1, 2, 3 and 4 bytes are tried in that order; the first one that
/// is valid UTF-8 wins. Returns the decoded character and the number of bytes
/// it occupies, or `None` when no such prefix is valid (including when `input`
/// is empty).
pub fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
    (1..=4).find_map(|width| {
        // `get` yields `None` when fewer than `width` bytes remain, which just
        // moves on to the next (even longer, hence also failing) width.
        let prefix = input.get(..width)?;
        let text = std::str::from_utf8(prefix).ok()?;
        text.chars().next().map(|chr| (chr, width))
    })
}
103
104pub fn replace_non_printable(
105    input: &[u8],
106    config: &ReplaceNonPrintableConfig,
107) -> (String, Vec<i16>) {
108    let mut output = String::with_capacity(input.len());
109    let mut offsets = Vec::new();
110    let mut cumulative_offset: i16 = 0;
111
112    let mut idx = 0;
113    let len = input.len();
114    while idx < len {
115        if let Some((chr, skip_ahead)) = try_parse_utf8_char(&input[idx..]) {
116            for _ in 0..skip_ahead {
117                offsets.push(cumulative_offset);
118            }
119            idx += skip_ahead;
120            match chr {
121                TAB_CHARACTER if config.replace_tab => {
122                    output.push_str(&" ".repeat(config.tab_width));
123                    cumulative_offset += i16::try_from(config.tab_width).unwrap() - 1;
124                }
125                LINE_FEED_CHARACTER => {
126                    if config.replace_line_feed {
127                        cumulative_offset -= 1;
128                    } else {
129                        output.push(chr);
130                    }
131                }
132                CARRIAGE_RETURN_CHARACTER => {
133                    cumulative_offset -= 1;
134                }
135                NULL_CHARACTER..=UNIT_SEPARATOR_CHARACTER
136                | DELETE_CHARACTER..=APPLICATION_PROGRAM_COMMAND_CHARACTER
137                | BOM_CHARACTER
138                    if config.replace_control_characters =>
139                {
140                    output.push(NULL_SYMBOL);
141                }
142                // Unicode characters above 0x0700 seem unstable with ratatui
143                c if c > '\u{0700}' => {
144                    output.push(NULL_SYMBOL);
145                }
146                c => output.push(c),
147            }
148        } else {
149            offsets.push(cumulative_offset);
150            output.push(NULL_SYMBOL);
151            idx += 1;
152        }
153    }
154
155    (output, offsets)
156}
157
158const MAX_LINE_LENGTH: usize = 300;
159
160pub fn preprocess_line(line: &str) -> (String, Vec<i16>) {
161    replace_non_printable(
162        {
163            if line.len() > MAX_LINE_LENGTH {
164                slice_up_to_char_boundary(line, MAX_LINE_LENGTH)
165            } else {
166                line
167            }
168        }
169        .as_bytes(),
170        &ReplaceNonPrintableConfig::default(),
171    )
172}
173
174pub fn sanitize_text_with_indices(display_str: &str, indices: &[u32]) -> (String, Vec<u32>) {
175    if display_str.is_ascii()
176        && !display_str
177            .bytes()
178            .any(|b| b == b'\t' || b == b'\n' || b < 32 || b == 127)
179    {
180        return (display_str.to_string(), indices.to_vec());
181    }
182
183    let (printable, transformation_offsets) = preprocess_line(display_str);
184    let mut match_indices = Vec::with_capacity(indices.len());
185
186    for &start in indices {
187        if start < u32::try_from(transformation_offsets.len()).unwrap() {
188            let new_start = i64::from(start) + i64::from(transformation_offsets[start as usize]);
189            match_indices.push(u32::try_from(new_start).unwrap_or(0));
190        }
191    }
192
193    (printable, match_indices)
194}
195
196pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
197    if s.len() <= max_length {
198        return s.to_string();
199    }
200
201    let half_max_length = (max_length / 2).saturating_sub(2);
202    let first_half = slice_up_to_char_boundary(s, half_max_length);
203    let second_half = slice_at_char_boundaries(s, s.len() - half_max_length, s.len());
204    format!("{first_half}…{second_half}")
205}
206
/// Reference ratio of printable ASCII bytes.
// NOTE(review): presumably the cut-off above which a buffer is treated as
// text — confirm against the call sites, which are not in this file.
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;

/// Computes the fraction of bytes in `buffer` that are printable ASCII.
///
/// A byte counts as printable when it lies between space (0x20) and tilde
/// (0x7E) inclusive, or is a tab, line feed or carriage return. An empty
/// buffer yields `1.0`.
pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
    if buffer.is_empty() {
        return 1.0;
    }
    let printable = buffer
        .iter()
        .filter(|&&byte| matches!(byte, b' '..=b'~' | b'\t' | b'\n' | b'\r'))
        .count();
    printable as f32 / buffer.len() as f32
}
221
/// Finds the 1-indexed column of the first match inside the content part of a
/// grep-style line.
///
/// The content starts right after the first `:<digits>:` sequence in `text`
/// (two colons with only ASCII digits, and at least one, between them).
/// `match_indices` are compared against that start measured in characters.
///
/// Returns `None` when no such sequence exists or when no index falls at or
/// after the start of the content.
pub fn find_first_match_column_in_grep_result(text: &str, match_indices: &[u32]) -> Option<usize> {
    let mut colon_positions = text
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b':')
        .map(|(pos, _)| pos);

    // Walk over each pair of neighbouring colons until one encloses a number.
    let mut previous = colon_positions.next()?;
    for current in colon_positions {
        // Colons are ASCII, so both slice ends sit on character boundaries.
        let between = &text[previous + 1..current];
        let is_line_number = !between.is_empty() && between.bytes().all(|b| b.is_ascii_digit());

        if is_line_number {
            let content_start_byte = current + 1;
            // Byte and character positions only coincide for ASCII text.
            let content_start_char = if text.is_ascii() {
                content_start_byte
            } else {
                text[..content_start_byte].chars().count()
            };

            return match_indices
                .iter()
                .map(|&idx| idx as usize)
                .find(|&idx| idx >= content_start_char)
                .map(|idx| idx - content_start_char + 1); // 1-indexed
        }

        previous = current;
    }
    None
}