/// The empty string slice; `slice_at_char_boundaries` returns it when the
/// requested byte range is invalid.
pub const EMPTY_STRING: &str = "";
/// Default number of spaces a tab expands to (used by
/// `ReplaceNonPrintableConfig::default`).
pub const TAB_WIDTH: usize = 4;
3
/// Truncates `s` to at most `max_chars` characters (not bytes).
///
/// Returns the resulting prefix together with a flag that is `true` only when
/// at least one character was actually cut off. The returned slice always ends
/// on a character boundary.
pub fn truncate_str_chars(s: &str, max_chars: usize) -> (&str, bool) {
    // A string never has more characters than bytes, so a short enough byte
    // length rules out truncation without having to walk the characters.
    if s.len() > max_chars {
        // Byte offset of the first character beyond the limit, if one exists.
        if let Some((cut, _)) = s.char_indices().nth(max_chars) {
            return (&s[..cut], true);
        }
    }
    (s, false)
}
13
// Characters that `replace_non_printable` matches on or emits.

/// U+2400 (SYMBOL FOR NULL): the visible placeholder pushed to the output in
/// place of characters that are replaced.
const NULL_SYMBOL: char = '\u{2400}';
/// Horizontal tab.
const TAB_CHARACTER: char = '\t';
/// Line feed (U+000A).
const LINE_FEED_CHARACTER: char = '\x0A';
/// Carriage return.
const CARRIAGE_RETURN_CHARACTER: char = '\r';
/// DELETE (U+007F); lower bound of the second replaced control range.
const DELETE_CHARACTER: char = '\x7F';
/// U+FEFF, the byte-order-mark code point.
const BOM_CHARACTER: char = '\u{FEFF}';
/// NUL (U+0000); lower bound of the first replaced control range.
const NULL_CHARACTER: char = '\x00';
/// U+001F; upper bound of the first replaced control range.
const UNIT_SEPARATOR_CHARACTER: char = '\u{001F}';
/// U+009F; upper bound of the second replaced control range.
const APPLICATION_PROGRAM_COMMAND_CHARACTER: char = '\u{009F}';
23
24pub struct ReplaceNonPrintableConfig {
25 pub replace_tab: bool,
26 pub tab_width: usize,
27 pub replace_line_feed: bool,
28 pub replace_control_characters: bool,
29}
30
31impl ReplaceNonPrintableConfig {
32 pub fn tab_width(&mut self, tab_width: usize) -> &mut Self {
33 self.tab_width = tab_width;
34 self
35 }
36
37 pub fn keep_line_feed(&mut self) -> &mut Self {
38 self.replace_line_feed = false;
39 self
40 }
41
42 pub fn keep_control_characters(&mut self) -> &mut Self {
43 self.replace_control_characters = false;
44 self
45 }
46}
47
48impl Default for ReplaceNonPrintableConfig {
49 fn default() -> Self {
50 Self {
51 replace_tab: true,
52 tab_width: TAB_WIDTH,
53 replace_line_feed: true,
54 replace_control_characters: true,
55 }
56 }
57}
58
/// Returns the smallest character boundary of `s` that is `>= start`.
///
/// Any `start` at or beyond the end of the string yields `s.len()`.
pub fn next_char_boundary(s: &str, start: usize) -> usize {
    let len = s.len();
    // The range is empty when `start >= len`, and `len` itself is always a
    // boundary, so it serves as the fallback in both situations.
    (start..len)
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(len)
}
70
/// Returns the largest character boundary of `s` that is `<= start`.
///
/// A `start` beyond the end of the string yields `s.len()`.
pub fn prev_char_boundary(s: &str, start: usize) -> usize {
    // Clamp first: every index past the end resolves to `s.len()` anyway, and
    // clamping avoids stepping down one byte at a time from a far-away `start`.
    let upper = start.min(s.len());
    // Index 0 is always a boundary, so the search cannot come up empty; the
    // fallback only exists to avoid an `unwrap`.
    (0..=upper)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
78
79pub fn slice_at_char_boundaries(s: &str, start_byte_index: usize, end_byte_index: usize) -> &str {
80 if start_byte_index > end_byte_index || start_byte_index > s.len() || end_byte_index > s.len() {
81 return EMPTY_STRING;
82 }
83 &s[prev_char_boundary(s, start_byte_index)..next_char_boundary(s, end_byte_index)]
84}
85
86pub fn slice_up_to_char_boundary(s: &str, byte_index: usize) -> &str {
87 &s[..next_char_boundary(s, byte_index)]
88}
89
/// Tries to decode a single UTF-8 character from the start of `input`.
///
/// Prefixes of 1, 2, 3 and then 4 bytes are tried in that order; the first one
/// that is valid UTF-8 wins. Returns the decoded character and the number of
/// bytes it occupies, or `None` when no prefix decodes (including empty or
/// truncated input).
pub fn try_parse_utf8_char(input: &[u8]) -> Option<(char, usize)> {
    (1..=4).find_map(|width| {
        // `get` fails when fewer than `width` bytes remain.
        let prefix = input.get(..width)?;
        let text = std::str::from_utf8(prefix).ok()?;
        // A valid, non-empty prefix always starts with a character.
        text.chars().next().map(|chr| (chr, width))
    })
}
103
104pub fn replace_non_printable(
105 input: &[u8],
106 config: &ReplaceNonPrintableConfig,
107) -> (String, Vec<i16>) {
108 let mut output = String::with_capacity(input.len());
109 let mut offsets = Vec::new();
110 let mut cumulative_offset: i16 = 0;
111
112 let mut idx = 0;
113 let len = input.len();
114 while idx < len {
115 if let Some((chr, skip_ahead)) = try_parse_utf8_char(&input[idx..]) {
116 for _ in 0..skip_ahead {
117 offsets.push(cumulative_offset);
118 }
119 idx += skip_ahead;
120 match chr {
121 TAB_CHARACTER if config.replace_tab => {
122 output.push_str(&" ".repeat(config.tab_width));
123 cumulative_offset += i16::try_from(config.tab_width).unwrap() - 1;
124 }
125 LINE_FEED_CHARACTER => {
126 if config.replace_line_feed {
127 cumulative_offset -= 1;
128 } else {
129 output.push(chr);
130 }
131 }
132 CARRIAGE_RETURN_CHARACTER => {
133 cumulative_offset -= 1;
134 }
135 NULL_CHARACTER..=UNIT_SEPARATOR_CHARACTER
136 | DELETE_CHARACTER..=APPLICATION_PROGRAM_COMMAND_CHARACTER
137 | BOM_CHARACTER
138 if config.replace_control_characters =>
139 {
140 output.push(NULL_SYMBOL);
141 }
142 c if c > '\u{0700}' => {
144 output.push(NULL_SYMBOL);
145 }
146 c => output.push(c),
147 }
148 } else {
149 offsets.push(cumulative_offset);
150 output.push(NULL_SYMBOL);
151 idx += 1;
152 }
153 }
154
155 (output, offsets)
156}
157
158const MAX_LINE_LENGTH: usize = 300;
159
160pub fn preprocess_line(line: &str) -> (String, Vec<i16>) {
161 replace_non_printable(
162 {
163 if line.len() > MAX_LINE_LENGTH {
164 slice_up_to_char_boundary(line, MAX_LINE_LENGTH)
165 } else {
166 line
167 }
168 }
169 .as_bytes(),
170 &ReplaceNonPrintableConfig::default(),
171 )
172}
173
174pub fn sanitize_text_with_indices(display_str: &str, indices: &[u32]) -> (String, Vec<u32>) {
175 if display_str.is_ascii()
176 && !display_str
177 .bytes()
178 .any(|b| b == b'\t' || b == b'\n' || b < 32 || b == 127)
179 {
180 return (display_str.to_string(), indices.to_vec());
181 }
182
183 let (printable, transformation_offsets) = preprocess_line(display_str);
184 let mut match_indices = Vec::with_capacity(indices.len());
185
186 for &start in indices {
187 if start < u32::try_from(transformation_offsets.len()).unwrap() {
188 let new_start = i64::from(start) + i64::from(transformation_offsets[start as usize]);
189 match_indices.push(u32::try_from(new_start).unwrap_or(0));
190 }
191 }
192
193 (printable, match_indices)
194}
195
196pub fn shrink_with_ellipsis(s: &str, max_length: usize) -> String {
197 if s.len() <= max_length {
198 return s.to_string();
199 }
200
201 let half_max_length = (max_length / 2).saturating_sub(2);
202 let first_half = slice_up_to_char_boundary(s, half_max_length);
203 let second_half = slice_at_char_boundaries(s, s.len() - half_max_length, s.len());
204 format!("{first_half}…{second_half}")
205}
206
/// Threshold on a printable-ASCII proportion.
// NOTE(review): presumably compared against the result of
// `proportion_of_printable_ascii_characters` by callers — confirm at the call sites.
pub const PRINTABLE_ASCII_THRESHOLD: f32 = 0.7;
208
/// Returns the fraction of bytes in `buffer` that are printable ASCII.
///
/// A byte counts as printable when it lies in 32..=126 or is a tab, line feed
/// or carriage return. An empty buffer is reported as fully printable (`1.0`).
pub fn proportion_of_printable_ascii_characters(buffer: &[u8]) -> f32 {
    if buffer.is_empty() {
        return 1.0;
    }
    let printable = buffer
        .iter()
        .filter(|&&byte| matches!(byte, b'\t' | b'\n' | b'\r' | 32..=126))
        .count();
    printable as f32 / buffer.len() as f32
}
221
/// Finds the 1-based column of the first match inside the content part of a
/// grep-style `…:<line number>:<content>` string.
///
/// The content is taken to start right after the first pair of colons that
/// encloses a non-empty run of ASCII digits. `match_indices` are compared
/// against that start expressed in characters; the first index at or after it
/// determines the column. Returns `None` when no such colon pair exists or
/// when no index falls inside the content.
pub fn find_first_match_column_in_grep_result(text: &str, match_indices: &[u32]) -> Option<usize> {
    let mut previous_colon: Option<usize> = None;

    for (pos, byte) in text.bytes().enumerate() {
        if byte != b':' {
            continue;
        }

        // Remember this colon for the next round and look at the one before it.
        let start = match previous_colon.replace(pos) {
            Some(start) => start,
            None => continue,
        };

        // Both ends are ASCII colons, so slicing between them is boundary-safe.
        let candidate = &text[start + 1..pos];
        let is_line_number =
            !candidate.is_empty() && candidate.bytes().all(|digit| digit.is_ascii_digit());
        if !is_line_number {
            continue;
        }

        let content_start_byte = pos + 1;
        // Byte and character positions coincide for pure ASCII text.
        let content_start_char = if text.is_ascii() {
            content_start_byte
        } else {
            text[..content_start_byte].chars().count()
        };

        // The first qualifying colon pair decides the outcome either way.
        return match_indices
            .iter()
            .map(|&idx| idx as usize)
            .find(|&idx| idx >= content_start_char)
            .map(|idx| idx - content_start_char + 1);
    }

    None
}