1use memchr::memchr;
4
// --- 7-bit (C0) control bytes -------------------------------------------------

/// Escape: introduces every 7-bit escape sequence (`ESC [`, `ESC ]`, ...).
const ESC: u8 = 0x1b;
/// Bell: accepted as an OSC terminator in addition to ST.
const BEL: u8 = 0x07;
/// Delete: removed from output by the strip functions, like other control bytes.
const DEL: u8 = 0x7f;

// --- 8-bit (C1) control bytes -------------------------------------------------

/// String Terminator: ends OSC/DCS/SOS/PM/APC strings.
const C1_ST: u8 = 0x9c;
/// Device Control String introducer (ST-terminated).
const C1_DCS: u8 = 0x90;
/// Start Of String introducer (ST-terminated).
const C1_SOS: u8 = 0x98;
/// Control Sequence Introducer, the single-byte form of `ESC [`.
const C1_CSI: u8 = 0x9b;
/// Operating System Command introducer, the single-byte form of `ESC ]`.
const C1_OSC: u8 = 0x9d;
/// Privacy Message introducer (ST-terminated).
const C1_PM: u8 = 0x9e;
/// Application Program Command introducer (ST-terminated).
const C1_APC: u8 = 0x9f;

// --- Bytes that abort a sequence in progress -----------------------------------

/// Cancel: aborts the CSI or string sequence currently being parsed.
const CAN: u8 = 0x18;
/// Substitute: aborts the CSI or string sequence currently being parsed.
const SUB: u8 = 0x1a;

// --- Safety limits -------------------------------------------------------------

/// Maximum number of payload bytes scanned inside an OSC or ST-terminated string
/// before the parser gives up and treats the sequence as finished.
const MAX_STRING_SEQUENCE_BYTES: usize = 4096;
/// Maximum number of bytes scanned after a CSI introducer before the parser
/// gives up and treats the sequence as finished.
const MAX_CSI_SEQUENCE_BYTES: usize = 64;
19
/// Reports whether the byte at `start` is a raw 8-bit C1 control (`0x80..=0x9f`).
///
/// Returns the control byte together with its encoded length (always `1`), or
/// `None` when `start` is out of bounds or the byte lies outside the C1 range.
#[inline]
fn parse_c1_at(bytes: &[u8], start: usize) -> Option<(u8, usize)> {
    bytes
        .get(start)
        .copied()
        .filter(|candidate| matches!(*candidate, 0x80..=0x9f))
        .map(|candidate| (candidate, 1))
}
28
29#[inline]
30fn parse_csi(bytes: &[u8], start: usize) -> Option<usize> {
31 let mut index = start;
37 let mut phase = 0u8; let mut consumed = 0usize;
39
40 while index < bytes.len() {
41 let byte = bytes[index];
42 if byte == ESC {
43 return Some(index);
45 }
46 if byte == CAN || byte == SUB {
47 return Some(index + 1);
49 }
50
51 consumed += 1;
52 if consumed > MAX_CSI_SEQUENCE_BYTES {
53 return Some(index + 1);
55 }
56
57 if phase == 0 && (0x30..=0x3f).contains(&byte) {
58 index += 1;
59 continue;
60 }
61 if (0x20..=0x2f).contains(&byte) {
62 phase = 1;
63 index += 1;
64 continue;
65 }
66 if (0x40..=0x7e).contains(&byte) {
67 return Some(index + 1);
68 }
69
70 return Some(index);
72 }
73
74 None
75}
76
77#[inline]
78fn parse_osc(bytes: &[u8], start: usize) -> Option<usize> {
79 let mut consumed = 0usize;
80 for index in start..bytes.len() {
81 if bytes[index] == ESC && !(index + 1 < bytes.len() && bytes[index + 1] == b'\\') {
82 return Some(index);
84 }
85 if bytes[index] == CAN || bytes[index] == SUB {
86 return Some(index + 1);
87 }
88
89 if let Some((c1, len)) = parse_c1_at(bytes, index)
90 && c1 == C1_ST
91 {
92 return Some(index + len);
93 }
94
95 match bytes[index] {
96 BEL | C1_ST => return Some(index + 1),
97 ESC if index + 1 < bytes.len() && bytes[index + 1] == b'\\' => return Some(index + 2),
98 _ => {}
99 }
100
101 consumed += 1;
102 if consumed > MAX_STRING_SEQUENCE_BYTES {
103 return Some(index + 1);
105 }
106 }
107 None
108}
109
110#[inline]
111fn parse_st_terminated(bytes: &[u8], start: usize) -> Option<usize> {
112 let mut consumed = 0usize;
113 for index in start..bytes.len() {
114 if bytes[index] == ESC && !(index + 1 < bytes.len() && bytes[index + 1] == b'\\') {
115 return Some(index);
116 }
117 if bytes[index] == CAN || bytes[index] == SUB {
118 return Some(index + 1);
119 }
120
121 if let Some((c1, len)) = parse_c1_at(bytes, index)
122 && c1 == C1_ST
123 {
124 return Some(index + len);
125 }
126
127 match bytes[index] {
128 C1_ST => return Some(index + 1),
129 ESC if index + 1 < bytes.len() && bytes[index + 1] == b'\\' => return Some(index + 2),
130 _ => {}
131 }
132
133 consumed += 1;
134 if consumed > MAX_STRING_SEQUENCE_BYTES {
135 return Some(index + 1);
136 }
137 }
138 None
139}
140
141#[inline]
142fn parse_ansi_sequence_bytes(bytes: &[u8]) -> Option<usize> {
143 if bytes.is_empty() {
144 return None;
145 }
146
147 if let Some((c1, c1_len)) = parse_c1_at(bytes, 0) {
148 return match c1 {
149 C1_CSI => parse_csi(bytes, c1_len),
150 C1_OSC => parse_osc(bytes, c1_len),
151 C1_DCS | C1_SOS | C1_PM | C1_APC => parse_st_terminated(bytes, c1_len),
152 _ => Some(c1_len),
153 };
154 }
155
156 match bytes[0] {
157 ESC => {
158 if bytes.len() < 2 {
159 return None;
160 }
161
162 match bytes[1] {
163 b'[' => parse_csi(bytes, 2),
164 b']' => parse_osc(bytes, 2),
165 b'P' | b'^' | b'_' | b'X' => parse_st_terminated(bytes, 2),
166 next if next < 128 => Some(2),
167 _ => Some(1),
168 }
169 }
170 _ => None,
171 }
172}
173
174pub fn strip_ansi(text: &str) -> String {
176 let mut output = Vec::with_capacity(text.len());
177 let bytes = text.as_bytes();
178 let mut i = 0;
179
180 while i < bytes.len() {
181 let next_esc = memchr(ESC, &bytes[i..]).map_or(bytes.len(), |offset| i + offset);
182 while i < next_esc {
183 if bytes[i] == b'\n' || bytes[i] == b'\r' || bytes[i] == b'\t' {
184 output.push(bytes[i]);
185 i += 1;
186 } else if bytes[i] < 32 || bytes[i] == DEL {
187 i += 1;
188 } else {
189 output.push(bytes[i]);
190 i += 1;
191 }
192 }
193
194 if i >= bytes.len() {
195 break;
196 }
197
198 if let Some(len) = parse_ansi_sequence_bytes(&bytes[i..]) {
199 i += len;
200 continue;
201 } else {
202 break;
204 }
205 }
206
207 String::from_utf8_lossy(&output).into_owned()
208}
209
210pub fn strip_ansi_bytes(input: &[u8]) -> Vec<u8> {
214 let mut output = Vec::with_capacity(input.len());
215 let bytes = input;
216 let mut i = 0;
217
218 while i < bytes.len() {
219 if (bytes[i] == ESC || parse_c1_at(bytes, i).is_some())
220 && let Some(len) = parse_ansi_sequence_bytes(&bytes[i..])
221 {
222 i += len;
223 continue;
224 }
225 if bytes[i] == ESC || parse_c1_at(bytes, i).is_some() {
226 break;
228 }
229
230 if bytes[i] == b'\n' || bytes[i] == b'\r' || bytes[i] == b'\t' {
231 output.push(bytes[i]);
232 i += 1;
233 } else if bytes[i] < 32 || bytes[i] == DEL {
234 i += 1;
235 } else {
236 output.push(bytes[i]);
237 i += 1;
238 }
239 }
240 output
241}
242
243pub fn parse_ansi_sequence(text: &str) -> Option<usize> {
245 let bytes = text.as_bytes();
246 parse_ansi_sequence_bytes(bytes)
247}
248
249pub fn strip_ansi_ascii_only(text: &str) -> String {
251 let mut output = String::with_capacity(text.len());
252 let bytes = text.as_bytes();
253 let mut search_start = 0;
254 let mut copy_start = 0;
255
256 while let Some(offset) = memchr(ESC, &bytes[search_start..]) {
257 let esc_index = search_start + offset;
258 if let Some(len) = parse_ansi_sequence_bytes(&bytes[esc_index..]) {
259 if copy_start < esc_index {
260 output.push_str(&text[copy_start..esc_index]);
261 }
262 copy_start = esc_index + len;
263 search_start = copy_start;
264 } else {
265 search_start = esc_index + 1;
266 }
267 }
268
269 if copy_start < text.len() {
270 output.push_str(&text[copy_start..]);
271 }
272
273 output
274}
275
/// Returns `true` when `text` contains at least one non-ASCII byte
/// (any byte `>= 0x80`), i.e. at least one multi-byte UTF-8 character.
pub fn contains_unicode(text: &str) -> bool {
    !text.is_ascii()
}
280
// Behavioural tests for the stripping entry points. Inputs are written with
// explicit `\x1b` escapes so the exact byte sequences under test are visible.
#[cfg(test)]
mod tests {
    use super::{CAN, SUB, strip_ansi, strip_ansi_ascii_only};

    // Basic SGR colour sequences are removed by both string-based strippers.
    #[test]
    fn strips_esc_csi_sequences() {
        let input = "a\x1b[31mred\x1b[0mz";
        assert_eq!(strip_ansi(input), "aredz");
        assert_eq!(strip_ansi_ascii_only(input), "aredz");
    }

    // U+009B is stored as the two UTF-8 bytes 0xC2 0x9B. `strip_ansi` only
    // starts sequence parsing at ESC, so the text must pass through unchanged.
    #[test]
    fn utf8_encoded_c1_is_not_reprocessed_as_control() {
        let input = "a\u{009b}31mred";
        assert_eq!(strip_ansi(input), input);
    }

    // DEL (0x7f) is dropped like the other control characters.
    #[test]
    fn strip_removes_ascii_del_control() {
        let input = format!("a{}b", char::from(0x7f));
        assert_eq!(strip_ansi(&input), "ab");
    }

    // An ESC inside an unfinished CSI abandons it; the ESC then starts a fresh
    // sequence that is parsed normally.
    #[test]
    fn csi_aborts_on_esc_then_new_sequence_parses() {
        let input = "a\x1b[31\x1b[32mgreen\x1b[0mz";
        assert_eq!(strip_ansi(input), "agreenz");
    }

    // CAN and SUB abandon an unfinished CSI and are consumed with it.
    #[test]
    fn csi_aborts_on_can_and_sub() {
        let can = format!("a\x1b[31{}b", char::from(CAN));
        let sub = format!("a\x1b[31{}b", char::from(SUB));
        assert_eq!(strip_ansi(&can), "ab");
        assert_eq!(strip_ansi(&sub), "ab");
    }

    // An ESC that is not followed by `\` ends the OSC without being consumed,
    // so the CSI it introduces is still recognised.
    #[test]
    fn osc_aborts_on_esc_non_st() {
        let input = "a\x1b]title\x1b[31mred\x1b[0mz";
        assert_eq!(strip_ansi(input), "aredz");
    }

    // `strip_ansi` stops at an unterminated sequence and discards it.
    #[test]
    fn incomplete_sequence_drops_tail() {
        let input = "text\x1b[31";
        assert_eq!(strip_ansi(input), "text");
    }

    // `strip_ansi_ascii_only` keeps an unterminated sequence verbatim instead.
    #[test]
    fn ascii_only_incomplete_sequence_keeps_tail() {
        let input = "text\x1b[31";
        assert_eq!(strip_ansi_ascii_only(input), input);
    }

    // Erase-line (`CSI 2 K`) is removed while the carriage returns and the
    // newline around it are preserved.
    #[test]
    fn strips_common_progress_redraw_sequences() {
        let input = "\r\x1b[2KProgress 10%\r\x1b[2KDone\n";
        assert_eq!(strip_ansi(input), "\rProgress 10%\rDone\n");
    }

    // Cursor movement sequences (`CSI 1 D`, `CSI 1 A`) are removed.
    #[test]
    fn strips_cursor_navigation_sequences() {
        let input = "left\x1b[1D!\nup\x1b[1Arow";
        assert_eq!(strip_ansi(input), "left!\nuprow");
    }

    // The byte-oriented stripper recognises the single-byte CSI introducer 0x9b.
    #[test]
    fn strip_ansi_bytes_supports_raw_c1_csi() {
        let input = [
            b'a', 0x9b, b'3', b'1', b'm', b'r', b'e', b'd', 0x9b, b'0', b'm', b'z',
        ];
        let out = super::strip_ansi_bytes(&input);
        assert_eq!(out, b"aredz");
    }

    // Hyperlink-style OSC 8 strings introduced by raw 0x9d and terminated by
    // raw 0x9c are removed; only the visible text remains.
    #[test]
    fn strip_ansi_bytes_supports_raw_c1_osc_and_st() {
        let mut input = b"pre".to_vec();
        input.extend_from_slice(&[0x9d]);
        input.extend_from_slice(b"8;;https://example.com");
        input.extend_from_slice(&[0x9c]);
        input.extend_from_slice(b"link");
        input.extend_from_slice(&[0x9d]);
        input.extend_from_slice(b"8;;");
        input.extend_from_slice(&[0x9c]);
        input.extend_from_slice(b"post");
        let out = super::strip_ansi_bytes(&input);
        assert_eq!(out, b"prelinkpost");
    }

    // Parameters `1;2`, then an intermediate space, then the final byte `m`.
    #[test]
    fn csi_respects_parameter_intermediate_final_grammar() {
        let input = "a\x1b[1;2 mred\x1b[0mz";
        assert_eq!(strip_ansi(input), "aredz");
    }

    // 0x10 is not a parameter, intermediate or final byte: CSI parsing stops
    // in front of it, the control byte itself is filtered out, and the text
    // that follows stays visible.
    #[test]
    fn malformed_csi_does_not_consume_following_text() {
        let malformed = format!("a\x1b[12{}visible", char::from(0x10));
        assert_eq!(strip_ansi(&malformed), "avisible");
    }

    // 256-colour SGR form: `CSI 38;5;N m`.
    #[test]
    fn strips_wikipedia_sgr_8bit_color_pattern() {
        let input = "x\x1b[38;5;196mred\x1b[0my";
        assert_eq!(strip_ansi(input), "xredy");
    }

    // True-colour SGR form: `CSI 48;2;R;G;B m`.
    #[test]
    fn strips_wikipedia_sgr_truecolor_pattern() {
        let input = "x\x1b[48;2;12;34;56mblock\x1b[0my";
        assert_eq!(strip_ansi(input), "xblocky");
    }

    // OSC 8 hyperlinks terminated by the 7-bit ST (`ESC \`).
    #[test]
    fn strips_wikipedia_osc8_hyperlink_pattern() {
        let input = "go \x1b]8;;https://example.com\x1b\\here\x1b]8;;\x1b\\ now";
        assert_eq!(strip_ansi(input), "go here now");
    }

    // The `?` private-mode prefix is an ordinary CSI parameter byte.
    #[test]
    fn strips_dec_private_mode_csi() {
        let input = "a\x1b[?25lb\x1b[?25hc";
        assert_eq!(strip_ansi(input), "abc");
    }
}