1use crate::ansi_codes::{BEL_BYTE, ESC_BYTE};
6use memchr::memchr;
7
/// Escape byte that introduces 7-bit escape sequences (re-exported from `ansi_codes`;
/// presumably 0x1B — the fast path in `strip_ansi_codes` checks for `'\x1b'`).
const ESC: u8 = ESC_BYTE;
/// Bell byte (re-exported from `ansi_codes`); ends a string sequence when the
/// terminator kind allows it.
const BEL: u8 = BEL_BYTE;
/// ASCII DEL; treated as a non-visible control byte when filtering output.
const DEL: u8 = 0x7f;
/// 8-bit C1 String Terminator (ST).
const C1_ST: u8 = 0x9c;
/// 8-bit C1 Device Control String (DCS) introducer.
const C1_DCS: u8 = 0x90;
/// 8-bit C1 Start Of String (SOS) introducer.
const C1_SOS: u8 = 0x98;
/// 8-bit C1 Control Sequence Introducer (CSI).
const C1_CSI: u8 = 0x9b;
/// 8-bit C1 Operating System Command (OSC) introducer.
const C1_OSC: u8 = 0x9d;
/// 8-bit C1 Privacy Message (PM) introducer.
const C1_PM: u8 = 0x9e;
/// 8-bit C1 Application Program Command (APC) introducer.
const C1_APC: u8 = 0x9f;
/// Cancel (CAN); aborts the sequence currently being parsed.
const CAN: u8 = 0x18;
/// Substitute (SUB); aborts the sequence currently being parsed.
const SUB: u8 = 0x1a;
/// Maximum number of payload bytes scanned in a string sequence (OSC/DCS/SOS/PM/APC)
/// before the parser stops, bounding the damage of an unterminated sequence.
const MAX_STRING_SEQUENCE_BYTES: usize = 4096;
/// Maximum number of bytes scanned after a CSI introducer before the parser stops.
const MAX_CSI_SEQUENCE_BYTES: usize = 64;
22
/// Which bytes are allowed to end a string-type control sequence.
#[derive(Clone, Copy)]
enum StringSequenceTerminator {
    /// Only the String Terminator (ST) ends the sequence.
    StOnly,
    /// Either BEL or ST ends the sequence.
    BelOrSt,
}

impl StringSequenceTerminator {
    /// Returns `true` when a BEL byte is accepted as a terminator.
    #[inline]
    const fn allows_bel(self) -> bool {
        match self {
            Self::BelOrSt => true,
            Self::StOnly => false,
        }
    }
}
35
/// Recognises a raw 8-bit C1 control byte at `bytes[start]`.
///
/// Returns the control byte together with its encoded length (always 1), or
/// `None` when `start` is out of bounds or the byte lies outside `0x80..=0x9f`.
#[inline]
fn parse_c1_at(bytes: &[u8], start: usize) -> Option<(u8, usize)> {
    match bytes.get(start).copied() {
        Some(control @ 0x80..=0x9f) => Some((control, 1)),
        _ => None,
    }
}
44
45#[inline]
46fn parse_csi(bytes: &[u8], start: usize) -> Option<usize> {
47 let mut index = start;
53 let mut phase = 0u8; let mut consumed = 0usize;
55
56 while index < bytes.len() {
57 let byte = bytes[index];
58 if byte == ESC {
59 return Some(index);
61 }
62 if byte == CAN || byte == SUB {
63 return Some(index + 1);
65 }
66
67 consumed += 1;
68 if consumed > MAX_CSI_SEQUENCE_BYTES {
69 return Some(index + 1);
71 }
72
73 if phase == 0 && (0x30..=0x3f).contains(&byte) {
74 index += 1;
75 continue;
76 }
77 if (0x20..=0x2f).contains(&byte) {
78 phase = 1;
79 index += 1;
80 continue;
81 }
82 if (0x40..=0x7e).contains(&byte) {
83 return Some(index + 1);
84 }
85
86 return Some(index);
88 }
89
90 None
91}
92
93#[inline]
94fn parse_string_sequence(
95 bytes: &[u8],
96 start: usize,
97 terminator: StringSequenceTerminator,
98) -> Option<usize> {
99 let mut consumed = 0usize;
100 for index in start..bytes.len() {
101 if bytes[index] == ESC && !(index + 1 < bytes.len() && bytes[index + 1] == b'\\') {
102 return Some(index);
104 }
105 if bytes[index] == CAN || bytes[index] == SUB {
106 return Some(index + 1);
107 }
108
109 if let Some((c1, len)) = parse_c1_at(bytes, index)
110 && c1 == C1_ST
111 {
112 return Some(index + len);
113 }
114
115 match bytes[index] {
116 BEL if terminator.allows_bel() => return Some(index + 1),
117 ESC if index + 1 < bytes.len() && bytes[index + 1] == b'\\' => return Some(index + 2),
118 _ => {}
119 }
120
121 consumed += 1;
122 if consumed > MAX_STRING_SEQUENCE_BYTES {
123 return Some(index + 1);
125 }
126 }
127 None
128}
129
130#[inline]
131fn push_visible_byte(output: &mut Vec<u8>, byte: u8) {
132 if matches!(byte, b'\n' | b'\r' | b'\t') || !(byte < 32 || byte == DEL) {
133 output.push(byte);
134 }
135}
136
137#[inline]
138fn parse_ansi_sequence_bytes(bytes: &[u8]) -> Option<usize> {
139 if bytes.is_empty() {
140 return None;
141 }
142
143 if let Some((c1, c1_len)) = parse_c1_at(bytes, 0) {
144 return match c1 {
145 C1_CSI => parse_csi(bytes, c1_len),
146 C1_OSC => parse_string_sequence(bytes, c1_len, StringSequenceTerminator::BelOrSt),
147 C1_DCS | C1_SOS | C1_PM | C1_APC => {
148 parse_string_sequence(bytes, c1_len, StringSequenceTerminator::StOnly)
149 }
150 _ => Some(c1_len),
151 };
152 }
153
154 match bytes[0] {
155 ESC => {
156 if bytes.len() < 2 {
157 return None;
158 }
159
160 match bytes[1] {
161 b'[' => parse_csi(bytes, 2),
162 b']' => parse_string_sequence(bytes, 2, StringSequenceTerminator::BelOrSt),
163 b'P' | b'^' | b'_' | b'X' => {
164 parse_string_sequence(bytes, 2, StringSequenceTerminator::StOnly)
165 }
166 b' ' | b'#' | b'%' | b'(' | b')' | b'*' | b'+' => {
172 if bytes.len() > 2 {
173 Some(3)
174 } else {
175 None
176 }
177 }
178 next if next < 128 => Some(2),
179 _ => Some(1),
180 }
181 }
182 _ => None,
183 }
184}
185
186pub fn strip_ansi_codes(text: &str) -> std::borrow::Cow<'_, str> {
190 if !text.contains('\x1b') {
191 return std::borrow::Cow::Borrowed(text);
192 }
193 std::borrow::Cow::Owned(strip_ansi(text))
194}
195
196pub fn strip_ansi(text: &str) -> String {
198 let mut output = Vec::with_capacity(text.len());
199 let bytes = text.as_bytes();
200 let mut i = 0;
201
202 while i < bytes.len() {
203 let next_esc = memchr(ESC, &bytes[i..]).map_or(bytes.len(), |offset| i + offset);
204 for &b in &bytes[i..next_esc] {
207 push_visible_byte(&mut output, b);
208 }
209 i = next_esc;
210
211 if i >= bytes.len() {
212 break;
213 }
214
215 if let Some(len) = parse_ansi_sequence_bytes(&bytes[i..]) {
216 i += len;
217 continue;
218 } else {
219 break;
221 }
222 }
223
224 String::from_utf8_lossy(&output).into_owned()
225}
226
227pub fn strip_ansi_bytes(input: &[u8]) -> Vec<u8> {
231 let mut output = Vec::with_capacity(input.len());
232 let bytes = input;
233 let mut i = 0;
234
235 while i < bytes.len() {
236 let rest = &bytes[i..];
238
239 if (rest[0] == ESC || parse_c1_at(bytes, i).is_some())
240 && let Some(len) = parse_ansi_sequence_bytes(rest)
241 {
242 i += len;
243 continue;
244 }
245 if rest[0] == ESC || parse_c1_at(bytes, i).is_some() {
246 break;
248 }
249
250 push_visible_byte(&mut output, rest[0]);
251 i += 1;
252 }
253 output
254}
255
256pub fn parse_ansi_sequence(text: &str) -> Option<usize> {
258 let bytes = text.as_bytes();
259 parse_ansi_sequence_bytes(bytes)
260}
261
262pub fn strip_ansi_ascii_only(text: &str) -> String {
264 let mut output = String::with_capacity(text.len());
265 let bytes = text.as_bytes();
266 let mut search_start = 0;
267 let mut copy_start = 0;
268
269 while let Some(offset) = memchr(ESC, &bytes[search_start..]) {
270 let esc_index = search_start + offset;
271 if let Some(len) = parse_ansi_sequence_bytes(&bytes[esc_index..]) {
272 if copy_start < esc_index {
273 output.push_str(&text[copy_start..esc_index]);
274 }
275 copy_start = esc_index + len;
276 search_start = copy_start;
277 } else {
278 search_start = esc_index + 1;
279 }
280 }
281
282 if copy_start < text.len() {
283 output.push_str(&text[copy_start..]);
284 }
285
286 output
287}
288
/// Returns `true` when `text` contains at least one non-ASCII byte
/// (any byte `>= 0x80`), i.e. at least one character outside the ASCII range.
#[must_use]
pub fn contains_unicode(text: &str) -> bool {
    !text.is_ascii()
}
294
/// Unit tests for the ANSI stripping functions.
#[cfg(test)]
mod tests {
    use super::{CAN, SUB, strip_ansi, strip_ansi_ascii_only};

    /// Basic SGR colour sequences are removed by both string strippers.
    #[test]
    fn strips_esc_csi_sequences() {
        let input = "a\x1b[31mred\x1b[0mz";
        assert_eq!(strip_ansi(input), "aredz");
        assert_eq!(strip_ansi_ascii_only(input), "aredz");
    }

    /// U+009B inside a `&str` is UTF-8 encoded (two bytes), not a raw C1 CSI byte,
    /// so `strip_ansi` must leave the text untouched.
    #[test]
    fn utf8_encoded_c1_is_not_reprocessed_as_control() {
        let input = "a\u{009b}31mred";
        assert_eq!(strip_ansi(input), input);
    }

    /// DEL (0x7f) is dropped as a non-visible control character.
    #[test]
    fn strip_removes_ascii_del_control() {
        let input = format!("a{}b", char::from(0x7f));
        assert_eq!(strip_ansi(&input), "ab");
    }

    /// An ESC in the middle of a CSI aborts it; the ESC then starts a fresh
    /// sequence that is parsed normally.
    #[test]
    fn csi_aborts_on_esc_then_new_sequence_parses() {
        let input = "a\x1b[31\x1b[32mgreen\x1b[0mz";
        assert_eq!(strip_ansi(input), "agreenz");
    }

    /// CAN and SUB abort a CSI and are consumed with it.
    #[test]
    fn csi_aborts_on_can_and_sub() {
        let can = format!("a\x1b[31{}b", char::from(CAN));
        let sub = format!("a\x1b[31{}b", char::from(SUB));
        assert_eq!(strip_ansi(&can), "ab");
        assert_eq!(strip_ansi(&sub), "ab");
    }

    /// An ESC that is not part of `ESC \` ends an OSC string; the following
    /// sequence is then parsed on its own.
    #[test]
    fn osc_aborts_on_esc_non_st() {
        let input = "a\x1b]title\x1b[31mred\x1b[0mz";
        assert_eq!(strip_ansi(input), "aredz");
    }

    /// `strip_ansi` discards an unterminated trailing sequence.
    #[test]
    fn incomplete_sequence_drops_tail() {
        let input = "text\x1b[31";
        assert_eq!(strip_ansi(input), "text");
    }

    /// `strip_ansi_ascii_only` keeps an unterminated trailing sequence verbatim.
    #[test]
    fn ascii_only_incomplete_sequence_keeps_tail() {
        let input = "text\x1b[31";
        assert_eq!(strip_ansi_ascii_only(input), input);
    }

    /// Erase-line sequences (`ESC [ 2 K`) are removed while carriage returns and
    /// newlines are preserved.
    #[test]
    fn strips_common_progress_redraw_sequences() {
        let input = "\r\x1b[2KProgress 10%\r\x1b[2KDone\n";
        assert_eq!(strip_ansi(input), "\rProgress 10%\rDone\n");
    }

    /// Cursor movement sequences (`ESC [ 1 D`, `ESC [ 1 A`) are removed.
    #[test]
    fn strips_cursor_navigation_sequences() {
        let input = "left\x1b[1D!\nup\x1b[1Arow";
        assert_eq!(strip_ansi(input), "left!\nuprow");
    }

    /// The byte-oriented stripper recognises the raw 8-bit CSI introducer (0x9b).
    #[test]
    fn strip_ansi_bytes_supports_raw_c1_csi() {
        let input = [
            b'a', 0x9b, b'3', b'1', b'm', b'r', b'e', b'd', 0x9b, b'0', b'm', b'z',
        ];
        let out = super::strip_ansi_bytes(&input);
        assert_eq!(out, b"aredz");
    }

    /// The byte-oriented stripper recognises raw 8-bit OSC (0x9d) terminated by
    /// raw 8-bit ST (0x9c).
    #[test]
    fn strip_ansi_bytes_supports_raw_c1_osc_and_st() {
        let mut input = b"pre".to_vec();
        input.extend_from_slice(&[0x9d]);
        input.extend_from_slice(b"8;;https://example.com");
        input.extend_from_slice(&[0x9c]);
        input.extend_from_slice(b"link");
        input.extend_from_slice(&[0x9d]);
        input.extend_from_slice(b"8;;");
        input.extend_from_slice(&[0x9c]);
        input.extend_from_slice(b"post");
        let out = super::strip_ansi_bytes(&input);
        assert_eq!(out, b"prelinkpost");
    }

    /// Parameters (`1;2`), then an intermediate (space), then the final byte (`m`)
    /// form one valid CSI sequence.
    #[test]
    fn csi_respects_parameter_intermediate_final_grammar() {
        let input = "a\x1b[1;2 mred\x1b[0mz";
        assert_eq!(strip_ansi(input), "aredz");
    }

    /// A control byte (0x10) inside a CSI ends it without swallowing the text that
    /// follows; the control byte itself is dropped as non-visible.
    #[test]
    fn malformed_csi_does_not_consume_following_text() {
        let malformed = format!("a\x1b[12{}visible", char::from(0x10));
        assert_eq!(strip_ansi(&malformed), "avisible");
    }

    /// 256-colour SGR form: `ESC [ 38 ; 5 ; n m`.
    #[test]
    fn strips_wikipedia_sgr_8bit_color_pattern() {
        let input = "x\x1b[38;5;196mred\x1b[0my";
        assert_eq!(strip_ansi(input), "xredy");
    }

    /// True-colour SGR form: `ESC [ 48 ; 2 ; r ; g ; b m`.
    #[test]
    fn strips_wikipedia_sgr_truecolor_pattern() {
        let input = "x\x1b[48;2;12;34;56mblock\x1b[0my";
        assert_eq!(strip_ansi(input), "xblocky");
    }

    /// OSC 8 hyperlinks terminated by the 7-bit ST (`ESC \`) are removed, leaving
    /// only the link text.
    #[test]
    fn strips_wikipedia_osc8_hyperlink_pattern() {
        let input = "go \x1b]8;;https://example.com\x1b\\here\x1b]8;;\x1b\\ now";
        assert_eq!(strip_ansi(input), "go here now");
    }

    /// Private-mode CSI sequences with a `?` parameter prefix are removed.
    #[test]
    fn strips_dec_private_mode_csi() {
        let input = "a\x1b[?25lb\x1b[?25hc";
        assert_eq!(strip_ansi(input), "abc");
    }

    /// `ESC` followed by an intermediate byte and one more byte is consumed as a
    /// three-byte sequence.
    #[test]
    fn strips_three_byte_esc_sequences() {
        // ESC # 8
        let input = "a\x1b#8b";
        assert_eq!(strip_ansi(input), "ab");

        // ESC ( B
        let input2 = "a\x1b(Bb";
        assert_eq!(strip_ansi(input2), "ab");

        // ESC SP F
        let input3 = "a\x1b Fb";
        assert_eq!(strip_ansi(input3), "ab");

        // ESC % G
        let input4 = "a\x1b%Gb";
        assert_eq!(strip_ansi(input4), "ab");
    }

    /// A three-byte sequence missing its last byte is incomplete, so `strip_ansi`
    /// drops it along with the (empty) tail.
    #[test]
    fn incomplete_three_byte_esc_sequence_drops_tail() {
        let input = "text\x1b#";
        assert_eq!(strip_ansi(input), "text");
    }
}
452}