// --- C0 control bytes -------------------------------------------------------

/// Bell; accepted by `parse_osc` as a string terminator.
const BEL: u8 = 0x07;
/// Cancel; aborts a sequence that is currently being scanned.
const CAN: u8 = 0x18;
/// Substitute; aborts a sequence that is currently being scanned.
const SUB: u8 = 0x1A;
/// Escape; introduces the 7-bit forms (`ESC [`, `ESC ]`, `ESC P`, ...).
const ESC: u8 = 0x1B;
/// Delete; dropped from the output by the strip functions.
const DEL: u8 = 0x7F;

// --- Single-byte (8-bit) C1 controls ----------------------------------------

/// Device Control String introducer; payload runs until ST.
const C1_DCS: u8 = 0x90;
/// Start Of String introducer; payload runs until ST.
const C1_SOS: u8 = 0x98;
/// Control Sequence Introducer (8-bit equivalent of `ESC [`).
const C1_CSI: u8 = 0x9B;
/// String Terminator (8-bit equivalent of `ESC \`).
const C1_ST: u8 = 0x9C;
/// Operating System Command introducer (8-bit equivalent of `ESC ]`).
const C1_OSC: u8 = 0x9D;
/// Privacy Message introducer; payload runs until ST.
const C1_PM: u8 = 0x9E;
/// Application Program Command introducer; payload runs until ST.
const C1_APC: u8 = 0x9F;

// --- Scan limits --------------------------------------------------------------

/// Upper bound on the bytes scanned inside one CSI sequence before giving up.
const MAX_CSI_SEQUENCE_BYTES: usize = 64;
/// Upper bound on the payload bytes scanned inside one OSC/DCS/SOS/PM/APC string.
const MAX_STRING_SEQUENCE_BYTES: usize = 4096;
17
/// Checks whether the byte at `start` falls in the C1 control range.
///
/// Returns the byte together with its length (always `1`) when
/// `bytes[start]` lies in `0x80..=0x9f`. Returns `None` when `start` is out
/// of bounds or the byte is outside that range.
#[inline]
fn parse_c1_at(bytes: &[u8], start: usize) -> Option<(u8, usize)> {
    match bytes.get(start).copied() {
        Some(control @ 0x80..=0x9f) => Some((control, 1)),
        _ => None,
    }
}
26
27#[inline]
28fn parse_csi(bytes: &[u8], start: usize) -> Option<usize> {
29 let mut index = start;
35 let mut phase = 0u8; let mut consumed = 0usize;
37
38 while index < bytes.len() {
39 let byte = bytes[index];
40 if byte == ESC {
41 return Some(index);
43 }
44 if byte == CAN || byte == SUB {
45 return Some(index + 1);
47 }
48
49 consumed += 1;
50 if consumed > MAX_CSI_SEQUENCE_BYTES {
51 return Some(index + 1);
53 }
54
55 if phase == 0 && (0x30..=0x3f).contains(&byte) {
56 index += 1;
57 continue;
58 }
59 if (0x20..=0x2f).contains(&byte) {
60 phase = 1;
61 index += 1;
62 continue;
63 }
64 if (0x40..=0x7e).contains(&byte) {
65 return Some(index + 1);
66 }
67
68 return Some(index);
70 }
71
72 None
73}
74
75#[inline]
76fn parse_osc(bytes: &[u8], start: usize) -> Option<usize> {
77 let mut consumed = 0usize;
78 for index in start..bytes.len() {
79 if bytes[index] == ESC && !(index + 1 < bytes.len() && bytes[index + 1] == b'\\') {
80 return Some(index);
82 }
83 if bytes[index] == CAN || bytes[index] == SUB {
84 return Some(index + 1);
85 }
86
87 if let Some((c1, len)) = parse_c1_at(bytes, index)
88 && c1 == C1_ST
89 {
90 return Some(index + len);
91 }
92
93 match bytes[index] {
94 BEL | C1_ST => return Some(index + 1),
95 ESC if index + 1 < bytes.len() && bytes[index + 1] == b'\\' => return Some(index + 2),
96 _ => {}
97 }
98
99 consumed += 1;
100 if consumed > MAX_STRING_SEQUENCE_BYTES {
101 return Some(index + 1);
103 }
104 }
105 None
106}
107
108#[inline]
109fn parse_st_terminated(bytes: &[u8], start: usize) -> Option<usize> {
110 let mut consumed = 0usize;
111 for index in start..bytes.len() {
112 if bytes[index] == ESC && !(index + 1 < bytes.len() && bytes[index + 1] == b'\\') {
113 return Some(index);
114 }
115 if bytes[index] == CAN || bytes[index] == SUB {
116 return Some(index + 1);
117 }
118
119 if let Some((c1, len)) = parse_c1_at(bytes, index)
120 && c1 == C1_ST
121 {
122 return Some(index + len);
123 }
124
125 match bytes[index] {
126 C1_ST => return Some(index + 1),
127 ESC if index + 1 < bytes.len() && bytes[index + 1] == b'\\' => return Some(index + 2),
128 _ => {}
129 }
130
131 consumed += 1;
132 if consumed > MAX_STRING_SEQUENCE_BYTES {
133 return Some(index + 1);
134 }
135 }
136 None
137}
138
139#[inline]
140fn parse_ansi_sequence_bytes(bytes: &[u8]) -> Option<usize> {
141 if bytes.is_empty() {
142 return None;
143 }
144
145 if let Some((c1, c1_len)) = parse_c1_at(bytes, 0) {
146 return match c1 {
147 C1_CSI => parse_csi(bytes, c1_len),
148 C1_OSC => parse_osc(bytes, c1_len),
149 C1_DCS | C1_SOS | C1_PM | C1_APC => parse_st_terminated(bytes, c1_len),
150 _ => Some(c1_len),
151 };
152 }
153
154 match bytes[0] {
155 ESC => {
156 if bytes.len() < 2 {
157 return None;
158 }
159
160 match bytes[1] {
161 b'[' => parse_csi(bytes, 2),
162 b']' => parse_osc(bytes, 2),
163 b'P' | b'^' | b'_' | b'X' => parse_st_terminated(bytes, 2),
164 next if next < 128 => Some(2),
165 _ => Some(1),
166 }
167 }
168 _ => None,
169 }
170}
171
172pub fn strip_ansi(text: &str) -> String {
174 let mut output = Vec::with_capacity(text.len());
175 let bytes = text.as_bytes();
176 let mut i = 0;
177
178 while i < bytes.len() {
179 if bytes[i] == ESC
180 && let Some(len) = parse_ansi_sequence_bytes(&bytes[i..])
181 {
182 i += len;
183 continue;
184 }
185 if bytes[i] == ESC {
186 break;
188 }
189
190 if bytes[i] == b'\n' || bytes[i] == b'\r' || bytes[i] == b'\t' {
191 output.push(bytes[i]);
192 i += 1;
193 } else if bytes[i] < 32 || bytes[i] == DEL {
194 i += 1;
195 } else {
196 output.push(bytes[i]);
197 i += 1;
198 }
199 }
200
201 String::from_utf8_lossy(&output).into_owned()
202}
203
204pub fn strip_ansi_bytes(input: &[u8]) -> Vec<u8> {
208 let mut output = Vec::with_capacity(input.len());
209 let bytes = input;
210 let mut i = 0;
211
212 while i < bytes.len() {
213 if (bytes[i] == ESC || parse_c1_at(bytes, i).is_some())
214 && let Some(len) = parse_ansi_sequence_bytes(&bytes[i..])
215 {
216 i += len;
217 continue;
218 }
219 if bytes[i] == ESC || parse_c1_at(bytes, i).is_some() {
220 break;
222 }
223
224 if bytes[i] == b'\n' || bytes[i] == b'\r' || bytes[i] == b'\t' {
225 output.push(bytes[i]);
226 i += 1;
227 } else if bytes[i] < 32 || bytes[i] == DEL {
228 i += 1;
229 } else {
230 output.push(bytes[i]);
231 i += 1;
232 }
233 }
234 output
235}
236
237pub fn parse_ansi_sequence(text: &str) -> Option<usize> {
239 let bytes = text.as_bytes();
240 parse_ansi_sequence_bytes(bytes)
241}
242
243pub fn strip_ansi_ascii_only(text: &str) -> String {
245 let mut output = String::with_capacity(text.len());
246 let bytes = text.as_bytes();
247 let mut i = 0;
248 let mut last_valid = 0;
249
250 while i < bytes.len() {
251 if (bytes[i] == ESC || parse_c1_at(bytes, i).is_some())
252 && let Some(len) = parse_ansi_sequence_bytes(&bytes[i..])
253 {
254 if last_valid < i {
255 output.push_str(&text[last_valid..i]);
256 }
257 i += len;
258 last_valid = i;
259 continue;
260 }
261
262 i += 1;
263 }
264
265 if last_valid < text.len() {
266 output.push_str(&text[last_valid..]);
267 }
268
269 output
270}
271
/// Reports whether `text` contains at least one non-ASCII byte (`>= 0x80`).
pub fn contains_unicode(text: &str) -> bool {
    !text.is_ascii()
}
276
#[cfg(test)]
mod tests {
    use super::{CAN, SUB, strip_ansi, strip_ansi_ascii_only, strip_ansi_bytes};

    /// SGR colour sequences introduced by `ESC [` are removed by both string strippers.
    #[test]
    fn strips_esc_csi_sequences() {
        let colored = "a\x1b[31mred\x1b[0mz";
        let expected = "aredz";
        assert_eq!(strip_ansi(colored), expected);
        assert_eq!(strip_ansi_ascii_only(colored), expected);
    }

    /// U+009B encoded as UTF-8 is ordinary text for `strip_ansi`, not a CSI introducer.
    #[test]
    fn utf8_encoded_c1_is_not_reprocessed_as_control() {
        let text = "a\u{009b}31mred";
        let stripped = strip_ansi(text);
        assert_eq!(stripped, text);
    }

    /// DEL is dropped like the other control characters.
    #[test]
    fn strip_removes_ascii_del_control() {
        let with_del = format!("a{}b", char::from(0x7f));
        assert_eq!(strip_ansi(&with_del), "ab");
    }

    /// An ESC inside an unfinished CSI starts a fresh sequence.
    #[test]
    fn csi_aborts_on_esc_then_new_sequence_parses() {
        let interrupted = "a\x1b[31\x1b[32mgreen\x1b[0mz";
        assert_eq!(strip_ansi(interrupted), "agreenz");
    }

    /// CAN and SUB both cancel a CSI that is in progress.
    #[test]
    fn csi_aborts_on_can_and_sub() {
        for abort_byte in [CAN, SUB] {
            let cancelled = format!("a\x1b[31{}b", char::from(abort_byte));
            assert_eq!(strip_ansi(&cancelled), "ab");
        }
    }

    /// An ESC that is not `ESC \` ends an OSC and is parsed as a new sequence.
    #[test]
    fn osc_aborts_on_esc_non_st() {
        let interrupted = "a\x1b]title\x1b[31mred\x1b[0mz";
        assert_eq!(strip_ansi(interrupted), "aredz");
    }

    /// Everything from an unfinished trailing sequence onward is discarded.
    #[test]
    fn incomplete_sequence_drops_tail() {
        let truncated = "text\x1b[31";
        assert_eq!(strip_ansi(truncated), "text");
    }

    /// Erase-line sequences go away while the carriage returns stay.
    #[test]
    fn strips_common_progress_redraw_sequences() {
        let redraw = "\r\x1b[2KProgress 10%\r\x1b[2KDone\n";
        let expected = "\rProgress 10%\rDone\n";
        assert_eq!(strip_ansi(redraw), expected);
    }

    /// Cursor movement sequences are removed.
    #[test]
    fn strips_cursor_navigation_sequences() {
        let moves = "left\x1b[1D!\nup\x1b[1Arow";
        assert_eq!(strip_ansi(moves), "left!\nuprow");
    }

    /// The byte-oriented stripper understands the single-byte CSI introducer.
    #[test]
    fn strip_ansi_bytes_supports_raw_c1_csi() {
        let raw: [u8; 12] = [
            b'a', 0x9b, b'3', b'1', b'm', b'r', b'e', b'd', 0x9b, b'0', b'm', b'z',
        ];
        assert_eq!(strip_ansi_bytes(&raw), b"aredz");
    }

    /// The byte-oriented stripper understands single-byte OSC and ST.
    #[test]
    fn strip_ansi_bytes_supports_raw_c1_osc_and_st() {
        let pieces: [&[u8]; 9] = [
            b"pre",
            &[0x9d],
            b"8;;https://example.com",
            &[0x9c],
            b"link",
            &[0x9d],
            b"8;;",
            &[0x9c],
            b"post",
        ];
        let raw = pieces.concat();
        assert_eq!(strip_ansi_bytes(&raw), b"prelinkpost");
    }

    /// Parameters, then an intermediate byte, then the final byte form one sequence.
    #[test]
    fn csi_respects_parameter_intermediate_final_grammar() {
        let with_intermediate = "a\x1b[1;2 mred\x1b[0mz";
        assert_eq!(strip_ansi(with_intermediate), "aredz");
    }

    /// A CSI broken by an unexpected control byte leaves the following text alone.
    #[test]
    fn malformed_csi_does_not_consume_following_text() {
        let broken = format!("a\x1b[12{}visible", char::from(0x10));
        assert_eq!(strip_ansi(&broken), "avisible");
    }

    /// 256-colour SGR form.
    #[test]
    fn strips_wikipedia_sgr_8bit_color_pattern() {
        let indexed = "x\x1b[38;5;196mred\x1b[0my";
        assert_eq!(strip_ansi(indexed), "xredy");
    }

    /// 24-bit colour SGR form.
    #[test]
    fn strips_wikipedia_sgr_truecolor_pattern() {
        let truecolor = "x\x1b[48;2;12;34;56mblock\x1b[0my";
        assert_eq!(strip_ansi(truecolor), "xblocky");
    }

    /// OSC 8 hyperlinks terminated by `ESC \`.
    #[test]
    fn strips_wikipedia_osc8_hyperlink_pattern() {
        let hyperlink = "go \x1b]8;;https://example.com\x1b\\here\x1b]8;;\x1b\\ now";
        assert_eq!(strip_ansi(hyperlink), "go here now");
    }

    /// DEC private mode set/reset (`?` parameter prefix).
    #[test]
    fn strips_dec_private_mode_csi() {
        let cursor_toggle = "a\x1b[?25lb\x1b[?25hc";
        assert_eq!(strip_ansi(cursor_toggle), "abc");
    }
}