1#[cfg(feature = "alloc")]
2use alloc::{borrow::Cow, string::String};
3use core::{
4 iter::{FusedIterator, Peekable},
5 str::CharIndices,
6};
7
/// State of the hand-written DFA that recognises a single ANSI escape code.
///
/// Roughly, in terms of the transitions implemented by [`State::transition`]:
///
/// * `Start` – nothing consumed yet.
/// * `S1` – the introducer (`ESC` or `CSI`) has been consumed.
/// * `S2` – exactly one `(` or `)` directly follows the introducer.
/// * `S3` – a digit `0`–`2` directly follows that single parenthesis.
/// * `S4` – one or more of the prefix characters `( ) ; [ # ?` were consumed.
/// * `S5`–`S8` – the first to fourth digit of a run of digits.
/// * `S9` – a fifth consecutive digit.
/// * `S10` – a `;` following a run of digits (or another such `;`).
/// * `S11` – a terminating character has been consumed.
/// * `Trap` – the input cannot be (further) part of an escape code.
#[derive(Debug, Clone, Copy)]
enum State {
    Start,
    S1,
    S2,
    S3,
    S4,
    S5,
    S6,
    S7,
    S8,
    S9,
    S10,
    S11,
    Trap,
}

impl Default for State {
    /// The automaton begins in [`State::Start`].
    fn default() -> Self {
        State::Start
    }
}

impl State {
    /// Returns `true` if the characters consumed so far form a complete escape code.
    fn is_final(&self) -> bool {
        #[allow(clippy::match_like_matches_macro)]
        match *self {
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
            Self::Start | Self::S1 | Self::S2 | Self::S4 | Self::S10 | Self::Trap => false,
        }
    }

    /// Returns `true` once the automaton has entered the dead state.
    fn is_trapped(&self) -> bool {
        #[allow(clippy::match_like_matches_macro)]
        match *self {
            Self::Trap => true,
            _ => false,
        }
    }

    /// Advances the automaton by one input character.
    ///
    /// Any character that is not valid in the current state moves the
    /// automaton to [`State::Trap`], which it never leaves again.
    fn transition(&mut self, c: char) {
        let next = match *self {
            Self::Start => match c {
                '\u{1b}' | '\u{9b}' => Self::S1,
                _ => Self::Trap,
            },
            Self::S1 => match c {
                '(' | ')' => Self::S2,
                ';' | '[' | '#' | '?' => Self::S4,
                '0'..='9' => Self::S5,
                _ => Self::terminator_or_trap(c),
            },
            Self::S2 => match c {
                '(' | ')' | ';' | '[' | '#' | '?' => Self::S4,
                // Only a low digit right after a single parenthesis ends up in `S3`.
                '0'..='2' => Self::S3,
                '3'..='9' => Self::S5,
                _ => Self::terminator_or_trap(c),
            },
            Self::S4 => match c {
                '(' | ')' | ';' | '[' | '#' | '?' => Self::S4,
                '0'..='9' => Self::S5,
                _ => Self::terminator_or_trap(c),
            },
            Self::S5 => Self::within_number(c, Self::S6),
            Self::S6 => Self::within_number(c, Self::S7),
            Self::S7 => Self::within_number(c, Self::S8),
            Self::S8 => Self::within_number(c, Self::S9),
            // After a separator a new run of digits starts over at `S5`.
            Self::S10 => Self::within_number(c, Self::S5),
            // These states have no outgoing transition other than to the trap.
            Self::S3 | Self::S9 | Self::S11 | Self::Trap => Self::Trap,
        };

        *self = next;
    }

    /// Transition shared by the states that sit inside or right after a run of digits:
    /// `;` goes to `S10`, a digit goes to `on_digit`, anything else either
    /// terminates the code or traps.
    fn within_number(c: char, on_digit: Self) -> Self {
        match c {
            ';' => Self::S10,
            '0'..='9' => on_digit,
            _ => Self::terminator_or_trap(c),
        }
    }

    /// Maps a terminating character to `S11` and every other character to `Trap`.
    fn terminator_or_trap(c: char) -> Self {
        match c {
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
                Self::S11
            }
            _ => Self::Trap,
        }
    }
}
106
/// Iterator over the ANSI escape codes found in a string.
#[derive(Debug)]
struct Matches<'a> {
    /// The complete string being scanned; matches borrow from it.
    s: &'a str,
    /// Scanning cursor over the characters of `s` and their byte offsets.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a scanner positioned at the beginning of `s`.
    fn new(s: &'a str) -> Self {
        Matches {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
119
/// A single escape code located inside a larger string.
#[derive(Debug)]
struct Match<'a> {
    /// The string the match was found in.
    text: &'a str,
    /// Byte offset of the first byte of the match.
    start: usize,
    /// Byte offset one past the last byte of the match.
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched portion of the underlying string.
    #[inline]
    pub(crate) fn as_str(&self) -> &'a str {
        let span = self.start..self.end;
        &self.text[span]
    }
}
133
134impl<'a> Iterator for Matches<'a> {
135 type Item = Match<'a>;
136
137 fn next(&mut self) -> Option<Self::Item> {
138 find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
139 text: self.s,
140 start,
141 end,
142 })
143 }
144}
145
146impl FusedIterator for Matches<'_> {}
147
148fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
149 'outer: loop {
150 if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
151 let start = *start;
152 let mut state = State::default();
153 let mut maybe_end = None;
154
155 loop {
156 let item = it.peek();
157
158 if let Some((idx, c)) = item {
159 state.transition(*c);
160
161 if state.is_final() {
162 maybe_end = Some(*idx);
163 }
164 }
165
166 if state.is_trapped() || item.is_none() {
169 match maybe_end {
170 Some(end) => {
171 return Some((start, end + 1));
174 }
175 None => continue 'outer,
178 }
179 }
180
181 it.next();
182 }
183 }
184
185 it.next();
186 }
187}
188
/// Removes all ANSI escape codes from `s`.
///
/// When `s` contains no escape code it is returned as [`Cow::Borrowed`]
/// without allocating. Otherwise a new string made of the non-escape slices
/// of `s`, in order, is returned as [`Cow::Owned`].
#[cfg(feature = "alloc")]
pub fn strip_ansi_codes(s: &str) -> Cow<'_, str> {
    // Cheap pre-check so the common "nothing to strip" case does not allocate.
    let mut char_it = s.char_indices().peekable();
    if find_ansi_code_exclusive(&mut char_it).is_none() {
        return Cow::Borrowed(s);
    }

    let stripped: String = AnsiCodeIterator::new(s)
        .filter(|&(_, is_ansi)| !is_ansi)
        .map(|(text, _)| text)
        .collect();
    Cow::Owned(stripped)
}
203
204pub struct AnsiCodeIterator<'a> {
211 s: &'a str,
212 pending_item: Option<(&'a str, bool)>,
213 last_idx: usize,
214 cur_idx: usize,
215 iter: Matches<'a>,
216}
217
218impl<'a> AnsiCodeIterator<'a> {
219 pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
221 AnsiCodeIterator {
222 s,
223 pending_item: None,
224 last_idx: 0,
225 cur_idx: 0,
226 iter: Matches::new(s),
227 }
228 }
229
230 pub fn current_slice(&self) -> &str {
232 &self.s[..self.cur_idx]
233 }
234
235 pub fn rest_slice(&self) -> &str {
237 &self.s[self.cur_idx..]
238 }
239}
240
241impl<'a> Iterator for AnsiCodeIterator<'a> {
242 type Item = (&'a str, bool);
243
244 fn next(&mut self) -> Option<(&'a str, bool)> {
245 if let Some(pending_item) = self.pending_item.take() {
246 self.cur_idx += pending_item.0.len();
247 Some(pending_item)
248 } else if let Some(m) = self.iter.next() {
249 let s = &self.s[self.last_idx..m.start];
250 self.last_idx = m.end;
251 if s.is_empty() {
252 self.cur_idx = m.end;
253 Some((m.as_str(), true))
254 } else {
255 self.cur_idx = m.start;
256 self.pending_item = Some((m.as_str(), true));
257 Some((s, false))
258 }
259 } else if self.last_idx < self.s.len() {
260 let rv = &self.s[self.last_idx..];
261 self.cur_idx = self.s.len();
262 self.last_idx = self.s.len();
263 Some((rv, false))
264 } else {
265 None
266 }
267 }
268}
269
270impl FusedIterator for AnsiCodeIterator<'_> {}
271
#[cfg(test)]
mod tests {
    use super::*;

    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // Regex the DFA is compared against; its matches are the expected result.
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        let pattern =
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])";
        Regex::new(pattern).unwrap()
    });

    // Lets a regex match be compared with a DFA match by span only.
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            (self.start(), self.end()) == (other.start, other.end)
        }
    }

    /// Asserts that the DFA finds exactly the spans the regex finds in `input`.
    fn assert_same_as_regex(input: &str) {
        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(input).collect();
        let new_matches: Vec<_> = Matches::new(input).collect();
        assert_eq!(old_matches, new_matches);
    }

    /// Collects the text of every escape code the DFA finds in `input`.
    fn matched_strs(input: &str) -> Vec<&str> {
        Matches::new(input).map(|m| m.as_str()).collect()
    }

    /// Drives an `AnsiCodeIterator` over `input`, checking after every step the
    /// yielded item, `current_slice` and `rest_slice`, and finally exhaustion.
    fn assert_iteration(input: &str, steps: &[((&str, bool), &str, &str)]) {
        let mut iter = AnsiCodeIterator::new(input);
        for &(item, current, rest) in steps {
            assert_eq!(iter.next(), Some(item));
            assert_eq!(iter.current_slice(), current);
            assert_eq!(iter.rest_slice(), rest);
        }
        assert_eq!(iter.next(), None);
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            assert_same_as_regex(&s);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        // Appends every combination of `remaining` more bytes to `prefix` and
        // checks each resulting byte string that is valid UTF-8.
        fn check_with_prefix(remaining: usize, prefix: &mut Vec<u8>) {
            if remaining == 0 {
                if let Ok(s) = core::str::from_utf8(prefix) {
                    assert_same_as_regex(s);
                }
                return;
            }

            for &byte in POSSIBLE_BYTES {
                prefix.push(byte);
                check_with_prefix(remaining - 1, prefix);
                prefix.pop();
            }
        }

        for str_len in 0..=6 {
            check_with_prefix(str_len, &mut Vec::with_capacity(str_len));
        }
    }

    #[test]
    fn complex_data() {
        let path = std::path::Path::new("tests")
            .join("data")
            .join("sample_zellij_session.log");
        let log = std::fs::read_to_string(path).unwrap();

        assert_same_as_regex(&log);
    }

    #[test]
    fn state_machine() {
        let mut state = State::default();
        assert!(!state.is_final());

        "\x1b)B".chars().for_each(|c| state.transition(c));
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let found = matched_strs("\x1b\x1bf");
        assert_eq!(&["\x1bf"], found.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let found = matched_strs(s);
        assert_eq!(&[s], found.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let found = matched_strs(s);
        assert_eq!(&[s], found.as_slice());
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let mut iter = AnsiCodeIterator::new("\x1b(0lpq\x1b)Benglish");
        let expected = [
            ("\x1b(0", true),
            ("lpq", false),
            ("\x1b)B", true),
            ("english", false),
        ];
        for &item in expected.iter() {
            assert_eq!(iter.next(), Some(item));
        }
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        assert_iteration(
            &s,
            &[
                (("Hello ", false), "Hello ", "\x1b[31mWorld\x1b[0m!"),
                (("\x1b[31m", true), "Hello \x1b[31m", "World\x1b[0m!"),
                (("World", false), "Hello \x1b[31mWorld", "\x1b[0m!"),
                (("\x1b[0m", true), "Hello \x1b[31mWorld\x1b[0m", "!"),
                (("!", false), "Hello \x1b[31mWorld\x1b[0m!", ""),
            ],
        );
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        assert_iteration(
            &s,
            &[
                (("\x1b[31m", true), "\x1b[31m", "\x1b[1ma\x1b[0m"),
                (("\x1b[1m", true), "\x1b[31m\x1b[1m", "a\x1b[0m"),
                (("a", false), "\x1b[31m\x1b[1ma", "\x1b[0m"),
                (("\x1b[0m", true), "\x1b[31m\x1b[1ma\x1b[0m", ""),
            ],
        );
    }
}