console/
ansi.rs

1#[cfg(feature = "alloc")]
2use alloc::{borrow::Cow, string::String};
3use core::{
4    iter::{FusedIterator, Peekable},
5    str::CharIndices,
6};
7
/// States of the hand-written DFA that recognises one ANSI escape sequence.
///
/// A machine begins in `Start` and is advanced one character at a time with
/// `transition`. Whenever `is_final` reports `true` the characters consumed so
/// far form a complete sequence; once `is_trapped` reports `true` no further
/// input can extend or create a match.
#[derive(Debug, Clone, Copy)]
enum State {
    /// Nothing consumed yet; only an introducer (`ESC` or `CSI`) is accepted.
    Start,
    /// The introducer has been consumed.
    S1,
    /// The introducer was followed by exactly one `(` or `)`.
    S2,
    /// A single parenthesis followed by `0`, `1` or `2`. Final, accepts nothing more.
    S3,
    /// Inside a run of prefix characters (`[`, `(`, `)`, `#`, `;`, `?`).
    S4,
    /// First digit of a numeric parameter. Final.
    S5,
    /// Second consecutive digit. Final.
    S6,
    /// Third consecutive digit. Final.
    S7,
    /// Fourth consecutive digit. Final.
    S8,
    /// Fifth consecutive digit. Final, accepts nothing more.
    S9,
    /// Directly after a `;` that follows a parameter or another such `;`.
    S10,
    /// A terminating character has been consumed. Final, accepts nothing more.
    S11,
    /// Dead state: the consumed input cannot be continued into a match.
    Trap,
}

impl Default for State {
    /// The machine always begins in `State::Start`.
    fn default() -> Self {
        Self::Start
    }
}

impl State {
    /// Reports whether the input consumed so far is a complete escape sequence.
    fn is_final(&self) -> bool {
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::Start | Self::S1 | Self::S2 | Self::S4 | Self::S10 | Self::Trap => false,
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
        }
    }

    /// Reports whether the machine is stuck in the dead state.
    fn is_trapped(&self) -> bool {
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::Trap => true,
            _ => false,
        }
    }

    /// Advances the machine by the single character `c`.
    ///
    /// Any character that is not allowed in the current state moves the machine to
    /// `State::Trap`, from which it never leaves.
    fn transition(&mut self, c: char) {
        let next = match *self {
            Self::Start => match c {
                '\u{1b}' | '\u{9b}' => Self::S1,
                _ => Self::Trap,
            },
            Self::S1 => match c {
                '(' | ')' => Self::S2,
                ';' | '[' | '#' | '?' => Self::S4,
                '0'..='9' => Self::S5,
                _ => Self::terminator(c),
            },
            Self::S2 => match c {
                '(' | ')' | ';' | '[' | '#' | '?' => Self::S4,
                // A lone parenthesis followed by 0-2 is a complete sequence of its own
                '0'..='2' => Self::S3,
                '3'..='9' => Self::S5,
                _ => Self::terminator(c),
            },
            Self::S4 => match c {
                '(' | ')' | ';' | '[' | '#' | '?' => Self::S4,
                '0'..='9' => Self::S5,
                _ => Self::terminator(c),
            },
            Self::S5 => Self::parameter(c, Self::S6),
            Self::S6 => Self::parameter(c, Self::S7),
            Self::S7 => Self::parameter(c, Self::S8),
            Self::S8 => Self::parameter(c, Self::S9),
            Self::S10 => Self::parameter(c, Self::S5),
            // These states accept no further input at all
            Self::S3 | Self::S9 | Self::S11 | Self::Trap => Self::Trap,
        };

        *self = next;
    }

    /// Shared step for the states inside the parameter list: `;` starts the next
    /// parameter, a digit moves on to `after_digit`, anything else must terminate.
    fn parameter(c: char, after_digit: Self) -> Self {
        match c {
            ';' => Self::S10,
            '0'..='9' => after_digit,
            _ => Self::terminator(c),
        }
    }

    /// Maps a terminating character to `State::S11` and everything else to `State::Trap`.
    fn terminator(c: char) -> Self {
        match c {
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
                Self::S11
            }
            _ => Self::Trap,
        }
    }
}
106
/// Iterator state for walking the escape sequences found in a string.
#[derive(Debug)]
struct Matches<'a> {
    /// The string being searched.
    s: &'a str,
    /// Cursor over the characters of `s` together with their byte offsets.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a searcher positioned at the beginning of `s`.
    fn new(s: &'a str) -> Self {
        Self {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
119
/// The location of one escape sequence inside a searched string.
#[derive(Debug)]
struct Match<'a> {
    /// The complete string that was searched.
    text: &'a str,
    /// Byte offset in `text` at which the sequence begins.
    start: usize,
    /// Byte offset in `text` one past the end of the sequence.
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched sequence as a slice of the searched string.
    #[inline]
    pub(crate) fn as_str(&self) -> &'a str {
        let span = self.start..self.end;
        &self.text[span]
    }
}
133
134impl<'a> Iterator for Matches<'a> {
135    type Item = Match<'a>;
136
137    fn next(&mut self) -> Option<Self::Item> {
138        find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
139            text: self.s,
140            start,
141            end,
142        })
143    }
144}
145
146impl FusedIterator for Matches<'_> {}
147
148fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
149    'outer: loop {
150        if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
151            let start = *start;
152            let mut state = State::default();
153            let mut maybe_end = None;
154
155            loop {
156                let item = it.peek();
157
158                if let Some((idx, c)) = item {
159                    state.transition(*c);
160
161                    if state.is_final() {
162                        maybe_end = Some(*idx);
163                    }
164                }
165
166                // The match is greedy so run till we hit the trap state no matter what. A valid
167                // match is just one that was final at some point
168                if state.is_trapped() || item.is_none() {
169                    match maybe_end {
170                        Some(end) => {
171                            // All possible final characters are a single byte so it's safe to make
172                            // the end exclusive by just adding one
173                            return Some((start, end + 1));
174                        }
175                        // The character we are peeking right now might be the start of a match so
176                        // we want to continue the loop without popping off that char
177                        None => continue 'outer,
178                    }
179                }
180
181                it.next();
182            }
183        }
184
185        it.next();
186    }
187}
188
/// Helper function to strip ansi codes.
///
/// Returns the input unchanged as `Cow::Borrowed` when it contains no ansi codes, so
/// nothing is allocated in that case. Otherwise a new `String` holding everything
/// except the codes is returned as `Cow::Owned`.
#[cfg(feature = "alloc")]
pub fn strip_ansi_codes(s: &str) -> Cow<'_, str> {
    let mut codes = Matches::new(s);

    let first = match codes.next() {
        Some(code) => code,
        None => return Cow::Borrowed(s),
    };

    // At least the first code is dropped, which gives an upper bound for the result
    let mut stripped = String::with_capacity(s.len() - (first.end - first.start));
    stripped.push_str(&s[..first.start]);

    // Copy the text between consecutive codes in a single pass over the input
    let mut keep_from = first.end;
    for code in codes {
        stripped.push_str(&s[keep_from..code.start]);
        keep_from = code.end;
    }
    stripped.push_str(&s[keep_from..]);

    Cow::Owned(stripped)
}
203
204/// An iterator over ansi codes in a string.
205///
206/// This type can be used to scan over ansi codes in a string.
207/// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of
208/// the original string and `is_ansi` indicates if the slice contains
209/// ansi codes or string values.
210pub struct AnsiCodeIterator<'a> {
211    s: &'a str,
212    pending_item: Option<(&'a str, bool)>,
213    last_idx: usize,
214    cur_idx: usize,
215    iter: Matches<'a>,
216}
217
218impl<'a> AnsiCodeIterator<'a> {
219    /// Creates a new ansi code iterator.
220    pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
221        AnsiCodeIterator {
222            s,
223            pending_item: None,
224            last_idx: 0,
225            cur_idx: 0,
226            iter: Matches::new(s),
227        }
228    }
229
230    /// Returns the string slice up to the current match.
231    pub fn current_slice(&self) -> &str {
232        &self.s[..self.cur_idx]
233    }
234
235    /// Returns the string slice from the current match to the end.
236    pub fn rest_slice(&self) -> &str {
237        &self.s[self.cur_idx..]
238    }
239}
240
241impl<'a> Iterator for AnsiCodeIterator<'a> {
242    type Item = (&'a str, bool);
243
244    fn next(&mut self) -> Option<(&'a str, bool)> {
245        if let Some(pending_item) = self.pending_item.take() {
246            self.cur_idx += pending_item.0.len();
247            Some(pending_item)
248        } else if let Some(m) = self.iter.next() {
249            let s = &self.s[self.last_idx..m.start];
250            self.last_idx = m.end;
251            if s.is_empty() {
252                self.cur_idx = m.end;
253                Some((m.as_str(), true))
254            } else {
255                self.cur_idx = m.start;
256                self.pending_item = Some((m.as_str(), true));
257                Some((s, false))
258            }
259        } else if self.last_idx < self.s.len() {
260            let rv = &self.s[self.last_idx..];
261            self.cur_idx = self.s.len();
262            self.last_idx = self.s.len();
263            Some((rv, false))
264        } else {
265            None
266        }
267    }
268}
269
270impl FusedIterator for AnsiCodeIterator<'_> {}
271
#[cfg(test)]
mod tests {
    use super::*;

    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap()
    });

    // Lets `assert_eq!` compare regex matches with dfa matches by their byte ranges
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ')
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        // Recursively appends every possible byte until `len` more bytes were added, then
        // compares both matchers on the result (byte strings that are not valid UTF-8 are
        // skipped)
        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                if let Ok(s) = core::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[cfg(feature = "alloc")]
    #[test]
    fn strip_removes_codes_and_borrows_plain_text() {
        assert_eq!(strip_ansi_codes("\x1b[31mred\x1b[0m plain"), "red plain");
        assert_eq!(strip_ansi_codes("\x1b[0m"), "");

        // Input without any codes must be handed back without allocating
        match strip_ansi_codes("plain") {
            Cow::Borrowed(s) => assert_eq!(s, "plain"),
            Cow::Owned(_) => panic!("plain text should be returned borrowed"),
        }
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}