skimmer/
ansi.rs

1#![allow(clippy::module_name_repetitions)]
2// Parse ANSI attr code
3use std::default::Default;
4
5use std::cmp::max;
6use tuikit::prelude::*;
7use vte::{Params, Perform};
8
9/// An ANSI Parser, will parse one line at a time.
10///
11/// It will cache the latest attribute used, that means if an attribute affect multiple
12/// lines, the parser will recognize it.
13#[derive(Debug, Default)]
14pub struct ANSIParser {
15    partial_str: String,
16    last_attr: Attr,
17
18    stripped: String,
19    stripped_char_count: usize,
20    fragments: Vec<(Attr, (usize, usize))>, // [char_index_start, char_index_end)
21}
22
23impl Perform for ANSIParser {
24    fn print(&mut self, ch: char) {
25        self.partial_str.push(ch);
26    }
27
28    fn execute(&mut self, byte: u8) {
29        match byte {
30            // \b to delete character back
31            0x08 => {
32                self.partial_str.pop();
33            }
34            // put back \0 \r \n \t
35            0x00 | 0x0d | 0x0A | 0x09 => self.partial_str.push(byte as char),
36            // ignore all others
37            _ => trace!("AnsiParser:execute ignored {:?}", byte),
38        }
39    }
40
41    fn hook(&mut self, params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {
42        trace!("AnsiParser:hook ignored {:?}", params);
43    }
44
45    fn put(&mut self, byte: u8) {
46        trace!("AnsiParser:put ignored {:?}", byte);
47    }
48
49    fn unhook(&mut self) {
50        trace!("AnsiParser:unhook ignored");
51    }
52
53    fn osc_dispatch(&mut self, params: &[&[u8]], _bell_terminated: bool) {
54        trace!("AnsiParser:osc ignored {:?}", params);
55    }
56
57    fn csi_dispatch(&mut self, params: &Params, _intermediates: &[u8], _ignore: bool, action: char) {
58        // https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_(Select_Graphic_Rendition)_parameters
59        // Only care about graphic modes, ignore all others
60
61        if action != 'm' {
62            trace!("ignore: params: {:?}, action : {:?}", params, action);
63            return;
64        }
65
66        // \[[m => means reset
67        let mut attr = if params.is_empty() {
68            Attr::default()
69        } else {
70            self.last_attr
71        };
72
73        let mut iter = params.iter();
74        while let Some(code) = iter.next() {
75            match code[0] {
76                0 => attr = Attr::default(),
77                1 => attr.effect |= Effect::BOLD,
78                2 => attr.effect |= !Effect::BOLD,
79                4 => attr.effect |= Effect::UNDERLINE,
80                5 => attr.effect |= Effect::BLINK,
81                7 => attr.effect |= Effect::REVERSE,
82                num @ 30..=37 => attr.fg = Color::AnsiValue(u8::try_from(num - 30).unwrap()),
83                38 => match iter.next() {
84                    Some(&[2]) => {
85                        // ESC[ 38;2;<r>;<g>;<b> m Select RGB foreground color
86                        if let (Some(r), Some(g), Some(b)) = (iter.next(), iter.next(), iter.next()) {
87                            attr.fg = Color::Rgb(
88                                u8::try_from(r[0]).unwrap(),
89                                u8::try_from(g[0]).unwrap(),
90                                u8::try_from(b[0]).unwrap(),
91                            );
92                        } else {
93                            trace!("ignore CSI {:?} m", params);
94                            continue;
95                        }
96                    }
97                    Some(&[5]) => {
98                        // ESC[ 38;5;<n> m Select foreground color
99                        if let Some(color) = iter.next() {
100                            attr.fg = Color::AnsiValue(u8::try_from(color[0]).unwrap());
101                        } else {
102                            trace!("ignore CSI {:?} m", params);
103                            continue;
104                        }
105                    }
106                    _ => {
107                        trace!("error on parsing CSI {:?} m", params);
108                    }
109                },
110                39 => attr.fg = Color::Default,
111                num @ 40..=47 => attr.bg = Color::AnsiValue((num - 40) as u8),
112                48 => match iter.next() {
113                    Some(&[2]) => {
114                        // ESC[ 48;2;<r>;<g>;<b> m Select RGB background color
115                        if let (Some(r), Some(g), Some(b)) = (iter.next(), iter.next(), iter.next()) {
116                            attr.bg = Color::Rgb(
117                                u8::try_from(r[0]).unwrap(),
118                                u8::try_from(g[0]).unwrap(),
119                                u8::try_from(b[0]).unwrap(),
120                            );
121                        } else {
122                            trace!("ignore CSI {:?} m", params);
123                            continue;
124                        }
125                    }
126                    Some(&[5]) => {
127                        // ESC[ 48;5;<n> m Select background color
128                        if let Some(color) = iter.next() {
129                            attr.bg = Color::AnsiValue(u8::try_from(color[0]).unwrap());
130                        } else {
131                            trace!("ignore CSI {:?} m", params);
132                            continue;
133                        }
134                    }
135                    _ => {
136                        trace!("ignore CSI {:?} m", params);
137                    }
138                },
139                49 => attr.bg = Color::Default,
140                num @ 90..=97 => attr.fg = Color::AnsiValue(u8::try_from(num - 82).unwrap()),
141                num @ 100..=107 => attr.bg = Color::AnsiValue(u8::try_from(num - 92).unwrap()),
142                _ => {
143                    trace!("ignore CSI {:?} m", params);
144                }
145            }
146        }
147
148        self.attr_change(attr);
149    }
150
151    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {
152        // ESC characters are replaced with \[
153        self.partial_str.push('"');
154        self.partial_str.push('[');
155    }
156}
157
158impl ANSIParser {
159    /// save the `partial_str` into fragments with current attr
160    fn save_str(&mut self) {
161        if self.partial_str.is_empty() {
162            return;
163        }
164
165        let string = std::mem::take(&mut self.partial_str);
166        let string_char_count = string.chars().count();
167        self.fragments.push((
168            self.last_attr,
169            (self.stripped_char_count, self.stripped_char_count + string_char_count),
170        ));
171        self.stripped_char_count += string_char_count;
172        self.stripped.push_str(&string);
173    }
174
175    // accept a new attr
176    fn attr_change(&mut self, new_attr: Attr) {
177        if new_attr == self.last_attr {
178            return;
179        }
180
181        self.save_str();
182        self.last_attr = new_attr;
183    }
184
185    pub fn parse_ansi(&mut self, text: &str) -> AnsiString {
186        let mut statemachine = vte::Parser::new();
187
188        for byte in text.as_bytes() {
189            statemachine.advance(self, *byte);
190        }
191        self.save_str();
192
193        let stripped = std::mem::take(&mut self.stripped);
194        self.stripped_char_count = 0;
195        let fragments = std::mem::take(&mut self.fragments);
196        AnsiString::new_string(stripped, fragments)
197    }
198}
199
200type Fragment = (Attr, (usize, usize));
201
202/// A String that contains ANSI state (e.g. colors)
203///
204/// It is internally represented as Vec<(attr, string)>
205#[derive(Clone, Debug)]
206pub struct AnsiString {
207    stripped: String,
208    // attr: start, end
209    fragments: Option<Vec<Fragment>>,
210}
211
212impl AnsiString {
213    #[must_use]
214    pub fn new_empty() -> Self {
215        Self {
216            stripped: String::new(),
217            fragments: None,
218        }
219    }
220
221    #[must_use]
222    fn new_raw_string(string: String) -> Self {
223        Self {
224            stripped: string,
225            fragments: None,
226        }
227    }
228
229    /// assume the fragments are ordered by (start, end) while end is exclusive
230    #[must_use]
231    pub fn new_string(stripped: String, fragments: Vec<Fragment>) -> Self {
232        let fragments_empty = fragments.is_empty() || (fragments.len() == 1 && fragments[0].0 == Attr::default());
233        Self {
234            stripped,
235            fragments: if fragments_empty { None } else { Some(fragments) },
236        }
237    }
238
239    #[must_use]
240    pub fn parse(raw: &str) -> AnsiString {
241        ANSIParser::default().parse_ansi(raw)
242    }
243
244    #[inline]
245    #[must_use]
246    pub fn is_empty(&self) -> bool {
247        self.stripped.is_empty()
248    }
249
250    #[inline]
251    #[must_use]
252    pub fn into_inner(self) -> String {
253        self.stripped
254    }
255
256    // Iterate over fragments
257    //
258    // # Panics
259    // Panics if it cannot acquire a reference to the fragments
260    pub fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = (char, Attr)> + 'a> {
261        if self.fragments.is_none() {
262            return Box::new(self.stripped.chars().map(|c| (c, Attr::default())));
263        }
264
265        Box::new(AnsiStringIterator::new(
266            &self.stripped,
267            self.fragments.as_ref().unwrap(),
268        ))
269    }
270
271    #[must_use]
272    pub fn has_attrs(&self) -> bool {
273        self.fragments.is_some()
274    }
275
276    #[inline]
277    pub fn stripped(&self) -> &str {
278        &self.stripped
279    }
280
281    // Override the attributes
282    //
283    // # Panics
284    // Should not panic (only when it fails to take the value from a Some option)
285    pub fn override_attrs(&mut self, attrs: Vec<Fragment>) {
286        if attrs.is_empty() {
287            // pass
288        } else if self.fragments.is_none() {
289            self.fragments = Some(attrs);
290        } else {
291            let current_fragments = self.fragments.take().expect("unreachable");
292            let new_fragments = merge_fragments(&current_fragments, &attrs);
293            self.fragments.replace(new_fragments);
294        }
295    }
296}
297
298impl<'a> IntoIterator for &'a AnsiString {
299    type IntoIter = std::boxed::Box<(dyn std::iter::Iterator<Item = (char, tuikit::attr::Attr)> + 'a)>;
300    type Item = (char, tuikit::attr::Attr);
301    fn into_iter(self) -> Self::IntoIter {
302        self.iter()
303    }
304}
305
306impl From<String> for AnsiString {
307    fn from(s: String) -> AnsiString {
308        AnsiString::new_raw_string(s)
309    }
310}
311
312// (text, indices, highlight attribute) -> AnsiString
313impl<'a> From<(String, &'a [usize], Attr)> for AnsiString {
314    fn from((text, indices, attr): (String, &'a [usize], Attr)) -> Self {
315        let fragments: Vec<Fragment> = indices.iter().map(|&idx| (attr, (idx, 1 + idx))).collect();
316        AnsiString::new_string(text, fragments)
317    }
318}
319
320/// An iterator over all the (char, attr) characters.
321pub struct AnsiStringIterator<'a> {
322    fragments: &'a [Fragment],
323    fragment_idx: usize,
324    chars_iter: std::iter::Enumerate<std::str::Chars<'a>>,
325}
326
327impl<'a> AnsiStringIterator<'a> {
328    pub fn new(stripped: &'a str, fragments: &'a [Fragment]) -> Self {
329        Self {
330            fragments,
331            fragment_idx: 0,
332            chars_iter: stripped.chars().enumerate(),
333        }
334    }
335}
336
337impl<'a> Iterator for AnsiStringIterator<'a> {
338    type Item = (char, Attr);
339
340    fn next(&mut self) -> Option<Self::Item> {
341        match self.chars_iter.next() {
342            Some((char_idx, char)) => {
343                // update fragment_idx
344                loop {
345                    if self.fragment_idx >= self.fragments.len() {
346                        break;
347                    }
348
349                    let (_attr, (_start, end)) = self.fragments[self.fragment_idx];
350                    if char_idx < end {
351                        break;
352                    }
353                    self.fragment_idx += 1;
354                }
355
356                let (attr, (start, end)) = if self.fragment_idx >= self.fragments.len() {
357                    (Attr::default(), (char_idx, 1 + char_idx))
358                } else {
359                    self.fragments[self.fragment_idx]
360                };
361
362                if start <= char_idx && char_idx < end {
363                    Some((char, attr))
364                } else {
365                    Some((char, Attr::default()))
366                }
367            }
368            None => None,
369        }
370    }
371}
372
373fn merge_fragments(old: &[Fragment], new: &[Fragment]) -> Vec<Fragment> {
374    let mut ret = vec![];
375    let mut i = 0;
376    let mut j = 0;
377    let mut os = 0;
378
379    while i < old.len() && j < new.len() {
380        let (oa, (o_start, oe)) = old[i];
381        let (na, (ns, ne)) = new[j];
382        os = max(os, o_start);
383
384        if ns <= os && ne >= oe {
385            //   [--old--]   | [--old--]   |   [--old--] | [--old--]
386            // [----new----] | [---new---] | [---new---] | [--new--]
387            i += 1; // skip old
388        } else if ns <= os {
389            //           [--old--] |         [--old--] |   [--old--] |   [---old---]
390            // [--new--]           | [--new--]         | [--new--]   |   [--new--]
391            ret.push((na, (ns, ne)));
392            os = ne;
393            j += 1;
394        } else if ns >= oe {
395            // [--old--]         | [--old--]
396            //         [--new--] |           [--new--]
397            ret.push((oa, (os, oe)));
398            i += 1;
399        } else {
400            // [---old---] | [---old---] | [--old--]
401            //  [--new--]  |   [--new--] |      [--new--]
402            ret.push((oa, (os, ns)));
403            os = ns;
404        }
405    }
406
407    if i < old.len() {
408        for &(oa, (s, e)) in &old[i..] {
409            ret.push((oa, (max(os, s), e)));
410        }
411    }
412    if j < new.len() {
413        ret.extend_from_slice(&new[j..]);
414    }
415
416    ret
417}
418
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects every `(char, Attr)` pair produced by iterating over `s`.
    fn styled_chars(s: &AnsiString) -> Vec<(char, Attr)> {
        s.iter().collect()
    }

    /// RGB foreground/background attribute shared by the colour tests.
    fn rgb_attr() -> Attr {
        Attr {
            fg: Color::Rgb(70, 130, 180),
            bg: Color::Rgb(5, 10, 15),
            ..Attr::default()
        }
    }

    /// Bold text with palette colour 1 as foreground.
    fn bold_red_attr() -> Attr {
        Attr {
            fg: Color::AnsiValue(1),
            effect: Effect::BOLD,
            ..Attr::default()
        }
    }

    #[test]
    fn test_ansi_iterator() {
        let parsed = ANSIParser::default().parse_ansi("\x1B[48;2;5;10;15m\x1B[38;2;70;130;180mhi\x1B[0m");
        let attr = rgb_attr();

        assert_eq!(styled_chars(&parsed), vec![('h', attr), ('i', attr)]);
        assert_eq!(parsed.stripped(), "hi");
    }

    #[test]
    fn test_highlight_indices() {
        let attr = rgb_attr();
        let indices = [1_usize];
        let highlighted = AnsiString::from(("abc".to_string(), &indices[..], attr));

        assert_eq!(
            styled_chars(&highlighted),
            vec![('a', Attr::default()), ('b', attr), ('c', Attr::default())]
        );
    }

    #[test]
    fn test_normal_string() {
        let parsed = ANSIParser::default().parse_ansi("ab");

        assert!(!parsed.has_attrs());
        assert_eq!(
            styled_chars(&parsed),
            vec![('a', Attr::default()), ('b', Attr::default())]
        );
        assert_eq!(parsed.stripped(), "ab");
    }

    #[test]
    fn test_multiple_attributes() {
        let parsed = ANSIParser::default().parse_ansi("\x1B[1;31mhi");
        let attr = bold_red_attr();

        assert_eq!(styled_chars(&parsed), vec![('h', attr), ('i', attr)]);
        assert_eq!(parsed.stripped(), "hi");
    }

    #[test]
    fn test_reset() {
        let parsed = ANSIParser::default().parse_ansi("\x1B[35mA\x1B[mB");

        assert_eq!(parsed.fragments.as_ref().map(Vec::len), Some(2));
        assert_eq!(parsed.stripped(), "AB");
    }

    #[test]
    fn test_multi_bytes() {
        let parsed = ANSIParser::default().parse_ansi("中`\x1B[0m\x1B[1m\x1B[31mXYZ\x1B[0ms`");
        let plain = Attr::default();
        let annotated = bold_red_attr();

        assert_eq!(
            styled_chars(&parsed),
            vec![
                ('中', plain),
                ('`', plain),
                ('X', annotated),
                ('Y', annotated),
                ('Z', annotated),
                ('s', plain),
                ('`', plain),
            ]
        );
    }

    #[test]
    fn test_merge_fragments() {
        let ao = Attr::default();
        let an = Attr::default().bg(Color::BLUE);
        let base = vec![(ao, (1, 3)), (ao, (5, 7)), (ao, (9, 11))];

        // (old, new, expected)
        let cases: Vec<(Vec<Fragment>, Vec<Fragment>, Vec<Fragment>)> = vec![
            (
                vec![(ao, (0, 1)), (ao, (1, 2))],
                vec![],
                vec![(ao, (0, 1)), (ao, (1, 2))],
            ),
            (
                vec![],
                vec![(an, (0, 1)), (an, (1, 2))],
                vec![(an, (0, 1)), (an, (1, 2))],
            ),
            (
                vec![(ao, (1, 3)), (ao, (5, 6)), (ao, (9, 10))],
                vec![(an, (0, 1))],
                vec![(an, (0, 1)), (ao, (1, 3)), (ao, (5, 6)), (ao, (9, 10))],
            ),
            (
                base.clone(),
                vec![(an, (0, 2))],
                vec![(an, (0, 2)), (ao, (2, 3)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (0, 3))],
                vec![(an, (0, 3)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (0, 6)), (an, (6, 7))],
                vec![(an, (0, 6)), (an, (6, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (1, 2))],
                vec![(an, (1, 2)), (ao, (2, 3)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (1, 3))],
                vec![(an, (1, 3)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (1, 4))],
                vec![(an, (1, 4)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (2, 3))],
                vec![(ao, (1, 2)), (an, (2, 3)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base.clone(),
                vec![(an, (2, 4))],
                vec![(ao, (1, 2)), (an, (2, 4)), (ao, (5, 7)), (ao, (9, 11))],
            ),
            (
                base,
                vec![(an, (2, 6))],
                vec![(ao, (1, 2)), (an, (2, 6)), (ao, (6, 7)), (ao, (9, 11))],
            ),
        ];

        for (old, new, expected) in cases {
            assert_eq!(merge_fragments(&old, &new), expected);
        }
    }

    #[test]
    fn test_multi_byte_359() {
        // https://github.com/lotabout/skim/issues/359
        let highlight = Attr::default().effect(Effect::BOLD);
        let styled = AnsiString::new_string("ああa".to_string(), vec![(highlight, (2, 3))]);

        assert_eq!(
            styled_chars(&styled),
            vec![('あ', Attr::default()), ('あ', Attr::default()), ('a', highlight)]
        );
    }
}