skim/
ansi.rs

1// Parse ANSI attr code
2use std::default::Default;
3
4use std::cmp::max;
5use tuikit::prelude::*;
6use vte::{Params, Perform};
7
8/// An ANSI Parser, will parse one line at a time.
9///
10/// It will cache the latest attribute used, that means if an attribute affect multiple
11/// lines, the parser will recognize it.
12#[derive(Debug, Default)]
13pub struct ANSIParser {
14    partial_str: String,
15    last_attr: Attr,
16
17    stripped: String,
18    stripped_char_count: usize,
19    fragments: Vec<(Attr, (u32, u32))>, // [char_index_start, char_index_end)
20}
21
22impl Perform for ANSIParser {
23    fn print(&mut self, ch: char) {
24        self.partial_str.push(ch);
25    }
26
27    fn execute(&mut self, byte: u8) {
28        match byte {
29            // \b to delete character back
30            0x08 => {
31                self.partial_str.pop();
32            }
33            // put back \0 \r \n \t
34            0x00 | 0x0d | 0x0A | 0x09 => self.partial_str.push(byte as char),
35            // ignore all others
36            _ => trace!("AnsiParser:execute ignored {:?}", byte),
37        }
38    }
39
40    fn hook(&mut self, params: &Params, _intermediates: &[u8], _ignore: bool, _action: char) {
41        trace!("AnsiParser:hook ignored {:?}", params);
42    }
43
44    fn put(&mut self, byte: u8) {
45        trace!("AnsiParser:put ignored {:?}", byte);
46    }
47
48    fn unhook(&mut self) {
49        trace!("AnsiParser:unhook ignored");
50    }
51
52    fn osc_dispatch(&mut self, params: &[&[u8]], _bell_terminated: bool) {
53        trace!("AnsiParser:osc ignored {:?}", params);
54    }
55
56    fn csi_dispatch(&mut self, params: &Params, _intermediates: &[u8], _ignore: bool, action: char) {
57        // https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_(Select_Graphic_Rendition)_parameters
58        // Only care about graphic modes, ignore all others
59
60        if action != 'm' {
61            trace!("ignore: params: {:?}, action : {:?}", params, action);
62            return;
63        }
64
65        // \[[m => means reset
66        let mut attr = if params.is_empty() {
67            Attr::default()
68        } else {
69            self.last_attr
70        };
71
72        let mut iter = params.iter();
73        while let Some(code) = iter.next() {
74            match code[0] {
75                0 => attr = Attr::default(),
76                1 => attr.effect |= Effect::BOLD,
77                2 => attr.effect |= !Effect::BOLD,
78                4 => attr.effect |= Effect::UNDERLINE,
79                5 => attr.effect |= Effect::BLINK,
80                7 => attr.effect |= Effect::REVERSE,
81                num @ 30..=37 => attr.fg = Color::AnsiValue((num - 30) as u8),
82                38 => match iter.next() {
83                    Some(&[2]) => {
84                        // ESC[ 38;2;<r>;<g>;<b> m Select RGB foreground color
85                        let (r, g, b) = match (iter.next(), iter.next(), iter.next()) {
86                            (Some(r), Some(g), Some(b)) => (r[0] as u8, g[0] as u8, b[0] as u8),
87                            _ => {
88                                trace!("ignore CSI {:?} m", params);
89                                continue;
90                            }
91                        };
92
93                        attr.fg = Color::Rgb(r, g, b);
94                    }
95                    Some(&[5]) => {
96                        // ESC[ 38;5;<n> m Select foreground color
97                        let color = match iter.next() {
98                            Some(color) => color[0] as u8,
99                            None => {
100                                trace!("ignore CSI {:?} m", params);
101                                continue;
102                            }
103                        };
104
105                        attr.fg = Color::AnsiValue(color);
106                    }
107                    _ => {
108                        trace!("error on parsing CSI {:?} m", params);
109                    }
110                },
111                39 => attr.fg = Color::Default,
112                num @ 40..=47 => attr.bg = Color::AnsiValue((num - 40) as u8),
113                48 => match iter.next() {
114                    Some(&[2]) => {
115                        // ESC[ 48;2;<r>;<g>;<b> m Select RGB background color
116                        let (r, g, b) = match (iter.next(), iter.next(), iter.next()) {
117                            (Some(r), Some(g), Some(b)) => (r[0] as u8, g[0] as u8, b[0] as u8),
118                            _ => {
119                                trace!("ignore CSI {:?} m", params);
120                                continue;
121                            }
122                        };
123
124                        attr.bg = Color::Rgb(r, g, b);
125                    }
126                    Some(&[5]) => {
127                        // ESC[ 48;5;<n> m Select background color
128                        let color = match iter.next() {
129                            Some(color) => color[0] as u8,
130                            None => {
131                                trace!("ignore CSI {:?} m", params);
132                                continue;
133                            }
134                        };
135
136                        attr.bg = Color::AnsiValue(color);
137                    }
138                    _ => {
139                        trace!("ignore CSI {:?} m", params);
140                    }
141                },
142                49 => attr.bg = Color::Default,
143                num @ 90..=97 => attr.fg = Color::AnsiValue((num - 82) as u8),
144                num @ 100..=107 => attr.bg = Color::AnsiValue((num - 92) as u8),
145                _ => {
146                    trace!("ignore CSI {:?} m", params);
147                }
148            }
149        }
150
151        self.attr_change(attr);
152    }
153
154    fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {
155        // ESC characters are replaced with \[
156        self.partial_str.push('"');
157        self.partial_str.push('[');
158    }
159}
160
161impl ANSIParser {
162    /// save the partial_str into fragments with current attr
163    fn save_str(&mut self) {
164        if self.partial_str.is_empty() {
165            return;
166        }
167
168        let string = std::mem::take(&mut self.partial_str);
169        let string_char_count = string.chars().count();
170        self.fragments.push((
171            self.last_attr,
172            (
173                self.stripped_char_count as u32,
174                (self.stripped_char_count + string_char_count) as u32,
175            ),
176        ));
177        self.stripped_char_count += string_char_count;
178        self.stripped.push_str(&string);
179    }
180
181    // accept a new attr
182    fn attr_change(&mut self, new_attr: Attr) {
183        if new_attr == self.last_attr {
184            return;
185        }
186
187        self.save_str();
188        self.last_attr = new_attr;
189    }
190
191    pub fn parse_ansi(&mut self, text: &str) -> AnsiString {
192        let mut state_machine = vte::Parser::new();
193
194        for byte in text.as_bytes() {
195            state_machine.advance(self, &[*byte]);
196        }
197        self.save_str();
198
199        let stripped = std::mem::take(&mut self.stripped);
200        self.stripped_char_count = 0;
201        let fragments = std::mem::take(&mut self.fragments);
202        AnsiString::new_string(stripped, fragments)
203    }
204}
205
206/// A String that contains ANSI state (e.g. colors)
207///
208/// It is internally represented as Vec<(attr, string)>
209#[derive(Clone, Debug)]
210pub struct AnsiString {
211    stripped: Box<str>,
212    // attr: start, end
213    fragments: Option<Vec<(Attr, (u32, u32))>>,
214}
215
216impl AnsiString {
217    pub fn new_empty() -> Self {
218        Self {
219            stripped: "".into(),
220            fragments: None,
221        }
222    }
223
224    fn new_raw_string(string: String) -> Self {
225        Self {
226            stripped: string.into(),
227            fragments: None,
228        }
229    }
230
231    fn new_raw_str(str_ref: &str) -> Self {
232        Self {
233            stripped: str_ref.into(),
234            fragments: None,
235        }
236    }
237
238    /// assume the fragments are ordered by (start, end) while end is exclusive
239    pub fn new_str(stripped: &str, fragments: Vec<(Attr, (u32, u32))>) -> Self {
240        let fragments_empty = fragments.is_empty() || (fragments.len() == 1 && fragments[0].0 == Attr::default());
241        Self {
242            stripped: stripped.into(),
243            fragments: if fragments_empty { None } else { Some(fragments) },
244        }
245    }
246
247    /// assume the fragments are ordered by (start, end) while end is exclusive
248    pub fn new_string(stripped: String, fragments: Vec<(Attr, (u32, u32))>) -> Self {
249        let fragments_empty = fragments.is_empty() || (fragments.len() == 1 && fragments[0].0 == Attr::default());
250        Self {
251            stripped: stripped.into(),
252            fragments: if fragments_empty { None } else { Some(fragments) },
253        }
254    }
255
256    pub fn parse(raw: &str) -> AnsiString {
257        ANSIParser::default().parse_ansi(raw)
258    }
259
260    #[inline]
261    pub fn is_empty(&self) -> bool {
262        self.stripped.is_empty()
263    }
264
265    #[inline]
266    pub fn into_inner(self) -> Box<str> {
267        self.stripped
268    }
269
270    pub fn iter(&self) -> Box<dyn Iterator<Item = (char, Attr)> + '_> {
271        if self.fragments.is_none() {
272            return Box::new(self.stripped.chars().map(|c| (c, Attr::default())).to_owned());
273        }
274
275        Box::new(AnsiStringIterator::new(
276            self.stripped.as_ref(),
277            self.fragments.as_ref().unwrap(),
278        ))
279    }
280
281    pub fn has_attrs(&self) -> bool {
282        self.fragments.is_some()
283    }
284
285    #[inline]
286    pub fn stripped(&self) -> &str {
287        &self.stripped
288    }
289
290    pub fn override_attrs(&mut self, attrs: Vec<(Attr, (u32, u32))>) {
291        if attrs.is_empty() {
292            // pass
293        } else if self.fragments.is_none() {
294            self.fragments = Some(attrs);
295        } else {
296            let current_fragments = self.fragments.take().expect("unreachable");
297            let new_fragments = merge_fragments(&current_fragments, &attrs);
298            self.fragments.replace(new_fragments);
299        }
300    }
301}
302
303impl From<&str> for AnsiString {
304    fn from(s: &str) -> AnsiString {
305        AnsiString::new_raw_str(s)
306    }
307}
308
309impl From<String> for AnsiString {
310    fn from(s: String) -> Self {
311        AnsiString::new_raw_string(s)
312    }
313}
314
315// (text, indices, highlight attribute) -> AnsiString
316impl From<(&str, &[usize], Attr)> for AnsiString {
317    fn from((text, indices, attr): (&str, &[usize], Attr)) -> Self {
318        let fragments = indices
319            .iter()
320            .map(|&idx| (attr, (idx as u32, 1 + idx as u32)))
321            .collect();
322        AnsiString::new_str(text, fragments)
323    }
324}
325
326/// An iterator over all the (char, attr) characters.
327pub struct AnsiStringIterator<'a> {
328    fragments: &'a [(Attr, (u32, u32))],
329    fragment_idx: usize,
330    chars_iter: std::iter::Enumerate<std::str::Chars<'a>>,
331}
332
333impl<'a> AnsiStringIterator<'a> {
334    pub fn new(stripped: &'a str, fragments: &'a [(Attr, (u32, u32))]) -> Self {
335        Self {
336            fragments,
337            fragment_idx: 0,
338            chars_iter: stripped.chars().enumerate(),
339        }
340    }
341}
342
343impl<'a> Iterator for AnsiStringIterator<'a> {
344    type Item = (char, Attr);
345
346    fn next(&mut self) -> Option<Self::Item> {
347        match self.chars_iter.next() {
348            Some((char_idx, char)) => {
349                // update fragment_idx
350                loop {
351                    if self.fragment_idx >= self.fragments.len() {
352                        break;
353                    }
354
355                    let (_attr, (_start, end)) = self.fragments[self.fragment_idx];
356                    if char_idx < (end as usize) {
357                        break;
358                    } else {
359                        self.fragment_idx += 1;
360                    }
361                }
362
363                let (attr, (start, end)) = if self.fragment_idx >= self.fragments.len() {
364                    (Attr::default(), (char_idx as u32, 1 + char_idx as u32))
365                } else {
366                    self.fragments[self.fragment_idx]
367                };
368
369                if (start as usize) <= char_idx && char_idx < (end as usize) {
370                    Some((char, attr))
371                } else {
372                    Some((char, Attr::default()))
373                }
374            }
375            None => None,
376        }
377    }
378}
379
380fn merge_fragments(old: &[(Attr, (u32, u32))], new: &[(Attr, (u32, u32))]) -> Vec<(Attr, (u32, u32))> {
381    let mut ret = vec![];
382    let mut i = 0;
383    let mut j = 0;
384    let mut os = 0;
385
386    while i < old.len() && j < new.len() {
387        let (oa, (o_start, oe)) = old[i];
388        let (na, (ns, ne)) = new[j];
389        os = max(os, o_start);
390
391        if ns <= os && ne >= oe {
392            //   [--old--]   | [--old--]   |   [--old--] | [--old--]
393            // [----new----] | [---new---] | [---new---] | [--new--]
394            i += 1; // skip old
395        } else if ns <= os {
396            //           [--old--] |         [--old--] |   [--old--] |   [---old---]
397            // [--new--]           | [--new--]         | [--new--]   |   [--new--]
398            ret.push((na, (ns, ne)));
399            os = ne;
400            j += 1;
401        } else if ns >= oe {
402            // [--old--]         | [--old--]
403            //         [--new--] |           [--new--]
404            ret.push((oa, (os, oe)));
405            i += 1;
406        } else {
407            // [---old---] | [---old---] | [--old--]
408            //  [--new--]  |   [--new--] |      [--new--]
409            ret.push((oa, (os, ns)));
410            os = ns;
411        }
412    }
413
414    if i < old.len() {
415        for &(oa, (s, e)) in old[i..].iter() {
416            ret.push((oa, (max(os, s), e)))
417        }
418    }
419    if j < new.len() {
420        ret.extend_from_slice(&new[j..]);
421    }
422
423    ret
424}
425
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects `text.iter()` so a whole (char, attr) sequence can be compared at once.
    fn styled_chars(text: &AnsiString) -> Vec<(char, Attr)> {
        text.iter().collect()
    }

    /// Truecolor foreground and background both apply to the printed text.
    #[test]
    fn test_ansi_iterator() {
        let parsed = ANSIParser::default().parse_ansi("\x1B[48;2;5;10;15m\x1B[38;2;70;130;180mhi\x1B[0m");
        let styled = Attr {
            fg: Color::Rgb(70, 130, 180),
            bg: Color::Rgb(5, 10, 15),
            ..Attr::default()
        };

        assert_eq!(styled_chars(&parsed), vec![('h', styled), ('i', styled)]);
        assert_eq!(parsed.stripped(), "hi");
    }

    /// Only the chars at the given indices receive the highlight attribute.
    #[test]
    fn test_highlight_indices() {
        let highlight = Attr {
            fg: Color::Rgb(70, 130, 180),
            bg: Color::Rgb(5, 10, 15),
            ..Attr::default()
        };
        let positions: Vec<usize> = vec![1];

        let highlighted = AnsiString::from(("abc", positions.as_slice(), highlight));

        assert_eq!(
            styled_chars(&highlighted),
            vec![('a', Attr::default()), ('b', highlight), ('c', Attr::default())]
        );
    }

    /// Text without escape sequences carries no attributes.
    #[test]
    fn test_normal_string() {
        let parsed = ANSIParser::default().parse_ansi("ab");

        assert!(!parsed.has_attrs());
        assert_eq!(
            styled_chars(&parsed),
            vec![('a', Attr::default()), ('b', Attr::default())]
        );
        assert_eq!(parsed.stripped(), "ab");
    }

    /// Several SGR parameters in one sequence are all applied.
    #[test]
    fn test_multiple_attributes() {
        let parsed = ANSIParser::default().parse_ansi("\x1B[1;31mhi");
        let bold_red = Attr {
            fg: Color::AnsiValue(1),
            effect: Effect::BOLD,
            ..Attr::default()
        };

        assert_eq!(styled_chars(&parsed), vec![('h', bold_red), ('i', bold_red)]);
        assert_eq!(parsed.stripped(), "hi");
    }

    /// `ESC[m` resets the attribute, which starts a second fragment.
    #[test]
    fn test_reset() {
        let parsed = ANSIParser::default().parse_ansi("\x1B[35mA\x1B[mB");

        assert_eq!(parsed.fragments.as_ref().map(Vec::len), Some(2));
        assert_eq!(parsed.stripped(), "AB");
    }

    /// Fragment bounds count chars, so multi-byte chars do not shift the styling.
    #[test]
    fn test_multi_bytes() {
        let parsed = ANSIParser::default().parse_ansi("中`\x1B[0m\x1B[1m\x1B[31mXYZ\x1B[0ms`");
        let plain = Attr::default();
        let bold_red = Attr {
            fg: Color::AnsiValue(1),
            effect: Effect::BOLD,
            ..plain
        };

        assert_eq!(
            styled_chars(&parsed),
            vec![
                ('中', plain),
                ('`', plain),
                ('X', bold_red),
                ('Y', bold_red),
                ('Z', bold_red),
                ('s', plain),
                ('`', plain),
            ]
        );
    }

    /// Table of overlap situations between old and new fragments.
    #[test]
    fn test_merge_fragments() {
        type Fragment = (Attr, (u32, u32));

        fn check(old: &[Fragment], new: &[Fragment], expected: &[Fragment]) {
            assert_eq!(merge_fragments(old, new), expected.to_vec());
        }

        let ao = Attr::default();
        let an = Attr::default().bg(Color::BLUE);
        let old3: [Fragment; 3] = [(ao, (1, 3)), (ao, (5, 7)), (ao, (9, 11))];

        // one side empty
        check(&[(ao, (0, 1)), (ao, (1, 2))], &[], &[(ao, (0, 1)), (ao, (1, 2))]);
        check(&[], &[(an, (0, 1)), (an, (1, 2))], &[(an, (0, 1)), (an, (1, 2))]);

        // new starts before the first old fragment
        check(
            &[(ao, (1, 3)), (ao, (5, 6)), (ao, (9, 10))],
            &[(an, (0, 1))],
            &[(an, (0, 1)), (ao, (1, 3)), (ao, (5, 6)), (ao, (9, 10))],
        );
        check(
            &old3,
            &[(an, (0, 2))],
            &[(an, (0, 2)), (ao, (2, 3)), (ao, (5, 7)), (ao, (9, 11))],
        );
        check(&old3, &[(an, (0, 3))], &[(an, (0, 3)), (ao, (5, 7)), (ao, (9, 11))]);
        check(
            &old3,
            &[(an, (0, 6)), (an, (6, 7))],
            &[(an, (0, 6)), (an, (6, 7)), (ao, (9, 11))],
        );

        // new starts together with the first old fragment
        check(
            &old3,
            &[(an, (1, 2))],
            &[(an, (1, 2)), (ao, (2, 3)), (ao, (5, 7)), (ao, (9, 11))],
        );
        check(&old3, &[(an, (1, 3))], &[(an, (1, 3)), (ao, (5, 7)), (ao, (9, 11))]);
        check(&old3, &[(an, (1, 4))], &[(an, (1, 4)), (ao, (5, 7)), (ao, (9, 11))]);

        // new starts inside the first old fragment
        check(
            &old3,
            &[(an, (2, 3))],
            &[(ao, (1, 2)), (an, (2, 3)), (ao, (5, 7)), (ao, (9, 11))],
        );
        check(
            &old3,
            &[(an, (2, 4))],
            &[(ao, (1, 2)), (an, (2, 4)), (ao, (5, 7)), (ao, (9, 11))],
        );
        check(
            &old3,
            &[(an, (2, 6))],
            &[(ao, (1, 2)), (an, (2, 6)), (ao, (6, 7)), (ao, (9, 11))],
        );
    }

    /// Regression test for a highlight placed after multi-byte chars.
    #[test]
    fn test_multi_byte_359() {
        // https://github.com/lotabout/skim/issues/359
        let highlight = Attr::default().effect(Effect::BOLD);
        let text = AnsiString::new_str("ああa", vec![(highlight, (2, 3))]);

        assert_eq!(
            styled_chars(&text),
            vec![('あ', Attr::default()), ('あ', Attr::default()), ('a', highlight)]
        );
    }
}