Skip to main content

rich_rs/
ansi.rs

1//! ANSI decoding utilities.
2//!
3//! Port of Python Rich's `rich/ansi.py` (subset).
4//!
5//! The primary entry points are:
6//! - `AnsiDecoder` (stateful decoder that persists style across lines)
7//! - `Text::from_ansi` (see `src/text.rs`)
8
9use std::sync::Arc;
10
11use crate::color::SimpleColor;
12use crate::style::{Style, StyleMeta};
13use crate::text::Text;
14
/// Translate ANSI escape codes into styled `Text`.
///
/// This decoder is deliberately lenient: it ignores unknown / malformed escape codes.
/// Style state is preserved across lines (matches Python Rich).
#[derive(Debug, Clone)]
pub struct AnsiDecoder {
    /// Current SGR style; updated by `apply_sgr` and attached to decoded text.
    style: Style,
    /// Current hyperlink URL from OSC 8, if any.
    link: Option<Arc<str>>,
    /// Current hyperlink id from OSC 8 params (`id=...`), if any.
    link_id: Option<Arc<str>>,
}
27
28impl Default for AnsiDecoder {
29    fn default() -> Self {
30        Self {
31            style: Style::new(),
32            link: None,
33            link_id: None,
34        }
35    }
36}
37
38impl AnsiDecoder {
39    pub fn new() -> Self {
40        Self::default()
41    }
42
43    /// Decode ANSI codes in a multi-line string.
44    ///
45    /// This splits on line boundaries and returns one `Text` per line, with style state
46    /// persisting across lines (same behavior as Python Rich's `AnsiDecoder.decode`).
47    pub fn decode(&mut self, terminal_text: &str) -> Vec<Text> {
48        // Python Rich uses `str.splitlines()`, which splits on:
49        // - \n
50        // - \r\n
51        // - \r
52        // Rust's `str::lines()` does *not* split on bare \r, so we implement
53        // a small compatible splitter here.
54        splitlines_like_python(terminal_text)
55            .into_iter()
56            .map(|line| self.decode_line(line))
57            .collect()
58    }
59
60    /// Decode a line containing ANSI escape codes.
61    pub fn decode_line(&mut self, line: &str) -> Text {
62        // Match Rich: only keep content after the last carriage return.
63        let line = line.rsplit('\r').next().unwrap_or(line);
64
65        let mut out = Text::new();
66
67        let bytes = line.as_bytes();
68        let mut index: usize = 0;
69        let mut plain_start: usize = 0;
70
71        while index < bytes.len() {
72            if bytes[index] != 0x1b {
73                index += 1;
74                continue;
75            }
76
77            // Flush preceding plain text.
78            if plain_start < index {
79                let plain = &line[plain_start..index];
80                if !plain.is_empty() {
81                    self.append_to_text(&mut out, plain);
82                }
83            }
84
85            // Parse escape sequence (best-effort).
86            if index + 1 >= bytes.len() {
87                break;
88            }
89
90            match bytes[index + 1] {
91                b'[' => {
92                    // CSI ... <final>
93                    if let Some((final_byte, params_end, next_index)) = parse_csi(bytes, index + 2)
94                    {
95                        if final_byte == b'm' {
96                            let params = &line[index + 2..params_end];
97                            self.apply_sgr(params);
98                        }
99                        index = next_index;
100                        plain_start = index;
101                        continue;
102                    }
103                    // Malformed CSI: skip ESC + '['.
104                    index += 2;
105                    plain_start = index;
106                }
107                b']' => {
108                    // OSC ... (BEL or ST)
109                    if let Some((content_start, content_end, next_index)) =
110                        parse_osc(bytes, index + 2)
111                    {
112                        let content = &line[content_start..content_end];
113                        self.apply_osc(content);
114                        index = next_index;
115                        plain_start = index;
116                        continue;
117                    }
118                    // Malformed OSC: skip ESC + ']'.
119                    index += 2;
120                    plain_start = index;
121                }
122                _ => {
123                    // Unknown escape: skip ESC + one byte.
124                    index += 2;
125                    plain_start = index;
126                }
127            }
128        }
129
130        // Flush trailing plain text.
131        if plain_start < bytes.len() {
132            let plain = &line[plain_start..];
133            if !plain.is_empty() {
134                self.append_to_text(&mut out, plain);
135            }
136        }
137
138        out
139    }
140
141    fn style_for_text(&self) -> Option<Style> {
142        if self.style.is_null() && self.link.is_none() {
143            None
144        } else {
145            Some(self.style)
146        }
147    }
148
149    /// Get the current StyleMeta (for hyperlink), if any.
150    fn meta_for_text(&self) -> Option<StyleMeta> {
151        self.link.as_ref().map(|url| StyleMeta {
152            link: Some(url.clone()),
153            link_id: self.link_id.clone(),
154            meta: None,
155        })
156    }
157
158    /// Append text to a Text object with current style and meta.
159    fn append_to_text(&self, out: &mut Text, plain: &str) {
160        let style = self.style_for_text();
161        let meta = self.meta_for_text();
162        if let Some(meta) = meta {
163            // Use spans with meta for hyperlinks
164            let start = out.len();
165            out.append(plain.to_string(), style);
166            // Manually add meta to the last span
167            if let Some(last_span) = out.spans_mut().last_mut() {
168                if last_span.start == start {
169                    last_span.meta = Some(meta);
170                }
171            }
172        } else {
173            out.append(plain.to_string(), style);
174        }
175    }
176
177    fn apply_osc(&mut self, content: &str) {
178        // Handle OSC 8 hyperlinks: "8;params;url" (url may be empty to clear).
179        if let Some(after_8) = content.strip_prefix("8;") {
180            // Split: "8;params;url"
181            if let Some((params, url)) = after_8.split_once(';') {
182                if url.is_empty() {
183                    // Clear hyperlink
184                    self.link = None;
185                    self.link_id = None;
186                } else {
187                    self.link = Some(Arc::from(url));
188                    self.link_id = parse_osc8_link_id(params);
189                }
190            }
191        }
192    }
193
194    fn apply_sgr(&mut self, params: &str) {
195        // Translate to semi-colon separated codes. Be lenient and ignore invalid codes.
196        //
197        // Python Rich: codes are int(min(255, int(code))) if code.isdigit() or code == "".
198        let mut codes: Vec<u16> = Vec::new();
199        for part in params.split(';') {
200            if part.is_empty() {
201                codes.push(0);
202                continue;
203            }
204            if !part.chars().all(|c| c.is_ascii_digit()) {
205                continue;
206            }
207            let parsed: u16 = part.parse::<u16>().unwrap_or(0).min(255);
208            codes.push(parsed);
209        }
210
211        if codes.is_empty() {
212            // `\x1b[m` is equivalent to reset.
213            codes.push(0);
214        }
215
216        let mut iter = codes.into_iter();
217        while let Some(code) = iter.next() {
218            match code {
219                0 => {
220                    // reset
221                    self.style = Style::new();
222                }
223                1 => self.style.bold = Some(true),
224                2 => self.style.dim = Some(true),
225                3 => self.style.italic = Some(true),
226                4 => self.style.underline = Some(true),
227                5 => self.style.blink = Some(true),
228                6 => self.style.blink2 = Some(true),
229                7 => self.style.reverse = Some(true),
230                8 => self.style.conceal = Some(true),
231                9 => self.style.strike = Some(true),
232                21 => self.style.underline2 = Some(true),
233
234                22 => {
235                    // not dim not bold
236                    self.style.bold = None;
237                    self.style.dim = None;
238                }
239                23 => self.style.italic = None,
240                24 => {
241                    // resets both underline and underline2
242                    self.style.underline = None;
243                    self.style.underline2 = None;
244                }
245                25 => {
246                    // resets both blink and blink2
247                    self.style.blink = None;
248                    self.style.blink2 = None;
249                }
250                27 => self.style.reverse = None,
251                28 => self.style.conceal = None,
252                29 => self.style.strike = None,
253
254                51 => self.style.frame = Some(true),
255                52 => self.style.encircle = Some(true),
256                53 => self.style.overline = Some(true),
257                54 => {
258                    // resets both frame and encircle
259                    self.style.frame = None;
260                    self.style.encircle = None;
261                }
262                55 => self.style.overline = None,
263
264                30..=37 => self.style.color = Some(SimpleColor::Standard((code - 30) as u8)),
265                39 => self.style.color = None,
266                40..=47 => self.style.bgcolor = Some(SimpleColor::Standard((code - 40) as u8)),
267                49 => self.style.bgcolor = None,
268
269                90..=97 => self.style.color = Some(SimpleColor::Standard((code - 90 + 8) as u8)),
270                100..=107 => {
271                    self.style.bgcolor = Some(SimpleColor::Standard((code - 100 + 8) as u8))
272                }
273
274                38 => {
275                    // Foreground extended color.
276                    if let Some(color_type) = iter.next() {
277                        match color_type {
278                            5 => {
279                                if let Some(n) = iter.next() {
280                                    self.style.color = Some(SimpleColor::EightBit(n as u8));
281                                }
282                            }
283                            2 => {
284                                let (Some(r), Some(g), Some(b)) =
285                                    (iter.next(), iter.next(), iter.next())
286                                else {
287                                    continue;
288                                };
289                                self.style.color = Some(SimpleColor::Rgb {
290                                    r: r as u8,
291                                    g: g as u8,
292                                    b: b as u8,
293                                });
294                            }
295                            _ => {}
296                        }
297                    }
298                }
299                48 => {
300                    // Background extended color.
301                    if let Some(color_type) = iter.next() {
302                        match color_type {
303                            5 => {
304                                if let Some(n) = iter.next() {
305                                    self.style.bgcolor = Some(SimpleColor::EightBit(n as u8));
306                                }
307                            }
308                            2 => {
309                                let (Some(r), Some(g), Some(b)) =
310                                    (iter.next(), iter.next(), iter.next())
311                                else {
312                                    continue;
313                                };
314                                self.style.bgcolor = Some(SimpleColor::Rgb {
315                                    r: r as u8,
316                                    g: g as u8,
317                                    b: b as u8,
318                                });
319                            }
320                            _ => {}
321                        }
322                    }
323                }
324
325                _ => {}
326            }
327        }
328    }
329}
330
/// Split `s` into lines on `\n`, `\r\n` and bare `\r`, without keeping the terminators.
///
/// A `\r\n` pair counts as a single terminator. A terminator at the very end of the
/// input does not produce a trailing empty line, and an empty input yields no lines.
fn splitlines_like_python(s: &str) -> Vec<&str> {
    let raw = s.as_bytes();
    let mut lines: Vec<&str> = Vec::new();
    let mut line_start = 0usize;
    let mut cursor = 0usize;

    while let Some(&byte) = raw.get(cursor) {
        let terminator_len = match byte {
            b'\r' if raw.get(cursor + 1) == Some(&b'\n') => 2,
            b'\r' | b'\n' => 1,
            _ => {
                cursor += 1;
                continue;
            }
        };
        lines.push(&s[line_start..cursor]);
        cursor += terminator_len;
        line_start = cursor;
    }

    // Whatever follows the last terminator is a final, unterminated line.
    if line_start < raw.len() {
        lines.push(&s[line_start..]);
    }

    lines
}
373
/// Parse the remainder of a CSI sequence whose `ESC [` introducer ends just before
/// `start`.
///
/// A well-formed sequence is zero or more parameter bytes (0x30..=0x3f), then zero or
/// more intermediate bytes (0x20..=0x2f), then exactly one final byte (0x40..=0x7e).
///
/// Returns `Some((final_byte, params_end, next_index))`, where `params_end` is the
/// index of the final byte (so `start..params_end` covers parameters and
/// intermediates) and `next_index` is the index just past the sequence. Returns `None`
/// when the sequence is truncated or contains any byte outside that grammar, so a
/// malformed introducer cannot swallow unrelated text or a following escape sequence.
fn parse_csi(bytes: &[u8], start: usize) -> Option<(u8, usize, usize)> {
    let mut idx = start;

    // Parameter bytes: digits and `:;<=>?`.
    while matches!(bytes.get(idx), Some(0x30..=0x3f)) {
        idx += 1;
    }
    // Intermediate bytes: space through `/`.
    while matches!(bytes.get(idx), Some(0x20..=0x2f)) {
        idx += 1;
    }

    match bytes.get(idx) {
        // params_end is the start of the final byte.
        Some(&final_byte) if (0x40..=0x7e).contains(&final_byte) => {
            Some((final_byte, idx, idx + 1))
        }
        _ => None,
    }
}
388
/// Locate the end of an OSC payload that begins at `start`.
///
/// The payload ends at the first BEL (0x07) or ST (`ESC \`). Returns
/// `Some((content_start, content_end, next_index))`, where `content_start == start`,
/// `content_end` is the index of the terminator and `next_index` is the index just
/// past it. An ESC not followed by `\` is treated as payload. Returns `None` when no
/// terminator is found.
fn parse_osc(bytes: &[u8], start: usize) -> Option<(usize, usize, usize)> {
    let payload = bytes.get(start..)?;
    payload.iter().enumerate().find_map(|(offset, &byte)| {
        let at = start + offset;
        match byte {
            // BEL
            0x07 => Some((start, at, at + 1)),
            // ST
            0x1b if bytes.get(at + 1) == Some(&b'\\') => Some((start, at, at + 2)),
            _ => None,
        }
    })
}
406
/// Extract the hyperlink id from an OSC 8 parameter string.
///
/// Parameters are colon-separated `key=value` tokens, e.g. `"id=abc:foo=bar"`. The
/// first token starting with `id=` decides the result: its value is returned, or
/// `None` if that value is empty. `None` is also returned when no such token exists.
fn parse_osc8_link_id(params: &str) -> Option<Arc<str>> {
    for token in params.split(':') {
        let Some(id) = token.strip_prefix("id=") else {
            continue;
        };
        return if id.is_empty() {
            None
        } else {
            Some(Arc::from(id))
        };
    }
    None
}
420
/// Unit tests for `AnsiDecoder`: SGR styling, line splitting, carriage-return handling
/// and OSC 8 hyperlinks.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_decode_line_strips_ansi_and_adds_spans() {
        let mut decoder = AnsiDecoder::new();
        let text = decoder.decode_line("\x1b[1mBold\x1b[0m Normal");
        assert_eq!(text.plain_text(), "Bold Normal");
        assert_eq!(text.spans().len(), 1);
        assert_eq!(text.spans()[0].start, 0);
        assert_eq!(text.spans()[0].end, 4);
        assert_eq!(text.spans()[0].style.bold, Some(true));
    }

    #[test]
    fn test_decode_line_extended_truecolor() {
        let mut decoder = AnsiDecoder::new();
        let text = decoder.decode_line("\x1b[38;2;255;0;0mRed\x1b[0m");
        assert_eq!(text.plain_text(), "Red");
        assert_eq!(text.spans().len(), 1);
        assert_eq!(
            text.spans()[0].style.color,
            Some(SimpleColor::Rgb { r: 255, g: 0, b: 0 })
        );
    }

    #[test]
    fn test_decode_persists_style_across_lines() {
        let mut decoder = AnsiDecoder::new();
        let lines = decoder.decode("\x1b[31mred\nstill");
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].plain_text(), "red");
        assert_eq!(lines[1].plain_text(), "still");
        assert_eq!(lines[0].spans().len(), 1);
        assert_eq!(lines[1].spans().len(), 1);
        assert_eq!(
            lines[1].spans()[0].style.color,
            Some(SimpleColor::Standard(1))
        );
    }

    #[test]
    fn test_decode_line_after_carriage_return() {
        let mut decoder = AnsiDecoder::new();
        let text = decoder.decode_line("abc\rdef");
        assert_eq!(text.plain_text(), "def");
    }

    #[test]
    fn test_decode_splits_on_carriage_return_like_python() {
        let mut decoder = AnsiDecoder::new();
        let lines = decoder.decode("abc\rdef");
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].plain_text(), "abc");
        assert_eq!(lines[1].plain_text(), "def");
    }

    #[test]
    fn test_decode_new_sgr_codes() {
        let mut decoder = AnsiDecoder::new();
        // SGR 6 = blink2
        let text = decoder.decode_line("\x1b[6mRapid\x1b[0m");
        assert_eq!(text.spans()[0].style.blink2, Some(true));

        let mut decoder = AnsiDecoder::new();
        // SGR 8 = conceal
        let text = decoder.decode_line("\x1b[8mHidden\x1b[0m");
        assert_eq!(text.spans()[0].style.conceal, Some(true));

        let mut decoder = AnsiDecoder::new();
        // SGR 21 = underline2
        let text = decoder.decode_line("\x1b[21mDouble\x1b[0m");
        assert_eq!(text.spans()[0].style.underline2, Some(true));

        let mut decoder = AnsiDecoder::new();
        // SGR 53 = overline
        let text = decoder.decode_line("\x1b[53mOver\x1b[0m");
        assert_eq!(text.spans()[0].style.overline, Some(true));

        let mut decoder = AnsiDecoder::new();
        // SGR 51 = frame
        let text = decoder.decode_line("\x1b[51mFramed\x1b[0m");
        assert_eq!(text.spans()[0].style.frame, Some(true));

        let mut decoder = AnsiDecoder::new();
        // SGR 52 = encircle
        let text = decoder.decode_line("\x1b[52mCircle\x1b[0m");
        assert_eq!(text.spans()[0].style.encircle, Some(true));
    }

    #[test]
    fn test_decode_sgr_reset_codes() {
        let mut decoder = AnsiDecoder::new();
        // Set overline, then reset with SGR 55
        let text = decoder.decode_line("\x1b[53mOver\x1b[55mNormal\x1b[0m");
        assert_eq!(text.plain_text(), "OverNormal");
        // Only "Over" is styled: "Normal" follows the SGR 55 reset, so it gets no span
        // and the single span must stop at the end of "Over".
        assert_eq!(text.spans().len(), 1);
        assert_eq!(text.spans()[0].start, 0);
        assert_eq!(text.spans()[0].end, 4);
        assert_eq!(text.spans()[0].style.overline, Some(true));
    }

    #[test]
    fn test_decode_osc8_hyperlink() {
        let mut decoder = AnsiDecoder::new();
        // OSC 8 hyperlink: ESC ] 8 ; params ; url BEL
        let text = decoder.decode_line("\x1b]8;;https://example.com\x07Link\x1b]8;;\x07");
        assert_eq!(text.plain_text(), "Link");
        assert_eq!(text.spans().len(), 1);
        // Check meta has the link
        let span = &text.spans()[0];
        assert!(span.meta.is_some());
        let meta = span.meta.as_ref().unwrap();
        assert_eq!(meta.link.as_deref(), Some("https://example.com"));
        assert_eq!(meta.link_id, None);
    }

    #[test]
    fn test_decode_osc8_hyperlink_with_id_and_sgr_reset_semantics() {
        let mut decoder = AnsiDecoder::new();
        let text =
            decoder.decode_line("\x1b]8;id=src42;https://example.com\x07L\x1b[0mi\x1b]8;;\x07N");
        assert_eq!(text.plain_text(), "LiN");
        assert_eq!(text.spans().len(), 2);

        // Link metadata persists across SGR reset and clears only on OSC 8 close.
        for span in text.spans() {
            let meta = span.meta.as_ref().expect("expected link metadata");
            assert_eq!(meta.link.as_deref(), Some("https://example.com"));
            assert_eq!(meta.link_id.as_deref(), Some("src42"));
        }
    }
}
552}