Skip to main content

tess/
ansi.rs

//! ANSI SGR (Select Graphic Rendition) + OSC 8 hyperlink parser.
//!
//! The parser is byte-driven (not char-driven) because ANSI sequences are
//! ASCII-only and we want to operate on raw input from arbitrary sources.
//! Public API: `Style`, `Color`, `ParseState`, `ParseStep`, `step`, `strip_sgr`.
6
7#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
8pub struct Style {
9    pub fg: Option<Color>,
10    pub bg: Option<Color>,
11    pub bold: bool,
12    pub dim: bool,
13    pub italic: bool,
14    pub underline: bool,
15    pub reverse: bool,
16    pub strike: bool,
17}
18
/// A colour as selected by an SGR sequence.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Color {
    /// One of the 16 named colours: `0..=7` are the standard set,
    /// `8..=15` the bright set.
    Ansi(u8),
    /// An entry of the xterm 256-colour palette.
    Indexed(u8),
    /// A 24-bit truecolor value as `(red, green, blue)`.
    Rgb(u8, u8, u8),
    /// Explicit "use the terminal default" (`\x1b[39m` for the foreground,
    /// `\x1b[49m` for the background).
    Default,
}
30
/// Where the byte-driven parser currently is within an escape sequence.
///
/// The state is owned by the caller and threaded through successive calls to
/// `step`, so a sequence may be split across input chunks.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum ParseState {
    /// Outside any escape sequence (the initial state).
    #[default]
    Normal,
    /// An ESC byte (`0x1b`) has been read; the next byte selects the sequence kind.
    EscSeen,
    /// Inside `ESC [ …`; holds the bytes collected so far, before the final byte.
    CsiBuilding(Vec<u8>),
    /// Inside `ESC ] …`; holds the bytes collected so far, before the terminator.
    OscBuilding(Vec<u8>),
}
39
/// Outcome of feeding a single input byte to the parser.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseStep {
    /// The byte is ordinary content; the caller should emit it as a printable cell.
    Printable(u8),
    /// An SGR sequence finished; `style` was updated in place.
    StyleChanged,
    /// A sequence that is not SGR finished; its bytes were consumed and
    /// nothing visible changed.
    OtherCsiSkipped,
    /// An OSC 8 hyperlink open/close finished; `hyperlink` was updated in place.
    HyperlinkChanged,
    /// The byte belongs to a sequence still in progress; the caller should
    /// consume it and emit nothing yet.
    Consuming,
}
54
55/// Apply one input byte to the state machine. `style` and `hyperlink` are
56/// updated in place when relevant sequences complete.
57pub fn step(
58    state: &mut ParseState,
59    style: &mut Style,
60    hyperlink: &mut Option<String>,
61    byte: u8,
62) -> ParseStep {
63    match state {
64        ParseState::Normal => {
65            if byte == 0x1b {
66                *state = ParseState::EscSeen;
67                ParseStep::Consuming
68            } else {
69                ParseStep::Printable(byte)
70            }
71        }
72        ParseState::EscSeen => match byte {
73            b'[' => {
74                *state = ParseState::CsiBuilding(Vec::with_capacity(16));
75                ParseStep::Consuming
76            }
77            b']' => {
78                *state = ParseState::OscBuilding(Vec::with_capacity(32));
79                ParseStep::Consuming
80            }
81            _ => {
82                *state = ParseState::Normal;
83                ParseStep::OtherCsiSkipped
84            }
85        },
86        ParseState::CsiBuilding(buf) => {
87            if (0x40..=0x7e).contains(&byte) {
88                let params = std::mem::take(buf);
89                let final_byte = byte;
90                *state = ParseState::Normal;
91                if final_byte == b'm' {
92                    apply_sgr(&params, style);
93                    ParseStep::StyleChanged
94                } else {
95                    ParseStep::OtherCsiSkipped
96                }
97            } else {
98                buf.push(byte);
99                ParseStep::Consuming
100            }
101        }
102        ParseState::OscBuilding(buf) => {
103            if byte == 0x07 {
104                let body = std::mem::take(buf);
105                *state = ParseState::Normal;
106                apply_osc(&body, hyperlink);
107                ParseStep::HyperlinkChanged
108            } else if byte == b'\\' && buf.last() == Some(&0x1b) {
109                buf.pop();
110                let body = std::mem::take(buf);
111                *state = ParseState::Normal;
112                apply_osc(&body, hyperlink);
113                ParseStep::HyperlinkChanged
114            } else {
115                buf.push(byte);
116                ParseStep::Consuming
117            }
118        }
119    }
120}
121
122fn apply_sgr(params: &[u8], style: &mut Style) {
123    if params.is_empty() {
124        *style = Style::default();
125        return;
126    }
127
128    let text = match std::str::from_utf8(params) {
129        Ok(s) => s,
130        Err(_) => return,
131    };
132
133    let parts: Vec<&str> = text.split(';').collect();
134    let mut i = 0;
135    while i < parts.len() {
136        let n: u16 = match parts[i].parse() {
137            Ok(n) => n,
138            Err(_) => {
139                if parts[i].is_empty() {
140                    *style = Style::default();
141                    i += 1;
142                    continue;
143                }
144                i += 1;
145                continue;
146            }
147        };
148        match n {
149            0 => *style = Style::default(),
150            1 => style.bold = true,
151            2 => style.dim = true,
152            3 => style.italic = true,
153            4 => style.underline = true,
154            7 => style.reverse = true,
155            9 => style.strike = true,
156            22 => {
157                style.bold = false;
158                style.dim = false;
159            }
160            23 => style.italic = false,
161            24 => style.underline = false,
162            27 => style.reverse = false,
163            29 => style.strike = false,
164            30..=37 => style.fg = Some(Color::Ansi((n - 30) as u8)),
165            90..=97 => style.fg = Some(Color::Ansi((n - 90 + 8) as u8)),
166            40..=47 => style.bg = Some(Color::Ansi((n - 40) as u8)),
167            100..=107 => style.bg = Some(Color::Ansi((n - 100 + 8) as u8)),
168            39 => style.fg = Some(Color::Default),
169            49 => style.bg = Some(Color::Default),
170            38 | 48 => {
171                let dest = n;
172                let mode: u16 = match parts.get(i + 1).and_then(|s| s.parse().ok()) {
173                    Some(m) => m,
174                    None => {
175                        i += 1;
176                        continue;
177                    }
178                };
179                match mode {
180                    5 => {
181                        let idx: u16 = match parts.get(i + 2).and_then(|s| s.parse().ok()) {
182                            Some(x) => x,
183                            None => {
184                                i += 2;
185                                continue;
186                            }
187                        };
188                        let color = Color::Indexed(idx as u8);
189                        if dest == 38 {
190                            style.fg = Some(color);
191                        } else {
192                            style.bg = Some(color);
193                        }
194                        i += 3;
195                        continue;
196                    }
197                    2 => {
198                        let r: u16 = parts.get(i + 2).and_then(|s| s.parse().ok()).unwrap_or(0);
199                        let g: u16 = parts.get(i + 3).and_then(|s| s.parse().ok()).unwrap_or(0);
200                        let b: u16 = parts.get(i + 4).and_then(|s| s.parse().ok()).unwrap_or(0);
201                        let color = Color::Rgb(r as u8, g as u8, b as u8);
202                        if dest == 38 {
203                            style.fg = Some(color);
204                        } else {
205                            style.bg = Some(color);
206                        }
207                        i += 5;
208                        continue;
209                    }
210                    _ => {
211                        i += 2;
212                        continue;
213                    }
214                }
215            }
216            _ => {}
217        }
218        i += 1;
219    }
220}
221
/// Interpret the body of a completed OSC sequence (terminator removed).
///
/// Only OSC 8 (`8;<params>;<uri>`) is acted on: a non-empty URI opens a
/// hyperlink, an empty or missing URI closes it. The `<params>` field is
/// ignored. Bodies that are not valid UTF-8, or whose command is not `8`,
/// leave `hyperlink` unchanged.
fn apply_osc(body: &[u8], hyperlink: &mut Option<String>) {
    if let Ok(text) = std::str::from_utf8(body) {
        // Split into at most three fields so that a `;` inside the URI stays
        // part of the URI.
        let mut fields = text.splitn(3, ';');
        if fields.next() != Some("8") {
            return;
        }
        // Skip the params field.
        fields.next();
        let target = fields.next().filter(|uri| !uri.is_empty());
        *hyperlink = target.map(|uri| uri.to_string());
    }
}
240
241/// Strip SGR / CSI / OSC sequences from a byte slice. Returns a `Cow<[u8]>`
242/// that borrows the input when no sequences are present (common case) and
243/// owns a new buffer otherwise.
244pub fn strip_sgr(bytes: &[u8]) -> std::borrow::Cow<'_, [u8]> {
245    if !bytes.contains(&0x1b) {
246        return std::borrow::Cow::Borrowed(bytes);
247    }
248    let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
249    let mut state = ParseState::Normal;
250    let mut style = Style::default();
251    let mut hyperlink: Option<String> = None;
252    for &b in bytes {
253        if let ParseStep::Printable(byte) = step(&mut state, &mut style, &mut hyperlink, b) {
254            out.push(byte);
255        }
256    }
257    std::borrow::Cow::Owned(out)
258}
259
#[cfg(test)]
mod tests {
    use super::*;
    use std::borrow::Cow;

    /// Feed `bytes` through a fresh parser; return the printable bytes plus
    /// the final style and hyperlink.
    fn run(bytes: &[u8]) -> (Vec<u8>, Style, Option<String>) {
        let mut state = ParseState::Normal;
        let mut style = Style::default();
        let mut link = None;
        let printable: Vec<u8> = bytes
            .iter()
            .filter_map(|&b| match step(&mut state, &mut style, &mut link, b) {
                ParseStep::Printable(byte) => Some(byte),
                _ => None,
            })
            .collect();
        (printable, style, link)
    }

    /// Final style after parsing `bytes` from a fresh state.
    fn style_of(bytes: &[u8]) -> Style {
        run(bytes).1
    }

    /// Final hyperlink after parsing `bytes` from a fresh state.
    fn link_of(bytes: &[u8]) -> Option<String> {
        run(bytes).2
    }

    #[test]
    fn plain_bytes_pass_through() {
        let (printed, final_style, _) = run(b"hello");
        assert_eq!(printed, b"hello");
        assert_eq!(final_style, Style::default());
    }

    #[test]
    fn sgr_red_then_text() {
        let (printed, final_style, _) = run(b"\x1b[31mhi");
        assert_eq!(printed, b"hi");
        assert_eq!(final_style.fg, Some(Color::Ansi(1)));
    }

    #[test]
    fn sgr_reset_clears_style() {
        let (printed, final_style, _) = run(b"\x1b[1;31mbold\x1b[0mreset");
        assert_eq!(printed, b"boldreset");
        assert_eq!(final_style, Style::default());
    }

    #[test]
    fn sgr_named_colors_0_to_15() {
        assert_eq!(style_of(b"\x1b[37m").fg, Some(Color::Ansi(7)));
        assert_eq!(style_of(b"\x1b[90m").fg, Some(Color::Ansi(8)));
        assert_eq!(style_of(b"\x1b[97m").fg, Some(Color::Ansi(15)));
    }

    #[test]
    fn sgr_256_indexed_fg() {
        assert_eq!(style_of(b"\x1b[38;5;208m").fg, Some(Color::Indexed(208)));
    }

    #[test]
    fn sgr_truecolor_fg() {
        assert_eq!(
            style_of(b"\x1b[38;2;255;128;0m").fg,
            Some(Color::Rgb(255, 128, 0))
        );
    }

    #[test]
    fn sgr_256_indexed_bg() {
        assert_eq!(style_of(b"\x1b[48;5;15m").bg, Some(Color::Indexed(15)));
    }

    #[test]
    fn sgr_truecolor_bg() {
        assert_eq!(
            style_of(b"\x1b[48;2;10;20;30m").bg,
            Some(Color::Rgb(10, 20, 30))
        );
    }

    #[test]
    fn sgr_attributes_all() {
        let s = style_of(b"\x1b[1;2;3;4;7;9m");
        assert!(s.bold);
        assert!(s.dim);
        assert!(s.italic);
        assert!(s.underline);
        assert!(s.reverse);
        assert!(s.strike);
    }

    #[test]
    fn sgr_attribute_cancels() {
        let s = style_of(b"\x1b[1;2;3;4;7;9m\x1b[22;23;24;27;29m");
        assert!(!s.bold);
        assert!(!s.dim);
        assert!(!s.italic);
        assert!(!s.underline);
        assert!(!s.reverse);
        assert!(!s.strike);
    }

    #[test]
    fn sgr_default_fg_bg_reset_colors_only() {
        let s = style_of(b"\x1b[1;31;42m\x1b[39;49m");
        assert!(s.bold);
        assert_eq!(s.fg, Some(Color::Default));
        assert_eq!(s.bg, Some(Color::Default));
    }

    #[test]
    fn sgr_empty_treated_as_reset() {
        assert_eq!(style_of(b"\x1b[31m\x1b[m"), Style::default());
    }

    #[test]
    fn unknown_sgr_code_ignored() {
        let s = style_of(b"\x1b[31;999;1m");
        assert_eq!(s.fg, Some(Color::Ansi(1)));
        assert!(s.bold);
    }

    #[test]
    fn non_sgr_csi_skipped() {
        let (printed, final_style, _) = run(b"\x1b[2Jcleared");
        assert_eq!(printed, b"cleared");
        assert_eq!(final_style, Style::default());
    }

    #[test]
    fn incomplete_csi_at_eof_recovers() {
        // A truncated sequence yields no printable output...
        let (printed, _, _) = run(b"\x1b[31");
        assert_eq!(printed, b"");

        // ...and completes normally once the rest arrives in a later chunk.
        let mut state = ParseState::Normal;
        let mut style = Style::default();
        let mut link = None;
        let first_chunk: &[u8] = b"\x1b[31";
        let second_chunk: &[u8] = b"m";
        for chunk in [first_chunk, second_chunk].iter() {
            for &b in chunk.iter() {
                let _ = step(&mut state, &mut style, &mut link, b);
            }
        }
        assert_eq!(style.fg, Some(Color::Ansi(1)));
    }

    #[test]
    fn osc8_hyperlink_open_with_bel() {
        assert_eq!(
            link_of(b"\x1b]8;;https://example.com\x07"),
            Some("https://example.com".to_string())
        );
    }

    #[test]
    fn osc8_hyperlink_open_with_st() {
        assert_eq!(
            link_of(b"\x1b]8;;https://example.com\x1b\\"),
            Some("https://example.com".to_string())
        );
    }

    #[test]
    fn osc8_hyperlink_close() {
        // Start with a link already open so the close has something to clear.
        let mut state = ParseState::Normal;
        let mut style = Style::default();
        let mut link = Some("https://example.com".to_string());
        b"\x1b]8;;\x07".iter().for_each(|&b| {
            let _ = step(&mut state, &mut style, &mut link, b);
        });
        assert_eq!(link, None);
    }

    #[test]
    fn strip_sgr_borrows_when_no_escapes() {
        let stripped = strip_sgr(b"plain");
        assert!(matches!(stripped, Cow::Borrowed(_)));
        assert_eq!(stripped.as_ref(), b"plain");
    }

    #[test]
    fn strip_sgr_owns_and_removes_sgr() {
        let stripped = strip_sgr(b"\x1b[31merror\x1b[0m");
        assert_eq!(stripped.as_ref(), b"error");
    }

    #[test]
    fn strip_sgr_preserves_utf8() {
        let stripped = strip_sgr("\x1b[31m日本\x1b[0m".as_bytes());
        assert_eq!(stripped.as_ref(), "日本".as_bytes());
    }

    #[test]
    fn strip_sgr_handles_real_git_diff_line() {
        let input = b"\x1b[1mdiff --git a/foo b/foo\x1b[m\n\x1b[31m-old line\x1b[m\n\x1b[32m+new line\x1b[m\n";
        let stripped = strip_sgr(input);
        assert_eq!(
            stripped.as_ref(),
            b"diff --git a/foo b/foo\n-old line\n+new line\n"
        );
    }
}