Skip to main content

stynx_code_tui/
util.rs

1/// Strip ANSI CSI / OSC escape sequences so tool output renders cleanly.
2/// Handles the common cases (`\x1b[...m`, OSC `\x1b]...\x07`/`\x1b\\`,
3/// single-char escapes like `\x1b(B`).
4pub fn strip_ansi(input: &str) -> String {
5    let bytes = input.as_bytes();
6    let mut out = String::with_capacity(bytes.len());
7    let mut i = 0;
8    while i < bytes.len() {
9        let b = bytes[i];
10        if b == 0x1b && i + 1 < bytes.len() {
11            let next = bytes[i + 1];
12            // CSI: ESC '[' ... final byte in 0x40..=0x7E
13            if next == b'[' {
14                i += 2;
15                while i < bytes.len() {
16                    let c = bytes[i];
17                    if (0x40..=0x7e).contains(&c) {
18                        i += 1;
19                        break;
20                    }
21                    i += 1;
22                }
23                continue;
24            }
25            // OSC: ESC ']' ... terminated by BEL (0x07) or ESC '\\'
26            if next == b']' {
27                i += 2;
28                while i < bytes.len() {
29                    let c = bytes[i];
30                    if c == 0x07 {
31                        i += 1;
32                        break;
33                    }
34                    if c == 0x1b && i + 1 < bytes.len() && bytes[i + 1] == b'\\' {
35                        i += 2;
36                        break;
37                    }
38                    i += 1;
39                }
40                continue;
41            }
42            // Two-char escapes: ESC '(' x or ESC ')' x etc.
43            if matches!(next, b'(' | b')' | b'*' | b'+' | b'#') && i + 2 < bytes.len() {
44                i += 3;
45                continue;
46            }
47            // Skip lone ESC.
48            i += 1;
49            continue;
50        }
51        // Drop other C0 control chars except \n, \t, \r.
52        if b < 0x20 && b != b'\n' && b != b'\t' && b != b'\r' {
53            i += 1;
54            continue;
55        }
56        // Keep multibyte UTF-8 intact.
57        let char_end = utf8_char_end(bytes, i);
58        out.push_str(&input[i..char_end]);
59        i = char_end;
60    }
61    out
62}
63
/// Return the index one past the UTF-8 character that starts at `bytes[i]`.
///
/// The width is taken from the lead byte alone: 2, 3 or 4 bytes for the multibyte lead
/// ranges, and 1 byte for ASCII as well as for any byte that is not a valid lead
/// (continuation bytes, `0xF8` and above). The result is clamped to `bytes.len()`.
///
/// Panics if `i` is out of bounds for `bytes`.
fn utf8_char_end(bytes: &[u8], i: usize) -> usize {
    let lead = bytes[i];
    let len = match lead {
        0xc0..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xf7 => 4,
        // ASCII, stray continuation bytes and invalid lead bytes all advance by one.
        _ => 1,
    };
    bytes.len().min(i + len)
}
79
#[cfg(test)]
mod tests {
    use super::*;

    /// Colour/style CSI sequences are removed and the text between them is kept.
    #[test]
    fn strips_csi() {
        assert_eq!(strip_ansi("\x1b[31mred\x1b[0m"), "red");
        assert_eq!(strip_ansi("\x1b[1;33mwarn\x1b[m hi"), "warn hi");
    }

    /// OSC sequences are removed with either terminator (BEL or `ESC \`).
    #[test]
    fn strips_osc() {
        assert_eq!(strip_ansi("\x1b]0;title\x07after"), "after");
        assert_eq!(strip_ansi("\x1b]0;title\x1b\\after"), "after");
    }

    /// Charset-designator escapes are removed together with their designator byte.
    #[test]
    fn strips_charset_designators() {
        assert_eq!(strip_ansi("\x1b(Bplain"), "plain");
        assert_eq!(strip_ansi("a\x1b)0b"), "ab");
    }

    /// C0 controls and a trailing ESC are dropped; `\n`, `\t` and `\r` survive.
    #[test]
    fn drops_control_chars_but_keeps_whitespace() {
        assert_eq!(strip_ansi("a\x00b\x08c"), "abc");
        assert_eq!(strip_ansi("a\tb\r\nc"), "a\tb\r\nc");
        assert_eq!(strip_ansi("end\x1b"), "end");
    }

    /// Multibyte UTF-8 passes through untouched.
    #[test]
    fn keeps_unicode() {
        assert_eq!(strip_ansi("héllo 🦀"), "héllo 🦀");
    }

    /// Empty input yields empty output.
    #[test]
    fn handles_empty_input() {
        assert_eq!(strip_ansi(""), "");
    }
}