1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#[cfg(all(not(feature = "std"), feature = "alloc"))]
use alloc::vec::Vec;

/// A located escape sequence: its byte span within the searched text plus the matched slice.
///
/// The type only holds two offsets and a borrowed slice, so it is cheap to copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Match<'t> {
    /// Byte index of the first byte of the match.
    pub start: usize,
    /// Byte index one past the last byte of the match.
    pub end: usize,
    /// The matched slice of the searched text (i.e. `source[start..end]`).
    /// Note that the range is half-open: `start` is included, `end` is excluded.
    pub text: &'t str,
}

// Control Sequence Introducer: the ESC byte (0x1b) followed by `[`.
// `parse` treats every occurrence of these two bytes as the start of an escape sequence.
const CSI: &str = "\x1b[";

/// Returns `true` when `byte` lies in `0x40..=0x7e` (ASCII `@` through `~`),
/// the range of bytes that ends an escape sequence.
#[inline(always)]
fn terminated_byte(byte: u8) -> bool {
    matches!(byte, 0x40..=0x7e)
}

/// Parses ANSI escape codes from the given text, returning a vector of `Match`.
///
/// A sequence starts at the two bytes `ESC [` (`"\x1b["`) and runs up to and including the
/// first following byte in `0x40..=0x7e`. Matches are returned in order of appearance and
/// never overlap. `start` and `end` are byte offsets into `text`, not character offsets.
///
/// An introducer that is not followed by any byte in `0x40..=0x7e` is not reported; as no
/// terminating byte exists in the rest of the text at that point, scanning stops there.
///
/// ```rust
/// let ansi_text = "Hello, \x1b[31;4mworld\x1b[0m!";
/// let parsed: Vec<_> = cansi::parse(ansi_text)
///     .into_iter()
///     .map(|m| (m.start, m.end))
///     .collect();
/// assert_eq!(
///     parsed,
///     vec![(7, 14), (19, 23)],
/// );
/// ```
pub fn parse(text: &str) -> Vec<Match<'_>> {
    let bytes = text.as_bytes();
    let mut matches = Vec::with_capacity(8);

    // Byte offset where the next search begins. It is either 0 or the end of the previous
    // match, so it always sits on a char boundary and slicing `text` with it cannot panic.
    let mut cursor = 0;

    // `CSI` is pure ASCII, so every offset `find` reports is also a char boundary.
    while let Some(offset) = text[cursor..].find(CSI) {
        let start = cursor + offset;
        let body = start + CSI.len();

        // The sequence ends just past the first terminating byte after the introducer.
        // Terminating bytes are ASCII, so `end` is a char boundary as well.
        let end = match bytes[body..].iter().position(|&b| terminated_byte(b)) {
            Some(pos) => body + pos + 1,
            // Unterminated sequence: nothing further in the text can complete a match.
            None => break,
        };

        matches.push(Match {
            start,
            end,
            text: &text[start..end],
        });

        cursor = end;
    }

    matches
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building the `Match` a test expects.
    fn expected(start: usize, end: usize, text: &str) -> Match<'_> {
        Match { start, end, text }
    }

    #[test]
    fn parse_test() {
        // Pure ASCII input: two sequences, at bytes 7..14 and 19..23.
        let found = parse("Hello, \x1b[31;4mworld\x1b[0m!");
        let want = vec![
            expected(7, 14, "\x1b[31;4m"),
            expected(19, 23, "\x1b[0m"),
        ];

        assert_eq!(found, want);
    }

    #[test]
    fn parse_string_with_different_chars() {
        // The emoji are multi-byte, so the expected offsets are byte positions,
        // not character positions.
        let found = parse("👋, \x1b[31;4m🌍\x1b[0m!");
        let want = vec![
            expected(6, 13, "\x1b[31;4m"),
            expected(17, 21, "\x1b[0m"),
        ];

        assert_eq!(found, want);
    }

    #[test]
    fn malformed_escape() {
        // The introducer is followed only by `\n`, which does not end a sequence,
        // so nothing is reported.
        let found = parse("oops\x1b[\n");

        assert_eq!(found, Vec::new());
    }
}