vt_push_parser/
ascii.rs

1//! ASCII control codes.
2
// Generates the `AsciiControl` enum and its conversions from a list of
// `(Variant, value)` pairs. Each value becomes the variant's `u8` discriminant
// and the variant identifier doubles as its textual name.
macro_rules! ascii_control {
    ($(($variant:ident, $value:expr)),* $(,)?) => {
        /// ASCII control codes.
        #[derive(Clone, Copy, PartialEq, Eq, Hash)]
        #[repr(u8)]
        pub enum AsciiControl {
            $( $variant = $value, )*
        }

        // Renders as the upper-cased variant name in angle brackets, e.g. `<ESC>`.
        impl std::fmt::Display for AsciiControl {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                let name = match self {
                    $( AsciiControl::$variant => stringify!($variant), )*
                };
                write!(f, "<{}>", name.to_ascii_uppercase())
            }
        }

        // Debug output is the same text as Display.
        impl std::fmt::Debug for AsciiControl {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(self, f)
            }
        }

        // Looks a byte up among the declared values; `Err(())` when absent.
        impl TryFrom<u8> for AsciiControl {
            type Error = ();
            fn try_from(value: u8) -> Result<Self, Self::Error> {
                const BY_VALUE: &[(u8, AsciiControl)] = &[
                    $( ($value, AsciiControl::$variant), )*
                ];
                BY_VALUE
                    .iter()
                    .copied()
                    .find(|&(code, _)| code == value)
                    .map(|(_, control)| control)
                    .ok_or(())
            }
        }

        // A char matches only if it fits in one byte and that byte is declared.
        impl TryFrom<char> for AsciiControl {
            type Error = ();
            fn try_from(value: char) -> Result<Self, Self::Error> {
                let byte = u8::try_from(value).map_err(|_| ())?;
                <Self as TryFrom<u8>>::try_from(byte)
            }
        }

        // Parses a bare variant name (no angle brackets), ignoring ASCII case.
        impl std::str::FromStr for AsciiControl {
            type Err = ();
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                const BY_NAME: &[(&str, AsciiControl)] = &[
                    $( (stringify!($variant), AsciiControl::$variant), )*
                ];
                BY_NAME
                    .iter()
                    .copied()
                    .find(|&(name, _)| s.eq_ignore_ascii_case(name))
                    .map(|(_, control)| control)
                    .ok_or(())
            }
        }
    };
}
65
// Instantiates `AsciiControl` from `(Variant, byte value)` pairs: values 0
// through 31 plus 127 (`Del`). The variant identifier, upper-cased, is the name
// printed by `Display`/`Debug` and accepted (case-insensitively) by `FromStr`.
// Value 9 is declared as `Tab`, so it is rendered as `<TAB>`.
ascii_control! {
    (Nul, 0),
    (Soh, 1),
    (Stx, 2),
    (Etx, 3),
    (Eot, 4),
    (Enq, 5),
    (Ack, 6),
    (Bel, 7),
    (Bs, 8),
    (Tab, 9),
    (Lf, 10),
    (Vt, 11),
    (Ff, 12),
    (Cr, 13),
    (So, 14),
    (Si, 15),
    (Dle, 16),
    (Dc1, 17),
    (Dc2, 18),
    (Dc3, 19),
    (Dc4, 20),
    (Nak, 21),
    (Syn, 22),
    (Etb, 23),
    (Can, 24),
    (Em, 25),
    (Sub, 26),
    (Esc, 27),
    (Fs, 28),
    (Gs, 29),
    (Rs, 30),
    (Us, 31),
    (Del, 127),
}
101
/// Decodes text written in angle-bracket notation into raw bytes.
///
/// * `<NAME>` — a control-code mnemonic (`<ESC>`, `<CR>`, ...), matched
///   ASCII-case-insensitively. `<HT>` is accepted as an alias for `<TAB>` and
///   `<SP>` yields a space (byte 32).
/// * `<hh>` — exactly two hex digits, yielding that byte. Mnemonics are tried
///   first, so `<FF>` is always form feed (12), never byte `0xFF`.
/// * Anything else is copied through as its UTF-8 bytes.
///
/// A `<` only starts an escape when a matching `>` follows before the next
/// `<` or the end of input, and the text in between is recognised. Otherwise
/// the `<` and the text after it are kept literally, so sequences such as
/// `<ESC>[<35;1;1M<ESC>[A` decode as expected.
#[doc(hidden)]
pub fn decode_string(input: &str) -> Vec<u8> {
    /// Maps a control-code mnemonic (any ASCII case) to its byte value.
    fn named_byte(name: &str) -> Option<u8> {
        let byte = match name.to_ascii_uppercase().as_str() {
            "NUL" => 0,
            "SOH" => 1,
            "STX" => 2,
            "ETX" => 3,
            "EOT" => 4,
            "ENQ" => 5,
            "ACK" => 6,
            "BEL" => 7,
            "BS" => 8,
            "HT" | "TAB" => 9,
            "LF" => 10,
            "VT" => 11,
            "FF" => 12,
            "CR" => 13,
            "SO" => 14,
            "SI" => 15,
            "DLE" => 16,
            "DC1" => 17,
            "DC2" => 18,
            "DC3" => 19,
            "DC4" => 20,
            "NAK" => 21,
            "SYN" => 22,
            "ETB" => 23,
            "CAN" => 24,
            "EM" => 25,
            "SUB" => 26,
            "ESC" => 27,
            "FS" => 28,
            "GS" => 29,
            "RS" => 30,
            "US" => 31,
            // Note that this is only for parsing
            "SP" => 32,
            "DEL" => 127,
            _ => return None,
        };
        Some(byte)
    }

    /// Parses a string of exactly two hex digits into a byte.
    fn hex_byte(name: &str) -> Option<u8> {
        if name.len() == 2 && name.bytes().all(|b| b.is_ascii_hexdigit()) {
            u8::from_str_radix(name, 16).ok()
        } else {
            None
        }
    }

    // The output is usually no longer than the input, so this avoids regrowth.
    let mut result = Vec::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '<' {
            // Regular character: emit its UTF-8 encoding.
            let mut buf = [0; 4];
            result.extend_from_slice(ch.encode_utf8(&mut buf).as_bytes());
            continue;
        }

        // Collect the candidate name up to '>'. Peek instead of consuming so
        // that a second '<' is left for the outer loop: it may begin a real
        // escape that must not be swallowed by this (then literal) one.
        let mut name = String::new();
        let mut found_closing = false;
        while let Some(&next) = chars.peek() {
            if next == '<' {
                break;
            }
            chars.next();
            if next == '>' {
                found_closing = true;
                break;
            }
            name.push(next);
        }

        // Only a properly closed sequence may be interpreted; an unterminated
        // `<35` must stay the literal text it is.
        let decoded = if found_closing {
            named_byte(&name).or_else(|| hex_byte(&name))
        } else {
            None
        };

        match decoded {
            Some(byte) => result.push(byte),
            None => {
                // Not an escape: reproduce exactly what was consumed.
                result.push(b'<');
                result.extend_from_slice(name.as_bytes());
                if found_closing {
                    result.push(b'>');
                }
            }
        }
    }

    result
}
185
186#[doc(hidden)]
187pub fn encode_string(bytes: &[u8]) -> String {
188    use std::fmt::Write;
189    let mut s = String::new();
190    for chunk in bytes.utf8_chunks() {
191        for c in chunk.valid().chars() {
192            if let Ok(c) = AsciiControl::try_from(c) {
193                write!(s, "{c}").unwrap();
194            } else {
195                write!(s, "{c}").unwrap();
196            }
197        }
198        if !chunk.invalid().is_empty() {
199            write!(s, "<{}>", hex::encode(chunk.invalid())).unwrap();
200        }
201    }
202    s
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare names parse to their variant; unknown names are rejected.
    #[test]
    fn test_from_str() {
        let parse = |name: &str| name.parse::<AsciiControl>();
        assert_eq!(parse("ESC"), Ok(AsciiControl::Esc));
        assert_eq!(parse("CR"), Ok(AsciiControl::Cr));
        assert_eq!(parse("DEL"), Ok(AsciiControl::Del));
        assert_eq!(parse("foo"), Err(()));
    }

    /// A trailing `<u` with no `>` must come back without an added `>`.
    #[test]
    fn test_decode_string_unclosed_control_sequence() {
        assert_eq!(decode_string("<ESC>[<u"), b"\x1b[<u");
    }

    /// A lone `>` outside any `<...>` group is ordinary text.
    #[test]
    fn test_decode_string_closed_control_sequence() {
        assert_eq!(decode_string("<ESC>[>u"), b"\x1b[>u");
    }

    /// An unknown name that is closed is kept verbatim, brackets included.
    #[test]
    fn test_decode_string_unrecognized_closed() {
        assert_eq!(decode_string("<foo>"), b"<foo>");
    }

    /// An unknown name that is never closed is kept verbatim.
    #[test]
    fn test_decode_string_unrecognized_unclosed() {
        assert_eq!(decode_string("<bar"), b"<bar");
    }

    /// Known mnemonics decode to their byte values, alone or back to back.
    #[test]
    fn test_decode_string_recognized_control() {
        assert_eq!(decode_string("<ESC>"), b"\x1b");
        assert_eq!(decode_string("<CR><LF>"), b"\r\n");
    }

    /// Two hex digits in brackets decode to that byte.
    #[test]
    fn test_decode_string_hex_bytes() {
        assert_eq!(decode_string("<1B>[A"), b"\x1b[A");
    }
}