vt_push_parser/
ascii.rs

1//! ASCII control codes.
2
/// Generates the `AsciiControl` enum and its conversions from a list of
/// `(Variant, value)` pairs.
///
/// For every pair the expansion provides:
/// * an enum variant whose discriminant is `value` (the enum is `#[repr(u8)]`),
/// * `Display`/`Debug` output of the form `<NAME>`, where `NAME` is the
///   variant identifier converted to ASCII upper case,
/// * `TryFrom<u8>` and `TryFrom<char>` lookups by value,
/// * a `FromStr` lookup by variant identifier, ignoring ASCII case.
macro_rules! ascii_control {
    ($(($variant:ident, $value:expr)),* $(,)?) => {
        /// ASCII control codes.
        #[repr(u8)]
        pub enum AsciiControl {
            $( $variant = $value, )*
        }

        impl std::fmt::Display for AsciiControl {
            /// Writes the upper-cased variant name in angle brackets, e.g. `<ESC>`.
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    $( AsciiControl::$variant => write!(f, "<{}>", stringify!($variant).to_ascii_uppercase()), )*
                }
            }
        }

        impl std::fmt::Debug for AsciiControl {
            /// Same text as the `Display` implementation.
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(self, f)
            }
        }

        impl std::convert::TryFrom<u8> for AsciiControl {
            type Error = ();

            /// Returns the variant whose value equals `value`, or `Err(())` if
            /// no variant has that value.
            fn try_from(value: u8) -> Result<Self, Self::Error> {
                $(
                    if value == $value {
                        return Ok(AsciiControl::$variant);
                    }
                )*
                Err(())
            }
        }

        impl std::convert::TryFrom<char> for AsciiControl {
            type Error = ();

            /// Returns the variant whose value, converted to a `char`, equals
            /// `value`, or `Err(())` if there is none.
            fn try_from(value: char) -> Result<Self, Self::Error> {
                $(
                    if value == char::from($value) {
                        return Ok(AsciiControl::$variant);
                    }
                )*
                Err(())
            }
        }

        impl std::str::FromStr for AsciiControl {
            type Err = ();

            /// Parses a variant identifier, ignoring ASCII case (`"esc"`,
            /// `"Esc"` and `"ESC"` all yield `AsciiControl::Esc`). The string
            /// is the bare name, without angle brackets.
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                $(
                    // Compare against the variant's own identifier; it is the
                    // only per-entry name this macro binds.
                    if s.eq_ignore_ascii_case(stringify!($variant)) {
                        return Ok(AsciiControl::$variant);
                    }
                )*
                Err(())
            }
        }
    };
}
64
65ascii_control! {
66    (Nul, 0),
67    (Soh, 1),
68    (Stx, 2),
69    (Etx, 3),
70    (Eot, 4),
71    (Enq, 5),
72    (Ack, 6),
73    (Bel, 7),
74    (Bs, 8),
75    (Tab, 9),
76    (Lf, 10),
77    (Vt, 11),
78    (Ff, 12),
79    (Cr, 13),
80    (So, 14),
81    (Si, 15),
82    (Dle, 16),
83    (Dc1, 17),
84    (Dc2, 18),
85    (Dc3, 19),
86    (Dc4, 20),
87    (Nak, 21),
88    (Syn, 22),
89    (Etb, 23),
90    (Can, 24),
91    (Em, 25),
92    (Sub, 26),
93    (Esc, 27),
94    (Fs, 28),
95    (Gs, 29),
96    (Rs, 30),
97    (Us, 31),
98    (Del, 127),
99}
100
/// Decodes the angle-bracket notation (`<ESC>[A`, `<1B>[A`, ...) into raw bytes.
///
/// Rules, applied left to right:
/// * `<NAME>` becomes a single byte when `NAME` is a known control-code name.
///   Names are matched ignoring ASCII case only. `HT` and `TAB` both mean
///   0x09, and `SP` (0x20) is accepted as a parsing convenience.
/// * `<XX>` becomes the byte `0xXX` when `XX` is exactly two hex digits.
///   Names win over hex, so `<FF>` is form feed (0x0C), not 0xFF.
/// * Any other `<...>` group is copied through unchanged, brackets included.
/// * If another `<` appears before the closing `>`, the first `<` and the
///   text after it are literal, and the later `<` starts a new group. This
///   keeps `<ESC>[<0;1;1M<ESC>[A` from hiding the second `<ESC>`.
/// * A group that runs to the end of the input without `>` is still decoded
///   if its content is a known name or hex pair; otherwise it is copied
///   through without inventing a `>`.
/// * Everything outside a group is copied as its UTF-8 bytes.
#[doc(hidden)]
pub fn decode_string(input: &str) -> Vec<u8> {
    // Decoding never produces more bytes than it consumes.
    let mut result = Vec::with_capacity(input.len());
    let mut rest = input;

    while let Some(open) = rest.find('<') {
        // Plain text in front of the bracket.
        result.extend_from_slice(rest[..open].as_bytes());
        let body = &rest[open + 1..];

        // The group ends at the first bracket of either kind, or at the end
        // of the input.
        let (name, terminator) = match body.find(|c: char| c == '<' || c == '>') {
            Some(end) => (&body[..end], Some(body.as_bytes()[end])),
            None => (body, None),
        };

        if terminator == Some(b'<') {
            // The bracket we opened was just text; resume at the new '<'.
            result.push(b'<');
            result.extend_from_slice(name.as_bytes());
            rest = &body[name.len()..];
            continue;
        }

        match control_byte(name) {
            Some(byte) => result.push(byte),
            None => {
                // Not a recognised code: reproduce the text exactly as written.
                result.push(b'<');
                result.extend_from_slice(name.as_bytes());
                if terminator.is_some() {
                    result.push(b'>');
                }
            }
        }

        rest = match terminator {
            Some(_) => &body[name.len() + 1..],
            None => "",
        };
    }

    result.extend_from_slice(rest.as_bytes());
    result
}

/// Maps the text between `<` and `>` to a byte: a control-code name first
/// (ASCII case-insensitive), then a two-digit hex value. Returns `None` for
/// anything else.
fn control_byte(name: &str) -> Option<u8> {
    const NAMES: &[(&str, u8)] = &[
        ("NUL", 0),
        ("SOH", 1),
        ("STX", 2),
        ("ETX", 3),
        ("EOT", 4),
        ("ENQ", 5),
        ("ACK", 6),
        ("BEL", 7),
        ("BS", 8),
        ("HT", 9),
        ("TAB", 9),
        ("LF", 10),
        ("VT", 11),
        ("FF", 12),
        ("CR", 13),
        ("SO", 14),
        ("SI", 15),
        ("DLE", 16),
        ("DC1", 17),
        ("DC2", 18),
        ("DC3", 19),
        ("DC4", 20),
        ("NAK", 21),
        ("SYN", 22),
        ("ETB", 23),
        ("CAN", 24),
        ("EM", 25),
        ("SUB", 26),
        ("ESC", 27),
        ("FS", 28),
        ("GS", 29),
        ("RS", 30),
        ("US", 31),
        // Note that this is only for parsing
        ("SP", 32),
        ("DEL", 127),
    ];

    // ASCII-only comparison: Unicode upper-casing would turn inputs such as
    // the "ff" ligature into "FF" and match a control name by accident.
    let named = NAMES
        .iter()
        .find(|(known, _)| name.eq_ignore_ascii_case(known))
        .map(|&(_, byte)| byte);

    named.or_else(|| {
        let is_hex_pair = name.len() == 2 && name.bytes().all(|b| b.is_ascii_hexdigit());
        if is_hex_pair {
            u8::from_str_radix(name, 16).ok()
        } else {
            None
        }
    })
}
184
185#[doc(hidden)]
186pub fn encode_string(bytes: &[u8]) -> String {
187    use std::fmt::Write;
188    let mut s = String::new();
189    for chunk in bytes.utf8_chunks() {
190        for c in chunk.valid().chars() {
191            if let Ok(c) = AsciiControl::try_from(c) {
192                write!(s, "{c}").unwrap();
193            } else {
194                write!(s, "{c}").unwrap();
195            }
196        }
197        if !chunk.invalid().is_empty() {
198            write!(s, "<{}>", hex::encode(chunk.invalid())).unwrap();
199        }
200    }
201    s
202}
203
#[cfg(test)]
mod tests {
    use super::decode_string;

    /// An unclosed `<` must not gain an extraneous `>` in the output.
    #[test]
    fn test_decode_string_unclosed_control_sequence() {
        let bytes = decode_string("<ESC>[<u");
        assert_eq!(bytes, b"\x1b[<u");
    }

    /// A `>` that follows a closed control sequence is ordinary text.
    #[test]
    fn test_decode_string_closed_control_sequence() {
        let bytes = decode_string("<ESC>[>u");
        assert_eq!(bytes, b"\x1b[>u");
    }

    /// An unknown name inside a closed pair of brackets is passed through.
    #[test]
    fn test_decode_string_unrecognized_closed() {
        let bytes = decode_string("<foo>");
        assert_eq!(bytes, b"<foo>");
    }

    /// An unknown name with no closing bracket is passed through as written.
    #[test]
    fn test_decode_string_unrecognized_unclosed() {
        let bytes = decode_string("<bar");
        assert_eq!(bytes, b"<bar");
    }

    /// Known control names decode to their single byte.
    #[test]
    fn test_decode_string_recognized_control() {
        assert_eq!(decode_string("<ESC>"), b"\x1b");
        assert_eq!(decode_string("<CR><LF>"), b"\r\n");
    }

    /// Two hex digits in brackets decode to that byte.
    #[test]
    fn test_decode_string_hex_bytes() {
        let bytes = decode_string("<1B>[A");
        assert_eq!(bytes, b"\x1b[A");
    }
}
252}