1use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// Stateless codec that converts between raw bytes and the escaped body text
/// of a C-style string literal.
///
/// The type has no fields, so it is freely copyable and every instance
/// compares equal to every other.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CStringLiteralCodec;
26
27impl CStringLiteralCodec {
28 pub fn new() -> Self {
33 Self
34 }
35
36 pub fn encode(&self, bytes: &[u8]) -> String {
44 let mut output = String::with_capacity(bytes.len());
45 for byte in bytes {
46 push_encoded_byte(*byte, &mut output);
47 }
48 output
49 }
50
51 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
64 let chars = text.char_indices().collect::<Vec<_>>();
65 let mut output = Vec::with_capacity(text.len());
66 let mut position = 0;
67 while let Some(&(index, character)) = chars.get(position) {
68 if character == '\\' {
69 decode_escape(text, &chars, &mut position, &mut output)?;
70 continue;
71 }
72 validate_source_character(index, character)?;
73 output.push(character as u8);
74 position += 1;
75 }
76 Ok(output)
77 }
78}
79
80impl Encoder<[u8]> for CStringLiteralCodec {
81 type Error = CodecError;
82 type Output = String;
83
84 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
86 Ok(CStringLiteralCodec::encode(self, input))
87 }
88}
89
90impl Decoder<str> for CStringLiteralCodec {
91 type Error = CodecError;
92 type Output = Vec<u8>;
93
94 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
96 CStringLiteralCodec::decode(self, input)
97 }
98}
99
100fn decode_escape(text: &str, chars: &[(usize, char)], position: &mut usize, output: &mut Vec<u8>) -> CodecResult<()> {
112 let marker_index = chars[*position].0;
113 *position += 1;
114 let Some(&(_, escape)) = chars.get(*position) else {
115 return Err(invalid_escape(marker_index, "\\", "incomplete escape sequence"));
116 };
117 match escape {
118 ' ' => push_simple_escape(position, output, b' '),
119 '\'' => push_simple_escape(position, output, b'\''),
120 '"' => push_simple_escape(position, output, b'"'),
121 '?' => push_simple_escape(position, output, b'?'),
122 '\\' => push_simple_escape(position, output, b'\\'),
123 'a' => push_simple_escape(position, output, 0x07),
124 'b' => push_simple_escape(position, output, 0x08),
125 'f' => push_simple_escape(position, output, 0x0c),
126 'n' => push_simple_escape(position, output, b'\n'),
127 'r' => push_simple_escape(position, output, b'\r'),
128 't' => push_simple_escape(position, output, b'\t'),
129 'v' => push_simple_escape(position, output, 0x0b),
130 'x' | 'X' => {
131 *position += 1;
132 let value = parse_variable_hex_escape(chars, position, marker_index)?;
133 output.push(value);
134 }
135 'u' => {
136 *position += 1;
137 let value = parse_fixed_hex_escape(text, chars, position, marker_index, 4)?;
138 output.push(value);
139 }
140 'U' => {
141 *position += 1;
142 let value = parse_fixed_hex_escape(text, chars, position, marker_index, 8)?;
143 output.push(value);
144 }
145 '0'..='7' => {
146 let value = parse_octal_escape(chars, position);
147 output.push(value);
148 }
149 _ => {
150 return Err(invalid_escape(
151 marker_index,
152 &format!("\\{escape}"),
153 "unsupported escape sequence",
154 ));
155 }
156 }
157 Ok(())
158}
159
/// Finishes a single-character escape: steps `position` past the escape
/// character and appends the byte it stands for to `output`.
fn push_simple_escape(position: &mut usize, output: &mut Vec<u8>, byte: u8) {
    // The two effects are independent, so their order is immaterial.
    *position += 1;
    output.push(byte);
}
170
171fn parse_variable_hex_escape(chars: &[(usize, char)], position: &mut usize, marker_index: usize) -> CodecResult<u8> {
185 let mut value = 0u8;
186 let mut digit_count = 0;
187 while digit_count < 2 {
188 let Some(&(_, character)) = chars.get(*position) else {
189 break;
190 };
191 let Some(digit) = hex_value(character) else {
192 break;
193 };
194 value = (value << 4) | digit;
195 *position += 1;
196 digit_count += 1;
197 }
198 if digit_count == 0 {
199 return Err(invalid_escape(
200 marker_index,
201 "\\x",
202 "expected at least one hexadecimal digit",
203 ));
204 }
205 Ok(value)
206}
207
208fn parse_fixed_hex_escape(
225 text: &str,
226 chars: &[(usize, char)],
227 position: &mut usize,
228 marker_index: usize,
229 digits: usize,
230) -> CodecResult<u8> {
231 let mut value = 0u32;
232 for _ in 0..digits {
233 let Some(&(index, character)) = chars.get(*position) else {
234 let escape = text.get(marker_index..).unwrap_or("\\");
235 return Err(invalid_escape(
236 marker_index,
237 escape,
238 "incomplete universal character escape",
239 ));
240 };
241 let Some(digit) = hex_value(character) else {
242 return Err(CodecError::InvalidDigit {
243 radix: 16,
244 index,
245 character,
246 });
247 };
248 value = (value << 4) | u32::from(digit);
249 *position += 1;
250 }
251 if value > u32::from(u8::MAX) {
252 let escape = text
253 .get(marker_index..chars[*position - 1].0 + chars[*position - 1].1.len_utf8())
254 .unwrap_or("\\u");
255 return Err(invalid_escape(
256 marker_index,
257 escape,
258 "universal character value must fit in one byte",
259 ));
260 }
261 Ok(value as u8)
262}
263
264fn parse_octal_escape(chars: &[(usize, char)], position: &mut usize) -> u8 {
274 let mut value = 0u16;
275 let mut digit_count = 0;
276 while digit_count < 3 {
277 let Some(&(_, character)) = chars.get(*position) else {
278 break;
279 };
280 let Some(digit) = octal_value(character) else {
281 break;
282 };
283 value = (value << 3) | u16::from(digit);
284 *position += 1;
285 digit_count += 1;
286 }
287 value as u8
288}
289
290fn validate_source_character(index: usize, character: char) -> CodecResult<()> {
300 if is_source_character(character) {
301 return Ok(());
302 }
303 Err(CodecError::InvalidCharacter {
304 index,
305 character,
306 reason: "raw source character must be printable ASCII or allowed whitespace".to_owned(),
307 })
308}
309
/// Reports whether `character` may appear unescaped in the literal text:
/// tab, line feed, vertical tab, form feed, or printable ASCII (space
/// through `~`).
fn is_source_character(character: char) -> bool {
    // Tab (0x09), LF (0x0A), VT (0x0B) and FF (0x0C) are contiguous.
    let allowed_whitespace = ('\t'..='\u{0c}').contains(&character);
    let printable_ascii = (' '..='~').contains(&character);
    allowed_whitespace || printable_ascii
}
320
321fn push_encoded_byte(byte: u8, output: &mut String) {
327 match byte {
328 b'\'' => output.push_str("\\'"),
329 b'"' => output.push_str("\\\""),
330 b'?' => output.push_str("\\?"),
331 b'\\' => output.push_str("\\\\"),
332 0x07 => output.push_str("\\a"),
333 0x08 => output.push_str("\\b"),
334 0x0c => output.push_str("\\f"),
335 b'\n' => output.push_str("\\n"),
336 b'\r' => output.push_str("\\r"),
337 b'\t' => output.push_str("\\t"),
338 0x0b => output.push_str("\\v"),
339 b' '..=b'~' => output.push(byte as char),
340 _ => {
341 output.push('\\');
342 output.push('x');
343 output.push(uppercase_hex_digit(byte >> 4));
344 output.push(uppercase_hex_digit(byte & 0x0f));
345 }
346 }
347}
348
/// Returns the numeric value (0–15) of an ASCII hexadecimal digit in either
/// case, or `None` for any other character.
fn hex_value(character: char) -> Option<u8> {
    // `to_digit(16)` accepts exactly `0-9`, `a-f` and `A-F`; the result is
    // at most 15, so narrowing to `u8` cannot lose information.
    character.to_digit(16).map(|digit| digit as u8)
}
364
/// Returns the numeric value (0–7) of an ASCII octal digit, or `None` for
/// any other character.
fn octal_value(character: char) -> Option<u8> {
    // `to_digit(8)` accepts exactly `0-7`; the result is at most 7, so
    // narrowing to `u8` cannot lose information.
    character.to_digit(8).map(|digit| digit as u8)
}
378
/// Returns the uppercase hexadecimal digit for the low four bits of `value`;
/// the high four bits are ignored.
fn uppercase_hex_digit(value: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Masking keeps the index within 0..16, so the lookup cannot go out of
    // bounds.
    char::from(DIGITS[usize::from(value & 0x0f)])
}
406
407fn invalid_escape(index: usize, escape: &str, reason: &str) -> CodecError {
417 CodecError::InvalidEscape {
418 index,
419 escape: escape.to_owned(),
420 reason: reason.to_owned(),
421 }
422}