1use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
19#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
25pub struct CStringLiteralCodec;
26
27impl CStringLiteralCodec {
28 pub fn new() -> Self {
33 Self
34 }
35
36 pub fn encode(&self, bytes: &[u8]) -> String {
44 let mut output = String::with_capacity(bytes.len());
45 for byte in bytes {
46 push_encoded_byte(*byte, &mut output);
47 }
48 output
49 }
50
51 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
64 let chars = text.char_indices().collect::<Vec<_>>();
65 let mut output = Vec::with_capacity(text.len());
66 let mut position = 0;
67 while let Some(&(index, character)) = chars.get(position) {
68 if character == '\\' {
69 decode_escape(text, &chars, &mut position, &mut output)?;
70 continue;
71 }
72 validate_source_character(index, character)?;
73 output.push(character as u8);
74 position += 1;
75 }
76 Ok(output)
77 }
78}
79
80impl Encoder<[u8]> for CStringLiteralCodec {
81 type Error = CodecError;
82 type Output = String;
83
84 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
86 Ok(CStringLiteralCodec::encode(self, input))
87 }
88}
89
90impl Decoder<str> for CStringLiteralCodec {
91 type Error = CodecError;
92 type Output = Vec<u8>;
93
94 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
96 CStringLiteralCodec::decode(self, input)
97 }
98}
99
100fn decode_escape(
112 text: &str,
113 chars: &[(usize, char)],
114 position: &mut usize,
115 output: &mut Vec<u8>,
116) -> CodecResult<()> {
117 let marker_index = chars[*position].0;
118 *position += 1;
119 let Some(&(_, escape)) = chars.get(*position) else {
120 return Err(invalid_escape(
121 marker_index,
122 "\\",
123 "incomplete escape sequence",
124 ));
125 };
126 match escape {
127 ' ' => push_simple_escape(position, output, b' '),
128 '\'' => push_simple_escape(position, output, b'\''),
129 '"' => push_simple_escape(position, output, b'"'),
130 '?' => push_simple_escape(position, output, b'?'),
131 '\\' => push_simple_escape(position, output, b'\\'),
132 'a' => push_simple_escape(position, output, 0x07),
133 'b' => push_simple_escape(position, output, 0x08),
134 'f' => push_simple_escape(position, output, 0x0c),
135 'n' => push_simple_escape(position, output, b'\n'),
136 'r' => push_simple_escape(position, output, b'\r'),
137 't' => push_simple_escape(position, output, b'\t'),
138 'v' => push_simple_escape(position, output, 0x0b),
139 'x' | 'X' => {
140 *position += 1;
141 let value = parse_variable_hex_escape(chars, position, marker_index)?;
142 output.push(value);
143 }
144 'u' => {
145 *position += 1;
146 let value = parse_fixed_hex_escape(text, chars, position, marker_index, 4)?;
147 output.push(value);
148 }
149 'U' => {
150 *position += 1;
151 let value = parse_fixed_hex_escape(text, chars, position, marker_index, 8)?;
152 output.push(value);
153 }
154 '0'..='7' => {
155 let value = parse_octal_escape(chars, position);
156 output.push(value);
157 }
158 _ => {
159 return Err(invalid_escape(
160 marker_index,
161 &format!("\\{escape}"),
162 "unsupported escape sequence",
163 ));
164 }
165 }
166 Ok(())
167}
168
/// Completes a single-character escape: steps `position` past the escape character and
/// appends the byte it stands for to `output`.
fn push_simple_escape(position: &mut usize, output: &mut Vec<u8>, byte: u8) {
    // The two updates are independent of each other.
    *position += 1;
    output.push(byte);
}
179
180fn parse_variable_hex_escape(
194 chars: &[(usize, char)],
195 position: &mut usize,
196 marker_index: usize,
197) -> CodecResult<u8> {
198 let mut value = 0u8;
199 let mut digit_count = 0;
200 while digit_count < 2 {
201 let Some(&(_, character)) = chars.get(*position) else {
202 break;
203 };
204 let Some(digit) = hex_value(character) else {
205 break;
206 };
207 value = (value << 4) | digit;
208 *position += 1;
209 digit_count += 1;
210 }
211 if digit_count == 0 {
212 return Err(invalid_escape(
213 marker_index,
214 "\\x",
215 "expected at least one hexadecimal digit",
216 ));
217 }
218 Ok(value)
219}
220
221fn parse_fixed_hex_escape(
238 text: &str,
239 chars: &[(usize, char)],
240 position: &mut usize,
241 marker_index: usize,
242 digits: usize,
243) -> CodecResult<u8> {
244 let mut value = 0u32;
245 for _ in 0..digits {
246 let Some(&(index, character)) = chars.get(*position) else {
247 let escape = text.get(marker_index..).unwrap_or("\\");
248 return Err(invalid_escape(
249 marker_index,
250 escape,
251 "incomplete universal character escape",
252 ));
253 };
254 let Some(digit) = hex_value(character) else {
255 return Err(CodecError::InvalidDigit {
256 radix: 16,
257 index,
258 character,
259 });
260 };
261 value = (value << 4) | u32::from(digit);
262 *position += 1;
263 }
264 if value > u32::from(u8::MAX) {
265 let escape = text
266 .get(marker_index..chars[*position - 1].0 + chars[*position - 1].1.len_utf8())
267 .unwrap_or("\\u");
268 return Err(invalid_escape(
269 marker_index,
270 escape,
271 "universal character value must fit in one byte",
272 ));
273 }
274 Ok(value as u8)
275}
276
277fn parse_octal_escape(chars: &[(usize, char)], position: &mut usize) -> u8 {
287 let mut value = 0u16;
288 let mut digit_count = 0;
289 while digit_count < 3 {
290 let Some(&(_, character)) = chars.get(*position) else {
291 break;
292 };
293 let Some(digit) = octal_value(character) else {
294 break;
295 };
296 value = (value << 3) | u16::from(digit);
297 *position += 1;
298 digit_count += 1;
299 }
300 value as u8
301}
302
303fn validate_source_character(index: usize, character: char) -> CodecResult<()> {
313 if is_source_character(character) {
314 return Ok(());
315 }
316 Err(CodecError::InvalidCharacter {
317 index,
318 character,
319 reason: "raw source character must be printable ASCII or allowed whitespace".to_owned(),
320 })
321}
322
/// Reports whether `character` may appear unescaped: horizontal tab, line feed, vertical tab,
/// form feed, or any ASCII character from space (`' '`) through tilde (`'~'`).
fn is_source_character(character: char) -> bool {
    match character {
        // Whitespace controls that are accepted verbatim (carriage return is not).
        '\t' | '\n' | '\u{0b}' | '\u{0c}' => true,
        other => (' '..='~').contains(&other),
    }
}
333
334fn push_encoded_byte(byte: u8, output: &mut String) {
340 match byte {
341 b'\'' => output.push_str("\\'"),
342 b'"' => output.push_str("\\\""),
343 b'?' => output.push_str("\\?"),
344 b'\\' => output.push_str("\\\\"),
345 0x07 => output.push_str("\\a"),
346 0x08 => output.push_str("\\b"),
347 0x0c => output.push_str("\\f"),
348 b'\n' => output.push_str("\\n"),
349 b'\r' => output.push_str("\\r"),
350 b'\t' => output.push_str("\\t"),
351 0x0b => output.push_str("\\v"),
352 b' '..=b'~' => output.push(byte as char),
353 _ => {
354 output.push('\\');
355 output.push('x');
356 output.push(uppercase_hex_digit(byte >> 4));
357 output.push(uppercase_hex_digit(byte & 0x0f));
358 }
359 }
360}
361
/// Returns the numeric value (0–15) of an ASCII hexadecimal digit in either letter case, or
/// `None` for any other character.
fn hex_value(character: char) -> Option<u8> {
    // `to_digit(16)` accepts exactly `0-9`, `a-f` and `A-F`; the result is below 16, so
    // narrowing to `u8` is lossless.
    character.to_digit(16).map(|digit| digit as u8)
}
377
/// Returns the numeric value (0–7) of an ASCII octal digit, or `None` for any other
/// character.
fn octal_value(character: char) -> Option<u8> {
    // `to_digit(8)` accepts exactly `0-7`; the result is below 8, so narrowing to `u8` is
    // lossless.
    character.to_digit(8).map(|digit| digit as u8)
}
391
/// Returns the uppercase hexadecimal digit for the low four bits of `value`; the high four
/// bits are ignored.
fn uppercase_hex_digit(value: u8) -> char {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";
    // Masking keeps the index within the 16-entry table.
    UPPER_HEX[usize::from(value & 0x0f)] as char
}
419
420fn invalid_escape(index: usize, escape: &str, reason: &str) -> CodecError {
430 CodecError::InvalidEscape {
431 index,
432 escape: escape.to_owned(),
433 reason: reason.to_owned(),
434 }
435}