1use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// Stateless codec between raw bytes and escaped C-string-literal style text.
///
/// The type is a unit struct: it carries no configuration, so every instance
/// is interchangeable and copying it is free.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CStringLiteralCodec;
26
27impl CStringLiteralCodec {
28 pub fn new() -> Self {
33 Self
34 }
35
36 pub fn encode(&self, bytes: &[u8]) -> String {
44 let mut output = String::with_capacity(bytes.len());
45 for byte in bytes {
46 push_encoded_byte(*byte, &mut output);
47 }
48 output
49 }
50
51 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
64 let chars = text.char_indices().collect::<Vec<_>>();
65 let mut output = Vec::with_capacity(text.len());
66 let mut position = 0;
67 while let Some(&(index, character)) = chars.get(position) {
68 if character == '\\' {
69 decode_escape(text, &chars, &mut position, &mut output)?;
70 continue;
71 }
72 validate_source_character(index, character)?;
73 output.push(character as u8);
74 position += 1;
75 }
76 Ok(output)
77 }
78}
79
80impl Encoder<[u8]> for CStringLiteralCodec {
81 type Error = CodecError;
82 type Output = String;
83
84 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
86 Ok(CStringLiteralCodec::encode(self, input))
87 }
88}
89
90impl Decoder<str> for CStringLiteralCodec {
91 type Error = CodecError;
92 type Output = Vec<u8>;
93
94 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
96 CStringLiteralCodec::decode(self, input)
97 }
98}
99
100fn decode_escape(
112 text: &str,
113 chars: &[(usize, char)],
114 position: &mut usize,
115 output: &mut Vec<u8>,
116) -> CodecResult<()> {
117 let marker_index = chars[*position].0;
118 *position += 1;
119 let Some(&(_, escape)) = chars.get(*position) else {
120 return Err(invalid_escape(
121 marker_index,
122 "\\",
123 "incomplete escape sequence",
124 ));
125 };
126 match escape {
127 '\'' => push_simple_escape(position, output, b'\''),
128 '"' => push_simple_escape(position, output, b'"'),
129 '?' => push_simple_escape(position, output, b'?'),
130 '\\' => push_simple_escape(position, output, b'\\'),
131 'a' => push_simple_escape(position, output, 0x07),
132 'b' => push_simple_escape(position, output, 0x08),
133 'f' => push_simple_escape(position, output, 0x0c),
134 'n' => push_simple_escape(position, output, b'\n'),
135 'r' => push_simple_escape(position, output, b'\r'),
136 't' => push_simple_escape(position, output, b'\t'),
137 'v' => push_simple_escape(position, output, 0x0b),
138 'x' | 'X' => {
139 *position += 1;
140 let value = parse_variable_hex_escape(chars, position, marker_index)?;
141 output.push(value);
142 }
143 'u' => {
144 *position += 1;
145 let value = parse_fixed_hex_escape(text, chars, position, marker_index, 4)?;
146 output.push(value);
147 }
148 'U' => {
149 *position += 1;
150 let value = parse_fixed_hex_escape(text, chars, position, marker_index, 8)?;
151 output.push(value);
152 }
153 '0'..='7' => {
154 let value = parse_octal_escape(chars, position);
155 output.push(value);
156 }
157 _ => {
158 return Err(invalid_escape(
159 marker_index,
160 &format!("\\{escape}"),
161 "unsupported escape sequence",
162 ));
163 }
164 }
165 Ok(())
166}
167
/// Completes a single-character escape: steps `position` past the escape
/// character and appends the already-resolved `byte` to `output`.
fn push_simple_escape(position: &mut usize, output: &mut Vec<u8>, byte: u8) {
    *position += 1;
    output.push(byte);
}
178
179fn parse_variable_hex_escape(
193 chars: &[(usize, char)],
194 position: &mut usize,
195 marker_index: usize,
196) -> CodecResult<u8> {
197 let mut value = 0u8;
198 let mut digit_count = 0;
199 while digit_count < 2 {
200 let Some(&(_, character)) = chars.get(*position) else {
201 break;
202 };
203 let Some(digit) = hex_value(character) else {
204 break;
205 };
206 value = (value << 4) | digit;
207 *position += 1;
208 digit_count += 1;
209 }
210 if digit_count == 0 {
211 return Err(invalid_escape(
212 marker_index,
213 "\\x",
214 "expected at least one hexadecimal digit",
215 ));
216 }
217 Ok(value)
218}
219
220fn parse_fixed_hex_escape(
237 text: &str,
238 chars: &[(usize, char)],
239 position: &mut usize,
240 marker_index: usize,
241 digits: usize,
242) -> CodecResult<u8> {
243 let mut value = 0u32;
244 for _ in 0..digits {
245 let Some(&(index, character)) = chars.get(*position) else {
246 let escape = text.get(marker_index..).unwrap_or("\\");
247 return Err(invalid_escape(
248 marker_index,
249 escape,
250 "incomplete universal character escape",
251 ));
252 };
253 let Some(digit) = hex_value(character) else {
254 return Err(CodecError::InvalidDigit {
255 radix: 16,
256 index,
257 character,
258 });
259 };
260 value = (value << 4) | u32::from(digit);
261 *position += 1;
262 }
263 if value > u32::from(u8::MAX) {
264 let escape = text
265 .get(marker_index..chars[*position - 1].0 + chars[*position - 1].1.len_utf8())
266 .unwrap_or("\\u");
267 return Err(invalid_escape(
268 marker_index,
269 escape,
270 "universal character value must fit in one byte",
271 ));
272 }
273 Ok(value as u8)
274}
275
276fn parse_octal_escape(chars: &[(usize, char)], position: &mut usize) -> u8 {
286 let mut value = 0u16;
287 let mut digit_count = 0;
288 while digit_count < 3 {
289 let Some(&(_, character)) = chars.get(*position) else {
290 break;
291 };
292 let Some(digit) = octal_value(character) else {
293 break;
294 };
295 value = (value << 3) | u16::from(digit);
296 *position += 1;
297 digit_count += 1;
298 }
299 value as u8
300}
301
302fn validate_source_character(index: usize, character: char) -> CodecResult<()> {
312 if is_source_character(character) {
313 return Ok(());
314 }
315 Err(CodecError::InvalidCharacter {
316 index,
317 character,
318 reason: "raw source character must be printable ASCII or allowed whitespace".to_owned(),
319 })
320}
321
/// Reports whether `character` is accepted unescaped: tab, line feed,
/// vertical tab, form feed, or any character from space through `~`.
fn is_source_character(character: char) -> bool {
    match character {
        '\t' | '\n' | '\u{0b}' | '\u{0c}' => true,
        other => (' '..='~').contains(&other),
    }
}
332
333fn push_encoded_byte(byte: u8, output: &mut String) {
339 match byte {
340 b'\'' => output.push_str("\\'"),
341 b'"' => output.push_str("\\\""),
342 b'?' => output.push_str("\\?"),
343 b'\\' => output.push_str("\\\\"),
344 0x07 => output.push_str("\\a"),
345 0x08 => output.push_str("\\b"),
346 0x0c => output.push_str("\\f"),
347 b'\n' => output.push_str("\\n"),
348 b'\r' => output.push_str("\\r"),
349 b'\t' => output.push_str("\\t"),
350 0x0b => output.push_str("\\v"),
351 b' '..=b'~' => output.push(byte as char),
352 _ => {
353 output.push('\\');
354 output.push('x');
355 output.push(uppercase_hex_digit(byte >> 4));
356 output.push(uppercase_hex_digit(byte & 0x0f));
357 }
358 }
359}
360
/// Returns the numeric value (0..=15) of an ASCII hexadecimal digit in either
/// letter case, or `None` for any other character.
fn hex_value(character: char) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so the narrowing cast is lossless.
    character.to_digit(16).map(|digit| digit as u8)
}
376
/// Returns the numeric value (0..=7) of an ASCII octal digit, or `None` for
/// any other character.
fn octal_value(character: char) -> Option<u8> {
    // `to_digit(8)` yields at most 7, so the narrowing cast is lossless.
    character.to_digit(8).map(|digit| digit as u8)
}
390
/// Maps the low four bits of `value` to the matching uppercase hexadecimal
/// digit character; the high four bits are ignored.
fn uppercase_hex_digit(value: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Masking keeps the index within the 16-entry table.
    char::from(DIGITS[usize::from(value & 0x0f)])
}
418
419fn invalid_escape(index: usize, escape: &str, reason: &str) -> CodecError {
429 CodecError::InvalidEscape {
430 index,
431 escape: escape.to_owned(),
432 reason: reason.to_owned(),
433 }
434}