1use crate::{
11 Codec,
12 MiscCodecError,
13 MiscCodecResult,
14 ValueDecoder,
15 ValueEncoder,
16};
17
/// Uppercase hexadecimal digit characters, indexed by nibble value (`0..=15`).
const UPPER_HEX_DIGITS: [char; 16] = [
    '0', '1', '2', '3', '4', '5', '6', '7', // nibbles 0x0..=0x7
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', // nibbles 0x8..=0xF
];
22
/// Stateless codec handle for C string literal escaping.
///
/// The type carries no data; all behaviour lives in its inherent methods and
/// trait implementations.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CStringLiteralCodec;
34
35impl CStringLiteralCodec {
36 #[inline]
41 pub fn new() -> Self {
42 Self
43 }
44
45 #[inline]
53 pub fn encode(&self, bytes: &[u8]) -> String {
54 let mut output = String::with_capacity(bytes.len());
55 for byte in bytes {
56 push_encoded_byte(*byte, &mut output);
57 }
58 output
59 }
60
61 #[inline]
76 pub fn decode(&self, text: &str) -> MiscCodecResult<Vec<u8>> {
77 let input = text.as_bytes();
78 let mut output = Vec::with_capacity(text.len());
79 let mut index = 0;
80 while index < input.len() {
81 let (decoded, consumed) = decode_c_string_literal_unit(
82 input,
83 index,
84 CStringLiteralParseContext::CompleteText(text),
85 )?;
86 debug_assert!(consumed > 0);
87 output.push(decoded);
88 index += consumed;
89 }
90 Ok(output)
91 }
92}
93
94impl ValueEncoder<[u8]> for CStringLiteralCodec {
95 type Error = MiscCodecError;
96 type Output = String;
97
98 #[inline]
100 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
101 Ok(CStringLiteralCodec::encode(self, input))
102 }
103}
104
105impl ValueDecoder<str> for CStringLiteralCodec {
106 type Error = MiscCodecError;
107 type Output = Vec<u8>;
108
109 #[inline]
111 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
112 CStringLiteralCodec::decode(self, input)
113 }
114}
115
116unsafe impl Codec for CStringLiteralCodec {
117 type Value = u8;
118 type Unit = u8;
119 type DecodeError = MiscCodecError;
120 type EncodeError = MiscCodecError;
121
122 #[inline(always)]
124 fn min_units_per_value(&self) -> core::num::NonZeroUsize {
125 core::num::NonZeroUsize::MIN
126 }
127
128 #[inline(always)]
130 fn max_units_per_value(&self) -> core::num::NonZeroUsize {
131 unsafe { core::num::NonZeroUsize::new_unchecked(10) }
132 }
133
134 #[inline]
136 unsafe fn decode_unchecked(
137 &self,
138 input: &[u8],
139 index: usize,
140 ) -> Result<(u8, core::num::NonZeroUsize), Self::DecodeError> {
141 debug_assert!(index < input.len());
142
143 let (value, consumed) = decode_c_string_literal_byte(input, index)?;
144 debug_assert!(consumed > 0);
145 let consumed =
148 unsafe { core::num::NonZeroUsize::new_unchecked(consumed) };
149 Ok((value, consumed))
150 }
151
152 #[inline]
154 unsafe fn encode_unchecked(
155 &self,
156 value: &u8,
157 output: &mut [u8],
158 index: usize,
159 ) -> Result<usize, Self::EncodeError> {
160 let required = match *value {
161 b'\'' | b'"' | b'?' | b'\\' | 0x07 | 0x08 | 0x0c | b'\n'
162 | b'\r' | b'\t' | 0x0b => 2,
163 b' '..=b'~' => 1,
164 _ => 4,
165 };
166 debug_assert!(index + required <= output.len());
167
168 Ok(write_encoded_byte(*value, output, index))
169 }
170}
171
/// Describes what the decoder knows about its input, which selects the error
/// style used for diagnostics.
#[derive(Clone, Copy, Debug)]
enum CStringLiteralParseContext<'a> {
    /// The whole literal is available as text; the original `str` is kept so
    /// diagnostics can quote real characters instead of single bytes.
    CompleteText(&'a str),
    /// Only a byte slice is available and it may end in the middle of an
    /// escape.
    StreamingBytes,
}
183
184impl CStringLiteralParseContext<'_> {
185 #[inline(always)]
191 fn is_complete_text(self) -> bool {
192 matches!(self, Self::CompleteText(_))
193 }
194
195 fn trailing_escape_error(
205 self,
206 marker_index: usize,
207 available: usize,
208 ) -> MiscCodecError {
209 match self {
210 Self::CompleteText(_) => {
211 invalid_escape(marker_index, "\\", "incomplete escape sequence")
212 }
213 Self::StreamingBytes => MiscCodecError::Incomplete {
214 required: 2,
215 available,
216 },
217 }
218 }
219
220 fn source_character(self, input: &[u8], index: usize) -> char {
230 match self {
231 Self::CompleteText(text) => text
232 .get(index..)
233 .and_then(|rest| rest.chars().next())
234 .unwrap_or(char::from(input[index])),
235 Self::StreamingBytes => char::from(input[index]),
236 }
237 }
238
239 #[inline(always)]
244 fn raw_source_reason(self) -> &'static str {
245 match self {
246 Self::CompleteText(_) => {
247 "raw source character must be printable ASCII or allowed whitespace"
248 }
249 Self::StreamingBytes => {
250 "raw source byte must be printable ASCII or allowed whitespace"
251 }
252 }
253 }
254
255 fn escape_fragment(self, input: &[u8], start: usize, end: usize) -> String {
265 match self {
266 Self::CompleteText(text) => text
267 .get(start..end)
268 .or(text.get(start..))
269 .unwrap_or("\\")
270 .to_owned(),
271 Self::StreamingBytes => escape_fragment(input, start, end),
272 }
273 }
274}
275
276#[inline]
282fn push_encoded_byte(byte: u8, output: &mut String) {
283 match byte {
284 b'\'' => output.push_str("\\'"),
285 b'"' => output.push_str("\\\""),
286 b'?' => output.push_str("\\?"),
287 b'\\' => output.push_str("\\\\"),
288 0x07 => output.push_str("\\a"),
289 0x08 => output.push_str("\\b"),
290 0x0c => output.push_str("\\f"),
291 b'\n' => output.push_str("\\n"),
292 b'\r' => output.push_str("\\r"),
293 b'\t' => output.push_str("\\t"),
294 0x0b => output.push_str("\\v"),
295 b' '..=b'~' => output.push(byte as char),
296 _ => {
297 output.push('\\');
298 output.push('x');
299 output.push(uppercase_hex_digit(byte >> 4));
300 output.push(uppercase_hex_digit(byte & 0x0f));
301 }
302 }
303}
304
305#[inline]
317fn decode_c_string_literal_byte(
318 input: &[u8],
319 index: usize,
320) -> MiscCodecResult<(u8, usize)> {
321 decode_c_string_literal_unit(
322 input,
323 index,
324 CStringLiteralParseContext::StreamingBytes,
325 )
326}
327
328#[inline]
341fn decode_c_string_literal_unit(
342 input: &[u8],
343 index: usize,
344 context: CStringLiteralParseContext<'_>,
345) -> MiscCodecResult<(u8, usize)> {
346 let available = input.len().saturating_sub(index);
347 if available == 0 {
348 return Err(MiscCodecError::Incomplete {
349 required: 1,
350 available,
351 });
352 }
353 let byte = input[index];
354 if byte != b'\\' {
355 validate_source_unit(input, index, byte, context)?;
356 return Ok((byte, 1));
357 }
358 if available < 2 {
359 return Err(context.trailing_escape_error(index, available));
360 }
361 let escape = input[index + 1];
362 match escape {
363 b' ' => Ok((b' ', 2)),
364 b'\'' => Ok((b'\'', 2)),
365 b'"' => Ok((b'"', 2)),
366 b'?' => Ok((b'?', 2)),
367 b'\\' => Ok((b'\\', 2)),
368 b'a' => Ok((0x07, 2)),
369 b'b' => Ok((0x08, 2)),
370 b'f' => Ok((0x0c, 2)),
371 b'n' => Ok((b'\n', 2)),
372 b'r' => Ok((b'\r', 2)),
373 b't' => Ok((b'\t', 2)),
374 b'v' => Ok((0x0b, 2)),
375 b'x' | b'X' => {
376 if !context.is_complete_text() {
377 ensure_variable_hex_escape_complete(input, index, available)?;
378 }
379 parse_variable_hex_escape_units(input, index)
380 }
381 b'u' => {
382 if !context.is_complete_text() {
383 ensure_fixed_escape_complete(available, 6)?;
384 }
385 parse_fixed_hex_escape_units(input, index, 4, context)
386 }
387 b'U' => {
388 if !context.is_complete_text() {
389 ensure_fixed_escape_complete(available, 10)?;
390 }
391 parse_fixed_hex_escape_units(input, index, 8, context)
392 }
393 b'0'..=b'7' => {
394 ensure_octal_escape_complete(input, index, available)?;
395 Ok(parse_octal_escape_units(input, index))
396 }
397 _ => Err(invalid_escape(
398 index,
399 &context.escape_fragment(input, index, index + 2),
400 "unsupported escape sequence",
401 )),
402 }
403}
404
405#[inline]
415fn ensure_variable_hex_escape_complete(
416 _input: &[u8],
417 _index: usize,
418 available: usize,
419) -> MiscCodecResult<()> {
420 if available < 3 {
421 return Err(MiscCodecError::Incomplete {
422 required: 3,
423 available,
424 });
425 }
426 Ok(())
427}
428
429#[inline]
438fn ensure_fixed_escape_complete(
439 available: usize,
440 required: usize,
441) -> MiscCodecResult<()> {
442 if available < required {
443 return Err(MiscCodecError::Incomplete {
444 required,
445 available,
446 });
447 }
448 Ok(())
449}
450
451#[inline]
461fn ensure_octal_escape_complete(
462 _input: &[u8],
463 _index: usize,
464 _available: usize,
465) -> MiscCodecResult<()> {
466 Ok(())
467}
468
469#[inline]
481fn validate_source_unit(
482 input: &[u8],
483 index: usize,
484 byte: u8,
485 context: CStringLiteralParseContext<'_>,
486) -> MiscCodecResult<()> {
487 if matches!(byte, b'\t' | b'\n' | 0x0b | 0x0c | b' '..=b'~') {
488 return Ok(());
489 }
490 Err(MiscCodecError::InvalidCharacter {
491 index,
492 character: context.source_character(input, index),
493 reason: context.raw_source_reason().to_owned(),
494 })
495}
496
497#[inline]
510fn parse_variable_hex_escape_units(
511 input: &[u8],
512 marker_index: usize,
513) -> MiscCodecResult<(u8, usize)> {
514 let mut value = 0u8;
515 let mut digit_count = 0usize;
516 let mut index = marker_index + 2;
517 while digit_count < 2 {
518 let Some(&byte) = input.get(index) else {
519 break;
520 };
521 let Some(digit) = hex_value(char::from(byte)) else {
522 break;
523 };
524 value = (value << 4) | digit;
525 index += 1;
526 digit_count += 1;
527 }
528 if digit_count == 0 {
529 return Err(invalid_escape(
530 marker_index,
531 "\\x",
532 "expected at least one hexadecimal digit",
533 ));
534 }
535 Ok((value, 2 + digit_count))
536}
537
538#[inline]
553fn parse_fixed_hex_escape_units(
554 input: &[u8],
555 marker_index: usize,
556 digits: usize,
557 context: CStringLiteralParseContext<'_>,
558) -> MiscCodecResult<(u8, usize)> {
559 let mut value = 0u32;
560 let mut index = marker_index + 2;
561 for _ in 0..digits {
562 let Some(_) = input.get(index) else {
563 return Err(invalid_escape(
564 marker_index,
565 &context.escape_fragment(input, marker_index, input.len()),
566 "incomplete universal character escape",
567 ));
568 };
569 let character = context.source_character(input, index);
570 let Some(digit) = hex_value(character) else {
571 return Err(MiscCodecError::InvalidDigit {
572 radix: 16,
573 index,
574 character,
575 });
576 };
577 value = (value << 4) | u32::from(digit);
578 index += 1;
579 }
580 if value > u32::from(u8::MAX) {
581 return Err(invalid_escape(
582 marker_index,
583 &context.escape_fragment(input, marker_index, index),
584 "universal character value must fit in one byte",
585 ));
586 }
587 Ok((value as u8, 2 + digits))
588}
589
590#[inline]
600fn parse_octal_escape_units(input: &[u8], marker_index: usize) -> (u8, usize) {
601 let mut value = 0u16;
602 let mut digit_count = 0usize;
603 let mut index = marker_index + 1;
604 while digit_count < 3 {
605 let Some(&byte) = input.get(index) else {
606 break;
607 };
608 let Some(digit) = octal_value(char::from(byte)) else {
609 break;
610 };
611 value = (value << 3) | u16::from(digit);
612 index += 1;
613 digit_count += 1;
614 }
615 (value as u8, 1 + digit_count)
616}
617
618#[inline]
628fn write_encoded_byte(byte: u8, output: &mut [u8], index: usize) -> usize {
629 match byte {
630 b'\'' => write_ascii_escape(output, index, b'\''),
631 b'"' => write_ascii_escape(output, index, b'"'),
632 b'?' => write_ascii_escape(output, index, b'?'),
633 b'\\' => write_ascii_escape(output, index, b'\\'),
634 0x07 => write_ascii_escape(output, index, b'a'),
635 0x08 => write_ascii_escape(output, index, b'b'),
636 0x0c => write_ascii_escape(output, index, b'f'),
637 b'\n' => write_ascii_escape(output, index, b'n'),
638 b'\r' => write_ascii_escape(output, index, b'r'),
639 b'\t' => write_ascii_escape(output, index, b't'),
640 0x0b => write_ascii_escape(output, index, b'v'),
641 b' '..=b'~' => {
642 output[index] = byte;
643 1
644 }
645 _ => {
646 output[index] = b'\\';
647 output[index + 1] = b'x';
648 output[index + 2] = uppercase_hex_digit(byte >> 4) as u8;
649 output[index + 3] = uppercase_hex_digit(byte & 0x0f) as u8;
650 4
651 }
652 }
653}
654
/// Writes a backslash followed by `escape` into `output` at `index` and
/// returns the two units written.
///
/// # Panics
///
/// Panics if `output` has fewer than two units available from `index`.
#[inline(always)]
fn write_ascii_escape(output: &mut [u8], index: usize, escape: u8) -> usize {
    let pair = [b'\\', escape];
    for (offset, unit) in pair.iter().enumerate() {
        output[index + offset] = *unit;
    }
    pair.len()
}
670
/// Renders `input[start..end]` as text for diagnostics.
///
/// Each byte is widened to the `char` with the same code point, so bytes
/// at or above `0x80` show up as their Latin-1 characters. `end` is clamped
/// to the input length. A `start` beyond the clamped end yields an empty
/// fragment instead of panicking, so building an error can never itself
/// fail.
fn escape_fragment(input: &[u8], start: usize, end: usize) -> String {
    let bounded_end = end.min(input.len());
    input
        .get(start..bounded_end)
        .unwrap_or_default()
        .iter()
        .copied()
        .map(char::from)
        .collect()
}
687
/// Returns the numeric value of an ASCII hexadecimal digit
/// (`0-9`, `a-f`, `A-F`), or `None` for any other character.
#[inline(always)]
fn hex_value(character: char) -> Option<u8> {
    // `to_digit(16)` yields values below 16, so the narrowing is lossless.
    character.to_digit(16).map(|digit| digit as u8)
}
704
/// Returns the numeric value of an ASCII octal digit (`0-7`), or `None`
/// for any other character.
#[inline(always)]
fn octal_value(character: char) -> Option<u8> {
    // `to_digit(8)` yields values below 8, so the narrowing is lossless.
    character.to_digit(8).map(|digit| digit as u8)
}
719
720#[inline(always)]
728fn uppercase_hex_digit(value: u8) -> char {
729 UPPER_HEX_DIGITS[(value & 0x0f) as usize]
730}
731
732fn invalid_escape(index: usize, escape: &str, reason: &str) -> MiscCodecError {
742 MiscCodecError::InvalidEscape {
743 index,
744 escape: escape.to_owned(),
745 reason: reason.to_owned(),
746 }
747}