use crate::{
CodecError,
CodecResult,
Decoder,
Encoder,
};
/// Codec that converts between raw bytes and text escaped in the style of a
/// C string literal.
///
/// The type is a unit struct and carries no state, so every value of it is
/// interchangeable.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CStringLiteralCodec;
impl CStringLiteralCodec {
pub fn new() -> Self {
Self
}
pub fn encode(&self, bytes: &[u8]) -> String {
let mut output = String::with_capacity(bytes.len());
for byte in bytes {
push_encoded_byte(*byte, &mut output);
}
output
}
pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
let chars = text.char_indices().collect::<Vec<_>>();
let mut output = Vec::with_capacity(text.len());
let mut position = 0;
while let Some(&(index, character)) = chars.get(position) {
if character == '\\' {
decode_escape(text, &chars, &mut position, &mut output)?;
continue;
}
validate_source_character(index, character)?;
output.push(character as u8);
position += 1;
}
Ok(output)
}
}
/// Exposes the inherent `CStringLiteralCodec::encode` through the `Encoder`
/// trait.
impl Encoder<[u8]> for CStringLiteralCodec {
    type Output = String;
    type Error = CodecError;

    /// Escapes `input`; this implementation always returns `Ok`.
    fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
        // The explicit type path selects the inherent method, not this trait
        // method.
        let escaped = CStringLiteralCodec::encode(self, input);
        Ok(escaped)
    }
}
/// Exposes the inherent `CStringLiteralCodec::decode` through the `Decoder`
/// trait.
impl Decoder<str> for CStringLiteralCodec {
    type Output = Vec<u8>;
    type Error = CodecError;

    /// Decodes `input`, forwarding whatever result the inherent method yields.
    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
        // The explicit type path selects the inherent method, not this trait
        // method.
        let decoded = CStringLiteralCodec::decode(self, input);
        decoded
    }
}
/// Decodes one backslash escape sequence beginning at `chars[*position]`.
///
/// On entry `*position` must index the backslash. On success exactly one byte
/// is appended to `output` and `*position` is advanced past the whole escape.
///
/// * `text` - the full input, used to quote the offending escape in errors.
/// * `chars` - `text` as `(byte offset, character)` pairs.
/// * `position` - cursor into `chars`.
/// * `output` - destination for the decoded byte.
///
/// # Errors
///
/// Returns `CodecError::InvalidEscape` when the backslash is the last
/// character, when the character after it does not start a supported escape,
/// or when an octal escape encodes a value above `0xFF`. Errors from the
/// `\x` and `\u`/`\U` parsers are propagated unchanged.
fn decode_escape(
    text: &str,
    chars: &[(usize, char)],
    position: &mut usize,
    output: &mut Vec<u8>,
) -> CodecResult<()> {
    let marker_index = chars[*position].0;
    *position += 1;
    let Some(&(_, escape)) = chars.get(*position) else {
        return Err(invalid_escape(
            marker_index,
            "\\",
            "incomplete escape sequence",
        ));
    };
    match escape {
        ' ' => push_simple_escape(position, output, b' '),
        '\'' => push_simple_escape(position, output, b'\''),
        '"' => push_simple_escape(position, output, b'"'),
        '?' => push_simple_escape(position, output, b'?'),
        '\\' => push_simple_escape(position, output, b'\\'),
        'a' => push_simple_escape(position, output, 0x07),
        'b' => push_simple_escape(position, output, 0x08),
        'f' => push_simple_escape(position, output, 0x0c),
        'n' => push_simple_escape(position, output, b'\n'),
        'r' => push_simple_escape(position, output, b'\r'),
        't' => push_simple_escape(position, output, b'\t'),
        'v' => push_simple_escape(position, output, 0x0b),
        'x' | 'X' => {
            // Skip the marker; the parser starts at the first candidate digit.
            *position += 1;
            let value = parse_variable_hex_escape(chars, position, marker_index)?;
            output.push(value);
        }
        'u' => {
            *position += 1;
            let value = parse_fixed_hex_escape(text, chars, position, marker_index, 4)?;
            output.push(value);
        }
        'U' => {
            *position += 1;
            let value = parse_fixed_hex_escape(text, chars, position, marker_index, 8)?;
            output.push(value);
        }
        '0'..='7' => {
            // The octal parser starts on the first digit (`escape` itself), so
            // the cursor is not advanced beforehand.
            let first_digit = *position;
            let value = parse_octal_escape(chars, position);
            // Three digits with a leading digit of 4 or more encode at least
            // 0o400 (256). Reject them instead of keeping only the low eight
            // bits, matching how `\u`/`\U` values above 0xFF are rejected.
            if *position - first_digit == 3 && escape >= '4' {
                // Backslash plus three ASCII digits: four bytes of `text`.
                let end = marker_index + 4;
                return Err(invalid_escape(
                    marker_index,
                    text.get(marker_index..end).unwrap_or("\\"),
                    "octal escape value must fit in one byte",
                ));
            }
            output.push(value);
        }
        _ => {
            return Err(invalid_escape(
                marker_index,
                &format!("\\{escape}"),
                "unsupported escape sequence",
            ));
        }
    }
    Ok(())
}
/// Emits the byte for a single-character escape and steps the cursor over the
/// escape character.
///
/// Appends `byte` to `output` and increments `*position` by one.
fn push_simple_escape(position: &mut usize, output: &mut Vec<u8>, byte: u8) {
    *position += 1;
    output.extend_from_slice(&[byte]);
}
/// Parses the digits of a `\x` / `\X` escape.
///
/// On entry `*position` must index the character immediately after the
/// `x`/`X` marker. One or two hexadecimal digits are consumed (parsing stops
/// at the first non-hex character or after the second digit) and `*position`
/// is left just past the last digit consumed.
///
/// * `chars` - the input as `(byte offset, character)` pairs.
/// * `position` - cursor into `chars`.
/// * `marker_index` - byte offset of the backslash, reported in errors.
///
/// # Errors
///
/// Returns `CodecError::InvalidEscape` when no hexadecimal digit follows the
/// marker. The reported escape uses the marker as written in the input.
fn parse_variable_hex_escape(
    chars: &[(usize, char)],
    position: &mut usize,
    marker_index: usize,
) -> CodecResult<u8> {
    let digits_start = *position;
    let mut value = 0u8;
    let mut digit_count = 0;
    while digit_count < 2 {
        let Some(digit) = chars
            .get(*position)
            .and_then(|&(_, character)| hex_value(character))
        else {
            break;
        };
        // Two 4-bit digits fit exactly in a byte, so this never loses bits.
        value = (value << 4) | digit;
        *position += 1;
        digit_count += 1;
    }
    if digit_count == 0 {
        // Quote the marker exactly as it appeared (`x` or `X`) so the
        // diagnostic matches the source text. Fall back to `\x` if the cursor
        // was not positioned right after a marker.
        let escape = match digits_start.checked_sub(1).and_then(|i| chars.get(i)) {
            Some(&(_, marker @ ('x' | 'X'))) => format!("\\{marker}"),
            _ => "\\x".to_owned(),
        };
        return Err(invalid_escape(
            marker_index,
            &escape,
            "expected at least one hexadecimal digit",
        ));
    }
    Ok(value)
}
/// Parses a fixed-width hexadecimal escape (`\u` with 4 digits, `\U` with 8).
///
/// On entry `*position` must index the first expected digit. Exactly `digits`
/// hexadecimal digits are consumed and `*position` is left just past them.
///
/// * `text` - the full input, used to quote the escape in errors.
/// * `chars` - `text` as `(byte offset, character)` pairs.
/// * `position` - cursor into `chars`.
/// * `marker_index` - byte offset of the backslash that began the escape.
/// * `digits` - how many hexadecimal digits must be present.
///
/// # Errors
///
/// * `CodecError::InvalidEscape` if the input ends before `digits` digits
///   were read, or if the parsed value is larger than `u8::MAX`.
/// * `CodecError::InvalidDigit` if a non-hexadecimal character is found where
///   a digit is required.
fn parse_fixed_hex_escape(
    text: &str,
    chars: &[(usize, char)],
    position: &mut usize,
    marker_index: usize,
    digits: usize,
) -> CodecResult<u8> {
    let mut accumulated: u32 = 0;
    let mut remaining = digits;
    while remaining > 0 {
        let (index, character) = match chars.get(*position) {
            Some(&entry) => entry,
            None => {
                // Input ran out: quote everything from the backslash onward.
                let escape = text.get(marker_index..).unwrap_or("\\");
                return Err(invalid_escape(
                    marker_index,
                    escape,
                    "incomplete universal character escape",
                ));
            }
        };
        let digit = match hex_value(character) {
            Some(digit) => digit,
            None => {
                return Err(CodecError::InvalidDigit {
                    radix: 16,
                    index,
                    character,
                });
            }
        };
        accumulated = (accumulated << 4) | u32::from(digit);
        *position += 1;
        remaining -= 1;
    }
    if accumulated <= u32::from(u8::MAX) {
        return Ok(accumulated as u8);
    }
    // Quote the escape from its backslash through the last digit consumed.
    let (last_index, last_character) = chars[*position - 1];
    let end = last_index + last_character.len_utf8();
    let escape = text.get(marker_index..end).unwrap_or("\\u");
    Err(invalid_escape(
        marker_index,
        escape,
        "universal character value must fit in one byte",
    ))
}
/// Parses up to three octal digits starting at `chars[*position]`.
///
/// Parsing stops at the first non-octal character, at the end of input, or
/// after the third digit; `*position` is left just past the last digit
/// consumed. The accumulated value is cast to `u8`, so only its low eight
/// bits are returned.
fn parse_octal_escape(chars: &[(usize, char)], position: &mut usize) -> u8 {
    let mut accumulated: u16 = 0;
    for _ in 0..3 {
        let digit = match chars
            .get(*position)
            .and_then(|&(_, character)| octal_value(character))
        {
            Some(digit) => digit,
            None => break,
        };
        accumulated = (accumulated << 3) | u16::from(digit);
        *position += 1;
    }
    accumulated as u8
}
/// Checks that an unescaped character may appear in the encoded text.
///
/// # Errors
///
/// Returns `CodecError::InvalidCharacter` carrying `index` and `character`
/// when `is_source_character` rejects the character.
fn validate_source_character(index: usize, character: char) -> CodecResult<()> {
    if !is_source_character(character) {
        return Err(CodecError::InvalidCharacter {
            index,
            character,
            reason: String::from(
                "raw source character must be printable ASCII or allowed whitespace",
            ),
        });
    }
    Ok(())
}
/// Reports whether `character` is accepted unescaped.
///
/// Accepted characters are the printable ASCII range (space through `~`)
/// plus horizontal tab, line feed, vertical tab and form feed.
fn is_source_character(character: char) -> bool {
    // `is_ascii_graphic` covers `!` through `~`; the space is added here.
    character.is_ascii_graphic()
        || matches!(character, ' ' | '\t' | '\n' | '\u{0b}' | '\u{0c}')
}
/// Appends the escaped form of `byte` to `output`.
///
/// Quotes, `?`, backslash and the control bytes with a named escape use that
/// escape; other bytes from space through `~` are copied verbatim; every
/// remaining byte becomes `\x` followed by two uppercase hexadecimal digits.
fn push_encoded_byte(byte: u8, output: &mut String) {
    let named_escape = match byte {
        b'\'' => Some("\\'"),
        b'"' => Some("\\\""),
        b'?' => Some("\\?"),
        b'\\' => Some("\\\\"),
        0x07 => Some("\\a"),
        0x08 => Some("\\b"),
        0x0c => Some("\\f"),
        b'\n' => Some("\\n"),
        b'\r' => Some("\\r"),
        b'\t' => Some("\\t"),
        0x0b => Some("\\v"),
        _ => None,
    };
    if let Some(escape) = named_escape {
        output.push_str(escape);
        return;
    }
    if (b' '..=b'~').contains(&byte) {
        output.push(char::from(byte));
        return;
    }
    // Fallback: high nibble first, then low nibble.
    output.push_str("\\x");
    output.push(uppercase_hex_digit(byte >> 4));
    output.push(uppercase_hex_digit(byte & 0x0f));
}
/// Returns the numeric value of an ASCII hexadecimal digit (either case), or
/// `None` for any other character.
fn hex_value(character: char) -> Option<u8> {
    // `to_digit(16)` yields at most 15, so narrowing to `u8` is lossless.
    character.to_digit(16).map(|digit| digit as u8)
}
/// Returns the numeric value of an ASCII octal digit (`0` through `7`), or
/// `None` for any other character.
fn octal_value(character: char) -> Option<u8> {
    // `to_digit(8)` yields at most 7, so narrowing to `u8` is lossless.
    character.to_digit(8).map(|digit| digit as u8)
}
/// Returns the uppercase hexadecimal digit for the low four bits of `value`.
///
/// The upper four bits are ignored.
fn uppercase_hex_digit(value: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // Masking keeps the index within the 16-entry table.
    let nibble = usize::from(value & 0x0f);
    char::from(DIGITS[nibble])
}
/// Builds a `CodecError::InvalidEscape` from borrowed text.
///
/// * `index` - stored as the error's `index`.
/// * `escape` - copied into the error's `escape` field.
/// * `reason` - copied into the error's `reason` field.
fn invalid_escape(index: usize, escape: &str, reason: &str) -> CodecError {
    let escape = String::from(escape);
    let reason = String::from(reason);
    CodecError::InvalidEscape {
        index,
        escape,
        reason,
    }
}