use crate::{
Codec,
MiscCodecError,
MiscCodecResult,
ValueDecoder,
ValueEncoder,
};
/// Lowercase hexadecimal digit characters, indexed by nibble value (`0..=15`).
const LOWER_HEX_DIGITS: [char; 16] = [
    '0', '1', '2', '3', '4', '5', '6', '7', // nibbles 0..=7
    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', // nibbles 8..=15
];
/// Uppercase hexadecimal digit characters, indexed by nibble value (`0..=15`).
const UPPER_HEX_DIGITS: [char; 16] = [
    '0', '1', '2', '3', '4', '5', '6', '7', // nibbles 0..=7
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', // nibbles 8..=15
];
/// Configurable hexadecimal text codec for byte slices.
///
/// Encoding writes an optional leading prefix, then each byte as two hex
/// digits, optionally preceded by a per-byte prefix and joined by a
/// separator. Decoding parses the same layout back into bytes and accepts
/// hex digits in either letter case.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HexCodec {
/// Emit `A`-`F` instead of `a`-`f` when encoding.
uppercase: bool,
/// Text written once before all bytes when encoding and required at the
/// start of the input when decoding. An empty string is treated as "no
/// prefix" by the decoder.
prefix: Option<String>,
/// Text written before every encoded byte and required before every byte
/// when decoding. An empty string is treated as "no byte prefix" by the
/// decoder.
byte_prefix: Option<String>,
/// Text written between consecutive encoded bytes and required between
/// bytes when decoding. An empty string is treated as "no separator" by
/// the decoder.
separator: Option<String>,
/// When set, the decoder skips ASCII whitespace at the positions its
/// parsing helpers allow; when unset, no whitespace is skipped.
ignore_ascii_whitespace: bool,
/// When set, `prefix` and `byte_prefix` are matched ASCII
/// case-insensitively while decoding.
ignore_prefix_case: bool,
}
impl HexCodec {
/// Creates a codec with the default configuration: lowercase digits, no
/// prefix, no per-byte prefix, no separator, and both decoding leniency
/// switches (whitespace skipping, case-insensitive prefixes) turned off.
#[inline]
pub fn new() -> Self {
    Self {
        // Decoding leniency switches start disabled.
        ignore_prefix_case: false,
        ignore_ascii_whitespace: false,
        // No textual decoration around or between bytes.
        separator: None,
        byte_prefix: None,
        prefix: None,
        // Lowercase output unless asked otherwise.
        uppercase: false,
    }
}
#[inline]
pub fn upper() -> Self {
Self::new().with_uppercase(true)
}
#[inline]
pub fn with_uppercase(mut self, uppercase: bool) -> Self {
self.uppercase = uppercase;
self
}
#[inline]
pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
self.prefix = Some(prefix.into());
self
}
#[inline]
pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
self.byte_prefix = Some(prefix.into());
self
}
#[inline]
pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
self.separator = Some(separator.into());
self
}
#[inline]
pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
self.ignore_ascii_whitespace = ignore;
self
}
#[inline]
pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
self.ignore_prefix_case = ignore;
self
}
/// Encodes `bytes` into a freshly allocated `String`.
///
/// The string is pre-sized for the prefix, every byte (its optional byte
/// prefix plus two digits) and the separators between bytes. All
/// arithmetic saturates, so the size computation itself cannot overflow.
#[inline]
pub fn encode(&self, bytes: &[u8]) -> String {
    let prefix_len = self.prefix.as_deref().map_or(0, str::len);
    let byte_prefix_len = self.byte_prefix.as_deref().map_or(0, str::len);
    let separator_len = self.separator.as_deref().map_or(0, str::len);

    // Each byte contributes its prefix plus two hex digits.
    let per_byte_len = byte_prefix_len.saturating_add(2);
    let bytes_len = bytes.len().saturating_mul(per_byte_len);
    // Separators sit between bytes, so there is one fewer than bytes.
    let gaps = bytes.len().saturating_sub(1);
    let separators_len = gaps.saturating_mul(separator_len);
    let capacity =
        prefix_len.saturating_add(bytes_len.saturating_add(separators_len));

    let mut output = String::with_capacity(capacity);
    self.encode_into(bytes, &mut output);
    output
}
#[inline]
pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
if let Some(prefix) = &self.prefix {
output.push_str(prefix);
}
for (index, byte) in bytes.iter().enumerate() {
if index > 0
&& let Some(separator) = &self.separator
{
output.push_str(separator);
}
if let Some(byte_prefix) = &self.byte_prefix {
output.push_str(byte_prefix);
}
push_hex_byte(*byte, self.uppercase, output);
}
}
/// Decodes `text` into a newly allocated byte vector.
///
/// # Errors
///
/// Returns whatever error `decode_into` reports for `text`.
#[inline]
pub fn decode(&self, text: &str) -> MiscCodecResult<Vec<u8>> {
    let mut decoded = Vec::new();
    self.decode_into(text, &mut decoded).map(|()| decoded)
}
/// Decodes `text` and appends the resulting bytes to `output`.
///
/// The input is first reduced to its hex digits according to the codec's
/// prefix, byte-prefix, separator and whitespace settings; the digits are
/// then combined pairwise, high nibble first.
///
/// # Errors
///
/// Propagates any error from digit normalization, and returns an
/// invalid-length error when the number of digits is odd. Both checks run
/// before anything is appended to `output`.
#[inline]
pub fn decode_into(
    &self,
    text: &str,
    output: &mut Vec<u8>,
) -> MiscCodecResult<()> {
    let digits = self.normalized_digits(text)?;
    if digits.len() % 2 != 0 {
        return Err(invalid_hex_length(digits.len()));
    }
    output.reserve(digits.len() / 2);
    for pair in digits.chunks_exact(2) {
        let (high_index, high_char) = pair[0];
        let (low_index, low_char) = pair[1];
        // Build the error lazily: it is only needed on the failure path,
        // matching how `decode_unchecked` reports invalid digits.
        let high = hex_value(high_char)
            .ok_or_else(|| invalid_hex_digit(high_index, high_char))?;
        let low = hex_value(low_char)
            .ok_or_else(|| invalid_hex_digit(low_index, low_char))?;
        output.push((high << 4) | low);
    }
    Ok(())
}
/// Strips the configured decoration from `text` and returns its hex digits
/// as `(byte index in text, digit)` pairs.
///
/// After the leading prefix is consumed, the parsing strategy is chosen
/// by configuration: a non-empty separator takes precedence, then a
/// non-empty byte prefix, otherwise the plain digit scanner is used.
#[inline]
fn normalized_digits(
    &self,
    text: &str,
) -> MiscCodecResult<Vec<(usize, char)>> {
    let start_index = self.consume_prefix(text)?;
    // Empty strings are treated the same as an unset option.
    let separator = self
        .separator
        .as_deref()
        .filter(|candidate| !candidate.is_empty());
    let byte_prefix = self
        .byte_prefix
        .as_deref()
        .filter(|candidate| !candidate.is_empty());

    match (separator, byte_prefix) {
        (Some(separator), _) => {
            self.normalized_separated_digits(text, start_index, separator)
        }
        (None, Some(byte_prefix)) => self.normalized_byte_prefixed_digits(
            text,
            byte_prefix,
            start_index,
        ),
        (None, None) => self.normalized_unprefixed_digits(text, start_index),
    }
}
/// Consumes the leading prefix of `text` and returns the byte index just
/// past it.
///
/// Returns `0` when no (non-empty) prefix is configured. Leading ASCII
/// whitespace before the prefix is skipped only when whitespace skipping
/// is enabled.
///
/// # Errors
///
/// Returns `MiscCodecError::MissingPrefix` when the text does not start
/// with the configured prefix.
#[inline]
fn consume_prefix(&self, text: &str) -> MiscCodecResult<usize> {
    let prefix = match self.prefix.as_deref() {
        Some(prefix) if !prefix.is_empty() => prefix,
        _ => return Ok(0),
    };
    let start = self.skip_ascii_whitespace(text, 0);
    if !self.starts_with_prefix(&text[start..], prefix) {
        return Err(MiscCodecError::MissingPrefix {
            prefix: prefix.to_owned(),
        });
    }
    Ok(start + prefix.len())
}
/// Parses separator-delimited bytes starting at `index` and returns their
/// hex digits as `(byte index in text, digit)` pairs.
///
/// Each byte is the optional byte prefix followed by exactly two hex
/// digits. Input that is empty (after optional whitespace skipping) yields
/// no digits.
///
/// # Errors
///
/// Fails when a byte prefix or digit is missing, when two bytes are not
/// joined by the separator, or when a separator is not followed by
/// another byte.
fn normalized_separated_digits(
    &self,
    text: &str,
    index: usize,
    separator: &str,
) -> MiscCodecResult<Vec<(usize, char)>> {
    let mut digits = Vec::with_capacity(text.len());
    let mut cursor = self.skip_ascii_whitespace(text, index);
    if cursor < text.len() {
        loop {
            // One byte: optional prefix, then exactly two digits.
            let after_prefix = self.consume_byte_prefix(text, cursor)?;
            let (high_index, high_char, after_high) =
                read_required_hex_digit(text, after_prefix)?;
            let (low_index, low_char, after_low) =
                read_required_hex_digit(text, after_high)?;
            digits.push((high_index, high_char));
            digits.push((low_index, low_char));

            let separator_start =
                self.next_separator_index(text, after_low, separator);
            if separator_start >= text.len() {
                // Input ended cleanly after a complete byte.
                break;
            }
            if !text[separator_start..].starts_with(separator) {
                return Err(invalid_hex_input(&format!(
                    "missing separator '{separator}' between hex bytes"
                )));
            }
            cursor = self.skip_ascii_whitespace(
                text,
                separator_start + separator.len(),
            );
            if cursor >= text.len() {
                return Err(invalid_hex_input(
                    "separator must be followed by a hex byte",
                ));
            }
        }
    }
    Ok(digits)
}
/// Consumes the per-byte prefix at `index` and returns the index just
/// past it; returns `index` unchanged when no (non-empty) byte prefix is
/// configured.
///
/// # Errors
///
/// Returns `MiscCodecError::MissingPrefix` when a byte prefix is
/// configured but the text at `index` does not start with it.
#[inline]
fn consume_byte_prefix(
    &self,
    text: &str,
    index: usize,
) -> MiscCodecResult<usize> {
    match self.byte_prefix.as_deref() {
        None | Some("") => Ok(index),
        Some(prefix) if self.starts_with_prefix(&text[index..], prefix) => {
            Ok(index + prefix.len())
        }
        Some(prefix) => Err(MiscCodecError::MissingPrefix {
            prefix: prefix.to_owned(),
        }),
    }
}
/// Returns the index at which the separator is expected after a byte
/// that ended at `index`.
///
/// If only skippable whitespace remains, the end-of-whitespace index is
/// returned, which is at or past the end of `text`. Otherwise a separator
/// made entirely of ASCII whitespace must be matched from `index` itself,
/// so that whitespace skipping does not swallow the separator. Any other
/// separator is expected after the skipped whitespace.
#[inline]
fn next_separator_index(
    &self,
    text: &str,
    index: usize,
    separator: &str,
) -> usize {
    let after_whitespace = self.skip_ascii_whitespace(text, index);
    let more_input = after_whitespace < text.len();
    // ASCII whitespace is single-byte, so a byte-wise test is equivalent
    // to testing every character.
    let whitespace_separator = || {
        separator
            .bytes()
            .all(|unit| unit.is_ascii_whitespace())
    };
    if more_input && whitespace_separator() {
        index
    } else {
        after_whitespace
    }
}
/// Scans plain hex text starting at `index` and returns its digits as
/// `(byte index in text, digit)` pairs.
///
/// ASCII whitespace is skipped when whitespace skipping is enabled.
///
/// # Errors
///
/// Returns an invalid-digit error, carrying the byte index, for any other
/// character that is not a hex digit.
fn normalized_unprefixed_digits(
    &self,
    text: &str,
    index: usize,
) -> MiscCodecResult<Vec<(usize, char)>> {
    let mut digits = Vec::with_capacity(text.len());
    // A start index that cannot be sliced leaves nothing to scan.
    let Some(rest) = text.get(index..) else {
        return Ok(digits);
    };
    for (offset, ch) in rest.char_indices() {
        if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
            continue;
        }
        // Report positions relative to the full input, not to `rest`.
        let position = index + offset;
        if hex_value(ch).is_none() {
            return Err(invalid_hex_digit(position, ch));
        }
        digits.push((position, ch));
    }
    Ok(digits)
}
fn normalized_byte_prefixed_digits(
&self,
text: &str,
prefix: &str,
mut index: usize,
) -> MiscCodecResult<Vec<(usize, char)>> {
let mut digits = Vec::with_capacity(text.len());
while index < text.len() {
index = self.skip_ignored(text, index);
if index >= text.len() {
break;
}
let Some(rest) = text.get(index..) else {
break;
};
if !self.starts_with_prefix(rest, prefix) {
return Err(MiscCodecError::MissingPrefix {
prefix: prefix.to_owned(),
});
}
index += prefix.len();
let mut digit_count = 0;
while digit_count < 2 && index < text.len() {
let Some(rest) = text.get(index..) else {
break;
};
let Some(ch) = rest.chars().next() else {
break;
};
if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
index += ch.len_utf8();
continue;
}
if hex_value(ch).is_some() {
digits.push((index, ch));
index += ch.len_utf8();
digit_count += 1;
continue;
}
return Err(invalid_hex_digit(index, ch));
}
}
Ok(digits)
}
/// Returns the index of the first character at or after `index` that the
/// decoder does not ignore.
///
/// The only ignorable input is ASCII whitespace (and only when whitespace
/// skipping is enabled), so this delegates to `skip_ascii_whitespace`
/// rather than duplicating its loop.
#[inline]
fn skip_ignored(&self, text: &str, index: usize) -> usize {
    self.skip_ascii_whitespace(text, index)
}
/// Returns the index of the first byte at or after `index` that is not
/// ASCII whitespace.
///
/// Returns `index` unchanged when whitespace skipping is disabled or when
/// `index` is already at or past the end of `text`.
#[inline]
fn skip_ascii_whitespace(&self, text: &str, index: usize) -> usize {
    if !self.ignore_ascii_whitespace {
        return index;
    }
    let Some(tail) = text.as_bytes().get(index..) else {
        return index;
    };
    let skipped = tail
        .iter()
        .take_while(|unit| unit.is_ascii_whitespace())
        .count();
    index + skipped
}
/// Reports whether `text` begins with `prefix`, comparing ASCII
/// case-insensitively when prefix-case ignoring is enabled.
///
/// In the case-insensitive mode a `text` that is shorter than `prefix`,
/// or that cannot be split at `prefix.len()` on a character boundary,
/// does not match.
#[inline]
fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
    if self.ignore_prefix_case {
        text.get(..prefix.len())
            .is_some_and(|candidate| candidate.eq_ignore_ascii_case(prefix))
    } else {
        text.starts_with(prefix)
    }
}
}
impl Default for HexCodec {
#[inline]
fn default() -> Self {
Self::new()
}
}
/// Adapter exposing the inherent `HexCodec::encode` through `ValueEncoder`.
impl ValueEncoder<[u8]> for HexCodec {
    type Error = MiscCodecError;
    type Output = String;

    /// Encodes `input` as hex text; this implementation never returns `Err`.
    #[inline]
    fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
        // Name the inherent method explicitly so the call cannot be mistaken
        // for this trait method.
        let encoded = HexCodec::encode(self, input);
        Ok(encoded)
    }
}
/// Adapter exposing the inherent `HexCodec::decode` through `ValueDecoder`.
impl ValueDecoder<str> for HexCodec {
    type Error = MiscCodecError;
    type Output = Vec<u8>;

    /// Decodes `input` into bytes, forwarding any decoding error unchanged.
    #[inline]
    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
        // Name the inherent method explicitly so the call cannot be mistaken
        // for this trait method.
        let decoded = HexCodec::decode(self, input);
        decoded
    }
}
/// Number of ASCII units that represent one byte in plain hexadecimal.
///
/// Built with the checked constructor in a `const` context, so the
/// non-zero invariant is verified at compile time and no `unsafe` is needed.
const HEX_UNITS_PER_BYTE: core::num::NonZeroUsize =
    match core::num::NonZeroUsize::new(2) {
        Some(units) => units,
        None => panic!("2 is non-zero"),
    };

/// Fixed-width codec view: every byte maps to exactly two ASCII hex units.
///
/// These methods only consult the `uppercase` setting; prefix, byte prefix,
/// separator and whitespace options are not applied here.
// NOTE(review): the obligations behind `unsafe impl` are defined by the
// `Codec` trait, which is not visible in this file — confirm they still hold.
unsafe impl Codec for HexCodec {
    type Value = u8;
    type Unit = u8;
    type DecodeError = MiscCodecError;
    type EncodeError = MiscCodecError;

    /// A byte is always encoded as exactly two units.
    #[inline(always)]
    fn min_units_per_value(&self) -> core::num::NonZeroUsize {
        HEX_UNITS_PER_BYTE
    }

    /// A byte is always encoded as exactly two units.
    #[inline(always)]
    fn max_units_per_value(&self) -> core::num::NonZeroUsize {
        HEX_UNITS_PER_BYTE
    }

    /// Decodes the two hex units at `input[index]` and `input[index + 1]`.
    ///
    /// Returns the decoded byte and the number of units consumed (always 2).
    ///
    /// # Errors
    ///
    /// Returns an invalid-digit error for the first unit that is not an
    /// ASCII hex digit.
    ///
    /// # Safety
    ///
    /// The caller contract is defined by the `Codec` trait. This
    /// implementation uses checked indexing, so an out-of-range `index`
    /// panics rather than reading out of bounds.
    #[inline]
    unsafe fn decode_unchecked(
        &self,
        input: &[u8],
        index: usize,
    ) -> Result<(u8, core::num::NonZeroUsize), Self::DecodeError> {
        debug_assert!(index + 2 <= input.len());
        let high_char = char::from(input[index]);
        let low_char = char::from(input[index + 1]);
        let high = hex_value(high_char)
            .ok_or_else(|| invalid_hex_digit(index, high_char))?;
        let low = hex_value(low_char)
            .ok_or_else(|| invalid_hex_digit(index + 1, low_char))?;
        Ok(((high << 4) | low, HEX_UNITS_PER_BYTE))
    }

    /// Writes the two hex digits of `value` to `output[index]` and
    /// `output[index + 1]`, returning the number of units written (always 2).
    ///
    /// # Safety
    ///
    /// The caller contract is defined by the `Codec` trait. This
    /// implementation uses checked indexing, so an out-of-range `index`
    /// panics rather than writing out of bounds.
    #[inline]
    unsafe fn encode_unchecked(
        &self,
        value: &u8,
        output: &mut [u8],
        index: usize,
    ) -> Result<usize, Self::EncodeError> {
        debug_assert!(index + 2 <= output.len());
        // Hex digit characters are ASCII, so `char as u8` is lossless.
        output[index] = hex_digit(*value >> 4, self.uppercase) as u8;
        output[index + 1] = hex_digit(*value & 0x0f, self.uppercase) as u8;
        Ok(HEX_UNITS_PER_BYTE.get())
    }
}
/// Returns the numeric value (`0..=15`) of an ASCII hexadecimal digit, or
/// `None` for any other character.
///
/// Both `a`-`f` and `A`-`F` are accepted.
#[inline(always)]
fn hex_value(ch: char) -> Option<u8> {
    // `to_digit(16)` recognizes exactly `0-9`, `a-f` and `A-F`, and its
    // result is below 16, so narrowing to `u8` cannot truncate.
    ch.to_digit(16).map(|digit| digit as u8)
}
/// Builds the error for a non-hexadecimal `character` found at byte
/// position `index`; the reported radix is always 16.
fn invalid_hex_digit(index: usize, character: char) -> MiscCodecError {
    MiscCodecError::InvalidDigit {
        character,
        index,
        radix: 16,
    }
}
/// Builds the error for an input whose hex digit count is not even;
/// `actual` is the number of digits that were found.
fn invalid_hex_length(actual: usize) -> MiscCodecError {
    let expected = String::from("an even number of digits");
    MiscCodecError::InvalidLength {
        context: "hex digits",
        expected,
        actual,
    }
}
/// Builds a generic malformed-input error for the hex codec, carrying an
/// owned copy of `reason`.
fn invalid_hex_input(reason: &str) -> MiscCodecError {
    let reason = String::from(reason);
    MiscCodecError::InvalidInput {
        codec: "hex",
        reason,
    }
}
/// Reads the hex digit that must be present at byte position `index`.
///
/// On success returns `(index, digit, index of the next character)`.
///
/// # Errors
///
/// * An invalid-input error when `index` cannot be used to slice `text`
///   (out of range or not on a character boundary), or when the input
///   ends at `index`.
/// * An invalid-digit error when the character at `index` is not a hex
///   digit.
#[inline]
fn read_required_hex_digit(
    text: &str,
    index: usize,
) -> MiscCodecResult<(usize, char, usize)> {
    let rest = text.get(index..).ok_or_else(|| {
        invalid_hex_input(
            "expected a hexadecimal digit at a character boundary",
        )
    })?;
    match rest.chars().next() {
        None => Err(invalid_hex_input("expected a hexadecimal digit")),
        Some(character) if hex_value(character).is_none() => {
            Err(invalid_hex_digit(index, character))
        }
        Some(character) => {
            Ok((index, character, index + character.len_utf8()))
        }
    }
}
#[inline(always)]
fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
output.push(hex_digit(byte >> 4, uppercase));
output.push(hex_digit(byte & 0x0f, uppercase));
}
/// Returns the hex digit character for the low four bits of `value`,
/// taken from the uppercase or lowercase table.
///
/// Bits above the low nibble are masked off, so the table lookup is always
/// in range.
#[inline(always)]
fn hex_digit(value: u8, uppercase: bool) -> char {
    let table = match uppercase {
        true => &UPPER_HEX_DIGITS,
        false => &LOWER_HEX_DIGITS,
    };
    table[usize::from(value & 0x0f)]
}