#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HexEscapeError {
pub offset: usize,
pub message: String,
}
pub fn validate_hex_escapes(content: &str, indicator: u8) -> Vec<HexEscapeError> {
let mut errors = Vec::new();
let bytes = content.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == indicator {
if i + 2 < bytes.len() {
let h1 = bytes[i + 1];
let h2 = bytes[i + 2];
if !h1.is_ascii_hexdigit() || !h2.is_ascii_hexdigit() {
errors.push(HexEscapeError {
offset: i,
message: format!(
"Invalid hex escape sequence {}{}{} (expected two hex digits 0-9, A-F after '{}')",
indicator as char, h1 as char, h2 as char, indicator as char,
),
});
}
i += 3;
} else {
errors.push(HexEscapeError {
offset: i,
message: format!(
"Incomplete hex escape sequence at offset {} (expected '{}XX' but input ends)",
i, indicator as char,
),
});
break;
}
} else {
i += 1;
}
}
errors
}
pub fn decode_hex_escapes(content: &str, indicator: u8) -> Result<Vec<u8>, Vec<HexEscapeError>> {
let mut output = Vec::with_capacity(content.len());
let mut errors = Vec::new();
let bytes = content.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == indicator {
if i + 2 < bytes.len() {
let h1 = bytes[i + 1];
let h2 = bytes[i + 2];
if h1.is_ascii_hexdigit() && h2.is_ascii_hexdigit() {
let byte = hex_pair_to_byte(h1, h2);
output.push(byte);
} else {
errors.push(HexEscapeError {
offset: i,
message: format!(
"Invalid hex escape sequence {}{}{} (expected two hex digits 0-9, A-F after '{}')",
indicator as char, h1 as char, h2 as char, indicator as char,
),
});
output.push(bytes[i]);
output.push(h1);
output.push(h2);
}
i += 3;
} else {
errors.push(HexEscapeError {
offset: i,
message: format!(
"Incomplete hex escape sequence at offset {} (expected '{}XX' but input ends)",
i, indicator as char,
),
});
while i < bytes.len() {
output.push(bytes[i]);
i += 1;
}
break;
}
} else {
output.push(bytes[i]);
i += 1;
}
}
if errors.is_empty() {
Ok(output)
} else {
Err(errors)
}
}
fn hex_pair_to_byte(h1: u8, h2: u8) -> u8 {
(hex_digit_value(h1) << 4) | hex_digit_value(h2)
}
fn hex_digit_value(b: u8) -> u8 {
match b {
b'0'..=b'9' => b - b'0',
b'A'..=b'F' => b - b'A' + 10,
b'a'..=b'f' => b - b'a' + 10,
_ => unreachable!("hex_digit_value called with non-hex byte: {}", b),
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn validate_valid_sequences() {
assert!(validate_hex_escapes("Hello_20World", b'_').is_empty());
assert!(validate_hex_escapes("_00_FF_0A_ff", b'_').is_empty());
assert!(validate_hex_escapes("no escapes here", b'_').is_empty());
assert!(validate_hex_escapes("", b'_').is_empty());
}
#[test]
fn validate_invalid_hex_digits() {
let errors = validate_hex_escapes("_GG", b'_');
assert_eq!(errors.len(), 1);
assert_eq!(errors[0].offset, 0);
}
#[test]
fn validate_incomplete_at_end() {
let errors = validate_hex_escapes("Hello_", b'_');
assert_eq!(errors.len(), 1);
assert_eq!(errors[0].offset, 5);
let errors = validate_hex_escapes("Hello_A", b'_');
assert_eq!(errors.len(), 1);
assert_eq!(errors[0].offset, 5);
}
#[test]
fn validate_multiple_errors() {
let errors = validate_hex_escapes("_ZZ_XX", b'_');
assert_eq!(errors.len(), 2);
assert_eq!(errors[0].offset, 0);
assert_eq!(errors[1].offset, 3);
}
#[test]
fn validate_custom_indicator() {
assert!(validate_hex_escapes("#41#42", b'#').is_empty());
assert!(validate_hex_escapes("_GG", b'#').is_empty());
let errors = validate_hex_escapes("#ZZ", b'#');
assert_eq!(errors.len(), 1);
}
#[test]
fn decode_simple() {
let result = decode_hex_escapes("Hello_20World", b'_').unwrap();
assert_eq!(result, b"Hello World");
}
#[test]
fn decode_multiple() {
let result = decode_hex_escapes("_48_65_6C_6C_6F", b'_').unwrap();
assert_eq!(result, b"Hello");
}
#[test]
fn decode_no_escapes() {
let result = decode_hex_escapes("plain text", b'_').unwrap();
assert_eq!(result, b"plain text");
}
#[test]
fn decode_empty() {
let result = decode_hex_escapes("", b'_').unwrap();
assert!(result.is_empty());
}
#[test]
fn decode_all_escaped() {
let result = decode_hex_escapes("_00_FF_7F", b'_').unwrap();
assert_eq!(result, &[0x00, 0xFF, 0x7F]);
}
#[test]
fn decode_custom_indicator() {
let result = decode_hex_escapes("#41#42#43", b'#').unwrap();
assert_eq!(result, b"ABC");
}
#[test]
fn decode_error_returns_raw_bytes() {
let errors = decode_hex_escapes("_GG", b'_').unwrap_err();
assert_eq!(errors.len(), 1);
}
#[test]
fn decode_case_insensitive_hex() {
let upper = decode_hex_escapes("_4A", b'_').unwrap();
let lower = decode_hex_escapes("_4a", b'_').unwrap();
assert_eq!(upper, lower);
assert_eq!(upper, b"J");
}
#[test]
fn decode_mixed_content_and_escapes() {
let result = decode_hex_escapes("Price:_20_2410.00", b'_').unwrap();
assert_eq!(result, b"Price: $10.00");
}
}