use crate::{
metadata::token::Token,
utils::{read_be_at, read_le_at, CilIO},
Result,
};
/// Cursor-style reader over a borrowed byte buffer.
///
/// The parser never copies or owns the input; it only tracks an offset into it.
/// The read, peek and seek methods all operate relative to that offset.
pub struct Parser<'a> {
/// Complete input buffer (not just the unread part).
data: &'a [u8],
/// Offset into `data` of the next byte to be consumed.
position: usize,
}
impl<'a> Parser<'a> {
#[must_use]
pub fn new(data: &'a [u8]) -> Self {
Parser { data, position: 0 }
}
/// Returns the total size of the underlying buffer in bytes.
///
/// This is independent of the cursor; see `remaining` for the unread byte count.
#[must_use]
pub fn len(&self) -> usize {
self.data.len()
}
/// Returns `true` when the underlying buffer contains no bytes at all.
///
/// This does not consider the cursor: a fully consumed, non-empty buffer is
/// still reported as not empty.
#[must_use]
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
/// Returns `true` while at least one unread byte lies at or after the cursor.
#[must_use]
pub fn has_more_data(&self) -> bool {
    self.remaining() > 0
}
/// Moves the cursor to the absolute offset `pos`.
///
/// # Errors
///
/// Returns an out-of-bounds error when `pos` is not the index of an existing
/// byte. Seeking to exactly the end of the buffer (`pos == len`) is rejected
/// too, so every seek on an empty buffer fails. The cursor is left untouched
/// on error.
pub fn seek(&mut self, pos: usize) -> Result<()> {
    if pos < self.data.len() {
        self.position = pos;
        Ok(())
    } else {
        Err(out_of_bounds_error!())
    }
}
/// Moves the cursor forward by exactly one byte.
///
/// Shorthand for `advance_by(1)`; fails under the same conditions.
pub fn advance(&mut self) -> Result<()> {
self.advance_by(1)
}
/// Moves the cursor forward by `step` bytes.
///
/// Advancing to exactly the end of the buffer is allowed.
///
/// # Errors
///
/// Returns an out-of-bounds error when the new offset would lie past the end
/// of the buffer, or when `position + step` does not fit in `usize`. The
/// cursor is left untouched on error.
pub fn advance_by(&mut self, step: usize) -> Result<()> {
    // `checked_add` keeps a huge `step` from wrapping around and slipping past
    // the bounds comparison (or panicking in debug builds).
    match self.position.checked_add(step) {
        Some(target) if target <= self.data.len() => {
            self.position = target;
            Ok(())
        }
        _ => Err(out_of_bounds_error!()),
    }
}
/// Returns the current cursor offset, measured in bytes from the start of the buffer.
#[must_use]
pub fn pos(&self) -> usize {
self.position
}
#[must_use]
pub fn data(&self) -> &[u8] {
self.data
}
/// Returns the byte under the cursor without consuming it.
///
/// # Errors
///
/// Returns an out-of-bounds error when the cursor is at or past the end of
/// the buffer.
pub fn peek_byte(&self) -> Result<u8> {
    match self.data.get(self.position) {
        Some(&byte) => Ok(byte),
        None => Err(out_of_bounds_error!()),
    }
}
/// Decodes a little-endian `T` at the cursor without moving the cursor.
///
/// # Errors
///
/// Propagates whatever `read_le_at` reports, e.g. when too few bytes remain.
pub fn peek_le<T: CilIO>(&self) -> Result<T> {
    // Hand the helper a throwaway copy of the offset so its advance is discarded.
    let mut scratch_offset = self.position;
    read_le_at::<T>(self.data, &mut scratch_offset)
}
/// Runs `f` against this parser and rewinds the cursor if `f` fails.
///
/// On `Ok` the cursor stays wherever `f` left it. On `Err` the cursor is
/// restored to its value from before the call and the error is returned
/// unchanged. Only the cursor is rolled back.
pub fn transactional<T, F>(&mut self, f: F) -> Result<T>
where
    F: FnOnce(&mut Self) -> Result<T>,
{
    let checkpoint = self.position;
    match f(self) {
        Err(err) => {
            self.position = checkpoint;
            Err(err)
        }
        ok => ok,
    }
}
pub fn align(&mut self, alignment: usize) -> Result<()> {
let padding = (alignment - (self.position % alignment)) % alignment;
if self.position + padding > self.data.len() {
return Err(out_of_bounds_error!());
}
self.position += padding;
Ok(())
}
/// Decodes a little-endian `T` at the cursor and consumes it.
///
/// Delegates to `read_le_at`, passing the cursor by mutable reference so the
/// helper moves it past the bytes it reads.
///
/// # Errors
///
/// Propagates whatever `read_le_at` reports, e.g. when too few bytes remain.
pub fn read_le<T: CilIO>(&mut self) -> Result<T> {
read_le_at::<T>(self.data, &mut self.position)
}
/// Decodes a `T` at the cursor via `read_be_at` and consumes it.
///
/// NOTE(review): presumably the big-endian counterpart of `read_le`; confirm
/// against `read_be_at`, which is defined outside this file.
///
/// # Errors
///
/// Propagates whatever `read_be_at` reports.
pub fn read_be<T: CilIO>(&mut self) -> Result<T> {
read_be_at::<T>(self.data, &mut self.position)
}
/// Reads a variable-length (1, 2 or 4 byte) unsigned integer at the cursor.
///
/// The leading bits of the first byte select the width:
///
/// * `0xxxxxxx`: 1 byte, 7 value bits
/// * `10xxxxxx`: 2 bytes, 14 value bits
/// * `110xxxxx`: 4 bytes, 29 value bits
///
/// Value bits are stored most-significant byte first.
///
/// # Errors
///
/// * Propagates the read error when the buffer ends inside the encoding.
/// * Returns a malformed-data error when the first byte starts with `111`.
///
/// Bytes consumed before an error are not given back; wrap the call in
/// `transactional` if rollback is required.
pub fn read_compressed_uint(&mut self) -> Result<u32> {
    let lead = self.read_le::<u8>()?;
    match lead {
        0x00..=0x7F => Ok(u32::from(lead)),
        0x80..=0xBF => {
            let low = self.read_le::<u8>()?;
            Ok((u32::from(lead & 0x3F) << 8) | u32::from(low))
        }
        0xC0..=0xDF => {
            // Fold the three trailing bytes in big-endian order behind the
            // five payload bits of the lead byte.
            let mut value = u32::from(lead & 0x1F);
            for _ in 0..3 {
                value = (value << 8) | u32::from(self.read_le::<u8>()?);
            }
            Ok(value)
        }
        _ => Err(malformed_error!("Invalid compressed uint - {}", lead)),
    }
}
/// Reads a compressed unsigned integer and maps it onto a signed value.
///
/// The lowest bit acts as the sign flag and the remaining bits as magnitude:
/// an even raw value `2k` decodes to `k`, an odd raw value `2k + 1` decodes
/// to `-(k + 1)` (so `0 -> 0`, `1 -> -1`, `20 -> 10`, `9 -> -5`).
///
/// NOTE(review): this is a zigzag-style mapping. ECMA-335 §II.23.2 describes
/// signed compressed integers as a rotated two's-complement value whose sign
/// extension depends on the encoded width (e.g. `0x7B` is -3, `0x01` is -64),
/// which would give different results for negative numbers. Confirm which
/// encoding callers expect.
///
/// # Errors
///
/// Propagates any error from `read_compressed_uint`.
pub fn read_compressed_int(&mut self) -> Result<i32> {
    let raw = self.read_compressed_uint()?;
    // `raw >> 1` is at most 28 bits wide, so the cast can never wrap.
    #[allow(clippy::cast_possible_wrap)]
    let magnitude = (raw >> 1) as i32;
    if raw & 1 == 0 {
        Ok(magnitude)
    } else {
        Ok(-(magnitude + 1))
    }
}
/// Reads a compressed unsigned integer and expands it into a `Token`.
///
/// The two lowest bits select a table prefix and the remaining bits are added
/// to it as the index:
///
/// * tag `0` gives prefix `0x0200_0000`
/// * tag `1` gives prefix `0x0100_0000`
/// * tag `2` gives prefix `0x1B00_0000`
///
/// NOTE(review): these prefixes presumably correspond to the TypeDef, TypeRef
/// and TypeSpec tables of ECMA-335's TypeDefOrRefOrSpecEncoded; confirm
/// against the metadata table definitions.
///
/// # Errors
///
/// * Propagates any error from `read_compressed_uint`.
/// * Returns a malformed-data error when the tag is `3`.
pub fn read_compressed_token(&mut self) -> Result<Token> {
    let encoded = self.read_compressed_uint()?;
    let (tag, row) = (encoded & 0x3, encoded >> 2);
    let table_base: u32 = match tag {
        0x0 => 0x0200_0000,
        0x1 => 0x0100_0000,
        0x2 => 0x1B00_0000,
        _ => {
            return Err(malformed_error!(
                "Invalid compressed token - {}",
                encoded
            ))
        }
    };
    Ok(Token::new(table_base + row))
}
/// Reads a 7-bit-per-byte variable-length unsigned integer (least-significant
/// group first) at the cursor.
///
/// Each byte contributes its low seven bits; a set high bit means another
/// byte follows. At most five bytes are accepted, and the fifth may only
/// carry the four bits that still fit into a `u32`.
///
/// # Errors
///
/// * Returns an out-of-bounds error when the buffer ends before a byte with a
///   clear high bit is seen.
/// * Returns a malformed-data error when the encoded value does not fit in a
///   `u32`: either the fifth byte has payload bits above bit 3, or it
///   requests a sixth byte.
///
/// Bytes consumed before an error are not given back.
pub fn read_7bit_encoded_int(&mut self) -> Result<u32> {
    let mut value = 0u32;
    let mut shift = 0u32;
    loop {
        let Some(&byte) = self.data.get(self.position) else {
            return Err(out_of_bounds_error!());
        };
        self.position += 1;

        let payload = u32::from(byte & 0x7F);
        // At shift 28 only four bits are left in a u32. Without this check the
        // upper three payload bits would be shifted out silently and a
        // too-large value would be accepted as a smaller one.
        if shift == 28 && payload > 0x0F {
            return Err(malformed_error!(
                "7-bit encoded integer overflow: final byte {:#04x} exceeds u32 capacity",
                byte
            ));
        }
        value |= payload << shift;
        shift += 7;

        if (byte & 0x80) == 0 {
            break;
        }
        if shift >= 32 {
            return Err(malformed_error!(
                "7-bit encoded integer overflow: value exceeds u32 capacity after {} bits",
                shift
            ));
        }
    }
    Ok(value)
}
/// Reads a NUL-terminated UTF-8 string starting at the cursor.
///
/// The terminator is consumed but not included in the result. When no NUL
/// byte is found, everything up to the end of the buffer is taken and the
/// cursor stops at the end.
///
/// # Errors
///
/// Returns a malformed-data error when the bytes are not valid UTF-8. The
/// cursor has already moved past the string (and its terminator) at that
/// point.
pub fn read_string_utf8(&mut self) -> Result<String> {
    let start = self.position;
    let terminator = self.data[start..].iter().position(|&byte| byte == 0);
    let end = terminator.map_or(self.data.len(), |offset| start + offset);

    // Step over the NUL only when one was actually found.
    self.position = if terminator.is_some() { end + 1 } else { end };

    std::str::from_utf8(&self.data[start..end])
        .map(str::to_owned)
        .map_err(|e| {
            malformed_error!(
                "Invalid UTF-8 string at offset {}-{}: {}",
                start,
                end,
                e
            )
        })
}
/// Reads a UTF-8 string whose byte length is given by a leading 7-bit encoded
/// integer, returning an owned copy.
///
/// # Errors
///
/// * Propagates any error from `read_7bit_encoded_int`.
/// * Returns an out-of-bounds error when fewer than `length` bytes remain, or
///   when the end offset does not fit in `usize`. The length prefix has
///   already been consumed at that point.
/// * Returns a malformed-data error when the bytes are not valid UTF-8. The
///   cursor has already moved past the string at that point.
pub fn read_prefixed_string_utf8(&mut self) -> Result<String> {
    let length = self.read_7bit_encoded_int()? as usize;
    let start = self.position;
    // The prefix can be as large as u32::MAX, so on 32-bit targets a plain `+`
    // could wrap and defeat the bounds check.
    let end = match start.checked_add(length) {
        Some(end) if end <= self.data.len() => end,
        _ => return Err(out_of_bounds_error!()),
    };
    self.position = end;

    // Validate in place first so invalid input does not cost an allocation.
    std::str::from_utf8(&self.data[start..end])
        .map(str::to_owned)
        .map_err(|e| {
            malformed_error!(
                "Invalid UTF-8 string at offset {}-{}: {}",
                start,
                end,
                e
            )
        })
}
/// Reads a UTF-8 string whose byte length is given by a leading 7-bit encoded
/// integer, borrowing it directly from the input buffer (no copy).
///
/// # Errors
///
/// * Propagates any error from `read_7bit_encoded_int`.
/// * Returns an out-of-bounds error when fewer than `length` bytes remain, or
///   when the end offset does not fit in `usize`. The length prefix has
///   already been consumed at that point.
/// * Returns a malformed-data error when the bytes are not valid UTF-8. The
///   cursor has already moved past the string at that point.
pub fn read_prefixed_string_utf8_ref(&mut self) -> Result<&'a str> {
    let length = self.read_7bit_encoded_int()? as usize;
    let start = self.position;
    // The prefix can be as large as u32::MAX, so on 32-bit targets a plain `+`
    // could wrap and defeat the bounds check.
    let end = match start.checked_add(length) {
        Some(end) if end <= self.data.len() => end,
        _ => return Err(out_of_bounds_error!()),
    };
    let string_data = &self.data[start..end];
    self.position = end;

    std::str::from_utf8(string_data).map_err(|_| {
        malformed_error!(
            "Invalid UTF-8 string at position {} - {} - {:?}",
            start,
            end,
            string_data
        )
    })
}
/// Reads a UTF-8 string whose byte length is given by a leading compressed
/// unsigned integer (see `read_compressed_uint`), returning an owned copy.
///
/// # Errors
///
/// * Propagates any error from `read_compressed_uint`.
/// * Returns an out-of-bounds error when fewer than `length` bytes remain.
///   The length prefix has already been consumed at that point.
/// * Returns a malformed-data error when the bytes are not valid UTF-8. The
///   cursor has already moved past the string at that point.
pub fn read_compressed_string_utf8(&mut self) -> Result<String> {
    let length = self.read_compressed_uint()? as usize;
    let start = self.position;
    let end = start + length;
    if end > self.data.len() {
        return Err(out_of_bounds_error!());
    }
    self.position = end;

    std::str::from_utf8(&self.data[start..end])
        .map(str::to_owned)
        .map_err(|e| {
            malformed_error!(
                "Invalid UTF-8 compressed string at offset {}-{}: {}",
                start,
                end,
                e
            )
        })
}
/// Returns the number of unread bytes between the cursor and the end of the buffer.
///
/// Uses a saturating subtraction, so the result is 0 rather than an underflow
/// should the cursor ever sit past the end.
#[must_use]
pub fn remaining(&self) -> usize {
self.data.len().saturating_sub(self.position)
}
/// Checks that at least `needed` unread bytes are available, without moving
/// the cursor.
///
/// # Errors
///
/// Returns an out-of-bounds error when fewer than `needed` bytes remain.
pub fn ensure_remaining(&self, needed: usize) -> Result<()> {
    if needed <= self.remaining() {
        Ok(())
    } else {
        Err(out_of_bounds_error!())
    }
}
/// Computes the offset `length` bytes past the cursor, without moving the
/// cursor.
///
/// # Errors
///
/// Returns an out-of-bounds error when `position + length` overflows `usize`
/// or lies past the end of the buffer. An end offset equal to the buffer
/// length is accepted.
pub fn calc_end_position(&self, length: usize) -> Result<usize> {
    match self.position.checked_add(length) {
        Some(end) if end <= self.data.len() => Ok(end),
        _ => Err(out_of_bounds_error!()),
    }
}
/// Consumes `length` bytes at the cursor and returns them as a slice borrowed
/// from the input buffer (no copy).
///
/// # Errors
///
/// Propagates the out-of-bounds error from `calc_end_position` when fewer
/// than `length` bytes remain. The cursor is left untouched in that case.
pub fn read_bytes(&mut self, length: usize) -> Result<&'a [u8]> {
    let start = self.position;
    let end = self.calc_end_position(length)?;
    self.position = end;
    Ok(&self.data[start..end])
}
/// Reads a little-endian UTF-16 string whose length in bytes is given by a
/// leading 7-bit encoded integer.
///
/// NOTE(review): a length of 0 is rejected as malformed, whereas the UTF-8
/// variants accept an empty string. This is kept as-is; confirm it is
/// intentional.
///
/// # Errors
///
/// * Propagates any error from `read_7bit_encoded_int`.
/// * Returns an out-of-bounds error when fewer than `length` bytes remain, or
///   when the end offset does not fit in `usize`.
/// * Returns a malformed-data error when `length` is odd or smaller than 2.
/// * Returns a malformed-data error when the code units are not valid UTF-16
///   (e.g. an unpaired surrogate). The cursor has already moved past the
///   string at that point.
///
/// For the first three cases the length prefix has already been consumed.
pub fn read_prefixed_string_utf16(&mut self) -> Result<String> {
    let length = self.read_7bit_encoded_int()? as usize;
    let start = self.position;
    // The prefix can be as large as u32::MAX, so on 32-bit targets a plain `+`
    // could wrap and defeat the bounds check.
    let end = match start.checked_add(length) {
        Some(end) if end <= self.data.len() => end,
        _ => return Err(out_of_bounds_error!()),
    };
    if !length.is_multiple_of(2) || length < 2 {
        return Err(malformed_error!("Invalid UTF-16 length - {}", length));
    }

    // The range was bounds-checked above, so the code units can be decoded
    // straight from the slice without a fallible read per unit.
    let utf16_chars: Vec<u16> = self.data[start..end]
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();
    self.position = end;

    String::from_utf16(&utf16_chars).map_err(|_| {
        malformed_error!(
            "Invalid UTF-16 str - {} - {} - {:?}",
            self.position,
            length,
            utf16_chars
        )
    })
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Error;
/// Covers the smallest and largest value of each compressed-uint width, plus
/// the empty-input error.
#[test]
fn test_read_compressed_uint() {
    let cases: [(&[u8], u32); 6] = [
        // 1-byte form
        (&[0x03], 3),
        (&[0x7F], 0x7F),
        // 2-byte form
        (&[0x80, 0x80], 0x80),
        (&[0xBF, 0xFF], 0x3FFF),
        // 4-byte form
        (&[0xC0, 0x00, 0x00, 0x00], 0x00),
        (&[0xDF, 0xFF, 0xFF, 0xFF], 0x1FFF_FFFF),
    ];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_compressed_uint().unwrap(), want);
    }

    let mut empty = Parser::new(&[]);
    assert!(matches!(
        empty.read_compressed_uint(),
        Err(Error::OutOfBounds { .. })
    ));
}
/// Checks a positive, a negative and a zero value for the signed decoding.
#[test]
fn test_read_compressed_int() {
    let cases: [(&[u8], i32); 3] = [(&[20], 10), (&[9], -5), (&[0], 0)];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_compressed_int().unwrap(), want);
    }
}
/// Checks NUL-terminated string reads for ASCII, empty and multi-byte UTF-8
/// input.
#[test]
fn test_parse_string() {
    let cases: [(&[u8], &str); 3] = [
        (&[0x61, 0x62, 0x63, 0x00], "abc"),
        (&[0x00], ""),
        // E4 B8 AD / E6 96 87 are U+4E2D U+6587 ("中文"). The expectation is
        // written with escapes so it survives any re-encoding of this file;
        // the previous literal was mojibake and could never match.
        (
            &[0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, 0x00],
            "\u{4E2D}\u{6587}",
        ),
    ];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        let result = parser.read_string_utf8().unwrap();
        assert_eq!(result, want);
    }
}
/// A second read after the buffer is exhausted must report out-of-bounds.
#[test]
fn test_error_handling() {
    let mut parser = Parser::new(&[0x08]);

    // The only byte decodes fine...
    let first = parser.read_compressed_uint();
    assert!(matches!(first, Ok(8)));

    // ...and nothing is left for a second value.
    let second = parser.read_compressed_uint();
    assert!(matches!(second, Err(Error::OutOfBounds { .. })));
}
/// Smallest and largest values that fit in a single 7-bit group.
#[test]
fn test_read_7bit_encoded_int_single_byte() {
    let cases: [(&[u8], u32); 2] = [(&[0x00], 0), (&[0x7F], 127)];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_7bit_encoded_int().unwrap(), want);
        assert_eq!(parser.pos(), 1);
    }
}
/// Smallest and largest values that need exactly two 7-bit groups.
#[test]
fn test_read_7bit_encoded_int_two_bytes() {
    let cases: [(&[u8], u32); 2] = [(&[0x80, 0x01], 128), (&[0xFF, 0x7F], 16383)];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_7bit_encoded_int().unwrap(), want);
        assert_eq!(parser.pos(), 2);
    }
}
/// Smallest and largest values that need exactly three 7-bit groups.
#[test]
fn test_read_7bit_encoded_int_three_bytes() {
    let cases: [(&[u8], u32); 2] = [
        (&[0x80, 0x80, 0x01], 16384),
        (&[0xFF, 0xFF, 0x7F], 2_097_151),
    ];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_7bit_encoded_int().unwrap(), want);
        assert_eq!(parser.pos(), 3);
    }
}
/// Smallest and largest values that need exactly four 7-bit groups.
#[test]
fn test_read_7bit_encoded_int_four_bytes() {
    let cases: [(&[u8], u32); 2] = [
        (&[0x80, 0x80, 0x80, 0x01], 2097152),
        (&[0xFF, 0xFF, 0xFF, 0x7F], 268435455),
    ];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_7bit_encoded_int().unwrap(), want);
        assert_eq!(parser.pos(), 4);
    }
}
/// Smallest five-group value and the largest value a `u32` can hold.
#[test]
fn test_read_7bit_encoded_int_five_bytes() {
    let cases: [(&[u8], u32); 2] = [
        (&[0x80, 0x80, 0x80, 0x80, 0x01], 268435456),
        (&[0xFF, 0xFF, 0xFF, 0xFF, 0x0F], 4294967295),
    ];
    for (bytes, want) in cases {
        let mut parser = Parser::new(bytes);
        assert_eq!(parser.read_7bit_encoded_int().unwrap(), want);
        assert_eq!(parser.pos(), 5);
    }
}
/// A continuation bit with no following byte must be reported as an error.
#[test]
fn test_read_7bit_encoded_int_truncated() {
    let truncated: &[u8] = &[0x80];
    let outcome = Parser::new(truncated).read_7bit_encoded_int();
    assert!(outcome.is_err());
}
/// Six groups cannot fit in a `u32`, so the read must fail.
#[test]
fn test_read_7bit_encoded_int_overflow() {
    let too_long: &[u8] = &[0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
    let outcome = Parser::new(too_long).read_7bit_encoded_int();
    assert!(outcome.is_err());
}
/// Exercises the zero-copy prefixed UTF-8 read: ASCII, empty, multi-byte,
/// invalid UTF-8 and a length prefix larger than the remaining data.
#[test]
fn test_read_prefixed_string_utf8_ref() {
    // Plain ASCII, and the result must point into the source buffer.
    let data = [5, b'H', b'e', b'l', b'l', b'o'];
    let mut parser = Parser::new(&data);
    let result = parser.read_prefixed_string_utf8_ref().unwrap();
    assert_eq!(result, "Hello");
    let data_ptr = data.as_ptr() as usize;
    let result_ptr = result.as_ptr() as usize;
    assert!(
        result_ptr >= data_ptr && result_ptr < data_ptr + data.len(),
        "String should be borrowed from source data (zero-copy)"
    );

    // Zero-length string.
    let data = [0];
    let mut parser = Parser::new(&data);
    let result = parser.read_prefixed_string_utf8_ref().unwrap();
    assert_eq!(result, "");

    // Multi-byte UTF-8: U+4E2D U+6587 U+2713 ("中文✓"). The expectation is
    // written with escapes so it survives any re-encoding of this file; the
    // previous literal was mojibake and could never match.
    let data = [9, 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, 0xE2, 0x9C, 0x93];
    let mut parser = Parser::new(&data);
    let result = parser.read_prefixed_string_utf8_ref().unwrap();
    assert_eq!(result, "\u{4E2D}\u{6587}\u{2713}");

    // Bytes that are not valid UTF-8.
    let data = [3, 0xFF, 0xFE, 0xFD];
    let mut parser = Parser::new(&data);
    assert!(parser.read_prefixed_string_utf8_ref().is_err());

    // Length prefix claims more bytes than are available.
    let data = [10, b'H', b'i'];
    let mut parser = Parser::new(&data);
    assert!(matches!(
        parser.read_prefixed_string_utf8_ref(),
        Err(Error::OutOfBounds { .. })
    ));
}
/// `peek_le` must decode like `read_le` but never move the cursor, and must
/// fail when too few bytes remain.
#[test]
fn test_peek_le() {
    let data = [0x01, 0x02, 0x03, 0x04];

    // Peeking at offset 0 with growing widths leaves the cursor at 0.
    {
        let parser = Parser::new(&data);

        let byte: u8 = parser.peek_le().unwrap();
        assert_eq!(byte, 0x01);
        assert_eq!(parser.pos(), 0);

        let word: u16 = parser.peek_le().unwrap();
        assert_eq!(word, 0x0201);
        assert_eq!(parser.pos(), 0);

        let dword: u32 = parser.peek_le().unwrap();
        assert_eq!(dword, 0x04030201);
        assert_eq!(parser.pos(), 0);
    }

    // Peeking honours a cursor that has already been advanced.
    {
        let mut parser = Parser::new(&data);
        parser.advance_by(2).unwrap();
        let word: u16 = parser.peek_le().unwrap();
        assert_eq!(word, 0x0403);
        assert_eq!(parser.pos(), 2);
    }

    let data = [0x01];

    // One byte is not enough for a u16.
    {
        let parser = Parser::new(&data);
        let too_wide: Result<u16> = parser.peek_le();
        assert!(matches!(too_wide, Err(Error::OutOfBounds { .. })));
    }

    // Nothing can be peeked once the cursor sits at the end.
    {
        let mut parser = Parser::new(&data);
        parser.advance().unwrap();
        let at_end: Result<u8> = parser.peek_le();
        assert!(matches!(at_end, Err(Error::OutOfBounds { .. })));
    }
}
/// `transactional` keeps cursor progress on success and rewinds it on failure,
/// including when calls are nested.
#[test]
fn test_transactional() {
    let data = [0x01, 0x02, 0x03, 0x04];

    // Success: a single read commits its progress.
    {
        let mut parser = Parser::new(&data);
        let value: u16 = parser.transactional(|p| p.read_le()).unwrap();
        assert_eq!(value, 0x0201);
        assert_eq!(parser.pos(), 2);
    }

    // Success: several reads inside one transaction all commit.
    {
        let mut parser = Parser::new(&data);
        let sum: u16 = parser
            .transactional(|p| {
                let a: u8 = p.read_le()?;
                let b: u8 = p.read_le()?;
                Ok(u16::from(a) + u16::from(b))
            })
            .unwrap();
        assert_eq!(sum, 3);
        assert_eq!(parser.pos(), 2);
    }

    // Failure: the first read succeeds, the second runs out of data, and the
    // cursor goes back to where the transaction started.
    {
        let mut parser = Parser::new(&data);
        let outcome: Result<u32> = parser.transactional(|p| {
            p.read_le::<u16>()?;
            p.read_le::<u32>()
        });
        assert!(outcome.is_err());
        assert_eq!(parser.pos(), 0);
    }

    // Success from a non-zero starting offset.
    {
        let mut parser = Parser::new(&data);
        parser.advance_by(2).unwrap();
        let value: u16 = parser.transactional(|p| p.read_le()).unwrap();
        assert_eq!(value, 0x0403);
        assert_eq!(parser.pos(), 4);
    }

    // Nested: a failing inner transaction rewinds only to its own start; the
    // outer transaction still succeeds and keeps its single consumed byte.
    {
        let mut parser = Parser::new(&data);
        let value = parser
            .transactional(|p| {
                let outer: u8 = p.read_le()?;
                let inner: Result<u32> = p.transactional(|p2| p2.read_le());
                assert!(inner.is_err());
                assert_eq!(p.pos(), 1);
                Ok(outer)
            })
            .unwrap();
        assert_eq!(value, 0x01);
        assert_eq!(parser.pos(), 1);
    }
}
}