use crate::{
metadata::token::Token,
utils::{read_be_at, read_le_at, CilIO},
Error, ParseFailure, ParseStage, Result,
};
/// Builds the crate-level error for an out-of-bounds access, tagged with the
/// generic parse stage.
#[inline]
fn oob_err() -> Error {
    let failure = ParseFailure::OutOfBounds {
        stage: ParseStage::Generic,
    };
    failure.into()
}
/// Builds the crate-level error for a field whose encoded value is invalid.
///
/// `field` names the offending field and `reason` carries the human-readable
/// explanation; the failure is tagged with the generic parse stage.
#[inline]
fn invalid_err(field: &'static str, reason: String) -> Error {
    let failure = ParseFailure::InvalidField {
        stage: ParseStage::Generic,
        field,
        reason,
    };
    failure.into()
}
/// Cursor-based reader over a borrowed byte slice.
///
/// The parser never copies or mutates the input; it only tracks a read
/// position that the `read_*` / `advance*` / `seek` methods move.
pub struct Parser<'a> {
    /// The complete input buffer being parsed.
    data: &'a [u8],
    /// Byte offset into `data` at which the next read starts.
    position: usize,
}
impl<'a> Parser<'a> {
#[must_use]
pub fn new(data: &'a [u8]) -> Self {
Parser { data, position: 0 }
}
#[must_use]
pub fn len(&self) -> usize {
self.data.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
#[must_use]
pub fn has_more_data(&self) -> bool {
self.position < self.data.len()
}
pub fn seek(&mut self, pos: usize) -> Result<()> {
if pos >= self.data.len() {
return Err(oob_err());
}
self.position = pos;
Ok(())
}
pub fn advance(&mut self) -> Result<()> {
self.advance_by(1)
}
pub fn advance_by(&mut self, step: usize) -> Result<()> {
let new_pos = self.position.checked_add(step).ok_or(oob_err())?;
if new_pos > self.data.len() {
return Err(oob_err());
}
self.position = new_pos;
Ok(())
}
#[must_use]
pub fn pos(&self) -> usize {
self.position
}
#[must_use]
pub fn data(&self) -> &[u8] {
self.data
}
pub fn peek_byte(&self) -> Result<u8> {
self.data.get(self.position).copied().ok_or(oob_err())
}
pub fn peek_le<T: CilIO>(&self) -> Result<T> {
let mut temp_position = self.position;
read_le_at::<T>(self.data, &mut temp_position)
}
pub fn transactional<T, F>(&mut self, f: F) -> Result<T>
where
F: FnOnce(&mut Self) -> Result<T>,
{
let saved_position = self.position;
let result = f(self);
if result.is_err() {
self.position = saved_position;
}
result
}
pub fn align(&mut self, alignment: usize) -> Result<()> {
let rem = self.position.checked_rem(alignment).ok_or(oob_err())?;
let padding = alignment
.wrapping_sub(rem)
.checked_rem(alignment)
.ok_or(oob_err())?;
let new_pos = self.position.checked_add(padding).ok_or(oob_err())?;
if new_pos > self.data.len() {
return Err(oob_err());
}
self.position = new_pos;
Ok(())
}
pub fn read_le<T: CilIO>(&mut self) -> Result<T> {
read_le_at::<T>(self.data, &mut self.position)
}
pub fn read_be<T: CilIO>(&mut self) -> Result<T> {
read_be_at::<T>(self.data, &mut self.position)
}
pub fn read_compressed_uint(&mut self) -> Result<u32> {
let first_byte = self.read_le::<u8>()?;
if (first_byte & 0x80) == 0 {
return Ok(u32::from(first_byte));
}
if (first_byte & 0xC0) == 0x80 {
let second_byte = self.read_le::<u8>()?;
let value = ((u32::from(first_byte) & 0x3F) << 8) | u32::from(second_byte);
return Ok(value);
}
if (first_byte & 0xE0) == 0xC0 {
let b1 = u32::from(self.read_le::<u8>()?);
let b2 = u32::from(self.read_le::<u8>()?);
let b3 = u32::from(self.read_le::<u8>()?);
let value = ((u32::from(first_byte) & 0x1F) << 24) | (b1 << 16) | (b2 << 8) | b3;
return Ok(value);
}
Err(invalid_err(
"compressed_uint",
format!("invalid compressed uint - {first_byte}"),
))
}
pub fn read_compressed_int(&mut self) -> Result<i32> {
let unsigned = self.read_compressed_uint()?;
let signed = if (unsigned & 1) == 0 {
#[allow(clippy::cast_possible_wrap)]
let result = (unsigned >> 1) as i32;
result
} else {
#[allow(clippy::cast_possible_wrap)]
let half = (unsigned >> 1) as i32;
half.wrapping_add(1).wrapping_neg()
};
Ok(signed)
}
pub fn read_compressed_token(&mut self) -> Result<Token> {
let compressed_token = self.read_compressed_uint()?;
let table: u32 = match compressed_token & 0x3 {
0x0 => 0x0200_0000, 0x1 => 0x0100_0000, 0x2 => 0x1B00_0000, _ => {
return Err(invalid_err(
"compressed_token",
format!("invalid compressed token - {compressed_token}"),
))
}
};
let table_index = compressed_token >> 2;
let token = table.checked_add(table_index).ok_or_else(|| {
invalid_err(
"compressed_token",
format!("token index overflows table base: {table} + {table_index}"),
)
})?;
Ok(Token::new(token))
}
pub fn read_7bit_encoded_int(&mut self) -> Result<u32> {
let mut value = 0u32;
let mut shift: u32 = 0;
loop {
let byte = *self.data.get(self.position).ok_or(oob_err())?;
self.position = self.position.checked_add(1).ok_or(oob_err())?;
value |= u32::from(byte & 0x7F) << shift;
shift = shift
.checked_add(7)
.ok_or_else(|| invalid_err("varint", "7-bit encoded integer overflow".into()))?;
if (byte & 0x80) == 0 {
break;
}
if shift >= 32 {
return Err(invalid_err(
"varint",
format!(
"7-bit encoded integer overflow: value exceeds u32 capacity after {shift} bits",
),
));
}
}
Ok(value)
}
pub fn read_string_utf8(&mut self) -> Result<String> {
let start = self.position;
let mut end = start;
while let Some(&b) = self.data.get(end) {
if b == 0 {
break;
}
end = end.checked_add(1).ok_or(oob_err())?;
}
let string_data = self.data.get(start..end).ok_or(oob_err())?;
if end < self.data.len() {
self.position = end.checked_add(1).ok_or(oob_err())?;
} else {
self.position = end;
}
String::from_utf8(string_data.to_vec()).map_err(|e| {
invalid_err(
"utf8_string",
format!(
"invalid UTF-8 string at offset {start}-{end}: {}",
e.utf8_error()
),
)
})
}
pub fn read_prefixed_string_utf8(&mut self) -> Result<String> {
let length = self.read_7bit_encoded_int()? as usize;
let end = self.position.checked_add(length).ok_or(oob_err())?;
let start = self.position;
let string_data = self.data.get(start..end).ok_or(oob_err())?;
self.position = end;
String::from_utf8(string_data.to_vec()).map_err(|e| {
invalid_err(
"utf8_string",
format!(
"invalid UTF-8 string at offset {start}-{end}: {}",
e.utf8_error()
),
)
})
}
pub fn read_prefixed_string_utf8_ref(&mut self) -> Result<&'a str> {
let length = self.read_7bit_encoded_int()? as usize;
let start = self.position;
let end = start.checked_add(length).ok_or(oob_err())?;
let string_data = self.data.get(start..end).ok_or(oob_err())?;
self.position = end;
std::str::from_utf8(string_data).map_err(|_| {
invalid_err(
"utf8_string",
format!("invalid UTF-8 string at position {start} - {end} - {string_data:?}"),
)
})
}
pub fn read_compressed_string_utf8(&mut self) -> Result<String> {
let length = self.read_compressed_uint()? as usize;
let start = self.position;
let end = start.checked_add(length).ok_or(oob_err())?;
let string_data = self.data.get(start..end).ok_or(oob_err())?;
self.position = end;
String::from_utf8(string_data.to_vec()).map_err(|e| {
invalid_err(
"utf8_compressed_string",
format!(
"invalid UTF-8 compressed string at offset {start}-{end}: {}",
e.utf8_error()
),
)
})
}
#[must_use]
pub fn remaining(&self) -> usize {
self.data.len().saturating_sub(self.position)
}
pub fn ensure_remaining(&self, needed: usize) -> Result<()> {
if self.remaining() < needed {
return Err(oob_err());
}
Ok(())
}
pub fn calc_end_position(&self, length: usize) -> Result<usize> {
let end = self.position.checked_add(length).ok_or(oob_err())?;
if end > self.data.len() {
return Err(oob_err());
}
Ok(end)
}
pub fn read_bytes(&mut self, length: usize) -> Result<&'a [u8]> {
let end = self.calc_end_position(length)?;
let bytes = self.data.get(self.position..end).ok_or(oob_err())?;
self.position = end;
Ok(bytes)
}
pub fn read_prefixed_string_utf16(&mut self) -> Result<String> {
let length = self.read_7bit_encoded_int()? as usize;
let end = self.position.checked_add(length).ok_or(oob_err())?;
if end > self.data.len() {
return Err(oob_err());
}
if !length.is_multiple_of(2) || length < 2 {
return Err(invalid_err(
"utf16_length",
format!("invalid UTF-16 length - {length}"),
));
}
let char_count = length / 2;
let mut utf16_chars: Vec<u16> = Vec::with_capacity(char_count);
for _ in 0..char_count {
let char = self.read_le::<u16>()?;
utf16_chars.push(char);
}
match String::from_utf16(&utf16_chars) {
Ok(s) => Ok(s),
Err(_) => Err(invalid_err(
"utf16_string",
format!(
"invalid UTF-16 string at position {} (length {}): {utf16_chars:?}",
self.position, length
),
)),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Error, ParseFailure};

    /// One-, two- and four-byte compressed unsigned integers, plus empty input.
    #[test]
    fn test_read_compressed_uint() {
        let test_cases = vec![
            (vec![0x03], 3),
            (vec![0x7F], 0x7F),
            (vec![0x80, 0x80], 0x80),
            (vec![0xBF, 0xFF], 0x3FFF),
            (vec![0xC0, 0x00, 0x00, 0x00], 0x00),
            (vec![0xDF, 0xFF, 0xFF, 0xFF], 0x1FFF_FFFF),
        ];

        for (input, expected) in test_cases {
            let mut parser = Parser::new(&input);
            let result = parser.read_compressed_uint().unwrap();
            assert_eq!(result, expected);
        }

        let mut parser = Parser::new(&[]);
        assert!(matches!(
            parser.read_compressed_uint(),
            Err(Error::Parse(ParseFailure::OutOfBounds { .. }))
        ));
    }

    /// Even raw values decode to non-negative numbers, odd ones to negative numbers.
    #[test]
    fn test_read_compressed_int() {
        let mut parser = Parser::new(&[20]);
        assert_eq!(parser.read_compressed_int().unwrap(), 10);

        let mut parser = Parser::new(&[9]);
        assert_eq!(parser.read_compressed_int().unwrap(), -5);

        let mut parser = Parser::new(&[0]);
        assert_eq!(parser.read_compressed_int().unwrap(), 0);
    }

    /// NUL-terminated UTF-8 strings: ASCII, empty, and multi-byte characters.
    #[test]
    fn test_parse_string() {
        let test_cases = vec![
            (vec![0x61, 0x62, 0x63, 0x00], "abc"),
            (vec![0x00], ""),
            // E4 B8 AD = U+4E2D, E6 96 87 = U+6587; written as escapes so the
            // expectation does not depend on the encoding of this source file.
            (
                vec![0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, 0x00],
                "\u{4E2D}\u{6587}",
            ),
        ];

        for (input, expected) in test_cases {
            let mut parser = Parser::new(&input);
            let result = parser.read_string_utf8().unwrap();
            assert_eq!(result, expected);
        }
    }

    /// Reading past the end after a successful read reports out-of-bounds.
    #[test]
    fn test_error_handling() {
        let mut parser = Parser::new(&[0x08]);
        assert!(matches!(parser.read_compressed_uint(), Ok(8)));
        assert!(matches!(
            parser.read_compressed_uint(),
            Err(Error::Parse(ParseFailure::OutOfBounds { .. }))
        ));
    }

    #[test]
    fn test_read_7bit_encoded_int_single_byte() {
        {
            let input = &[0x00];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 0);
            assert_eq!(parser.pos(), 1);
        }
        {
            let input = &[0x7F];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 127);
            assert_eq!(parser.pos(), 1);
        }
    }

    #[test]
    fn test_read_7bit_encoded_int_two_bytes() {
        {
            let input = &[0x80, 0x01];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 128);
            assert_eq!(parser.pos(), 2);
        }
        {
            let input = &[0xFF, 0x7F];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 16383);
            assert_eq!(parser.pos(), 2);
        }
    }

    #[test]
    fn test_read_7bit_encoded_int_three_bytes() {
        {
            let input = &[0x80, 0x80, 0x01];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 16384);
            assert_eq!(parser.pos(), 3);
        }
        {
            let input = &[0xFF, 0xFF, 0x7F];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 2_097_151);
            assert_eq!(parser.pos(), 3);
        }
    }

    #[test]
    fn test_read_7bit_encoded_int_four_bytes() {
        {
            let input = &[0x80, 0x80, 0x80, 0x01];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 2097152);
            assert_eq!(parser.pos(), 4);
        }
        {
            let input = &[0xFF, 0xFF, 0xFF, 0x7F];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 268435455);
            assert_eq!(parser.pos(), 4);
        }
    }

    #[test]
    fn test_read_7bit_encoded_int_five_bytes() {
        {
            let input = &[0x80, 0x80, 0x80, 0x80, 0x01];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 268435456);
            assert_eq!(parser.pos(), 5);
        }
        {
            // The fifth byte may only carry the top four bits of a u32.
            let input = &[0xFF, 0xFF, 0xFF, 0xFF, 0x0F];
            let mut parser = Parser::new(input);
            assert_eq!(parser.read_7bit_encoded_int().unwrap(), 4294967295);
            assert_eq!(parser.pos(), 5);
        }
    }

    /// A continuation bit with no following byte is an error.
    #[test]
    fn test_read_7bit_encoded_int_truncated() {
        let input = &[0x80];
        let mut parser = Parser::new(input);
        assert!(parser.read_7bit_encoded_int().is_err());
    }

    /// A sixth byte can never be part of a valid u32 encoding.
    #[test]
    fn test_read_7bit_encoded_int_overflow() {
        let input = &[0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
        let mut parser = Parser::new(input);
        assert!(parser.read_7bit_encoded_int().is_err());
    }

    #[test]
    fn test_read_prefixed_string_utf8_ref() {
        // Plain ASCII, and the result must point into the source buffer.
        let data = [5, b'H', b'e', b'l', b'l', b'o'];
        let mut parser = Parser::new(&data);
        let result = parser.read_prefixed_string_utf8_ref().unwrap();
        assert_eq!(result, "Hello");
        let data_ptr = data.as_ptr() as usize;
        let result_ptr = result.as_ptr() as usize;
        assert!(
            result_ptr >= data_ptr && result_ptr < data_ptr + data.len(),
            "String should be borrowed from source data (zero-copy)"
        );

        // Zero-length string.
        let data = [0];
        let mut parser = Parser::new(&data);
        let result = parser.read_prefixed_string_utf8_ref().unwrap();
        assert_eq!(result, "");

        // Multi-byte characters: U+4E2D, U+6587, U+2713 (three bytes each).
        let data = [9, 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, 0xE2, 0x9C, 0x93];
        let mut parser = Parser::new(&data);
        let result = parser.read_prefixed_string_utf8_ref().unwrap();
        assert_eq!(result, "\u{4E2D}\u{6587}\u{2713}");

        // Bytes that are not valid UTF-8.
        let data = [3, 0xFF, 0xFE, 0xFD];
        let mut parser = Parser::new(&data);
        assert!(parser.read_prefixed_string_utf8_ref().is_err());

        // Length prefix larger than the remaining data.
        let data = [10, b'H', b'i'];
        let mut parser = Parser::new(&data);
        assert!(matches!(
            parser.read_prefixed_string_utf8_ref(),
            Err(Error::Parse(ParseFailure::OutOfBounds { .. }))
        ));
    }

    #[test]
    fn test_peek_le() {
        let data = [0x01, 0x02, 0x03, 0x04];

        // Peeking never moves the cursor, whatever the width.
        let parser = Parser::new(&data);
        let peeked: u8 = parser.peek_le().unwrap();
        assert_eq!(peeked, 0x01);
        assert_eq!(parser.pos(), 0);
        let peeked: u16 = parser.peek_le().unwrap();
        assert_eq!(peeked, 0x0201);
        assert_eq!(parser.pos(), 0);
        let peeked: u32 = parser.peek_le().unwrap();
        assert_eq!(peeked, 0x04030201);
        assert_eq!(parser.pos(), 0);

        // Peeking is relative to the current cursor.
        let mut parser = Parser::new(&data);
        parser.advance_by(2).unwrap();
        let peeked: u16 = parser.peek_le().unwrap();
        assert_eq!(peeked, 0x0403);
        assert_eq!(parser.pos(), 2);

        // Not enough bytes for the requested width.
        let data = [0x01];
        let parser = Parser::new(&data);
        let result: Result<u16> = parser.peek_le();
        assert!(matches!(
            result,
            Err(Error::Parse(ParseFailure::OutOfBounds { .. }))
        ));

        // Cursor at the very end of the buffer.
        let mut parser = Parser::new(&data);
        parser.advance().unwrap();
        let result: Result<u8> = parser.peek_le();
        assert!(matches!(
            result,
            Err(Error::Parse(ParseFailure::OutOfBounds { .. }))
        ));
    }

    #[test]
    fn test_transactional() {
        let data = [0x01, 0x02, 0x03, 0x04];

        // Success keeps the advanced cursor.
        let mut parser = Parser::new(&data);
        let result: u16 = parser.transactional(|p| p.read_le()).unwrap();
        assert_eq!(result, 0x0201);
        assert_eq!(parser.pos(), 2);

        // Several reads inside one transaction.
        let mut parser = Parser::new(&data);
        let sum: u16 = parser
            .transactional(|p| {
                let a: u8 = p.read_le()?;
                let b: u8 = p.read_le()?;
                Ok(u16::from(a) + u16::from(b))
            })
            .unwrap();
        assert_eq!(sum, 3);
        assert_eq!(parser.pos(), 2);

        // Failure after a partial read rewinds to the starting offset.
        let mut parser = Parser::new(&data);
        let result: Result<u32> = parser.transactional(|p| {
            p.read_le::<u16>()?;
            p.read_le::<u32>()
        });
        assert!(result.is_err());
        assert_eq!(parser.pos(), 0);

        // A transaction started mid-buffer.
        let mut parser = Parser::new(&data);
        parser.advance_by(2).unwrap();
        let result: u16 = parser.transactional(|p| p.read_le()).unwrap();
        assert_eq!(result, 0x0403);
        assert_eq!(parser.pos(), 4);

        // A failing inner transaction rewinds only to its own starting point.
        let mut parser = Parser::new(&data);
        let result = parser
            .transactional(|p| {
                let outer: u8 = p.read_le()?;
                let inner_result: Result<u32> = p.transactional(|p2| p2.read_le());
                assert!(inner_result.is_err());
                assert_eq!(p.pos(), 1);
                Ok(outer)
            })
            .unwrap();
        assert_eq!(result, 0x01);
        assert_eq!(parser.pos(), 1);
    }
}