use std::marker::PhantomData;
use crate::Error;
use crate::optimised::validation::validate_seq_prologue;
/// Bit 0 of the leading flags byte. When it is set, `from_payload_inner`
/// expects a table of 4-byte offsets between the element count and the body.
#[doc(hidden)]
pub const FLAG_INDEXED: u8 = 0b0000_0001;
/// Borrowed view over an encoded sequence payload.
///
/// The walker never copies: every slice it hands out borrows from the
/// payload passed to the constructor (lifetime `'p`).
#[derive(Debug)]
pub struct IndexedSeqWalker<'p, T> {
// Bytes that follow the header (flags byte, varint count and, when present,
// the offset table).
body: &'p [u8],
// Raw offset table, 4 bytes per element; `None` when `FLAG_INDEXED` is clear.
offsets: Option<&'p [u8]>,
// Element count decoded from the varint that follows the flags byte.
len: usize,
// Carries the element type parameter; no `T` value is stored.
_marker: PhantomData<fn() -> T>,
}
impl<'p, T> IndexedSeqWalker<'p, T> {
pub fn from_payload(payload: &'p [u8]) -> Result<Self, Error> {
Self::from_payload_inner(payload, true)
}
pub fn from_payload_unvalidated(payload: &'p [u8]) -> Result<Self, Error> {
Self::from_payload_inner(payload, false)
}
fn from_payload_inner(payload: &'p [u8], validate: bool) -> Result<Self, Error> {
if payload.is_empty() {
return Err(Error::OptimisedSubReaderOverrun);
}
let flags = payload[0];
let mut cursor = 1usize;
let (len, varint_len) = read_varint(&payload[cursor..])?;
cursor += varint_len;
let indexed = (flags & FLAG_INDEXED) != 0;
if !indexed {
return Ok(Self {
body: &payload[cursor..],
offsets: None,
len,
_marker: PhantomData,
});
}
let table_bytes = len
.checked_mul(4)
.ok_or_else(|| Error::Deserialize("indexed-seq offset table size overflow".into()))?;
if payload.len() < cursor + table_bytes {
return Err(Error::OptimisedSubReaderOverrun);
}
let offsets = &payload[cursor..cursor + table_bytes];
cursor += table_bytes;
let body = &payload[cursor..];
if validate {
validate_seq_prologue(offsets, len, body.len() as u32)?;
}
Ok(Self {
body,
offsets: Some(offsets),
len,
_marker: PhantomData,
})
}
#[inline]
fn offset_at(offsets: &[u8], index: usize) -> u32 {
crate::optimised::validation::decode_u32_le_at(offsets, index * 4)
}
#[inline]
pub fn len(&self) -> usize {
self.len
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
#[inline]
pub fn is_indexed(&self) -> bool {
self.offsets.is_some()
}
pub fn element_bytes(&self, index: usize) -> Result<&'p [u8], Error> {
let offsets = self
.offsets
.ok_or_else(|| Error::Deserialize("element_bytes called on non-indexed seq".into()))?;
if index >= self.len {
return Err(Error::Deserialize(format!("index {index} out of range ({})", self.len)));
}
let start = Self::offset_at(offsets, index) as usize;
let end = if index + 1 < self.len {
Self::offset_at(offsets, index + 1) as usize
} else {
self.body.len()
};
Ok(&self.body[start..end])
}
#[inline]
pub fn body(&self) -> &'p [u8] {
self.body
}
}
fn read_varint(bytes: &[u8]) -> Result<(usize, usize), Error> {
if bytes.is_empty() {
return Err(Error::OptimisedSubReaderOverrun);
}
let tag = bytes[0];
match tag {
0..=250 => Ok((tag as usize, 1)),
251 => {
if bytes.len() < 3 {
return Err(Error::OptimisedSubReaderOverrun);
}
Ok((u16::from_le_bytes([bytes[1], bytes[2]]) as usize, 3))
}
252 => {
if bytes.len() < 5 {
return Err(Error::OptimisedSubReaderOverrun);
}
Ok((u32::from_le_bytes(bytes[1..5].try_into().unwrap()) as usize, 5))
}
253 => {
if bytes.len() < 9 {
return Err(Error::OptimisedSubReaderOverrun);
}
let v = u64::from_le_bytes(bytes[1..9].try_into().unwrap());
let v: usize = v.try_into().map_err(|_| Error::IntegerOverflow)?;
Ok((v, 9))
}
_ => Err(Error::InvalidIntegerEncoding),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `v` with the same tag scheme `read_varint` decodes
    /// (inline, `251` + u16, `252` + u32).
    fn varint(v: usize) -> Vec<u8> {
        match v {
            0..=250 => vec![v as u8],
            251..=65535 => {
                let mut out = vec![251u8];
                out.extend_from_slice(&(v as u16).to_le_bytes());
                out
            }
            _ => {
                // Fail loudly instead of silently truncating the length.
                let wide = u32::try_from(v).expect("test helper only encodes lengths up to u32::MAX");
                let mut out = vec![252u8];
                out.extend_from_slice(&wide.to_le_bytes());
                out
            }
        }
    }

    /// Builds an indexed payload: flags byte, varint count, one little-endian
    /// `u32` start offset per element, then the concatenated element bytes.
    fn build_indexed_seq(elements: &[&[u8]]) -> Vec<u8> {
        let len = elements.len();
        let mut out = Vec::new();
        out.push(FLAG_INDEXED);
        out.extend_from_slice(&varint(len));
        let mut running = 0u32;
        let mut offsets = Vec::with_capacity(len);
        for e in elements {
            offsets.push(running);
            running += u32::try_from(e.len()).expect("test element length fits in u32");
        }
        for o in &offsets {
            out.extend_from_slice(&o.to_le_bytes());
        }
        for e in elements {
            out.extend_from_slice(e);
        }
        out
    }

    #[test]
    fn opens_indexed_seq_and_reads_elements() {
        let payload = build_indexed_seq(&[b"foo", b"barbar", b"baz"]);
        let w: IndexedSeqWalker<()> = IndexedSeqWalker::from_payload(&payload).unwrap();
        assert!(w.is_indexed());
        assert_eq!(w.len(), 3);
        assert_eq!(w.element_bytes(0).unwrap(), b"foo");
        assert_eq!(w.element_bytes(1).unwrap(), b"barbar");
        assert_eq!(w.element_bytes(2).unwrap(), b"baz");
    }

    #[test]
    fn opens_legacy_seq_passes_through() {
        // Flags byte 0: no offset table, so everything after the count is body.
        let payload = [0u8, 2, 1, 2];
        let w: IndexedSeqWalker<()> = IndexedSeqWalker::from_payload(&payload).unwrap();
        assert!(!w.is_indexed());
        assert_eq!(w.len(), 2);
        assert_eq!(w.body(), &[1u8, 2]);
        assert!(w.element_bytes(0).is_err());
    }

    #[test]
    fn rejects_truncated_payload() {
        // Tag 251 announces a u16 but only one of its two bytes is present.
        let payload = [FLAG_INDEXED, 251, 0];
        let err: Error = IndexedSeqWalker::<()>::from_payload(&payload).unwrap_err();
        assert!(matches!(err, Error::OptimisedSubReaderOverrun));
    }

    #[test]
    fn rejects_truncated_offset_table() {
        // Three elements declared, but only one 4-byte offset follows.
        let mut payload = vec![FLAG_INDEXED, 3];
        payload.extend_from_slice(&0u32.to_le_bytes());
        let err: Error = IndexedSeqWalker::<()>::from_payload(&payload).unwrap_err();
        assert!(matches!(err, Error::OptimisedSubReaderOverrun));
    }

    #[test]
    fn rejects_non_monotonic_offsets() {
        let mut payload = vec![FLAG_INDEXED, 2];
        payload.extend_from_slice(&10u32.to_le_bytes());
        payload.extend_from_slice(&0u32.to_le_bytes());
        payload.extend_from_slice(&[0u8; 16]);
        let err: Error = IndexedSeqWalker::<()>::from_payload(&payload).unwrap_err();
        assert!(matches!(err, Error::OptimisedOffsetsNonMonotonic));
    }

    #[test]
    fn rejects_empty_payload() {
        let err: Error = IndexedSeqWalker::<()>::from_payload(&[]).unwrap_err();
        assert!(matches!(err, Error::OptimisedSubReaderOverrun));
    }

    #[test]
    fn rejects_unknown_varint_tag() {
        // 254 is neither an inline value nor one of the wide tags.
        let payload = [0u8, 254];
        let err: Error = IndexedSeqWalker::<()>::from_payload(&payload).unwrap_err();
        assert!(matches!(err, Error::InvalidIntegerEncoding));
    }

    #[test]
    fn unvalidated_constructor_reads_elements() {
        let payload = build_indexed_seq(&[b"ab", b"", b"cde"]);
        let w: IndexedSeqWalker<()> =
            IndexedSeqWalker::from_payload_unvalidated(&payload).unwrap();
        assert!(w.is_indexed());
        assert!(!w.is_empty());
        assert_eq!(w.len(), 3);
        assert_eq!(w.body(), b"abcde");
        assert_eq!(w.element_bytes(0).unwrap(), b"ab");
        assert_eq!(w.element_bytes(1).unwrap(), b"");
        assert_eq!(w.element_bytes(2).unwrap(), b"cde");
    }

    #[test]
    fn element_bytes_rejects_out_of_range_index() {
        let payload = build_indexed_seq(&[b"a"]);
        let w: IndexedSeqWalker<()> =
            IndexedSeqWalker::from_payload_unvalidated(&payload).unwrap();
        assert!(matches!(w.element_bytes(1), Err(Error::Deserialize(_))));
    }
}