extern crate alloc;
use alloc::{borrow::Cow, format, vec::Vec};
use crate::error::codes;
use facet_format::{
ContainerKind, DeserializeErrorKind, FieldKey, FieldLocationHint, FormatParser, ParseError,
ParseEvent, ParseEventKind, SavePoint, ScalarValue,
};
use facet_reflect::Span;
// MessagePack type tags (the first byte of every encoded value).

// Single-byte values: nil and the two booleans.
const MSGPACK_NIL: u8 = 0xc0;
const MSGPACK_FALSE: u8 = 0xc2;
const MSGPACK_TRUE: u8 = 0xc3;
// Binary blobs: the tag is followed by an 8-, 16- or 32-bit length, then the bytes.
const MSGPACK_BIN8: u8 = 0xc4;
const MSGPACK_BIN16: u8 = 0xc5;
const MSGPACK_BIN32: u8 = 0xc6;
// Floating point numbers, stored big-endian after the tag.
const MSGPACK_FLOAT32: u8 = 0xca;
const MSGPACK_FLOAT64: u8 = 0xcb;
// Unsigned integers, stored big-endian after the tag.
const MSGPACK_UINT8: u8 = 0xcc;
const MSGPACK_UINT16: u8 = 0xcd;
const MSGPACK_UINT32: u8 = 0xce;
const MSGPACK_UINT64: u8 = 0xcf;
// Signed integers, stored big-endian after the tag.
const MSGPACK_INT8: u8 = 0xd0;
const MSGPACK_INT16: u8 = 0xd1;
const MSGPACK_INT32: u8 = 0xd2;
const MSGPACK_INT64: u8 = 0xd3;
// Strings: the tag is followed by an 8-, 16- or 32-bit length, then the bytes.
const MSGPACK_STR8: u8 = 0xd9;
const MSGPACK_STR16: u8 = 0xda;
const MSGPACK_STR32: u8 = 0xdb;
// Arrays and maps: the tag is followed by a 16- or 32-bit element/entry count.
const MSGPACK_ARRAY16: u8 = 0xdc;
const MSGPACK_ARRAY32: u8 = 0xdd;
const MSGPACK_MAP16: u8 = 0xde;
const MSGPACK_MAP32: u8 = 0xdf;
// Boundaries of the "fix" ranges, where the tag byte itself carries the value
// (fixint) or the length/count in its low bits (fixmap, fixarray, fixstr).
const MSGPACK_POSFIXINT_MAX: u8 = 0x7f;
const MSGPACK_FIXMAP_MIN: u8 = 0x80;
const MSGPACK_FIXMAP_MAX: u8 = 0x8f;
const MSGPACK_FIXARRAY_MIN: u8 = 0x90;
const MSGPACK_FIXARRAY_MAX: u8 = 0x9f;
const MSGPACK_FIXSTR_MIN: u8 = 0xa0;
const MSGPACK_FIXSTR_MAX: u8 = 0xbf;
// Negative fixints occupy `MSGPACK_NEGFIXINT_MIN..=0xff`; the tag reinterpreted
// as `i8` is the value.
const MSGPACK_NEGFIXINT_MIN: u8 = 0xe0;
/// Event-producing parser over a borrowed MessagePack byte buffer.
///
/// Strings and binary payloads are handed out as slices borrowed from the
/// input for the `'de` lifetime.
pub struct MsgPackParser<'de> {
/// The complete input buffer being parsed.
input: &'de [u8],
/// Byte offset into `input` of the next unread byte.
pos: usize,
/// One entry per container that has been opened but not yet closed,
/// innermost last.
stack: Vec<ContextState>,
/// Event produced by `peek_event` that has not yet been consumed by
/// `next_event`.
event_peek: Option<ParseEvent<'de>>,
}
/// Progress through a container that is currently open on the parser stack.
#[derive(Debug, Clone, Copy)]
enum ContextState {
/// Inside a map, positioned before a key; `remaining` entries are still to
/// be read (zero means the map is about to be closed).
MapKey { remaining: usize },
/// Inside a map, positioned before the value of a key that was just read;
/// `remaining` entries follow once this value is finished.
MapValue { remaining: usize },
/// Inside an array with `remaining` elements still to be read.
Array { remaining: usize },
}
/// Builds an `InvalidValue` parse error for one of the numeric error `codes`.
///
/// The error covers the single byte at `pos`. Codes without a dedicated
/// message are reported as "unknown error".
fn error_from_code(code: i32, pos: usize) -> ParseError {
    let text = match code {
        codes::UNEXPECTED_EOF => "unexpected end of input",
        codes::EXPECTED_BOOL => "expected bool (0xC2 or 0xC3)",
        codes::EXPECTED_ARRAY => "expected array tag (fixarray/array16/array32)",
        codes::EXPECTED_BIN => "expected bin tag (bin8/bin16/bin32)",
        codes::EXPECTED_INT => "expected integer tag",
        codes::INT_OVERFLOW => "integer value overflows target type",
        codes::COUNT_OVERFLOW => "count too large for platform",
        codes::SEQ_UNDERFLOW => "sequence underflow (internal error)",
        codes::UNSUPPORTED => "unsupported operation",
        _ => "unknown error",
    };
    let span = Span::new(pos, 1);
    let kind = DeserializeErrorKind::InvalidValue {
        message: text.into(),
    };
    ParseError::new(span, kind)
}
impl<'de> MsgPackParser<'de> {
/// Creates a parser positioned at the start of `input`, with no open
/// containers and no buffered event.
pub const fn new(input: &'de [u8]) -> Self {
    Self {
        event_peek: None,
        stack: Vec::new(),
        pos: 0,
        input,
    }
}
/// Returns the byte under the cursor without consuming it.
///
/// # Errors
///
/// Returns an unexpected-EOF error when the cursor is at or past the end of
/// the input.
fn peek_byte(&self) -> Result<u8, ParseError> {
    match self.input.get(self.pos) {
        Some(&byte) => Ok(byte),
        None => Err(error_from_code(codes::UNEXPECTED_EOF, self.pos)),
    }
}
/// Consumes and returns the byte under the cursor.
///
/// The cursor only advances when a byte was available.
fn read_byte(&mut self) -> Result<u8, ParseError> {
    self.peek_byte().map(|byte| {
        self.pos += 1;
        byte
    })
}
/// Consumes exactly `n` bytes and returns them as a slice borrowed from the
/// input.
///
/// # Errors
///
/// Returns an unexpected-EOF error, leaving the cursor untouched, when fewer
/// than `n` bytes remain. A length so large that `pos + n` would overflow
/// `usize` is reported the same way instead of wrapping or panicking.
fn read_bytes(&mut self, n: usize) -> Result<&'de [u8], ParseError> {
    // `checked_add` keeps an attacker-controlled 32-bit length from wrapping
    // the end offset on narrow targets.
    let end = match self.pos.checked_add(n) {
        Some(end) if end <= self.input.len() => end,
        _ => return Err(error_from_code(codes::UNEXPECTED_EOF, self.pos)),
    };
    let slice = &self.input[self.pos..end];
    self.pos = end;
    Ok(slice)
}
/// Consumes two bytes and decodes them as a big-endian `u16`.
fn read_u16(&mut self) -> Result<u16, ParseError> {
    let mut buf = [0u8; 2];
    buf.copy_from_slice(self.read_bytes(2)?);
    Ok(u16::from_be_bytes(buf))
}
/// Consumes four bytes and decodes them as a big-endian `u32`.
fn read_u32(&mut self) -> Result<u32, ParseError> {
    let mut buf = [0u8; 4];
    buf.copy_from_slice(self.read_bytes(4)?);
    Ok(u32::from_be_bytes(buf))
}
/// Consumes eight bytes and decodes them as a big-endian `u64`.
fn read_u64(&mut self) -> Result<u64, ParseError> {
    let mut buf = [0u8; 8];
    buf.copy_from_slice(self.read_bytes(8)?);
    Ok(u64::from_be_bytes(buf))
}
/// Consumes one byte and reinterprets it as a two's-complement `i8`.
fn read_i8(&mut self) -> Result<i8, ParseError> {
    self.read_byte().map(|byte| i8::from_be_bytes([byte]))
}
/// Consumes two bytes and decodes them as a big-endian `i16`.
fn read_i16(&mut self) -> Result<i16, ParseError> {
    let mut buf = [0u8; 2];
    buf.copy_from_slice(self.read_bytes(2)?);
    Ok(i16::from_be_bytes(buf))
}
/// Consumes four bytes and decodes them as a big-endian `i32`.
fn read_i32(&mut self) -> Result<i32, ParseError> {
    let mut buf = [0u8; 4];
    buf.copy_from_slice(self.read_bytes(4)?);
    Ok(i32::from_be_bytes(buf))
}
/// Consumes eight bytes and decodes them as a big-endian `i64`.
fn read_i64(&mut self) -> Result<i64, ParseError> {
    let mut buf = [0u8; 8];
    buf.copy_from_slice(self.read_bytes(8)?);
    Ok(i64::from_be_bytes(buf))
}
/// Consumes four bytes and decodes them as a big-endian `f32`.
fn read_f32(&mut self) -> Result<f32, ParseError> {
    let mut buf = [0u8; 4];
    buf.copy_from_slice(self.read_bytes(4)?);
    Ok(f32::from_be_bytes(buf))
}
/// Consumes eight bytes and decodes them as a big-endian `f64`.
fn read_f64(&mut self) -> Result<f64, ParseError> {
    let mut buf = [0u8; 8];
    buf.copy_from_slice(self.read_bytes(8)?);
    Ok(f64::from_be_bytes(buf))
}
/// Decodes the byte length of a string whose tag byte `prefix` has just been
/// consumed, reading the 8/16/32-bit length field when the tag calls for one.
///
/// # Errors
///
/// Returns an `InvalidValue` error located at the tag byte when `prefix` is
/// not a string tag, or an unexpected-EOF error when the length field is
/// truncated.
fn read_str_len(&mut self, prefix: u8) -> Result<usize, ParseError> {
    // The caller has already stepped over `prefix`, so the offending byte sits
    // one position behind the cursor. Captured up front because the length
    // reads below move the cursor.
    let prefix_pos = self.pos.saturating_sub(1);
    match prefix {
        MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => Ok((prefix & 0x1f) as usize),
        MSGPACK_STR8 => Ok(self.read_byte()? as usize),
        MSGPACK_STR16 => Ok(self.read_u16()? as usize),
        MSGPACK_STR32 => Ok(self.read_u32()? as usize),
        _ => Err(ParseError::new(
            Span::new(prefix_pos, 1),
            DeserializeErrorKind::InvalidValue {
                message: format!("expected string, got 0x{:02x}", prefix).into(),
            },
        )),
    }
}
/// Reads a complete string value (tag, length and payload) and returns it
/// borrowed from the input.
///
/// # Errors
///
/// Fails when the tag is not a string tag, when the input is truncated, or
/// when the payload is not valid UTF-8. The UTF-8 error spans the payload and
/// carries up to its first 16 bytes as context.
fn read_string(&mut self) -> Result<Cow<'de, str>, ParseError> {
    let tag = self.read_byte()?;
    let len = self.read_str_len(tag)?;
    let raw = self.read_bytes(len)?;
    match core::str::from_utf8(raw) {
        Ok(text) => Ok(Cow::Borrowed(text)),
        Err(_) => {
            let context_len = len.min(16);
            let mut context = [0u8; 16];
            context[..context_len].copy_from_slice(&raw[..context_len]);
            // The cursor is already past the payload, so step back `len`
            // bytes to point the span at its start.
            Err(ParseError::new(
                Span::new(self.pos - len, len),
                DeserializeErrorKind::InvalidUtf8 {
                    context,
                    context_len: context_len as u8,
                },
            ))
        }
    }
}
/// Decodes the element count of an array whose tag byte `prefix` has just
/// been consumed, reading the 16/32-bit count field when the tag calls for
/// one.
///
/// # Errors
///
/// Returns an expected-array error located at the tag byte when `prefix` is
/// not an array tag, or an unexpected-EOF error when the count field is
/// truncated.
fn read_array_len(&mut self, prefix: u8) -> Result<usize, ParseError> {
    // The caller has already stepped over `prefix`; report the tag byte
    // itself rather than whatever follows it.
    let prefix_pos = self.pos.saturating_sub(1);
    match prefix {
        MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => Ok((prefix & 0x0f) as usize),
        MSGPACK_ARRAY16 => Ok(self.read_u16()? as usize),
        MSGPACK_ARRAY32 => Ok(self.read_u32()? as usize),
        _ => Err(error_from_code(codes::EXPECTED_ARRAY, prefix_pos)),
    }
}
/// Decodes the entry count of a map whose tag byte `prefix` has just been
/// consumed, reading the 16/32-bit count field when the tag calls for one.
///
/// # Errors
///
/// Returns an `InvalidValue` error located at the tag byte when `prefix` is
/// not a map tag, or an unexpected-EOF error when the count field is
/// truncated.
fn read_map_len(&mut self, prefix: u8) -> Result<usize, ParseError> {
    // The caller has already stepped over `prefix`; report the tag byte
    // itself rather than whatever follows it.
    let prefix_pos = self.pos.saturating_sub(1);
    match prefix {
        MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => Ok((prefix & 0x0f) as usize),
        MSGPACK_MAP16 => Ok(self.read_u16()? as usize),
        MSGPACK_MAP32 => Ok(self.read_u32()? as usize),
        _ => Err(ParseError::new(
            Span::new(prefix_pos, 1),
            DeserializeErrorKind::InvalidValue {
                message: format!("expected map, got 0x{:02x}", prefix).into(),
            },
        )),
    }
}
/// Records in the innermost open container that one value has been
/// completed.
///
/// A map waiting for a value goes back to waiting for a key with the same
/// entry count. A map waiting for a key, or an array, has its remaining count
/// reduced by one and never drops below zero. Does nothing at top level.
fn finish_value(&mut self) {
    if let Some(top) = self.stack.last_mut() {
        *top = match *top {
            ContextState::MapValue { remaining } => ContextState::MapKey { remaining },
            ContextState::MapKey { remaining } => ContextState::MapKey {
                remaining: remaining.saturating_sub(1),
            },
            ContextState::Array { remaining } => ContextState::Array {
                remaining: remaining.saturating_sub(1),
            },
        };
    }
}
fn produce_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
if let Some(context) = self.stack.last() {
match context {
ContextState::MapKey { remaining: 0 } => {
self.stack.pop();
self.finish_value();
return Ok(Some(self.event(ParseEventKind::StructEnd)));
}
ContextState::Array { remaining: 0 } => {
self.stack.pop();
self.finish_value();
return Ok(Some(self.event(ParseEventKind::SequenceEnd)));
}
_ => {}
}
}
if self.pos >= self.input.len() {
return Ok(None);
}
let expecting_key_remaining = match self.stack.last() {
Some(ContextState::MapKey { remaining }) => Some(*remaining),
_ => None,
};
if let Some(remaining) = expecting_key_remaining {
let key = self.read_string()?;
let new_remaining = remaining - 1;
if let Some(state) = self.stack.last_mut() {
*state = ContextState::MapValue {
remaining: new_remaining,
};
}
return Ok(Some(self.event(ParseEventKind::FieldKey(FieldKey::new(
key,
FieldLocationHint::KeyValue,
)))));
}
let prefix = self.read_byte()?;
match prefix {
MSGPACK_NIL => {
self.finish_value();
Ok(Some(self.event(ParseEventKind::Scalar(ScalarValue::Null))))
}
MSGPACK_FALSE => {
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::Bool(false))),
))
}
MSGPACK_TRUE => {
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::Bool(true))),
))
}
0x00..=MSGPACK_POSFIXINT_MAX => {
self.finish_value();
Ok(Some(self.event(ParseEventKind::Scalar(ScalarValue::U64(
prefix as u64,
)))))
}
MSGPACK_NEGFIXINT_MIN..=0xff => {
self.finish_value();
Ok(Some(self.event(ParseEventKind::Scalar(ScalarValue::I64(
prefix as i8 as i64,
)))))
}
MSGPACK_UINT8 => {
let v = self.read_byte()? as u64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::U64(v))),
))
}
MSGPACK_UINT16 => {
let v = self.read_u16()? as u64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::U64(v))),
))
}
MSGPACK_UINT32 => {
let v = self.read_u32()? as u64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::U64(v))),
))
}
MSGPACK_UINT64 => {
let v = self.read_u64()?;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::U64(v))),
))
}
MSGPACK_INT8 => {
let v = self.read_i8()? as i64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::I64(v))),
))
}
MSGPACK_INT16 => {
let v = self.read_i16()? as i64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::I64(v))),
))
}
MSGPACK_INT32 => {
let v = self.read_i32()? as i64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::I64(v))),
))
}
MSGPACK_INT64 => {
let v = self.read_i64()?;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::I64(v))),
))
}
MSGPACK_FLOAT32 => {
let v = self.read_f32()? as f64;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::F64(v))),
))
}
MSGPACK_FLOAT64 => {
let v = self.read_f64()?;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::F64(v))),
))
}
MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX
| MSGPACK_STR8
| MSGPACK_STR16
| MSGPACK_STR32 => {
let len = self.read_str_len(prefix)?;
let bytes = self.read_bytes(len)?;
let s = core::str::from_utf8(bytes)
.map(Cow::Borrowed)
.map_err(|_| {
let mut context = [0u8; 16];
let context_len = len.min(16);
context[..context_len].copy_from_slice(&bytes[..context_len]);
ParseError::new(
Span::new(self.pos - len, len),
DeserializeErrorKind::InvalidUtf8 {
context,
context_len: context_len as u8,
},
)
})?;
self.finish_value();
Ok(Some(
self.event(ParseEventKind::Scalar(ScalarValue::Str(s))),
))
}
MSGPACK_BIN8 => {
let len = self.read_byte()? as usize;
let bytes = self.read_bytes(len)?;
self.finish_value();
Ok(Some(self.event(ParseEventKind::Scalar(
ScalarValue::Bytes(Cow::Borrowed(bytes)),
))))
}
MSGPACK_BIN16 => {
let len = self.read_u16()? as usize;
let bytes = self.read_bytes(len)?;
self.finish_value();
Ok(Some(self.event(ParseEventKind::Scalar(
ScalarValue::Bytes(Cow::Borrowed(bytes)),
))))
}
MSGPACK_BIN32 => {
let len = self.read_u32()? as usize;
let bytes = self.read_bytes(len)?;
self.finish_value();
Ok(Some(self.event(ParseEventKind::Scalar(
ScalarValue::Bytes(Cow::Borrowed(bytes)),
))))
}
MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX | MSGPACK_ARRAY16 | MSGPACK_ARRAY32 => {
let len = self.read_array_len(prefix)?;
self.stack.push(ContextState::Array { remaining: len });
Ok(Some(self.event(ParseEventKind::SequenceStart(
ContainerKind::Array,
))))
}
MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX | MSGPACK_MAP16 | MSGPACK_MAP32 => {
let len = self.read_map_len(prefix)?;
self.stack.push(ContextState::MapKey { remaining: len });
Ok(Some(
self.event(ParseEventKind::StructStart(ContainerKind::Object)),
))
}
_ => Err(ParseError::new(
Span::new(self.pos - 1, 1),
DeserializeErrorKind::InvalidValue {
message: format!("unsupported MsgPack type: 0x{:02x}", prefix).into(),
},
)),
}
}
fn skip_value_internal(&mut self) -> Result<(), ParseError> {
let prefix = self.read_byte()?;
match prefix {
MSGPACK_NIL | MSGPACK_FALSE | MSGPACK_TRUE => Ok(()),
0x00..=MSGPACK_POSFIXINT_MAX => Ok(()),
MSGPACK_NEGFIXINT_MIN..=0xff => Ok(()),
MSGPACK_UINT8 => {
self.pos += 1;
Ok(())
}
MSGPACK_UINT16 => {
self.pos += 2;
Ok(())
}
MSGPACK_UINT32 => {
self.pos += 4;
Ok(())
}
MSGPACK_UINT64 => {
self.pos += 8;
Ok(())
}
MSGPACK_INT8 => {
self.pos += 1;
Ok(())
}
MSGPACK_INT16 => {
self.pos += 2;
Ok(())
}
MSGPACK_INT32 => {
self.pos += 4;
Ok(())
}
MSGPACK_INT64 => {
self.pos += 8;
Ok(())
}
MSGPACK_FLOAT32 => {
self.pos += 4;
Ok(())
}
MSGPACK_FLOAT64 => {
self.pos += 8;
Ok(())
}
MSGPACK_FIXSTR_MIN..=MSGPACK_FIXSTR_MAX => {
let len = (prefix & 0x1f) as usize;
self.pos += len;
Ok(())
}
MSGPACK_STR8 => {
let len = self.read_byte()? as usize;
self.pos += len;
Ok(())
}
MSGPACK_STR16 => {
let len = self.read_u16()? as usize;
self.pos += len;
Ok(())
}
MSGPACK_STR32 => {
let len = self.read_u32()? as usize;
self.pos += len;
Ok(())
}
MSGPACK_BIN8 => {
let len = self.read_byte()? as usize;
self.pos += len;
Ok(())
}
MSGPACK_BIN16 => {
let len = self.read_u16()? as usize;
self.pos += len;
Ok(())
}
MSGPACK_BIN32 => {
let len = self.read_u32()? as usize;
self.pos += len;
Ok(())
}
MSGPACK_FIXARRAY_MIN..=MSGPACK_FIXARRAY_MAX => {
let len = (prefix & 0x0f) as usize;
for _ in 0..len {
self.skip_value_internal()?;
}
Ok(())
}
MSGPACK_ARRAY16 => {
let len = self.read_u16()? as usize;
for _ in 0..len {
self.skip_value_internal()?;
}
Ok(())
}
MSGPACK_ARRAY32 => {
let len = self.read_u32()? as usize;
for _ in 0..len {
self.skip_value_internal()?;
}
Ok(())
}
MSGPACK_FIXMAP_MIN..=MSGPACK_FIXMAP_MAX => {
let len = (prefix & 0x0f) as usize;
for _ in 0..len {
self.skip_value_internal()?; self.skip_value_internal()?; }
Ok(())
}
MSGPACK_MAP16 => {
let len = self.read_u16()? as usize;
for _ in 0..len {
self.skip_value_internal()?; self.skip_value_internal()?; }
Ok(())
}
MSGPACK_MAP32 => {
let len = self.read_u32()? as usize;
for _ in 0..len {
self.skip_value_internal()?; self.skip_value_internal()?; }
Ok(())
}
0xc7 => {
let len = self.read_byte()? as usize;
self.pos += 1 + len; Ok(())
}
0xc8 => {
let len = self.read_u16()? as usize;
self.pos += 1 + len;
Ok(())
}
0xc9 => {
let len = self.read_u32()? as usize;
self.pos += 1 + len;
Ok(())
}
0xd4 => {
self.pos += 2;
Ok(())
}
0xd5 => {
self.pos += 3;
Ok(())
}
0xd6 => {
self.pos += 5;
Ok(())
}
0xd7 => {
self.pos += 9;
Ok(())
}
0xd8 => {
self.pos += 17;
Ok(())
}
_ => Err(ParseError::new(
Span::new(self.pos - 1, 1),
DeserializeErrorKind::InvalidValue {
message: format!("unsupported MsgPack type: 0x{:02x}", prefix).into(),
},
)),
}
}
}
impl<'de> MsgPackParser<'de> {
    /// Wraps `kind` in an event whose one-byte span starts at the current
    /// cursor position.
    #[inline]
    fn event(&self, kind: ParseEventKind<'de>) -> ParseEvent<'de> {
        let span = Span::new(self.pos, 1);
        ParseEvent::new(kind, span)
    }
}
impl<'de> FormatParser<'de> for MsgPackParser<'de> {
    /// Returns the buffered peeked event if there is one, otherwise produces
    /// a fresh event from the input.
    fn next_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
        match self.event_peek.take() {
            buffered @ Some(_) => Ok(buffered),
            None => self.produce_event(),
        }
    }

    /// Returns the next event without consuming it.
    ///
    /// The event is produced at most once and kept in the peek buffer until
    /// `next_event` takes it. End of input (`None`) is not buffered.
    fn peek_event(&mut self) -> Result<Option<ParseEvent<'de>>, ParseError> {
        if self.event_peek.is_none() {
            self.event_peek = self.produce_event()?;
        }
        Ok(self.event_peek.clone())
    }

    /// Skips one complete value at the cursor and records it as finished in
    /// the enclosing container.
    ///
    /// Must not be called while a peeked event is buffered (checked in debug
    /// builds only).
    fn skip_value(&mut self) -> Result<(), ParseError> {
        debug_assert!(
            self.event_peek.is_none(),
            "skip_value called while an event is buffered"
        );
        self.skip_value_internal().map(|()| self.finish_value())
    }

    /// Not implemented for this parser.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn save(&mut self) -> SavePoint {
        unimplemented!("save/restore not yet implemented for MsgPack")
    }

    /// Not implemented for this parser.
    ///
    /// # Panics
    ///
    /// Always panics.
    fn restore(&mut self, _save_point: SavePoint) {
        unimplemented!("save/restore not yet implemented for MsgPack")
    }
}