use crate::error::{BinaryError, Result};
use crate::jid::JidRef;
use crate::node::{AttrsRef, NodeContentRef, NodeRef, NodeVec, ValueRef};
use crate::token;
use std::borrow::Cow;
#[cfg(feature = "simd")]
use std::simd::{Simd, prelude::*, u8x16};
/// Cursor-style reader over a borrowed byte buffer.
///
/// Decoded strings and byte slices borrow from `data` wherever possible, which
/// is why results carry the `'a` lifetime.
pub(crate) struct Decoder<'a> {
// The complete input buffer being decoded.
data: &'a [u8],
// Offset into `data` of the next unread byte.
position: usize,
}
impl<'a> Decoder<'a> {
pub(crate) fn new(data: &'a [u8]) -> Self {
Self { data, position: 0 }
}
/// Returns `true` once the cursor has reached (or passed) the end of the input.
pub(crate) fn is_finished(&self) -> bool {
    self.data.len() <= self.position
}
/// Number of bytes between the cursor and the end of the input.
pub(crate) fn bytes_left(&self) -> usize {
    let total = self.data.len();
    total - self.position
}
/// Verifies that at least `len` unread bytes remain.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` when fewer than `len` bytes are left.
#[inline(always)]
fn check_eos(&self, len: usize) -> Result<()> {
    if len > self.bytes_left() {
        return Err(BinaryError::UnexpectedEof);
    }
    Ok(())
}
/// Reads one byte and advances the cursor past it.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if the input is exhausted.
#[inline(always)]
fn read_u8(&mut self) -> Result<u8> {
    self.check_eos(1)?;
    let byte = self.data[self.position];
    self.position += 1;
    Ok(byte)
}
/// Reads a big-endian `u16` and advances the cursor by two bytes.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if fewer than two bytes remain.
#[inline(always)]
fn read_u16_be(&mut self) -> Result<u16> {
    self.check_eos(2)?;
    let start = self.position;
    let (high, low) = (self.data[start], self.data[start + 1]);
    self.position = start + 2;
    Ok((u16::from(high) << 8) | u16::from(low))
}
/// Reads a big-endian 20-bit integer stored in three bytes.
///
/// Only the low four bits of the first byte contribute to the value; its
/// upper four bits are masked off.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if fewer than three bytes remain.
#[inline(always)]
fn read_u20_be(&mut self) -> Result<u32> {
    self.check_eos(3)?;
    let start = self.position;
    let high = u32::from(self.data[start]) & 0x0F;
    let middle = u32::from(self.data[start + 1]);
    let low = u32::from(self.data[start + 2]);
    self.position = start + 3;
    Ok((high << 16) | (middle << 8) | low)
}
/// Reads a big-endian `u32` and advances the cursor by four bytes.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if fewer than four bytes remain.
#[inline(always)]
fn read_u32_be(&mut self) -> Result<u32> {
    self.check_eos(4)?;
    let start = self.position;
    self.position = start + 4;
    // Accumulate most-significant byte first.
    let value = self.data[start..start + 4]
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte));
    Ok(value)
}
/// Returns the next `len` bytes as a slice borrowed from the input and
/// advances the cursor past them.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if fewer than `len` bytes remain.
#[inline(always)]
fn read_bytes(&mut self, len: usize) -> Result<&'a [u8]> {
    self.check_eos(len)?;
    // Copy the `&'a [u8]` out so the returned slice is tied to the input
    // buffer rather than to this `&mut self` borrow.
    let data = self.data;
    let end = self.position + len;
    let taken = &data[self.position..end];
    self.position = end;
    Ok(taken)
}
/// Reads `len` bytes and interprets them as UTF-8 without copying.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if fewer than `len` bytes remain, or
/// `BinaryError::InvalidUtf8` if the bytes are not valid UTF-8.
#[inline(always)]
fn read_string(&mut self, len: usize) -> Result<Cow<'a, str>> {
    let raw = self.read_bytes(len)?;
    std::str::from_utf8(raw)
        .map(Cow::Borrowed)
        .map_err(BinaryError::InvalidUtf8)
}
/// Decodes the element count that follows a list tag.
///
/// `LIST_EMPTY` carries no size and yields `0`; `LIST_8` and `LIST_16` are
/// followed by a one-byte or two-byte big-endian count respectively.
///
/// # Errors
/// Returns `BinaryError::InvalidToken` for any other tag, or
/// `BinaryError::UnexpectedEof` if the count is truncated.
#[inline(always)]
fn read_list_size(&mut self, tag: u8) -> Result<usize> {
    let size = match tag {
        token::LIST_EMPTY => 0,
        token::LIST_8 => usize::from(self.read_u8()?),
        token::LIST_16 => usize::from(self.read_u16_be()?),
        other => return Err(BinaryError::InvalidToken(other)),
    };
    Ok(size)
}
/// Reads a user/server JID pair.
///
/// The user value is read first, then the server value; either one decoding
/// to "no value" is replaced by an empty string. Agent, device and
/// integrator are always zero for this form.
fn read_jid_pair(&mut self) -> Result<JidRef<'a>> {
    let user = self.read_value_as_string()?;
    let server = self.read_value_as_string()?;
    Ok(JidRef {
        user: user.unwrap_or(Cow::Borrowed("")),
        server: server.unwrap_or(Cow::Borrowed("")),
        agent: 0,
        device: 0,
        integrator: 0,
    })
}
/// Reads an AD JID: one agent byte, one device byte, then the user value.
///
/// The server is derived from the agent byte. The user is read before the
/// agent byte is validated, so a missing user is reported first.
///
/// # Errors
/// Returns `BinaryError::InvalidNode` if the user value is absent and
/// `BinaryError::AttrParse` if the agent byte maps to no known server.
fn read_ad_jid(&mut self) -> Result<JidRef<'a>> {
    let agent = self.read_u8()?;
    let device = u16::from(self.read_u8()?);
    let Some(user) = self.read_value_as_string()? else {
        return Err(BinaryError::InvalidNode);
    };
    let domain = match agent {
        0 => crate::jid::DEFAULT_USER_SERVER,
        1 => crate::jid::HIDDEN_USER_SERVER,
        129 => crate::jid::HOSTED_LID_SERVER,
        // High bit set with the lowest bit clear; this includes exactly 128.
        n if (n & 0x81) == 0x80 => crate::jid::HOSTED_SERVER,
        _ => {
            return Err(BinaryError::AttrParse(format!(
                "AD_JID invalid domain type: {agent}"
            )));
        }
    };
    Ok(JidRef {
        user,
        server: Cow::Borrowed(domain),
        agent,
        device,
        integrator: 0,
    })
}
/// Reads an interop JID: user value, big-endian `u16` device, big-endian
/// `u16` integrator, then the server value.
///
/// # Errors
/// Returns `BinaryError::InvalidNode` if the user value is absent or the
/// server is not `crate::jid::INTEROP_SERVER` (a missing server is treated as
/// the empty string for that comparison).
fn read_interop_jid(&mut self) -> Result<JidRef<'a>> {
    let user = match self.read_value_as_string()? {
        Some(user) => user,
        None => return Err(BinaryError::InvalidNode),
    };
    let device = self.read_u16_be()?;
    let integrator = self.read_u16_be()?;
    let server = self.read_value_as_string()?.unwrap_or(Cow::Borrowed(""));
    if server == crate::jid::INTEROP_SERVER {
        Ok(JidRef {
            user,
            server,
            device,
            integrator,
            agent: 0,
        })
    } else {
        Err(BinaryError::InvalidNode)
    }
}
/// Reads an FB JID: user value, big-endian `u16` device, then the server
/// value.
///
/// # Errors
/// Returns `BinaryError::InvalidNode` if the user value is absent or the
/// server is not `crate::jid::MESSENGER_SERVER` (a missing server is treated
/// as the empty string for that comparison).
fn read_fb_jid(&mut self) -> Result<JidRef<'a>> {
    let user = match self.read_value_as_string()? {
        Some(user) => user,
        None => return Err(BinaryError::InvalidNode),
    };
    let device = self.read_u16_be()?;
    let server = self.read_value_as_string()?.unwrap_or(Cow::Borrowed(""));
    if server == crate::jid::MESSENGER_SERVER {
        Ok(JidRef {
            user,
            server,
            device,
            agent: 0,
            integrator: 0,
        })
    } else {
        Err(BinaryError::InvalidNode)
    }
}
/// Reads a tag byte and decodes the value it introduces as a string.
///
/// See `read_value_as_string_from_tag` for the per-tag behaviour.
fn read_value_as_string(&mut self) -> Result<Option<Cow<'a, str>>> {
    self.read_u8()
        .and_then(|tag| self.read_value_as_string_from_tag(tag))
}
/// Decodes the value introduced by an already-consumed `tag` as a string.
///
/// * `LIST_EMPTY` yields `Ok(None)`.
/// * `BINARY_8` / `BINARY_20` / `BINARY_32` read a length of the matching
///   width and borrow that many bytes as UTF-8.
/// * JID tags are decoded and rendered through `to_string()`.
/// * `NIBBLE_8` / `HEX_8` are unpacked via `read_packed`.
/// * `DICTIONARY_0..=DICTIONARY_3` read an index byte and look up a
///   double-byte token.
/// * Any other tag is looked up as a single-byte token.
///
/// # Errors
/// Returns `BinaryError::InvalidToken(tag)` when a token lookup fails, and
/// propagates any error from the underlying reads.
#[inline(always)]
fn read_value_as_string_from_tag(&mut self, tag: u8) -> Result<Option<Cow<'a, str>>> {
    let text = match tag {
        token::LIST_EMPTY => return Ok(None),
        token::BINARY_8 => {
            let size = usize::from(self.read_u8()?);
            self.read_string(size)?
        }
        token::BINARY_20 => {
            let size = self.read_u20_be()? as usize;
            self.read_string(size)?
        }
        token::BINARY_32 => {
            let size = self.read_u32_be()? as usize;
            self.read_string(size)?
        }
        token::JID_PAIR => Cow::Owned(self.read_jid_pair()?.to_string()),
        token::AD_JID => Cow::Owned(self.read_ad_jid()?.to_string()),
        token::INTEROP_JID => Cow::Owned(self.read_interop_jid()?.to_string()),
        token::FB_JID => Cow::Owned(self.read_fb_jid()?.to_string()),
        token::NIBBLE_8 | token::HEX_8 => Cow::Owned(self.read_packed(tag)?),
        token::DICTIONARY_0..=token::DICTIONARY_3 => {
            let index = self.read_u8()?;
            let word = token::get_double_token(tag - token::DICTIONARY_0, index)
                .ok_or(BinaryError::InvalidToken(tag))?;
            Cow::Borrowed(word)
        }
        _ => {
            let word = token::get_single_token(tag).ok_or(BinaryError::InvalidToken(tag))?;
            Cow::Borrowed(word)
        }
    };
    Ok(Some(text))
}
/// Reads a tag byte and decodes the value it introduces.
///
/// Mirrors `read_value_as_string_from_tag`, except that JID tags are kept as
/// structured `ValueRef::Jid` values instead of being rendered to text. All
/// other successful results are wrapped in `ValueRef::String`; `LIST_EMPTY`
/// yields `Ok(None)`.
///
/// # Errors
/// Returns `BinaryError::InvalidToken(tag)` when a token lookup fails, and
/// propagates any error from the underlying reads.
fn read_value(&mut self) -> Result<Option<ValueRef<'a>>> {
    let tag = self.read_u8()?;
    let value = match tag {
        token::LIST_EMPTY => return Ok(None),
        token::BINARY_8 => {
            let size = usize::from(self.read_u8()?);
            ValueRef::String(self.read_string(size)?)
        }
        token::BINARY_20 => {
            let size = self.read_u20_be()? as usize;
            ValueRef::String(self.read_string(size)?)
        }
        token::BINARY_32 => {
            let size = self.read_u32_be()? as usize;
            ValueRef::String(self.read_string(size)?)
        }
        token::JID_PAIR => ValueRef::Jid(self.read_jid_pair()?),
        token::AD_JID => ValueRef::Jid(self.read_ad_jid()?),
        token::INTEROP_JID => ValueRef::Jid(self.read_interop_jid()?),
        token::FB_JID => ValueRef::Jid(self.read_fb_jid()?),
        token::NIBBLE_8 | token::HEX_8 => ValueRef::String(Cow::Owned(self.read_packed(tag)?)),
        token::DICTIONARY_0..=token::DICTIONARY_3 => {
            let index = self.read_u8()?;
            let word = token::get_double_token(tag - token::DICTIONARY_0, index)
                .ok_or(BinaryError::InvalidToken(tag))?;
            ValueRef::String(Cow::Borrowed(word))
        }
        _ => {
            let word = token::get_single_token(tag).ok_or(BinaryError::InvalidToken(tag))?;
            ValueRef::String(Cow::Borrowed(word))
        }
    };
    Ok(Some(value))
}
/// Reads a packed (two characters per byte) string.
///
/// The first byte is a header: its low seven bits give the number of packed
/// bytes that follow, and its high bit marks an odd character count, in which
/// case the final decoded character (the padding) is dropped. `tag` selects
/// the alphabet: `token::HEX_8` or `token::NIBBLE_8`.
///
/// # Errors
/// Returns `BinaryError::UnexpectedEof` if the header or payload is
/// truncated, `BinaryError::InvalidToken` if `tag` is neither packed tag, and
/// whatever `decode_packed_nibble` reports for an invalid nibble.
fn read_packed(&mut self, tag: u8) -> Result<String> {
    let header = self.read_u8()?;
    let drop_padding = (header & 0x80) != 0;
    let len = usize::from(header & 0x7F);
    if len == 0 {
        return Ok(String::new());
    }
    let packed_data = self.read_bytes(len)?;
    // Both decoders always emit exactly two characters per packed byte and the
    // padding character is only removed afterwards, so reserve the full
    // `len * 2`. Reserving one less for odd-length strings would force a
    // reallocation on the very last push.
    let mut unpacked_bytes = Vec::with_capacity(len * 2);
    match tag {
        token::HEX_8 => Self::decode_packed_hex(packed_data, &mut unpacked_bytes),
        token::NIBBLE_8 => Self::decode_packed_nibble(packed_data, &mut unpacked_bytes)?,
        _ => return Err(BinaryError::InvalidToken(tag)),
    }
    if drop_padding {
        unpacked_bytes.pop();
    }
    // Every byte produced by the decoders is ASCII (digits, `-`, `.`, `A`-`F`
    // or NUL), so this conversion cannot fail.
    Ok(String::from_utf8(unpacked_bytes).expect("packed decode produced non-ASCII"))
}
/// Appends the hex expansion of `packed_data` to `unpacked_bytes`.
///
/// Each input byte becomes two uppercase hex characters, high nibble first.
/// With the `simd` feature enabled, full 16-byte chunks are converted with
/// vector lookups and only the leftover bytes go through the scalar loop;
/// without it, the scalar loop handles every byte.
#[inline]
fn decode_packed_hex(packed_data: &[u8], unpacked_bytes: &mut Vec<u8>) {
// Vector fast path: shadows `packed_data` with the bytes that did not fit
// into a whole 16-byte chunk.
#[cfg(feature = "simd")]
let packed_data = {
const HEX_LOOKUP: [u8; 16] = *b"0123456789ABCDEF";
let lookup_table = Simd::from_array(HEX_LOOKUP);
let low_mask = Simd::splat(0x0F);
let (chunks, remainder) = packed_data.as_chunks::<16>();
// Each 16-byte chunk expands to 32 output characters.
unpacked_bytes.reserve(chunks.len() * 32);
for chunk in chunks {
let data = u8x16::from_array(*chunk);
let high_nibbles = (data >> 4) & low_mask;
let low_nibbles = data & low_mask;
// Use each nibble as an index into the 16-entry character table.
let high_chars = lookup_table.swizzle_dyn(high_nibbles);
let low_chars = lookup_table.swizzle_dyn(low_nibbles);
// Interleave so every high-nibble character is followed by its low-nibble
// character; the two resulting vectors are appended in order.
let (lo, hi) = Simd::interleave(high_chars, low_chars);
unpacked_bytes.extend_from_slice(lo.as_array());
unpacked_bytes.extend_from_slice(hi.as_array());
}
remainder
};
// Scalar path: the whole input without `simd`, or only the tail with it.
for &byte in packed_data {
let high = (byte & 0xF0) >> 4;
let low = byte & 0x0F;
unpacked_bytes.push(Self::unpack_hex(high));
unpacked_bytes.push(Self::unpack_hex(low));
}
}
/// Appends the nibble-alphabet expansion of `packed_data` to `unpacked_bytes`.
///
/// Each input byte becomes two output bytes, high nibble first, using the
/// mapping implemented by `unpack_nibble` (digits, `-`, `.`, and a NUL byte
/// for nibble 15). With the `simd` feature enabled, full 16-byte chunks are
/// validated and converted with vector operations; the remaining bytes (or
/// all bytes without the feature) go through the scalar loop.
///
/// # Errors
/// Propagates the error from `unpack_nibble` for any nibble it rejects.
#[inline]
fn decode_packed_nibble(packed_data: &[u8], unpacked_bytes: &mut Vec<u8>) -> Result<()> {
// Vector fast path: shadows `packed_data` with the bytes that did not fit
// into a whole 16-byte chunk.
#[cfg(feature = "simd")]
let packed_data = {
// Indices 12..=15 map to NUL; 12..=14 are rejected by the validity mask
// below before the table is consulted.
const NIBBLE_LOOKUP: [u8; 16] = *b"0123456789-.\x00\x00\x00\x00";
let lookup_table = Simd::from_array(NIBBLE_LOOKUP);
let low_mask = Simd::splat(0x0F);
let le11 = Simd::splat(11);
let f15 = Simd::splat(15);
let (chunks, remainder) = packed_data.as_chunks::<16>();
// Each 16-byte chunk expands to 32 output bytes.
unpacked_bytes.reserve(chunks.len() * 32);
for chunk in chunks {
let data = u8x16::from_array(*chunk);
let high_nibbles = (data >> 4) & low_mask;
let low_nibbles = data & low_mask;
// A nibble is acceptable when it is <= 11 or exactly 15.
let hi_valid = high_nibbles.simd_le(le11) | high_nibbles.simd_eq(f15);
let lo_valid = low_nibbles.simd_le(le11) | low_nibbles.simd_eq(f15);
if !(hi_valid & lo_valid).all() {
// At least one nibble failed the mask: rescan the chunk byte by byte so the
// error comes from `unpack_nibble` for the first offending nibble.
for byte in *chunk {
let high = (byte & 0xF0) >> 4;
let low = byte & 0x0F;
Self::unpack_nibble(high)?;
Self::unpack_nibble(low)?;
}
// NOTE(review): the mask and `unpack_nibble` accept the same values, so the
// loop above appears to always return an error, which would make this second
// loop and the `continue` unreachable. Confirm before simplifying.
for byte in *chunk {
let high = (byte & 0xF0) >> 4;
let low = byte & 0x0F;
unpacked_bytes.push(Self::unpack_nibble(high)?);
unpacked_bytes.push(Self::unpack_nibble(low)?);
}
continue;
}
// Use each nibble as an index into the 16-entry character table.
let high_chars = lookup_table.swizzle_dyn(high_nibbles);
let low_chars = lookup_table.swizzle_dyn(low_nibbles);
// Interleave so every high-nibble character is followed by its low-nibble
// character; the two resulting vectors are appended in order.
let (lo, hi) = Simd::interleave(high_chars, low_chars);
unpacked_bytes.extend_from_slice(lo.as_array());
unpacked_bytes.extend_from_slice(hi.as_array());
}
remainder
};
// Scalar path: the whole input without `simd`, or only the tail with it.
for &byte in packed_data {
let high = (byte & 0xF0) >> 4;
let low = byte & 0x0F;
unpacked_bytes.push(Self::unpack_nibble(high)?);
unpacked_bytes.push(Self::unpack_nibble(low)?);
}
Ok(())
}
/// Maps one nibble of the packed-nibble alphabet to its output byte.
///
/// `0..=9` become ASCII digits, `10` is `-`, `11` is `.`, and `15` becomes a
/// NUL byte.
///
/// # Errors
/// Returns `BinaryError::InvalidToken(value)` for every other value.
#[inline(always)]
fn unpack_nibble(value: u8) -> Result<u8> {
    let decoded = match value {
        digit @ 0..=9 => b'0' + digit,
        10 => b'-',
        11 => b'.',
        15 => 0,
        other => return Err(BinaryError::InvalidToken(other)),
    };
    Ok(decoded)
}
/// Maps a 4-bit value to its uppercase ASCII hex digit.
///
/// # Panics
/// Panics if `value` is greater than 15; callers mask to four bits first.
#[inline(always)]
fn unpack_hex(value: u8) -> u8 {
    if value < 10 {
        value + b'0'
    } else if value < 16 {
        value - 10 + b'A'
    } else {
        unreachable!("hex nibble validated by 4-bit mask")
    }
}
/// Reads `size` key/value attribute pairs.
///
/// Keys are decoded as strings; a value that decodes to "no value" is stored
/// as an empty string.
///
/// # Errors
/// Returns `BinaryError::NonStringKey` when a key decodes to "no value", and
/// propagates any error from reading a key or value.
fn read_attributes(&mut self, size: usize) -> Result<AttrsRef<'a>> {
    // `size` comes straight from the input. Every attribute consumes at least
    // two bytes (one tag for the key, one for the value), so never reserve
    // more slots than the remaining input could possibly fill. A truncated
    // input still fails with the same error; it just cannot force a large
    // allocation first.
    let capacity = size.min(self.bytes_left() / 2);
    let mut attrs = AttrsRef::with_capacity(capacity);
    for _ in 0..size {
        let key = self
            .read_value_as_string()?
            .ok_or(BinaryError::NonStringKey)?;
        let value = self
            .read_value()?
            .unwrap_or(ValueRef::String(Cow::Borrowed("")));
        attrs.push((key, value));
    }
    Ok(attrs)
}
/// Reads a tag byte and decodes the node content it introduces.
///
/// See `read_content_from_tag` for the per-tag behaviour.
fn read_content(&mut self) -> Result<Option<NodeContentRef<'a>>> {
    match self.read_u8() {
        Ok(tag) => self.read_content_from_tag(tag),
        Err(err) => Err(err),
    }
}
/// Decodes node content introduced by an already-consumed `tag`.
///
/// * `LIST_EMPTY` yields `Ok(None)`.
/// * `LIST_8` / `LIST_16` read a child count and then that many nodes.
/// * `BINARY_8` / `BINARY_20` / `BINARY_32` read a length of the matching
///   width and borrow that many raw bytes (no UTF-8 validation).
/// * Any other tag is decoded as a string via
///   `read_value_as_string_from_tag`.
///
/// # Errors
/// Propagates any error from the underlying reads.
#[inline(always)]
fn read_content_from_tag(&mut self, tag: u8) -> Result<Option<NodeContentRef<'a>>> {
    match tag {
        token::LIST_EMPTY => Ok(None),
        token::LIST_8 | token::LIST_16 => {
            let size = self.read_list_size(tag)?;
            // `size` is taken from the input. Each child node consumes at
            // least one byte, so cap the up-front reservation by the bytes
            // that are actually left; a tiny input claiming tens of thousands
            // of children must not trigger a large allocation before failing.
            let mut nodes = NodeVec::with_capacity(size.min(self.bytes_left()));
            for _ in 0..size {
                nodes.push(self.read_node_ref()?);
            }
            Ok(Some(NodeContentRef::Nodes(Box::new(nodes))))
        }
        token::BINARY_8 => {
            let len = usize::from(self.read_u8()?);
            let bytes = self.read_bytes(len)?;
            Ok(Some(NodeContentRef::Bytes(Cow::Borrowed(bytes))))
        }
        token::BINARY_20 => {
            let len = self.read_u20_be()? as usize;
            let bytes = self.read_bytes(len)?;
            Ok(Some(NodeContentRef::Bytes(Cow::Borrowed(bytes))))
        }
        token::BINARY_32 => {
            let len = self.read_u32_be()? as usize;
            let bytes = self.read_bytes(len)?;
            Ok(Some(NodeContentRef::Bytes(Cow::Borrowed(bytes))))
        }
        _ => Ok(self
            .read_value_as_string_from_tag(tag)?
            .map(NodeContentRef::String)),
    }
}
/// Decodes one node starting at the cursor.
///
/// A node is a list whose first element is the tag, followed by key/value
/// attribute pairs and, when the list size is even, one trailing content
/// element.
///
/// NOTE(review): child lists are decoded by calling back into this function
/// through `read_content`, so recursion depth is governed by the input and is
/// not bounded here.
///
/// # Errors
/// Returns `BinaryError::InvalidNode` for an empty list or a missing tag, and
/// propagates any error from the underlying reads.
pub(crate) fn read_node_ref(&mut self) -> Result<NodeRef<'a>> {
    let list_tag = self.read_u8()?;
    let list_size = match self.read_list_size(list_tag)? {
        0 => return Err(BinaryError::InvalidNode),
        n => n,
    };
    let Some(tag) = self.read_value_as_string()? else {
        return Err(BinaryError::InvalidNode);
    };
    // One element is the tag; the rest are attribute pairs plus, for an even
    // list size, a single content element.
    let attrs = self.read_attributes((list_size - 1) / 2)?;
    let content = match list_size % 2 {
        0 => self.read_content()?.map(Box::new),
        _ => None,
    };
    Ok(NodeRef {
        tag,
        attrs,
        content,
    })
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::node::{Attrs, Node};
type TestResult = crate::error::Result<()>;
/// Round-trips a node with string content through the encoder and decoder.
#[test]
fn test_decode_node() -> TestResult {
    let content = crate::node::NodeContent::String("receipt".to_string());
    let node = Node::new("message", Attrs::new(), Some(content));

    let mut buffer = Vec::new();
    {
        let sink = std::io::Cursor::new(&mut buffer);
        let mut encoder = crate::encoder::Encoder::new(sink)?;
        encoder.write_node(&node)?;
    }

    // The first encoded byte is skipped before decoding (presumably a leading
    // flag byte written by the encoder; confirm against `Encoder::new`).
    let mut decoder = Decoder::new(&buffer[1..]);
    let decoded = decoder.read_node_ref().unwrap();

    assert_eq!(decoded.tag, "message");
    assert!(decoded.attrs.is_empty());
    let Some(content) = decoded.content.as_deref() else {
        panic!("Expected content");
    };
    let crate::node::NodeContentRef::String(text) = content else {
        panic!("Expected string content");
    };
    assert_eq!(text, "receipt");
    Ok(())
}
/// Round-trips a string made only of nibble-alphabet characters.
#[test]
fn test_decode_nibble_packing() -> TestResult {
    let test_str = "-.0123456789";
    let content = crate::node::NodeContent::String(test_str.to_string());
    let node = Node::new("test", Attrs::new(), Some(content));

    let mut buffer = Vec::new();
    {
        let sink = std::io::Cursor::new(&mut buffer);
        let mut encoder = crate::encoder::Encoder::new(sink)?;
        encoder.write_node(&node)?;
    }

    // The first encoded byte is skipped before decoding (presumably a leading
    // flag byte written by the encoder; confirm against `Encoder::new`).
    let mut decoder = Decoder::new(&buffer[1..]);
    let decoded = decoder.read_node_ref().unwrap();

    assert_eq!(decoded.tag, "test");
    assert!(decoded.attrs.is_empty());
    let Some(content) = decoded.content.as_deref() else {
        panic!("Expected content");
    };
    let crate::node::NodeContentRef::String(text) = content else {
        panic!("Expected string content");
    };
    assert_eq!(text, test_str);
    Ok(())
}
/// A packed byte whose high nibble is 12 must be rejected, and the error must
/// carry that nibble value.
#[test]
fn test_invalid_nibble_rejection() {
    // Header: one packed byte. Payload 0xC0: high nibble 12, low nibble 0.
    let invalid_data = [1u8, 0xC0];
    let mut decoder = Decoder::new(&invalid_data);
    let result = decoder.read_packed(token::NIBBLE_8);
    assert!(
        result.is_err(),
        "Expected error for invalid nibble 12, got: {:?}",
        result
    );
    match result {
        Err(BinaryError::InvalidToken(invalid_nibble)) => {
            assert_eq!(invalid_nibble, 12, "Expected invalid nibble 12");
        }
        other => panic!("Expected InvalidToken error, got: {:?}", other),
    }
}
/// Decoding a node from an empty buffer must fail.
#[test]
fn test_empty_input() {
    let empty: &[u8] = &[];
    let mut decoder = Decoder::new(empty);
    let result = decoder.read_node_ref();
    assert!(result.is_err());
}
/// Reading a `u16` from a single byte must report `UnexpectedEof`.
#[test]
fn test_truncated_u16() {
    let data = [0x42u8];
    let mut decoder = Decoder::new(&data);
    let result = decoder.read_u16_be();
    assert!(result.is_err());
    if !matches!(result, Err(BinaryError::UnexpectedEof)) {
        panic!("Expected UnexpectedEof, got: {:?}", result);
    }
}
/// Reading a 20-bit value from only two bytes must fail.
#[test]
fn test_truncated_u20() {
    let data = [0x42u8, 0x43];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_u20_be().is_err());
}
/// Reading a `u32` from only three bytes must fail.
#[test]
fn test_truncated_u32() {
    let data = [0x42u8, 0x43, 0x44];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_u32_be().is_err());
}
/// A `BINARY_8` value declaring 100 bytes with only 5 present must fail.
#[test]
fn test_binary8_length_exceeds_buffer() {
    let data = [token::BINARY_8, 100, 1, 2, 3, 4, 5];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_value_as_string().is_err());
}
/// A `BINARY_20` value declaring 256 bytes with only 3 present must fail.
#[test]
fn test_binary20_length_exceeds_buffer() {
    // Length bytes 0x00 0x01 0x00 decode to 256.
    let data = [token::BINARY_20, 0x00, 0x01, 0x00, 1, 2, 3];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_value_as_string().is_err());
}
/// A `LIST_8` node declaring 10 elements with a single byte after the size
/// must fail.
#[test]
fn test_list8_size_exceeds_data() {
    let data = [token::LIST_8, 10, 1];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_node_ref().is_err());
}
/// Input that ends right after an `AD_JID` tag must fail to decode.
#[test]
fn test_invalid_token() {
    let data = [token::AD_JID];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_value_as_string().is_err());
}
/// Reading exactly the whole buffer succeeds and leaves the decoder finished.
#[test]
fn test_read_bytes_exact_length() {
    let data = [1u8, 2, 3, 4, 5];
    let mut decoder = Decoder::new(&data);
    let bytes = decoder.read_bytes(data.len()).unwrap();
    assert_eq!(bytes, &[1, 2, 3, 4, 5]);
    assert!(decoder.is_finished());
}
/// Requesting more bytes than the buffer holds must fail.
#[test]
fn test_read_bytes_exceeding_length() {
    let data = [1u8, 2, 3];
    let mut decoder = Decoder::new(&data);
    assert!(decoder.read_bytes(5).is_err());
}
/// Checks 20-bit decoding at zero, at each byte boundary, and at the maximum.
#[test]
fn test_u20_encoding() {
    let cases: [([u8; 3], u32); 4] = [
        ([0x00, 0x00, 0x00], 0),
        ([0x00, 0x01, 0x00], 256),
        ([0x01, 0x00, 0x00], 65536),
        ([0x0F, 0xFF, 0xFF], 1048575),
    ];
    for (bytes, expected) in cases.iter() {
        let mut decoder = Decoder::new(bytes);
        assert_eq!(decoder.read_u20_be().unwrap(), *expected);
    }
}
/// `bytes_left` shrinks with every read and reaches zero at the end.
#[test]
fn test_bytes_left() {
    let data = [1u8, 2, 3, 4, 5];
    let mut decoder = Decoder::new(&data);
    assert_eq!(decoder.bytes_left(), 5);
    for remaining in [4usize, 3].iter() {
        decoder.read_u8().unwrap();
        assert_eq!(decoder.bytes_left(), *remaining);
    }
    decoder.read_bytes(3).unwrap();
    assert_eq!(decoder.bytes_left(), 0);
    assert!(decoder.is_finished());
}
/// Three packed bytes expand to six uppercase hex characters.
#[test]
fn test_hex_packed_decoding() {
    // Header 3: three packed bytes, even character count.
    let packed_data = [3u8, 0xAB, 0xCD, 0xEF];
    let mut decoder = Decoder::new(&packed_data);
    let result = decoder.read_packed(token::HEX_8).unwrap();
    assert_eq!(result, "ABCDEF");
}
/// With the header's high bit set, the trailing padding character is dropped.
#[test]
fn test_nibble_packed_odd_length() {
    // Header 0x82: high bit set (odd length), two packed bytes.
    // Payload 0x12 0x3F: nibbles 1, 2, 3 and the padding nibble 15.
    let packed_data = [0x82u8, 0x12, 0x3F];
    let mut decoder = Decoder::new(&packed_data);
    let result = decoder.read_packed(token::NIBBLE_8).unwrap();
    assert_eq!(result, "123");
}
/// A zero-length header decodes to the empty string.
#[test]
fn test_empty_packed_string() {
    let packed_data = [0u8];
    let mut decoder = Decoder::new(&packed_data);
    let result = decoder.read_packed(token::NIBBLE_8).unwrap();
    assert_eq!(result, "");
}
/// Nibble value 12 is not part of the nibble alphabet and must be rejected.
#[test]
fn test_invalid_nibble_value_12() {
    let packed_data = [1u8, 0xC0];
    let mut decoder = Decoder::new(&packed_data);
    assert!(decoder.read_packed(token::NIBBLE_8).is_err());
}
/// Nibble value 13 is not part of the nibble alphabet and must be rejected.
#[test]
fn test_invalid_nibble_value_13() {
    let packed_data = [1u8, 0xD0];
    let mut decoder = Decoder::new(&packed_data);
    assert!(decoder.read_packed(token::NIBBLE_8).is_err());
}
/// Nibble value 14 is not part of the nibble alphabet and must be rejected.
#[test]
fn test_invalid_nibble_value_14() {
    let packed_data = [1u8, 0xE0];
    let mut decoder = Decoder::new(&packed_data);
    assert!(decoder.read_packed(token::NIBBLE_8).is_err());
}
/// Encodes a chain of 50 nodes nested inside each other (plus the innermost
/// leaf) and checks that the decoder can read it back, reporting the
/// outermost tag.
#[test]
fn test_nested_nodes() -> TestResult {
    // Build from the inside out: each iteration wraps the previous node.
    let mut current = Node::new("leaf", Attrs::new(), None);
    for i in 0..50 {
        let tag = format!("level{}", i);
        current = Node::new(
            tag,
            Attrs::new(),
            Some(crate::node::NodeContent::Nodes(vec![current])),
        );
    }
    let mut buffer = Vec::new();
    {
        let mut encoder = crate::encoder::Encoder::new(std::io::Cursor::new(&mut buffer))?;
        // Pass the outermost node by reference. The original text here had
        // been mangled into a currency-sign character and did not compile.
        encoder.write_node(&current)?;
    }
    // The first encoded byte is skipped before decoding (presumably a leading
    // flag byte written by the encoder; confirm against `Encoder::new`).
    let mut decoder = Decoder::new(&buffer[1..]);
    let decoded = decoder.read_node_ref()?;
    assert_eq!(decoded.tag, "level49");
    Ok(())
}
}