use alloc::string::String;
use alloc::vec::Vec;
use crate::error::{Error, Result};
use crate::value::{Document, Value};
const TAG_NULL: u8 = 0;
const TAG_BOOL: u8 = 1;
const TAG_INT: u8 = 2;
const TAG_FLOAT: u8 = 3;
const TAG_STR: u8 = 4;
const TAG_BYTES: u8 = 5;
const TAG_ARRAY: u8 = 6;
const TAG_OBJECT: u8 = 7;
const fn build_crc_table() -> [u32; 256] {
let mut table = [0u32; 256];
let mut i = 0;
while i < 256 {
let mut crc = i as u32;
let mut bit = 0;
while bit < 8 {
crc = if crc & 1 != 0 {
(crc >> 1) ^ 0x82F6_3B78
} else {
crc >> 1
};
bit += 1;
}
table[i] = crc;
i += 1;
}
table
}
static CRC_TABLE: [u32; 256] = build_crc_table();
#[must_use]
pub(crate) fn crc32c(data: &[u8]) -> u32 {
let mut crc = !0u32;
let mut i = 0;
while i < data.len() {
let index = ((crc ^ u32::from(data[i])) & 0xff) as usize;
crc = CRC_TABLE[index] ^ (crc >> 8);
i += 1;
}
!crc
}
pub(crate) fn encode_document_into(out: &mut Vec<u8>, doc: &Document) -> Result<()> {
write_len(out, doc.len())?;
for (key, value) in doc {
write_len(out, key.len())?;
out.extend_from_slice(key.as_bytes());
encode_value(out, value)?;
}
Ok(())
}
fn encode_value(out: &mut Vec<u8>, value: &Value) -> Result<()> {
match value {
Value::Null => out.push(TAG_NULL),
Value::Bool(b) => {
out.push(TAG_BOOL);
out.push(u8::from(*b));
}
Value::Int(n) => {
out.push(TAG_INT);
out.extend_from_slice(&n.to_le_bytes());
}
Value::Float(f) => {
out.push(TAG_FLOAT);
out.extend_from_slice(&f.to_bits().to_le_bytes());
}
Value::Str(s) => {
out.push(TAG_STR);
write_len(out, s.len())?;
out.extend_from_slice(s.as_bytes());
}
Value::Bytes(b) => {
out.push(TAG_BYTES);
write_len(out, b.len())?;
out.extend_from_slice(b);
}
Value::Array(items) => {
out.push(TAG_ARRAY);
write_len(out, items.len())?;
for item in items {
encode_value(out, item)?;
}
}
Value::Object(doc) => {
out.push(TAG_OBJECT);
encode_document_into(out, doc)?;
}
}
Ok(())
}
fn write_len(out: &mut Vec<u8>, len: usize) -> Result<()> {
let len = u32::try_from(len).map_err(|_| Error::ValueTooLarge)?;
out.extend_from_slice(&len.to_le_bytes());
Ok(())
}
pub(crate) fn decode_document(bytes: &[u8]) -> Result<Document> {
let mut reader = Reader::new(bytes);
let doc = read_document(&mut reader)?;
if reader.remaining() != 0 {
return Err(Error::Corrupt("trailing bytes after document"));
}
Ok(doc)
}
fn read_document(reader: &mut Reader<'_>) -> Result<Document> {
let field_count = reader.read_len()?;
let mut doc = Document::with_capacity(field_count.min(reader.remaining()));
for _ in 0..field_count {
let key_len = reader.read_len()?;
let key_bytes = reader.take(key_len)?;
let key = core::str::from_utf8(key_bytes)
.map_err(|_| Error::Corrupt("invalid utf-8 in field name"))?;
let value = read_value(reader)?;
let _ = doc.set(String::from(key), value);
}
Ok(doc)
}
fn read_value(reader: &mut Reader<'_>) -> Result<Value> {
let tag = reader.read_u8()?;
match tag {
TAG_NULL => Ok(Value::Null),
TAG_BOOL => Ok(Value::Bool(reader.read_u8()? != 0)),
TAG_INT => Ok(Value::Int(reader.read_i64()?)),
TAG_FLOAT => Ok(Value::Float(f64::from_bits(reader.read_u64()?))),
TAG_STR => {
let len = reader.read_len()?;
let bytes = reader.take(len)?;
let s = core::str::from_utf8(bytes)
.map_err(|_| Error::Corrupt("invalid utf-8 in string value"))?;
Ok(Value::Str(String::from(s)))
}
TAG_BYTES => {
let len = reader.read_len()?;
Ok(Value::Bytes(reader.take(len)?.to_vec()))
}
TAG_ARRAY => {
let len = reader.read_len()?;
let mut items = Vec::with_capacity(len.min(reader.remaining()));
for _ in 0..len {
items.push(read_value(reader)?);
}
Ok(Value::Array(items))
}
TAG_OBJECT => Ok(Value::Object(read_document(reader)?)),
_ => Err(Error::Corrupt("unknown value tag")),
}
}
struct Reader<'a> {
buf: &'a [u8],
pos: usize,
}
impl<'a> Reader<'a> {
#[inline]
fn new(buf: &'a [u8]) -> Self {
Reader { buf, pos: 0 }
}
#[inline]
fn remaining(&self) -> usize {
self.buf.len() - self.pos
}
#[inline]
fn take(&mut self, n: usize) -> Result<&'a [u8]> {
let end = self
.pos
.checked_add(n)
.ok_or(Error::Corrupt("length overflow"))?;
if end > self.buf.len() {
return Err(Error::Corrupt("unexpected end of record"));
}
let slice = &self.buf[self.pos..end];
self.pos = end;
Ok(slice)
}
#[inline]
fn read_u8(&mut self) -> Result<u8> {
Ok(self.take(1)?[0])
}
#[inline]
fn read_u32(&mut self) -> Result<u32> {
let b = self.take(4)?;
Ok(u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
}
#[inline]
fn read_u64(&mut self) -> Result<u64> {
let b = self.take(8)?;
Ok(u64::from_le_bytes([
b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7],
]))
}
#[inline]
fn read_i64(&mut self) -> Result<i64> {
Ok(self.read_u64()? as i64)
}
#[inline]
fn read_len(&mut self) -> Result<usize> {
Ok(self.read_u32()? as usize)
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
fn roundtrip(doc: &Document) -> Document {
let mut buf = Vec::new();
encode_document_into(&mut buf, doc).expect("encode");
decode_document(&buf).expect("decode")
}
#[test]
fn test_roundtrip_preserves_all_variants() {
let mut nested = Document::new();
nested.set("inner", 7_i64);
let mut doc = Document::new();
doc.set("null", Value::Null)
.set("bool", true)
.set("int", -42_i64)
.set("float", 2.5_f64)
.set("str", "héllo")
.set("bytes", Value::Bytes(vec![0, 255, 7]))
.set("array", Value::Array(vec![Value::from(1_i64), Value::Null]))
.set("object", Value::Object(nested));
assert_eq!(roundtrip(&doc), doc);
}
#[test]
fn test_empty_document_roundtrips() {
assert_eq!(roundtrip(&Document::new()), Document::new());
}
#[test]
fn test_decode_truncated_buffer_is_corrupt() {
let mut doc = Document::new();
doc.set("k", "value");
let mut buf = Vec::new();
encode_document_into(&mut buf, &doc).unwrap();
buf.truncate(buf.len() - 2);
assert!(matches!(decode_document(&buf), Err(Error::Corrupt(_))));
}
#[test]
fn test_decode_unknown_tag_is_corrupt() {
let mut buf = Vec::new();
write_len(&mut buf, 1).unwrap();
write_len(&mut buf, 1).unwrap();
buf.push(b'k');
buf.push(200);
assert!(matches!(decode_document(&buf), Err(Error::Corrupt(_))));
}
#[test]
fn test_decode_rejects_trailing_bytes() {
let mut buf = Vec::new();
encode_document_into(&mut buf, &Document::new()).unwrap();
buf.push(0);
assert!(matches!(decode_document(&buf), Err(Error::Corrupt(_))));
}
#[test]
fn test_crc_detects_single_bit_flip() {
let a = crc32c(b"the quick brown fox");
let b = crc32c(b"the quick brown fox!");
assert_ne!(a, b);
assert_eq!(crc32c(b"abc"), crc32c(b"abc"));
}
#[test]
fn test_hostile_array_length_does_not_allocate_unbounded() {
let mut buf = Vec::new();
write_len(&mut buf, 1).unwrap(); write_len(&mut buf, 0).unwrap(); buf.push(TAG_ARRAY);
buf.extend_from_slice(&u32::MAX.to_le_bytes());
assert!(matches!(decode_document(&buf), Err(Error::Corrupt(_))));
}
}