use std::io::{Read, Write};
pub const VARINT_LEN_BIAS: u8 = 247;
pub const VARINT_MAX_SINGLE_BYTE: u64 = VARINT_LEN_BIAS as u64;
pub mod compact_node_types {
pub const N4: u8 = 0;
pub const N16: u8 = 1;
pub const N48: u8 = 2;
pub const N256: u8 = 3;
}
pub const COMPACT_NODE_TYPE_N4: u8 = compact_node_types::N4;
pub const COMPACT_NODE_TYPE_N16: u8 = compact_node_types::N16;
pub const COMPACT_NODE_TYPE_N48: u8 = compact_node_types::N48;
pub const COMPACT_NODE_TYPE_N256: u8 = compact_node_types::N256;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CompactHeader {
pub ptr_width: u8,
pub num_children: u8,
pub has_value: bool,
pub has_prefix: bool,
pub prefix_len: u8,
pub node_type: u8,
}
pub const COMPACT_HEADER_SIZE: usize = 2;
impl CompactHeader {
pub fn new(
ptr_width: u8,
num_children: u8,
has_value: bool,
prefix_len: u8,
node_type: u8,
) -> Self {
debug_assert!(ptr_width >= 1 && ptr_width <= 6);
debug_assert!(prefix_len <= 12);
debug_assert!(node_type <= 3);
Self {
ptr_width,
num_children,
has_value,
has_prefix: prefix_len > 0,
prefix_len,
node_type,
}
}
pub fn to_bytes(&self) -> [u8; 2] {
let stored_children = self.num_children.min(7);
let b0 = ((self.ptr_width - 1) & 0x07)
| ((stored_children & 0x07) << 3)
| ((self.has_value as u8) << 6)
| ((self.has_prefix as u8) << 7);
let b1 = (self.prefix_len & 0x0F)
| ((self.node_type & 0x03) << 4)
| if self.num_children > 7 { 0x40 } else { 0 };
[b0, b1]
}
pub fn to_bytes_with_extended(&self) -> ([u8; 2], Option<u8>) {
let bytes = self.to_bytes();
if self.num_children > 7 {
(bytes, Some(self.num_children))
} else {
(bytes, None)
}
}
pub fn from_bytes(bytes: [u8; 2]) -> Self {
let b0 = bytes[0];
let b1 = bytes[1];
let ptr_width = (b0 & 0x07) + 1;
let stored_children = (b0 >> 3) & 0x07;
let has_value = (b0 >> 6) & 0x01 != 0;
let has_prefix = (b0 >> 7) != 0;
let prefix_len = b1 & 0x0F;
let node_type = (b1 >> 4) & 0x03;
let has_extended = (b1 >> 6) & 0x01 != 0;
let num_children = if has_extended { 7 } else { stored_children };
Self {
ptr_width,
num_children,
has_value,
has_prefix,
prefix_len,
node_type,
}
}
pub fn from_bytes_with_extended(data: &[u8], offset: &mut usize) -> Self {
let mut header = Self::from_bytes([data[*offset], data[*offset + 1]]);
*offset += COMPACT_HEADER_SIZE;
if header.num_children == 7 && (data[*offset - 1] & 0x40) != 0 && *offset < data.len() {
header.num_children = data[*offset];
*offset += 1;
}
header
}
pub fn needs_extended_count(&self) -> bool {
self.num_children > 7
}
pub fn data_size(&self) -> usize {
COMPACT_HEADER_SIZE
+ if self.needs_extended_count() { 1 } else { 0 }
+ self.prefix_len as usize + self.num_children as usize + (self.num_children as usize * self.ptr_width as usize) + if self.has_value { self.ptr_width as usize } else { 0 } }
}
pub fn write_varint<W: Write>(value: u64, writer: &mut W) -> std::io::Result<usize> {
if value <= VARINT_MAX_SINGLE_BYTE {
writer.write_all(&[value as u8])?;
Ok(1)
} else {
let bytes = value.to_le_bytes();
let len = required_bytes_for_value(value);
writer.write_all(&[VARINT_LEN_BIAS + len as u8])?;
writer.write_all(&bytes[..len])?;
Ok(1 + len)
}
}
pub fn write_varint_to_vec(value: u64, out: &mut Vec<u8>) {
if value <= VARINT_MAX_SINGLE_BYTE {
out.push(value as u8);
} else {
let bytes = value.to_le_bytes();
let len = required_bytes_for_value(value);
out.push(VARINT_LEN_BIAS + len as u8);
out.extend_from_slice(&bytes[..len]);
}
}
pub fn read_varint<R: Read>(reader: &mut R) -> std::io::Result<(u64, usize)> {
let mut first = [0u8; 1];
reader.read_exact(&mut first)?;
if first[0] <= VARINT_LEN_BIAS {
Ok((first[0] as u64, 1))
} else {
let len = (first[0] - VARINT_LEN_BIAS) as usize;
let mut bytes = [0u8; 8];
reader.read_exact(&mut bytes[..len])?;
Ok((u64::from_le_bytes(bytes), 1 + len))
}
}
pub fn read_varint_from_slice(data: &[u8]) -> (u64, usize) {
let first = data[0];
if first <= VARINT_LEN_BIAS {
(first as u64, 1)
} else {
let len = (first - VARINT_LEN_BIAS) as usize;
let mut bytes = [0u8; 8];
bytes[..len].copy_from_slice(&data[1..1 + len]);
(u64::from_le_bytes(bytes), 1 + len)
}
}
#[inline]
pub fn read_varint_branchless(data: &[u8]) -> (u64, usize) {
let first = data[0];
if first <= VARINT_LEN_BIAS {
return (first as u64, 1);
}
let len = (first - VARINT_LEN_BIAS) as usize;
let rest = if data.len() >= 9 {
unsafe { data.as_ptr().add(1).cast::<u64>().read_unaligned() }
} else {
let mut buf = [0u8; 8];
let copy_len = (data.len() - 1).min(8);
buf[..copy_len].copy_from_slice(&data[1..1 + copy_len]);
u64::from_le_bytes(buf)
};
let zeros = 64 - (len * 8);
let value = if zeros < 64 {
(rest << zeros) >> zeros
} else {
rest
};
(value, len + 1)
}
pub fn required_bytes_for_value(value: u64) -> usize {
if value == 0 {
1
} else {
((64 - value.leading_zeros()) as usize + 7) / 8
}
}
pub fn varint_size(value: u64) -> usize {
if value <= VARINT_MAX_SINGLE_BYTE {
1
} else {
1 + required_bytes_for_value(value)
}
}
pub fn determine_ptr_width(max_offset: u64) -> u8 {
match max_offset {
0..=0xFF => 1,
0x100..=0xFFFF => 2,
0x10000..=0xFFFFFF => 3,
0x1000000..=0xFFFFFFFF => 4,
0x100000000..=0xFFFFFFFFFF => 5,
_ => 6,
}
}
pub fn write_fixed_width<W: Write>(value: u64, width: u8, writer: &mut W) -> std::io::Result<()> {
let bytes = value.to_le_bytes();
writer.write_all(&bytes[..width as usize])
}
pub fn write_fixed_width_to_vec(value: u64, width: u8, out: &mut Vec<u8>) {
let bytes = value.to_le_bytes();
out.extend_from_slice(&bytes[..width as usize]);
}
pub fn read_fixed_width<R: Read>(width: u8, reader: &mut R) -> std::io::Result<u64> {
let mut bytes = [0u8; 8];
reader.read_exact(&mut bytes[..width as usize])?;
Ok(u64::from_le_bytes(bytes))
}
pub fn read_fixed_width_from_slice(data: &[u8], offset: &mut usize, width: u8) -> u64 {
let mut bytes = [0u8; 8];
let end = *offset + width as usize;
bytes[..width as usize].copy_from_slice(&data[*offset..end]);
*offset = end;
u64::from_le_bytes(bytes)
}
pub fn read_n_values_from_slice(
data: &[u8],
offset: &mut usize,
count: usize,
width: u8,
) -> Vec<u64> {
let mut values = Vec::with_capacity(count);
for _ in 0..count {
values.push(read_fixed_width_from_slice(data, offset, width));
}
values
}
pub fn write_n_values_to_vec(values: &[u64], width: u8, out: &mut Vec<u8>) {
for &value in values {
write_fixed_width_to_vec(value, width, out);
}
}
pub fn encode_compact_byte_node(
header: &CompactHeader,
prefix: &[u8],
keys: &[u8],
children: &[u64],
value_ptr: Option<u64>,
) -> Vec<u8> {
assert_eq!(keys.len(), children.len());
let mut out = Vec::with_capacity(header.data_size());
let (header_bytes, extended_count) = header.to_bytes_with_extended();
out.extend_from_slice(&header_bytes);
if let Some(count) = extended_count {
out.push(count);
}
out.extend_from_slice(prefix);
out.extend_from_slice(keys);
for &child in children {
write_fixed_width_to_vec(child, header.ptr_width, &mut out);
}
if let Some(vp) = value_ptr {
write_fixed_width_to_vec(vp, header.ptr_width, &mut out);
}
out
}
pub fn encode_compact_byte_node_auto(
node_type: u8,
prefix: &[u8],
keys: &[u8],
children: &[u64],
value_ptr: Option<u64>,
max_arena_offset: u64,
) -> Vec<u8> {
assert_eq!(keys.len(), children.len());
let max_ptr = children
.iter()
.copied()
.max()
.unwrap_or(0)
.max(value_ptr.unwrap_or(0))
.max(max_arena_offset);
let ptr_width = determine_ptr_width(max_ptr);
let header = CompactHeader::new(
ptr_width,
keys.len() as u8,
value_ptr.is_some(),
prefix.len() as u8,
node_type,
);
encode_compact_byte_node(&header, prefix, keys, children, value_ptr)
}
#[derive(Debug, Clone)]
pub struct DecodedCompactByteNode {
pub header: CompactHeader,
pub prefix: Vec<u8>,
pub keys: Vec<u8>,
pub children: Vec<u64>,
pub value_ptr: Option<u64>,
}
pub fn decode_compact_byte_node(data: &[u8]) -> DecodedCompactByteNode {
let mut offset = 0;
let header = CompactHeader::from_bytes_with_extended(data, &mut offset);
let prefix: Vec<u8> = data[offset..offset + header.prefix_len as usize].to_vec();
offset += header.prefix_len as usize;
let keys: Vec<u8> = data[offset..offset + header.num_children as usize].to_vec();
offset += header.num_children as usize;
let children = read_n_values_from_slice(
data,
&mut offset,
header.num_children as usize,
header.ptr_width,
);
let value_ptr = if header.has_value {
Some(read_fixed_width_from_slice(
data,
&mut offset,
header.ptr_width,
))
} else {
None
};
DecodedCompactByteNode {
header,
prefix,
keys,
children,
value_ptr,
}
}
pub fn compact_byte_node_size(
prefix_len: usize,
num_children: usize,
has_value: bool,
max_ptr: u64,
) -> usize {
let ptr_width = determine_ptr_width(max_ptr) as usize;
COMPACT_HEADER_SIZE
+ if num_children > 7 { 1 } else { 0 } + prefix_len + num_children + (num_children * ptr_width) + if has_value { ptr_width } else { 0 } }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_varint_single_byte() {
for v in 0..=VARINT_MAX_SINGLE_BYTE {
let mut buf = Vec::new();
write_varint_to_vec(v, &mut buf);
assert_eq!(buf.len(), 1);
let (decoded, consumed) = read_varint_from_slice(&buf);
assert_eq!(decoded, v);
assert_eq!(consumed, 1);
}
}
#[test]
fn test_varint_multi_byte() {
let values = [
248u64,
255,
256,
1000,
65535,
65536,
0xFFFFFF,
0xFFFFFFFF,
u64::MAX,
];
for &v in &values {
let mut buf = Vec::new();
write_varint_to_vec(v, &mut buf);
assert!(buf.len() > 1);
let (decoded, consumed) = read_varint_from_slice(&buf);
assert_eq!(decoded, v);
assert_eq!(consumed, buf.len());
}
}
#[test]
fn test_varint_branchless() {
let values = [0u64, 1, 100, 247, 248, 1000, 65535, 0xFFFFFF, u64::MAX];
for &v in &values {
let mut buf = Vec::new();
write_varint_to_vec(v, &mut buf);
buf.extend_from_slice(&[0u8; 8]);
let (decoded, consumed) = read_varint_branchless(&buf);
assert_eq!(decoded, v);
assert_eq!(consumed, varint_size(v));
}
}
#[test]
fn test_compact_header_roundtrip() {
let header = CompactHeader::new(3, 4, true, 5, compact_node_types::N4);
let bytes = header.to_bytes();
assert_eq!(bytes.len(), 2);
let decoded = CompactHeader::from_bytes(bytes);
assert_eq!(decoded.ptr_width, 3);
assert_eq!(decoded.num_children, 4);
assert_eq!(decoded.has_value, true);
assert_eq!(decoded.prefix_len, 5);
assert_eq!(decoded.node_type, compact_node_types::N4);
}
#[test]
fn test_encode_decode_compact_byte_node() {
let prefix = b"hel".to_vec();
let keys = b"lp".to_vec();
let children = vec![100u64, 200u64];
let value_ptr = Some(300u64);
let encoded = encode_compact_byte_node_auto(
compact_node_types::N4,
&prefix,
&keys,
&children,
value_ptr,
1000,
);
assert_eq!(encoded.len(), 13);
let decoded = decode_compact_byte_node(&encoded);
assert_eq!(decoded.prefix, prefix);
assert_eq!(decoded.keys, keys);
assert_eq!(decoded.children, children);
assert_eq!(decoded.value_ptr, Some(300));
assert!(decoded.header.has_value);
}
#[test]
fn test_space_savings_byte_node() {
let prefix = b"abc".to_vec();
let keys = b"de".to_vec();
let children = vec![100u64, 200u64];
let encoded = encode_compact_byte_node_auto(
compact_node_types::N4,
&prefix,
&keys,
&children,
None,
1000,
);
assert_eq!(encoded.len(), 11);
assert!(encoded.len() < 15);
}
}