use std::io::{Read, Write};
use crate::persistent_artrie::error::{PersistentARTrieError, Result};
use crate::persistent_artrie::swizzled_ptr::SwizzledPtr;
use super::nodes::{
CharBucket, CharCompressedPrefix, CharNode, CharNode16, CharNode4, CharNode48, CharNodeHeader,
CHAR_MAX_PREFIX_LEN,
};
use super::compact_encoding::{
decode_compact_node, determine_key_width, determine_ptr_width, encode_compact_node,
CompactHeader, DecodedCompactNode, COMPACT_NODE_TYPE_BUCKET, COMPACT_NODE_TYPE_N16,
COMPACT_NODE_TYPE_N4, COMPACT_NODE_TYPE_N48,
};
use super::arena_manager::ArenaSlot;
use super::relative_encoding::{
encode_child_pointer, encode_sequential_siblings, try_decode_children,
try_decode_sequential_siblings, RelativeEncodingError, SerializationContext,
};
fn io_err(e: std::io::Error) -> PersistentARTrieError {
PersistentARTrieError::io_error("char serialization", "<buffer>", e)
}
pub const CHAR_NODE_MAGIC: [u8; 4] = *b"ARC\0";
pub const CHAR_FORMAT_VERSION: u8 = 2;
pub const CHAR_SERIALIZED_HEADER_SIZE: usize = 16;
pub mod char_node_types {
pub const CHARNODE4: u8 = 104;
pub const CHARNODE16: u8 = 116;
pub const CHARNODE48: u8 = 148;
pub const CHARBUCKET: u8 = 101;
}
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct SerializedCharNodeHeader {
pub magic: [u8; 4],
pub version: u8,
pub node_type: u8,
pub flags: u8,
pub reserved: u8,
pub num_children: u16,
pub prefix_len: u8,
pub _padding: u8,
pub data_size: u32,
}
impl SerializedCharNodeHeader {
pub fn from_node_header(header: &CharNodeHeader, data_size: u32) -> Self {
Self {
magic: CHAR_NODE_MAGIC,
version: CHAR_FORMAT_VERSION,
node_type: header.node_type,
flags: header.flags,
reserved: 0,
num_children: header.num_children,
prefix_len: header.prefix_len,
_padding: 0,
data_size,
}
}
pub fn from_node_header_v2(
header: &CharNodeHeader,
data_size: u32,
encoding_flags: u8,
) -> Self {
Self {
magic: CHAR_NODE_MAGIC,
version: CHAR_FORMAT_VERSION,
node_type: header.node_type,
flags: (header.flags & 0x3F) | (encoding_flags & 0xC0),
reserved: 0,
num_children: header.num_children,
prefix_len: header.prefix_len,
_padding: 0,
data_size,
}
}
#[inline]
pub fn uses_relative_offsets(&self) -> bool {
self.flags & 0x80 != 0 }
#[inline]
pub fn uses_sequential_siblings(&self) -> bool {
self.flags & 0x40 != 0 }
pub fn to_node_header(&self) -> CharNodeHeader {
CharNodeHeader {
node_type: self.node_type,
prefix_len: self.prefix_len,
flags: self.flags & 0x3F,
_padding: 0,
num_children: self.num_children,
_padding2: [0; 2],
version: 0, }
}
pub fn validate(&self) -> Result<()> {
if self.magic != CHAR_NODE_MAGIC {
return Err(PersistentARTrieError::InvalidMagic {
expected: u64::from_le_bytes([
CHAR_NODE_MAGIC[0],
CHAR_NODE_MAGIC[1],
CHAR_NODE_MAGIC[2],
CHAR_NODE_MAGIC[3],
0,
0,
0,
0,
]),
found: u64::from_le_bytes([
self.magic[0],
self.magic[1],
self.magic[2],
self.magic[3],
0,
0,
0,
0,
]),
});
}
if self.version > CHAR_FORMAT_VERSION {
return Err(PersistentARTrieError::UnsupportedVersion {
max_supported: CHAR_FORMAT_VERSION as u32,
found: self.version as u32,
});
}
match self.node_type {
char_node_types::CHARNODE4
| char_node_types::CHARNODE16
| char_node_types::CHARNODE48
| char_node_types::CHARBUCKET => {}
_ => {
return Err(PersistentARTrieError::corrupted(format!(
"invalid char node type: {}",
self.node_type
)));
}
}
if self.reserved != 0 || self._padding != 0 {
return Err(PersistentARTrieError::corrupted(format!(
"nonzero reserved char node header bytes: reserved={}, padding={}",
self.reserved, self._padding
)));
}
if self.prefix_len as usize > CHAR_MAX_PREFIX_LEN {
return Err(PersistentARTrieError::corrupted(format!(
"prefix length {} exceeds maximum {}",
self.prefix_len, CHAR_MAX_PREFIX_LEN
)));
}
if let Some(max_children) = fixed_node_child_capacity(self.node_type) {
if self.num_children as usize > max_children {
return Err(PersistentARTrieError::corrupted(format!(
"char node type {} declares {} children, capacity is {}",
self.node_type, self.num_children, max_children
)));
}
}
Ok(())
}
pub fn to_bytes(&self) -> [u8; CHAR_SERIALIZED_HEADER_SIZE] {
let mut bytes = [0u8; CHAR_SERIALIZED_HEADER_SIZE];
bytes[0..4].copy_from_slice(&self.magic);
bytes[4] = self.version;
bytes[5] = self.node_type;
bytes[6] = self.flags;
bytes[7] = self.reserved;
bytes[8..10].copy_from_slice(&self.num_children.to_le_bytes());
bytes[10] = self.prefix_len;
bytes[11] = self._padding;
bytes[12..16].copy_from_slice(&self.data_size.to_le_bytes());
bytes
}
pub fn from_bytes(bytes: &[u8; CHAR_SERIALIZED_HEADER_SIZE]) -> Self {
Self {
magic: [bytes[0], bytes[1], bytes[2], bytes[3]],
version: bytes[4],
node_type: bytes[5],
flags: bytes[6],
reserved: bytes[7],
num_children: u16::from_le_bytes([bytes[8], bytes[9]]),
prefix_len: bytes[10],
_padding: bytes[11],
data_size: u32::from_le_bytes([bytes[12], bytes[13], bytes[14], bytes[15]]),
}
}
}
fn fixed_node_child_capacity(node_type: u8) -> Option<usize> {
match node_type {
char_node_types::CHARNODE4 => Some(4),
char_node_types::CHARNODE16 => Some(16),
char_node_types::CHARNODE48 => Some(48),
char_node_types::CHARBUCKET => None,
_ => None,
}
}
fn checked_layout_add(left: usize, right: usize, context: &str) -> Result<usize> {
left.checked_add(right).ok_or_else(|| {
PersistentARTrieError::corrupted(format!("char node layout size overflow: {context}"))
})
}
fn validate_v2_header_layout(header: &SerializedCharNodeHeader) -> Result<()> {
header.validate()?;
if header.uses_sequential_siblings() && !header.uses_relative_offsets() {
return Err(PersistentARTrieError::corrupted(
"char v2 sequential-sibling flag requires relative-offset flag",
));
}
if header.uses_sequential_siblings() && header.num_children == 0 {
return Err(PersistentARTrieError::corrupted(
"char v2 sequential-sibling layout requires at least one child",
));
}
Ok(())
}
fn ensure_fixed_node_data_size(
header: &SerializedCharNodeHeader,
key_bytes: usize,
child_capacity: usize,
) -> Result<()> {
let prefix_size = header_prefix_size(header);
let child_bytes = child_capacity
.checked_mul(8)
.ok_or_else(|| PersistentARTrieError::corrupted("char fixed child layout size overflow"))?;
let expected = checked_layout_add(prefix_size, key_bytes, "fixed keys")?;
let expected = checked_layout_add(expected, child_bytes, "fixed children")?;
let expected = checked_layout_add(expected, 8, "fixed value pointer")?;
if header.data_size as usize != expected {
return Err(PersistentARTrieError::corrupted(format!(
"noncanonical char fixed node data_size: got {}, expected {}",
header.data_size, expected
)));
}
Ok(())
}
fn ensure_bucket_entry_count(header: &SerializedCharNodeHeader, num_entries: usize) -> Result<()> {
if num_entries != header.num_children as usize {
return Err(PersistentARTrieError::corrupted(format!(
"char bucket header declares {} children but payload has {} entries",
header.num_children, num_entries
)));
}
Ok(())
}
fn ensure_bucket_fixed_data_size(
header: &SerializedCharNodeHeader,
num_entries: usize,
) -> Result<()> {
let prefix_size = header_prefix_size(header);
let entry_bytes = num_entries.checked_mul(12).ok_or_else(|| {
PersistentARTrieError::corrupted("char bucket fixed entry layout size overflow")
})?;
let expected = checked_layout_add(prefix_size, 4, "bucket entry count")?;
let expected = checked_layout_add(expected, 8, "bucket value pointer")?;
let expected = checked_layout_add(expected, entry_bytes, "bucket entries")?;
if header.data_size as usize != expected {
return Err(PersistentARTrieError::corrupted(format!(
"noncanonical char bucket fixed data_size: got {}, expected {}",
header.data_size, expected
)));
}
Ok(())
}
pub fn char_serialized_size(node: &CharNode) -> usize {
CHAR_SERIALIZED_HEADER_SIZE + char_prefix_size(node) + char_node_data_size(node)
}
fn char_prefix_size(node: &CharNode) -> usize {
if node.header().prefix_len > 0 {
CHAR_MAX_PREFIX_LEN * 4 } else {
0
}
}
fn char_node_data_size(node: &CharNode) -> usize {
match node {
CharNode::N4(_) => 4 * 4 + 4 * 8 + 8,
CharNode::N16(_) => 16 * 4 + 16 * 8 + 8,
CharNode::N48(_) => 48 * 4 + 48 * 8 + 8,
CharNode::Bucket(n) => 4 + 8 + n.entries.len() * 12,
}
}
pub fn serialize_char_node<W: Write>(node: &CharNode, writer: &mut W) -> Result<usize> {
let data_size = char_prefix_size(node) + char_node_data_size(node);
let header = SerializedCharNodeHeader::from_node_header(node.header(), data_size as u32);
writer.write_all(&header.to_bytes()).map_err(io_err)?;
if node.header().prefix_len > 0 {
let prefix = node.prefix();
for &c in &prefix.chars {
writer.write_all(&c.to_le_bytes()).map_err(io_err)?;
}
}
match node {
CharNode::N4(n) => serialize_charnode4(n, writer)?,
CharNode::N16(n) => serialize_charnode16(n, writer)?,
CharNode::N48(n) => serialize_charnode48(n, writer)?,
CharNode::Bucket(n) => serialize_charbucket(n, writer)?,
}
Ok(CHAR_SERIALIZED_HEADER_SIZE + data_size)
}
fn serialize_charnode4<W: Write>(node: &CharNode4, writer: &mut W) -> Result<()> {
for key in &node.keys {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
for child in &node.children {
let raw = child.to_raw();
writer.write_all(&raw.to_le_bytes()).map_err(io_err)?;
}
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
Ok(())
}
fn serialize_charnode16<W: Write>(node: &CharNode16, writer: &mut W) -> Result<()> {
for key in &node.keys {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
for child in &node.children {
let raw = child.to_raw();
writer.write_all(&raw.to_le_bytes()).map_err(io_err)?;
}
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
Ok(())
}
fn serialize_charnode48<W: Write>(node: &CharNode48, writer: &mut W) -> Result<()> {
for key in &node.keys {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
for child in &node.children {
let raw = child.to_raw();
writer.write_all(&raw.to_le_bytes()).map_err(io_err)?;
}
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
Ok(())
}
fn serialize_charbucket<W: Write>(node: &CharBucket, writer: &mut W) -> Result<()> {
let num_entries = node.entries.len() as u32;
writer
.write_all(&num_entries.to_le_bytes())
.map_err(io_err)?;
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
let mut entries: Vec<_> = node.entries.iter().collect();
entries.sort_by_key(|&(k, _)| *k);
for (&key, child) in entries {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
let child_raw = child.to_raw();
writer.write_all(&child_raw.to_le_bytes()).map_err(io_err)?;
}
Ok(())
}
pub fn deserialize_char_node<R: Read>(reader: &mut R) -> Result<CharNode> {
let mut header_bytes = [0u8; CHAR_SERIALIZED_HEADER_SIZE];
reader.read_exact(&mut header_bytes).map_err(io_err)?;
let header = SerializedCharNodeHeader::from_bytes(&header_bytes);
header.validate()?;
let prefix = if header.prefix_len > 0 {
let mut chars = [0u32; CHAR_MAX_PREFIX_LEN];
for c in &mut chars {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*c = u32::from_le_bytes(bytes);
}
CharCompressedPrefix { chars }
} else {
CharCompressedPrefix::empty()
};
match header.node_type {
char_node_types::CHARNODE4 => deserialize_charnode4(reader, &header, prefix),
char_node_types::CHARNODE16 => deserialize_charnode16(reader, &header, prefix),
char_node_types::CHARNODE48 => deserialize_charnode48(reader, &header, prefix),
char_node_types::CHARBUCKET => deserialize_charbucket(reader, &header, prefix),
_ => Err(PersistentARTrieError::corrupted(format!(
"invalid char node type: {}",
header.node_type
))),
}
}
fn deserialize_charnode4<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
) -> Result<CharNode> {
let mut node = CharNode4::new();
node.header = header.to_node_header();
node.prefix = prefix;
for key in &mut node.keys {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*key = u32::from_le_bytes(bytes);
}
for child in &mut node.children {
let mut raw_bytes = [0u8; 8];
reader.read_exact(&mut raw_bytes).map_err(io_err)?;
*child = SwizzledPtr::from_raw(u64::from_le_bytes(raw_bytes));
}
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
Ok(CharNode::N4(Box::new(node)))
}
fn deserialize_charnode16<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
) -> Result<CharNode> {
let mut node = CharNode16::new();
node.header = header.to_node_header();
node.prefix = prefix;
for key in &mut node.keys {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*key = u32::from_le_bytes(bytes);
}
for child in &mut node.children {
let mut raw_bytes = [0u8; 8];
reader.read_exact(&mut raw_bytes).map_err(io_err)?;
*child = SwizzledPtr::from_raw(u64::from_le_bytes(raw_bytes));
}
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
Ok(CharNode::N16(Box::new(node)))
}
fn deserialize_charnode48<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
) -> Result<CharNode> {
let mut node = CharNode48::new();
node.header = header.to_node_header();
node.prefix = prefix;
for key in &mut node.keys {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*key = u32::from_le_bytes(bytes);
}
for child in &mut node.children {
let mut raw_bytes = [0u8; 8];
reader.read_exact(&mut raw_bytes).map_err(io_err)?;
*child = SwizzledPtr::from_raw(u64::from_le_bytes(raw_bytes));
}
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
Ok(CharNode::N48(Box::new(node)))
}
fn deserialize_charbucket<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
) -> Result<CharNode> {
let mut node = CharBucket::new();
node.header = header.to_node_header();
node.prefix = prefix;
let mut num_entries_bytes = [0u8; 4];
reader.read_exact(&mut num_entries_bytes).map_err(io_err)?;
let num_entries = u32::from_le_bytes(num_entries_bytes) as usize;
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
for _ in 0..num_entries {
let mut key_bytes = [0u8; 4];
reader.read_exact(&mut key_bytes).map_err(io_err)?;
let key = u32::from_le_bytes(key_bytes);
let mut child_bytes = [0u8; 8];
reader.read_exact(&mut child_bytes).map_err(io_err)?;
let child = SwizzledPtr::from_raw(u64::from_le_bytes(child_bytes));
node.entries.insert(key, child);
}
Ok(CharNode::Bucket(Box::new(node)))
}
pub fn char_to_bytes(node: &CharNode) -> Result<Vec<u8>> {
let mut buffer = Vec::with_capacity(char_serialized_size(node));
serialize_char_node(node, &mut buffer)?;
Ok(buffer)
}
pub fn char_from_bytes(bytes: &[u8]) -> Result<CharNode> {
let mut reader = std::io::Cursor::new(bytes);
deserialize_char_node(&mut reader)
}
pub fn char_to_bytes_compact(node: &CharNode, max_ptr_value: u64) -> Vec<u8> {
let (keys, children, prefix_chars, value_ptr, node_type, flags) = extract_node_data(node);
let max_key = keys
.iter()
.chain(prefix_chars.iter())
.copied()
.max()
.unwrap_or(0);
let key_width = determine_key_width(max_key);
let ptr_width = determine_ptr_width(max_ptr_value);
let header = CompactHeader {
key_width,
ptr_width,
num_children: children.len() as u8,
has_value: value_ptr.is_some(),
prefix_len: prefix_chars.len() as u8,
node_type,
flags,
};
encode_compact_node(&header, &prefix_chars, &keys, &children, value_ptr)
}
pub fn char_from_bytes_compact(bytes: &[u8]) -> Result<CharNode> {
let decoded = decode_compact_node(bytes);
reconstruct_node_from_decoded(decoded)
}
pub fn char_compact_serialized_size(node: &CharNode, max_ptr_value: u64) -> usize {
let (keys, children, prefix_chars, value_ptr, _node_type, _flags) = extract_node_data(node);
let max_key = keys
.iter()
.chain(prefix_chars.iter())
.copied()
.max()
.unwrap_or(0);
let key_width = determine_key_width(max_key) as usize;
let ptr_width = determine_ptr_width(max_ptr_value) as usize;
use super::compact_encoding::COMPACT_HEADER_SIZE;
let num_children = children.len();
COMPACT_HEADER_SIZE
+ if num_children > 15 { 1 } else { 0 } + (prefix_chars.len() * key_width)
+ (num_children * key_width)
+ (num_children * ptr_width)
+ if value_ptr.is_some() { ptr_width } else { 0 }
}
fn extract_node_data(node: &CharNode) -> (Vec<u32>, Vec<u64>, Vec<u32>, Option<u64>, u8, u8) {
match node {
CharNode::N4(n) => {
let num_children = n.header.num_children as usize;
let keys: Vec<u32> = n.keys[..num_children].to_vec();
let children: Vec<u64> = n.children[..num_children]
.iter()
.map(|p| p.to_raw())
.collect();
let prefix_chars: Vec<u32> = n.prefix.chars[..n.header.prefix_len as usize].to_vec();
let value_ptr = if n.value_ptr.is_null() {
None
} else {
Some(n.value_ptr.to_raw())
};
(
keys,
children,
prefix_chars,
value_ptr,
COMPACT_NODE_TYPE_N4,
n.header.flags,
)
}
CharNode::N16(n) => {
let num_children = n.header.num_children as usize;
let keys: Vec<u32> = n.keys[..num_children].to_vec();
let children: Vec<u64> = n.children[..num_children]
.iter()
.map(|p| p.to_raw())
.collect();
let prefix_chars: Vec<u32> = n.prefix.chars[..n.header.prefix_len as usize].to_vec();
let value_ptr = if n.value_ptr.is_null() {
None
} else {
Some(n.value_ptr.to_raw())
};
(
keys,
children,
prefix_chars,
value_ptr,
COMPACT_NODE_TYPE_N16,
n.header.flags,
)
}
CharNode::N48(n) => {
let num_children = n.header.num_children as usize;
let keys: Vec<u32> = n.keys[..num_children].to_vec();
let children: Vec<u64> = n.children[..num_children]
.iter()
.map(|p| p.to_raw())
.collect();
let prefix_chars: Vec<u32> = n.prefix.chars[..n.header.prefix_len as usize].to_vec();
let value_ptr = if n.value_ptr.is_null() {
None
} else {
Some(n.value_ptr.to_raw())
};
(
keys,
children,
prefix_chars,
value_ptr,
COMPACT_NODE_TYPE_N48,
n.header.flags,
)
}
CharNode::Bucket(n) => {
let mut entries: Vec<_> = n.entries.iter().collect();
entries.sort_by_key(|&(k, _)| *k);
let keys: Vec<u32> = entries.iter().map(|(&k, _)| k).collect();
let children: Vec<u64> = entries.iter().map(|(_, p)| p.to_raw()).collect();
let prefix_chars: Vec<u32> = n.prefix.chars[..n.header.prefix_len as usize].to_vec();
let value_ptr = if n.value_ptr.is_null() {
None
} else {
Some(n.value_ptr.to_raw())
};
(
keys,
children,
prefix_chars,
value_ptr,
COMPACT_NODE_TYPE_BUCKET,
n.header.flags,
)
}
}
}
fn reconstruct_node_from_decoded(decoded: DecodedCompactNode) -> Result<CharNode> {
let prefix = CharCompressedPrefix::from_chars(&decoded.prefix);
match decoded.header.node_type {
COMPACT_NODE_TYPE_N4 => {
let mut node = CharNode4::new();
node.header.prefix_len = decoded.header.prefix_len;
node.header.flags = decoded.header.flags;
node.header.num_children = decoded.header.num_children as u16;
node.prefix = prefix;
for (i, &key) in decoded.keys.iter().enumerate() {
if i < 4 {
node.keys[i] = key;
node.children[i] = SwizzledPtr::from_raw(decoded.children[i]);
}
}
if let Some(v) = decoded.value_ptr {
node.value_ptr = SwizzledPtr::from_raw(v);
}
Ok(CharNode::N4(Box::new(node)))
}
COMPACT_NODE_TYPE_N16 => {
let mut node = CharNode16::new();
node.header.prefix_len = decoded.header.prefix_len;
node.header.flags = decoded.header.flags;
node.header.num_children = decoded.header.num_children as u16;
node.prefix = prefix;
for (i, &key) in decoded.keys.iter().enumerate() {
if i < 16 {
node.keys[i] = key;
node.children[i] = SwizzledPtr::from_raw(decoded.children[i]);
}
}
if let Some(v) = decoded.value_ptr {
node.value_ptr = SwizzledPtr::from_raw(v);
}
Ok(CharNode::N16(Box::new(node)))
}
COMPACT_NODE_TYPE_N48 => {
let mut node = CharNode48::new();
node.header.prefix_len = decoded.header.prefix_len;
node.header.flags = decoded.header.flags;
node.header.num_children = decoded.header.num_children as u16;
node.prefix = prefix;
for (i, &key) in decoded.keys.iter().enumerate() {
if i < 48 {
node.keys[i] = key;
node.children[i] = SwizzledPtr::from_raw(decoded.children[i]);
}
}
if let Some(v) = decoded.value_ptr {
node.value_ptr = SwizzledPtr::from_raw(v);
}
Ok(CharNode::N48(Box::new(node)))
}
COMPACT_NODE_TYPE_BUCKET => {
let mut node = CharBucket::new();
node.header.prefix_len = decoded.header.prefix_len;
node.header.flags = decoded.header.flags;
node.header.num_children = decoded.header.num_children as u16;
node.prefix = prefix;
for (i, &key) in decoded.keys.iter().enumerate() {
node.entries
.insert(key, SwizzledPtr::from_raw(decoded.children[i]));
}
if let Some(v) = decoded.value_ptr {
node.value_ptr = SwizzledPtr::from_raw(v);
}
Ok(CharNode::Bucket(Box::new(node)))
}
_ => Err(PersistentARTrieError::corrupted(format!(
"invalid compact node type: {}",
decoded.header.node_type
))),
}
}
pub fn collect_char_child_slots(node: &CharNode) -> Vec<ArenaSlot> {
let mut slots = Vec::new();
match node {
CharNode::N4(n) => {
for i in 0..n.header.num_children as usize {
if !n.children[i].is_null() {
if let Some(slot) = ptr_to_arena_slot(&n.children[i]) {
slots.push(slot);
}
}
}
}
CharNode::N16(n) => {
for i in 0..n.header.num_children as usize {
if !n.children[i].is_null() {
if let Some(slot) = ptr_to_arena_slot(&n.children[i]) {
slots.push(slot);
}
}
}
}
CharNode::N48(n) => {
for i in 0..n.header.num_children as usize {
if !n.children[i].is_null() {
if let Some(slot) = ptr_to_arena_slot(&n.children[i]) {
slots.push(slot);
}
}
}
}
CharNode::Bucket(n) => {
let mut entries: Vec<_> = n.entries.iter().collect();
entries.sort_by_key(|&(k, _)| *k);
for (_, child) in entries {
if !child.is_null() {
if let Some(slot) = ptr_to_arena_slot(child) {
slots.push(slot);
}
}
}
}
}
slots
}
fn ptr_to_arena_slot(ptr: &SwizzledPtr) -> Option<ArenaSlot> {
let loc = ptr.disk_location()?;
let arena_id = loc.block_id.checked_sub(1)?;
Some(ArenaSlot::new(arena_id, loc.offset))
}
fn char_node_data_size_v2(
node: &CharNode,
ctx: &SerializationContext,
child_slots: &[ArenaSlot],
) -> usize {
if ctx.use_sequential && ctx.first_child_slot.is_some() {
let first_child = match ctx.first_child_slot {
Some(slot) => slot,
None => ctx.parent_slot,
};
let first_slot_size = if first_child.arena_id == ctx.parent_slot.arena_id {
use super::relative_encoding::encoded_size;
encoded_size(ctx.parent_slot, first_child)
} else {
super::relative_encoding::CROSS_ARENA_SIZE
};
match node {
CharNode::N4(_) => 4 * 4 + first_slot_size + 8, CharNode::N16(_) => 16 * 4 + first_slot_size + 8, CharNode::N48(_) => 48 * 4 + first_slot_size + 8, CharNode::Bucket(n) => 4 + first_slot_size + 8 + n.entries.len() * 4, }
} else {
let mut children_size = 0;
for slot in child_slots {
use super::relative_encoding::encoded_size;
children_size += encoded_size(ctx.parent_slot, *slot);
}
match node {
CharNode::N4(_) => 4 * 4 + children_size + 8, CharNode::N16(_) => 16 * 4 + children_size + 8, CharNode::N48(_) => 48 * 4 + children_size + 8, CharNode::Bucket(n) => 4 + children_size + 8 + n.entries.len() * 4, }
}
}
fn validate_v2_serialization_context(
node: &CharNode,
ctx: &SerializationContext,
child_slots: &[ArenaSlot],
) -> Result<()> {
let declared_children = node.header().num_children as usize;
if child_slots.len() != declared_children {
return Err(PersistentARTrieError::corrupted(format!(
"char v2 serialization saw {} disk children but header declares {}",
child_slots.len(),
declared_children
)));
}
if let CharNode::Bucket(bucket) = node {
if bucket.entries.len() != declared_children {
return Err(PersistentARTrieError::corrupted(format!(
"char v2 bucket header declares {} children but entries contain {}",
declared_children,
bucket.entries.len()
)));
}
}
if ctx.use_sequential {
if !ctx.use_relative {
return Err(PersistentARTrieError::corrupted(
"char v2 sequential serialization requires relative encoding",
));
}
if declared_children == 0 {
return Err(PersistentARTrieError::corrupted(
"char v2 sequential serialization requires at least one child",
));
}
let first_child = ctx.first_child_slot.ok_or_else(|| {
PersistentARTrieError::corrupted(
"char v2 sequential serialization missing first child slot",
)
})?;
for (idx, slot) in child_slots.iter().enumerate() {
let offset = u32::try_from(idx).map_err(|_| {
PersistentARTrieError::corrupted(
"char v2 sequential child index exceeds u32 slot range",
)
})?;
let expected_slot = first_child.slot_id.checked_add(offset).ok_or_else(|| {
PersistentARTrieError::corrupted(
"char v2 sequential child range overflows u32 slot range",
)
})?;
if slot.arena_id != first_child.arena_id || slot.slot_id != expected_slot {
return Err(PersistentARTrieError::corrupted(format!(
"char v2 sequential child mismatch at index {}: got {:?}, expected arena {} slot {}",
idx, slot, first_child.arena_id, expected_slot
)));
}
}
}
Ok(())
}
pub fn serialize_char_node_v2<W: Write>(
node: &CharNode,
writer: &mut W,
ctx: &SerializationContext,
) -> Result<usize> {
if !ctx.use_relative && !ctx.use_sequential {
return serialize_char_node(node, writer);
}
let child_slots = collect_char_child_slots(node);
validate_v2_serialization_context(node, ctx, &child_slots)?;
let data_size = char_prefix_size(node) + char_node_data_size_v2(node, ctx, &child_slots);
let header = SerializedCharNodeHeader::from_node_header_v2(
node.header(),
data_size as u32,
ctx.encoding_flags(),
);
writer.write_all(&header.to_bytes()).map_err(io_err)?;
if node.header().prefix_len > 0 {
let prefix = node.prefix();
for &c in &prefix.chars {
writer.write_all(&c.to_le_bytes()).map_err(io_err)?;
}
}
let mut children_buf = Vec::new();
if ctx.use_sequential {
let Some(first_child) = ctx.first_child_slot else {
return Err(PersistentARTrieError::corrupted(
"char v2 sequential serialization missing first child slot",
));
};
encode_sequential_siblings(ctx.parent_slot, first_child, &mut children_buf);
} else {
for &slot in &child_slots {
encode_child_pointer(ctx.parent_slot, slot, &mut children_buf);
}
}
match node {
CharNode::N4(n) => serialize_charnode4_v2(n, writer, &children_buf)?,
CharNode::N16(n) => serialize_charnode16_v2(n, writer, &children_buf)?,
CharNode::N48(n) => serialize_charnode48_v2(n, writer, &children_buf)?,
CharNode::Bucket(n) => serialize_charbucket_v2(n, writer, &children_buf)?,
}
Ok(CHAR_SERIALIZED_HEADER_SIZE + data_size)
}
fn serialize_charnode4_v2<W: Write>(
node: &CharNode4,
writer: &mut W,
encoded_children: &[u8],
) -> Result<()> {
for key in &node.keys {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
writer.write_all(encoded_children).map_err(io_err)?;
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
Ok(())
}
fn serialize_charnode16_v2<W: Write>(
node: &CharNode16,
writer: &mut W,
encoded_children: &[u8],
) -> Result<()> {
for key in &node.keys {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
writer.write_all(encoded_children).map_err(io_err)?;
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
Ok(())
}
fn serialize_charnode48_v2<W: Write>(
node: &CharNode48,
writer: &mut W,
encoded_children: &[u8],
) -> Result<()> {
for key in &node.keys {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
writer.write_all(encoded_children).map_err(io_err)?;
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
Ok(())
}
fn serialize_charbucket_v2<W: Write>(
node: &CharBucket,
writer: &mut W,
encoded_children: &[u8],
) -> Result<()> {
let num_entries = node.entries.len() as u32;
writer
.write_all(&num_entries.to_le_bytes())
.map_err(io_err)?;
let value_raw = node.value_ptr.to_raw();
writer.write_all(&value_raw.to_le_bytes()).map_err(io_err)?;
let mut entries: Vec<_> = node.entries.iter().collect();
entries.sort_by_key(|&(k, _)| *k);
for (&key, _) in entries {
writer.write_all(&key.to_le_bytes()).map_err(io_err)?;
}
writer.write_all(encoded_children).map_err(io_err)?;
Ok(())
}
#[derive(Debug, Clone)]
pub struct DeserializationContext {
pub parent_slot: ArenaSlot,
}
impl DeserializationContext {
pub fn new(parent_slot: ArenaSlot) -> Self {
Self { parent_slot }
}
}
fn relative_decode_err(err: RelativeEncodingError) -> PersistentARTrieError {
PersistentARTrieError::corrupted(format!("invalid relative child encoding: {}", err))
}
fn decode_v2_child_slots(
data: &[u8],
parent: ArenaSlot,
count: usize,
uses_sequential: bool,
) -> Result<(Vec<ArenaSlot>, usize)> {
if uses_sequential {
try_decode_sequential_siblings(data, parent, count).map_err(relative_decode_err)
} else {
try_decode_children(data, parent, count).map_err(relative_decode_err)
}
}
fn read_value_ptr_after_children(data: &[u8], value_offset: usize) -> Result<SwizzledPtr> {
let end = value_offset
.checked_add(8)
.ok_or_else(|| PersistentARTrieError::corrupted("char v2 value pointer offset overflow"))?;
if data.len() < end {
return Err(PersistentARTrieError::corrupted(format!(
"truncated char v2 value pointer: child bytes consumed {}, remaining data length {}",
value_offset,
data.len()
)));
}
if data.len() != end {
return Err(PersistentARTrieError::corrupted(format!(
"noncanonical char v2 data_size: value pointer ends at {}, remaining data length {}",
end,
data.len()
)));
}
let value_raw = u64::from_le_bytes(data[value_offset..end].try_into().unwrap());
Ok(SwizzledPtr::from_raw(value_raw))
}
pub fn deserialize_char_node_v2<R: Read>(
reader: &mut R,
ctx: &DeserializationContext,
) -> Result<CharNode> {
let mut header_bytes = [0u8; CHAR_SERIALIZED_HEADER_SIZE];
reader.read_exact(&mut header_bytes).map_err(io_err)?;
let header = SerializedCharNodeHeader::from_bytes(&header_bytes);
validate_v2_header_layout(&header)?;
let prefix = if header.prefix_len > 0 {
let mut chars = [0u32; CHAR_MAX_PREFIX_LEN];
for c in &mut chars {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*c = u32::from_le_bytes(bytes);
}
CharCompressedPrefix { chars }
} else {
CharCompressedPrefix::empty()
};
let uses_sequential = header.uses_sequential_siblings();
let uses_relative = header.uses_relative_offsets();
match header.node_type {
char_node_types::CHARNODE4 => {
deserialize_charnode4_v2(reader, &header, prefix, ctx, uses_sequential, uses_relative)
}
char_node_types::CHARNODE16 => {
deserialize_charnode16_v2(reader, &header, prefix, ctx, uses_sequential, uses_relative)
}
char_node_types::CHARNODE48 => {
deserialize_charnode48_v2(reader, &header, prefix, ctx, uses_sequential, uses_relative)
}
char_node_types::CHARBUCKET => {
deserialize_charbucket_v2(reader, &header, prefix, ctx, uses_sequential, uses_relative)
}
_ => Err(PersistentARTrieError::corrupted(format!(
"invalid char node type: {}",
header.node_type
))),
}
}
fn deserialize_charnode4_v2<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
ctx: &DeserializationContext,
uses_sequential: bool,
uses_relative: bool,
) -> Result<CharNode> {
let mut node = CharNode4::new();
node.header = header.to_node_header();
node.prefix = prefix;
for key in &mut node.keys {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*key = u32::from_le_bytes(bytes);
}
let num_children = header.num_children as usize;
let prefix_size = header_prefix_size(header);
if uses_sequential {
let remaining_data =
read_remaining_data(reader, header.data_size as usize, 4 * 4, prefix_size)?;
let (children, bytes_consumed) =
decode_v2_child_slots(&remaining_data, ctx.parent_slot, num_children, true)?;
for (i, slot) in children.iter().enumerate().take(4) {
node.children[i] = arena_slot_to_ptr(*slot);
}
node.value_ptr = read_value_ptr_after_children(&remaining_data, bytes_consumed)?;
} else if uses_relative {
let remaining_data =
read_remaining_data(reader, header.data_size as usize, 4 * 4, prefix_size)?;
let (children, bytes_consumed) =
decode_v2_child_slots(&remaining_data, ctx.parent_slot, num_children, false)?;
for (i, slot) in children.iter().enumerate().take(4) {
node.children[i] = arena_slot_to_ptr(*slot);
}
node.value_ptr = read_value_ptr_after_children(&remaining_data, bytes_consumed)?;
} else {
ensure_fixed_node_data_size(header, 4 * 4, 4)?;
for child in &mut node.children {
let mut raw_bytes = [0u8; 8];
reader.read_exact(&mut raw_bytes).map_err(io_err)?;
*child = SwizzledPtr::from_raw(u64::from_le_bytes(raw_bytes));
}
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
}
Ok(CharNode::N4(Box::new(node)))
}
fn deserialize_charnode16_v2<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
ctx: &DeserializationContext,
uses_sequential: bool,
uses_relative: bool,
) -> Result<CharNode> {
let mut node = CharNode16::new();
node.header = header.to_node_header();
node.prefix = prefix;
for key in &mut node.keys {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*key = u32::from_le_bytes(bytes);
}
let num_children = header.num_children as usize;
let prefix_size = header_prefix_size(header);
if uses_sequential {
let remaining_data =
read_remaining_data(reader, header.data_size as usize, 16 * 4, prefix_size)?;
let (children, bytes_consumed) =
decode_v2_child_slots(&remaining_data, ctx.parent_slot, num_children, true)?;
for (i, slot) in children.iter().enumerate().take(16) {
node.children[i] = arena_slot_to_ptr(*slot);
}
node.value_ptr = read_value_ptr_after_children(&remaining_data, bytes_consumed)?;
} else if uses_relative {
let remaining_data =
read_remaining_data(reader, header.data_size as usize, 16 * 4, prefix_size)?;
let (children, bytes_consumed) =
decode_v2_child_slots(&remaining_data, ctx.parent_slot, num_children, false)?;
for (i, slot) in children.iter().enumerate().take(16) {
node.children[i] = arena_slot_to_ptr(*slot);
}
node.value_ptr = read_value_ptr_after_children(&remaining_data, bytes_consumed)?;
} else {
ensure_fixed_node_data_size(header, 16 * 4, 16)?;
for child in &mut node.children {
let mut raw_bytes = [0u8; 8];
reader.read_exact(&mut raw_bytes).map_err(io_err)?;
*child = SwizzledPtr::from_raw(u64::from_le_bytes(raw_bytes));
}
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
}
Ok(CharNode::N16(Box::new(node)))
}
fn deserialize_charnode48_v2<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
ctx: &DeserializationContext,
uses_sequential: bool,
uses_relative: bool,
) -> Result<CharNode> {
let mut node = CharNode48::new();
node.header = header.to_node_header();
node.prefix = prefix;
for key in &mut node.keys {
let mut bytes = [0u8; 4];
reader.read_exact(&mut bytes).map_err(io_err)?;
*key = u32::from_le_bytes(bytes);
}
let num_children = header.num_children as usize;
let prefix_size = header_prefix_size(header);
if uses_sequential {
let remaining_data =
read_remaining_data(reader, header.data_size as usize, 48 * 4, prefix_size)?;
let (children, bytes_consumed) =
decode_v2_child_slots(&remaining_data, ctx.parent_slot, num_children, true)?;
for (i, slot) in children.iter().enumerate().take(48) {
node.children[i] = arena_slot_to_ptr(*slot);
}
node.value_ptr = read_value_ptr_after_children(&remaining_data, bytes_consumed)?;
} else if uses_relative {
let remaining_data =
read_remaining_data(reader, header.data_size as usize, 48 * 4, prefix_size)?;
let (children, bytes_consumed) =
decode_v2_child_slots(&remaining_data, ctx.parent_slot, num_children, false)?;
for (i, slot) in children.iter().enumerate().take(48) {
node.children[i] = arena_slot_to_ptr(*slot);
}
node.value_ptr = read_value_ptr_after_children(&remaining_data, bytes_consumed)?;
} else {
ensure_fixed_node_data_size(header, 48 * 4, 48)?;
for child in &mut node.children {
let mut raw_bytes = [0u8; 8];
reader.read_exact(&mut raw_bytes).map_err(io_err)?;
*child = SwizzledPtr::from_raw(u64::from_le_bytes(raw_bytes));
}
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
}
Ok(CharNode::N48(Box::new(node)))
}
fn deserialize_charbucket_v2<R: Read>(
reader: &mut R,
header: &SerializedCharNodeHeader,
prefix: CharCompressedPrefix,
ctx: &DeserializationContext,
uses_sequential: bool,
uses_relative: bool,
) -> Result<CharNode> {
let mut node = CharBucket::new();
node.header = header.to_node_header();
node.prefix = prefix;
let mut num_entries_bytes = [0u8; 4];
reader.read_exact(&mut num_entries_bytes).map_err(io_err)?;
let num_entries = u32::from_le_bytes(num_entries_bytes) as usize;
ensure_bucket_entry_count(header, num_entries)?;
let mut value_bytes = [0u8; 8];
reader.read_exact(&mut value_bytes).map_err(io_err)?;
node.value_ptr = SwizzledPtr::from_raw(u64::from_le_bytes(value_bytes));
let prefix_size = header_prefix_size(header);
if uses_sequential || uses_relative {
let mut keys = Vec::with_capacity(num_entries);
for _ in 0..num_entries {
let mut key_bytes = [0u8; 4];
reader.read_exact(&mut key_bytes).map_err(io_err)?;
keys.push(u32::from_le_bytes(key_bytes));
}
let entries_key_bytes = num_entries.checked_mul(4).ok_or_else(|| {
PersistentARTrieError::corrupted("char bucket key layout size overflow")
})?;
let consumed_before_children = checked_layout_add(prefix_size, 4, "bucket count")?;
let consumed_before_children =
checked_layout_add(consumed_before_children, 8, "bucket value pointer")?;
let consumed_before_children =
checked_layout_add(consumed_before_children, entries_key_bytes, "bucket keys")?;
let remaining_size = (header.data_size as usize)
.checked_sub(consumed_before_children)
.ok_or_else(|| {
PersistentARTrieError::corrupted(format!(
"char bucket data_size {} is smaller than fixed payload {}",
header.data_size, consumed_before_children
))
})?;
let mut remaining_data = vec![0u8; remaining_size];
reader.read_exact(&mut remaining_data).map_err(io_err)?;
let (children, bytes_consumed) = decode_v2_child_slots(
&remaining_data,
ctx.parent_slot,
num_entries,
uses_sequential,
)?;
if bytes_consumed != remaining_data.len() {
return Err(PersistentARTrieError::corrupted(format!(
"char bucket relative children consumed {} bytes from {} bytes",
bytes_consumed,
remaining_data.len()
)));
}
for (key, slot) in keys.iter().zip(children.iter()) {
node.entries.insert(*key, arena_slot_to_ptr(*slot));
}
} else {
ensure_bucket_fixed_data_size(header, num_entries)?;
for _ in 0..num_entries {
let mut key_bytes = [0u8; 4];
reader.read_exact(&mut key_bytes).map_err(io_err)?;
let key = u32::from_le_bytes(key_bytes);
let mut child_bytes = [0u8; 8];
reader.read_exact(&mut child_bytes).map_err(io_err)?;
let child = SwizzledPtr::from_raw(u64::from_le_bytes(child_bytes));
node.entries.insert(key, child);
}
}
Ok(CharNode::Bucket(Box::new(node)))
}
fn read_remaining_data<R: Read>(
reader: &mut R,
data_size: usize,
keys_size: usize,
prefix_size: usize,
) -> Result<Vec<u8>> {
let consumed = checked_layout_add(prefix_size, keys_size, "node keys")?;
let remaining_size = data_size.checked_sub(consumed).ok_or_else(|| {
PersistentARTrieError::corrupted(format!(
"char v2 data_size {} is smaller than prefix+keys {}",
data_size, consumed
))
})?;
let mut data = vec![0u8; remaining_size];
reader.read_exact(&mut data).map_err(io_err)?;
Ok(data)
}
#[inline]
fn header_prefix_size(header: &SerializedCharNodeHeader) -> usize {
if header.prefix_len > 0 {
CHAR_MAX_PREFIX_LEN * 4 } else {
0
}
}
fn arena_slot_to_ptr(slot: ArenaSlot) -> SwizzledPtr {
use crate::persistent_artrie::NodeType;
let block_id = slot.arena_id.saturating_add(1);
SwizzledPtr::on_disk(block_id, slot.slot_id, NodeType::CharNode4) }
#[cfg(test)]
mod tests {
use super::*;
use crate::persistent_artrie::NodeType;
use crate::persistent_artrie_char::nodes::flags;
use crate::persistent_artrie_char::nodes::CharArtNode;
#[test]
fn test_header_roundtrip() {
let header = SerializedCharNodeHeader {
magic: CHAR_NODE_MAGIC,
version: CHAR_FORMAT_VERSION,
node_type: char_node_types::CHARNODE4,
flags: flags::IS_FINAL,
reserved: 0,
num_children: 3,
prefix_len: 5,
_padding: 0,
data_size: 100,
};
let bytes = header.to_bytes();
let restored = SerializedCharNodeHeader::from_bytes(&bytes);
assert_eq!(restored.magic, CHAR_NODE_MAGIC);
assert_eq!(restored.version, CHAR_FORMAT_VERSION);
assert_eq!(restored.node_type, char_node_types::CHARNODE4);
assert_eq!(restored.flags, flags::IS_FINAL);
assert_eq!(restored.num_children, 3);
assert_eq!(restored.prefix_len, 5);
assert_eq!(restored.data_size, 100);
}
#[test]
fn test_header_validation() {
let mut header = SerializedCharNodeHeader {
magic: CHAR_NODE_MAGIC,
version: CHAR_FORMAT_VERSION,
node_type: char_node_types::CHARNODE4,
flags: 0,
reserved: 0,
num_children: 0,
prefix_len: 0,
_padding: 0,
data_size: 0,
};
assert!(header.validate().is_ok());
header.magic = *b"BAD\0";
assert!(matches!(
header.validate(),
Err(PersistentARTrieError::InvalidMagic { .. })
));
header.magic = CHAR_NODE_MAGIC;
header.version = 255;
assert!(matches!(
header.validate(),
Err(PersistentARTrieError::UnsupportedVersion { .. })
));
header.version = CHAR_FORMAT_VERSION;
header.node_type = 99;
assert!(matches!(
header.validate(),
Err(PersistentARTrieError::CorruptedFile { .. })
));
header.node_type = char_node_types::CHARNODE4;
header.prefix_len = 10;
assert!(matches!(
header.validate(),
Err(PersistentARTrieError::CorruptedFile { .. })
));
}
#[test]
fn test_charnode4_roundtrip() {
let mut node4 = CharNode4::new();
let prefix_chars: Vec<u32> = "test".chars().map(|c| c as u32).collect();
node4.prefix = CharCompressedPrefix::from_chars(&prefix_chars);
node4.header.prefix_len = 4;
node4.header.set_final(true);
node4
.add_child(
'a' as u32,
SwizzledPtr::on_disk(100, 0, NodeType::CharNode4),
)
.expect("add child a");
node4
.add_child(
'b' as u32,
SwizzledPtr::on_disk(200, 0, NodeType::CharNode16),
)
.expect("add child b");
let node = CharNode::N4(Box::new(node4));
let bytes = char_to_bytes(&node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N4(_)));
assert_eq!(restored.header().prefix_len, 4);
assert!(restored.header().is_final());
assert_eq!(restored.header().num_children, 2);
assert!(restored.find_child('a' as u32).is_some());
assert!(restored.find_child('b' as u32).is_some());
assert!(restored.find_child('c' as u32).is_none());
}
#[test]
fn test_charnode16_roundtrip() {
let mut node16 = CharNode16::new();
let prefix_chars: Vec<u32> = "prefix".chars().map(|c| c as u32).collect();
node16.prefix = CharCompressedPrefix::from_chars(&prefix_chars);
node16.header.prefix_len = 6;
for i in 0..8 {
node16
.add_child(
'a' as u32 + i,
SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4),
)
.expect("add child");
}
let node = CharNode::N16(Box::new(node16));
let bytes = char_to_bytes(&node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N16(_)));
assert_eq!(restored.header().prefix_len, 6);
assert_eq!(restored.header().num_children, 8);
for i in 0..8 {
assert!(restored.find_child('a' as u32 + i).is_some());
}
}
#[test]
fn test_charnode48_roundtrip() {
let mut node48 = CharNode48::new();
let keys: Vec<u32> = "αβγδεζηθ".chars().map(|c| c as u32).collect();
for (i, &key) in keys.iter().enumerate() {
node48
.add_child(key, SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4))
.expect("add child");
}
let node = CharNode::N48(Box::new(node48));
let bytes = char_to_bytes(&node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N48(_)));
assert_eq!(restored.header().num_children, 8);
for &key in &keys {
assert!(
restored.find_child(key).is_some(),
"should find key {}",
char::from_u32(key).unwrap_or('?')
);
}
}
#[test]
fn test_charbucket_roundtrip() {
let mut bucket = CharBucket::new();
let keys: Vec<u32> = "日本語中文한글🎉🎊🎋🎌🎍🎎🎏🎐🎑🎒🎓"
.chars()
.map(|c| c as u32)
.collect();
for (i, &key) in keys.iter().enumerate() {
bucket
.add_child(key, SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4))
.expect("add child");
}
bucket.header.set_final(true);
let node = CharNode::Bucket(Box::new(bucket));
let bytes = char_to_bytes(&node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::Bucket(_)));
assert!(restored.header().is_final());
assert_eq!(restored.header().num_children, keys.len() as u16);
for &key in &keys {
assert!(
restored.find_child(key).is_some(),
"should find key {}",
char::from_u32(key).unwrap_or('?')
);
}
}
#[test]
fn test_empty_node_roundtrip() {
for create_node in [
|| CharNode::N4(Box::new(CharNode4::new())),
|| CharNode::N16(Box::new(CharNode16::new())),
|| CharNode::N48(Box::new(CharNode48::new())),
|| CharNode::Bucket(Box::new(CharBucket::new())),
] {
let node = create_node();
let bytes = char_to_bytes(&node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
assert_eq!(restored.header().num_children, 0);
}
}
#[test]
fn test_serialized_size_calculation() {
let node4 = CharNode::N4(Box::new(CharNode4::new()));
assert_eq!(char_serialized_size(&node4), 16 + 0 + 56);
let mut node4_with_prefix = CharNode4::new();
let prefix: Vec<u32> = "test".chars().map(|c| c as u32).collect();
node4_with_prefix.prefix = CharCompressedPrefix::from_chars(&prefix);
node4_with_prefix.header.prefix_len = 4;
let node4_p = CharNode::N4(Box::new(node4_with_prefix));
assert_eq!(char_serialized_size(&node4_p), 16 + 24 + 56);
let node16 = CharNode::N16(Box::new(CharNode16::new()));
assert_eq!(char_serialized_size(&node16), 16 + 0 + 200);
let node48 = CharNode::N48(Box::new(CharNode48::new()));
assert_eq!(char_serialized_size(&node48), 16 + 0 + 584);
let mut bucket = CharBucket::new();
for i in 0..5 {
bucket
.add_child(i, SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4))
.expect("add");
}
let bucket_node = CharNode::Bucket(Box::new(bucket));
assert_eq!(
char_serialized_size(&bucket_node),
16 + 0 + (4 + 8 + 5 * 12)
);
}
#[test]
fn test_unicode_prefix_roundtrip() {
let mut node = CharNode4::new();
let prefix: Vec<u32> = "日本🎉".chars().map(|c| c as u32).collect();
node.prefix = CharCompressedPrefix::from_chars(&prefix);
node.header.prefix_len = 3;
let char_node = CharNode::N4(Box::new(node));
let bytes = char_to_bytes(&char_node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
assert_eq!(restored.header().prefix_len, 3);
let restored_chars = restored.prefix().to_chars(3);
assert_eq!(restored_chars, vec!['日', '本', '🎉']);
}
#[test]
fn test_value_ptr_roundtrip() {
let mut node = CharNode4::new();
node.value_ptr = SwizzledPtr::on_disk(999, 123, NodeType::Bucket);
node.header.set_final(true);
let char_node = CharNode::N4(Box::new(node));
let bytes = char_to_bytes(&char_node).expect("serialize");
let restored = char_from_bytes(&bytes).expect("deserialize");
if let CharNode::N4(n) = restored {
let loc = n
.value_ptr
.disk_location()
.expect("should have disk location");
assert_eq!(loc.block_id, 999);
assert_eq!(loc.offset, 123);
} else {
panic!("Expected CharNode::N4");
}
}
mod compact_tests {
use super::*;
#[test]
fn test_compact_charnode4_roundtrip() {
let mut node4 = CharNode4::new();
let prefix_chars: Vec<u32> = "test".chars().map(|c| c as u32).collect();
node4.prefix = CharCompressedPrefix::from_chars(&prefix_chars);
node4.header.prefix_len = 4;
node4.header.set_final(true);
node4
.add_child(
'a' as u32,
SwizzledPtr::on_disk(100, 0, NodeType::CharNode4),
)
.expect("add child a");
node4
.add_child(
'b' as u32,
SwizzledPtr::on_disk(200, 0, NodeType::CharNode16),
)
.expect("add child b");
let node = CharNode::N4(Box::new(node4));
let bytes = char_to_bytes_compact(&node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N4(_)));
assert_eq!(restored.header().prefix_len, 4);
assert!(restored.header().is_final());
assert_eq!(restored.header().num_children, 2);
assert!(restored.find_child('a' as u32).is_some());
assert!(restored.find_child('b' as u32).is_some());
}
#[test]
fn test_compact_charnode16_roundtrip() {
let mut node16 = CharNode16::new();
let prefix_chars: Vec<u32> = "prefix".chars().map(|c| c as u32).collect();
node16.prefix = CharCompressedPrefix::from_chars(&prefix_chars);
node16.header.prefix_len = 6;
for i in 0..8 {
node16
.add_child(
'a' as u32 + i,
SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4),
)
.expect("add child");
}
let node = CharNode::N16(Box::new(node16));
let bytes = char_to_bytes_compact(&node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N16(_)));
assert_eq!(restored.header().prefix_len, 6);
assert_eq!(restored.header().num_children, 8);
for i in 0..8 {
assert!(restored.find_child('a' as u32 + i).is_some());
}
}
#[test]
fn test_compact_charnode48_roundtrip() {
let mut node48 = CharNode48::new();
let keys: Vec<u32> = "αβγδεζηθ".chars().map(|c| c as u32).collect();
for (i, &key) in keys.iter().enumerate() {
node48
.add_child(key, SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4))
.expect("add child");
}
let node = CharNode::N48(Box::new(node48));
let bytes = char_to_bytes_compact(&node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N48(_)));
assert_eq!(restored.header().num_children, 8);
for &key in &keys {
assert!(restored.find_child(key).is_some());
}
}
#[test]
fn test_compact_bucket_roundtrip() {
let mut bucket = CharBucket::new();
let keys: Vec<u32> = "日本語中文".chars().map(|c| c as u32).collect();
for (i, &key) in keys.iter().enumerate() {
bucket
.add_child(key, SwizzledPtr::on_disk(i as u32, 0, NodeType::CharNode4))
.expect("add child");
}
bucket.header.set_final(true);
let node = CharNode::Bucket(Box::new(bucket));
let bytes = char_to_bytes_compact(&node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::Bucket(_)));
assert!(restored.header().is_final());
assert_eq!(restored.header().num_children, keys.len() as u16);
for &key in &keys {
assert!(restored.find_child(key).is_some());
}
}
#[test]
fn test_compact_space_savings() {
let mut node4 = CharNode4::new();
node4
.add_child(
'a' as u32,
SwizzledPtr::on_disk(100, 0, NodeType::CharNode4),
)
.expect("add");
node4
.add_child(
'b' as u32,
SwizzledPtr::on_disk(200, 0, NodeType::CharNode4),
)
.expect("add");
let node = CharNode::N4(Box::new(node4));
let fixed_size = char_serialized_size(&node);
let compact_size = char_to_bytes_compact(&node, 1000).len();
assert!(
compact_size < fixed_size,
"compact {} should be less than fixed {}",
compact_size,
fixed_size
);
let savings = 1.0 - (compact_size as f64 / fixed_size as f64);
assert!(
savings > 0.5,
"Expected >50% savings, got {:.1}%",
savings * 100.0
);
}
#[test]
fn test_compact_empty_nodes() {
for create_node in [
|| CharNode::N4(Box::new(CharNode4::new())),
|| CharNode::N16(Box::new(CharNode16::new())),
|| CharNode::N48(Box::new(CharNode48::new())),
|| CharNode::Bucket(Box::new(CharBucket::new())),
] {
let node = create_node();
let bytes = char_to_bytes_compact(&node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert_eq!(restored.header().num_children, 0);
}
}
#[test]
fn test_compact_with_value_ptr() {
let mut node = CharNode4::new();
node.value_ptr = SwizzledPtr::on_disk(500, 10, NodeType::Bucket);
node.header.set_final(true);
let char_node = CharNode::N4(Box::new(node));
let bytes = char_to_bytes_compact(&char_node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
if let CharNode::N4(n) = restored {
assert!(n.header.is_final());
assert!(!n.value_ptr.is_null());
} else {
panic!("Expected CharNode::N4");
}
}
#[test]
fn test_compact_size_calculation() {
let mut node4 = CharNode4::new();
node4
.add_child(
'a' as u32,
SwizzledPtr::on_disk(100, 0, NodeType::CharNode4),
)
.expect("add");
node4
.add_child(
'b' as u32,
SwizzledPtr::on_disk(200, 0, NodeType::CharNode4),
)
.expect("add");
let node = CharNode::N4(Box::new(node4));
let calculated_size = char_compact_serialized_size(&node, 1000);
let actual_size = char_to_bytes_compact(&node, 1000).len();
assert_eq!(
calculated_size, actual_size,
"calculated {} != actual {}",
calculated_size, actual_size
);
}
#[test]
fn test_compact_unicode_prefix() {
let mut node = CharNode4::new();
let prefix: Vec<u32> = "日本🎉".chars().map(|c| c as u32).collect();
node.prefix = CharCompressedPrefix::from_chars(&prefix);
node.header.prefix_len = 3;
let char_node = CharNode::N4(Box::new(node));
let bytes = char_to_bytes_compact(&char_node, 1000);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert_eq!(restored.header().prefix_len, 3);
let restored_chars = restored.prefix().to_chars(3);
assert_eq!(restored_chars, vec!['日', '本', '🎉']);
}
#[test]
fn test_compact_large_pointers() {
let mut node4 = CharNode4::new();
node4
.add_child(
'a' as u32,
SwizzledPtr::on_disk(0x7FFFFF, 0x3FFFFF, NodeType::CharNode4),
)
.expect("add");
let node = CharNode::N4(Box::new(node4));
let bytes = char_to_bytes_compact(&node, 0xFFFFFFFF);
let restored = char_from_bytes_compact(&bytes).expect("deserialize");
assert!(matches!(restored, CharNode::N4(_)));
assert!(restored.find_child('a' as u32).is_some());
}
}
mod v2_tests {
use super::*;
#[test]
fn test_header_v2_encoding_flags() {
let header = CharNodeHeader::new(char_node_types::CHARNODE4);
let h1 = SerializedCharNodeHeader::from_node_header_v2(&header, 100, 0);
assert!(!h1.uses_relative_offsets());
assert!(!h1.uses_sequential_siblings());
let h2 = SerializedCharNodeHeader::from_node_header_v2(&header, 100, 0x80);
assert!(h2.uses_relative_offsets());
assert!(!h2.uses_sequential_siblings());
let h3 = SerializedCharNodeHeader::from_node_header_v2(&header, 100, 0x40);
assert!(!h3.uses_relative_offsets());
assert!(h3.uses_sequential_siblings());
let h4 = SerializedCharNodeHeader::from_node_header_v2(&header, 100, 0xC0);
assert!(h4.uses_relative_offsets());
assert!(h4.uses_sequential_siblings());
}
#[test]
fn test_header_v2_preserves_node_flags() {
let mut header = CharNodeHeader::new(char_node_types::CHARNODE4);
header.flags = flags::IS_FINAL | flags::IS_DIRTY;
let h = SerializedCharNodeHeader::from_node_header_v2(&header, 100, 0xC0);
assert!(h.flags & flags::IS_FINAL != 0);
assert!(h.flags & flags::IS_DIRTY != 0);
assert!(h.uses_relative_offsets());
assert!(h.uses_sequential_siblings());
}
#[test]
fn test_serialize_charnode4_v2_relative() {
let mut node4 = CharNode4::new();
node4
.add_child('a' as u32, SwizzledPtr::on_disk(1, 10, NodeType::CharNode4))
.expect("add child a");
node4
.add_child('b' as u32, SwizzledPtr::on_disk(1, 20, NodeType::CharNode4))
.expect("add child b");
let node = CharNode::N4(Box::new(node4));
let parent_slot = ArenaSlot::new(0, 100);
let ctx = SerializationContext::new(parent_slot);
let mut buffer = Vec::new();
let bytes_written =
serialize_char_node_v2(&node, &mut buffer, &ctx).expect("serialize");
assert!(bytes_written > 0);
let header = SerializedCharNodeHeader::from_bytes(buffer[..16].try_into().unwrap());
assert!(header.uses_relative_offsets());
assert!(!header.uses_sequential_siblings());
let deser_ctx = DeserializationContext::new(parent_slot);
let mut cursor = std::io::Cursor::new(&buffer);
let restored = deserialize_char_node_v2(&mut cursor, &deser_ctx).expect("deserialize");
assert!(matches!(restored, CharNode::N4(_)));
assert_eq!(restored.header().num_children, 2);
assert!(restored.find_child('a' as u32).is_some());
assert!(restored.find_child('b' as u32).is_some());
}
#[test]
fn test_serialize_charnode4_v2_sequential() {
let mut node4 = CharNode4::new();
node4
.add_child('a' as u32, SwizzledPtr::on_disk(1, 10, NodeType::CharNode4))
.expect("add child a");
node4
.add_child('b' as u32, SwizzledPtr::on_disk(1, 11, NodeType::CharNode4))
.expect("add child b");
node4
.add_child('c' as u32, SwizzledPtr::on_disk(1, 12, NodeType::CharNode4))
.expect("add child c");
let node = CharNode::N4(Box::new(node4));
let parent_slot = ArenaSlot::new(0, 100);
let first_child_slot = ArenaSlot::new(0, 10);
let ctx = SerializationContext::sequential(parent_slot, first_child_slot);
let mut buffer = Vec::new();
let bytes_written =
serialize_char_node_v2(&node, &mut buffer, &ctx).expect("serialize");
assert!(bytes_written > 0);
let header = SerializedCharNodeHeader::from_bytes(buffer[..16].try_into().unwrap());
assert!(header.uses_relative_offsets());
assert!(header.uses_sequential_siblings());
let deser_ctx = DeserializationContext::new(parent_slot);
let mut cursor = std::io::Cursor::new(&buffer);
let restored = deserialize_char_node_v2(&mut cursor, &deser_ctx).expect("deserialize");
assert!(matches!(restored, CharNode::N4(_)));
assert_eq!(restored.header().num_children, 3);
}
#[test]
fn test_collect_char_child_slots() {
let mut node4 = CharNode4::new();
node4
.add_child('x' as u32, SwizzledPtr::on_disk(1, 50, NodeType::CharNode4))
.expect("add");
node4
.add_child('y' as u32, SwizzledPtr::on_disk(1, 60, NodeType::CharNode4))
.expect("add");
let node = CharNode::N4(Box::new(node4));
let slots = collect_char_child_slots(&node);
assert_eq!(slots.len(), 2);
assert!(slots.iter().any(|s| s.arena_id == 0 && s.slot_id == 50));
assert!(slots.iter().any(|s| s.arena_id == 0 && s.slot_id == 60));
}
#[test]
fn test_v2_size_smaller_than_v1() {
let mut node4 = CharNode4::new();
for i in 0..4 {
node4
.add_child(
('a' as u32) + i,
SwizzledPtr::on_disk(1, 10 + i, NodeType::CharNode4),
)
.expect("add");
}
let node = CharNode::N4(Box::new(node4));
let mut v1_buffer = Vec::new();
serialize_char_node(&node, &mut v1_buffer).expect("v1");
let parent_slot = ArenaSlot::new(0, 100);
let ctx = SerializationContext::new(parent_slot);
let mut v2_buffer = Vec::new();
serialize_char_node_v2(&node, &mut v2_buffer, &ctx).expect("v2");
assert!(
v2_buffer.len() <= v1_buffer.len(),
"V2 size {} should be <= V1 size {}",
v2_buffer.len(),
v1_buffer.len()
);
}
}
}