use super::{BUCKET_PAGE_SIZE, MAX_BUCKET_ENTRIES};
pub const BUCKET_MAGIC: u64 = 0x0054_4B42_5452_4150;
pub const BUCKET_VERSION: u16 = 1;
pub const HEADER_SIZE: usize = 32;
pub const ENTRY_SIZE: usize = 8;
pub const MAX_DATA_SIZE: usize = BUCKET_PAGE_SIZE - HEADER_SIZE;
pub const MIN_FREE_SPACE: usize = 256;
pub const SPLIT_THRESHOLD: f32 = 0.75;
pub mod flags {
pub const COMPACTED: u16 = 0x0001;
pub const NEEDS_SPLIT: u16 = 0x0002;
pub const HAS_VALUES: u16 = 0x0004;
pub const IS_LEAF: u16 = 0x0008;
}
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct BucketHeader {
pub magic: u64,
pub version: u16,
pub flags: u16,
pub entry_count: u16,
pub reserved: u16,
pub data_start: u32,
pub free_space: u32,
pub checksum: u32,
}
impl BucketHeader {
pub fn new() -> Self {
Self {
magic: BUCKET_MAGIC,
version: BUCKET_VERSION,
flags: flags::IS_LEAF,
entry_count: 0,
reserved: 0,
data_start: BUCKET_PAGE_SIZE as u32,
free_space: MAX_DATA_SIZE as u32,
checksum: 0,
}
}
pub fn with_flags(flags: u16) -> Self {
let mut header = Self::new();
header.flags = flags | flags::IS_LEAF;
header
}
#[inline]
pub fn has_values(&self) -> bool {
self.flags & flags::HAS_VALUES != 0
}
#[inline]
pub fn needs_split(&self) -> bool {
self.flags & flags::NEEDS_SPLIT != 0
}
#[inline]
pub fn is_leaf(&self) -> bool {
self.flags & flags::IS_LEAF != 0
}
#[inline]
pub fn should_split(&self) -> bool {
let used = MAX_DATA_SIZE as u32 - self.free_space;
let threshold = (MAX_DATA_SIZE as f32 * SPLIT_THRESHOLD) as u32;
used >= threshold || self.entry_count as usize >= MAX_BUCKET_ENTRIES
}
pub fn validate(&self) -> Result<(), BucketError> {
if self.magic != BUCKET_MAGIC {
return Err(BucketError::InvalidMagic {
expected: BUCKET_MAGIC,
found: self.magic,
});
}
if self.version > BUCKET_VERSION {
return Err(BucketError::UnsupportedVersion {
max_supported: BUCKET_VERSION,
found: self.version,
});
}
Ok(())
}
#[inline]
pub fn directory_end(&self) -> usize {
HEADER_SIZE + (self.entry_count as usize * ENTRY_SIZE)
}
#[inline]
pub fn available_space(&self) -> usize {
if self.data_start as usize <= self.directory_end() {
0
} else {
self.data_start as usize - self.directory_end()
}
}
#[inline]
pub fn debug_assert_free_space_invariant(&self) {
debug_assert_eq!(
self.free_space,
self.available_space() as u32,
"free_space ({}) diverged from available_space() ({}) -- \
entry_count={}, data_start={}, directory_end={}",
self.free_space,
self.available_space(),
self.entry_count,
self.data_start,
self.directory_end(),
);
}
}
impl Default for BucketHeader {
fn default() -> Self {
Self::new()
}
}
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StringEntry {
pub suffix_offset: u16,
pub suffix_len: u16,
pub value_offset: u16,
pub value_len: u16,
}
impl StringEntry {
pub fn key_only(suffix_offset: u16, suffix_len: u16) -> Self {
Self {
suffix_offset,
suffix_len,
value_offset: 0,
value_len: 0,
}
}
pub fn with_value(
suffix_offset: u16,
suffix_len: u16,
value_offset: u16,
value_len: u16,
) -> Self {
Self {
suffix_offset,
suffix_len,
value_offset,
value_len,
}
}
#[inline]
pub fn has_value(&self) -> bool {
self.value_len > 0
}
#[inline]
pub fn data_size(&self) -> usize {
self.suffix_len as usize + self.value_len as usize
}
pub fn to_bytes(&self) -> [u8; ENTRY_SIZE] {
let mut bytes = [0u8; ENTRY_SIZE];
bytes[0..2].copy_from_slice(&self.suffix_offset.to_le_bytes());
bytes[2..4].copy_from_slice(&self.suffix_len.to_le_bytes());
bytes[4..6].copy_from_slice(&self.value_offset.to_le_bytes());
bytes[6..8].copy_from_slice(&self.value_len.to_le_bytes());
bytes
}
pub fn from_bytes(bytes: &[u8; ENTRY_SIZE]) -> Self {
Self {
suffix_offset: u16::from_le_bytes([bytes[0], bytes[1]]),
suffix_len: u16::from_le_bytes([bytes[2], bytes[3]]),
value_offset: u16::from_le_bytes([bytes[4], bytes[5]]),
value_len: u16::from_le_bytes([bytes[6], bytes[7]]),
}
}
}
#[derive(Clone)]
pub struct StringBucket {
data: Box<[u8; BUCKET_PAGE_SIZE]>,
}
impl StringBucket {
pub fn new() -> Self {
let mut data = Box::new([0u8; BUCKET_PAGE_SIZE]);
let header = BucketHeader::new();
Self::write_header(&mut data, &header);
Self { data }
}
pub fn with_values() -> Self {
let mut data = Box::new([0u8; BUCKET_PAGE_SIZE]);
let header = BucketHeader::with_flags(flags::HAS_VALUES);
Self::write_header(&mut data, &header);
Self { data }
}
pub fn from_bytes(bytes: &[u8]) -> Result<Self, BucketError> {
if bytes.len() != BUCKET_PAGE_SIZE {
return Err(BucketError::InvalidSize {
expected: BUCKET_PAGE_SIZE,
found: bytes.len(),
});
}
let mut data = Box::new([0u8; BUCKET_PAGE_SIZE]);
data.copy_from_slice(bytes);
let bucket = Self { data };
bucket.header().validate()?;
Ok(bucket)
}
pub fn as_bytes(&self) -> &[u8; BUCKET_PAGE_SIZE] {
&self.data
}
pub fn as_bytes_mut(&mut self) -> &mut [u8; BUCKET_PAGE_SIZE] {
&mut self.data
}
pub fn header(&self) -> BucketHeader {
Self::read_header(&self.data)
}
pub fn set_header(&mut self, header: &BucketHeader) {
Self::write_header(&mut self.data, header);
}
#[inline]
pub fn len(&self) -> usize {
self.header().entry_count as usize
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn is_full(&self) -> bool {
let header = self.header();
header.entry_count as usize >= MAX_BUCKET_ENTRIES
|| header.available_space() < ENTRY_SIZE + MIN_FREE_SPACE
}
pub fn get_entry(&self, index: usize) -> Option<StringEntry> {
let header = self.header();
if index >= header.entry_count as usize {
return None;
}
let offset = HEADER_SIZE + (index * ENTRY_SIZE);
let bytes: [u8; ENTRY_SIZE] = self.data[offset..offset + ENTRY_SIZE]
.try_into()
.expect("slice length matches ENTRY_SIZE");
Some(StringEntry::from_bytes(&bytes))
}
pub fn get_suffix(&self, entry: &StringEntry) -> &[u8] {
let start = entry.suffix_offset as usize;
let end = start + entry.suffix_len as usize;
&self.data[start..end]
}
pub fn get_value(&self, entry: &StringEntry) -> Option<&[u8]> {
if entry.value_len == 0 {
return None;
}
let start = entry.value_offset as usize;
let end = start + entry.value_len as usize;
Some(&self.data[start..end])
}
pub fn search(&self, suffix: &[u8]) -> Result<usize, usize> {
let header = self.header();
let count = header.entry_count as usize;
if count == 0 {
return Err(0);
}
let mut left = 0;
let mut right = count;
while left < right {
let mid = left + (right - left) / 2;
let entry = self.get_entry(mid).expect("valid index");
let entry_suffix = self.get_suffix(&entry);
match entry_suffix.cmp(suffix) {
std::cmp::Ordering::Less => left = mid + 1,
std::cmp::Ordering::Greater => right = mid,
std::cmp::Ordering::Equal => return Ok(mid),
}
}
Err(left)
}
pub fn contains(&self, suffix: &[u8]) -> bool {
self.search(suffix).is_ok()
}
pub fn insert_key(&mut self, suffix: &[u8]) -> Result<bool, BucketError> {
self.insert_impl(suffix, None)
}
pub fn insert(&mut self, suffix: &[u8], value: &[u8]) -> Result<bool, BucketError> {
self.insert_impl(suffix, Some(value))
}
fn insert_impl(&mut self, suffix: &[u8], value: Option<&[u8]>) -> Result<bool, BucketError> {
let suffix_len = suffix.len();
let value_len = value.map_or(0, |v| v.len());
let total_data_size = suffix_len + value_len;
if suffix_len > u16::MAX as usize || value_len > u16::MAX as usize {
return Err(BucketError::DataTooLarge {
max: u16::MAX as usize,
found: suffix_len.max(value_len),
});
}
let mut header = self.header();
match self.search(suffix) {
Ok(index) => {
if let Some(new_value) = value {
self.update_value_at(index, new_value)?;
}
return Ok(false);
}
Err(insert_pos) => {
let space_needed = ENTRY_SIZE + total_data_size;
if header.available_space() < space_needed {
return Err(BucketError::InsufficientSpace {
needed: space_needed,
available: header.available_space(),
});
}
if header.entry_count as usize >= MAX_BUCKET_ENTRIES {
return Err(BucketError::BucketFull);
}
let new_data_start = header.data_start as usize - total_data_size;
self.data[new_data_start..new_data_start + suffix_len].copy_from_slice(suffix);
let value_offset = if let Some(v) = value {
let offset = new_data_start + suffix_len;
self.data[offset..offset + value_len].copy_from_slice(v);
offset as u16
} else {
0
};
let entry = StringEntry {
suffix_offset: new_data_start as u16,
suffix_len: suffix_len as u16,
value_offset,
value_len: value_len as u16,
};
self.insert_entry_at(insert_pos, &entry);
header.entry_count += 1;
header.data_start = new_data_start as u32;
header.free_space = header.available_space() as u32;
if value.is_some() {
header.flags |= flags::HAS_VALUES;
}
self.set_header(&header);
#[cfg(debug_assertions)]
self.header().debug_assert_free_space_invariant();
Ok(true)
}
}
}
fn update_value_at(&mut self, index: usize, new_value: &[u8]) -> Result<(), BucketError> {
let entry = self.get_entry(index).ok_or(BucketError::InvalidIndex)?;
if new_value.len() <= entry.value_len as usize {
let offset = entry.value_offset as usize;
self.data[offset..offset + new_value.len()].copy_from_slice(new_value);
let new_entry = StringEntry {
value_len: new_value.len() as u16,
..entry
};
self.write_entry_at(index, &new_entry);
return Ok(());
}
let mut header = self.header();
let space_needed = new_value.len();
if header.available_space() < space_needed {
return Err(BucketError::InsufficientSpace {
needed: space_needed,
available: header.available_space(),
});
}
let new_offset = header.data_start as usize - space_needed;
self.data[new_offset..new_offset + new_value.len()].copy_from_slice(new_value);
let new_entry = StringEntry {
value_offset: new_offset as u16,
value_len: new_value.len() as u16,
..entry
};
self.write_entry_at(index, &new_entry);
header.data_start = new_offset as u32;
header.free_space = header.available_space() as u32;
self.set_header(&header);
#[cfg(debug_assertions)]
self.header().debug_assert_free_space_invariant();
Ok(())
}
fn insert_entry_at(&mut self, index: usize, entry: &StringEntry) {
let header = self.header();
let count = header.entry_count as usize;
if index < count {
let src_start = HEADER_SIZE + (index * ENTRY_SIZE);
let src_end = HEADER_SIZE + (count * ENTRY_SIZE);
let dst_start = src_start + ENTRY_SIZE;
self.data.copy_within(src_start..src_end, dst_start);
}
self.write_entry_at(index, entry);
}
fn write_entry_at(&mut self, index: usize, entry: &StringEntry) {
let offset = HEADER_SIZE + (index * ENTRY_SIZE);
let bytes = entry.to_bytes();
self.data[offset..offset + ENTRY_SIZE].copy_from_slice(&bytes);
}
pub fn remove(&mut self, suffix: &[u8]) -> Option<StringEntry> {
match self.search(suffix) {
Ok(index) => {
let entry = self.get_entry(index).expect("valid index after search");
let mut header = self.header();
let count = header.entry_count as usize;
if index + 1 < count {
let src_start = HEADER_SIZE + ((index + 1) * ENTRY_SIZE);
let src_end = HEADER_SIZE + (count * ENTRY_SIZE);
let dst_start = HEADER_SIZE + (index * ENTRY_SIZE);
self.data.copy_within(src_start..src_end, dst_start);
}
header.entry_count -= 1;
header.free_space = header.available_space() as u32;
self.set_header(&header);
#[cfg(debug_assertions)]
self.header().debug_assert_free_space_invariant();
Some(entry)
}
Err(_) => None,
}
}
pub fn iter(&self) -> BucketIterator<'_> {
BucketIterator {
bucket: self,
index: 0,
count: self.header().entry_count as usize,
}
}
pub fn split_point(&self) -> Option<usize> {
let count = self.len();
if count < 2 {
return None;
}
Some(count / 2)
}
pub fn split(&self) -> Option<SplitResult> {
let split_point = self.split_point()?;
let count = self.len();
let has_values = self.header().has_values();
let mut left = if has_values {
StringBucket::with_values()
} else {
StringBucket::new()
};
let mut right = if has_values {
StringBucket::with_values()
} else {
StringBucket::new()
};
let mut split_key = Vec::new();
for i in 0..split_point {
let entry = self.get_entry(i).expect("valid index");
let suffix = self.get_suffix(&entry);
let value = self.get_value(&entry);
if let Some(v) = value {
left.insert(suffix, v).expect("left bucket has space");
} else {
left.insert_key(suffix).expect("left bucket has space");
}
}
for i in split_point..count {
let entry = self.get_entry(i).expect("valid index");
let suffix = self.get_suffix(&entry);
let value = self.get_value(&entry);
if i == split_point {
split_key = suffix.to_vec();
}
if let Some(v) = value {
right.insert(suffix, v).expect("right bucket has space");
} else {
right.insert_key(suffix).expect("right bucket has space");
}
}
Some(SplitResult {
left,
right,
split_key,
})
}
pub fn split_by_first_byte(&self) -> SplitByByteResult {
let has_values = self.header().has_values();
let mut buckets: std::collections::BTreeMap<u8, StringBucket> =
std::collections::BTreeMap::new();
let mut finals: Vec<(Vec<u8>, Option<Vec<u8>>)> = Vec::new();
let mut overflow: Vec<(u8, Vec<u8>, Option<Vec<u8>>)> = Vec::new();
for i in 0..self.len() {
let entry = self.get_entry(i).expect("valid index");
let suffix = self.get_suffix(&entry);
let value = self.get_value(&entry);
if suffix.is_empty() {
finals.push((Vec::new(), value.map(|v| v.to_vec())));
} else {
let first_byte = suffix[0];
let remaining = &suffix[1..];
let bucket = buckets.entry(first_byte).or_insert_with(|| {
if has_values {
StringBucket::with_values()
} else {
StringBucket::new()
}
});
let insert_result = if let Some(v) = value {
bucket.insert(remaining, v)
} else {
bucket.insert_key(remaining)
};
if insert_result.is_err() {
overflow.push((first_byte, remaining.to_vec(), value.map(|v| v.to_vec())));
}
}
}
SplitByByteResult {
buckets,
finals,
overflow,
}
}
pub fn compact(&mut self) {
let count = self.len();
if count == 0 {
return;
}
let has_values = self.header().has_values();
let entries: Vec<_> = (0..count)
.map(|i| {
let entry = self.get_entry(i).expect("valid index");
let suffix = self.get_suffix(&entry).to_vec();
let value = self.get_value(&entry).map(|v| v.to_vec());
(suffix, value)
})
.collect();
*self = if has_values {
StringBucket::with_values()
} else {
StringBucket::new()
};
for (suffix, value) in entries {
if let Some(v) = value {
self.insert(&suffix, &v).expect("re-insert succeeds");
} else {
self.insert_key(&suffix).expect("re-insert succeeds");
}
}
let mut header = self.header();
header.flags |= flags::COMPACTED;
self.set_header(&header);
}
pub fn merge(&mut self, other: &StringBucket) -> Result<(), BucketError> {
for i in 0..other.len() {
let entry = other.get_entry(i).expect("valid index");
let suffix = other.get_suffix(&entry);
let value = other.get_value(&entry);
if let Some(v) = value {
self.insert(suffix, v)?;
} else {
self.insert_key(suffix)?;
}
}
Ok(())
}
fn read_header(data: &[u8; BUCKET_PAGE_SIZE]) -> BucketHeader {
BucketHeader {
magic: u64::from_le_bytes(data[0..8].try_into().expect("slice length is 8")),
version: u16::from_le_bytes(data[8..10].try_into().expect("slice length is 2")),
flags: u16::from_le_bytes(data[10..12].try_into().expect("slice length is 2")),
entry_count: u16::from_le_bytes(data[12..14].try_into().expect("slice length is 2")),
reserved: u16::from_le_bytes(data[14..16].try_into().expect("slice length is 2")),
data_start: u32::from_le_bytes(data[16..20].try_into().expect("slice length is 4")),
free_space: u32::from_le_bytes(data[20..24].try_into().expect("slice length is 4")),
checksum: u32::from_le_bytes(data[24..28].try_into().expect("slice length is 4")),
}
}
fn write_header(data: &mut [u8; BUCKET_PAGE_SIZE], header: &BucketHeader) {
data[0..8].copy_from_slice(&header.magic.to_le_bytes());
data[8..10].copy_from_slice(&header.version.to_le_bytes());
data[10..12].copy_from_slice(&header.flags.to_le_bytes());
data[12..14].copy_from_slice(&header.entry_count.to_le_bytes());
data[14..16].copy_from_slice(&header.reserved.to_le_bytes());
data[16..20].copy_from_slice(&header.data_start.to_le_bytes());
data[20..24].copy_from_slice(&header.free_space.to_le_bytes());
data[24..28].copy_from_slice(&header.checksum.to_le_bytes());
}
}
impl Default for StringBucket {
fn default() -> Self {
Self::new()
}
}
impl std::fmt::Debug for StringBucket {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let header = self.header();
f.debug_struct("StringBucket")
.field("entry_count", &header.entry_count)
.field("data_start", &header.data_start)
.field("free_space", &header.free_space)
.field("has_values", &header.has_values())
.finish()
}
}
pub struct BucketIterator<'a> {
bucket: &'a StringBucket,
index: usize,
count: usize,
}
impl<'a> Iterator for BucketIterator<'a> {
type Item = (StringEntry, &'a [u8]);
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.count {
return None;
}
let entry = self.bucket.get_entry(self.index)?;
let suffix = self.bucket.get_suffix(&entry);
self.index += 1;
Some((entry, suffix))
}
fn size_hint(&self) -> (usize, Option<usize>) {
let remaining = self.count - self.index;
(remaining, Some(remaining))
}
}
impl ExactSizeIterator for BucketIterator<'_> {}
#[derive(Debug, Clone)]
pub struct SplitResult {
pub left: StringBucket,
pub right: StringBucket,
pub split_key: Vec<u8>,
}
#[derive(Debug)]
pub struct SplitByByteResult {
pub buckets: std::collections::BTreeMap<u8, StringBucket>,
pub finals: Vec<(Vec<u8>, Option<Vec<u8>>)>,
pub overflow: Vec<(u8, Vec<u8>, Option<Vec<u8>>)>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BucketError {
InvalidMagic {
expected: u64,
found: u64,
},
UnsupportedVersion {
max_supported: u16,
found: u16,
},
InvalidSize {
expected: usize,
found: usize,
},
InsufficientSpace {
needed: usize,
available: usize,
},
BucketFull,
DataTooLarge {
max: usize,
found: usize,
},
InvalidIndex,
Corrupted(String),
}
impl std::fmt::Display for BucketError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BucketError::InvalidMagic { expected, found } => {
write!(
f,
"invalid bucket magic: expected 0x{:016x}, found 0x{:016x}",
expected, found
)
}
BucketError::UnsupportedVersion {
max_supported,
found,
} => {
write!(
f,
"unsupported bucket version: max supported {}, found {}",
max_supported, found
)
}
BucketError::InvalidSize { expected, found } => {
write!(
f,
"invalid bucket size: expected {}, found {}",
expected, found
)
}
BucketError::InsufficientSpace { needed, available } => {
write!(
f,
"insufficient space: need {} bytes, have {} available",
needed, available
)
}
BucketError::BucketFull => write!(f, "bucket is full"),
BucketError::DataTooLarge { max, found } => {
write!(f, "data too large: max {} bytes, found {}", max, found)
}
BucketError::InvalidIndex => write!(f, "invalid entry index"),
BucketError::Corrupted(msg) => write!(f, "bucket corrupted: {}", msg),
}
}
}
impl std::error::Error for BucketError {}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_bucket() {
let bucket = StringBucket::new();
let header = bucket.header();
assert_eq!(header.magic, BUCKET_MAGIC);
assert_eq!(header.version, BUCKET_VERSION);
assert_eq!(header.entry_count, 0);
assert!(bucket.is_empty());
assert!(!bucket.is_full());
}
#[test]
fn test_insert_and_search() {
let mut bucket = StringBucket::new();
assert!(bucket.insert_key(b"apple").unwrap());
assert!(bucket.insert_key(b"banana").unwrap());
assert!(bucket.insert_key(b"cherry").unwrap());
assert_eq!(bucket.len(), 3);
assert!(bucket.contains(b"apple"));
assert!(bucket.contains(b"banana"));
assert!(bucket.contains(b"cherry"));
assert!(!bucket.contains(b"date"));
let entries: Vec<_> = bucket.iter().map(|(_, s)| s.to_vec()).collect();
assert_eq!(
entries,
vec![b"apple".to_vec(), b"banana".to_vec(), b"cherry".to_vec()]
);
}
#[test]
fn test_insert_with_values() {
let mut bucket = StringBucket::with_values();
bucket.insert(b"key1", b"value1").unwrap();
bucket.insert(b"key2", b"value2").unwrap();
let header = bucket.header();
assert!(header.has_values());
match bucket.search(b"key1") {
Ok(idx) => {
let entry = bucket.get_entry(idx).unwrap();
let value = bucket.get_value(&entry).unwrap();
assert_eq!(value, b"value1");
}
Err(_) => panic!("key1 not found"),
}
}
#[test]
fn test_duplicate_insert() {
let mut bucket = StringBucket::new();
assert!(bucket.insert_key(b"test").unwrap()); assert!(!bucket.insert_key(b"test").unwrap());
assert_eq!(bucket.len(), 1);
}
#[test]
fn test_remove() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"apple").unwrap();
bucket.insert_key(b"banana").unwrap();
bucket.insert_key(b"cherry").unwrap();
assert_eq!(bucket.len(), 3);
let removed = bucket.remove(b"banana");
assert!(removed.is_some());
assert_eq!(bucket.len(), 2);
assert!(bucket.contains(b"apple"));
assert!(!bucket.contains(b"banana"));
assert!(bucket.contains(b"cherry"));
let entries: Vec<_> = bucket.iter().map(|(_, s)| s.to_vec()).collect();
assert_eq!(entries, vec![b"apple".to_vec(), b"cherry".to_vec()]);
}
#[test]
fn test_remove_not_found() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"apple").unwrap();
let removed = bucket.remove(b"banana");
assert!(removed.is_none());
assert_eq!(bucket.len(), 1);
}
#[test]
fn test_sorted_insertion() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"zebra").unwrap();
bucket.insert_key(b"apple").unwrap();
bucket.insert_key(b"mango").unwrap();
let entries: Vec<_> = bucket.iter().map(|(_, s)| s.to_vec()).collect();
assert_eq!(
entries,
vec![b"apple".to_vec(), b"mango".to_vec(), b"zebra".to_vec()]
);
}
#[test]
fn test_binary_search() {
let mut bucket = StringBucket::new();
for i in 0..50 {
let key = format!("key{:03}", i);
bucket.insert_key(key.as_bytes()).unwrap();
}
assert_eq!(bucket.len(), 50);
assert_eq!(bucket.search(b"key000"), Ok(0));
assert_eq!(bucket.search(b"key025"), Ok(25));
assert_eq!(bucket.search(b"key049"), Ok(49));
assert_eq!(bucket.search(b"key000a"), Err(1));
assert_eq!(bucket.search(b"aaa"), Err(0)); assert_eq!(bucket.search(b"zzz"), Err(50)); }
#[test]
fn test_bucket_serialization() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"hello").unwrap();
bucket.insert_key(b"world").unwrap();
let bytes = bucket.as_bytes().clone();
let bucket2 = StringBucket::from_bytes(&bytes).unwrap();
assert_eq!(bucket2.len(), 2);
assert!(bucket2.contains(b"hello"));
assert!(bucket2.contains(b"world"));
}
#[test]
fn test_invalid_magic() {
let mut data = [0u8; BUCKET_PAGE_SIZE];
data[0..8].copy_from_slice(&0u64.to_le_bytes());
let result = StringBucket::from_bytes(&data);
assert!(matches!(result, Err(BucketError::InvalidMagic { .. })));
}
#[test]
fn test_header_validation() {
let header = BucketHeader::new();
assert!(header.validate().is_ok());
let invalid_header = BucketHeader { magic: 0, ..header };
assert!(matches!(
invalid_header.validate(),
Err(BucketError::InvalidMagic { .. })
));
}
#[test]
fn test_split_threshold() {
let mut bucket = StringBucket::new();
let mut i = 0;
while !bucket.header().should_split() && i < MAX_BUCKET_ENTRIES {
let key = format!("key{:06}", i);
if bucket.insert_key(key.as_bytes()).is_err() {
break;
}
i += 1;
}
assert!(bucket.header().should_split() || bucket.is_full());
}
#[test]
fn test_entry_serialization() {
let entry = StringEntry {
suffix_offset: 1234,
suffix_len: 56,
value_offset: 7890,
value_len: 12,
};
let bytes = entry.to_bytes();
let restored = StringEntry::from_bytes(&bytes);
assert_eq!(entry, restored);
}
#[test]
fn test_empty_suffix() {
let mut bucket = StringBucket::new();
assert!(bucket.insert_key(b"").unwrap());
assert!(bucket.contains(b""));
assert_eq!(bucket.len(), 1);
}
#[test]
fn test_update_value() {
let mut bucket = StringBucket::with_values();
bucket.insert(b"key", b"old_value").unwrap();
bucket.insert(b"key", b"new").unwrap();
match bucket.search(b"key") {
Ok(idx) => {
let entry = bucket.get_entry(idx).unwrap();
let value = bucket.get_value(&entry).unwrap();
assert_eq!(value, b"new");
}
Err(_) => panic!("key not found"),
}
}
#[test]
fn test_split_empty_bucket() {
let bucket = StringBucket::new();
assert!(bucket.split().is_none());
}
#[test]
fn test_split_single_entry() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"test").unwrap();
assert!(bucket.split().is_none());
}
#[test]
fn test_split_basic() {
let mut bucket = StringBucket::new();
for i in 0..10 {
let key = format!("key{:02}", i);
bucket.insert_key(key.as_bytes()).unwrap();
}
let result = bucket.split().expect("should split");
assert_eq!(result.left.len(), 5);
assert!(result.left.contains(b"key00"));
assert!(result.left.contains(b"key04"));
assert!(!result.left.contains(b"key05"));
assert_eq!(result.right.len(), 5);
assert!(result.right.contains(b"key05"));
assert!(result.right.contains(b"key09"));
assert!(!result.right.contains(b"key04"));
assert_eq!(result.split_key, b"key05");
}
#[test]
fn test_split_with_values() {
let mut bucket = StringBucket::with_values();
bucket.insert(b"a", b"val_a").unwrap();
bucket.insert(b"b", b"val_b").unwrap();
bucket.insert(b"c", b"val_c").unwrap();
bucket.insert(b"d", b"val_d").unwrap();
let result = bucket.split().expect("should split");
assert!(result.left.header().has_values());
assert!(result.right.header().has_values());
if let Ok(idx) = result.left.search(b"a") {
let entry = result.left.get_entry(idx).unwrap();
assert_eq!(result.left.get_value(&entry), Some(b"val_a".as_slice()));
}
}
#[test]
fn test_split_by_first_byte() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"apple").unwrap();
bucket.insert_key(b"apricot").unwrap();
bucket.insert_key(b"banana").unwrap();
bucket.insert_key(b"berry").unwrap();
bucket.insert_key(b"cherry").unwrap();
bucket.insert_key(b"").unwrap();
let result = bucket.split_by_first_byte();
assert_eq!(result.buckets.len(), 3);
let a_bucket = result.buckets.get(&b'a').expect("'a' bucket exists");
assert_eq!(a_bucket.len(), 2);
assert!(a_bucket.contains(b"pple"));
assert!(a_bucket.contains(b"pricot"));
let b_bucket = result.buckets.get(&b'b').expect("'b' bucket exists");
assert_eq!(b_bucket.len(), 2);
assert!(b_bucket.contains(b"anana"));
assert!(b_bucket.contains(b"erry"));
let c_bucket = result.buckets.get(&b'c').expect("'c' bucket exists");
assert_eq!(c_bucket.len(), 1);
assert!(c_bucket.contains(b"herry"));
assert_eq!(result.finals.len(), 1);
assert_eq!(result.finals[0].0, Vec::<u8>::new());
}
#[test]
fn test_compact() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"aaa").unwrap();
bucket.insert_key(b"bbb").unwrap();
bucket.insert_key(b"ccc").unwrap();
bucket.remove(b"bbb");
let before_compact = bucket.header().free_space;
bucket.compact();
let after_compact = bucket.header().free_space;
assert!(after_compact >= before_compact);
assert!(bucket.contains(b"aaa"));
assert!(!bucket.contains(b"bbb"));
assert!(bucket.contains(b"ccc"));
assert!(bucket.header().flags & flags::COMPACTED != 0);
}
#[test]
fn test_merge_buckets() {
let mut bucket1 = StringBucket::new();
let mut bucket2 = StringBucket::new();
bucket1.insert_key(b"apple").unwrap();
bucket1.insert_key(b"cherry").unwrap();
bucket2.insert_key(b"banana").unwrap();
bucket2.insert_key(b"date").unwrap();
bucket1.merge(&bucket2).unwrap();
assert_eq!(bucket1.len(), 4);
assert!(bucket1.contains(b"apple"));
assert!(bucket1.contains(b"banana"));
assert!(bucket1.contains(b"cherry"));
assert!(bucket1.contains(b"date"));
let entries: Vec<_> = bucket1.iter().map(|(_, s)| s.to_vec()).collect();
assert_eq!(
entries,
vec![
b"apple".to_vec(),
b"banana".to_vec(),
b"cherry".to_vec(),
b"date".to_vec()
]
);
}
#[test]
fn test_split_and_merge_roundtrip() {
let mut original = StringBucket::new();
for i in 0..20 {
let key = format!("key{:02}", i);
original.insert_key(key.as_bytes()).unwrap();
}
let original_entries: Vec<_> = original.iter().map(|(_, s)| s.to_vec()).collect();
let result = original.split().expect("should split");
let mut merged = result.left;
merged.merge(&result.right).unwrap();
let merged_entries: Vec<_> = merged.iter().map(|(_, s)| s.to_vec()).collect();
assert_eq!(original_entries, merged_entries);
}
#[test]
fn test_upsert_pattern_no_free_space_overflow() {
let mut bucket = StringBucket::with_values();
let suffix = b"checkpoint_key";
let value = b"some_val";
bucket.insert(suffix, value).expect("first insert");
for i in 0..200 {
let removed = bucket.remove(suffix);
assert!(removed.is_some(), "remove should succeed on cycle {}", i);
bucket
.insert(suffix, value)
.unwrap_or_else(|e| panic!("insert should not overflow on cycle {}: {}", i, e));
let header = bucket.header();
assert_eq!(
header.free_space,
header.available_space() as u32,
"free_space drift detected on cycle {}",
i
);
}
}
#[test]
fn test_remove_credits_directory_space_to_free_space() {
let mut bucket = StringBucket::new();
bucket.insert_key(b"alpha").expect("insert alpha");
bucket.insert_key(b"bravo").expect("insert bravo");
let free_before = bucket.header().free_space;
bucket.remove(b"bravo");
let header_after = bucket.header();
assert_eq!(
header_after.free_space,
header_after.available_space() as u32,
"free_space should match available_space after remove"
);
assert!(
header_after.free_space > free_before,
"free_space should increase after remove: before={}, after={}",
free_before,
header_after.free_space
);
}
}