use crate::error::Result;
use crate::storage::serialization::vint::encode_unsigned;
use crate::storage::write_engine::mutation::DecoratedKey;
/// Builds a partition index in memory by appending serialized entries to a
/// byte buffer and counting how many entries have been added.
#[derive(Debug)]
pub struct IndexWriter {
/// Serialized index entries, stored back to back in the order they were added.
buffer: Vec<u8>,
/// Number of entries successfully appended via `add_partition`.
entry_count: usize,
}
/// Position and size of one entry inside the index buffer, as returned by
/// `IndexWriter::add_partition`.
#[derive(Debug, Clone, Copy)]
pub struct IndexEntryInfo {
/// Byte offset of the entry's first byte, i.e. the buffer length just before
/// the entry was written.
pub index_offset: u64,
/// Number of bytes the entry added to the index buffer.
pub entry_size: usize,
}
impl IndexWriter {
    /// Creates a writer with an empty buffer and an entry count of zero.
    pub fn new() -> Self {
        Self {
            buffer: Vec::new(),
            entry_count: 0,
        }
    }

    /// Appends one index entry for `key` that points at `data_offset`.
    ///
    /// Returns the offset at which the entry starts inside the index buffer
    /// together with the number of bytes the entry occupies.
    ///
    /// # Panics
    ///
    /// Panics if the key is longer than `u16::MAX` bytes, because the entry
    /// format stores the key length in a two-byte prefix.
    pub fn add_partition(
        &mut self,
        key: &DecoratedKey,
        data_offset: u64,
    ) -> Result<IndexEntryInfo> {
        // The new entry begins exactly where the buffer currently ends.
        let index_offset = self.buffer.len() as u64;
        let entry_size = self.write_entry(key, data_offset)?;
        self.entry_count += 1;
        Ok(IndexEntryInfo {
            index_offset,
            entry_size,
        })
    }

    /// Serializes a single entry onto the end of the buffer and returns the
    /// number of bytes written.
    ///
    /// Layout, in order:
    /// 1. key length as a big-endian `u16`,
    /// 2. the raw bytes of `key.key`,
    /// 3. `data_offset` encoded with `encode_unsigned`,
    /// 4. a zero encoded with `encode_unsigned` (the tests in this file refer
    ///    to this field as the promoted size).
    ///
    /// # Panics
    ///
    /// Panics if the key is longer than `u16::MAX` bytes. Previously the
    /// length was truncated with `as u16` while the full key was still
    /// written, which produced an entry whose length prefix did not match its
    /// payload.
    fn write_entry(&mut self, key: &DecoratedKey, data_offset: u64) -> Result<usize> {
        // Validate before touching the buffer so a rejected key never leaves a
        // partially written entry behind.
        let key_bytes = key.key.len();
        assert!(
            key_bytes <= usize::from(u16::MAX),
            "partition key is {} bytes; an index entry can encode at most {} bytes",
            key_bytes,
            u16::MAX
        );
        // Lossless: bounded by the assertion above.
        let key_len = key_bytes as u16;

        let start_len = self.buffer.len();
        self.buffer.extend_from_slice(&key_len.to_be_bytes());
        self.buffer.extend_from_slice(&key.key);
        encode_unsigned(data_offset, &mut self.buffer);
        encode_unsigned(0, &mut self.buffer);

        Ok(self.buffer.len() - start_len)
    }

    /// Consumes the writer and returns the accumulated index bytes.
    ///
    /// The current implementation always returns `Ok`.
    pub fn finish(self) -> Result<Vec<u8>> {
        Ok(self.buffer)
    }

    /// Returns how many entries have been added so far.
    pub fn entry_count(&self) -> usize {
        self.entry_count
    }
}
impl Default for IndexWriter {
fn default() -> Self {
Self::new()
}
}
// Unit tests for `IndexWriter`. They pin the exact byte layout of an entry:
// 2-byte big-endian key length, raw key bytes, encoded data offset, and a
// trailing encoded zero. The first argument to `DecoratedKey::new` is
// presumably the token; `write_entry` only serializes `key.key`.
#[cfg(test)]
mod tests {
use super::*;
// A fresh writer reports zero entries.
#[test]
fn test_index_writer_new() {
let writer = IndexWriter::new();
assert_eq!(writer.entry_count(), 0);
}
// A 4-byte key at offset 0 produces one entry starting at index offset 0.
#[test]
fn test_add_single_partition_int_key() {
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(12345, vec![0x00, 0x00, 0x00, 0x2A]);
let info = writer.add_partition(&key, 0).unwrap();
assert_eq!(writer.entry_count(), 1);
assert_eq!(info.index_offset, 0);
// 2 (length) + 4 (key) + 1 (offset) + 1 (trailing zero) = 8
assert_eq!(info.entry_size, 8);
}
// A 16-byte key at offset 0 produces a 20-byte entry.
#[test]
fn test_add_single_partition_uuid_key() {
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(12345, vec![0xBB; 16]);
let info = writer.add_partition(&key, 0).unwrap();
assert_eq!(writer.entry_count(), 1);
// 2 (length) + 16 (key) + 1 (offset) + 1 (trailing zero) = 20
assert_eq!(info.entry_size, 20);
}
// The key bytes are written verbatim after the 2-byte length prefix.
#[test]
fn test_raw_key_bytes_written() {
let mut writer = IndexWriter::new();
let pk_bytes = vec![0x00, 0x00, 0x00, 0x2A];
let key = DecoratedKey::new(12345, pk_bytes.clone());
writer.add_partition(&key, 0).unwrap();
let bytes = writer.finish().unwrap();
assert_eq!(&bytes[0..2], &[0x00, 0x04], "Key length should be 4");
assert_eq!(&bytes[2..6], &pk_bytes, "Should be raw key bytes");
assert_eq!(bytes[6], 0x00, "Offset should be 0");
assert_eq!(bytes[7], 0x00, "Promoted size should be 0");
}
// Same check as above with a 16-byte key: length prefix 0x0010, then raw bytes.
#[test]
fn test_uuid_key_raw_bytes() {
let mut writer = IndexWriter::new();
let pk_bytes = vec![
0x55, 0x0e, 0x84, 0x00, 0xe2, 0x9b, 0x41, 0xd4, 0xa7, 0x16, 0x44, 0x66, 0x55, 0x44,
0x00, 0x00,
];
let key = DecoratedKey::new(12345, pk_bytes.clone());
writer.add_partition(&key, 0).unwrap();
let bytes = writer.finish().unwrap();
assert_eq!(&bytes[0..2], &[0x00, 0x10], "Key length should be 16");
assert_eq!(&bytes[2..18], &pk_bytes, "Should be raw UUID bytes");
}
// Each entry's index offset equals the sum of the sizes of the entries before it.
#[test]
fn test_add_multiple_partitions() {
let mut writer = IndexWriter::new();
let key1 = DecoratedKey::new(100, vec![0x00, 0x00, 0x00, 0x01]);
let key2 = DecoratedKey::new(200, vec![0x00, 0x00, 0x00, 0x02]);
let key3 = DecoratedKey::new(300, vec![0x00, 0x00, 0x00, 0x03]);
let info1 = writer.add_partition(&key1, 0).unwrap();
let info2 = writer.add_partition(&key2, 150).unwrap();
let info3 = writer.add_partition(&key3, 300).unwrap();
assert_eq!(writer.entry_count(), 3);
assert_eq!(info1.index_offset, 0);
assert_eq!(info2.index_offset, info1.entry_size as u64);
assert_eq!(
info3.index_offset,
(info1.entry_size + info2.entry_size) as u64
);
}
// `finish` returns all entries concatenated in insertion order.
#[test]
fn test_finish_multiple_entries() {
let mut writer = IndexWriter::new();
let key1 = DecoratedKey::new(100, vec![0x00, 0x00, 0x00, 0x01]);
let key2 = DecoratedKey::new(200, vec![0x00, 0x00, 0x00, 0x02]);
writer.add_partition(&key1, 0).unwrap();
writer.add_partition(&key2, 150).unwrap();
let bytes = writer.finish().unwrap();
// Entry 1 is 8 bytes (offset 0 fits in one byte); entry 2 is 9 bytes
// (offset 150 is expected to need two bytes): 8 + 9 = 17.
assert_eq!(bytes.len(), 17);
assert_eq!(&bytes[0..2], &[0x00, 0x04]);
// The second entry's length prefix starts right after the first 8-byte entry.
assert_eq!(&bytes[8..10], &[0x00, 0x04]);
}
// Offset 127 is expected to encode as the single byte 0x7F.
#[test]
fn test_position_encoding() {
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(12345, vec![0x00, 0x00, 0x00, 0x2A]);
writer.add_partition(&key, 127).unwrap();
let bytes = writer.finish().unwrap();
assert_eq!(bytes[6], 0x7F);
assert_eq!(bytes[7], 0x00); }
// Offset 12381 (0x305D) is expected to encode as the two bytes 0xB0, 0x5D,
// followed by the one-byte trailing zero.
#[test]
fn test_position_encoding_large_offset() {
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(12345, vec![0x00, 0x00, 0x00, 0x2A]);
writer.add_partition(&key, 12381).unwrap();
let bytes = writer.finish().unwrap();
assert_eq!(bytes[6], 0xB0);
assert_eq!(bytes[7], 0x5D);
assert_eq!(bytes[8], 0x00);
assert_eq!(bytes.len(), 9);
}
// Entry size grows with key length: with offset 0 the fixed overhead is
// 4 bytes (2 length + 1 offset + 1 trailing zero).
#[test]
fn test_variable_key_sizes() {
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(100, vec![0x42]);
let info = writer.add_partition(&key, 0).unwrap();
assert_eq!(info.entry_size, 5);
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(100, vec![0x00, 0x00, 0x00, 0x2A]);
let info = writer.add_partition(&key, 0).unwrap();
assert_eq!(info.entry_size, 8);
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(100, vec![0; 8]);
let info = writer.add_partition(&key, 0).unwrap();
assert_eq!(info.entry_size, 12);
let mut writer = IndexWriter::new();
let key = DecoratedKey::new(100, vec![0; 16]);
let info = writer.add_partition(&key, 0).unwrap();
assert_eq!(info.entry_size, 20); }
// Finishing a writer with no entries yields an empty byte vector.
#[test]
fn test_empty_index() {
let writer = IndexWriter::new();
let bytes = writer.finish().unwrap();
assert_eq!(bytes.len(), 0);
}
// Entries appear in the output in the order they were added.
#[test]
fn test_token_order_preservation() {
let mut writer = IndexWriter::new();
let key1 = DecoratedKey::new(100, vec![0x01]);
let key2 = DecoratedKey::new(200, vec![0x02]);
let key3 = DecoratedKey::new(300, vec![0x03]);
writer.add_partition(&key1, 0).unwrap();
writer.add_partition(&key2, 100).unwrap();
writer.add_partition(&key3, 200).unwrap();
let bytes = writer.finish().unwrap();
// Sizes 5 + 5 + 6 (offset 200 is expected to need two bytes) = 16.
assert_eq!(bytes.len(), 16);
// Each entry starts with the length prefix for a 1-byte key.
assert_eq!(&bytes[0..2], &[0x00, 0x01]);
assert_eq!(&bytes[5..7], &[0x00, 0x01]);
assert_eq!(&bytes[10..12], &[0x00, 0x01]);
}
// Pins the offset values at which the encoded offset grows by one byte:
// 127 -> 1 byte, 128 and 16383 -> 2 bytes, 16384 -> 3 bytes.
#[test]
fn test_vint_encoding_boundaries() {
let key = DecoratedKey::new(12345, vec![0x00, 0x00, 0x00, 0x2A]);
let mut writer = IndexWriter::new();
writer.add_partition(&key, 127).unwrap();
assert_eq!(writer.finish().unwrap().len(), 8);
let mut writer = IndexWriter::new();
writer.add_partition(&key, 128).unwrap();
assert_eq!(writer.finish().unwrap().len(), 9);
let mut writer = IndexWriter::new();
writer.add_partition(&key, 16383).unwrap();
assert_eq!(writer.finish().unwrap().len(), 9);
let mut writer = IndexWriter::new();
writer.add_partition(&key, 16384).unwrap();
assert_eq!(writer.finish().unwrap().len(), 10); }
// Index offsets and entry sizes stay consistent across entries with
// different key lengths and offset widths.
#[test]
fn test_index_offset_tracking() {
let mut writer = IndexWriter::new();
let key1 = DecoratedKey::new(100, vec![0x01, 0x02, 0x03, 0x04]);
let info1 = writer.add_partition(&key1, 0).unwrap();
let key2 = DecoratedKey::new(200, vec![0x05, 0x06]);
let info2 = writer.add_partition(&key2, 127).unwrap();
let key3 = DecoratedKey::new(300, vec![0x07]);
let info3 = writer.add_partition(&key3, 12381).unwrap();
assert_eq!(info1.index_offset, 0);
assert_eq!(info1.entry_size, 8, "Entry 1: 2 + 4 + 1 + 1 = 8");
assert_eq!(info2.index_offset, 8);
assert_eq!(info2.entry_size, 6, "Entry 2: 2 + 2 + 1 + 1 = 6");
assert_eq!(info3.index_offset, 14);
assert_eq!(info3.entry_size, 6, "Entry 3: 2 + 1 + 2 + 1 = 6");
let bytes = writer.finish().unwrap();
assert_eq!(
bytes.len(),
info1.entry_size + info2.entry_size + info3.entry_size,
"Total size matches sum of entry sizes"
);
}
// Three 4-byte keys with negative and positive first arguments and
// increasing data offsets.
#[test]
fn test_realistic_scenario() {
let mut writer = IndexWriter::new();
let key1 = DecoratedKey::new(-5000000000, vec![0x00, 0x00, 0x03, 0xE9]);
writer.add_partition(&key1, 0).unwrap();
let key2 = DecoratedKey::new(-2000000000, vec![0x00, 0x00, 0x03, 0xEA]);
writer.add_partition(&key2, 250).unwrap();
let key3 = DecoratedKey::new(3000000000, vec![0x00, 0x00, 0x03, 0xEB]);
writer.add_partition(&key3, 500).unwrap();
assert_eq!(writer.entry_count(), 3);
let bytes = writer.finish().unwrap();
// 8 (offset 0) + 9 (offset 250) + 9 (offset 500) = 26
assert_eq!(bytes.len(), 26);
}
}