use crate::core::{FieldId, LuciError, Result, SegmentId};
use crate::storage::block::{BLOCK_SIZE, BlockId, Extent};
/// Metadata record describing one stored segment.
///
/// Pairs a segment identifier with the block [`Extent`] recorded for it, a
/// generation number, and the length of the segment's data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SegmentEntry {
    /// Identifier of the segment this entry describes.
    pub segment_id: SegmentId,
    /// Block extent recorded for the segment.
    pub extent: Extent,
    /// Generation number recorded for the segment.
    pub generation: u64,
    /// Length of the segment's data (presumably in bytes — confirm against the writer).
    pub data_len: u64,
}

impl SegmentEntry {
    /// Builds an entry from its four components.
    ///
    /// This is a `const fn`, so it can also be used to initialise constants.
    pub const fn new(
        segment_id: SegmentId,
        extent: Extent,
        generation: u64,
        data_len: u64,
    ) -> Self {
        SegmentEntry {
            data_len,
            generation,
            extent,
            segment_id,
        }
    }
}
/// Metadata record describing the stored vector index of one field.
///
/// Pairs a field identifier with the block [`Extent`] recorded for its index
/// and the length of the index data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct VectorIndexEntry {
    /// Identifier of the field the index belongs to.
    pub field_id: FieldId,
    /// Block extent recorded for the index.
    pub extent: Extent,
    /// Length of the index data (presumably in bytes — confirm against the writer).
    pub data_len: u64,
}

impl VectorIndexEntry {
    /// Builds an entry from its three components.
    ///
    /// This is a `const fn`, so it can also be used to initialise constants.
    pub const fn new(field_id: FieldId, extent: Extent, data_len: u64) -> Self {
        VectorIndexEntry {
            data_len,
            extent,
            field_id,
        }
    }
}
/// In-memory form of a metadata block.
///
/// Holds the segment table, the vector index table, the free extent list, the
/// total block count, and an opaque user metadata blob. The binary encoding is
/// produced by `to_bytes` and parsed by `from_bytes`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MetadataSnapshot {
    /// One entry per segment recorded in the snapshot.
    pub segments: Vec<SegmentEntry>,
    /// One entry per vector index recorded in the snapshot.
    pub vector_indexes: Vec<VectorIndexEntry>,
    /// Total block count recorded in the snapshot.
    pub total_blocks: u64,
    /// Extents recorded as free.
    pub free_list: Vec<Extent>,
    /// Opaque bytes stored alongside the tables; not interpreted here.
    pub user_metadata: Vec<u8>,
}
/// Encoded size of the fixed header: `total_blocks` (u64) followed by three
/// u32 counts (segments, free extents, vector indexes).
const HEADER_BYTES: usize = 8 + 3 * 4;
/// Encoded size of one segment entry: segment id (u64), extent start (u64),
/// extent count (u32), generation (u64), data length (u64).
const SEGMENT_ENTRY_BYTES: usize = 8 + 8 + 4 + 8 + 8;
/// Encoded size of one free extent: start (u64), count (u32).
const FREE_EXTENT_BYTES: usize = 8 + 4;
/// Encoded size of one vector index entry: field id (u16), extent start (u64),
/// extent count (u32), data length (u64).
const VECTOR_INDEX_ENTRY_BYTES: usize = 2 + 8 + 4 + 8;
impl MetadataSnapshot {
    /// Returns a snapshot with no segments, vector indexes, free extents or
    /// user metadata, and a `total_blocks` of zero.
    pub fn empty() -> Self {
        Self {
            segments: Vec::new(),
            vector_indexes: Vec::new(),
            total_blocks: 0,
            free_list: Vec::new(),
            user_metadata: Vec::new(),
        }
    }

    /// Returns the exact number of bytes [`to_bytes`](Self::to_bytes) produces
    /// for this snapshot.
    pub fn serialized_size(&self) -> usize {
        HEADER_BYTES
            + self.segments.len() * SEGMENT_ENTRY_BYTES
            + self.free_list.len() * FREE_EXTENT_BYTES
            + self.vector_indexes.len() * VECTOR_INDEX_ENTRY_BYTES
            + 4
            + self.user_metadata.len()
    }

    /// Encodes the snapshot into its little-endian binary form.
    ///
    /// Layout, in order:
    ///
    /// 1. header: `total_blocks` (u64), segment count (u32), free extent count
    ///    (u32), vector index count (u32);
    /// 2. segment entries: id (u64), extent start (u64), extent count (u32),
    ///    generation (u64), data length (u64);
    /// 3. free extents: start (u64), count (u32);
    /// 4. vector index entries: field id (u16), extent start (u64), extent
    ///    count (u32), data length (u64);
    /// 5. user metadata: length (u32) followed by the raw bytes.
    ///
    /// NOTE: the counts and the user metadata length are written with an
    /// `as u32` cast, so a length above `u32::MAX` is silently truncated.
    pub fn to_bytes(&self) -> Vec<u8> {
        let size = self.serialized_size();
        let mut buf = Vec::with_capacity(size);

        buf.extend_from_slice(&self.total_blocks.to_le_bytes());
        buf.extend_from_slice(&(self.segments.len() as u32).to_le_bytes());
        buf.extend_from_slice(&(self.free_list.len() as u32).to_le_bytes());
        buf.extend_from_slice(&(self.vector_indexes.len() as u32).to_le_bytes());

        for entry in &self.segments {
            buf.extend_from_slice(&entry.segment_id.as_u64().to_le_bytes());
            buf.extend_from_slice(&entry.extent.start.as_u64().to_le_bytes());
            buf.extend_from_slice(&entry.extent.count.to_le_bytes());
            buf.extend_from_slice(&entry.generation.to_le_bytes());
            buf.extend_from_slice(&entry.data_len.to_le_bytes());
        }

        for extent in &self.free_list {
            buf.extend_from_slice(&extent.start.as_u64().to_le_bytes());
            buf.extend_from_slice(&extent.count.to_le_bytes());
        }

        for entry in &self.vector_indexes {
            buf.extend_from_slice(&entry.field_id.as_u16().to_le_bytes());
            buf.extend_from_slice(&entry.extent.start.as_u64().to_le_bytes());
            buf.extend_from_slice(&entry.extent.count.to_le_bytes());
            buf.extend_from_slice(&entry.data_len.to_le_bytes());
        }

        buf.extend_from_slice(&(self.user_metadata.len() as u32).to_le_bytes());
        buf.extend_from_slice(&self.user_metadata);

        debug_assert_eq!(buf.len(), size);
        buf
    }

    /// Decodes a snapshot from the layout written by [`to_bytes`](Self::to_bytes).
    ///
    /// Bytes after the encoded snapshot are ignored. The user metadata section
    /// is optional: when fewer than four bytes follow the tables, or when the
    /// stored length runs past the end of `data`, `user_metadata` is returned
    /// empty instead of failing.
    ///
    /// # Errors
    ///
    /// Returns [`LuciError::IndexCorrupted`] when `data` is shorter than the
    /// header, when the header counts describe tables larger than `data`, or
    /// when the table size implied by the counts does not fit in `usize`.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        if data.len() < HEADER_BYTES {
            return Err(LuciError::IndexCorrupted(
                "metadata block too small for header".into(),
            ));
        }

        let mut offset = 0;
        let total_blocks = u64::from_le_bytes(Self::take_array(data, &mut offset));
        let segment_count = u32::from_le_bytes(Self::take_array(data, &mut offset)) as usize;
        let free_extent_count = u32::from_le_bytes(Self::take_array(data, &mut offset)) as usize;
        let vector_index_count = u32::from_le_bytes(Self::take_array(data, &mut offset)) as usize;
        debug_assert_eq!(offset, HEADER_BYTES);

        // The counts come straight from the input, so the size they imply must
        // be computed with checked arithmetic: where `usize` is 32 bits a
        // wrapped product would let a corrupt header pass the length check.
        let expected_size =
            Self::fixed_sections_size(segment_count, free_extent_count, vector_index_count)
                .ok_or_else(|| {
                    LuciError::IndexCorrupted(
                        "metadata block entry counts overflow the addressable size".into(),
                    )
                })?;
        if data.len() < expected_size {
            return Err(LuciError::IndexCorrupted(format!(
                "metadata block truncated: need {expected_size} bytes, got {}",
                data.len()
            )));
        }

        // From here on every fixed-width read is within `expected_size`, which
        // was just checked against `data.len()`.
        let mut segments = Vec::with_capacity(segment_count);
        for _ in 0..segment_count {
            let segment_id = SegmentId::new(u64::from_le_bytes(Self::take_array(data, &mut offset)));
            let extent_start = BlockId::new(u64::from_le_bytes(Self::take_array(data, &mut offset)));
            let extent_count = u32::from_le_bytes(Self::take_array(data, &mut offset));
            let generation = u64::from_le_bytes(Self::take_array(data, &mut offset));
            let data_len = u64::from_le_bytes(Self::take_array(data, &mut offset));
            segments.push(SegmentEntry::new(
                segment_id,
                Extent::new(extent_start, extent_count),
                generation,
                data_len,
            ));
        }

        let mut free_list = Vec::with_capacity(free_extent_count);
        for _ in 0..free_extent_count {
            let start = BlockId::new(u64::from_le_bytes(Self::take_array(data, &mut offset)));
            let count = u32::from_le_bytes(Self::take_array(data, &mut offset));
            free_list.push(Extent::new(start, count));
        }

        let mut vector_indexes = Vec::with_capacity(vector_index_count);
        for _ in 0..vector_index_count {
            let field_id = FieldId::new(u16::from_le_bytes(Self::take_array(data, &mut offset)));
            let extent_start = BlockId::new(u64::from_le_bytes(Self::take_array(data, &mut offset)));
            let extent_count = u32::from_le_bytes(Self::take_array(data, &mut offset));
            let data_len = u64::from_le_bytes(Self::take_array(data, &mut offset));
            vector_indexes.push(VectorIndexEntry::new(
                field_id,
                Extent::new(extent_start, extent_count),
                data_len,
            ));
        }

        // `offset == expected_size <= data.len()` here, so the subtractions
        // below cannot underflow, and comparing against the remaining length
        // avoids the overflow that `offset + meta_len` could hit.
        let user_metadata = if data.len() - offset >= 4 {
            let meta_len = u32::from_le_bytes(Self::take_array(data, &mut offset)) as usize;
            if meta_len <= data.len() - offset {
                data[offset..offset + meta_len].to_vec()
            } else {
                // Length prefix runs past the input: treated as "no metadata".
                Vec::new()
            }
        } else {
            // No room for a length prefix: the section is absent.
            Vec::new()
        };

        Ok(Self {
            segments,
            vector_indexes,
            total_blocks,
            free_list,
            user_metadata,
        })
    }

    /// Returns `true` when [`serialized_size`](Self::serialized_size) is at
    /// most `BLOCK_SIZE`.
    pub fn fits_in_single_block(&self) -> bool {
        self.serialized_size() <= BLOCK_SIZE as usize
    }

    /// Size of the header plus the three fixed-width tables for the given
    /// entry counts, or `None` if that size does not fit in `usize`.
    fn fixed_sections_size(
        segment_count: usize,
        free_extent_count: usize,
        vector_index_count: usize,
    ) -> Option<usize> {
        let segment_bytes = segment_count.checked_mul(SEGMENT_ENTRY_BYTES)?;
        let free_bytes = free_extent_count.checked_mul(FREE_EXTENT_BYTES)?;
        let vector_bytes = vector_index_count.checked_mul(VECTOR_INDEX_ENTRY_BYTES)?;
        HEADER_BYTES
            .checked_add(segment_bytes)?
            .checked_add(free_bytes)?
            .checked_add(vector_bytes)
    }

    /// Copies the next `N` bytes of `data` starting at `*offset` into an array
    /// and advances `*offset` past them.
    ///
    /// Panics if fewer than `N` bytes remain; callers validate the length
    /// before reading.
    fn take_array<const N: usize>(data: &[u8], offset: &mut usize) -> [u8; N] {
        let mut out = [0u8; N];
        out.copy_from_slice(&data[*offset..*offset + N]);
        *offset += N;
        out
    }
}
/// Unit tests for the metadata snapshot types and their binary encoding.
#[cfg(test)]
mod tests {
    use super::*;

    /// Three segments and one free extent; no vector indexes, no user metadata.
    fn sample_snapshot() -> MetadataSnapshot {
        MetadataSnapshot {
            segments: vec![
                SegmentEntry::new(SegmentId::new(1), Extent::new(BlockId(0), 4), 1, 900_000),
                SegmentEntry::new(SegmentId::new(2), Extent::new(BlockId(4), 2), 1, 400_000),
                SegmentEntry::new(SegmentId::new(3), Extent::new(BlockId(8), 6), 2, 1_500_000),
            ],
            vector_indexes: Vec::new(),
            total_blocks: 14,
            free_list: vec![Extent::new(BlockId(6), 2)],
            user_metadata: Vec::new(),
        }
    }

    #[test]
    fn empty_snapshot() {
        let snap = MetadataSnapshot::empty();
        assert!(snap.segments.is_empty());
        assert!(snap.vector_indexes.is_empty());
        assert_eq!(snap.total_blocks, 0);
        assert!(snap.free_list.is_empty());
        assert!(snap.user_metadata.is_empty());
    }

    #[test]
    fn round_trip_empty() {
        let snap = MetadataSnapshot::empty();
        let bytes = snap.to_bytes();
        let snap2 = MetadataSnapshot::from_bytes(&bytes).unwrap();
        assert_eq!(snap, snap2);
    }

    #[test]
    fn round_trip_populated() {
        let snap = sample_snapshot();
        let bytes = snap.to_bytes();
        let snap2 = MetadataSnapshot::from_bytes(&bytes).unwrap();
        assert_eq!(snap, snap2);
    }

    #[test]
    fn serialized_size_matches_output() {
        let snap = sample_snapshot();
        let bytes = snap.to_bytes();
        assert_eq!(bytes.len(), snap.serialized_size());
        // 20 header + 3 * 36 segments + 1 * 12 free extent + 4 metadata length.
        assert_eq!(bytes.len(), 144);
    }

    #[test]
    fn serialized_size_counts_vector_indexes_and_user_metadata() {
        let mut snap = sample_snapshot();
        snap.vector_indexes = vec![VectorIndexEntry::new(
            FieldId::new(7),
            Extent::new(BlockId(100), 3),
            600_000,
        )];
        snap.user_metadata = vec![0xAB; 10];
        let bytes = snap.to_bytes();
        assert_eq!(bytes.len(), snap.serialized_size());
        // The 144 bytes of the sample plus one 22-byte index entry and 10 metadata bytes.
        assert_eq!(bytes.len(), 144 + 22 + 10);
    }

    #[test]
    fn round_trip_with_vector_indexes() {
        let mut snap = sample_snapshot();
        snap.vector_indexes = vec![
            VectorIndexEntry::new(FieldId::new(7), Extent::new(BlockId(100), 3), 600_000),
            VectorIndexEntry::new(FieldId::new(11), Extent::new(BlockId(103), 5), 1_200_000),
        ];
        let bytes = snap.to_bytes();
        let snap2 = MetadataSnapshot::from_bytes(&bytes).unwrap();
        assert_eq!(snap, snap2);
    }

    #[test]
    fn round_trip_with_user_metadata() {
        let mut snap = sample_snapshot();
        snap.user_metadata = b"{\"schema\":3}".to_vec();
        let bytes = snap.to_bytes();
        let snap2 = MetadataSnapshot::from_bytes(&bytes).unwrap();
        assert_eq!(snap, snap2);
        assert_eq!(snap2.user_metadata, b"{\"schema\":3}".to_vec());
    }

    #[test]
    fn missing_user_metadata_section_reads_as_empty() {
        let snap = sample_snapshot();
        let bytes = snap.to_bytes();
        // Drop the trailing 4-byte metadata length so the input ends after the tables.
        let snap2 = MetadataSnapshot::from_bytes(&bytes[..bytes.len() - 4]).unwrap();
        assert_eq!(snap, snap2);
    }

    #[test]
    fn truncated_header_is_rejected() {
        let err = MetadataSnapshot::from_bytes(&[0u8; 10]).unwrap_err();
        assert!(format!("{err}").contains("too small"));
    }

    #[test]
    fn truncated_body_is_rejected() {
        let snap = sample_snapshot();
        let bytes = snap.to_bytes();
        let err = MetadataSnapshot::from_bytes(&bytes[..bytes.len() - 5]).unwrap_err();
        assert!(format!("{err}").contains("truncated"));
    }

    #[test]
    fn header_counts_beyond_buffer_are_rejected() {
        // A bare header that claims one million segments with no body behind it.
        let mut header = [0u8; 20];
        header[8..12].copy_from_slice(&1_000_000u32.to_le_bytes());
        let err = MetadataSnapshot::from_bytes(&header).unwrap_err();
        assert!(format!("{err}").contains("truncated"));
    }

    #[test]
    fn fits_in_single_block() {
        let snap = sample_snapshot();
        assert!(snap.fits_in_single_block());
    }

    #[test]
    fn from_bytes_tolerates_trailing_data() {
        let snap = sample_snapshot();
        let mut bytes = snap.to_bytes();
        bytes.extend_from_slice(&[0u8; 1024]);
        let snap2 = MetadataSnapshot::from_bytes(&bytes).unwrap();
        assert_eq!(snap, snap2);
    }

    #[test]
    fn segment_entry_fields() {
        let entry = SegmentEntry::new(SegmentId::new(42), Extent::new(BlockId(10), 3), 7, 768_000);
        assert_eq!(entry.segment_id, SegmentId::new(42));
        assert_eq!(entry.extent, Extent::new(BlockId(10), 3));
        assert_eq!(entry.generation, 7);
        assert_eq!(entry.data_len, 768_000);
    }

    #[test]
    fn vector_index_entry_fields() {
        let entry = VectorIndexEntry::new(FieldId::new(7), Extent::new(BlockId(100), 3), 600_000);
        assert_eq!(entry.field_id, FieldId::new(7));
        assert_eq!(entry.extent, Extent::new(BlockId(100), 3));
        assert_eq!(entry.data_len, 600_000);
    }

    #[test]
    fn round_trip_large_snapshot() {
        let segments: Vec<_> = (0..500)
            .map(|i| {
                SegmentEntry::new(
                    SegmentId::new(i),
                    Extent::new(BlockId(i * 10), 5),
                    i / 10,
                    256_000,
                )
            })
            .collect();
        let free_list: Vec<_> = (0..200)
            .map(|i| Extent::new(BlockId(5000 + i * 3), 2))
            .collect();
        let snap = MetadataSnapshot {
            segments,
            vector_indexes: Vec::new(),
            total_blocks: 10000,
            free_list,
            user_metadata: Vec::new(),
        };
        assert!(snap.fits_in_single_block());
        let bytes = snap.to_bytes();
        let snap2 = MetadataSnapshot::from_bytes(&bytes).unwrap();
        assert_eq!(snap, snap2);
    }

    #[test]
    fn checksum_integration() {
        let snap = sample_snapshot();
        let bytes = snap.to_bytes();
        let checksum = crate::storage::xxh3_checksum(&bytes);
        // Same bytes hash the same; a different snapshot hashes differently.
        assert_eq!(checksum, crate::storage::xxh3_checksum(&bytes));
        let snap2 = MetadataSnapshot::empty();
        let bytes2 = snap2.to_bytes();
        assert_ne!(checksum, crate::storage::xxh3_checksum(&bytes2));
    }
}