use std::{
fs::{self, File},
io::{BufWriter, Write},
path::Path,
};
use kimberlite_types::Offset;
use crate::StorageError;
// On-disk layout written by `OffsetIndex::save` and parsed by `OffsetIndex::load`:
//
//   magic (4) | version (1) | reserved (3) | count (8, LE) | positions (8 each, LE) | crc32 (4, LE)

/// Signature bytes placed at the very start of an index file.
const MAGIC: &[u8; MAGIC_SIZE] = b"VDXI";
/// Format version byte; `load` rejects any other value.
const VERSION: u8 = 1;
/// Zero padding emitted between the version byte and the entry count.
const RESERVED: [u8; RESERVED_SIZE] = [0; RESERVED_SIZE];

/// Width in bytes of the magic signature.
const MAGIC_SIZE: usize = 4;
/// Width in bytes of the version field.
const VERSION_SIZE: usize = std::mem::size_of::<u8>();
/// Width in bytes of the reserved padding.
const RESERVED_SIZE: usize = 3;
/// Width in bytes of the entry count (stored as a `u64`).
const COUNT_SIZE: usize = std::mem::size_of::<u64>();
/// Width in bytes of one stored position (stored as a `u64`).
const POSITION_SIZE: usize = std::mem::size_of::<u64>();
/// Width in bytes of the trailing checksum (stored as a `u32`).
const CRC_SIZE: usize = std::mem::size_of::<u32>();

/// Total size in bytes of everything that precedes the first position.
const HEADER_SIZE: usize = MAGIC_SIZE + VERSION_SIZE + RESERVED_SIZE + COUNT_SIZE;
/// In-memory table of byte positions, addressed by offset.
///
/// Entry `i` is the value returned when looking up offset `i`. Entries are
/// expected to be strictly increasing; `append` and `from_positions` check
/// this with debug assertions only.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OffsetIndex {
    /// Byte positions in insertion order; the vector index is the offset.
    positions: Vec<u64>,
}
impl OffsetIndex {
pub fn new() -> Self {
Self::default()
}
pub fn append(&mut self, byte_position: u64) {
debug_assert!(
self.positions
.last()
.is_none_or(|&last| byte_position > last),
"byte_position {} must be greater than last position {:?}",
byte_position,
self.positions.last()
);
let prev_len = self.positions.len();
self.positions.push(byte_position);
debug_assert_eq!(self.positions.len(), prev_len + 1);
}
#[must_use]
pub fn lookup(&self, offset: Offset) -> Option<u64> {
self.positions.get(offset.as_usize()).copied()
}
#[must_use]
pub fn len(&self) -> usize {
self.positions.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.positions.is_empty()
}
pub fn from_positions(positions: Vec<u64>) -> Self {
debug_assert!(
positions.windows(2).all(|w| w[0] < w[1]),
"positions must be monotonically increasing"
);
Self { positions }
}
#[must_use]
pub fn positions(&self) -> &[u64] {
&self.positions
}
pub fn save(&self, path: &Path) -> Result<(), StorageError> {
let positions_size = self.positions.len() * POSITION_SIZE;
let total_size = HEADER_SIZE + positions_size + CRC_SIZE;
let mut buf: Vec<u8> = Vec::with_capacity(total_size);
buf.extend_from_slice(MAGIC);
buf.extend_from_slice(&[VERSION]);
buf.extend_from_slice(&RESERVED);
buf.extend_from_slice(&(self.positions.len() as u64).to_le_bytes());
for pos in &self.positions {
buf.extend_from_slice(&pos.to_le_bytes());
}
let checksum = kimberlite_crypto::crc32(&buf);
buf.extend_from_slice(&checksum.to_le_bytes());
debug_assert_eq!(buf.len(), total_size, "buffer size mismatch");
let file = File::create(path)?;
let mut writer = BufWriter::new(file);
writer.write_all(&buf)?;
writer.flush()?;
Ok(())
}
pub fn load(path: &Path) -> Result<Self, StorageError> {
let data = fs::read(path)?;
if data.len() < HEADER_SIZE + CRC_SIZE {
return Err(StorageError::IndexTruncated {
expected: HEADER_SIZE + CRC_SIZE,
actual: data.len(),
});
}
let magic: [u8; MAGIC_SIZE] = data[0..MAGIC_SIZE]
.try_into()
.expect("slice length equals MAGIC_SIZE after bounds check");
if &magic != MAGIC {
return Err(StorageError::InvalidIndexMagic);
}
let version = data[MAGIC_SIZE];
if version != VERSION {
return Err(StorageError::UnsupportedIndexVersion(version));
}
let count_start = MAGIC_SIZE + VERSION_SIZE + RESERVED_SIZE;
let count_bytes: [u8; COUNT_SIZE] = data[count_start..count_start + COUNT_SIZE]
.try_into()
.expect("slice length equals COUNT_SIZE after bounds check");
let count = u64::from_le_bytes(count_bytes) as usize;
let positions_size = count * POSITION_SIZE;
let expected_size = HEADER_SIZE + positions_size + CRC_SIZE;
if data.len() < expected_size {
return Err(StorageError::IndexTruncated {
expected: expected_size,
actual: data.len(),
});
}
let crc_start = HEADER_SIZE + positions_size;
let stored_crc_bytes: [u8; CRC_SIZE] = data[crc_start..crc_start + CRC_SIZE]
.try_into()
.expect("slice length equals CRC_SIZE after bounds check");
let stored_crc = u32::from_le_bytes(stored_crc_bytes);
let computed_crc = kimberlite_crypto::crc32(&data[0..crc_start]);
if stored_crc != computed_crc {
return Err(StorageError::IndexChecksumMismatch {
expected: stored_crc,
actual: computed_crc,
});
}
let mut positions = Vec::with_capacity(count);
for i in 0..count {
let start = HEADER_SIZE + (i * POSITION_SIZE);
let pos_bytes: [u8; POSITION_SIZE] = data[start..start + POSITION_SIZE]
.try_into()
.expect("slice length equals POSITION_SIZE after bounds check");
positions.push(u64::from_le_bytes(pos_bytes));
}
debug_assert_eq!(positions.len(), count, "position count mismatch");
Ok(Self { positions })
}
}