use crate::error::{CoreError, CoreResult, ErrorContext, ErrorLocation};
use serde::{Deserialize, Serialize};
use std::io::{Read, Seek, SeekFrom, Write};
/// Magic bytes at the start of every V2 out-of-core file ("SCI2").
pub const MAGIC_BYTES_V2: &[u8; 4] = b"SCI2";
/// NOTE(review): byte marker associated with legacy V1 files; its origin is
/// not evident from this file (it is unused here — V1 is detected as the
/// fallback in `detect_format_version`). Confirm against the V1 writer.
pub const MAGIC_BYTES_V1_MARKER: u8 = 0x91;
/// On-disk container format revisions understood by this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FormatVersion {
/// Legacy layout: any stream that does not start with the "SCI2" magic
/// (see `detect_format_version`).
V1,
/// Current layout described by `OutOfCoreHeaderV2`.
V2,
}
/// Per-chunk compression codec recorded in the file header.
///
/// NOTE(review): only the tag is stored here; which codecs are actually
/// wired up is not visible in this file — confirm against the chunk I/O.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CompressionType {
/// Chunks stored raw.
None,
/// LZ4 compression.
Lz4,
/// Zstandard compression.
Zstd,
/// Snappy compression.
Snappy,
}
/// Header describing a V2 out-of-core array file.
///
/// Serialized (length-prefixed, zero-padded) into a fixed-size region at
/// the start of the file by `write_header`/`read_header`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutOfCoreHeaderV2 {
/// Must equal `MAGIC_BYTES_V2` ("SCI2").
pub magic: [u8; 4],
/// Format version number; always 2 for this header.
pub version: u16,
/// Array dimensions.
pub shape: Vec<usize>,
/// Product of `shape` — validated for consistency in `validate`.
pub total_elements: usize,
/// Size in bytes of a single element.
pub element_size: usize,
/// Number of data chunks in the file.
pub num_chunks: usize,
/// Codec applied to chunk payloads.
pub compression: CompressionType,
/// Byte offset of the chunk index; 0 until the index has been written.
pub chunk_index_offset: u64,
/// Reserved padding for future fields (64 zero bytes as built by `new`).
pub reserved: Vec<u8>,
}
impl OutOfCoreHeaderV2 {
    /// Builds a V2 header for an array with the given `shape`.
    ///
    /// `total_elements` is derived as the product of the dimensions,
    /// `chunk_index_offset` starts at 0 (patched after the chunk data is
    /// written), and 64 reserved bytes are kept for future fields.
    pub fn new(
        shape: Vec<usize>,
        element_size: usize,
        num_chunks: usize,
        compression: CompressionType,
    ) -> Self {
        // NOTE: `product()` panics in debug / wraps in release for absurd
        // shapes; `validate` rejects wrapped totals via checked arithmetic.
        let total_elements = shape.iter().product();
        Self {
            magic: *MAGIC_BYTES_V2,
            version: 2,
            shape,
            total_elements,
            element_size,
            num_chunks,
            compression,
            chunk_index_offset: 0,
            reserved: vec![0; 64],
        }
    }

    /// Serializes the header with the crate's binary codec.
    ///
    /// # Errors
    /// Returns `CoreError::IoError` if encoding fails.
    pub fn to_bytes(&self) -> CoreResult<Vec<u8>> {
        use oxicode::{config, serde as oxicode_serde};
        let cfg = config::standard();
        oxicode_serde::encode_to_vec(self, cfg).map_err(|e| {
            CoreError::IoError(
                ErrorContext::new(format!("Failed to serialize header: {e}"))
                    .with_location(ErrorLocation::new(file!(), line!())),
            )
        })
    }

    /// Deserializes a header previously produced by [`Self::to_bytes`].
    ///
    /// # Errors
    /// Returns `CoreError::IoError` if decoding fails.
    pub fn from_bytes(bytes: &[u8]) -> CoreResult<Self> {
        use oxicode::{config, serde as oxicode_serde};
        let cfg = config::standard();
        let (header, _len): (Self, usize) = oxicode_serde::decode_owned_from_slice(bytes, cfg)
            .map_err(|e| {
                CoreError::IoError(
                    ErrorContext::new(format!("Failed to deserialize header: {e}"))
                        .with_location(ErrorLocation::new(file!(), line!())),
                )
            })?;
        Ok(header)
    }

    /// Checks internal consistency: magic bytes, version, non-empty shape,
    /// shape/total agreement, and non-zero element size and chunk count.
    ///
    /// # Errors
    /// Returns `CoreError::ValidationError` describing the first failed check.
    pub fn validate(&self) -> CoreResult<()> {
        if &self.magic != MAGIC_BYTES_V2 {
            return Err(CoreError::ValidationError(
                ErrorContext::new(format!(
                    "Invalid magic bytes: expected {:?}, got {:?}",
                    MAGIC_BYTES_V2, self.magic
                ))
                .with_location(ErrorLocation::new(file!(), line!())),
            ));
        }
        if self.version != 2 {
            return Err(CoreError::ValidationError(
                ErrorContext::new(format!("Unsupported version: {}", self.version))
                    .with_location(ErrorLocation::new(file!(), line!())),
            ));
        }
        if self.shape.is_empty() {
            return Err(CoreError::ValidationError(
                ErrorContext::new("Shape cannot be empty".to_string())
                    .with_location(ErrorLocation::new(file!(), line!())),
            ));
        }
        // Use checked multiplication: headers come from untrusted files, and
        // a plain `product()` wraps silently in release builds, letting a
        // corrupt header with overflowing dimensions pass the check below.
        let computed_total = self
            .shape
            .iter()
            .try_fold(1usize, |acc, &dim| acc.checked_mul(dim));
        match computed_total {
            None => {
                return Err(CoreError::ValidationError(
                    ErrorContext::new(format!(
                        "Shape product overflows usize: {:?}",
                        self.shape
                    ))
                    .with_location(ErrorLocation::new(file!(), line!())),
                ));
            }
            Some(total) if total != self.total_elements => {
                return Err(CoreError::ValidationError(
                    ErrorContext::new(format!(
                        "Total elements mismatch: computed {total}, stored {}",
                        self.total_elements
                    ))
                    .with_location(ErrorLocation::new(file!(), line!())),
                ));
            }
            _ => {}
        }
        if self.element_size == 0 {
            return Err(CoreError::ValidationError(
                ErrorContext::new("Element size cannot be zero".to_string())
                    .with_location(ErrorLocation::new(file!(), line!())),
            ));
        }
        if self.num_chunks == 0 {
            return Err(CoreError::ValidationError(
                ErrorContext::new("Number of chunks cannot be zero".to_string())
                    .with_location(ErrorLocation::new(file!(), line!())),
            ));
        }
        Ok(())
    }
}
/// Location and size metadata for a single chunk in the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkIndexEntry {
/// Identifier of the chunk.
pub chunk_id: usize,
/// Byte offset of the chunk's data within the file.
pub offset: u64,
/// Uncompressed size of the chunk in bytes.
pub size: usize,
/// Compressed size on disk; 0 means the chunk is stored uncompressed.
pub compressed_size: usize,
/// Number of array elements contained in the chunk.
pub num_elements: usize,
}
impl ChunkIndexEntry {
    /// Creates an uncompressed entry; `compressed_size` starts at 0, which
    /// encodes "stored raw".
    pub fn new(chunk_id: usize, offset: u64, size: usize, num_elements: usize) -> Self {
        Self {
            chunk_id,
            offset,
            size,
            compressed_size: 0,
            num_elements,
        }
    }

    /// Builder-style setter recording the on-disk compressed size.
    pub fn with_compression(mut self, compressed_size: usize) -> Self {
        self.compressed_size = compressed_size;
        self
    }

    /// A non-zero compressed size marks the chunk as compressed.
    pub const fn is_compressed(&self) -> bool {
        self.compressed_size != 0
    }

    /// Number of bytes the chunk occupies on disk: the compressed size when
    /// set, otherwise the raw size.
    pub const fn disk_size(&self) -> usize {
        match self.compressed_size {
            0 => self.size,
            compressed => compressed,
        }
    }
}
/// Index of all chunk entries, serialized near the end of the file at the
/// offset recorded in `OutOfCoreHeaderV2::chunk_index_offset`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkIndex {
/// Entries in insertion order; normally position == `chunk_id`.
pub entries: Vec<ChunkIndexEntry>,
}
impl ChunkIndex {
    /// Creates an empty index.
    pub fn new() -> Self {
        Self {
            entries: Vec::new(),
        }
    }

    /// Appends an entry. Callers normally add chunks in id order so the
    /// vector position matches `chunk_id`.
    pub fn add_entry(&mut self, entry: ChunkIndexEntry) {
        self.entries.push(entry);
    }

    /// Looks up the entry for `chunk_id`.
    ///
    /// The previous implementation was purely positional and silently
    /// returned the wrong entry when chunks were registered out of order.
    /// The O(1) positional fast path is kept, but the id is verified and a
    /// linear scan is used as a fallback.
    pub fn get_entry(&self, chunk_id: usize) -> Option<&ChunkIndexEntry> {
        self.entries
            .get(chunk_id)
            .filter(|entry| entry.chunk_id == chunk_id)
            .or_else(|| self.entries.iter().find(|entry| entry.chunk_id == chunk_id))
    }

    /// Number of chunk entries.
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// True when no chunks have been registered.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Serializes the index with the crate's binary codec.
    ///
    /// # Errors
    /// Returns `CoreError::IoError` if encoding fails.
    pub fn to_bytes(&self) -> CoreResult<Vec<u8>> {
        use oxicode::{config, serde as oxicode_serde};
        let cfg = config::standard();
        oxicode_serde::encode_to_vec(self, cfg).map_err(|e| {
            CoreError::IoError(
                ErrorContext::new(format!("Failed to serialize chunk index: {e}"))
                    .with_location(ErrorLocation::new(file!(), line!())),
            )
        })
    }

    /// Deserializes an index previously produced by [`Self::to_bytes`].
    ///
    /// # Errors
    /// Returns `CoreError::IoError` if decoding fails.
    pub fn from_bytes(bytes: &[u8]) -> CoreResult<Self> {
        use oxicode::{config, serde as oxicode_serde};
        let cfg = config::standard();
        let (index, _len): (Self, usize) = oxicode_serde::decode_owned_from_slice(bytes, cfg)
            .map_err(|e| {
                CoreError::IoError(
                    ErrorContext::new(format!("Failed to deserialize chunk index: {e}"))
                        .with_location(ErrorLocation::new(file!(), line!())),
                )
            })?;
        Ok(index)
    }
}
impl Default for ChunkIndex {
fn default() -> Self {
Self::new()
}
}
/// Peeks at the first four bytes of `reader` to decide which container
/// format the stream holds, restoring the stream position afterwards.
///
/// Streams beginning with the "SCI2" magic are V2; anything else is
/// assumed to be the legacy V1 layout.
///
/// # Errors
/// Returns `CoreError::IoError` if the position cannot be queried or
/// restored, or if fewer than four bytes are available.
pub fn detect_format_version<R: Read + Seek>(reader: &mut R) -> CoreResult<FormatVersion> {
    let start = reader.stream_position().map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to get stream position: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    let mut prefix = [0u8; 4];
    reader.read_exact(&mut prefix).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to read magic bytes: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    // Leave the stream where the caller had it so header parsing can
    // re-read from the top.
    reader.seek(SeekFrom::Start(start)).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to restore stream position: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    Ok(if &prefix == MAGIC_BYTES_V2 {
        FormatVersion::V2
    } else {
        FormatVersion::V1
    })
}
/// Fixed on-disk size of the header region: a 4-byte little-endian length
/// prefix, the serialized header, then zero padding up to this size
/// (see `write_header`/`read_header`).
pub const HEADER_FIXED_SIZE: usize = 256;
/// Writes `header` into a fixed [`HEADER_FIXED_SIZE`]-byte region: a 4-byte
/// little-endian length prefix, the serialized header, then zero padding.
///
/// Returns the number of bytes written (always `HEADER_FIXED_SIZE`).
///
/// # Errors
/// Returns `CoreError::IoError` if serialization fails, if the serialized
/// header does not fit in the fixed region, or on any write failure.
pub fn write_header<W: Write>(writer: &mut W, header: &OutOfCoreHeaderV2) -> CoreResult<usize> {
    // Space available for the serialized header after the length prefix.
    const MAX_PAYLOAD: usize = HEADER_FIXED_SIZE - 4;

    let payload = header.to_bytes()?;
    if payload.len() > MAX_PAYLOAD {
        return Err(CoreError::IoError(
            ErrorContext::new(format!(
                "Header too large: {} bytes (max {})",
                payload.len(),
                MAX_PAYLOAD
            ))
            .with_location(ErrorLocation::new(file!(), line!())),
        ));
    }

    let prefix = (payload.len() as u32).to_le_bytes();
    writer.write_all(&prefix).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to write header size: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    writer.write_all(&payload).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to write header: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    // Pad the region out to its fixed size so the data section always
    // starts at the same offset.
    let padding = vec![0u8; MAX_PAYLOAD - payload.len()];
    writer.write_all(&padding).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to write header padding: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    Ok(HEADER_FIXED_SIZE)
}
/// Reads and validates a header from the fixed [`HEADER_FIXED_SIZE`]-byte
/// region written by [`write_header`] (length prefix, payload, padding).
///
/// # Errors
/// Returns `CoreError::IoError` on read failure or an out-of-range stored
/// size, and `CoreError::ValidationError` if the decoded header is
/// inconsistent.
pub fn read_header<R: Read>(reader: &mut R) -> CoreResult<OutOfCoreHeaderV2> {
    let mut size_bytes = [0u8; 4];
    reader.read_exact(&mut size_bytes).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to read header size: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    let header_size = u32::from_le_bytes(size_bytes) as usize;
    // The stored size is untrusted input. Without this bound the padding
    // subtraction below underflows (panic in debug, enormous allocation in
    // release) on a corrupt or malicious file.
    if header_size > HEADER_FIXED_SIZE - 4 {
        return Err(CoreError::IoError(
            ErrorContext::new(format!(
                "Invalid header size: {header_size} bytes (max {})",
                HEADER_FIXED_SIZE - 4
            ))
            .with_location(ErrorLocation::new(file!(), line!())),
        ));
    }
    let mut header_bytes = vec![0u8; header_size];
    reader.read_exact(&mut header_bytes).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to read header data: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    // Consume the zero padding so the reader is positioned at the start of
    // the data section.
    let padding_size = HEADER_FIXED_SIZE - 4 - header_size;
    let mut padding = vec![0u8; padding_size];
    reader.read_exact(&mut padding).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to skip header padding: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    let header = OutOfCoreHeaderV2::from_bytes(&header_bytes)?;
    header.validate()?;
    Ok(header)
}
/// Serializes `index` and writes it as a 4-byte little-endian length prefix
/// followed by the payload. Returns the total number of bytes written.
///
/// # Errors
/// Returns `CoreError::IoError` if serialization fails, if the serialized
/// index does not fit in the `u32` length prefix, or on any write failure.
pub fn write_chunk_index<W: Write>(writer: &mut W, index: &ChunkIndex) -> CoreResult<u64> {
    let index_bytes = index.to_bytes()?;
    // Guard the length-prefix cast: a plain `as u32` would silently
    // truncate an index >= 4 GiB and corrupt the file.
    let index_len = u32::try_from(index_bytes.len()).map_err(|_| {
        CoreError::IoError(
            ErrorContext::new(format!(
                "Chunk index too large: {} bytes (max {})",
                index_bytes.len(),
                u32::MAX
            ))
            .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    writer.write_all(&index_len.to_le_bytes()).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to write chunk index size: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    writer.write_all(&index_bytes).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to write chunk index: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;
    Ok((4 + index_bytes.len()) as u64)
}
/// Seeks to `offset` and reads back a chunk index written by
/// [`write_chunk_index`] (4-byte little-endian length prefix + payload).
///
/// # Errors
/// Returns `CoreError::IoError` on seek/read failure or if the payload
/// cannot be deserialized.
pub fn read_chunk_index<R: Read + Seek>(reader: &mut R, offset: u64) -> CoreResult<ChunkIndex> {
    reader.seek(SeekFrom::Start(offset)).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to seek to chunk index: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    let mut len_buf = [0u8; 4];
    reader.read_exact(&mut len_buf).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to read chunk index size: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    let payload_len = u32::from_le_bytes(len_buf) as usize;
    let mut payload = vec![0u8; payload_len];
    reader.read_exact(&mut payload).map_err(|e| {
        CoreError::IoError(
            ErrorContext::new(format!("Failed to read chunk index data: {e}"))
                .with_location(ErrorLocation::new(file!(), line!())),
        )
    })?;

    ChunkIndex::from_bytes(&payload)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
// Header construction fills in derived fields and passes self-validation.
#[test]
fn test_header_creation_and_validation() {
let header = OutOfCoreHeaderV2::new(vec![100, 200], 8, 10, CompressionType::None);
assert_eq!(header.magic, *MAGIC_BYTES_V2);
assert_eq!(header.version, 2);
assert_eq!(header.shape, vec![100, 200]);
assert_eq!(header.total_elements, 20000);
assert_eq!(header.element_size, 8);
assert_eq!(header.num_chunks, 10);
assert!(header.validate().is_ok());
}
// Header survives an encode/decode round trip unchanged.
#[test]
fn test_header_serialization() {
let header = OutOfCoreHeaderV2::new(vec![100, 200], 8, 10, CompressionType::Lz4);
let bytes = header.to_bytes().expect("Serialization failed");
let deserialized = OutOfCoreHeaderV2::from_bytes(&bytes).expect("Deserialization failed");
assert_eq!(header.magic, deserialized.magic);
assert_eq!(header.version, deserialized.version);
assert_eq!(header.shape, deserialized.shape);
assert_eq!(header.total_elements, deserialized.total_elements);
}
// Entry defaults to uncompressed; with_compression switches disk_size
// to the compressed byte count.
#[test]
fn test_chunk_index_entry() {
let entry = ChunkIndexEntry::new(0, 1024, 8192, 1000);
assert_eq!(entry.chunk_id, 0);
assert_eq!(entry.offset, 1024);
assert_eq!(entry.size, 8192);
assert_eq!(entry.num_elements, 1000);
assert!(!entry.is_compressed());
assert_eq!(entry.disk_size(), 8192);
let compressed = entry.with_compression(4096);
assert!(compressed.is_compressed());
assert_eq!(compressed.disk_size(), 4096);
}
// Basic add/len/lookup behavior of the in-memory index.
#[test]
fn test_chunk_index() {
let mut index = ChunkIndex::new();
assert!(index.is_empty());
assert_eq!(index.len(), 0);
index.add_entry(ChunkIndexEntry::new(0, 1024, 8192, 1000));
index.add_entry(ChunkIndexEntry::new(1, 9216, 8192, 1000));
assert!(!index.is_empty());
assert_eq!(index.len(), 2);
let entry = index.get_entry(0).expect("Entry not found");
assert_eq!(entry.chunk_id, 0);
assert_eq!(entry.offset, 1024);
}
// Index survives an encode/decode round trip unchanged.
#[test]
fn test_chunk_index_serialization() {
let mut index = ChunkIndex::new();
index.add_entry(ChunkIndexEntry::new(0, 1024, 8192, 1000));
index.add_entry(ChunkIndexEntry::new(1, 9216, 8192, 1000));
let bytes = index.to_bytes().expect("Serialization failed");
let deserialized = ChunkIndex::from_bytes(&bytes).expect("Deserialization failed");
assert_eq!(index.len(), deserialized.len());
assert_eq!(
index.get_entry(0).expect("Entry not found").offset,
deserialized.get_entry(0).expect("Entry not found").offset
);
}
// "SCI2" magic detects as V2; any other leading bytes fall back to V1.
#[test]
fn test_format_detection() {
let mut v2_data = Vec::new();
v2_data.extend_from_slice(MAGIC_BYTES_V2);
v2_data.extend_from_slice(&[0; 100]);
let mut cursor = Cursor::new(v2_data);
let version = detect_format_version(&mut cursor).expect("Detection failed");
assert_eq!(version, FormatVersion::V2);
let v1_data = vec![MAGIC_BYTES_V1_MARKER, 0, 0, 0];
let mut cursor = Cursor::new(v1_data);
let version = detect_format_version(&mut cursor).expect("Detection failed");
assert_eq!(version, FormatVersion::V1);
}
}