use std::mem::size_of;
/// Magic number expected in [`BinaryDataHeader::magic`].
///
/// Read most-significant byte first, the value is the ASCII sequence `IMAL`.
pub const BINARY_MAGIC: u32 = 0x494D_414C;

/// Format version written by [`BinaryDataHeader::new`] and the only version
/// accepted by [`BinaryDataHeader::validate`].
pub const BINARY_FORMAT_VERSION: u8 = 1;

/// Alignment constant for data regions.
// NOTE(review): presumably the byte boundary column data is padded to; it is
// not referenced by the non-test code visible in this file — confirm with the
// writer/reader that consumes it.
pub const DATA_ALIGNMENT: usize = 32;
/// Bit masks for the `flags` byte of [`BinaryDataHeader`].
pub mod flags {
    /// Bit 0: set by `BinaryDataHeader::new` when `has_timestamps` is true.
    pub const HAS_TIMESTAMPS: u8 = 1 << 0;
    /// Bit 1: set by `BinaryDataHeader::new` when `is_sorted` is true.
    pub const IS_SORTED: u8 = 1 << 1;
}
/// Fixed-size header of the binary format.
///
/// `#[repr(C, packed)]` keeps the fields in declaration order with no padding,
/// so the in-memory image is exactly [`Self::SIZE`] bytes and is what
/// [`Self::to_bytes`] and [`Self::from_bytes`] exchange. Because the struct is
/// packed, multi-byte fields may sit at unaligned addresses: copy them out by
/// value (as the accessors do) and never take a reference to them.
#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
pub struct BinaryDataHeader {
    /// Format marker; must equal [`BINARY_MAGIC`] for [`Self::validate`] to pass.
    pub magic: u32,
    /// Format version; must equal [`BINARY_FORMAT_VERSION`] for
    /// [`Self::validate`] to pass.
    pub version: u8,
    /// Bitwise OR of the masks in [`flags`].
    pub flags: u8,
    /// Column count as supplied to [`Self::new`].
    pub column_count: u16,
    /// Row count as supplied to [`Self::new`].
    pub row_count: u64,
}

impl BinaryDataHeader {
    /// Size of the header in bytes (4 + 1 + 1 + 2 + 8 = 16, no padding).
    pub const SIZE: usize = size_of::<Self>();

    /// Builds a header carrying the current magic and format version.
    ///
    /// `has_timestamps` and `is_sorted` are folded into the `flags` byte as
    /// [`flags::HAS_TIMESTAMPS`] and [`flags::IS_SORTED`] respectively.
    pub fn new(column_count: u16, row_count: u64, has_timestamps: bool, is_sorted: bool) -> Self {
        let mut header_flags = 0u8;
        if has_timestamps {
            header_flags |= flags::HAS_TIMESTAMPS;
        }
        if is_sorted {
            header_flags |= flags::IS_SORTED;
        }
        Self {
            magic: BINARY_MAGIC,
            version: BINARY_FORMAT_VERSION,
            flags: header_flags,
            column_count,
            row_count,
        }
    }

    /// Checks the magic number and the format version, in that order.
    ///
    /// # Errors
    ///
    /// * [`BinaryFormatError::InvalidMagic`] if `magic` is not [`BINARY_MAGIC`].
    /// * [`BinaryFormatError::UnsupportedVersion`] if `version` is not
    ///   [`BINARY_FORMAT_VERSION`].
    pub fn validate(&self) -> Result<(), BinaryFormatError> {
        // Packed fields are copied into locals; by-value reads are safe.
        let magic = self.magic;
        if magic != BINARY_MAGIC {
            return Err(BinaryFormatError::InvalidMagic(magic));
        }
        let version = self.version;
        if version != BINARY_FORMAT_VERSION {
            return Err(BinaryFormatError::UnsupportedVersion(version));
        }
        Ok(())
    }

    /// Returns `true` when the [`flags::HAS_TIMESTAMPS`] bit is set.
    pub fn has_timestamps(&self) -> bool {
        self.flags & flags::HAS_TIMESTAMPS != 0
    }

    /// Returns `true` when the [`flags::IS_SORTED`] bit is set.
    pub fn is_sorted(&self) -> bool {
        self.flags & flags::IS_SORTED != 0
    }

    /// Returns a copy of the `magic` field.
    pub fn get_magic(&self) -> u32 {
        // Reading a packed field by value is safe: the compiler emits an
        // unaligned load. Only *references* to packed fields are forbidden.
        self.magic
    }

    /// Returns a copy of the `column_count` field.
    pub fn get_column_count(&self) -> u16 {
        self.column_count
    }

    /// Returns a copy of the `row_count` field.
    pub fn get_row_count(&self) -> u64 {
        self.row_count
    }

    /// Reads a header from the first [`Self::SIZE`] bytes of `data` and
    /// validates it.
    ///
    /// The bytes are reinterpreted as the struct's memory image, so multi-byte
    /// fields are decoded in the host's native byte order. Bytes past
    /// [`Self::SIZE`] are ignored.
    ///
    /// # Errors
    ///
    /// * [`BinaryFormatError::InsufficientData`] if `data` is shorter than
    ///   [`Self::SIZE`].
    /// * Any error returned by [`Self::validate`].
    pub fn from_bytes(data: &[u8]) -> Result<Self, BinaryFormatError> {
        if data.len() < Self::SIZE {
            return Err(BinaryFormatError::InsufficientData {
                expected: Self::SIZE,
                actual: data.len(),
            });
        }
        // SAFETY: the length check above guarantees at least `Self::SIZE`
        // readable bytes behind the pointer; `read_unaligned` places no
        // alignment requirement on it; and every field is a plain integer, so
        // any bit pattern is a valid `Self`.
        let header = unsafe { std::ptr::read_unaligned(data.as_ptr().cast::<Self>()) };
        header.validate()?;
        Ok(header)
    }

    /// Returns the header's memory image as a byte array.
    ///
    /// Multi-byte fields are emitted in the host's native byte order; this is
    /// the exact inverse of [`Self::from_bytes`] on the same host.
    pub fn to_bytes(&self) -> [u8; Self::SIZE] {
        // SAFETY: the destination `[u8; Self::SIZE]` is exactly
        // `size_of::<Self>()` bytes, so the copy never reads past `self`, and
        // the packed struct has no padding, so every copied byte is
        // initialized.
        unsafe { std::mem::transmute_copy(self) }
    }
}
/// Kind of values held by a column.
///
/// The `#[repr(u8)]` discriminants are the byte values accepted by
/// [`ColumnType::from_u8`].
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ColumnType {
    /// Discriminant 0; 8-byte elements.
    Float64 = 0,
    /// Discriminant 1; 8-byte elements.
    Int64 = 1,
    /// Discriminant 2; not fixed-size.
    String = 2,
    /// Discriminant 3; 1-byte elements.
    Bool = 3,
    /// Discriminant 4; 8-byte elements.
    Timestamp = 4,
}

impl ColumnType {
    /// Maps a raw discriminant byte back to its variant.
    ///
    /// Returns `None` for any byte that is not the discriminant of a variant.
    pub fn from_u8(value: u8) -> Option<Self> {
        // Matching on the discriminant itself keeps this independent of the
        // order of the table below.
        const KNOWN: [ColumnType; 5] = [
            ColumnType::Float64,
            ColumnType::Int64,
            ColumnType::String,
            ColumnType::Bool,
            ColumnType::Timestamp,
        ];
        KNOWN.iter().copied().find(|kind| *kind as u8 == value)
    }

    /// Size in bytes of one element, or `0` for [`ColumnType::String`], which
    /// has no fixed element size.
    pub fn element_size(&self) -> usize {
        match *self {
            Self::String => 0,
            Self::Bool => 1,
            Self::Float64 | Self::Int64 | Self::Timestamp => 8,
        }
    }

    /// Returns `true` for every variant except [`ColumnType::String`].
    pub fn is_fixed_size(&self) -> bool {
        *self != Self::String
    }
}
/// Bit masks for the `flags` byte of [`ColumnDescriptor`].
pub mod col_flags {
    /// Bit 0: set by `ColumnDescriptor::new` when `nullable` is true.
    pub const NULLABLE: u8 = 1 << 0;
    /// Bit 1. `ColumnDescriptor::new` never sets this bit.
    pub const SORTED: u8 = 1 << 1;
}
/// Per-column record of the binary format.
///
/// `#[repr(C, packed)]` keeps the fields in declaration order with no padding
/// (2 + 1 + 1 + 8 + 8 = 20 bytes). Multi-byte fields may be unaligned, so read
/// them by value and never take a reference to them.
#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
pub struct ColumnDescriptor {
    /// Offset locating the column's name.
    // NOTE(review): the base this offset is relative to is not visible here —
    // confirm against the code that resolves column names.
    pub name_offset: u16,
    /// Raw [`ColumnType`] discriminant; decode with [`Self::column_type`].
    pub data_type: u8,
    /// Bitwise OR of the masks in [`col_flags`].
    pub flags: u8,
    /// Offset of the column's data.
    // NOTE(review): presumably a byte offset from the start of the buffer — confirm.
    pub data_offset: u64,
    /// Length of the column's data.
    // NOTE(review): presumably in bytes — confirm.
    pub data_len: u64,
}

impl ColumnDescriptor {
    /// Size of one descriptor in bytes.
    pub const SIZE: usize = size_of::<ColumnDescriptor>();

    /// Builds a descriptor, storing `data_type` as its `u8` discriminant.
    ///
    /// Only the [`col_flags::NULLABLE`] bit can be set through this
    /// constructor; every other flag bit is left clear.
    pub fn new(
        name_offset: u16,
        data_type: ColumnType,
        data_offset: u64,
        data_len: u64,
        nullable: bool,
    ) -> Self {
        let flags = if nullable { col_flags::NULLABLE } else { 0 };
        Self {
            name_offset,
            data_type: data_type as u8,
            flags,
            data_offset,
            data_len,
        }
    }

    /// Decodes the stored type byte, returning `None` when it does not match
    /// any [`ColumnType`] variant.
    pub fn column_type(&self) -> Option<ColumnType> {
        let raw = self.data_type;
        ColumnType::from_u8(raw)
    }

    /// Returns `true` when the [`col_flags::NULLABLE`] bit is set.
    pub fn is_nullable(&self) -> bool {
        (self.flags & col_flags::NULLABLE) == col_flags::NULLABLE
    }
}
/// Offset/length pair describing one string.
///
/// `#[repr(C, packed)]` fixes the layout at two consecutive `u32` fields with
/// no padding.
#[repr(C, packed)]
#[derive(Debug, Clone, Copy)]
pub struct StringEntry {
    /// Start of the string.
    // NOTE(review): the region this offset indexes into is not visible here — confirm.
    pub offset: u32,
    /// Length of the string.
    // NOTE(review): presumably a byte count — confirm.
    pub length: u32,
}

impl StringEntry {
    /// Size of one entry in bytes.
    pub const SIZE: usize = size_of::<StringEntry>();
}
/// Errors reported while reading or validating the binary format.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryFormatError {
    /// The magic number did not match; carries the value that was found.
    InvalidMagic(u32),
    /// The format version is not supported; carries the version that was found.
    UnsupportedVersion(u8),
    /// Fewer bytes were available than required.
    InsufficientData { expected: usize, actual: usize },
    /// A type byte did not correspond to a known column type.
    InvalidColumnType(u8),
    /// No column name could be found at the given offset.
    ColumnNameNotFound { offset: u16 },
    /// Data at `offset` is not aligned to `required` bytes.
    AlignmentError { offset: usize, required: usize },
    /// A string was not valid UTF-8.
    InvalidUtf8,
}

impl std::fmt::Display for BinaryFormatError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Every payload is a `Copy` integer, so matching by value is free.
        match *self {
            Self::InvalidUtf8 => f.write_str("Invalid UTF-8 string encoding"),
            Self::InvalidMagic(found) => {
                write!(f, "Invalid magic number: 0x{:08X}", found)
            }
            Self::UnsupportedVersion(found) => {
                write!(f, "Unsupported binary format version: {}", found)
            }
            Self::InvalidColumnType(raw) => write!(f, "Invalid column type: {}", raw),
            Self::ColumnNameNotFound { offset } => {
                write!(f, "Column name not found at offset {}", offset)
            }
            Self::InsufficientData { expected, actual } => write!(
                f,
                "Insufficient data: expected {} bytes, got {}",
                expected, actual
            ),
            Self::AlignmentError { offset, required } => write!(
                f,
                "Data at offset {} not aligned to {} bytes",
                offset, required
            ),
        }
    }
}

impl std::error::Error for BinaryFormatError {}
/// Returns how many bytes must be added to `offset` to reach the next multiple
/// of `alignment` (zero when `offset` is already a multiple).
///
/// # Panics
///
/// Panics if `alignment` is `0`, because the remainder is then undefined.
pub const fn align_padding(offset: usize, alignment: usize) -> usize {
    match offset % alignment {
        0 => 0,
        past_boundary => alignment - past_boundary,
    }
}
/// Rounds `offset` up to the nearest multiple of `alignment`, returning
/// `offset` unchanged when it is already a multiple.
///
/// # Panics
///
/// Panics if `alignment` is `0` (see `align_padding`).
pub const fn align_up(offset: usize, alignment: usize) -> usize {
    let padding = align_padding(offset, alignment);
    offset + padding
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The packed header layout must stay at 16 bytes.
    #[test]
    fn test_header_size() {
        assert_eq!(BinaryDataHeader::SIZE, 16);
    }

    /// The packed column descriptor layout must stay at 20 bytes.
    #[test]
    fn test_column_descriptor_size() {
        assert_eq!(ColumnDescriptor::SIZE, 20);
    }

    /// A header serialized with `to_bytes` parses back with the same fields.
    #[test]
    fn test_header_roundtrip() {
        let encoded = BinaryDataHeader::new(5, 1000, true, true).to_bytes();
        let decoded = BinaryDataHeader::from_bytes(&encoded).unwrap();

        assert_eq!(decoded.get_magic(), BINARY_MAGIC);
        assert_eq!(decoded.get_column_count(), 5);
        assert_eq!(decoded.get_row_count(), 1000);
        assert!(decoded.has_timestamps());
        assert!(decoded.is_sorted());
    }

    /// Each column type reports its expected element size.
    #[test]
    fn test_column_type_element_size() {
        let expectations = [
            (ColumnType::Float64, 8),
            (ColumnType::Int64, 8),
            (ColumnType::Timestamp, 8),
            (ColumnType::Bool, 1),
            (ColumnType::String, 0),
        ];
        for &(kind, size) in expectations.iter() {
            assert_eq!(kind.element_size(), size, "element size of {:?}", kind);
        }
    }

    /// `align_up` rounds to the next multiple of 32 and leaves multiples alone.
    #[test]
    fn test_alignment() {
        let cases = [(0, 0), (1, 32), (31, 32), (32, 32), (33, 64)];
        for &(offset, aligned) in cases.iter() {
            assert_eq!(align_up(offset, 32), aligned, "align_up({}, 32)", offset);
        }
    }
}