use anyhow::{anyhow, Context, Result};
use crc32fast::Hasher as Crc32Hasher;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;
/// Diagnostic logging macro, variant compiled when the `debug-prints` feature is enabled.
///
/// Forwards its arguments unchanged to `eprintln!`, so it accepts the same
/// format string and arguments.
#[cfg(feature = "debug-prints")]
macro_rules! debug_print {
($($arg:tt)*) => {
eprintln!($($arg)*);
};
}
/// Diagnostic logging macro, variant compiled when the `debug-prints` feature is disabled.
///
/// Expands to the unit value `()`; the argument tokens are dropped, so nothing
/// is formatted or printed.
#[cfg(not(feature = "debug-prints"))]
macro_rules! debug_print {
($($arg:tt)*) => {
()
};
}
/// Eight-byte signature stored at the very start of the file ("GEODB" padded with NULs).
pub const FILE_MAGIC: [u8; 8] = *b"GEODB\0\0\0";
/// The only on-disk format version `decode_header` accepts.
pub const FORMAT_VERSION: u32 = 1;
/// Size in bytes of the encoded file header (as a file offset/length).
pub const HEADER_SIZE: u64 = 128;
/// Same value as `HEADER_SIZE`, typed for array lengths and slicing.
pub const HEADER_SIZE_USIZE: usize = 128;
/// Size in bytes of one encoded section-table entry (as a file offset/length).
pub const SECTION_ENTRY_SIZE: u64 = 64;
/// Same value as `SECTION_ENTRY_SIZE`, typed for array lengths and slicing.
pub const SECTION_ENTRY_SIZE_USIZE: usize = 64;
/// Width in bytes of the NUL-padded name field inside a section-table entry.
pub const MAX_SECTION_NAME_LEN: usize = 32;
/// In-memory form of the fixed-size file header.
///
/// Integer fields are stored little-endian; the byte ranges quoted on each
/// field are the positions used by `encode_header` / `decode_header`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GeoFileHeader {
/// File signature (bytes 0..8); must equal `FILE_MAGIC` to decode.
pub magic: [u8; 8],
/// Format version (bytes 8..12); must equal `FORMAT_VERSION` to decode.
pub version: u32,
/// Header flag bits (bytes 12..16); stored and loaded but not interpreted in this file.
pub flags: u32,
/// Absolute file offset of the current section table (bytes 16..24).
pub section_table_offset: u64,
/// Number of entries in the section table (bytes 24..32).
pub section_count: u64,
/// Offset at which the next section allocation may begin (bytes 32..40).
pub next_data_offset: u64,
/// Creation time in whole seconds since the Unix epoch (bytes 40..48).
pub created_at_epoch: u64,
/// Time of the last flush in whole seconds since the Unix epoch (bytes 48..56).
pub modified_at_epoch: u64,
/// Reserved bytes; `decode_header` reads them from bytes 56..128.
pub reserved: [u8; 72],
}
impl Default for GeoFileHeader {
fn default() -> Self {
let now = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
Self {
magic: FILE_MAGIC,
version: FORMAT_VERSION,
flags: 0,
section_table_offset: HEADER_SIZE,
section_count: 0,
next_data_offset: HEADER_SIZE,
created_at_epoch: now,
modified_at_epoch: 0,
reserved: [0u8; 72],
}
}
}
/// One row of the section table, in the shape handled by
/// `encode_section_entry` / `decode_section_entry`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionEntry {
/// Section name; stored on disk in a NUL-padded `MAX_SECTION_NAME_LEN`-byte field.
pub name: String,
/// Absolute file offset at which the section's region begins.
pub offset: u64,
/// Number of bytes of payload currently stored in the section.
pub length: u64,
/// Number of bytes reserved for the section.
pub capacity: u64,
/// Per-section flag bits; stored and loaded but not interpreted in this file.
pub flags: u32,
/// Checksum of the stored payload (see `compute_checksum`).
pub checksum: u32,
}
/// In-memory description of a section as tracked by `SectionedStorage`.
///
/// Carries the same fields as `SectionEntry`; `flush` converts each `Section`
/// into a `SectionEntry` before writing the table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Section {
/// Section name; also the key under which the section is stored in the map.
pub name: String,
/// Absolute file offset at which the section's region begins.
pub offset: u64,
/// Number of bytes of payload currently stored (0 until the first write).
pub length: u64,
/// Number of bytes reserved for the section.
pub capacity: u64,
/// Per-section flag bits; stored and loaded but not interpreted in this file.
pub flags: u32,
/// Checksum of the stored payload (see `compute_checksum`).
pub checksum: u32,
}
impl std::fmt::Debug for SectionedStorage {
    /// Formats a summary of the storage: path, header, number of sections and
    /// the dirty flag. The file handle and the individual sections are omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let section_total = self.sections.len();
        let mut summary = f.debug_struct("SectionedStorage");
        summary.field("path", &self.path);
        summary.field("header", &self.header);
        summary.field("section_count", &section_total);
        summary.field("dirty", &self.dirty);
        summary.finish()
    }
}
/// A file divided into a fixed header, named fixed-capacity sections and a
/// section table describing them.
pub struct SectionedStorage {
// Open read/write handle to the backing file.
file: File,
// Path the file was created at or opened from.
path: std::path::PathBuf,
// In-memory header; written by `flush` and reloaded from disk by `validate`.
header: GeoFileHeader,
// Known sections keyed by name.
sections: BTreeMap<String, Section>,
// True when in-memory metadata has changed since the last `flush`.
dirty: bool,
}
/// Serializes `header` into its fixed 128-byte on-disk form.
///
/// Layout (integers little-endian):
///
/// | bytes    | field                  |
/// |----------|------------------------|
/// | 0..8     | `magic`                |
/// | 8..12    | `version`              |
/// | 12..16   | `flags`                |
/// | 16..24   | `section_table_offset` |
/// | 24..32   | `section_count`        |
/// | 32..40   | `next_data_offset`     |
/// | 40..48   | `created_at_epoch`     |
/// | 48..56   | `modified_at_epoch`    |
/// | 56..128  | `reserved`             |
///
/// The reserved bytes are written verbatim so that a decode/encode round trip
/// preserves them instead of silently zeroing them.
pub fn encode_header(header: &GeoFileHeader) -> [u8; HEADER_SIZE_USIZE] {
    let mut buf = [0u8; HEADER_SIZE_USIZE];
    buf[0..8].copy_from_slice(&header.magic);
    buf[8..12].copy_from_slice(&header.version.to_le_bytes());
    buf[12..16].copy_from_slice(&header.flags.to_le_bytes());
    buf[16..24].copy_from_slice(&header.section_table_offset.to_le_bytes());
    buf[24..32].copy_from_slice(&header.section_count.to_le_bytes());
    buf[32..40].copy_from_slice(&header.next_data_offset.to_le_bytes());
    buf[40..48].copy_from_slice(&header.created_at_epoch.to_le_bytes());
    buf[48..56].copy_from_slice(&header.modified_at_epoch.to_le_bytes());
    // Mirror of what `decode_header` reads back from this range.
    buf[56..128].copy_from_slice(&header.reserved);
    buf
}
/// Parses a 128-byte buffer into a [`GeoFileHeader`].
///
/// # Errors
///
/// Fails when the first eight bytes are not `FILE_MAGIC` or when the version
/// field differs from `FORMAT_VERSION`.
pub fn decode_header(buf: &[u8; HEADER_SIZE_USIZE]) -> Result<GeoFileHeader> {
    // Signature first: nothing else is meaningful if this is not our file type.
    let magic: [u8; 8] = buf[0..8]
        .try_into()
        .map_err(|_| anyhow!("Magic slice has wrong size"))?;
    if magic != FILE_MAGIC {
        return Err(anyhow!(
            "Invalid magic: expected {:?}, got {:?}",
            FILE_MAGIC,
            magic
        ));
    }

    let version = u32::from_le_bytes(buf[8..12].try_into()?);
    if version != FORMAT_VERSION {
        return Err(anyhow!(
            "Unsupported version: expected {}, got {}",
            FORMAT_VERSION,
            version
        ));
    }

    // Remaining fields are plain little-endian integers at fixed positions.
    let flags = u32::from_le_bytes(buf[12..16].try_into()?);
    let section_table_offset = u64::from_le_bytes(buf[16..24].try_into()?);
    let section_count = u64::from_le_bytes(buf[24..32].try_into()?);
    let next_data_offset = u64::from_le_bytes(buf[32..40].try_into()?);
    let created_at_epoch = u64::from_le_bytes(buf[40..48].try_into()?);
    let modified_at_epoch = u64::from_le_bytes(buf[48..56].try_into()?);

    let mut reserved = [0u8; 72];
    reserved.copy_from_slice(&buf[56..128]);

    Ok(GeoFileHeader {
        magic,
        version,
        flags,
        section_table_offset,
        section_count,
        next_data_offset,
        created_at_epoch,
        modified_at_epoch,
        reserved,
    })
}
/// Encodes a section name into the fixed-width, NUL-padded name field.
///
/// Names longer than `MAX_SECTION_NAME_LEN` bytes are truncated. Truncation
/// never splits a multi-byte UTF-8 character: the cut point backs off to the
/// nearest character boundary at or below the limit, so the stored bytes are
/// always valid UTF-8 and can be read back by `decode_section_entry_name`.
fn encode_section_entry_name(name: &str) -> [u8; MAX_SECTION_NAME_LEN] {
    let mut buf = [0u8; MAX_SECTION_NAME_LEN];
    let mut len = name.len().min(MAX_SECTION_NAME_LEN);
    // Index 0 is always a boundary, so this loop terminates.
    while !name.is_char_boundary(len) {
        len -= 1;
    }
    buf[..len].copy_from_slice(&name.as_bytes()[..len]);
    buf
}
/// Decodes a NUL-padded name field back into a `String`.
///
/// The name ends at the first NUL byte, or spans the whole buffer when there
/// is none.
///
/// # Errors
///
/// Fails when the name bytes are not valid UTF-8.
fn decode_section_entry_name(buf: &[u8]) -> Result<String> {
    let name_bytes = match buf.iter().position(|&byte| byte == 0) {
        Some(terminator) => &buf[..terminator],
        None => buf,
    };
    String::from_utf8(name_bytes.to_vec()).context("Section name is not valid UTF-8")
}
/// Serializes one section-table entry into its fixed 64-byte on-disk form.
///
/// Layout (integers little-endian): name in bytes 0..32, then `offset`,
/// `length` and `capacity` as three consecutive `u64`s (32..56), then `flags`
/// and `checksum` as two consecutive `u32`s (56..64).
pub fn encode_section_entry(entry: &SectionEntry) -> [u8; SECTION_ENTRY_SIZE_USIZE] {
    let mut buf = [0u8; SECTION_ENTRY_SIZE_USIZE];
    {
        let (name_field, numeric_fields) = buf.split_at_mut(32);
        name_field.copy_from_slice(&encode_section_entry_name(&entry.name));

        // 24 bytes of u64 fields followed by 8 bytes of u32 fields.
        let (wide_fields, narrow_fields) = numeric_fields.split_at_mut(24);

        let wide_values = [entry.offset, entry.length, entry.capacity];
        for (slot, value) in wide_fields.chunks_exact_mut(8).zip(wide_values.iter()) {
            slot.copy_from_slice(&value.to_le_bytes());
        }

        let narrow_values = [entry.flags, entry.checksum];
        for (slot, value) in narrow_fields.chunks_exact_mut(4).zip(narrow_values.iter()) {
            slot.copy_from_slice(&value.to_le_bytes());
        }
    }
    buf
}
/// Parses a 64-byte buffer into a [`SectionEntry`].
///
/// # Errors
///
/// Fails when the name field does not contain valid UTF-8.
pub fn decode_section_entry(buf: &[u8; SECTION_ENTRY_SIZE_USIZE]) -> Result<SectionEntry> {
    // The first 32 bytes hold the name; the remaining 32 hold the integers.
    let (name_field, numeric_fields) = buf.split_at(32);
    let name = decode_section_entry_name(name_field)?;

    let offset = u64::from_le_bytes(numeric_fields[0..8].try_into()?);
    let length = u64::from_le_bytes(numeric_fields[8..16].try_into()?);
    let capacity = u64::from_le_bytes(numeric_fields[16..24].try_into()?);
    let flags = u32::from_le_bytes(numeric_fields[24..28].try_into()?);
    let checksum = u32::from_le_bytes(numeric_fields[28..32].try_into()?);

    Ok(SectionEntry {
        name,
        offset,
        length,
        capacity,
        flags,
        checksum,
    })
}
/// Returns the CRC-32 of `data`, computed with the `crc32fast` hasher.
pub fn compute_checksum(data: &[u8]) -> u32 {
    let mut crc = Crc32Hasher::new();
    crc.update(data);
    let digest = crc.finalize();
    digest
}
impl SectionedStorage {
/// Creates (or truncates) the file at `path` and writes a default header.
///
/// The header is synced to disk before returning. The returned storage has
/// no sections and is not dirty.
///
/// # Errors
///
/// Fails when the file cannot be created, written or synced.
pub fn create(path: &Path) -> Result<Self> {
    let mut options = std::fs::OpenOptions::new();
    options.read(true).write(true).create(true).truncate(true);
    let mut file = options
        .open(path)
        .context("Failed to create sectioned file")?;

    let header = GeoFileHeader::default();
    file.write_all(&encode_header(&header))
        .context("Failed to write header")?;
    file.sync_all().context("Failed to sync file")?;

    let storage = Self {
        file,
        path: path.to_path_buf(),
        header,
        sections: BTreeMap::new(),
        dirty: false,
    };
    Ok(storage)
}
/// Reports whether `path` names a readable file that starts with a valid header.
///
/// Returns `false` for a missing path, an unopenable file, a file shorter
/// than the header, or a header that `decode_header` rejects. Never errors.
pub fn is_sectioned_file(path: &Path) -> bool {
    if !path.exists() {
        return false;
    }
    let mut file = match std::fs::File::open(path) {
        Ok(handle) => handle,
        Err(_) => return false,
    };
    let mut raw_header = [0u8; HEADER_SIZE_USIZE];
    if file.read_exact(&mut raw_header).is_err() {
        return false;
    }
    decode_header(&raw_header).map_or(false, |header| header.magic == FILE_MAGIC)
}
pub fn open(path: &Path) -> Result<Self> {
let mut file = std::fs::OpenOptions::new()
.read(true)
.write(true)
.open(path)
.context("Failed to open sectioned file")?;
let mut header_buf = [0u8; HEADER_SIZE_USIZE];
file.read_exact(&mut header_buf)
.context("Failed to read header")?;
let header = decode_header(&header_buf)?;
let mut sections = BTreeMap::new();
if header.section_count > 0 {
file.seek(SeekFrom::Start(header.section_table_offset))
.context("Failed to seek to section table")?;
for _ in 0..header.section_count {
let mut entry_buf = [0u8; SECTION_ENTRY_SIZE_USIZE];
file.read_exact(&mut entry_buf)
.context("Failed to read section entry")?;
let entry = decode_section_entry(&entry_buf)?;
sections.insert(
entry.name.clone(),
Section {
name: entry.name,
offset: entry.offset,
length: entry.length,
capacity: entry.capacity,
flags: entry.flags,
checksum: entry.checksum,
},
);
}
}
let mut storage = Self {
file,
path: path.to_path_buf(),
header,
sections,
dirty: false,
};
storage.validate()?;
Ok(storage)
}
/// Reserves a new, empty section of `capacity` bytes at the end of the file.
///
/// The section is placed at the larger of the header's `next_data_offset` and
/// the current physical file length, and the file is extended to cover it.
/// Only in-memory metadata is updated; call [`flush`](Self::flush) to persist
/// the section table.
///
/// # Errors
///
/// Fails when `name` is empty, longer than `MAX_SECTION_NAME_LEN` bytes or
/// already in use, when the new end offset overflows `u64`, or when the file
/// cannot be inspected or extended.
pub fn create_section(&mut self, name: &str, capacity: u64, flags: u32) -> Result<()> {
    if name.is_empty() {
        return Err(anyhow!("Section name cannot be empty"));
    }
    let name_len = name.len();
    if name_len > MAX_SECTION_NAME_LEN {
        return Err(anyhow!(
            "Section name too long: {} bytes > {}",
            name_len,
            MAX_SECTION_NAME_LEN
        ));
    }
    if self.sections.contains_key(name) {
        return Err(anyhow!("Section '{}' already exists", name));
    }

    let physical_len = self
        .file
        .metadata()
        .context("Failed to get file metadata")?
        .len();
    // Never allocate on top of bytes that already exist in the file.
    let offset = std::cmp::max(self.header.next_data_offset, physical_len);
    let end = match offset.checked_add(capacity) {
        Some(end) => end,
        None => {
            return Err(anyhow!(
                "Data offset overflow: allocation_base {} + capacity {}",
                offset,
                capacity
            ))
        }
    };

    self.file.set_len(end).with_context(|| {
        format!(
            "Failed to reserve {} bytes for section '{}' at offset {}",
            capacity, name, offset
        )
    })?;

    self.header.next_data_offset = end;
    let section = Section {
        name: name.to_string(),
        offset,
        length: 0,
        capacity,
        flags,
        checksum: 0,
    };
    self.sections.insert(section.name.clone(), section);
    self.dirty = true;
    Ok(())
}
/// Replaces the payload of section `name` with `data`.
///
/// The bytes are written at the section's offset and the in-memory length and
/// checksum are updated; the section table on disk is not touched until
/// [`flush`](Self::flush).
///
/// # Errors
///
/// Fails when the section does not exist, when `data` is larger than the
/// section's capacity, or when seeking or writing fails.
pub fn write_section(&mut self, name: &str, data: &[u8]) -> Result<()> {
    debug_print!(
        "[WRITE_SECTION] Writing section '{}': {} bytes",
        name,
        data.len()
    );

    let (offset, length, capacity) = match self.sections.get(name) {
        Some(section) => (section.offset, section.length, section.capacity),
        None => return Err(anyhow!("Section '{}' not found", name)),
    };

    let payload_len = data.len() as u64;
    if payload_len > capacity {
        return Err(anyhow!(
            "Section '{}' overflow: attempted to write {} bytes, but capacity is {} bytes ({} bytes over limit)\n\
             Section details: offset={}, length={}, capacity={}\n\
             To fix: Increase section capacity during database creation or migrate to a larger capacity.",
            name,
            data.len(),
            capacity,
            payload_len - capacity,
            offset,
            length,
            capacity
        ));
    }

    let checksum = compute_checksum(data);
    self.file
        .seek(SeekFrom::Start(offset))
        .context("Failed to seek to section")?;
    self.file
        .write_all(data)
        .context("Failed to write section data")?;

    // Metadata is only updated once the payload has been written.
    if let Some(entry) = self.sections.get_mut(name) {
        entry.length = payload_len;
        entry.checksum = checksum;
        debug_print!(
            "[WRITE_SECTION] Updated section '{}' metadata: length={}, checksum={}",
            name,
            entry.length,
            entry.checksum
        );
    }
    self.dirty = true;
    Ok(())
}
/// Reads and returns the payload of section `name`.
///
/// An empty section yields an empty vector without touching the file.
/// Otherwise `length` bytes are read from the section's offset and checked
/// against the stored checksum.
///
/// # Errors
///
/// Fails when the section does not exist, when seeking or reading fails, or
/// when the computed checksum differs from the stored one.
pub fn read_section(&mut self, name: &str) -> Result<Vec<u8>> {
    let (offset, length, stored_checksum) = self
        .sections
        .get(name)
        .map(|section| (section.offset, section.length, section.checksum))
        .ok_or_else(|| anyhow!("Section '{}' not found", name))?;

    if length == 0 {
        return Ok(Vec::new());
    }

    self.file
        .seek(SeekFrom::Start(offset))
        .context("Failed to seek to section")?;
    let mut payload = vec![0u8; length as usize];
    self.file
        .read_exact(&mut payload)
        .context("Failed to read section data")?;

    let actual_checksum = compute_checksum(&payload);
    if actual_checksum == stored_checksum {
        Ok(payload)
    } else {
        Err(anyhow!(
            "Checksum mismatch for section '{}': stored {}, computed {}",
            name,
            stored_checksum,
            actual_checksum
        ))
    }
}
/// Returns the in-memory metadata for section `name`, or `None` if no such section exists.
pub fn get_section(&self, name: &str) -> Option<&Section> {
self.sections.get(name)
}
/// Returns a copy of every section's metadata, in the map's iteration order
/// (sorted by name, since the sections live in a `BTreeMap`).
pub fn list_sections(&self) -> Vec<Section> {
self.sections.values().cloned().collect()
}
/// Returns the number of sections currently tracked in memory.
pub fn section_count(&self) -> usize {
self.sections.len()
}
/// Returns the path this storage was created at or opened from.
pub fn path(&self) -> &Path {
&self.path
}
/// Returns the in-memory file header.
pub fn header(&self) -> &GeoFileHeader {
&self.header
}
/// Persists the section table and header, then syncs the file.
///
/// A fresh copy of the section table is appended at the current end of the
/// file. The header is then rewritten at offset 0 so that it points at the
/// new table and records the section count and the modification time. The
/// dirty flag is cleared on success.
///
/// # Errors
///
/// Fails when seeking, writing or syncing fails.
pub fn flush(&mut self) -> Result<()> {
    let modified_at = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    // The table always goes at the current end of the file.
    let table_offset = self
        .file
        .seek(SeekFrom::End(0))
        .context("Failed to seek to EOF for section table")?;
    debug_print!(
        "[FLUSH_DEBUG] Writing section table at offset {}",
        table_offset
    );
    debug_print!("[FLUSH_DEBUG] Section count: {}", self.sections.len());

    for section in self.sections.values() {
        debug_print!(
            "[FLUSH_DEBUG] Section {}: offset={}, length={}",
            section.name,
            section.offset,
            section.length
        );
        let encoded = encode_section_entry(&SectionEntry {
            name: section.name.clone(),
            offset: section.offset,
            length: section.length,
            capacity: section.capacity,
            flags: section.flags,
            checksum: section.checksum,
        });
        self.file
            .write_all(&encoded)
            .context("Failed to write section entry")?;
    }

    // The header is written last, after the table it points to.
    self.header.section_table_offset = table_offset;
    self.header.section_count = self.sections.len() as u64;
    self.header.modified_at_epoch = modified_at;

    self.file
        .seek(SeekFrom::Start(0))
        .context("Failed to seek to header")?;
    let encoded_header = encode_header(&self.header);
    self.file
        .write_all(&encoded_header)
        .context("Failed to write header")?;
    self.file.sync_all().context("Failed to sync file")?;

    self.dirty = false;
    Ok(())
}
/// Checks the on-disk header and the in-memory section list for structural
/// consistency and verifies every non-empty section's checksum.
///
/// The header is re-read from disk and replaces the in-memory header before
/// the checks run. The checks, in order:
///
/// 1. the storage is not dirty and the file is at least one header long;
/// 2. the section table offset lies between the end of the header and the end
///    of the file, the file covers `next_data_offset`, and the data area ends
///    at or before the table;
/// 3. the whole table fits inside the file;
/// 4. no section name is empty;
/// 5. every section lies after the header and before the table, has
///    `length <= capacity`, and does not overlap the section before it;
/// 6. every non-empty section's payload matches its checksum;
/// 7. `next_data_offset` is not below the end of the last section, and a file
///    with no sections has its table directly after the header.
///
/// Zero-capacity sections occupy no bytes, so one may sit exactly at the
/// table offset or share its offset with the section that follows it. Both
/// layouts are produced by `create_section` with capacity 0 and are accepted.
///
/// # Errors
///
/// Fails with a descriptive message on the first violated check, or when the
/// file cannot be inspected, seeked or read.
pub fn validate(&mut self) -> Result<()> {
    if self.dirty {
        return Err(anyhow!(
            "cannot validate dirty/unflushed state; flush first"
        ));
    }
    let metadata = self
        .file
        .metadata()
        .context("Failed to get file metadata")?;
    let file_len = metadata.len();
    if file_len < HEADER_SIZE {
        return Err(anyhow!(
            "File too small: {} < header size {}",
            file_len,
            HEADER_SIZE
        ));
    }

    let mut header_buf = [0u8; HEADER_SIZE_USIZE];
    self.file
        .seek(SeekFrom::Start(0))
        .context("Failed to seek to header for validation")?;
    self.file
        .read_exact(&mut header_buf)
        .context("Failed to read header for validation")?;
    let disk_header = decode_header(&header_buf)?;
    // The on-disk header is authoritative from here on.
    self.header = disk_header.clone();

    if disk_header.section_table_offset < HEADER_SIZE {
        return Err(anyhow!(
            "Section table offset {} before data area {}",
            disk_header.section_table_offset,
            HEADER_SIZE
        ));
    }
    if disk_header.section_table_offset > file_len {
        return Err(anyhow!(
            "Section table offset {} beyond file length {}",
            disk_header.section_table_offset,
            file_len
        ));
    }
    if file_len < disk_header.next_data_offset {
        return Err(anyhow!(
            "File truncated: length {} < next_data_offset {} (physical reservation missing)",
            file_len,
            disk_header.next_data_offset
        ));
    }
    if disk_header.next_data_offset > disk_header.section_table_offset {
        return Err(anyhow!(
            "Data area overlaps table: next_data_offset {} > section_table_offset {}",
            disk_header.next_data_offset,
            disk_header.section_table_offset
        ));
    }

    if disk_header.section_count > 0 {
        let table_size = disk_header
            .section_count
            .checked_mul(SECTION_ENTRY_SIZE)
            .ok_or_else(|| anyhow!("Section count overflow"))?;
        let table_end = disk_header
            .section_table_offset
            .checked_add(table_size)
            .ok_or_else(|| anyhow!("Section table end overflow"))?;
        if table_end > file_len {
            return Err(anyhow!(
                "Section table extends beyond file: offset {} + size {} = {} > length {}",
                disk_header.section_table_offset,
                table_size,
                table_end,
                file_len
            ));
        }
    }

    for name in self.sections.keys() {
        if name.is_empty() {
            return Err(anyhow!("Section name is empty"));
        }
    }

    let mut prev_end = HEADER_SIZE;
    let mut sorted_sections: Vec<_> = self.sections.iter().collect();
    // Ties on offset are broken by capacity so that a zero-capacity section
    // is visited before a non-empty section starting at the same offset;
    // otherwise it would be reported as overlapping its neighbour.
    sorted_sections.sort_by_key(|(_, section)| (section.offset, section.capacity));

    for (name, section) in sorted_sections {
        if section.offset < HEADER_SIZE {
            return Err(anyhow!(
                "Section '{}' offset {} before data area {}",
                name,
                section.offset,
                HEADER_SIZE
            ));
        }
        // A zero-capacity section occupies no bytes, so it may legitimately
        // start exactly where the table starts.
        let starts_inside_table = section.offset > disk_header.section_table_offset
            || (section.offset == disk_header.section_table_offset && section.capacity > 0);
        if starts_inside_table {
            return Err(anyhow!(
                "Section '{}' offset {} at or after current table {}",
                name,
                section.offset,
                disk_header.section_table_offset
            ));
        }
        if section.capacity < section.length {
            return Err(anyhow!(
                "Section '{}' capacity {} < length {}",
                name,
                section.capacity,
                section.length
            ));
        }
        let section_end = section
            .offset
            .checked_add(section.capacity)
            .ok_or_else(|| anyhow!("Section '{}' end overflow", name))?;
        if section_end > disk_header.section_table_offset {
            return Err(anyhow!(
                "Section '{}' (offset {} + capacity {} = {}) extends beyond current table start {}",
                name,
                section.offset,
                section.capacity,
                section_end,
                disk_header.section_table_offset
            ));
        }
        if section.offset < prev_end {
            return Err(anyhow!(
                "Section '{}' (offset {}) overlaps previous section (ends at {})",
                name,
                section.offset,
                prev_end
            ));
        }
        prev_end = section_end;

        if section.length > 0 {
            self.file
                .seek(SeekFrom::Start(section.offset))
                .context("Failed to seek to section for checksum validation")?;
            let mut buf = vec![0u8; section.length as usize];
            self.file
                .read_exact(&mut buf)
                .context("Failed to read section for checksum validation")?;
            let computed = compute_checksum(&buf);
            if computed != section.checksum {
                return Err(anyhow!(
                    "Checksum mismatch for section '{}': stored {}, computed {}",
                    name,
                    section.checksum,
                    computed
                ));
            }
        }
    }

    if disk_header.next_data_offset < prev_end {
        return Err(anyhow!(
            "next_data_offset {} overlaps last section (ends at {})",
            disk_header.next_data_offset,
            prev_end
        ));
    }
    if disk_header.section_count == 0 && disk_header.section_table_offset != HEADER_SIZE {
        return Err(anyhow!(
            "Empty file: section_table_offset should be {}, got {}",
            HEADER_SIZE,
            disk_header.section_table_offset
        ));
    }
    Ok(())
}
/// Checks that every name in `required` refers to an existing section.
///
/// # Errors
///
/// Fails on the first name in `required` (in slice order) that is missing.
pub fn validate_required_sections(&self, required: &[&str]) -> Result<()> {
    let first_missing = required
        .iter()
        .find(|candidate| !self.sections.contains_key(**candidate));
    match first_missing {
        Some(name) => Err(anyhow!("Required section '{}' is missing", name)),
        None => Ok(()),
    }
}
/// Changes the capacity of section `name` by relocating it to the end of the file.
///
/// The current payload (if any) is read, a new region of `new_capacity` bytes
/// is reserved at the larger of `next_data_offset` and the physical file
/// length, the payload is copied there, and the in-memory metadata is
/// repointed. Length, flags and checksum are carried over unchanged. The old
/// region is left in place; this method does not reclaim it. Call
/// [`flush`](Self::flush) afterwards to persist the updated section table.
///
/// Requesting the current capacity is a no-op.
///
/// # Errors
///
/// Fails when the section does not exist, when `new_capacity` is smaller than
/// the stored payload, when the new end offset overflows `u64`, or when any
/// file operation fails.
pub fn resize_section(&mut self, name: &str, new_capacity: u64) -> Result<()> {
    let (offset, length, flags, checksum) = {
        let section = self
            .sections
            .get(name)
            .ok_or_else(|| anyhow!("Section '{}' not found", name))?;
        if new_capacity < section.length {
            return Err(anyhow!(
                "Cannot resize section '{}' to {} bytes: current data length is {} bytes",
                name,
                new_capacity,
                section.length
            ));
        }
        if new_capacity == section.capacity {
            return Ok(());
        }
        debug_print!(
            "[RESIZE_SECTION] Resizing '{}' from {} to {} bytes",
            name,
            section.capacity,
            new_capacity
        );
        (
            section.offset,
            section.length,
            section.flags,
            section.checksum,
        )
    };

    // Read the existing payload before the file is extended.
    let current_data = if length > 0 {
        self.file
            .seek(SeekFrom::Start(offset))
            .context("Failed to seek to section for resize")?;
        let mut buffer = vec![0u8; length as usize];
        self.file
            .read_exact(&mut buffer)
            .context("Failed to read section data for resize")?;
        buffer
    } else {
        Vec::new()
    };

    let file_len = self
        .file
        .metadata()
        .context("Failed to get file metadata")?
        .len();
    // Never allocate on top of bytes that already exist in the file.
    let allocation_base = self.header.next_data_offset.max(file_len);
    let new_next_data_offset = allocation_base
        .checked_add(new_capacity)
        .ok_or_else(|| anyhow!("Data offset overflow during section resize"))?;
    self.file
        .set_len(new_next_data_offset)
        .context("Failed to extend file for section resize")?;

    let new_offset = allocation_base;
    if !current_data.is_empty() {
        self.file
            .seek(SeekFrom::Start(new_offset))
            .context("Failed to seek to new section location")?;
        self.file
            .write_all(&current_data)
            .context("Failed to write data to new section location")?;
    }

    // `insert` replaces the existing entry for this name.
    self.sections.insert(
        name.to_string(),
        Section {
            name: name.to_string(),
            offset: new_offset,
            length,
            capacity: new_capacity,
            flags,
            checksum,
        },
    );
    self.header.next_data_offset = new_next_data_offset;
    self.dirty = true;
    debug_print!(
        "[RESIZE_SECTION] Section '{}' moved from offset {} to {}",
        name,
        offset,
        new_offset
    );
    Ok(())
}
}
// Unit tests for the pure encode/decode helpers and the checksum function.
// None of these tests touch the filesystem.
#[cfg(test)]
mod tests {
use super::*;
// The encoded header must have the fixed on-disk size.
#[test]
fn test_header_exactly_128_bytes() {
let header = GeoFileHeader::default();
let encoded = encode_header(&header);
assert_eq!(encoded.len(), 128, "Header must be exactly 128 bytes");
}
// 56 bytes of named fields + 72 reserved bytes = 128.
#[test]
fn test_header_reserved_field_size() {
let header = GeoFileHeader::default();
assert_eq!(header.reserved.len(), 72);
}
// Encoding then decoding must reproduce the header (reserved is all zeros here).
#[test]
fn test_header_roundtrip_preserves_all_fields() {
let header = GeoFileHeader {
flags: 0x12345678,
section_table_offset: 0x1000,
section_count: 5,
next_data_offset: 0x2000,
created_at_epoch: 1234567890,
modified_at_epoch: 1234567900,
..Default::default()
};
let encoded = encode_header(&header);
let decoded = decode_header(&encoded).unwrap();
assert_eq!(decoded, header);
}
// The reserved field must be taken from bytes 56..128 of the raw header.
#[test]
fn test_decode_header_reserved_slice_correct() {
let mut buf = [0u8; 128];
buf[0..8].copy_from_slice(&FILE_MAGIC);
buf[8..12].copy_from_slice(&1u32.to_le_bytes());
buf[100] = 0x42;
buf[127] = 0xFF;
let decoded = decode_header(&buf).unwrap();
assert_eq!(decoded.reserved[100 - 56], 0x42);
assert_eq!(decoded.reserved[127 - 56], 0xFF);
}
// A buffer that does not start with FILE_MAGIC must be rejected.
#[test]
fn test_invalid_magic_rejected() {
let mut buf = [0u8; 128];
buf[0..8].copy_from_slice(b"BADMAGIC");
assert!(decode_header(&buf).is_err());
}
// The encoded section entry must have the fixed on-disk size.
#[test]
fn test_section_entry_exactly_64_bytes() {
let entry = SectionEntry {
name: "test".to_string(),
offset: 0,
length: 0,
capacity: 0,
flags: 0,
checksum: 0,
};
let encoded = encode_section_entry(&entry);
assert_eq!(encoded.len(), 64, "Section entry must be exactly 64 bytes");
}
// Every field of a section entry must survive an encode/decode round trip.
#[test]
fn test_section_entry_roundtrip() {
let entry = SectionEntry {
name: "test_section".to_string(),
offset: 1024,
length: 512,
capacity: 1024,
flags: 0x12345678,
checksum: 0xABCDEF01,
};
let encoded = encode_section_entry(&entry);
assert_eq!(encoded.len(), 64);
let decoded = decode_section_entry(&encoded).unwrap();
assert_eq!(entry.name, decoded.name);
assert_eq!(entry.offset, decoded.offset);
assert_eq!(entry.length, decoded.length);
assert_eq!(entry.capacity, decoded.capacity);
assert_eq!(entry.flags, decoded.flags);
assert_eq!(entry.checksum, decoded.checksum);
}
// A short name is NUL-padded on encode and recovered exactly on decode.
#[test]
fn test_section_name_encoding() {
let name = "cfg_data";
let encoded = encode_section_entry_name(name);
let decoded = decode_section_entry_name(&encoded).unwrap();
assert_eq!(name, decoded);
}
// The same input must always yield the same checksum.
#[test]
fn test_checksum_deterministic() {
let data = b"test data";
let crc1 = compute_checksum(data);
let crc2 = compute_checksum(data);
assert_eq!(crc1, crc2);
}
// Changing a single byte must change the checksum.
#[test]
fn test_checksum_detects_corruption() {
let data1 = b"test data";
let data2 = b"test datb"; assert_ne!(compute_checksum(data1), compute_checksum(data2));
}
// The checksum of empty input is 0.
#[test]
fn test_checksum_empty() {
let data = b"";
let crc = compute_checksum(data);
assert_eq!(crc, 0);
}
// A name that exactly fills the 32-byte field (no NUL terminator) round-trips.
#[test]
fn test_section_name_32_bytes_accepted() {
let name_32_bytes = "12345678901234567890123456789012"; assert_eq!(name_32_bytes.len(), 32);
let encoded = encode_section_entry_name(name_32_bytes);
let decoded = decode_section_entry_name(&encoded).unwrap();
assert_eq!(name_32_bytes, decoded);
}
// A 33-byte ASCII name is cut down to its first 32 bytes by the encoder.
#[test]
fn test_section_name_encoding_truncates_at_32() {
let name_33_bytes = "123456789012345678901234567890123"; assert_eq!(name_33_bytes.len(), 33);
let encoded = encode_section_entry_name(name_33_bytes);
let decoded = decode_section_entry_name(&encoded).unwrap();
assert_eq!(decoded.len(), 32); assert_eq!(decoded, "12345678901234567890123456789012");
}
}