use anyhow::{bail, Context, Result};
use std::collections::BTreeMap;
use std::path::Path;
/// Magic number stored in the first four header bytes; its hex digits are the ASCII codes of
/// "PROP" (`0x50 0x52 0x4f 0x50`).
const PROP_MAGIC: u32 = 0x5052_4f50;
/// Format version written by `write_prop_store` and the only one `read_prop_store` accepts.
const PROP_VERSION: u32 = 1;
/// Size in bytes of the serialized header (4 + 4 + 8 + 8 + 8).
const HEADER_SIZE: usize = 32;
/// Size in bytes of one serialized lookup entry (8 + 8 + 4).
const LOOKUP_ENTRY_SIZE: usize = 20;
/// Fixed-size header found at the start of a property store file.
///
/// `to_bytes` serializes `magic`, `version`, `num_records`, `lookup_offset` and `data_offset`
/// back to back in little-endian order, which fills all `HEADER_SIZE` bytes. `_reserved` exists
/// only in memory: it is never written, and `from_bytes` always sets it to zero.
#[derive(Clone, Copy, Debug)]
pub(crate) struct PropStoreHeader {
// `lookup_offset` and `data_offset` are byte offsets from the start of the file;
// `num_records` is the number of lookup entries.
magic: u32, version: u32, num_records: u64, lookup_offset: u64, data_offset: u64, _reserved: [u8; 8],
}
impl PropStoreHeader {
fn to_bytes(self) -> [u8; HEADER_SIZE] {
let mut buf = [0u8; HEADER_SIZE];
buf[0..4].copy_from_slice(&self.magic.to_le_bytes());
buf[4..8].copy_from_slice(&self.version.to_le_bytes());
buf[8..16].copy_from_slice(&self.num_records.to_le_bytes());
buf[16..24].copy_from_slice(&self.lookup_offset.to_le_bytes());
buf[24..32].copy_from_slice(&self.data_offset.to_le_bytes());
buf
}
fn from_bytes(buf: &[u8; HEADER_SIZE]) -> Self {
Self {
magic: u32::from_le_bytes(buf[0..4].try_into().unwrap()),
version: u32::from_le_bytes(buf[4..8].try_into().unwrap()),
num_records: u64::from_le_bytes(buf[8..16].try_into().unwrap()),
lookup_offset: u64::from_le_bytes(buf[16..24].try_into().unwrap()),
data_offset: u64::from_le_bytes(buf[24..32].try_into().unwrap()),
_reserved: [0; 8],
}
}
}
/// Lookup-table record that locates one node's encoded properties.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct PropLookupEntry {
/// Identifier of the node this entry describes.
pub node_id: u64,
/// Byte offset of the node's encoded properties within `PropStore::data`.
pub data_offset: u64,
/// Length in bytes of the node's encoded properties.
pub data_len: u32,
}
impl PropLookupEntry {
fn to_bytes(self) -> [u8; LOOKUP_ENTRY_SIZE] {
let mut buf = [0u8; LOOKUP_ENTRY_SIZE];
buf[0..8].copy_from_slice(&self.node_id.to_le_bytes());
buf[8..16].copy_from_slice(&self.data_offset.to_le_bytes());
buf[16..20].copy_from_slice(&self.data_len.to_le_bytes());
buf
}
fn from_bytes(buf: &[u8; LOOKUP_ENTRY_SIZE]) -> Self {
Self {
node_id: u64::from_le_bytes(buf[0..8].try_into().unwrap()),
data_offset: u64::from_le_bytes(buf[8..16].try_into().unwrap()),
data_len: u32::from_le_bytes(buf[16..20].try_into().unwrap()),
}
}
}
/// A single typed property value that can be attached to a node.
#[derive(Clone, Debug, PartialEq)]
pub enum PropertyValue {
/// Null marker; carries no payload.
Null,
/// Boolean value.
Bool(bool),
/// Unsigned 64-bit integer.
U64(u64),
/// Signed 64-bit integer.
I64(i64),
/// 64-bit floating-point number.
F64(f64),
/// UTF-8 text.
String(String),
/// Sequence of 64-bit floating-point numbers.
F64Array(Vec<f64>),
/// Raw byte sequence.
Bytes(Vec<u8>),
}
/// One-byte discriminant written in front of every encoded `PropertyValue`.
///
/// The numeric values are part of the serialized format: `encode_value` writes `tag as u8` and
/// `TypeTag::from_byte` maps the same numbers back, so they must not be renumbered.
#[repr(u8)]
enum TypeTag {
Null = 0,
Bool = 1,
U64 = 2,
I64 = 3,
F64 = 4,
String = 5,
F64Array = 6,
Bytes = 7,
}
impl TypeTag {
    /// Maps a raw tag byte to the matching `TypeTag`.
    ///
    /// # Errors
    ///
    /// Returns an error for any byte outside `0..=7`.
    fn from_byte(b: u8) -> Result<Self> {
        let tag = match b {
            0 => Self::Null,
            1 => Self::Bool,
            2 => Self::U64,
            3 => Self::I64,
            4 => Self::F64,
            5 => Self::String,
            6 => Self::F64Array,
            7 => Self::Bytes,
            unknown => bail!("unknown property type tag: {}", unknown),
        };
        Ok(tag)
    }
}
/// Properties of a single node: property name mapped to its value, kept in key order by the
/// underlying `BTreeMap`.
pub type NodeProperties = BTreeMap<String, PropertyValue>;
/// In-memory property store: a lookup table plus the concatenated encoded property blobs.
pub struct PropStore {
/// One entry per node. `lookup` binary-searches this by `node_id`, so it must be sorted by id.
pub entries: Vec<PropLookupEntry>,
/// Encoded node properties; each entry's `data_offset` / `data_len` indexes into this buffer.
pub data: Vec<u8>,
}
pub fn encode_value(buf: &mut Vec<u8>, val: &PropertyValue) {
match val {
PropertyValue::Null => buf.push(TypeTag::Null as u8),
PropertyValue::Bool(v) => {
buf.push(TypeTag::Bool as u8);
buf.push(*v as u8);
}
PropertyValue::U64(v) => {
buf.push(TypeTag::U64 as u8);
buf.extend_from_slice(&v.to_le_bytes());
}
PropertyValue::I64(v) => {
buf.push(TypeTag::I64 as u8);
buf.extend_from_slice(&v.to_le_bytes());
}
PropertyValue::F64(v) => {
buf.push(TypeTag::F64 as u8);
buf.extend_from_slice(&v.to_le_bytes());
}
PropertyValue::String(v) => {
buf.push(TypeTag::String as u8);
let bytes = v.as_bytes();
assert!(bytes.len() <= u32::MAX as usize);
buf.extend_from_slice(&(u32::try_from(bytes.len()).unwrap()).to_le_bytes());
buf.extend_from_slice(bytes);
}
PropertyValue::F64Array(v) => {
buf.push(TypeTag::F64Array as u8);
assert!(v.len() <= u16::MAX as usize);
buf.extend_from_slice(&(u16::try_from(v.len()).unwrap()).to_le_bytes());
for f in v {
buf.extend_from_slice(&f.to_le_bytes());
}
}
PropertyValue::Bytes(v) => {
buf.push(TypeTag::Bytes as u8);
assert!(v.len() <= u32::MAX as usize);
buf.extend_from_slice(&(u32::try_from(v.len()).unwrap()).to_le_bytes());
buf.extend_from_slice(v);
}
}
}
pub fn decode_value(bytes: &mut &[u8]) -> Result<PropertyValue> {
if bytes.is_empty() {
bail!("unexpected EOF reading property value type tag");
}
let tag_byte = bytes[0];
*bytes = &bytes[1..];
let tag = TypeTag::from_byte(tag_byte)?;
Ok(match tag {
TypeTag::Null => PropertyValue::Null,
TypeTag::Bool => {
if bytes.is_empty() {
bail!("EOF reading bool value");
}
let v = bytes[0] != 0;
*bytes = &bytes[1..];
PropertyValue::Bool(v)
}
TypeTag::U64 => {
if bytes.len() < 8 {
bail!("EOF reading u64 value");
}
let v = u64::from_le_bytes(bytes[..8].try_into().unwrap());
*bytes = &bytes[8..];
PropertyValue::U64(v)
}
TypeTag::I64 => {
if bytes.len() < 8 {
bail!("EOF reading i64 value");
}
let v = i64::from_le_bytes(bytes[..8].try_into().unwrap());
*bytes = &bytes[8..];
PropertyValue::I64(v)
}
TypeTag::F64 => {
if bytes.len() < 8 {
bail!("EOF reading f64 value");
}
let v = f64::from_le_bytes(bytes[..8].try_into().unwrap());
*bytes = &bytes[8..];
PropertyValue::F64(v)
}
TypeTag::String => {
if bytes.len() < 4 {
bail!("EOF reading string length");
}
let len = u32::from_le_bytes(bytes[..4].try_into().unwrap()) as usize;
*bytes = &bytes[4..];
if bytes.len() < len {
bail!(
"EOF reading string payload (need {}, have {})",
len,
bytes.len()
);
}
let s = String::from_utf8_lossy(&bytes[..len]).to_string();
*bytes = &bytes[len..];
PropertyValue::String(s)
}
TypeTag::F64Array => {
if bytes.len() < 2 {
bail!("EOF reading f64 array count");
}
let count = u16::from_le_bytes(bytes[..2].try_into().unwrap()) as usize;
*bytes = &bytes[2..];
if bytes.len() < count * 8 {
bail!("EOF reading f64 array payload");
}
let mut arr = Vec::with_capacity(count);
for i in 0..count {
let off = i * 8;
arr.push(f64::from_le_bytes(bytes[off..off + 8].try_into().unwrap()));
}
*bytes = &bytes[count * 8..];
PropertyValue::F64Array(arr)
}
TypeTag::Bytes => {
if bytes.len() < 4 {
bail!("EOF reading bytes length");
}
let len = u32::from_le_bytes(bytes[..4].try_into().unwrap()) as usize;
*bytes = &bytes[4..];
if bytes.len() < len {
bail!("EOF reading bytes payload");
}
let v = bytes[..len].to_vec();
*bytes = &bytes[len..];
PropertyValue::Bytes(v)
}
})
}
pub fn encode_node_properties(buf: &mut Vec<u8>, props: &NodeProperties) {
assert!(props.len() <= u16::MAX as usize);
buf.extend_from_slice(&(u16::try_from(props.len()).unwrap()).to_le_bytes());
for (key, val) in props.iter() {
let key_bytes = key.as_bytes();
assert!(key_bytes.len() <= u8::MAX as usize);
buf.push(u8::try_from(key_bytes.len()).unwrap());
buf.extend_from_slice(key_bytes);
encode_value(buf, val);
}
}
pub fn decode_node_properties(mut bytes: &[u8]) -> Result<NodeProperties> {
if bytes.len() < 2 {
bail!("EOF reading property pair count");
}
let count = u16::from_le_bytes(bytes[..2].try_into().unwrap()) as usize;
bytes = &bytes[2..];
let mut props = NodeProperties::new();
for _ in 0..count {
if bytes.is_empty() {
bail!("EOF reading property key length");
}
let key_len = bytes[0] as usize;
bytes = &bytes[1..];
if bytes.len() < key_len {
bail!(
"EOF reading property key (need {}, have {})",
key_len,
bytes.len()
);
}
let key = String::from_utf8_lossy(&bytes[..key_len]).to_string();
bytes = &bytes[key_len..];
let val = decode_value(&mut bytes)?;
props.insert(key, val);
}
Ok(props)
}
pub fn build_prop_store(nodes: Vec<(u64, NodeProperties)>) -> PropStore {
let mut sorted = nodes;
sorted.sort_by_key(|(id, _)| *id);
let mut entries = Vec::with_capacity(sorted.len());
let mut data = Vec::new();
for (node_id, props) in sorted {
let offset = data.len() as u64;
encode_node_properties(&mut data, &props);
let len = (data.len() as u64 - offset) as u32;
entries.push(PropLookupEntry {
node_id,
data_offset: offset,
data_len: len,
});
}
PropStore { entries, data }
}
/// Writes `store` to the file at `path`, creating or replacing it.
///
/// The file image is assembled in memory as header, lookup entries, then the data section, and
/// written with a single `std::fs::write` call. The lookup table starts right after the header
/// and the data section right after the lookup table.
///
/// # Errors
///
/// Returns the I/O error, with the context "write prop store", if the file cannot be written.
pub fn write_prop_store(store: &PropStore, path: &Path) -> Result<()> {
    let record_count = store.entries.len() as u64;
    let table_bytes = record_count * LOOKUP_ENTRY_SIZE as u64;
    let table_start = HEADER_SIZE as u64;
    let payload_bytes = store.data.len() as u64;

    let header = PropStoreHeader {
        magic: PROP_MAGIC,
        version: PROP_VERSION,
        num_records: record_count,
        lookup_offset: table_start,
        data_offset: table_start + table_bytes,
        _reserved: [0; 8],
    };

    let total = HEADER_SIZE + table_bytes as usize + payload_bytes as usize;
    let mut image = Vec::with_capacity(total);
    image.extend_from_slice(&header.to_bytes());
    for record in store.entries.iter().copied() {
        image.extend_from_slice(&record.to_bytes());
    }
    image.extend_from_slice(&store.data);

    std::fs::write(path, &image).context("write prop store")
}
pub fn read_prop_store(path: &Path) -> Result<PropStore> {
let buf = std::fs::read(path).context("read prop store")?;
if buf.len() < HEADER_SIZE {
bail!("prop store file too short (header)");
}
let header = PropStoreHeader::from_bytes(buf[..HEADER_SIZE].try_into().unwrap());
if header.magic != PROP_MAGIC {
bail!(
"invalid prop store magic: expected 0x{:08x}, got 0x{:08x}",
PROP_MAGIC,
header.magic
);
}
if header.version != PROP_VERSION {
bail!(
"unsupported prop store version: expected {}, got {}",
PROP_VERSION,
header.version
);
}
let _lookup_size = header.num_records as usize * LOOKUP_ENTRY_SIZE;
let data_end = buf.len();
if buf.len() < header.data_offset as usize {
bail!("prop store file too short (data offset past EOF)");
}
let mut entries = Vec::with_capacity(header.num_records as usize);
let lookup_start = header.lookup_offset as usize;
for i in 0..header.num_records as usize {
let off = lookup_start + i * LOOKUP_ENTRY_SIZE;
let entry =
PropLookupEntry::from_bytes(buf[off..off + LOOKUP_ENTRY_SIZE].try_into().unwrap());
entries.push(entry);
}
let data = buf[header.data_offset as usize..data_end].to_vec();
Ok(PropStore { entries, data })
}
pub fn lookup(store: &PropStore, node_id: u64) -> Result<Option<NodeProperties>> {
let idx = match store.entries.binary_search_by_key(&node_id, |e| e.node_id) {
Ok(i) => i,
Err(_) => return Ok(None),
};
let entry = &store.entries[idx];
let start = entry.data_offset as usize;
let end = start + entry.data_len as usize;
let props = decode_node_properties(&store.data[start..end])
.with_context(|| format!("decode properties for node {}", node_id))?;
Ok(Some(props))
}
/// Returns how many bytes lie between the header's `data_offset` and the end of a file that is
/// `file_len` bytes long, or `0` when the offset is at or beyond the end of the file.
fn _data_len(_header: PropStoreHeader, file_len: usize) -> usize {
    let data_start = _header.data_offset as usize;
    if file_len > data_start {
        file_len - data_start
    } else {
        0
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Builds a store from `nodes`, writes it to `file_name` inside a fresh temporary directory,
    /// and returns the store read back from that file.
    fn roundtrip(nodes: Vec<(u64, NodeProperties)>, file_name: &str) -> PropStore {
        let built = build_prop_store(nodes);
        let dir = tempdir().unwrap();
        let file = dir.path().join(file_name);
        write_prop_store(&built, &file).unwrap();
        read_prop_store(&file).unwrap()
    }

    /// Collects `(name, value)` pairs into a `NodeProperties` map.
    fn props_from(pairs: Vec<(&str, PropertyValue)>) -> NodeProperties {
        pairs
            .into_iter()
            .map(|(name, value)| (name.to_string(), value))
            .collect()
    }

    /// A node with no properties survives a write/read cycle as an empty map.
    #[test]
    fn test_roundtrip_empty() {
        let loaded = roundtrip(vec![(0, NodeProperties::new())], "test_empty.prop");
        assert_eq!(loaded.entries.len(), 1);
        let found = lookup(&loaded, 0).unwrap().unwrap();
        assert!(found.is_empty());
    }

    /// String, bool and float properties of a single node round-trip unchanged.
    #[test]
    fn test_roundtrip_basic_props() {
        let expected = vec![
            ("name", PropertyValue::String("node_a".to_string())),
            ("active", PropertyValue::Bool(true)),
            ("score", PropertyValue::F64(std::f64::consts::PI)),
        ];
        let loaded = roundtrip(vec![(42, props_from(expected.clone()))], "test.prop");
        assert_eq!(loaded.entries.len(), 1);

        let found = lookup(&loaded, 42).unwrap().unwrap();
        assert_eq!(found.len(), 3);
        for (name, value) in &expected {
            assert_eq!(found.get(*name), Some(value));
        }
    }

    /// The remaining value kinds (integers, array, bytes, null) round-trip unchanged.
    #[test]
    fn test_roundtrip_mixed_types() {
        let expected = vec![
            ("label", PropertyValue::String("hub".to_string())),
            ("capacity", PropertyValue::U64(1_000_000)),
            ("offset", PropertyValue::I64(-42)),
            ("coords", PropertyValue::F64Array(vec![1.0, 2.0, 3.0])),
            ("raw", PropertyValue::Bytes(vec![0xde, 0xad, 0xbe, 0xef])),
            ("deleted", PropertyValue::Null),
        ];
        let loaded = roundtrip(vec![(1, props_from(expected.clone()))], "test.prop");

        let found = lookup(&loaded, 1).unwrap().unwrap();
        for (name, value) in &expected {
            assert_eq!(found.get(*name), Some(value));
        }
    }

    /// Each node resolves to its own properties and an unknown id yields `None`.
    #[test]
    fn test_multiple_nodes() {
        let cases = vec![(0u64, "a", 1u64), (1, "b", 2), (2, "c", 3)];
        let nodes = cases
            .iter()
            .map(|&(id, key, value)| (id, props_from(vec![(key, PropertyValue::U64(value))])))
            .collect();
        let loaded = roundtrip(nodes, "test.prop");

        for &(id, key, value) in &cases {
            assert_eq!(
                lookup(&loaded, id).unwrap().unwrap().get(key),
                Some(&PropertyValue::U64(value))
            );
        }
        assert!(lookup(&loaded, 99).unwrap().is_none());
    }

    /// Binary search finds every node when ids are sparse (multiples of seven).
    #[test]
    fn test_binary_search_multiple() {
        let nodes: Vec<(u64, NodeProperties)> = (0..100u64)
            .map(|i| (i * 7, props_from(vec![("idx", PropertyValue::U64(i))])))
            .collect();
        let loaded = roundtrip(nodes, "test.prop");

        for i in 0..100u64 {
            let found = lookup(&loaded, i * 7).unwrap().unwrap();
            assert_eq!(found.get("idx"), Some(&PropertyValue::U64(i)));
        }
    }
}