#[cfg(not(feature = "std"))]
use alloc::{string::String, vec::Vec};
use crate::btree_v1::collect_symbol_table_nodes;
use crate::error::FormatError;
use crate::local_heap::LocalHeap;
use crate::message_type::MessageType;
use crate::object_header::ObjectHeader;
use crate::symbol_table::{SymbolTableMessage, SymbolTableNode};
#[derive(Debug, Clone)]
pub struct GroupEntry {
pub name: String,
pub object_header_address: u64,
pub cache_type: u32,
}
pub fn resolve_v1_group_entries(
file_data: &[u8],
sym_table_msg: &SymbolTableMessage,
offset_size: u8,
length_size: u8,
) -> Result<Vec<GroupEntry>, FormatError> {
let heap = LocalHeap::parse(
file_data,
sym_table_msg.local_heap_address as usize,
offset_size,
length_size,
)?;
let snod_addrs = collect_symbol_table_nodes(
file_data,
sym_table_msg.btree_address,
offset_size,
length_size,
)?;
let mut entries = Vec::new();
for snod_addr in snod_addrs {
let snod = SymbolTableNode::parse(file_data, snod_addr as usize, offset_size)?;
for entry in &snod.entries {
let name = heap.read_string(file_data, entry.link_name_offset)?;
entries.push(GroupEntry {
name,
object_header_address: entry.object_header_address,
cache_type: entry.cache_type,
});
}
}
Ok(entries)
}
fn find_symbol_table_message(
obj_header: &ObjectHeader,
offset_size: u8,
) -> Result<SymbolTableMessage, FormatError> {
for msg in &obj_header.messages {
if msg.msg_type == MessageType::SymbolTable {
return SymbolTableMessage::parse(&msg.data, offset_size);
}
}
Err(FormatError::PathNotFound(String::from(
"no symbol table message found in object header",
)))
}
pub fn resolve_path(
file_data: &[u8],
root_sym_table: &SymbolTableMessage,
path: &str,
offset_size: u8,
length_size: u8,
) -> Result<u64, FormatError> {
let components: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
if components.is_empty() {
return Err(FormatError::PathNotFound(String::from(path)));
}
let mut current_sym_table = root_sym_table.clone();
for (i, component) in components.iter().enumerate() {
let entries =
resolve_v1_group_entries(file_data, ¤t_sym_table, offset_size, length_size)?;
let found = entries.iter().find(|e| e.name == *component);
match found {
Some(entry) => {
if i == components.len() - 1 {
return Ok(entry.object_header_address);
}
let obj_header = ObjectHeader::parse(
file_data,
entry.object_header_address as usize,
offset_size,
length_size,
)?;
current_sym_table = find_symbol_table_message(&obj_header, offset_size)?;
}
None => {
return Err(FormatError::PathNotFound(String::from(*component)));
}
}
}
Err(FormatError::PathNotFound(String::from(path)))
}
#[cfg(test)]
mod tests {
use super::*;
fn build_synthetic_group(
children: &[(&str, u64, u32)], offset_size: u8,
length_size: u8,
) -> (Vec<u8>, SymbolTableMessage) {
let os = offset_size as usize;
let ls = length_size as usize;
let mut heap_data = Vec::new();
let mut name_offsets = Vec::new();
for (name, _, _) in children {
name_offsets.push(heap_data.len() as u64);
heap_data.extend_from_slice(name.as_bytes());
heap_data.push(0);
}
let heap_data_size = heap_data.len();
let heap_offset = 0usize;
let heap_header_size = 8 + ls * 2 + os;
let heap_data_offset = heap_header_size;
let snod_offset = heap_data_offset + heap_data_size;
let snod_offset = (snod_offset + 7) & !7;
let entry_size = os + os + 4 + 4 + 16;
let snod_size = 8 + children.len() * entry_size;
let btree_offset = snod_offset + snod_size;
let btree_offset = (btree_offset + 7) & !7;
let last_key = if children.is_empty() { 0u64 } else { heap_data_size as u64 };
let btree_header_size = 8 + os * 2; let btree_keys_children = os + os + os; let total_size = btree_offset + btree_header_size + btree_keys_children + 64;
let mut file = vec![0u8; total_size];
{
let mut pos = heap_offset;
file[pos..pos + 4].copy_from_slice(b"HEAP");
pos += 4;
file[pos] = 0; pos += 4; match length_size {
4 => file[pos..pos + 4].copy_from_slice(&(heap_data_size as u32).to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&(heap_data_size as u64).to_le_bytes()),
_ => {}
}
pos += ls;
match length_size {
4 => file[pos..pos + 4].copy_from_slice(&0xFFFFFFFFu32.to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&0xFFFFFFFFFFFFFFFFu64.to_le_bytes()),
_ => {}
}
pos += ls;
match offset_size {
4 => file[pos..pos + 4]
.copy_from_slice(&(heap_data_offset as u32).to_le_bytes()),
8 => file[pos..pos + 8]
.copy_from_slice(&(heap_data_offset as u64).to_le_bytes()),
_ => {}
}
}
file[heap_data_offset..heap_data_offset + heap_data_size].copy_from_slice(&heap_data);
{
let mut pos = snod_offset;
file[pos..pos + 4].copy_from_slice(b"SNOD");
pos += 4;
file[pos] = 1; pos += 1;
pos += 1; file[pos..pos + 2].copy_from_slice(&(children.len() as u16).to_le_bytes());
pos += 2;
for (idx, &(_, obj_addr, cache_type)) in children.iter().enumerate() {
match offset_size {
4 => file[pos..pos + 4]
.copy_from_slice(&(name_offsets[idx] as u32).to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&name_offsets[idx].to_le_bytes()),
_ => {}
}
pos += os;
match offset_size {
4 => file[pos..pos + 4].copy_from_slice(&(obj_addr as u32).to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&obj_addr.to_le_bytes()),
_ => {}
}
pos += os;
file[pos..pos + 4].copy_from_slice(&cache_type.to_le_bytes());
pos += 4;
pos += 4; pos += 16; }
}
{
let mut pos = btree_offset;
file[pos..pos + 4].copy_from_slice(b"TREE");
pos += 4;
file[pos] = 0; pos += 1;
file[pos] = 0; pos += 1;
file[pos..pos + 2].copy_from_slice(&1u16.to_le_bytes()); pos += 2;
for _ in 0..2 {
match offset_size {
4 => file[pos..pos + 4].copy_from_slice(&0xFFFFFFFFu32.to_le_bytes()),
8 => file[pos..pos + 8]
.copy_from_slice(&0xFFFFFFFFFFFFFFFFu64.to_le_bytes()),
_ => {}
}
pos += os;
}
match offset_size {
4 => file[pos..pos + 4].copy_from_slice(&0u32.to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&0u64.to_le_bytes()),
_ => {}
}
pos += os;
match offset_size {
4 => file[pos..pos + 4].copy_from_slice(&(snod_offset as u32).to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&(snod_offset as u64).to_le_bytes()),
_ => {}
}
pos += os;
match offset_size {
4 => file[pos..pos + 4].copy_from_slice(&(last_key as u32).to_le_bytes()),
8 => file[pos..pos + 8].copy_from_slice(&last_key.to_le_bytes()),
_ => {}
}
}
let msg = SymbolTableMessage {
btree_address: btree_offset as u64,
local_heap_address: heap_offset as u64,
};
(file, msg)
}
#[test]
fn resolve_entries_two_children() {
let (file, msg) = build_synthetic_group(
&[("alpha", 0x1000, 0), ("beta", 0x2000, 0)],
8,
8,
);
let entries = resolve_v1_group_entries(&file, &msg, 8, 8).unwrap();
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].name, "alpha");
assert_eq!(entries[0].object_header_address, 0x1000);
assert_eq!(entries[1].name, "beta");
assert_eq!(entries[1].object_header_address, 0x2000);
}
#[test]
fn resolve_path_single_level() {
let (file, msg) = build_synthetic_group(
&[("child1", 0x3000, 0), ("child2", 0x4000, 0)],
8,
8,
);
let addr = resolve_path(&file, &msg, "child1", 8, 8).unwrap();
assert_eq!(addr, 0x3000);
}
#[test]
fn resolve_path_not_found() {
let (file, msg) = build_synthetic_group(&[("x", 0x100, 0)], 8, 8);
let err = resolve_path(&file, &msg, "nonexistent", 8, 8).unwrap_err();
assert!(matches!(err, FormatError::PathNotFound(_)));
}
fn extract_dataset(
_file_data: &[u8],
hdr: &crate::object_header::ObjectHeader,
offset_size: u8,
length_size: u8,
) -> (
crate::datatype::Datatype,
crate::dataspace::Dataspace,
crate::data_layout::DataLayout,
) {
let dt_data = &hdr.messages.iter().find(|m| m.msg_type == MessageType::Datatype).unwrap().data;
let ds_data = &hdr.messages.iter().find(|m| m.msg_type == MessageType::Dataspace).unwrap().data;
let dl_data = &hdr.messages.iter().find(|m| m.msg_type == MessageType::DataLayout).unwrap().data;
let (dt, _) = crate::datatype::Datatype::parse(dt_data).unwrap();
let ds = crate::dataspace::Dataspace::parse(ds_data, length_size).unwrap();
let dl = crate::data_layout::DataLayout::parse(dl_data, offset_size, length_size).unwrap();
(dt, ds, dl)
}
fn get_root_sym_table(
file_data: &[u8],
sb: &crate::superblock::Superblock,
) -> SymbolTableMessage {
let root_header = ObjectHeader::parse(
file_data, sb.root_group_address as usize, sb.offset_size, sb.length_size,
).unwrap();
let sym_msg = root_header.messages.iter()
.find(|m| m.msg_type == MessageType::SymbolTable).unwrap();
SymbolTableMessage::parse(&sym_msg.data, sb.offset_size).unwrap()
}
#[test]
fn integration_simple_dataset_full_traversal() {
let file_data: &[u8] = include_bytes!("../tests/fixtures/simple_dataset.h5");
let sig_offset = crate::signature::find_signature(file_data).unwrap();
let sb = crate::superblock::Superblock::parse(file_data, sig_offset).unwrap();
let root_sym = get_root_sym_table(file_data, &sb);
let entries = resolve_v1_group_entries(file_data, &root_sym, sb.offset_size, sb.length_size).unwrap();
let data_entry = entries.iter().find(|e| e.name == "data").expect("should have 'data'");
let hdr = ObjectHeader::parse(file_data, data_entry.object_header_address as usize, sb.offset_size, sb.length_size).unwrap();
let (dt, ds, dl) = extract_dataset(file_data, &hdr, sb.offset_size, sb.length_size);
let raw = crate::data_read::read_raw_data(file_data, &dl, &ds, &dt).unwrap();
let values = crate::data_read::read_as_f64(&raw, &dt).unwrap();
assert_eq!(values, vec![1.0, 2.0, 3.0]);
}
#[test]
fn integration_two_groups_group1_values() {
let file_data: &[u8] = include_bytes!("../tests/fixtures/two_groups.h5");
let sig_offset = crate::signature::find_signature(file_data).unwrap();
let sb = crate::superblock::Superblock::parse(file_data, sig_offset).unwrap();
let root_sym = get_root_sym_table(file_data, &sb);
let addr = resolve_path(file_data, &root_sym, "group1/values", sb.offset_size, sb.length_size).unwrap();
let hdr = ObjectHeader::parse(file_data, addr as usize, sb.offset_size, sb.length_size).unwrap();
let (dt, ds, dl) = extract_dataset(file_data, &hdr, sb.offset_size, sb.length_size);
let raw = crate::data_read::read_raw_data(file_data, &dl, &ds, &dt).unwrap();
let values = crate::data_read::read_as_i32(&raw, &dt).unwrap();
assert_eq!(values, vec![10, 20, 30]);
}
#[test]
fn integration_two_groups_group2_temps() {
let file_data: &[u8] = include_bytes!("../tests/fixtures/two_groups.h5");
let sig_offset = crate::signature::find_signature(file_data).unwrap();
let sb = crate::superblock::Superblock::parse(file_data, sig_offset).unwrap();
let root_sym = get_root_sym_table(file_data, &sb);
let addr = resolve_path(file_data, &root_sym, "group2/temps", sb.offset_size, sb.length_size).unwrap();
let hdr = ObjectHeader::parse(file_data, addr as usize, sb.offset_size, sb.length_size).unwrap();
let (dt, ds, dl) = extract_dataset(file_data, &hdr, sb.offset_size, sb.length_size);
let raw = crate::data_read::read_raw_data(file_data, &dl, &ds, &dt).unwrap();
let values = crate::data_read::read_as_f32(&raw, &dt).unwrap();
assert!((values[0] - 98.6).abs() < 0.01);
assert!((values[1] - 37.0).abs() < 0.01);
}
#[test]
fn integration_nested_groups() {
let file_data: &[u8] = include_bytes!("../tests/fixtures/nested_groups.h5");
let sig_offset = crate::signature::find_signature(file_data).unwrap();
let sb = crate::superblock::Superblock::parse(file_data, sig_offset).unwrap();
let root_sym = get_root_sym_table(file_data, &sb);
let addr = resolve_path(file_data, &root_sym, "a/b/c/deep", sb.offset_size, sb.length_size).unwrap();
let hdr = ObjectHeader::parse(file_data, addr as usize, sb.offset_size, sb.length_size).unwrap();
let (dt, ds, dl) = extract_dataset(file_data, &hdr, sb.offset_size, sb.length_size);
let raw = crate::data_read::read_raw_data(file_data, &dl, &ds, &dt).unwrap();
let values = crate::data_read::read_as_f64(&raw, &dt).unwrap();
assert_eq!(values, vec![42.0]);
}
}