use std::collections::HashMap;
use std::sync::Arc;
use parking_lot::Mutex;
use crate::attribute_api::{collect_attribute_messages, Attribute};
use crate::btree_v1;
use crate::btree_v2;
use crate::cache::ChunkCache;
use crate::dataset::Dataset;
use crate::error::{Error, Result};
use crate::filters::FilterRegistry;
use crate::fractal_heap::FractalHeap;
use crate::io::Cursor;
use crate::local_heap::LocalHeap;
use crate::messages::link::{self, LinkMessage, LinkTarget};
use crate::messages::link_info::LinkInfoMessage;
use crate::messages::symbol_table_msg::SymbolTableMessage;
use crate::messages::HdfMessage;
use crate::object_header::ObjectHeader;
use crate::symbol_table::SymbolTableNode;
/// Handle to one group object inside an in-memory HDF5 file image.
///
/// The handle borrows the raw file bytes for `'f`. Child groups created from
/// it receive clones of the same `Arc` caches and filter registry, so they all
/// share one header cache and one chunk cache.
pub struct Group<'f> {
/// Raw bytes of the file all parsers read from.
file_data: &'f [u8],
/// Passed to every parser as `offset_size` (presumably the byte width of
/// file addresses — confirm against the superblock reader).
offset_size: u8,
/// Passed to every parser as `length_size` (presumably the byte width of
/// length fields — confirm against the superblock reader).
length_size: u8,
/// Name this handle was constructed with (the link name for child groups).
pub(crate) name: String,
/// File address of this group's object header.
pub(crate) address: u64,
/// Address of the root group; absolute soft-link paths are walked from here.
pub(crate) root_address: u64,
/// Chunk cache forwarded to datasets opened through this group.
pub(crate) chunk_cache: Arc<ChunkCache>,
/// Parsed object headers keyed by file address, shared between handles.
pub(crate) header_cache: Arc<Mutex<HashMap<u64, Arc<ObjectHeader>>>>,
/// Filter registry forwarded to datasets opened through this group.
pub(crate) filter_registry: Arc<FilterRegistry>,
}
/// A resolved child of a group: the link name and the file address of the
/// object header it points at.
#[derive(Clone, Debug)]
struct ChildEntry {
    /// Link name of the child inside its parent group.
    name: String,
    /// File address of the child's object header.
    address: u64,
}
impl<'f> Group<'f> {
#[allow(clippy::too_many_arguments)]
pub(crate) fn new(
file_data: &'f [u8],
address: u64,
name: String,
offset_size: u8,
length_size: u8,
root_address: u64,
chunk_cache: Arc<ChunkCache>,
header_cache: Arc<Mutex<HashMap<u64, Arc<ObjectHeader>>>>,
filter_registry: Arc<FilterRegistry>,
) -> Self {
Group {
file_data,
offset_size,
length_size,
name,
address,
root_address,
chunk_cache,
header_cache,
filter_registry,
}
}
/// Returns the name this group handle was created with.
pub fn name(&self) -> &str {
    self.name.as_str()
}
/// Returns the file address of this group's object header.
pub fn address(&self) -> u64 {
self.address
}
/// Returns the raw file bytes this group reads from, with the full `'f`
/// lifetime rather than the lifetime of the `&self` borrow.
pub fn file_data(&self) -> &'f [u8] {
self.file_data
}
/// Returns the `offset_size` value this group passes to every parser
/// (presumably the byte width of file addresses — confirm against the
/// superblock reader).
pub fn offset_size(&self) -> u8 {
self.offset_size
}
/// Returns the `length_size` value this group passes to every parser
/// (presumably the byte width of length fields — confirm against the
/// superblock reader).
pub fn length_size(&self) -> u8 {
self.length_size
}
/// Returns the parsed object header at `addr`, using the shared header cache.
///
/// On a miss the header is parsed, its shared messages are resolved, and the
/// result is stored in the cache before being returned.
///
/// # Errors
/// Propagates any error from `ObjectHeader::parse_at` or
/// `resolve_shared_messages`; nothing is cached in that case.
fn cached_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
    // The guard is a temporary of this statement, so the lock is released
    // before any parsing starts.
    let cached = self.header_cache.lock().get(&addr).cloned();
    if let Some(hit) = cached {
        return Ok(hit);
    }

    let mut parsed =
        ObjectHeader::parse_at(self.file_data, addr, self.offset_size, self.length_size)?;
    parsed.resolve_shared_messages(self.file_data, self.offset_size, self.length_size)?;

    // The lock is not held while parsing, so two callers may both miss and
    // parse the same header; the later insert simply replaces the earlier one.
    let shared = Arc::new(parsed);
    self.header_cache.lock().insert(addr, Arc::clone(&shared));
    Ok(shared)
}
/// Returns the direct child groups of this group.
///
/// # Errors
/// Fails if this group's children cannot be listed.
pub fn groups(&self) -> Result<Vec<Group<'f>>> {
    self.resolve_member_objects().map(|(groups, _)| groups)
}
/// Returns the direct children of this group, split into sub-groups and
/// datasets.
///
/// Children that are not classified as groups and cannot be opened as
/// datasets are left out of both lists.
///
/// # Errors
/// Fails if this group's children cannot be listed.
pub fn members(&self) -> Result<(Vec<Group<'f>>, Vec<Dataset<'f>>)> {
self.resolve_member_objects()
}
fn resolve_member_objects(&self) -> Result<(Vec<Group<'f>>, Vec<Dataset<'f>>)> {
let children = self.resolve_children()?;
let mut groups = Vec::new();
let mut datasets = Vec::new();
for child in &children {
if self.child_is_group(child)? {
groups.push(Group::new(
self.file_data,
child.address,
child.name.clone(),
self.offset_size,
self.length_size,
self.root_address,
self.chunk_cache.clone(),
self.header_cache.clone(),
self.filter_registry.clone(),
));
} else if let Some(dataset) = self.try_open_child_dataset(child) {
datasets.push(dataset);
}
}
Ok((groups, datasets))
}
pub fn group(&self, name: &str) -> Result<Group<'f>> {
let children = self.resolve_children()?;
for child in &children {
if child.name == name {
if self.is_group_at(child.address)? {
return Ok(Group::new(
self.file_data,
child.address,
child.name.clone(),
self.offset_size,
self.length_size,
self.root_address,
self.chunk_cache.clone(),
self.header_cache.clone(),
self.filter_registry.clone(),
));
} else {
return Err(Error::GroupNotFound(format!(
"'{}' is a dataset, not a group",
name
)));
}
}
}
Err(Error::GroupNotFound(name.to_string()))
}
/// Returns the direct child datasets of this group.
///
/// # Errors
/// Fails if this group's children cannot be listed.
pub fn datasets(&self) -> Result<Vec<Dataset<'f>>> {
    self.resolve_member_objects().map(|(_, datasets)| datasets)
}
pub fn dataset(&self, name: &str) -> Result<Dataset<'f>> {
let children = self.resolve_children()?;
for child in &children {
if child.name == name {
if let Some(dataset) = self.try_open_child_dataset(child) {
return Ok(dataset);
}
return Err(Error::DatasetNotFound(name.to_string()));
}
}
Err(Error::DatasetNotFound(name.to_string()))
}
pub fn attributes(&self) -> Result<Vec<Attribute>> {
let mut header = (*self.cached_header(self.address)?).clone();
header.resolve_shared_messages(self.file_data, self.offset_size, self.length_size)?;
Ok(
collect_attribute_messages(
&header,
self.file_data,
self.offset_size,
self.length_size,
)?
.into_iter()
.map(|attr| {
Attribute::from_message_with_context(attr, Some(self.file_data), self.offset_size)
})
.collect(),
)
}
pub fn attribute(&self, name: &str) -> Result<Attribute> {
let attrs = self.attributes()?;
attrs
.into_iter()
.find(|a| a.name == name)
.ok_or_else(|| Error::AttributeNotFound(name.to_string()))
}
/// Lists this group's child entries, starting soft-link resolution at
/// depth 0.
fn resolve_children(&self) -> Result<Vec<ChildEntry>> {
self.resolve_children_with_link_depth(0)
}
/// Lists this group's child entries. `link_depth` is the soft-link depth
/// passed on to link resolution.
///
/// If the header carries a symbol-table message, the children come from it
/// alone (the last such message wins). Otherwise the children are the link
/// messages stored in the header, followed by any densely stored links that
/// are not already listed with the same name and address.
///
/// # Errors
/// Propagates failures from reading the header, the symbol table, or the
/// dense link storage. Links that fail to resolve are skipped, not reported.
fn resolve_children_with_link_depth(&self, link_depth: u32) -> Result<Vec<ChildEntry>> {
    let header = self.cached_header(self.address)?;

    let mut symbol_table_children: Option<Vec<ChildEntry>> = None;
    let mut dense_info: Option<&LinkInfoMessage> = None;
    let mut header_links: Vec<LinkMessage> = Vec::new();

    for msg in &header.messages {
        match msg {
            HdfMessage::SymbolTable(st) => {
                symbol_table_children = Some(self.resolve_old_style_group(st)?);
            }
            HdfMessage::Link(link) => header_links.push(link.clone()),
            HdfMessage::LinkInfo(li) => dense_info = Some(li),
            _ => {}
        }
    }

    // A symbol-table group takes precedence; link messages are ignored then.
    if let Some(children) = symbol_table_children {
        return Ok(children);
    }

    let mut children = Vec::new();
    self.resolve_link_targets(&header_links, link_depth, &mut children);

    let li = match dense_info {
        Some(li) if !Cursor::is_undefined_offset(li.fractal_heap_address, self.offset_size) => li,
        _ => return Ok(children),
    };

    for candidate in self.resolve_dense_links(li, link_depth)? {
        let already_listed = children
            .iter()
            .any(|seen| seen.name == candidate.name && seen.address == candidate.address);
        if !already_listed {
            children.push(candidate);
        }
    }
    Ok(children)
}
/// Appends one `ChildEntry` to `children` for every link in `links` that can
/// be resolved to an address, preserving the order of `links`.
///
/// Hard links contribute their stored address. Soft links are resolved with
/// `resolve_soft_link_depth` and dropped if that fails. External links are
/// always dropped.
fn resolve_link_targets(
    &self,
    links: &[LinkMessage],
    link_depth: u32,
    children: &mut Vec<ChildEntry>,
) {
    let resolved = links.iter().filter_map(|link| {
        let address = match &link.target {
            LinkTarget::Hard { address } => *address,
            // Best effort: an unresolvable soft link is not an error here.
            LinkTarget::Soft { path } => self.resolve_soft_link_depth(path, link_depth).ok()?,
            LinkTarget::External { .. } => return None,
        };
        Some(ChildEntry {
            name: link.name.clone(),
            address,
        })
    });
    children.extend(resolved);
}
/// Lists the children of a symbol-table ("old style") group.
///
/// Reads the local heap at `st.heap_address` for link names, collects the
/// leaves of the v1 B-tree at `st.btree_address`, parses a symbol table node
/// at each leaf address, and emits one entry per node entry.
///
/// # Errors
/// Propagates failures from parsing the heap, the B-tree, any symbol table
/// node, or from looking up a name in the heap.
fn resolve_old_style_group(&self, st: &SymbolTableMessage) -> Result<Vec<ChildEntry>> {
    let mut at_heap = Cursor::new(self.file_data);
    at_heap.set_position(st.heap_address);
    let name_heap = LocalHeap::parse(&mut at_heap, self.offset_size, self.length_size)?;

    let leaves = btree_v1::collect_btree_v1_leaves(
        self.file_data,
        st.btree_address,
        self.offset_size,
        self.length_size,
        None,
        &[],
        None,
    )?;

    let mut found = Vec::new();
    for (_, node_address) in &leaves {
        let mut at_node = Cursor::new(self.file_data);
        at_node.set_position(*node_address);
        let node = SymbolTableNode::parse(&mut at_node, self.offset_size, self.length_size)?;

        for slot in &node.entries {
            let name = name_heap.get_string(slot.link_name_offset, self.file_data)?;
            found.push(ChildEntry {
                address: slot.object_header_address,
                name,
            });
        }
    }
    Ok(found)
}
fn resolve_dense_links(
&self,
link_info: &LinkInfoMessage,
link_depth: u32,
) -> Result<Vec<ChildEntry>> {
let mut heap_cursor = Cursor::new(self.file_data);
heap_cursor.set_position(link_info.fractal_heap_address);
let heap = FractalHeap::parse(&mut heap_cursor, self.offset_size, self.length_size)?;
let mut btree_cursor = Cursor::new(self.file_data);
btree_cursor.set_position(link_info.btree_name_index_address);
let btree_header =
btree_v2::BTreeV2Header::parse(&mut btree_cursor, self.offset_size, self.length_size)?;
let records = btree_v2::collect_btree_v2_records(
self.file_data,
&btree_header,
self.offset_size,
self.length_size,
None,
&[],
None,
)?;
let mut children = Vec::new();
for record in &records {
let heap_id = match record {
btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. } => heap_id,
btree_v2::BTreeV2Record::CreationOrder { heap_id, .. } => heap_id,
_ => continue,
};
let managed_bytes = heap.get_managed_object(
heap_id,
self.file_data,
self.offset_size,
self.length_size,
)?;
let mut link_cursor = Cursor::new(&managed_bytes);
let link_msg = link::parse(
&mut link_cursor,
self.offset_size,
self.length_size,
managed_bytes.len(),
)?;
match &link_msg.target {
LinkTarget::Hard { address } => {
children.push(ChildEntry {
name: link_msg.name.clone(),
address: *address,
});
}
LinkTarget::Soft { path } => {
if let Ok(address) = self.resolve_soft_link_depth(path, link_depth) {
children.push(ChildEntry {
name: link_msg.name.clone(),
address,
});
}
}
LinkTarget::External { .. } => {
}
}
}
Ok(children)
}
fn is_group_at(&self, address: u64) -> Result<bool> {
let mut header = (*self.cached_header(address)?).clone();
header.resolve_shared_messages(self.file_data, self.offset_size, self.length_size)?;
for msg in &header.messages {
match msg {
HdfMessage::SymbolTable(_)
| HdfMessage::Link(_)
| HdfMessage::LinkInfo(_)
| HdfMessage::GroupInfo(_) => return Ok(true),
HdfMessage::DataLayout(_) => return Ok(false),
_ => {}
}
}
Ok(true)
}
fn try_open_child_dataset(&self, child: &ChildEntry) -> Option<Dataset<'f>> {
let header = self.cached_header(child.address).ok()?;
Dataset::from_parsed_header(
crate::dataset::DatasetParseContext {
file_data: self.file_data,
offset_size: self.offset_size,
length_size: self.length_size,
chunk_cache: self.chunk_cache.clone(),
filter_registry: self.filter_registry.clone(),
},
child.address,
child.name.clone(),
header.as_ref(),
)
.ok()
}
/// Classifies `child` as group or non-group without ever failing.
///
/// Uses `is_group_at` when it succeeds. If that check errors, the child
/// counts as a group exactly when it cannot be opened as a dataset. The
/// result is always `Ok`.
fn child_is_group(&self, child: &ChildEntry) -> Result<bool> {
    let is_group = self
        .is_group_at(child.address)
        .unwrap_or_else(|_| self.try_open_child_dataset(child).is_none());
    Ok(is_group)
}
/// Soft-link depth at which `resolve_soft_link_depth` gives up and returns
/// an error instead of following the link, as a guard against cycles.
const MAX_SOFT_LINK_DEPTH: u32 = 16;
fn resolve_soft_link_depth(&self, path: &str, depth: u32) -> Result<u64> {
if depth >= Self::MAX_SOFT_LINK_DEPTH {
return Err(Error::Other(format!(
"soft link resolution exceeded maximum depth ({}) — possible cycle at '{}'",
Self::MAX_SOFT_LINK_DEPTH,
path,
)));
}
let parts: Vec<&str> = path
.trim_matches('/')
.split('/')
.filter(|s| !s.is_empty())
.collect();
if parts.is_empty() {
return Ok(self.root_address);
}
let start_addr = if path.starts_with('/') {
self.root_address
} else {
self.address
};
let mut current_group = Group::new(
self.file_data,
start_addr,
String::new(),
self.offset_size,
self.length_size,
self.root_address,
self.chunk_cache.clone(),
self.header_cache.clone(),
self.filter_registry.clone(),
);
for &part in &parts[..parts.len() - 1] {
current_group = current_group.group(part)?;
}
let target_name = parts[parts.len() - 1];
let children = current_group.resolve_children_with_link_depth(depth + 1)?;
for child in &children {
if child.name == target_name {
return Ok(child.address);
}
}
Err(Error::Other(format!(
"soft link target '{}' not found",
path
)))
}
}