1use std::sync::Arc;
2
3use crate::attribute_api::{
4 collect_attribute_messages_storage, resolve_vlen_bytes_storage, Attribute,
5};
6use crate::btree_v1;
7use crate::btree_v2;
8use crate::dataset::Dataset;
9use crate::error::{Error, Result};
10use crate::fractal_heap::{FractalHeap, FractalHeapDirectBlockCache};
11use crate::io::Cursor;
12use crate::local_heap::LocalHeap;
13use crate::messages::datatype::VarLenKind;
14use crate::messages::link::{self, LinkMessage, LinkTarget};
15use crate::messages::link_info::LinkInfoMessage;
16use crate::messages::symbol_table_msg::SymbolTableMessage;
17use crate::messages::HdfMessage;
18use crate::storage::Storage;
19use crate::FileContext;
20
/// Handle to a single group object inside an opened file.
///
/// Cloning is cheap with respect to the file context: the context is held
/// behind an `Arc`, so clones share it.
#[derive(Clone)]
pub struct Group {
    /// Shared per-file state (storage, superblock, header cache, ...).
    context: Arc<FileContext>,
    /// Name under which this group was reached (empty for groups created
    /// internally while walking a path).
    pub(crate) name: String,
    /// Address handed to the header cache to load this group's object header.
    pub(crate) address: u64,
    /// Address used as the starting point when an absolute path is resolved.
    pub(crate) root_address: u64,
}
30
/// A resolved member of a group: the link name plus where its target lives.
#[derive(Clone)]
struct ChildEntry {
    /// Link name of the child within its parent group.
    name: String,
    /// Resolved location of the object the link points to.
    location: ObjectLocation,
}
36
/// Identifies an object by file context and address.
///
/// The context is carried explicitly because a location obtained through an
/// external link is resolved from another file's root group and therefore
/// need not share the parent group's context.
#[derive(Clone)]
struct ObjectLocation {
    /// File context in which `address` is meaningful.
    context: Arc<FileContext>,
    /// Address used to fetch the object's header from `context`.
    address: u64,
    /// Root group address associated with `context`.
    root_address: u64,
}
43
/// Coarse classification of a child object, derived from the message types
/// present in its object header (see `classify_child_header`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChildObjectKind {
    /// The header carries at least one group-related message.
    Group,
    /// The header carries dataset-related messages and no group message.
    Dataset,
    /// Neither of the above.
    Other,
}
50
51impl Group {
52 pub(crate) fn new(
54 context: Arc<FileContext>,
55 address: u64,
56 name: String,
57 root_address: u64,
58 ) -> Self {
59 Group {
60 context,
61 name,
62 address,
63 root_address,
64 }
65 }
66
67 pub fn name(&self) -> &str {
69 &self.name
70 }
71
/// Returns the address used to load this group's object header.
pub fn address(&self) -> u64 {
    self.address
}
76
/// Returns the buffer produced by the file context's `full_file_data`
/// (presumably the complete file contents — confirm against `FileContext`).
///
/// # Errors
///
/// Propagates whatever error the file context reports.
pub fn file_data(&self) -> Result<crate::storage::StorageBuffer> {
    self.context.full_file_data()
}
81
/// Returns the storage backend held by this group's file context.
pub fn storage(&self) -> &dyn Storage {
    self.context.storage.as_ref()
}
86
/// Returns the offset size recorded in the file's superblock.
///
/// This value is used elsewhere in this file as a byte width when computing
/// on-disk structure lengths.
pub fn offset_size(&self) -> u8 {
    self.context.superblock.offset_size
}
91
/// Returns the length size recorded in the file's superblock
/// (presumably a byte width, like `offset_size` — confirm against the
/// superblock parser).
pub fn length_size(&self) -> u8 {
    self.context.superblock.length_size
}
96
/// Fetches the object header at `addr` through this group's file context,
/// which either returns a previously parsed header or parses it now.
fn cached_header(&self, addr: u64) -> Result<Arc<crate::object_header::ObjectHeader>> {
    self.context.get_or_parse_header(addr)
}
101
102 fn local_location(&self, address: u64) -> ObjectLocation {
103 ObjectLocation {
104 context: self.context.clone(),
105 address,
106 root_address: self.root_address,
107 }
108 }
109
110 pub fn groups(&self) -> Result<Vec<Group>> {
112 let (groups, _) = self.resolve_member_objects()?;
113 Ok(groups)
114 }
115
/// Returns the direct sub-groups and datasets of this group, resolving the
/// children only once for both lists.
///
/// # Errors
///
/// Propagates any error raised while resolving or opening the children.
pub fn members(&self) -> Result<(Vec<Group>, Vec<Dataset>)> {
    self.resolve_member_objects()
}
120
121 fn resolve_member_objects(&self) -> Result<(Vec<Group>, Vec<Dataset>)> {
122 let children = self.resolve_children()?;
123 let mut groups = Vec::new();
124 let mut datasets = Vec::new();
125 for child in &children {
126 match self.child_object_kind(child)? {
127 ChildObjectKind::Group => {
128 groups.push(Group::new(
129 child.location.context.clone(),
130 child.location.address,
131 child.name.clone(),
132 child.location.root_address,
133 ));
134 }
135 ChildObjectKind::Dataset => {
136 if let Some(dataset) = self.try_open_child_dataset(child)? {
137 datasets.push(dataset);
138 }
139 }
140 ChildObjectKind::Other => {}
141 }
142 }
143 Ok((groups, datasets))
144 }
145
146 pub fn group(&self, name: &str) -> Result<Group> {
148 let children = self.resolve_children()?;
149 for child in &children {
150 if child.name == name {
151 return match self.child_object_kind(child)? {
152 ChildObjectKind::Group => Ok(Group::new(
153 child.location.context.clone(),
154 child.location.address,
155 child.name.clone(),
156 child.location.root_address,
157 )),
158 ChildObjectKind::Dataset => Err(Error::GroupNotFound(format!(
159 "'{}' is a dataset, not a group",
160 name
161 ))),
162 ChildObjectKind::Other => {
163 Err(Error::GroupNotFound(format!("'{}' is not a group", name)))
164 }
165 };
166 }
167 }
168 Err(Error::GroupNotFound(name.to_string()))
169 }
170
171 pub fn datasets(&self) -> Result<Vec<Dataset>> {
173 let (_, datasets) = self.resolve_member_objects()?;
174 Ok(datasets)
175 }
176
177 pub fn dataset(&self, name: &str) -> Result<Dataset> {
179 let children = self.resolve_children()?;
180 for child in &children {
181 if child.name == name {
182 if let Some(dataset) = self.try_open_child_dataset(child)? {
183 return Ok(dataset);
184 }
185 return Err(Error::DatasetNotFound(name.to_string()));
186 }
187 }
188 Err(Error::DatasetNotFound(name.to_string()))
189 }
190
191 pub fn attributes(&self) -> Result<Vec<Attribute>> {
193 let mut header = (*self.cached_header(self.address)?).clone();
194 header.resolve_shared_messages_storage(
195 self.context.storage.as_ref(),
196 self.offset_size(),
197 self.length_size(),
198 )?;
199 Ok(collect_attribute_messages_storage(
200 &header,
201 self.context.storage.as_ref(),
202 self.offset_size(),
203 self.length_size(),
204 Some(self.context.filter_registry.as_ref()),
205 )?
206 .into_iter()
207 .map(|attr| {
208 let raw_data = match &attr.datatype {
209 crate::messages::datatype::Datatype::VarLen {
210 base,
211 kind: VarLenKind::String,
212 ..
213 } if matches!(
214 base.as_ref(),
215 crate::messages::datatype::Datatype::FixedPoint { size: 1, .. }
216 ) && attr.dataspace.num_elements() == 1 =>
217 {
218 resolve_vlen_bytes_storage(
219 &attr.raw_data,
220 self.context.storage.as_ref(),
221 self.offset_size(),
222 self.length_size(),
223 )
224 .unwrap_or_else(|| attr.raw_data.clone())
225 }
226 _ => attr.raw_data.clone(),
227 };
228 Attribute {
229 name: attr.name,
230 datatype: attr.datatype,
231 shape: match attr.dataspace.dataspace_type {
232 crate::messages::dataspace::DataspaceType::Scalar => vec![],
233 crate::messages::dataspace::DataspaceType::Null => vec![0],
234 crate::messages::dataspace::DataspaceType::Simple => attr.dataspace.dims,
235 },
236 raw_data,
237 }
238 })
239 .collect())
240 }
241
242 pub fn attribute(&self, name: &str) -> Result<Attribute> {
244 let attrs = self.attributes()?;
245 attrs
246 .into_iter()
247 .find(|a| a.name == name)
248 .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
249 }
250
/// Resolves all children of this group, starting link resolution at
/// depth zero.
fn resolve_children(&self) -> Result<Vec<ChildEntry>> {
    self.resolve_children_with_link_depth(0)
}
256
257 fn resolve_children_with_link_depth(&self, link_depth: u32) -> Result<Vec<ChildEntry>> {
259 let header = self.cached_header(self.address)?;
260
261 let mut children = Vec::new();
262
263 let mut found_symbol_table = false;
265 let mut link_info: Option<LinkInfoMessage> = None;
267 let mut links: Vec<LinkMessage> = Vec::new();
268
269 for msg in &header.messages {
270 match msg {
271 HdfMessage::SymbolTable(st) => {
272 found_symbol_table = true;
273 children = self.resolve_old_style_group_storage(st)?;
274 }
275 HdfMessage::Link(link) => {
276 links.push(link.clone());
277 }
278 HdfMessage::LinkInfo(li) => {
279 link_info = Some(li.clone());
280 }
281 _ => {}
282 }
283 }
284
285 if !found_symbol_table {
286 self.resolve_link_targets(&links, link_depth, &mut children)?;
288
289 if let Some(ref li) = link_info {
291 if !Cursor::is_undefined_offset(li.fractal_heap_address, self.offset_size()) {
292 for child in self.resolve_dense_links_storage(li, link_depth)? {
293 let is_duplicate = children.iter().any(|existing| {
294 existing.name == child.name
295 && existing.location.address == child.location.address
296 && Arc::ptr_eq(&existing.location.context, &child.location.context)
297 });
298 if !is_duplicate {
299 children.push(child);
300 }
301 }
302 }
303 }
304 }
305
306 Ok(children)
307 }
308
309 fn resolve_link_targets(
311 &self,
312 links: &[LinkMessage],
313 link_depth: u32,
314 children: &mut Vec<ChildEntry>,
315 ) -> Result<()> {
316 for link in links {
317 match &link.target {
318 LinkTarget::Hard { address } => {
319 children.push(ChildEntry {
320 name: link.name.clone(),
321 location: self.local_location(*address),
322 });
323 }
324 LinkTarget::Soft { path } => {
325 if let Ok(location) = self.resolve_soft_link_depth(path, link_depth) {
326 children.push(ChildEntry {
327 name: link.name.clone(),
328 location,
329 });
330 }
331 }
332 LinkTarget::External { filename, path } => {
333 if let Some(location) =
334 self.resolve_external_link_depth(filename, path, link_depth)?
335 {
336 children.push(ChildEntry {
337 name: link.name.clone(),
338 location,
339 });
340 }
341 }
342 }
343 }
344 Ok(())
345 }
346
347 fn resolve_old_style_group_storage(&self, st: &SymbolTableMessage) -> Result<Vec<ChildEntry>> {
348 let heap = LocalHeap::parse_at_storage(
349 self.context.storage.as_ref(),
350 st.heap_address,
351 self.offset_size(),
352 self.length_size(),
353 )?;
354
355 let leaves = btree_v1::collect_btree_v1_leaves_storage(
356 self.context.storage.as_ref(),
357 st.btree_address,
358 self.offset_size(),
359 self.length_size(),
360 None,
361 &[],
362 None,
363 )?;
364
365 let mut children = Vec::new();
366 for (_key, snod_address) in &leaves {
367 let header_len = 8 + 2 * usize::from(self.offset_size());
368 let prefix = self.context.read_range(*snod_address, header_len)?;
369 let mut prefix_cursor = Cursor::new(prefix.as_ref());
370 let sig = prefix_cursor.read_bytes(4)?;
371 if sig != *b"SNOD" {
372 return Err(Error::InvalidData(format!(
373 "expected SNOD signature at offset {:#x}",
374 snod_address
375 )));
376 }
377 let version = prefix_cursor.read_u8()?;
378 if version != 1 {
379 return Err(Error::InvalidData(format!(
380 "unsupported symbol table node version {}",
381 version
382 )));
383 }
384 prefix_cursor.skip(1)?;
385 let num_symbols = prefix_cursor.read_u16_le()?;
386 let node_len =
387 8 + usize::from(num_symbols) * (2 * usize::from(self.offset_size()) + 4 + 4 + 16);
388 let bytes = self.context.read_range(*snod_address, node_len)?;
389 let mut cursor = Cursor::new(bytes.as_ref());
390 let snod = crate::symbol_table::SymbolTableNode::parse(
391 &mut cursor,
392 self.offset_size(),
393 self.length_size(),
394 )?;
395
396 for entry in &snod.entries {
397 let name =
398 heap.get_string_storage(entry.link_name_offset, self.context.storage.as_ref())?;
399 children.push(ChildEntry {
400 name,
401 location: self.local_location(entry.object_header_address),
402 });
403 }
404 }
405
406 Ok(children)
407 }
408
409 fn resolve_dense_links_storage(
410 &self,
411 link_info: &LinkInfoMessage,
412 link_depth: u32,
413 ) -> Result<Vec<ChildEntry>> {
414 let heap = FractalHeap::parse_at_storage(
415 self.context.storage.as_ref(),
416 link_info.fractal_heap_address,
417 self.offset_size(),
418 self.length_size(),
419 )?;
420
421 let btree_header = btree_v2::BTreeV2Header::parse_at_storage(
422 self.context.storage.as_ref(),
423 link_info.btree_name_index_address,
424 self.offset_size(),
425 self.length_size(),
426 )?;
427
428 let records = btree_v2::collect_btree_v2_records_storage(
429 self.context.storage.as_ref(),
430 &btree_header,
431 self.offset_size(),
432 self.length_size(),
433 None,
434 &[],
435 None,
436 )?;
437
438 let mut children = Vec::new();
439 let mut direct_block_cache = FractalHeapDirectBlockCache::default();
440 for record in &records {
441 let heap_id = match record {
442 btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. }
443 | btree_v2::BTreeV2Record::CreationOrder { heap_id, .. } => heap_id,
444 _ => continue,
445 };
446
447 let managed_bytes = heap.get_object_storage_cached_with_registry(
448 heap_id,
449 self.context.storage.as_ref(),
450 self.offset_size(),
451 self.length_size(),
452 &mut direct_block_cache,
453 Some(self.context.filter_registry.as_ref()),
454 )?;
455
456 let mut link_cursor = Cursor::new(&managed_bytes);
457 let link_msg = link::parse(
458 &mut link_cursor,
459 self.offset_size(),
460 self.length_size(),
461 managed_bytes.len(),
462 )?;
463
464 match &link_msg.target {
465 LinkTarget::Hard { address } => {
466 children.push(ChildEntry {
467 name: link_msg.name.clone(),
468 location: self.local_location(*address),
469 });
470 }
471 LinkTarget::Soft { path } => {
472 if let Ok(location) = self.resolve_soft_link_depth(path, link_depth) {
473 children.push(ChildEntry {
474 name: link_msg.name.clone(),
475 location,
476 });
477 }
478 }
479 LinkTarget::External { filename, path } => {
480 if let Some(location) =
481 self.resolve_external_link_depth(filename, path, link_depth)?
482 {
483 children.push(ChildEntry {
484 name: link_msg.name.clone(),
485 location,
486 });
487 }
488 }
489 }
490 }
491
492 Ok(children)
493 }
494
495 pub fn child_name_by_address(&self, address: u64) -> Result<Option<String>> {
496 Ok(self
497 .resolve_children()?
498 .into_iter()
499 .find(|child| child.location.address == address)
500 .map(|child| child.name))
501 }
502
503 fn child_context(&self, child: &ChildEntry) -> String {
504 format!("child '{}' at {:#x}", child.name, child.location.address)
505 }
506
507 fn child_object_kind(&self, child: &ChildEntry) -> Result<ChildObjectKind> {
508 let header = self
509 .cached_child_header(child)
510 .map_err(|err| err.with_context(self.child_context(child)))?;
511
512 Ok(classify_child_header(header.as_ref()))
513 }
514
515 fn try_open_child_dataset(&self, child: &ChildEntry) -> Result<Option<Dataset>> {
516 let header = self
517 .cached_child_header(child)
518 .map_err(|err| err.with_context(self.child_context(child)))?;
519
520 if classify_child_header(header.as_ref()) != ChildObjectKind::Dataset {
521 return Ok(None);
522 }
523
524 Dataset::from_parsed_header(
525 crate::dataset::DatasetParseContext {
526 context: child.location.context.clone(),
527 },
528 child.location.address,
529 child.name.clone(),
530 header.as_ref(),
531 )
532 .map(Some)
533 .map_err(|err| err.with_context(self.child_context(child)))
534 }
535
536 fn cached_child_header(
537 &self,
538 child: &ChildEntry,
539 ) -> Result<Arc<crate::object_header::ObjectHeader>> {
540 child
541 .location
542 .context
543 .get_or_parse_header(child.location.address)
544 }
545
/// Maximum nesting depth allowed while resolving links. Despite the name,
/// the same limit is applied to both soft and external link resolution.
const MAX_SOFT_LINK_DEPTH: u32 = 16;
548
/// Resolves the target `path` of a soft link at the given nesting `depth`
/// by delegating to `resolve_path_location`.
fn resolve_soft_link_depth(&self, path: &str, depth: u32) -> Result<ObjectLocation> {
    self.resolve_path_location(path, depth, "soft link")
}
552
553 fn resolve_external_link_depth(
554 &self,
555 filename: &str,
556 path: &str,
557 depth: u32,
558 ) -> Result<Option<ObjectLocation>> {
559 if depth >= Self::MAX_SOFT_LINK_DEPTH {
560 return Err(Error::Other(format!(
561 "external link resolution exceeded maximum depth ({}) at '{}:{}'",
562 Self::MAX_SOFT_LINK_DEPTH,
563 filename,
564 path,
565 )));
566 }
567
568 let Some(resolver) = self.context.external_link_resolver.as_ref() else {
569 return Ok(None);
570 };
571 let Some(file) = resolver.resolve_external_link(filename)? else {
572 return Ok(None);
573 };
574 let root = file.root_group()?;
575 Ok(Some(root.resolve_path_location(
576 path,
577 depth + 1,
578 "external link",
579 )?))
580 }
581
582 fn resolve_path_location(
583 &self,
584 path: &str,
585 depth: u32,
586 link_kind: &str,
587 ) -> Result<ObjectLocation> {
588 if depth >= Self::MAX_SOFT_LINK_DEPTH {
589 return Err(Error::Other(format!(
590 "{} resolution exceeded maximum depth ({}) — possible cycle at '{}'",
591 link_kind,
592 Self::MAX_SOFT_LINK_DEPTH,
593 path,
594 )));
595 }
596
597 let parts: Vec<&str> = path
598 .trim_matches('/')
599 .split('/')
600 .filter(|s| !s.is_empty())
601 .collect();
602
603 if parts.is_empty() {
604 return Ok(self.local_location(self.root_address));
605 }
606
607 let start_addr = if path.starts_with('/') {
608 self.root_address
609 } else {
610 self.address
611 };
612
613 let mut current_group = Group::new(
614 self.context.clone(),
615 start_addr,
616 String::new(),
617 self.root_address,
618 );
619
620 for &part in &parts[..parts.len() - 1] {
621 current_group = current_group.group(part)?;
622 }
623
624 let target_name = parts[parts.len() - 1];
625 let children = current_group.resolve_children_with_link_depth(depth + 1)?;
626 for child in &children {
627 if child.name == target_name {
628 return Ok(child.location.clone());
629 }
630 }
631
632 Err(Error::Other(format!(
633 "{} target '{}' not found",
634 link_kind, path
635 )))
636 }
637}
638
639fn classify_child_header(header: &crate::object_header::ObjectHeader) -> ChildObjectKind {
640 let mut has_dataset_message = false;
641
642 for msg in &header.messages {
643 match msg {
644 HdfMessage::SymbolTable(_)
645 | HdfMessage::Link(_)
646 | HdfMessage::LinkInfo(_)
647 | HdfMessage::GroupInfo(_) => return ChildObjectKind::Group,
648 HdfMessage::Dataspace(_)
649 | HdfMessage::DataLayout(_)
650 | HdfMessage::FillValue(_)
651 | HdfMessage::FilterPipeline(_) => has_dataset_message = true,
652 _ => {}
653 }
654 }
655
656 if has_dataset_message {
657 ChildObjectKind::Dataset
658 } else {
659 ChildObjectKind::Other
660 }
661}