1use std::sync::Arc;
2
3use crate::attribute_api::{
4 collect_attribute_messages_storage, resolve_vlen_bytes_storage, Attribute,
5};
6use crate::btree_v1;
7use crate::btree_v2;
8use crate::dataset::Dataset;
9use crate::error::{Error, Result};
10use crate::fractal_heap::FractalHeap;
11use crate::io::Cursor;
12use crate::local_heap::LocalHeap;
13use crate::messages::link::{self, LinkMessage, LinkTarget};
14use crate::messages::link_info::LinkInfoMessage;
15use crate::messages::symbol_table_msg::SymbolTableMessage;
16use crate::messages::HdfMessage;
17use crate::storage::Storage;
18use crate::FileContext;
19
/// Handle to one group object inside an opened file.
///
/// The handle does not own file contents; it shares the per-file
/// [`FileContext`] through an `Arc`, so cloning a `Group` only bumps a
/// reference count and copies the name and two addresses.
#[derive(Clone)]
pub struct Group {
    /// Shared per-file state (storage, superblock, header cache, link resolver).
    context: Arc<FileContext>,
    /// Name this handle was created with (the link name for children).
    pub(crate) name: String,
    /// Address used to load this group's object header.
    pub(crate) address: u64,
    /// Address of the root group; absolute link paths start from here.
    pub(crate) root_address: u64,
}
29
/// A named link found in a group, paired with the location it resolves to.
#[derive(Clone)]
struct ChildEntry {
    /// Link name as recorded in the parent group.
    name: String,
    /// Resolved target; for external links this may point into another file's context.
    location: ObjectLocation,
}
35
/// Position of an object: the file context it belongs to plus its header
/// address within that file.
#[derive(Clone)]
struct ObjectLocation {
    /// File context the addresses below are relative to.
    context: Arc<FileContext>,
    /// Address of the object's header.
    address: u64,
    /// Root group address of the file identified by `context`.
    root_address: u64,
}
42
/// Coarse classification of a child object, derived from the message types
/// present in its object header (see `classify_child_header`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChildObjectKind {
    /// Header carries at least one group-related message.
    Group,
    /// Header carries dataset-related messages and no group-related ones.
    Dataset,
    /// Header carries neither kind of message.
    Other,
}
49
50impl Group {
51 pub(crate) fn new(
53 context: Arc<FileContext>,
54 address: u64,
55 name: String,
56 root_address: u64,
57 ) -> Self {
58 Group {
59 context,
60 name,
61 address,
62 root_address,
63 }
64 }
65
    /// Returns the name this group handle was created with.
    pub fn name(&self) -> &str {
        &self.name
    }
70
    /// Returns the address stored for this group, i.e. the one used to load
    /// its object header.
    pub fn address(&self) -> u64 {
        self.address
    }
75
    /// Returns whatever `FileContext::full_file_data` produces for this file.
    ///
    /// # Errors
    /// Propagates any error reported by the file context.
    pub fn file_data(&self) -> Result<crate::storage::StorageBuffer> {
        self.context.full_file_data()
    }
80
    /// Borrows the storage backend held by this group's file context.
    pub fn storage(&self) -> &dyn Storage {
        self.context.storage.as_ref()
    }
85
    /// Returns the superblock's `offset_size`.
    ///
    /// Within this file the value is used as a byte count when sizing symbol
    /// table node reads.
    pub fn offset_size(&self) -> u8 {
        self.context.superblock.offset_size
    }
90
    /// Returns the superblock's `length_size`.
    ///
    /// NOTE(review): presumably the byte width of length fields, mirroring
    /// `offset_size` — confirm against the superblock parser.
    pub fn length_size(&self) -> u8 {
        self.context.superblock.length_size
    }
95
    /// Fetches the object header at `addr` through the file context's
    /// `get_or_parse_header`, which hands back a shared (`Arc`) header.
    ///
    /// # Errors
    /// Propagates any error from the file context.
    fn cached_header(&self, addr: u64) -> Result<Arc<crate::object_header::ObjectHeader>> {
        self.context.get_or_parse_header(addr)
    }
100
101 fn local_location(&self, address: u64) -> ObjectLocation {
102 ObjectLocation {
103 context: self.context.clone(),
104 address,
105 root_address: self.root_address,
106 }
107 }
108
109 pub fn groups(&self) -> Result<Vec<Group>> {
111 let (groups, _) = self.resolve_member_objects()?;
112 Ok(groups)
113 }
114
    /// Returns the child groups and child datasets of this group in one pass.
    ///
    /// The first element holds children classified as groups (or as neither
    /// group nor dataset); the second holds the datasets that could be opened.
    ///
    /// # Errors
    /// Propagates any error from resolving or opening the children.
    pub fn members(&self) -> Result<(Vec<Group>, Vec<Dataset>)> {
        self.resolve_member_objects()
    }
119
120 fn resolve_member_objects(&self) -> Result<(Vec<Group>, Vec<Dataset>)> {
121 let children = self.resolve_children()?;
122 let mut groups = Vec::new();
123 let mut datasets = Vec::new();
124 for child in &children {
125 match self.child_object_kind(child)? {
126 ChildObjectKind::Group | ChildObjectKind::Other => {
127 groups.push(Group::new(
128 child.location.context.clone(),
129 child.location.address,
130 child.name.clone(),
131 child.location.root_address,
132 ));
133 }
134 ChildObjectKind::Dataset => {
135 if let Some(dataset) = self.try_open_child_dataset(child)? {
136 datasets.push(dataset);
137 }
138 }
139 }
140 }
141 Ok((groups, datasets))
142 }
143
144 pub fn group(&self, name: &str) -> Result<Group> {
146 let children = self.resolve_children()?;
147 for child in &children {
148 if child.name == name {
149 return match self.child_object_kind(child)? {
150 ChildObjectKind::Group => Ok(Group::new(
151 child.location.context.clone(),
152 child.location.address,
153 child.name.clone(),
154 child.location.root_address,
155 )),
156 ChildObjectKind::Dataset => Err(Error::GroupNotFound(format!(
157 "'{}' is a dataset, not a group",
158 name
159 ))),
160 ChildObjectKind::Other => Ok(Group::new(
161 child.location.context.clone(),
162 child.location.address,
163 child.name.clone(),
164 child.location.root_address,
165 )),
166 };
167 }
168 }
169 Err(Error::GroupNotFound(name.to_string()))
170 }
171
172 pub fn datasets(&self) -> Result<Vec<Dataset>> {
174 let (_, datasets) = self.resolve_member_objects()?;
175 Ok(datasets)
176 }
177
178 pub fn dataset(&self, name: &str) -> Result<Dataset> {
180 let children = self.resolve_children()?;
181 for child in &children {
182 if child.name == name {
183 if let Some(dataset) = self.try_open_child_dataset(child)? {
184 return Ok(dataset);
185 }
186 return Err(Error::DatasetNotFound(name.to_string()));
187 }
188 }
189 Err(Error::DatasetNotFound(name.to_string()))
190 }
191
192 pub fn attributes(&self) -> Result<Vec<Attribute>> {
194 let mut header = (*self.cached_header(self.address)?).clone();
195 header.resolve_shared_messages_storage(
196 self.context.storage.as_ref(),
197 self.offset_size(),
198 self.length_size(),
199 )?;
200 Ok(collect_attribute_messages_storage(
201 &header,
202 self.context.storage.as_ref(),
203 self.offset_size(),
204 self.length_size(),
205 )?
206 .into_iter()
207 .map(|attr| {
208 let raw_data = match &attr.datatype {
209 crate::messages::datatype::Datatype::VarLen { base }
210 if matches!(
211 base.as_ref(),
212 crate::messages::datatype::Datatype::FixedPoint { size: 1, .. }
213 ) && attr.dataspace.num_elements() == 1 =>
214 {
215 resolve_vlen_bytes_storage(
216 &attr.raw_data,
217 self.context.storage.as_ref(),
218 self.offset_size(),
219 self.length_size(),
220 )
221 .unwrap_or_else(|| attr.raw_data.clone())
222 }
223 _ => attr.raw_data.clone(),
224 };
225 Attribute {
226 name: attr.name,
227 datatype: attr.datatype,
228 shape: match attr.dataspace.dataspace_type {
229 crate::messages::dataspace::DataspaceType::Scalar => vec![],
230 crate::messages::dataspace::DataspaceType::Null => vec![0],
231 crate::messages::dataspace::DataspaceType::Simple => attr.dataspace.dims,
232 },
233 raw_data,
234 }
235 })
236 .collect())
237 }
238
239 pub fn attribute(&self, name: &str) -> Result<Attribute> {
241 let attrs = self.attributes()?;
242 attrs
243 .into_iter()
244 .find(|a| a.name == name)
245 .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
246 }
247
    /// Lists this group's children, starting link resolution at depth zero.
    fn resolve_children(&self) -> Result<Vec<ChildEntry>> {
        self.resolve_children_with_link_depth(0)
    }
253
254 fn resolve_children_with_link_depth(&self, link_depth: u32) -> Result<Vec<ChildEntry>> {
256 let header = self.cached_header(self.address)?;
257
258 let mut children = Vec::new();
259
260 let mut found_symbol_table = false;
262 let mut link_info: Option<LinkInfoMessage> = None;
264 let mut links: Vec<LinkMessage> = Vec::new();
265
266 for msg in &header.messages {
267 match msg {
268 HdfMessage::SymbolTable(st) => {
269 found_symbol_table = true;
270 children = self.resolve_old_style_group_storage(st)?;
271 }
272 HdfMessage::Link(link) => {
273 links.push(link.clone());
274 }
275 HdfMessage::LinkInfo(li) => {
276 link_info = Some(li.clone());
277 }
278 _ => {}
279 }
280 }
281
282 if !found_symbol_table {
283 self.resolve_link_targets(&links, link_depth, &mut children)?;
285
286 if let Some(ref li) = link_info {
288 if !Cursor::is_undefined_offset(li.fractal_heap_address, self.offset_size()) {
289 for child in self.resolve_dense_links_storage(li, link_depth)? {
290 let is_duplicate = children.iter().any(|existing| {
291 existing.name == child.name
292 && existing.location.address == child.location.address
293 && Arc::ptr_eq(&existing.location.context, &child.location.context)
294 });
295 if !is_duplicate {
296 children.push(child);
297 }
298 }
299 }
300 }
301 }
302
303 Ok(children)
304 }
305
306 fn resolve_link_targets(
308 &self,
309 links: &[LinkMessage],
310 link_depth: u32,
311 children: &mut Vec<ChildEntry>,
312 ) -> Result<()> {
313 for link in links {
314 match &link.target {
315 LinkTarget::Hard { address } => {
316 children.push(ChildEntry {
317 name: link.name.clone(),
318 location: self.local_location(*address),
319 });
320 }
321 LinkTarget::Soft { path } => {
322 if let Ok(location) = self.resolve_soft_link_depth(path, link_depth) {
323 children.push(ChildEntry {
324 name: link.name.clone(),
325 location,
326 });
327 }
328 }
329 LinkTarget::External { filename, path } => {
330 if let Some(location) =
331 self.resolve_external_link_depth(filename, path, link_depth)?
332 {
333 children.push(ChildEntry {
334 name: link.name.clone(),
335 location,
336 });
337 }
338 }
339 }
340 }
341 Ok(())
342 }
343
344 #[allow(dead_code)]
346 fn resolve_old_style_group(
347 &self,
348 st: &SymbolTableMessage,
349 file_data: &[u8],
350 ) -> Result<Vec<ChildEntry>> {
351 let mut heap_cursor = Cursor::new(file_data);
352 heap_cursor.set_position(st.heap_address);
353 let heap = LocalHeap::parse(&mut heap_cursor, self.offset_size(), self.length_size())?;
354
355 let leaves = btree_v1::collect_btree_v1_leaves(
356 file_data,
357 st.btree_address,
358 self.offset_size(),
359 self.length_size(),
360 None,
361 &[],
362 None,
363 )?;
364
365 let mut children = Vec::new();
366 for (_key, snod_address) in &leaves {
367 let mut cursor = Cursor::new(file_data);
368 cursor.set_position(*snod_address);
369 let snod = crate::symbol_table::SymbolTableNode::parse(
370 &mut cursor,
371 self.offset_size(),
372 self.length_size(),
373 )?;
374
375 for entry in &snod.entries {
376 let name = heap.get_string(entry.link_name_offset, file_data)?;
377 children.push(ChildEntry {
378 name,
379 location: self.local_location(entry.object_header_address),
380 });
381 }
382 }
383
384 Ok(children)
385 }
386
387 fn resolve_old_style_group_storage(&self, st: &SymbolTableMessage) -> Result<Vec<ChildEntry>> {
388 let heap = LocalHeap::parse_at_storage(
389 self.context.storage.as_ref(),
390 st.heap_address,
391 self.offset_size(),
392 self.length_size(),
393 )?;
394
395 let leaves = btree_v1::collect_btree_v1_leaves_storage(
396 self.context.storage.as_ref(),
397 st.btree_address,
398 self.offset_size(),
399 self.length_size(),
400 None,
401 &[],
402 None,
403 )?;
404
405 let mut children = Vec::new();
406 for (_key, snod_address) in &leaves {
407 let header_len = 8 + 2 * usize::from(self.offset_size());
408 let prefix = self.context.read_range(*snod_address, header_len)?;
409 let mut prefix_cursor = Cursor::new(prefix.as_ref());
410 let sig = prefix_cursor.read_bytes(4)?;
411 if sig != *b"SNOD" {
412 return Err(Error::InvalidData(format!(
413 "expected SNOD signature at offset {:#x}",
414 snod_address
415 )));
416 }
417 let version = prefix_cursor.read_u8()?;
418 if version != 1 {
419 return Err(Error::InvalidData(format!(
420 "unsupported symbol table node version {}",
421 version
422 )));
423 }
424 prefix_cursor.skip(1)?;
425 let num_symbols = prefix_cursor.read_u16_le()?;
426 let node_len =
427 8 + usize::from(num_symbols) * (2 * usize::from(self.offset_size()) + 4 + 4 + 16);
428 let bytes = self.context.read_range(*snod_address, node_len)?;
429 let mut cursor = Cursor::new(bytes.as_ref());
430 let snod = crate::symbol_table::SymbolTableNode::parse(
431 &mut cursor,
432 self.offset_size(),
433 self.length_size(),
434 )?;
435
436 for entry in &snod.entries {
437 let name =
438 heap.get_string_storage(entry.link_name_offset, self.context.storage.as_ref())?;
439 children.push(ChildEntry {
440 name,
441 location: self.local_location(entry.object_header_address),
442 });
443 }
444 }
445
446 Ok(children)
447 }
448
449 #[allow(dead_code)]
451 fn resolve_dense_links(
452 &self,
453 link_info: &LinkInfoMessage,
454 link_depth: u32,
455 file_data: &[u8],
456 ) -> Result<Vec<ChildEntry>> {
457 let mut heap_cursor = Cursor::new(file_data);
458 heap_cursor.set_position(link_info.fractal_heap_address);
459 let heap = FractalHeap::parse(&mut heap_cursor, self.offset_size(), self.length_size())?;
460
461 let mut btree_cursor = Cursor::new(file_data);
462 btree_cursor.set_position(link_info.btree_name_index_address);
463 let btree_header = btree_v2::BTreeV2Header::parse(
464 &mut btree_cursor,
465 self.offset_size(),
466 self.length_size(),
467 )?;
468
469 let records = btree_v2::collect_btree_v2_records(
470 file_data,
471 &btree_header,
472 self.offset_size(),
473 self.length_size(),
474 None,
475 &[],
476 None,
477 )?;
478
479 let mut children = Vec::new();
480 for record in &records {
481 let heap_id = match record {
482 btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. } => heap_id,
483 btree_v2::BTreeV2Record::CreationOrder { heap_id, .. } => heap_id,
484 _ => continue,
485 };
486
487 let managed_bytes =
488 heap.get_object(heap_id, file_data, self.offset_size(), self.length_size())?;
489
490 let mut link_cursor = Cursor::new(&managed_bytes);
491 let link_msg = link::parse(
492 &mut link_cursor,
493 self.offset_size(),
494 self.length_size(),
495 managed_bytes.len(),
496 )?;
497
498 match &link_msg.target {
499 LinkTarget::Hard { address } => {
500 children.push(ChildEntry {
501 name: link_msg.name.clone(),
502 location: self.local_location(*address),
503 });
504 }
505 LinkTarget::Soft { path } => {
506 if let Ok(location) = self.resolve_soft_link_depth(path, link_depth) {
507 children.push(ChildEntry {
508 name: link_msg.name.clone(),
509 location,
510 });
511 }
512 }
513 LinkTarget::External { filename, path } => {
514 if let Some(location) =
515 self.resolve_external_link_depth(filename, path, link_depth)?
516 {
517 children.push(ChildEntry {
518 name: link_msg.name.clone(),
519 location,
520 });
521 }
522 }
523 }
524 }
525
526 Ok(children)
527 }
528
529 fn resolve_dense_links_storage(
530 &self,
531 link_info: &LinkInfoMessage,
532 link_depth: u32,
533 ) -> Result<Vec<ChildEntry>> {
534 let heap = FractalHeap::parse_at_storage(
535 self.context.storage.as_ref(),
536 link_info.fractal_heap_address,
537 self.offset_size(),
538 self.length_size(),
539 )?;
540
541 let btree_header = btree_v2::BTreeV2Header::parse_at_storage(
542 self.context.storage.as_ref(),
543 link_info.btree_name_index_address,
544 self.offset_size(),
545 self.length_size(),
546 )?;
547
548 let records = btree_v2::collect_btree_v2_records_storage(
549 self.context.storage.as_ref(),
550 &btree_header,
551 self.offset_size(),
552 self.length_size(),
553 None,
554 &[],
555 None,
556 )?;
557
558 let mut children = Vec::new();
559 for record in &records {
560 let heap_id = match record {
561 btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. }
562 | btree_v2::BTreeV2Record::CreationOrder { heap_id, .. } => heap_id,
563 _ => continue,
564 };
565
566 let managed_bytes = heap.get_object_storage(
567 heap_id,
568 self.context.storage.as_ref(),
569 self.offset_size(),
570 self.length_size(),
571 )?;
572
573 let mut link_cursor = Cursor::new(&managed_bytes);
574 let link_msg = link::parse(
575 &mut link_cursor,
576 self.offset_size(),
577 self.length_size(),
578 managed_bytes.len(),
579 )?;
580
581 match &link_msg.target {
582 LinkTarget::Hard { address } => {
583 children.push(ChildEntry {
584 name: link_msg.name.clone(),
585 location: self.local_location(*address),
586 });
587 }
588 LinkTarget::Soft { path } => {
589 if let Ok(location) = self.resolve_soft_link_depth(path, link_depth) {
590 children.push(ChildEntry {
591 name: link_msg.name.clone(),
592 location,
593 });
594 }
595 }
596 LinkTarget::External { filename, path } => {
597 if let Some(location) =
598 self.resolve_external_link_depth(filename, path, link_depth)?
599 {
600 children.push(ChildEntry {
601 name: link_msg.name.clone(),
602 location,
603 });
604 }
605 }
606 }
607 }
608
609 Ok(children)
610 }
611
612 pub fn child_name_by_address(&self, address: u64) -> Result<Option<String>> {
613 Ok(self
614 .resolve_children()?
615 .into_iter()
616 .find(|child| child.location.address == address)
617 .map(|child| child.name))
618 }
619
620 fn child_context(&self, child: &ChildEntry) -> String {
621 format!("child '{}' at {:#x}", child.name, child.location.address)
622 }
623
624 fn child_object_kind(&self, child: &ChildEntry) -> Result<ChildObjectKind> {
625 let header = self
626 .cached_child_header(child)
627 .map_err(|err| err.with_context(self.child_context(child)))?;
628
629 Ok(classify_child_header(header.as_ref()))
630 }
631
632 fn try_open_child_dataset(&self, child: &ChildEntry) -> Result<Option<Dataset>> {
633 let header = self
634 .cached_child_header(child)
635 .map_err(|err| err.with_context(self.child_context(child)))?;
636
637 if classify_child_header(header.as_ref()) != ChildObjectKind::Dataset {
638 return Ok(None);
639 }
640
641 Dataset::from_parsed_header(
642 crate::dataset::DatasetParseContext {
643 context: child.location.context.clone(),
644 },
645 child.location.address,
646 child.name.clone(),
647 header.as_ref(),
648 )
649 .map(Some)
650 .map_err(|err| err.with_context(self.child_context(child)))
651 }
652
653 fn cached_child_header(
654 &self,
655 child: &ChildEntry,
656 ) -> Result<Arc<crate::object_header::ObjectHeader>> {
657 child
658 .location
659 .context
660 .get_or_parse_header(child.location.address)
661 }
662
    /// Upper bound on nested link resolution; both soft and external link
    /// resolution fail once the current depth reaches this value.
    const MAX_SOFT_LINK_DEPTH: u32 = 16;
665
    /// Resolves the target `path` of a soft link at the given nesting `depth`.
    ///
    /// Delegates to `resolve_path_location`, labelling errors as "soft link".
    fn resolve_soft_link_depth(&self, path: &str, depth: u32) -> Result<ObjectLocation> {
        self.resolve_path_location(path, depth, "soft link")
    }
669
670 fn resolve_external_link_depth(
671 &self,
672 filename: &str,
673 path: &str,
674 depth: u32,
675 ) -> Result<Option<ObjectLocation>> {
676 if depth >= Self::MAX_SOFT_LINK_DEPTH {
677 return Err(Error::Other(format!(
678 "external link resolution exceeded maximum depth ({}) at '{}:{}'",
679 Self::MAX_SOFT_LINK_DEPTH,
680 filename,
681 path,
682 )));
683 }
684
685 let Some(resolver) = self.context.external_link_resolver.as_ref() else {
686 return Ok(None);
687 };
688 let Some(file) = resolver.resolve_external_link(filename)? else {
689 return Ok(None);
690 };
691 let root = file.root_group()?;
692 Ok(Some(root.resolve_path_location(
693 path,
694 depth + 1,
695 "external link",
696 )?))
697 }
698
699 fn resolve_path_location(
700 &self,
701 path: &str,
702 depth: u32,
703 link_kind: &str,
704 ) -> Result<ObjectLocation> {
705 if depth >= Self::MAX_SOFT_LINK_DEPTH {
706 return Err(Error::Other(format!(
707 "{} resolution exceeded maximum depth ({}) — possible cycle at '{}'",
708 link_kind,
709 Self::MAX_SOFT_LINK_DEPTH,
710 path,
711 )));
712 }
713
714 let parts: Vec<&str> = path
715 .trim_matches('/')
716 .split('/')
717 .filter(|s| !s.is_empty())
718 .collect();
719
720 if parts.is_empty() {
721 return Ok(self.local_location(self.root_address));
722 }
723
724 let start_addr = if path.starts_with('/') {
725 self.root_address
726 } else {
727 self.address
728 };
729
730 let mut current_group = Group::new(
731 self.context.clone(),
732 start_addr,
733 String::new(),
734 self.root_address,
735 );
736
737 for &part in &parts[..parts.len() - 1] {
738 current_group = current_group.group(part)?;
739 }
740
741 let target_name = parts[parts.len() - 1];
742 let children = current_group.resolve_children_with_link_depth(depth + 1)?;
743 for child in &children {
744 if child.name == target_name {
745 return Ok(child.location.clone());
746 }
747 }
748
749 Err(Error::Other(format!(
750 "{} target '{}' not found",
751 link_kind, path
752 )))
753 }
754}
755
756fn classify_child_header(header: &crate::object_header::ObjectHeader) -> ChildObjectKind {
757 let mut has_dataset_message = false;
758
759 for msg in &header.messages {
760 match msg {
761 HdfMessage::SymbolTable(_)
762 | HdfMessage::Link(_)
763 | HdfMessage::LinkInfo(_)
764 | HdfMessage::GroupInfo(_) => return ChildObjectKind::Group,
765 HdfMessage::Dataspace(_)
766 | HdfMessage::DataLayout(_)
767 | HdfMessage::FillValue(_)
768 | HdfMessage::FilterPipeline(_) => has_dataset_message = true,
769 _ => {}
770 }
771 }
772
773 if has_dataset_message {
774 ChildObjectKind::Dataset
775 } else {
776 ChildObjectKind::Other
777 }
778}