1use std::sync::Arc;
2
3use crate::attribute_api::{
4 collect_attribute_messages_storage, resolve_vlen_bytes_storage, Attribute,
5};
6use crate::btree_v1;
7use crate::btree_v2;
8use crate::checksum::jenkins_lookup3;
9use crate::dataset::Dataset;
10use crate::error::{Error, Result};
11use crate::fractal_heap::{FractalHeap, FractalHeapDirectBlockCache};
12use crate::io::Cursor;
13use crate::local_heap::LocalHeap;
14use crate::messages::datatype::VarLenKind;
15use crate::messages::link::{self, LinkMessage, LinkTarget};
16use crate::messages::link_info::LinkInfoMessage;
17use crate::messages::symbol_table_msg::SymbolTableMessage;
18use crate::messages::HdfMessage;
19use crate::storage::Storage;
20use crate::FileContext;
21
/// Handle to a group object inside an HDF5 file.
///
/// Cloning is cheap with respect to the file itself: the file context is
/// shared through an `Arc`.
#[derive(Clone)]
pub struct Group {
    /// Shared per-file state (storage, superblock, header cache, ...).
    context: Arc<FileContext>,
    /// Name this group was reached under.
    pub(crate) name: String,
    /// Address of this group's object header.
    pub(crate) address: u64,
    /// Address of the root group; absolute link paths start from here.
    pub(crate) root_address: u64,
}
31
/// A resolved link inside a group: the link name plus where its target lives.
#[derive(Clone)]
struct ChildEntry {
    /// Link name as stored in the parent group.
    name: String,
    /// File context and address of the object the link points at.
    location: ObjectLocation,
}
37
/// Location of an object header, including the file context it belongs to.
///
/// The context is carried along because external links can resolve to objects
/// that live in a different file than the group holding the link.
#[derive(Clone)]
struct ObjectLocation {
    /// File context that owns the object.
    context: Arc<FileContext>,
    /// Address of the object header within that file.
    address: u64,
    /// Root group address of that file.
    root_address: u64,
}
44
/// Coarse classification of a child object, derived from the message types
/// present in its object header (see `classify_child_header`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChildObjectKind {
    /// Header carries at least one group-style message.
    Group,
    /// Header carries dataset-style messages and no group-style message.
    Dataset,
    /// Header carries neither group-style nor dataset-style messages.
    Other,
}
51
52impl Group {
53 pub(crate) fn new(
55 context: Arc<FileContext>,
56 address: u64,
57 name: String,
58 root_address: u64,
59 ) -> Self {
60 Group {
61 context,
62 name,
63 address,
64 root_address,
65 }
66 }
67
    /// Returns the name this group was opened under.
    pub fn name(&self) -> &str {
        &self.name
    }
72
    /// Returns the address of this group's object header.
    pub fn address(&self) -> u64 {
        self.address
    }
77
    /// Returns the file context's full file data buffer.
    ///
    /// # Errors
    /// Propagates any error from `FileContext::full_file_data`.
    pub fn file_data(&self) -> Result<crate::storage::StorageBuffer> {
        self.context.full_file_data()
    }
82
    /// Borrows the storage backend held by this group's file context.
    pub fn storage(&self) -> &dyn Storage {
        self.context.storage.as_ref()
    }
87
    /// Returns the `offset_size` recorded in the file's superblock.
    pub fn offset_size(&self) -> u8 {
        self.context.superblock.offset_size
    }
92
    /// Returns the `length_size` recorded in the file's superblock.
    pub fn length_size(&self) -> u8 {
        self.context.superblock.length_size
    }
97
    /// Fetches the object header at `addr` through this file context's
    /// `get_or_parse_header`.
    fn cached_header(&self, addr: u64) -> Result<Arc<crate::object_header::ObjectHeader>> {
        self.context.get_or_parse_header(addr)
    }
102
103 fn local_location(&self, address: u64) -> ObjectLocation {
104 ObjectLocation {
105 context: self.context.clone(),
106 address,
107 root_address: self.root_address,
108 }
109 }
110
111 pub fn groups(&self) -> Result<Vec<Group>> {
113 let (groups, _) = self.resolve_member_objects()?;
114 Ok(groups)
115 }
116
    /// Returns the child groups and child datasets of this group as a pair,
    /// resolving the group's links only once.
    pub fn members(&self) -> Result<(Vec<Group>, Vec<Dataset>)> {
        self.resolve_member_objects()
    }
121
122 fn resolve_member_objects(&self) -> Result<(Vec<Group>, Vec<Dataset>)> {
123 let children = self.resolve_children()?;
124 let mut groups = Vec::new();
125 let mut datasets = Vec::new();
126 for child in &children {
127 match self.child_object_kind(child)? {
128 ChildObjectKind::Group => {
129 groups.push(Group::new(
130 child.location.context.clone(),
131 child.location.address,
132 child.name.clone(),
133 child.location.root_address,
134 ));
135 }
136 ChildObjectKind::Dataset => {
137 if let Some(dataset) = self.try_open_child_dataset(child)? {
138 datasets.push(dataset);
139 }
140 }
141 ChildObjectKind::Other => {}
142 }
143 }
144 Ok((groups, datasets))
145 }
146
147 pub fn group(&self, name: &str) -> Result<Group> {
149 if let Some(child) = self.resolve_child(name)? {
150 return match self.child_object_kind(&child)? {
151 ChildObjectKind::Group => Ok(Group::new(
152 child.location.context.clone(),
153 child.location.address,
154 child.name.clone(),
155 child.location.root_address,
156 )),
157 ChildObjectKind::Dataset => Err(Error::GroupNotFound(format!(
158 "'{}' is a dataset, not a group",
159 name
160 ))),
161 ChildObjectKind::Other => {
162 Err(Error::GroupNotFound(format!("'{}' is not a group", name)))
163 }
164 };
165 }
166 Err(Error::GroupNotFound(name.to_string()))
167 }
168
169 pub fn datasets(&self) -> Result<Vec<Dataset>> {
171 let (_, datasets) = self.resolve_member_objects()?;
172 Ok(datasets)
173 }
174
175 pub fn dataset(&self, name: &str) -> Result<Dataset> {
177 if let Some(child) = self.resolve_child(name)? {
178 if let Some(dataset) = self.try_open_child_dataset(&child)? {
179 return Ok(dataset);
180 }
181 return Err(Error::DatasetNotFound(name.to_string()));
182 }
183 Err(Error::DatasetNotFound(name.to_string()))
184 }
185
186 pub fn attributes(&self) -> Result<Vec<Attribute>> {
188 let mut header = (*self.cached_header(self.address)?).clone();
189 header.resolve_shared_messages_storage(
190 self.context.storage.as_ref(),
191 self.offset_size(),
192 self.length_size(),
193 )?;
194 Ok(collect_attribute_messages_storage(
195 &header,
196 self.context.storage.as_ref(),
197 self.offset_size(),
198 self.length_size(),
199 Some(self.context.filter_registry.as_ref()),
200 )?
201 .into_iter()
202 .map(|attr| {
203 let raw_data = match &attr.datatype {
204 crate::messages::datatype::Datatype::VarLen {
205 base,
206 kind: VarLenKind::String,
207 ..
208 } if matches!(
209 base.as_ref(),
210 crate::messages::datatype::Datatype::FixedPoint { size: 1, .. }
211 ) && matches!(attr.dataspace.num_elements(), Ok(1)) =>
212 {
213 resolve_vlen_bytes_storage(
214 &attr.raw_data,
215 self.context.storage.as_ref(),
216 self.offset_size(),
217 self.length_size(),
218 )
219 .unwrap_or_else(|| attr.raw_data.clone())
220 }
221 _ => attr.raw_data.clone(),
222 };
223 Attribute {
224 name: attr.name,
225 datatype: attr.datatype,
226 shape: match attr.dataspace.dataspace_type {
227 crate::messages::dataspace::DataspaceType::Scalar => vec![],
228 crate::messages::dataspace::DataspaceType::Null => vec![0],
229 crate::messages::dataspace::DataspaceType::Simple => attr.dataspace.dims,
230 },
231 raw_data,
232 }
233 })
234 .collect())
235 }
236
237 pub fn attribute(&self, name: &str) -> Result<Attribute> {
239 let attrs = self.attributes()?;
240 attrs
241 .into_iter()
242 .find(|a| a.name == name)
243 .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
244 }
245
    /// Resolves all child links of this group, starting link resolution at
    /// depth 0.
    fn resolve_children(&self) -> Result<Vec<ChildEntry>> {
        self.resolve_children_with_link_depth(0)
    }
251
    /// Resolves the single child link called `name`, starting link resolution
    /// at depth 0. Returns `Ok(None)` when no such child can be resolved.
    fn resolve_child(&self, name: &str) -> Result<Option<ChildEntry>> {
        self.resolve_child_with_link_depth(name, 0)
    }
255
    /// Looks up one child by name without materialising every child.
    ///
    /// `link_depth` is the current soft/external link nesting level and is
    /// forwarded to the link resolvers so that chains are bounded.
    ///
    /// Lookup order:
    /// 1. If the header has a symbol table message, the old-style group
    ///    storage is searched and the result is returned immediately.
    /// 2. Otherwise a link message in the header with a matching name is
    ///    tried.
    /// 3. If that did not produce a child and the link info message has a
    ///    defined fractal heap address, the dense link storage is searched.
    fn resolve_child_with_link_depth(
        &self,
        name: &str,
        link_depth: u32,
    ) -> Result<Option<ChildEntry>> {
        let header = self.cached_header(self.address)?;

        let mut link_info: Option<LinkInfoMessage> = None;
        let mut matching_compact_link: Option<LinkMessage> = None;

        for msg in &header.messages {
            match msg {
                HdfMessage::SymbolTable(st) => {
                    // Old-style group: link and link-info messages are not consulted.
                    return Ok(self
                        .resolve_old_style_group_storage(st)?
                        .into_iter()
                        .find(|child| child.name == name));
                }
                HdfMessage::Link(link) if link.name == name => {
                    // If several link messages share the name, the last one wins.
                    matching_compact_link = Some(link.clone());
                }
                HdfMessage::LinkInfo(li) => {
                    link_info = Some(li.clone());
                }
                _ => {}
            }
        }

        if let Some(link) = matching_compact_link {
            // A matching link that cannot be resolved falls through to the
            // dense lookup below rather than ending the search.
            if let Some(child) = self.resolve_link_message_target(&link, link_depth)? {
                return Ok(Some(child));
            }
        }

        if let Some(ref li) = link_info {
            if !Cursor::is_undefined_offset(li.fractal_heap_address, self.offset_size()) {
                return self.resolve_dense_link_storage(li, name, link_depth);
            }
        }

        Ok(None)
    }
298
299 fn resolve_children_with_link_depth(&self, link_depth: u32) -> Result<Vec<ChildEntry>> {
301 let header = self.cached_header(self.address)?;
302
303 let mut children = Vec::new();
304
305 let mut found_symbol_table = false;
307 let mut link_info: Option<LinkInfoMessage> = None;
309 let mut links: Vec<LinkMessage> = Vec::new();
310
311 for msg in &header.messages {
312 match msg {
313 HdfMessage::SymbolTable(st) => {
314 found_symbol_table = true;
315 children = self.resolve_old_style_group_storage(st)?;
316 }
317 HdfMessage::Link(link) => {
318 links.push(link.clone());
319 }
320 HdfMessage::LinkInfo(li) => {
321 link_info = Some(li.clone());
322 }
323 _ => {}
324 }
325 }
326
327 if !found_symbol_table {
328 self.resolve_link_targets(&links, link_depth, &mut children)?;
330
331 if let Some(ref li) = link_info {
333 if !Cursor::is_undefined_offset(li.fractal_heap_address, self.offset_size()) {
334 for child in self.resolve_dense_links_storage(li, link_depth)? {
335 let is_duplicate = children.iter().any(|existing| {
336 existing.name == child.name
337 && existing.location.address == child.location.address
338 && Arc::ptr_eq(&existing.location.context, &child.location.context)
339 });
340 if !is_duplicate {
341 children.push(child);
342 }
343 }
344 }
345 }
346 }
347
348 Ok(children)
349 }
350
351 fn resolve_link_targets(
353 &self,
354 links: &[LinkMessage],
355 link_depth: u32,
356 children: &mut Vec<ChildEntry>,
357 ) -> Result<()> {
358 for link in links {
359 if let Some(child) = self.resolve_link_message_target(link, link_depth)? {
360 children.push(child);
361 }
362 }
363 Ok(())
364 }
365
366 fn resolve_link_message_target(
367 &self,
368 link: &LinkMessage,
369 link_depth: u32,
370 ) -> Result<Option<ChildEntry>> {
371 match &link.target {
372 LinkTarget::Hard { address } => Ok(Some(ChildEntry {
373 name: link.name.clone(),
374 location: self.local_location(*address),
375 })),
376 LinkTarget::Soft { path } => Ok(self
377 .resolve_soft_link_depth(path, link_depth)
378 .ok()
379 .map(|location| ChildEntry {
380 name: link.name.clone(),
381 location,
382 })),
383 LinkTarget::External { filename, path } => Ok(self
384 .resolve_external_link_depth(filename, path, link_depth)?
385 .map(|location| ChildEntry {
386 name: link.name.clone(),
387 location,
388 })),
389 }
390 }
391
    /// Lists the children of an old-style group described by a symbol table
    /// message.
    ///
    /// Parses the local heap at `st.heap_address`, collects the leaves of the
    /// version 1 B-tree at `st.btree_address`, and reads a symbol table node
    /// at each leaf address. Every node entry becomes a child in this file;
    /// its name is read from the local heap at the entry's link name offset.
    ///
    /// # Errors
    /// Returns `Error::InvalidData` when a node does not start with the
    /// `SNOD` signature or has a version other than 1, and propagates any
    /// read or parse error.
    fn resolve_old_style_group_storage(&self, st: &SymbolTableMessage) -> Result<Vec<ChildEntry>> {
        let heap = LocalHeap::parse_at_storage(
            self.context.storage.as_ref(),
            st.heap_address,
            self.offset_size(),
            self.length_size(),
        )?;

        let leaves = btree_v1::collect_btree_v1_leaves_storage(
            self.context.storage.as_ref(),
            st.btree_address,
            self.offset_size(),
            self.length_size(),
            None,
            &[],
            None,
        )?;

        let mut children = Vec::new();
        for (_key, snod_address) in &leaves {
            // Read a short prefix first; only its first 8 bytes are decoded
            // below (signature, version, one skipped byte, symbol count).
            let header_len = 8 + 2 * usize::from(self.offset_size());
            let prefix = self.context.read_range(*snod_address, header_len)?;
            let mut prefix_cursor = Cursor::new(prefix.as_ref());
            let sig = prefix_cursor.read_bytes(4)?;
            if sig != *b"SNOD" {
                return Err(Error::InvalidData(format!(
                    "expected SNOD signature at offset {:#x}",
                    snod_address
                )));
            }
            let version = prefix_cursor.read_u8()?;
            if version != 1 {
                return Err(Error::InvalidData(format!(
                    "unsupported symbol table node version {}",
                    version
                )));
            }
            // One byte is skipped here; presumably a reserved field, confirm against the format spec.
            prefix_cursor.skip(1)?;
            let num_symbols = prefix_cursor.read_u16_le()?;
            // Full node size: the 8-byte header plus, per symbol, two
            // offset-sized fields and 4 + 4 + 16 further bytes.
            let node_len =
                8 + usize::from(num_symbols) * (2 * usize::from(self.offset_size()) + 4 + 4 + 16);
            let bytes = self.context.read_range(*snod_address, node_len)?;
            let mut cursor = Cursor::new(bytes.as_ref());
            let snod = crate::symbol_table::SymbolTableNode::parse(
                &mut cursor,
                self.offset_size(),
                self.length_size(),
            )?;

            for entry in &snod.entries {
                let name =
                    heap.get_string_storage(entry.link_name_offset, self.context.storage.as_ref())?;
                children.push(ChildEntry {
                    name,
                    location: self.local_location(entry.object_header_address),
                });
            }
        }

        Ok(children)
    }
453
454 fn resolve_dense_links_storage(
455 &self,
456 link_info: &LinkInfoMessage,
457 link_depth: u32,
458 ) -> Result<Vec<ChildEntry>> {
459 let heap = FractalHeap::parse_at_storage(
460 self.context.storage.as_ref(),
461 link_info.fractal_heap_address,
462 self.offset_size(),
463 self.length_size(),
464 )?;
465
466 let btree_header = btree_v2::BTreeV2Header::parse_at_storage(
467 self.context.storage.as_ref(),
468 link_info.btree_name_index_address,
469 self.offset_size(),
470 self.length_size(),
471 )?;
472
473 let records = btree_v2::collect_btree_v2_records_storage(
474 self.context.storage.as_ref(),
475 &btree_header,
476 self.offset_size(),
477 self.length_size(),
478 None,
479 &[],
480 None,
481 )?;
482
483 let mut children = Vec::new();
484 let mut direct_block_cache = FractalHeapDirectBlockCache::default();
485 for record in &records {
486 let heap_id = match record {
487 btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. }
488 | btree_v2::BTreeV2Record::CreationOrder { heap_id, .. } => heap_id,
489 _ => continue,
490 };
491
492 let managed_bytes = heap.get_object_storage_cached_with_registry(
493 heap_id,
494 self.context.storage.as_ref(),
495 self.offset_size(),
496 self.length_size(),
497 &mut direct_block_cache,
498 Some(self.context.filter_registry.as_ref()),
499 )?;
500
501 let mut link_cursor = Cursor::new(&managed_bytes);
502 let link_msg = link::parse(
503 &mut link_cursor,
504 self.offset_size(),
505 self.length_size(),
506 managed_bytes.len(),
507 )?;
508
509 match &link_msg.target {
510 LinkTarget::Hard { address } => {
511 children.push(ChildEntry {
512 name: link_msg.name.clone(),
513 location: self.local_location(*address),
514 });
515 }
516 LinkTarget::Soft { path } => {
517 if let Ok(location) = self.resolve_soft_link_depth(path, link_depth) {
518 children.push(ChildEntry {
519 name: link_msg.name.clone(),
520 location,
521 });
522 }
523 }
524 LinkTarget::External { filename, path } => {
525 if let Some(location) =
526 self.resolve_external_link_depth(filename, path, link_depth)?
527 {
528 children.push(ChildEntry {
529 name: link_msg.name.clone(),
530 location,
531 });
532 }
533 }
534 }
535 }
536
537 Ok(children)
538 }
539
    /// Looks up a single child by name in the group's dense link storage.
    ///
    /// Computes `jenkins_lookup3` over the name bytes, asks the version 2
    /// B-tree name index for the link-name-hash records matching that hash,
    /// and then reads each candidate link message from the fractal heap. The
    /// first candidate whose name equals `name` is resolved and returned.
    ///
    /// Returns `Ok(None)` when no candidate has a matching name.
    fn resolve_dense_link_storage(
        &self,
        link_info: &LinkInfoMessage,
        name: &str,
        link_depth: u32,
    ) -> Result<Option<ChildEntry>> {
        let heap = FractalHeap::parse_at_storage(
            self.context.storage.as_ref(),
            link_info.fractal_heap_address,
            self.offset_size(),
            self.length_size(),
        )?;

        let btree_header = btree_v2::BTreeV2Header::parse_at_storage(
            self.context.storage.as_ref(),
            link_info.btree_name_index_address,
            self.offset_size(),
            self.length_size(),
        )?;

        let records = btree_v2::collect_btree_v2_link_name_hash_records_storage(
            self.context.storage.as_ref(),
            &btree_header,
            self.offset_size(),
            self.length_size(),
            jenkins_lookup3(name.as_bytes()),
        )?;

        let mut direct_block_cache = FractalHeapDirectBlockCache::default();
        for record in &records {
            let btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. } = record else {
                continue;
            };

            let managed_bytes = heap.get_object_storage_cached_with_registry(
                heap_id,
                self.context.storage.as_ref(),
                self.offset_size(),
                self.length_size(),
                &mut direct_block_cache,
                Some(self.context.filter_registry.as_ref()),
            )?;

            let mut link_cursor = Cursor::new(&managed_bytes);
            let link_msg = link::parse(
                &mut link_cursor,
                self.offset_size(),
                self.length_size(),
                managed_bytes.len(),
            )?;
            // Different names can share a hash, so the name is compared explicitly.
            if link_msg.name == name {
                return self.resolve_link_message_target(&link_msg, link_depth);
            }
        }

        Ok(None)
    }
597
598 pub fn child_name_by_address(&self, address: u64) -> Result<Option<String>> {
599 Ok(self
600 .resolve_children()?
601 .into_iter()
602 .find(|child| child.location.address == address)
603 .map(|child| child.name))
604 }
605
    /// Formats a short description of `child` (name and hexadecimal address)
    /// used as context on errors raised while inspecting it.
    fn child_context(&self, child: &ChildEntry) -> String {
        format!("child '{}' at {:#x}", child.name, child.location.address)
    }
609
610 fn child_object_kind(&self, child: &ChildEntry) -> Result<ChildObjectKind> {
611 let header = self
612 .cached_child_header(child)
613 .map_err(|err| err.with_context(self.child_context(child)))?;
614
615 Ok(classify_child_header(header.as_ref()))
616 }
617
    /// Opens `child` as a dataset if its header classifies as one.
    ///
    /// Returns `Ok(None)` when the child is not a dataset. Errors from
    /// obtaining the header or from building the dataset are annotated with
    /// the child's name and address.
    fn try_open_child_dataset(&self, child: &ChildEntry) -> Result<Option<Dataset>> {
        let header = self
            .cached_child_header(child)
            .map_err(|err| err.with_context(self.child_context(child)))?;

        if classify_child_header(header.as_ref()) != ChildObjectKind::Dataset {
            return Ok(None);
        }

        // The dataset is built against the child's own file context, which
        // may differ from this group's when the child came from an external link.
        Dataset::from_parsed_header(
            crate::dataset::DatasetParseContext {
                context: child.location.context.clone(),
            },
            child.location.address,
            child.name.clone(),
            header.as_ref(),
        )
        .map(Some)
        .map_err(|err| err.with_context(self.child_context(child)))
    }
638
    /// Fetches the object header of `child` through the child's own file
    /// context (not necessarily this group's).
    fn cached_child_header(
        &self,
        child: &ChildEntry,
    ) -> Result<Arc<crate::object_header::ObjectHeader>> {
        child
            .location
            .context
            .get_or_parse_header(child.location.address)
    }
648
    /// Upper bound on nested soft/external link resolution; reaching it is
    /// reported as an error rather than followed further.
    const MAX_SOFT_LINK_DEPTH: u32 = 16;
651
    /// Resolves a soft link target `path` at nesting level `depth` by
    /// delegating to `resolve_path_location`.
    fn resolve_soft_link_depth(&self, path: &str, depth: u32) -> Result<ObjectLocation> {
        self.resolve_path_location(path, depth, "soft link")
    }
655
656 fn resolve_external_link_depth(
657 &self,
658 filename: &str,
659 path: &str,
660 depth: u32,
661 ) -> Result<Option<ObjectLocation>> {
662 if depth >= Self::MAX_SOFT_LINK_DEPTH {
663 return Err(Error::Other(format!(
664 "external link resolution exceeded maximum depth ({}) at '{}:{}'",
665 Self::MAX_SOFT_LINK_DEPTH,
666 filename,
667 path,
668 )));
669 }
670
671 let Some(resolver) = self.context.external_link_resolver.as_ref() else {
672 return Ok(None);
673 };
674 let Some(file) = resolver.resolve_external_link(filename)? else {
675 return Ok(None);
676 };
677 let root = file.root_group()?;
678 Ok(Some(root.resolve_path_location(
679 path,
680 depth + 1,
681 "external link",
682 )?))
683 }
684
685 fn resolve_path_location(
686 &self,
687 path: &str,
688 depth: u32,
689 link_kind: &str,
690 ) -> Result<ObjectLocation> {
691 if depth >= Self::MAX_SOFT_LINK_DEPTH {
692 return Err(Error::Other(format!(
693 "{} resolution exceeded maximum depth ({}) — possible cycle at '{}'",
694 link_kind,
695 Self::MAX_SOFT_LINK_DEPTH,
696 path,
697 )));
698 }
699
700 let parts: Vec<&str> = path
701 .trim_matches('/')
702 .split('/')
703 .filter(|s| !s.is_empty())
704 .collect();
705
706 if parts.is_empty() {
707 return Ok(self.local_location(self.root_address));
708 }
709
710 let start_addr = if path.starts_with('/') {
711 self.root_address
712 } else {
713 self.address
714 };
715
716 let mut current_group = Group::new(
717 self.context.clone(),
718 start_addr,
719 String::new(),
720 self.root_address,
721 );
722
723 for &part in &parts[..parts.len() - 1] {
724 current_group = current_group.group(part)?;
725 }
726
727 let target_name = parts[parts.len() - 1];
728 if let Some(child) = current_group.resolve_child_with_link_depth(target_name, depth + 1)? {
729 return Ok(child.location);
730 }
731
732 Err(Error::Other(format!(
733 "{} target '{}' not found",
734 link_kind, path
735 )))
736 }
737}
738
739fn classify_child_header(header: &crate::object_header::ObjectHeader) -> ChildObjectKind {
740 let mut has_dataset_message = false;
741
742 for msg in &header.messages {
743 match msg {
744 HdfMessage::SymbolTable(_)
745 | HdfMessage::Link(_)
746 | HdfMessage::LinkInfo(_)
747 | HdfMessage::GroupInfo(_) => return ChildObjectKind::Group,
748 HdfMessage::Dataspace(_)
749 | HdfMessage::DataLayout(_)
750 | HdfMessage::FillValue(_)
751 | HdfMessage::FilterPipeline(_) => has_dataset_message = true,
752 _ => {}
753 }
754 }
755
756 if has_dataset_message {
757 ChildObjectKind::Dataset
758 } else {
759 ChildObjectKind::Other
760 }
761}