1use std::collections::HashMap;
2use std::sync::Arc;
3
4use parking_lot::Mutex;
5
6use crate::attribute_api::{collect_attribute_messages, Attribute};
7use crate::btree_v1;
8use crate::btree_v2;
9use crate::cache::ChunkCache;
10use crate::dataset::Dataset;
11use crate::error::{Error, Result};
12use crate::filters::FilterRegistry;
13use crate::fractal_heap::FractalHeap;
14use crate::io::Cursor;
15use crate::local_heap::LocalHeap;
16use crate::messages::link::{self, LinkMessage, LinkTarget};
17use crate::messages::link_info::LinkInfoMessage;
18use crate::messages::symbol_table_msg::SymbolTableMessage;
19use crate::messages::HdfMessage;
20use crate::object_header::ObjectHeader;
21use crate::symbol_table::SymbolTableNode;
22
23pub struct Group<'f> {
25 file_data: &'f [u8],
26 offset_size: u8,
27 length_size: u8,
28 pub(crate) name: String,
29 pub(crate) address: u64,
30 pub(crate) root_address: u64,
32 pub(crate) chunk_cache: Arc<ChunkCache>,
33 pub(crate) header_cache: Arc<Mutex<HashMap<u64, Arc<ObjectHeader>>>>,
34 pub(crate) filter_registry: Arc<FilterRegistry>,
35}
36
/// A resolved link inside a group: the link's name and the object-header
/// address it ends up pointing at.
#[derive(Debug, Clone)]
struct ChildEntry {
    /// Link name as stored in the group.
    name: String,
    /// Object-header address of the linked object.
    address: u64,
}
42
43impl<'f> Group<'f> {
44 #[allow(clippy::too_many_arguments)]
46 pub(crate) fn new(
47 file_data: &'f [u8],
48 address: u64,
49 name: String,
50 offset_size: u8,
51 length_size: u8,
52 root_address: u64,
53 chunk_cache: Arc<ChunkCache>,
54 header_cache: Arc<Mutex<HashMap<u64, Arc<ObjectHeader>>>>,
55 filter_registry: Arc<FilterRegistry>,
56 ) -> Self {
57 Group {
58 file_data,
59 offset_size,
60 length_size,
61 name,
62 address,
63 root_address,
64 chunk_cache,
65 header_cache,
66 filter_registry,
67 }
68 }
69
70 pub fn name(&self) -> &str {
72 &self.name
73 }
74
75 pub fn address(&self) -> u64 {
77 self.address
78 }
79
80 pub fn file_data(&self) -> &'f [u8] {
82 self.file_data
83 }
84
85 pub fn offset_size(&self) -> u8 {
87 self.offset_size
88 }
89
90 pub fn length_size(&self) -> u8 {
92 self.length_size
93 }
94
95 fn cached_header(&self, addr: u64) -> Result<Arc<ObjectHeader>> {
97 {
98 let cache = self.header_cache.lock();
99 if let Some(hdr) = cache.get(&addr) {
100 return Ok(Arc::clone(hdr));
101 }
102 }
103 let mut hdr =
104 ObjectHeader::parse_at(self.file_data, addr, self.offset_size, self.length_size)?;
105 hdr.resolve_shared_messages(self.file_data, self.offset_size, self.length_size)?;
106 let arc = Arc::new(hdr);
107 let mut cache = self.header_cache.lock();
108 cache.insert(addr, Arc::clone(&arc));
109 Ok(arc)
110 }
111
112 pub fn groups(&self) -> Result<Vec<Group<'f>>> {
114 let (groups, _) = self.resolve_member_objects()?;
115 Ok(groups)
116 }
117
118 pub fn members(&self) -> Result<(Vec<Group<'f>>, Vec<Dataset<'f>>)> {
120 self.resolve_member_objects()
121 }
122
123 fn resolve_member_objects(&self) -> Result<(Vec<Group<'f>>, Vec<Dataset<'f>>)> {
124 let children = self.resolve_children()?;
125 let mut groups = Vec::new();
126 let mut datasets = Vec::new();
127 for child in &children {
128 if self.child_is_group(child)? {
129 groups.push(Group::new(
130 self.file_data,
131 child.address,
132 child.name.clone(),
133 self.offset_size,
134 self.length_size,
135 self.root_address,
136 self.chunk_cache.clone(),
137 self.header_cache.clone(),
138 self.filter_registry.clone(),
139 ));
140 } else if let Some(dataset) = self.try_open_child_dataset(child) {
141 datasets.push(dataset);
142 }
143 }
144 Ok((groups, datasets))
145 }
146
147 pub fn group(&self, name: &str) -> Result<Group<'f>> {
149 let children = self.resolve_children()?;
150 for child in &children {
151 if child.name == name {
152 if self.is_group_at(child.address)? {
153 return Ok(Group::new(
154 self.file_data,
155 child.address,
156 child.name.clone(),
157 self.offset_size,
158 self.length_size,
159 self.root_address,
160 self.chunk_cache.clone(),
161 self.header_cache.clone(),
162 self.filter_registry.clone(),
163 ));
164 } else {
165 return Err(Error::GroupNotFound(format!(
166 "'{}' is a dataset, not a group",
167 name
168 )));
169 }
170 }
171 }
172 Err(Error::GroupNotFound(name.to_string()))
173 }
174
175 pub fn datasets(&self) -> Result<Vec<Dataset<'f>>> {
177 let (_, datasets) = self.resolve_member_objects()?;
178 Ok(datasets)
179 }
180
181 pub fn dataset(&self, name: &str) -> Result<Dataset<'f>> {
183 let children = self.resolve_children()?;
184 for child in &children {
185 if child.name == name {
186 if let Some(dataset) = self.try_open_child_dataset(child) {
187 return Ok(dataset);
188 }
189 return Err(Error::DatasetNotFound(name.to_string()));
190 }
191 }
192 Err(Error::DatasetNotFound(name.to_string()))
193 }
194
195 pub fn attributes(&self) -> Result<Vec<Attribute>> {
197 let mut header = (*self.cached_header(self.address)?).clone();
198 header.resolve_shared_messages(self.file_data, self.offset_size, self.length_size)?;
199 Ok(
200 collect_attribute_messages(
201 &header,
202 self.file_data,
203 self.offset_size,
204 self.length_size,
205 )?
206 .into_iter()
207 .map(|attr| {
208 Attribute::from_message_with_context(attr, Some(self.file_data), self.offset_size)
209 })
210 .collect(),
211 )
212 }
213
214 pub fn attribute(&self, name: &str) -> Result<Attribute> {
216 let attrs = self.attributes()?;
217 attrs
218 .into_iter()
219 .find(|a| a.name == name)
220 .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
221 }
222
223 fn resolve_children(&self) -> Result<Vec<ChildEntry>> {
226 self.resolve_children_with_link_depth(0)
227 }
228
229 fn resolve_children_with_link_depth(&self, link_depth: u32) -> Result<Vec<ChildEntry>> {
231 let header = self.cached_header(self.address)?;
232
233 let mut children = Vec::new();
234
235 let mut found_symbol_table = false;
237 let mut link_info: Option<LinkInfoMessage> = None;
239 let mut links: Vec<LinkMessage> = Vec::new();
240
241 for msg in &header.messages {
242 match msg {
243 HdfMessage::SymbolTable(st) => {
244 found_symbol_table = true;
245 children = self.resolve_old_style_group(st)?;
246 }
247 HdfMessage::Link(link) => {
248 links.push(link.clone());
249 }
250 HdfMessage::LinkInfo(li) => {
251 link_info = Some(li.clone());
252 }
253 _ => {}
254 }
255 }
256
257 if !found_symbol_table {
258 Self::resolve_link_targets(self, &links, link_depth, &mut children);
260
261 if let Some(ref li) = link_info {
263 if !Cursor::is_undefined_offset(li.fractal_heap_address, self.offset_size) {
264 for child in self.resolve_dense_links(li, link_depth)? {
265 let is_duplicate = children.iter().any(|existing| {
266 existing.name == child.name && existing.address == child.address
267 });
268 if !is_duplicate {
269 children.push(child);
270 }
271 }
272 }
273 }
274 }
275
276 Ok(children)
277 }
278
279 fn resolve_link_targets(
281 &self,
282 links: &[LinkMessage],
283 link_depth: u32,
284 children: &mut Vec<ChildEntry>,
285 ) {
286 for link in links {
287 match &link.target {
288 LinkTarget::Hard { address } => {
289 children.push(ChildEntry {
290 name: link.name.clone(),
291 address: *address,
292 });
293 }
294 LinkTarget::Soft { path } => {
295 if let Ok(address) = self.resolve_soft_link_depth(path, link_depth) {
296 children.push(ChildEntry {
297 name: link.name.clone(),
298 address,
299 });
300 }
301 }
302 LinkTarget::External { .. } => {
303 }
305 }
306 }
307 }
308
309 fn resolve_old_style_group(&self, st: &SymbolTableMessage) -> Result<Vec<ChildEntry>> {
311 let mut heap_cursor = Cursor::new(self.file_data);
313 heap_cursor.set_position(st.heap_address);
314 let heap = LocalHeap::parse(&mut heap_cursor, self.offset_size, self.length_size)?;
315
316 let leaves = btree_v1::collect_btree_v1_leaves(
318 self.file_data,
319 st.btree_address,
320 self.offset_size,
321 self.length_size,
322 None, &[],
324 None,
325 )?;
326
327 let mut children = Vec::new();
328
329 for (_key, snod_address) in &leaves {
330 let mut cursor = Cursor::new(self.file_data);
331 cursor.set_position(*snod_address);
332 let snod = SymbolTableNode::parse(&mut cursor, self.offset_size, self.length_size)?;
333
334 for entry in &snod.entries {
335 let name = heap.get_string(entry.link_name_offset, self.file_data)?;
336 children.push(ChildEntry {
337 name,
338 address: entry.object_header_address,
339 });
340 }
341 }
342
343 Ok(children)
344 }
345
346 fn resolve_dense_links(
348 &self,
349 link_info: &LinkInfoMessage,
350 link_depth: u32,
351 ) -> Result<Vec<ChildEntry>> {
352 let mut heap_cursor = Cursor::new(self.file_data);
354 heap_cursor.set_position(link_info.fractal_heap_address);
355 let heap = FractalHeap::parse(&mut heap_cursor, self.offset_size, self.length_size)?;
356
357 let mut btree_cursor = Cursor::new(self.file_data);
359 btree_cursor.set_position(link_info.btree_name_index_address);
360 let btree_header =
361 btree_v2::BTreeV2Header::parse(&mut btree_cursor, self.offset_size, self.length_size)?;
362
363 let records = btree_v2::collect_btree_v2_records(
365 self.file_data,
366 &btree_header,
367 self.offset_size,
368 self.length_size,
369 None,
370 &[],
371 None,
372 )?;
373
374 let mut children = Vec::new();
375 for record in &records {
376 let heap_id = match record {
377 btree_v2::BTreeV2Record::LinkNameHash { heap_id, .. } => heap_id,
378 btree_v2::BTreeV2Record::CreationOrder { heap_id, .. } => heap_id,
379 _ => continue,
380 };
381
382 let managed_bytes = heap.get_managed_object(
384 heap_id,
385 self.file_data,
386 self.offset_size,
387 self.length_size,
388 )?;
389
390 let mut link_cursor = Cursor::new(&managed_bytes);
392 let link_msg = link::parse(
393 &mut link_cursor,
394 self.offset_size,
395 self.length_size,
396 managed_bytes.len(),
397 )?;
398
399 match &link_msg.target {
400 LinkTarget::Hard { address } => {
401 children.push(ChildEntry {
402 name: link_msg.name.clone(),
403 address: *address,
404 });
405 }
406 LinkTarget::Soft { path } => {
407 if let Ok(address) = self.resolve_soft_link_depth(path, link_depth) {
408 children.push(ChildEntry {
409 name: link_msg.name.clone(),
410 address,
411 });
412 }
413 }
414 LinkTarget::External { .. } => {
415 }
417 }
418 }
419
420 Ok(children)
421 }
422
423 fn is_group_at(&self, address: u64) -> Result<bool> {
427 let mut header = (*self.cached_header(address)?).clone();
428 header.resolve_shared_messages(self.file_data, self.offset_size, self.length_size)?;
429 for msg in &header.messages {
430 match msg {
431 HdfMessage::SymbolTable(_)
433 | HdfMessage::Link(_)
434 | HdfMessage::LinkInfo(_)
435 | HdfMessage::GroupInfo(_) => return Ok(true),
436 HdfMessage::DataLayout(_) => return Ok(false),
438 _ => {}
439 }
440 }
441 Ok(true)
443 }
444
445 fn try_open_child_dataset(&self, child: &ChildEntry) -> Option<Dataset<'f>> {
446 let header = self.cached_header(child.address).ok()?;
447 Dataset::from_parsed_header(
448 crate::dataset::DatasetParseContext {
449 file_data: self.file_data,
450 offset_size: self.offset_size,
451 length_size: self.length_size,
452 chunk_cache: self.chunk_cache.clone(),
453 filter_registry: self.filter_registry.clone(),
454 },
455 child.address,
456 child.name.clone(),
457 header.as_ref(),
458 )
459 .ok()
460 }
461
462 fn child_is_group(&self, child: &ChildEntry) -> Result<bool> {
463 match self.is_group_at(child.address) {
464 Ok(is_group) => Ok(is_group),
465 Err(_) => Ok(self.try_open_child_dataset(child).is_none()),
466 }
467 }
468
469 const MAX_SOFT_LINK_DEPTH: u32 = 16;
472
473 fn resolve_soft_link_depth(&self, path: &str, depth: u32) -> Result<u64> {
474 if depth >= Self::MAX_SOFT_LINK_DEPTH {
475 return Err(Error::Other(format!(
476 "soft link resolution exceeded maximum depth ({}) — possible cycle at '{}'",
477 Self::MAX_SOFT_LINK_DEPTH,
478 path,
479 )));
480 }
481
482 let parts: Vec<&str> = path
483 .trim_matches('/')
484 .split('/')
485 .filter(|s| !s.is_empty())
486 .collect();
487
488 if parts.is_empty() {
489 return Ok(self.root_address);
490 }
491
492 let start_addr = if path.starts_with('/') {
494 self.root_address
495 } else {
496 self.address
497 };
498
499 let mut current_group = Group::new(
500 self.file_data,
501 start_addr,
502 String::new(),
503 self.offset_size,
504 self.length_size,
505 self.root_address,
506 self.chunk_cache.clone(),
507 self.header_cache.clone(),
508 self.filter_registry.clone(),
509 );
510
511 for &part in &parts[..parts.len() - 1] {
513 current_group = current_group.group(part)?;
514 }
515
516 let target_name = parts[parts.len() - 1];
518 let children = current_group.resolve_children_with_link_depth(depth + 1)?;
519 for child in &children {
520 if child.name == target_name {
521 return Ok(child.address);
522 }
523 }
524
525 Err(Error::Other(format!(
526 "soft link target '{}' not found",
527 path
528 )))
529 }
530}