1use crate::error::{Error, Result};
2use crate::filters::FilterRegistry;
3use crate::fractal_heap::FractalHeap;
4use crate::global_heap::GlobalHeapCollection;
5use crate::io::Cursor;
6use crate::messages::attribute::AttributeMessage;
7use crate::messages::attribute_info::AttributeInfoMessage;
8use crate::messages::dataspace::DataspaceType;
9use crate::messages::datatype::{Datatype, StringEncoding, StringPadding, StringSize, VarLenKind};
10use crate::messages::HdfMessage;
11use crate::object_header::ObjectHeader;
12use crate::storage::Storage;
13use crate::{btree_v2, messages};
14
15fn checked_usize(value: u64, context: &str) -> Result<usize> {
16 usize::try_from(value).map_err(|_| {
17 Error::InvalidData(format!(
18 "{context} value {value} exceeds platform usize capacity"
19 ))
20 })
21}
22
23#[derive(Debug, Clone)]
25pub struct Attribute {
26 pub name: String,
27 pub datatype: Datatype,
28 pub shape: Vec<u64>,
29 pub raw_data: Vec<u8>,
30}
31
32impl Attribute {
33 pub fn from_message(msg: AttributeMessage) -> Self {
35 Self::from_message_with_context(msg, None, 0)
36 }
37
38 pub fn from_message_with_context(
41 msg: AttributeMessage,
42 file_data: Option<&[u8]>,
43 offset_size: u8,
44 ) -> Self {
45 let shape = match msg.dataspace.dataspace_type {
46 DataspaceType::Scalar => vec![],
47 DataspaceType::Null => vec![0],
48 DataspaceType::Simple => msg.dataspace.dims.clone(),
49 };
50 let raw_data = if let (Some(file_data), Datatype::VarLen { base, kind, .. }) =
51 (file_data, &msg.datatype)
52 {
53 if *kind == VarLenKind::String && is_byte_vlen(base) && shape.is_empty() {
54 resolve_vlen_bytes(&msg.raw_data, file_data, offset_size)
55 .unwrap_or_else(|| msg.raw_data.clone())
56 } else {
57 msg.raw_data.clone()
58 }
59 } else {
60 msg.raw_data.clone()
61 };
62 Attribute {
63 name: msg.name,
64 datatype: msg.datatype,
65 shape,
66 raw_data,
67 }
68 }
69
70 pub fn num_elements(&self) -> Result<u64> {
72 if self.shape.is_empty() {
73 Ok(1) } else {
75 self.shape.iter().try_fold(1u64, |acc, &dim| {
76 acc.checked_mul(dim).ok_or_else(|| {
77 Error::InvalidData("attribute element count overflows u64".to_string())
78 })
79 })
80 }
81 }
82
83 pub fn read_scalar<T: crate::datatype_api::H5Type>(&self) -> Result<T> {
85 T::from_bytes(&self.raw_data, &self.datatype)
86 }
87
88 pub fn read_1d<T: crate::datatype_api::H5Type>(&self) -> Result<Vec<T>> {
90 let elem_size = T::element_size(&self.datatype);
91 let n = checked_usize(self.num_elements()?, "attribute element count")?;
92 let mut result = Vec::with_capacity(n);
93 for i in 0..n {
94 let start = i * elem_size;
95 let end = start + elem_size;
96 if end > self.raw_data.len() {
97 return Err(Error::InvalidData(format!(
98 "attribute data too short: need {} bytes, have {}",
99 end,
100 self.raw_data.len()
101 )));
102 }
103 result.push(T::from_bytes(&self.raw_data[start..end], &self.datatype)?);
104 }
105 Ok(result)
106 }
107
108 pub fn read_string(&self) -> Result<String> {
113 match &self.datatype {
114 Datatype::VarLen {
115 base,
116 kind: VarLenKind::String,
117 ..
118 } if is_byte_vlen(base) => decode_varlen_byte_string(&self.raw_data),
119 Datatype::String {
120 size,
121 encoding,
122 padding,
123 } => match size {
124 StringSize::Fixed(len) => {
125 let len = *len as usize;
126 let bytes = if self.raw_data.len() < len {
127 &self.raw_data
128 } else {
129 &self.raw_data[..len]
130 };
131 decode_string(bytes, *padding, *encoding)
132 }
133 StringSize::Variable => {
134 if self.raw_data.len() >= 12 {
138 let trimmed = match padding {
140 StringPadding::NullTerminate => {
141 let end = self
142 .raw_data
143 .iter()
144 .position(|&b| b == 0)
145 .unwrap_or(self.raw_data.len());
146 &self.raw_data[..end]
147 }
148 _ => &self.raw_data,
149 };
150 if let Ok(s) = String::from_utf8(trimmed.to_vec()) {
151 if s.chars()
152 .all(|c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
153 {
154 return Ok(s);
155 }
156 }
157 }
158 decode_string(&self.raw_data, *padding, *encoding)
159 }
160 },
161 _ => Err(Error::TypeMismatch {
162 expected: "String".into(),
163 actual: format!("{:?}", self.datatype),
164 }),
165 }
166 }
167
168 pub fn read_vlen_string(&self, file_data: &[u8], offset_size: u8) -> Result<String> {
173 match &self.datatype {
174 Datatype::String {
175 size: StringSize::Variable,
176 encoding,
177 padding,
178 } => {
179 let ref_size = 4 + offset_size as usize + 4; if self.raw_data.len() < ref_size {
181 return decode_string(&self.raw_data, *padding, *encoding);
183 }
184 let bytes = read_one_vlen_string(
185 &self.raw_data,
186 0,
187 file_data,
188 offset_size,
189 *padding,
190 *encoding,
191 )?;
192 Ok(bytes)
193 }
194 Datatype::String {
195 size: StringSize::Fixed(_),
196 ..
197 } => self.read_string(),
198 _ => Err(Error::TypeMismatch {
199 expected: "String".into(),
200 actual: format!("{:?}", self.datatype),
201 }),
202 }
203 }
204
205 pub fn read_vlen_strings(&self, file_data: &[u8], offset_size: u8) -> Result<Vec<String>> {
207 match &self.datatype {
208 Datatype::String {
209 size: StringSize::Variable,
210 encoding,
211 padding,
212 } => {
213 let ref_size = 4 + offset_size as usize + 4;
214 let n = checked_usize(self.num_elements()?, "attribute string element count")?;
215 let mut result = Vec::with_capacity(n);
216 for i in 0..n {
217 let offset = i * ref_size;
218 if offset + ref_size > self.raw_data.len() {
219 break;
220 }
221 result.push(read_one_vlen_string(
222 &self.raw_data,
223 offset,
224 file_data,
225 offset_size,
226 *padding,
227 *encoding,
228 )?);
229 }
230 Ok(result)
231 }
232 Datatype::String {
233 size: StringSize::Fixed(_),
234 ..
235 } => self.read_strings(),
236 _ => Err(Error::TypeMismatch {
237 expected: "String array".into(),
238 actual: format!("{:?}", self.datatype),
239 }),
240 }
241 }
242
243 pub fn read_strings(&self) -> Result<Vec<String>> {
245 match &self.datatype {
246 Datatype::String {
247 size: StringSize::Fixed(len),
248 encoding,
249 padding,
250 } => {
251 let len = *len as usize;
252 let n = checked_usize(self.num_elements()?, "attribute string element count")?;
253 let mut result = Vec::with_capacity(n);
254 for i in 0..n {
255 let start = i * len;
256 let end = (start + len).min(self.raw_data.len());
257 if start >= self.raw_data.len() {
258 break;
259 }
260 result.push(decode_string(
261 &self.raw_data[start..end],
262 *padding,
263 *encoding,
264 )?);
265 }
266 Ok(result)
267 }
268 _ => Err(Error::TypeMismatch {
269 expected: "String array".into(),
270 actual: format!("{:?}", self.datatype),
271 }),
272 }
273 }
274
275 pub fn read_as_f64(&self) -> Result<f64> {
277 match &self.datatype {
278 Datatype::FloatingPoint { size, .. } => {
279 let val: f64 = match size {
280 4 => {
281 let v = self.read_scalar::<f32>()?;
282 v as f64
283 }
284 8 => self.read_scalar::<f64>()?,
285 _ => {
286 return Err(Error::TypeMismatch {
287 expected: "f32 or f64".into(),
288 actual: format!("FloatingPoint(size={})", size),
289 })
290 }
291 };
292 Ok(val)
293 }
294 Datatype::FixedPoint { size, signed, .. } => {
295 let val = match (size, signed) {
296 (1, true) => self.read_scalar::<i8>()? as f64,
297 (1, false) => self.read_scalar::<u8>()? as f64,
298 (2, true) => self.read_scalar::<i16>()? as f64,
299 (2, false) => self.read_scalar::<u16>()? as f64,
300 (4, true) => self.read_scalar::<i32>()? as f64,
301 (4, false) => self.read_scalar::<u32>()? as f64,
302 (8, true) => self.read_scalar::<i64>()? as f64,
303 (8, false) => self.read_scalar::<u64>()? as f64,
304 _ => {
305 return Err(Error::TypeMismatch {
306 expected: "numeric".into(),
307 actual: format!("FixedPoint(size={})", size),
308 })
309 }
310 };
311 Ok(val)
312 }
313 _ => Err(Error::TypeMismatch {
314 expected: "numeric".into(),
315 actual: format!("{:?}", self.datatype),
316 }),
317 }
318 }
319}
320
321pub(crate) fn collect_attribute_messages_storage(
322 header: &ObjectHeader,
323 storage: &dyn Storage,
324 offset_size: u8,
325 length_size: u8,
326 filter_registry: Option<&FilterRegistry>,
327) -> Result<Vec<AttributeMessage>> {
328 let mut attributes = Vec::new();
329 let mut attribute_info = None;
330
331 for msg in &header.messages {
332 match msg {
333 HdfMessage::Attribute(attr) => attributes.push(attr.clone()),
334 HdfMessage::AttributeInfo(info) => attribute_info = Some(info.clone()),
335 _ => {}
336 }
337 }
338
339 if let Some(info) = attribute_info {
340 attributes.extend(load_dense_attribute_messages_storage(
341 &info,
342 storage,
343 offset_size,
344 length_size,
345 filter_registry,
346 )?);
347 }
348
349 Ok(attributes)
350}
351
352fn load_dense_attribute_messages_storage(
353 info: &AttributeInfoMessage,
354 storage: &dyn Storage,
355 offset_size: u8,
356 length_size: u8,
357 filter_registry: Option<&FilterRegistry>,
358) -> Result<Vec<AttributeMessage>> {
359 if Cursor::is_undefined_offset(info.fractal_heap_address, offset_size) {
360 return Ok(Vec::new());
361 }
362
363 let heap = FractalHeap::parse_at_storage(
364 storage,
365 info.fractal_heap_address,
366 offset_size,
367 length_size,
368 )?;
369
370 let records = load_dense_attribute_records_storage(info, storage, offset_size, length_size)?;
371
372 let mut attributes = Vec::new();
373 for record in records {
374 let heap_id = match record {
375 btree_v2::BTreeV2Record::AttributeNameHash { heap_id, .. }
376 | btree_v2::BTreeV2Record::AttributeCreationOrder { heap_id, .. } => heap_id,
377 _ => continue,
378 };
379
380 let managed_bytes = heap.get_object_storage_with_registry(
381 &heap_id,
382 storage,
383 offset_size,
384 length_size,
385 filter_registry,
386 )?;
387
388 let mut attr_cursor = Cursor::new(&managed_bytes);
389 let attr = messages::attribute::parse(
390 &mut attr_cursor,
391 offset_size,
392 length_size,
393 managed_bytes.len(),
394 )?;
395 attributes.push(attr);
396 }
397
398 Ok(attributes)
399}
400
401fn load_dense_attribute_records_storage(
402 info: &AttributeInfoMessage,
403 storage: &dyn Storage,
404 offset_size: u8,
405 length_size: u8,
406) -> Result<Vec<btree_v2::BTreeV2Record>> {
407 let mut addrs = vec![("name", info.btree_name_index_address)];
408 if let Some(creation_order_addr) = info.btree_creation_order_address {
409 addrs.push(("creation-order", creation_order_addr));
410 }
411
412 let mut last_error = None;
413 for (index_name, addr) in addrs {
414 if Cursor::is_undefined_offset(addr, offset_size) {
415 continue;
416 }
417
418 let header = match btree_v2::BTreeV2Header::parse_at_storage(
419 storage,
420 addr,
421 offset_size,
422 length_size,
423 ) {
424 Ok(header) => header,
425 Err(err) => {
426 last_error = Some(format!(
427 "failed to parse dense attribute {index_name} B-tree at {addr:#x}: {err}"
428 ));
429 continue;
430 }
431 };
432
433 match btree_v2::collect_btree_v2_records_storage(
434 storage,
435 &header,
436 offset_size,
437 length_size,
438 None,
439 &[],
440 None,
441 ) {
442 Ok(records) => return Ok(records),
443 Err(err) => {
444 last_error = Some(format!(
445 "failed to read dense attribute {index_name} B-tree at {addr:#x}: {err}"
446 ));
447 }
448 }
449 }
450
451 if let Some(err) = last_error {
452 Err(Error::InvalidData(format!(
453 "failed to load dense attribute records: {err}"
454 )))
455 } else {
456 Ok(Vec::new())
457 }
458}
459
460pub(crate) fn read_one_vlen_string(
462 raw_data: &[u8],
463 offset: usize,
464 file_data: &[u8],
465 offset_size: u8,
466 padding: StringPadding,
467 encoding: StringEncoding,
468) -> Result<String> {
469 let mut cursor = Cursor::new(&raw_data[offset..]);
470 let _seq_len = cursor.read_u32_le()?;
471 let heap_addr = cursor.read_offset(offset_size)?;
472 let obj_index = cursor.read_u32_le()?;
473
474 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
475 return Ok(String::new());
476 }
477
478 let mut heap_cursor = Cursor::new(file_data);
479 heap_cursor.set_position(heap_addr);
480 let collection = GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size)?;
481
482 match collection.get_object(obj_index as u16) {
483 Some(obj) => decode_string(&obj.data, padding, encoding),
484 None => Ok(String::new()),
485 }
486}
487
488pub(crate) fn read_one_vlen_string_storage(
489 raw_data: &[u8],
490 offset: usize,
491 storage: &dyn Storage,
492 offset_size: u8,
493 length_size: u8,
494 padding: StringPadding,
495 encoding: StringEncoding,
496) -> Result<String> {
497 let mut cursor = Cursor::new(&raw_data[offset..]);
498 let _seq_len = cursor.read_u32_le()?;
499 let heap_addr = cursor.read_offset(offset_size)?;
500 let obj_index = cursor.read_u32_le()?;
501
502 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
503 return Ok(String::new());
504 }
505
506 let collection =
507 GlobalHeapCollection::parse_at_storage(storage, heap_addr, offset_size, length_size)?;
508 match collection.get_object(obj_index as u16) {
509 Some(obj) => decode_string(&obj.data, padding, encoding),
510 None => Ok(String::new()),
511 }
512}
513
514pub(crate) fn decode_string(
519 bytes: &[u8],
520 padding: StringPadding,
521 _encoding: StringEncoding,
522) -> Result<String> {
523 let trimmed = match padding {
524 StringPadding::NullTerminate => {
525 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
526 &bytes[..end]
527 }
528 StringPadding::NullPad => {
529 let end = bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
530 &bytes[..end]
531 }
532 StringPadding::SpacePad => {
533 let end = bytes.iter().rposition(|&b| b != b' ').map_or(0, |i| i + 1);
534 &bytes[..end]
535 }
536 };
537
538 String::from_utf8(trimmed.to_vec())
539 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
540}
541
542fn is_byte_vlen(base: &Datatype) -> bool {
543 matches!(base, Datatype::FixedPoint { size: 1, .. })
544}
545
546pub(crate) fn decode_varlen_byte_string(bytes: &[u8]) -> Result<String> {
547 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
548 String::from_utf8(bytes[..end].to_vec())
549 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
550}
551
552pub(crate) fn resolve_vlen_bytes(
553 raw_data: &[u8],
554 file_data: &[u8],
555 offset_size: u8,
556) -> Option<Vec<u8>> {
557 if raw_data.len() < 4 + offset_size as usize + 4 {
558 return None;
559 }
560
561 let mut cursor = Cursor::new(raw_data);
562 let seq_len = cursor.read_u32_le().ok()? as usize;
563 let heap_addr = cursor.read_offset(offset_size).ok()?;
564 let obj_index = cursor.read_u32_le().ok()? as u16;
565
566 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
567 return Some(Vec::new());
568 }
569
570 let mut heap_cursor = Cursor::new(file_data);
571 heap_cursor.set_position(heap_addr);
572 let collection =
573 GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size).ok()?;
574 let object = collection.get_object(obj_index)?;
575 Some(object.data[..object.data.len().min(seq_len)].to_vec())
576}
577
578pub(crate) fn resolve_vlen_bytes_storage(
579 raw_data: &[u8],
580 storage: &dyn Storage,
581 offset_size: u8,
582 length_size: u8,
583) -> Option<Vec<u8>> {
584 if raw_data.len() < 4 + offset_size as usize + 4 {
585 return None;
586 }
587
588 let mut cursor = Cursor::new(raw_data);
589 let seq_len = cursor.read_u32_le().ok()? as usize;
590 let heap_addr = cursor.read_offset(offset_size).ok()?;
591 let obj_index = cursor.read_u32_le().ok()? as u16;
592
593 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
594 return Some(Vec::new());
595 }
596
597 let collection =
598 GlobalHeapCollection::parse_at_storage(storage, heap_addr, offset_size, length_size)
599 .ok()?;
600 let object = collection.get_object(obj_index)?;
601 Some(object.data[..object.data.len().min(seq_len)].to_vec())
602}
603
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ByteOrder;
    use crate::storage::BytesStorage;
    use std::f64::consts::PI;

    /// Assembles an in-memory attribute so each test only spells out the
    /// parts it cares about.
    fn attribute(name: &str, datatype: Datatype, shape: Vec<u64>, raw_data: Vec<u8>) -> Attribute {
        Attribute {
            name: name.to_string(),
            datatype,
            shape,
            raw_data,
        }
    }

    /// Little-endian signed 32-bit integer datatype shared by the integer
    /// tests.
    fn le_i32() -> Datatype {
        Datatype::FixedPoint {
            size: 4,
            signed: true,
            byte_order: ByteOrder::LittleEndian,
        }
    }

    #[test]
    fn scalar_f64_attribute() {
        let datatype = Datatype::FloatingPoint {
            size: 8,
            byte_order: ByteOrder::LittleEndian,
        };
        let attr = attribute("pi", datatype, vec![], PI.to_le_bytes().to_vec());

        let val = attr.read_scalar::<f64>().unwrap();
        assert!((val - PI).abs() < 1e-10);
    }

    #[test]
    fn one_dimensional_i32_attribute() {
        let raw_data: Vec<u8> = [1i32, 2, 3, 4]
            .iter()
            .flat_map(|v| v.to_le_bytes())
            .collect();
        let attr = attribute("data", le_i32(), vec![4], raw_data);

        let result = attr.read_1d::<i32>().unwrap();
        assert_eq!(result, vec![1, 2, 3, 4]);
    }

    #[test]
    fn string_attribute() {
        let datatype = Datatype::String {
            size: StringSize::Fixed(10),
            encoding: StringEncoding::Ascii,
            padding: StringPadding::NullPad,
        };
        let attr = attribute("units", datatype, vec![], b"meters\0\0\0\0".to_vec());

        assert_eq!(attr.read_string().unwrap(), "meters");
    }

    #[test]
    fn varlen_byte_string_attribute() {
        let byte_base = Datatype::FixedPoint {
            size: 1,
            signed: false,
            byte_order: ByteOrder::LittleEndian,
        };
        let datatype = Datatype::VarLen {
            base: Box::new(byte_base),
            kind: VarLenKind::String,
            encoding: StringEncoding::Utf8,
            padding: StringPadding::NullTerminate,
        };
        let attr = attribute("name", datatype, vec![], b"test_dataset".to_vec());

        assert_eq!(attr.read_string().unwrap(), "test_dataset");
    }

    #[test]
    fn read_as_f64_from_int() {
        let attr = attribute("count", le_i32(), vec![], 42i32.to_le_bytes().to_vec());

        let val = attr.read_as_f64().unwrap();
        assert!((val - 42.0).abs() < 1e-10);
    }

    #[test]
    fn dense_attribute_btree_errors_surface() {
        // Address 0 is defined, but the empty storage holds no B-tree there,
        // so loading must fail with a descriptive error.
        let info = AttributeInfoMessage {
            creation_order_tracked: false,
            creation_order_indexed: false,
            max_creation_index: None,
            fractal_heap_address: 0,
            btree_name_index_address: 0,
            btree_creation_order_address: None,
        };
        let storage = BytesStorage::new(Vec::new());

        let failure = load_dense_attribute_records_storage(&info, &storage, 8, 8).unwrap_err();
        assert!(matches!(failure, Error::InvalidData(_)));
        assert!(failure.to_string().contains("dense attribute"));
    }
}