1use crate::error::{Error, Result};
2use crate::fractal_heap::FractalHeap;
3use crate::global_heap::GlobalHeapCollection;
4use crate::io::Cursor;
5use crate::messages::attribute::AttributeMessage;
6use crate::messages::attribute_info::AttributeInfoMessage;
7use crate::messages::dataspace::DataspaceType;
8use crate::messages::datatype::{Datatype, StringEncoding, StringPadding, StringSize};
9use crate::messages::HdfMessage;
10use crate::object_header::ObjectHeader;
11use crate::storage::Storage;
12use crate::{btree_v2, messages};
13
/// A decoded HDF5 attribute: its name, datatype, shape and raw value bytes.
///
/// The typed accessors in `impl Attribute` interpret `raw_data` according to `datatype`.
#[derive(Debug, Clone)]
pub struct Attribute {
    /// Attribute name, taken from the attribute message.
    pub name: String,
    /// Datatype used to interpret `raw_data`.
    pub datatype: Datatype,
    /// Dimension sizes; empty for a scalar dataspace and `[0]` for a null dataspace.
    pub shape: Vec<u64>,
    /// Value bytes as stored in the attribute message, or the resolved bytes when a
    /// variable-length byte sequence was dereferenced during construction.
    pub raw_data: Vec<u8>,
}
22
23impl Attribute {
24 pub fn from_message(msg: AttributeMessage) -> Self {
26 Self::from_message_with_context(msg, None, 0)
27 }
28
29 pub fn from_message_with_context(
32 msg: AttributeMessage,
33 file_data: Option<&[u8]>,
34 offset_size: u8,
35 ) -> Self {
36 let shape = match msg.dataspace.dataspace_type {
37 DataspaceType::Scalar => vec![],
38 DataspaceType::Null => vec![0],
39 DataspaceType::Simple => msg.dataspace.dims.clone(),
40 };
41 let raw_data =
42 if let (Some(file_data), Datatype::VarLen { base }) = (file_data, &msg.datatype) {
43 if is_byte_vlen(base) && shape.is_empty() {
44 resolve_vlen_bytes(&msg.raw_data, file_data, offset_size)
45 .unwrap_or_else(|| msg.raw_data.clone())
46 } else {
47 msg.raw_data.clone()
48 }
49 } else {
50 msg.raw_data.clone()
51 };
52 Attribute {
53 name: msg.name,
54 datatype: msg.datatype,
55 shape,
56 raw_data,
57 }
58 }
59
60 pub fn num_elements(&self) -> u64 {
62 if self.shape.is_empty() {
63 1 } else {
65 self.shape.iter().product()
66 }
67 }
68
69 pub fn read_scalar<T: crate::datatype_api::H5Type>(&self) -> Result<T> {
71 T::from_bytes(&self.raw_data, &self.datatype)
72 }
73
74 pub fn read_1d<T: crate::datatype_api::H5Type>(&self) -> Result<Vec<T>> {
76 let elem_size = T::element_size(&self.datatype);
77 let n = self.num_elements() as usize;
78 let mut result = Vec::with_capacity(n);
79 for i in 0..n {
80 let start = i * elem_size;
81 let end = start + elem_size;
82 if end > self.raw_data.len() {
83 return Err(Error::InvalidData(format!(
84 "attribute data too short: need {} bytes, have {}",
85 end,
86 self.raw_data.len()
87 )));
88 }
89 result.push(T::from_bytes(&self.raw_data[start..end], &self.datatype)?);
90 }
91 Ok(result)
92 }
93
94 pub fn read_string(&self) -> Result<String> {
99 match &self.datatype {
100 Datatype::VarLen { base } if is_byte_vlen(base) => {
101 decode_varlen_byte_string(&self.raw_data)
102 }
103 Datatype::String {
104 size,
105 encoding,
106 padding,
107 } => match size {
108 StringSize::Fixed(len) => {
109 let len = *len as usize;
110 let bytes = if self.raw_data.len() < len {
111 &self.raw_data
112 } else {
113 &self.raw_data[..len]
114 };
115 decode_string(bytes, *padding, *encoding)
116 }
117 StringSize::Variable => {
118 if self.raw_data.len() >= 12 {
122 let trimmed = match padding {
124 StringPadding::NullTerminate => {
125 let end = self
126 .raw_data
127 .iter()
128 .position(|&b| b == 0)
129 .unwrap_or(self.raw_data.len());
130 &self.raw_data[..end]
131 }
132 _ => &self.raw_data,
133 };
134 if let Ok(s) = String::from_utf8(trimmed.to_vec()) {
135 if s.chars()
136 .all(|c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
137 {
138 return Ok(s);
139 }
140 }
141 }
142 decode_string(&self.raw_data, *padding, *encoding)
143 }
144 },
145 _ => Err(Error::TypeMismatch {
146 expected: "String".into(),
147 actual: format!("{:?}", self.datatype),
148 }),
149 }
150 }
151
152 pub fn read_vlen_string(&self, file_data: &[u8], offset_size: u8) -> Result<String> {
157 match &self.datatype {
158 Datatype::String {
159 size: StringSize::Variable,
160 encoding,
161 padding,
162 } => {
163 let ref_size = 4 + offset_size as usize + 4; if self.raw_data.len() < ref_size {
165 return decode_string(&self.raw_data, *padding, *encoding);
167 }
168 let bytes = read_one_vlen_string(
169 &self.raw_data,
170 0,
171 file_data,
172 offset_size,
173 *padding,
174 *encoding,
175 )?;
176 Ok(bytes)
177 }
178 Datatype::String {
179 size: StringSize::Fixed(_),
180 ..
181 } => self.read_string(),
182 _ => Err(Error::TypeMismatch {
183 expected: "String".into(),
184 actual: format!("{:?}", self.datatype),
185 }),
186 }
187 }
188
189 pub fn read_vlen_strings(&self, file_data: &[u8], offset_size: u8) -> Result<Vec<String>> {
191 match &self.datatype {
192 Datatype::String {
193 size: StringSize::Variable,
194 encoding,
195 padding,
196 } => {
197 let ref_size = 4 + offset_size as usize + 4;
198 let n = self.num_elements() as usize;
199 let mut result = Vec::with_capacity(n);
200 for i in 0..n {
201 let offset = i * ref_size;
202 if offset + ref_size > self.raw_data.len() {
203 break;
204 }
205 result.push(read_one_vlen_string(
206 &self.raw_data,
207 offset,
208 file_data,
209 offset_size,
210 *padding,
211 *encoding,
212 )?);
213 }
214 Ok(result)
215 }
216 Datatype::String {
217 size: StringSize::Fixed(_),
218 ..
219 } => self.read_strings(),
220 _ => Err(Error::TypeMismatch {
221 expected: "String array".into(),
222 actual: format!("{:?}", self.datatype),
223 }),
224 }
225 }
226
227 pub fn read_strings(&self) -> Result<Vec<String>> {
229 match &self.datatype {
230 Datatype::String {
231 size: StringSize::Fixed(len),
232 encoding,
233 padding,
234 } => {
235 let len = *len as usize;
236 let n = self.num_elements() as usize;
237 let mut result = Vec::with_capacity(n);
238 for i in 0..n {
239 let start = i * len;
240 let end = (start + len).min(self.raw_data.len());
241 if start >= self.raw_data.len() {
242 break;
243 }
244 result.push(decode_string(
245 &self.raw_data[start..end],
246 *padding,
247 *encoding,
248 )?);
249 }
250 Ok(result)
251 }
252 _ => Err(Error::TypeMismatch {
253 expected: "String array".into(),
254 actual: format!("{:?}", self.datatype),
255 }),
256 }
257 }
258
259 pub fn read_as_f64(&self) -> Result<f64> {
261 match &self.datatype {
262 Datatype::FloatingPoint { size, .. } => {
263 let val: f64 = match size {
264 4 => {
265 let v = self.read_scalar::<f32>()?;
266 v as f64
267 }
268 8 => self.read_scalar::<f64>()?,
269 _ => {
270 return Err(Error::TypeMismatch {
271 expected: "f32 or f64".into(),
272 actual: format!("FloatingPoint(size={})", size),
273 })
274 }
275 };
276 Ok(val)
277 }
278 Datatype::FixedPoint { size, signed, .. } => {
279 let val = match (size, signed) {
280 (1, true) => self.read_scalar::<i8>()? as f64,
281 (1, false) => self.read_scalar::<u8>()? as f64,
282 (2, true) => self.read_scalar::<i16>()? as f64,
283 (2, false) => self.read_scalar::<u16>()? as f64,
284 (4, true) => self.read_scalar::<i32>()? as f64,
285 (4, false) => self.read_scalar::<u32>()? as f64,
286 (8, true) => self.read_scalar::<i64>()? as f64,
287 (8, false) => self.read_scalar::<u64>()? as f64,
288 _ => {
289 return Err(Error::TypeMismatch {
290 expected: "numeric".into(),
291 actual: format!("FixedPoint(size={})", size),
292 })
293 }
294 };
295 Ok(val)
296 }
297 _ => Err(Error::TypeMismatch {
298 expected: "numeric".into(),
299 actual: format!("{:?}", self.datatype),
300 }),
301 }
302 }
303}
304
305#[allow(dead_code)]
306pub(crate) fn collect_attribute_messages(
307 header: &ObjectHeader,
308 file_data: &[u8],
309 offset_size: u8,
310 length_size: u8,
311) -> Result<Vec<AttributeMessage>> {
312 let mut attributes = Vec::new();
313 let mut attribute_info = None;
314
315 for msg in &header.messages {
316 match msg {
317 HdfMessage::Attribute(attr) => attributes.push(attr.clone()),
318 HdfMessage::AttributeInfo(info) => attribute_info = Some(info.clone()),
319 _ => {}
320 }
321 }
322
323 if let Some(info) = attribute_info {
324 attributes.extend(load_dense_attribute_messages(
325 &info,
326 file_data,
327 offset_size,
328 length_size,
329 )?);
330 }
331
332 Ok(attributes)
333}
334
335pub(crate) fn collect_attribute_messages_storage(
336 header: &ObjectHeader,
337 storage: &dyn Storage,
338 offset_size: u8,
339 length_size: u8,
340) -> Result<Vec<AttributeMessage>> {
341 let mut attributes = Vec::new();
342 let mut attribute_info = None;
343
344 for msg in &header.messages {
345 match msg {
346 HdfMessage::Attribute(attr) => attributes.push(attr.clone()),
347 HdfMessage::AttributeInfo(info) => attribute_info = Some(info.clone()),
348 _ => {}
349 }
350 }
351
352 if let Some(info) = attribute_info {
353 attributes.extend(load_dense_attribute_messages_storage(
354 &info,
355 storage,
356 offset_size,
357 length_size,
358 )?);
359 }
360
361 Ok(attributes)
362}
363
364#[allow(dead_code)]
365fn load_dense_attribute_messages(
366 info: &AttributeInfoMessage,
367 file_data: &[u8],
368 offset_size: u8,
369 length_size: u8,
370) -> Result<Vec<AttributeMessage>> {
371 if Cursor::is_undefined_offset(info.fractal_heap_address, offset_size) {
372 return Ok(Vec::new());
373 }
374
375 let mut heap_cursor = Cursor::new(file_data);
376 heap_cursor.set_position(info.fractal_heap_address);
377 let heap = FractalHeap::parse(&mut heap_cursor, offset_size, length_size)?;
378
379 let records =
380 load_dense_attribute_records(info, file_data, offset_size, length_size).unwrap_or_default();
381
382 let mut attributes = Vec::new();
383 for record in records {
384 let heap_id = match record {
385 btree_v2::BTreeV2Record::AttributeNameHash { heap_id, .. }
386 | btree_v2::BTreeV2Record::AttributeCreationOrder { heap_id, .. } => heap_id,
387 _ => continue,
388 };
389
390 let managed_bytes = match heap.get_object(&heap_id, file_data, offset_size, length_size) {
391 Ok(bytes) => bytes,
392 Err(_) => continue,
393 };
394
395 let mut attr_cursor = Cursor::new(&managed_bytes);
396 if let Ok(attr) = messages::attribute::parse(
397 &mut attr_cursor,
398 offset_size,
399 length_size,
400 managed_bytes.len(),
401 ) {
402 attributes.push(attr);
403 }
404 }
405
406 Ok(attributes)
407}
408
409fn load_dense_attribute_messages_storage(
410 info: &AttributeInfoMessage,
411 storage: &dyn Storage,
412 offset_size: u8,
413 length_size: u8,
414) -> Result<Vec<AttributeMessage>> {
415 if Cursor::is_undefined_offset(info.fractal_heap_address, offset_size) {
416 return Ok(Vec::new());
417 }
418
419 let heap = FractalHeap::parse_at_storage(
420 storage,
421 info.fractal_heap_address,
422 offset_size,
423 length_size,
424 )?;
425
426 let records = load_dense_attribute_records_storage(info, storage, offset_size, length_size)
427 .unwrap_or_default();
428
429 let mut attributes = Vec::new();
430 for record in records {
431 let heap_id = match record {
432 btree_v2::BTreeV2Record::AttributeNameHash { heap_id, .. }
433 | btree_v2::BTreeV2Record::AttributeCreationOrder { heap_id, .. } => heap_id,
434 _ => continue,
435 };
436
437 let managed_bytes =
438 match heap.get_object_storage(&heap_id, storage, offset_size, length_size) {
439 Ok(bytes) => bytes,
440 Err(_) => continue,
441 };
442
443 let mut attr_cursor = Cursor::new(&managed_bytes);
444 if let Ok(attr) = messages::attribute::parse(
445 &mut attr_cursor,
446 offset_size,
447 length_size,
448 managed_bytes.len(),
449 ) {
450 attributes.push(attr);
451 }
452 }
453
454 Ok(attributes)
455}
456
457#[allow(dead_code)]
458fn load_dense_attribute_records(
459 info: &AttributeInfoMessage,
460 file_data: &[u8],
461 offset_size: u8,
462 length_size: u8,
463) -> Result<Vec<btree_v2::BTreeV2Record>> {
464 let mut addrs = vec![info.btree_name_index_address];
465 if let Some(creation_order_addr) = info.btree_creation_order_address {
466 addrs.push(creation_order_addr);
467 }
468
469 for addr in addrs {
470 if Cursor::is_undefined_offset(addr, offset_size) {
471 continue;
472 }
473
474 let mut btree_cursor = Cursor::new(file_data);
475 btree_cursor.set_position(addr);
476 let header =
477 match btree_v2::BTreeV2Header::parse(&mut btree_cursor, offset_size, length_size) {
478 Ok(header) => header,
479 Err(_) => continue,
480 };
481
482 if let Ok(records) = btree_v2::collect_btree_v2_records(
483 file_data,
484 &header,
485 offset_size,
486 length_size,
487 None,
488 &[],
489 None,
490 ) {
491 return Ok(records);
492 }
493 }
494
495 Ok(Vec::new())
496}
497
498fn load_dense_attribute_records_storage(
499 info: &AttributeInfoMessage,
500 storage: &dyn Storage,
501 offset_size: u8,
502 length_size: u8,
503) -> Result<Vec<btree_v2::BTreeV2Record>> {
504 let mut addrs = vec![info.btree_name_index_address];
505 if let Some(creation_order_addr) = info.btree_creation_order_address {
506 addrs.push(creation_order_addr);
507 }
508
509 for addr in addrs {
510 if Cursor::is_undefined_offset(addr, offset_size) {
511 continue;
512 }
513
514 let header = match btree_v2::BTreeV2Header::parse_at_storage(
515 storage,
516 addr,
517 offset_size,
518 length_size,
519 ) {
520 Ok(header) => header,
521 Err(_) => continue,
522 };
523
524 if let Ok(records) = btree_v2::collect_btree_v2_records_storage(
525 storage,
526 &header,
527 offset_size,
528 length_size,
529 None,
530 &[],
531 None,
532 ) {
533 return Ok(records);
534 }
535 }
536
537 Ok(Vec::new())
538}
539
540pub(crate) fn read_one_vlen_string(
542 raw_data: &[u8],
543 offset: usize,
544 file_data: &[u8],
545 offset_size: u8,
546 padding: StringPadding,
547 encoding: StringEncoding,
548) -> Result<String> {
549 let mut cursor = Cursor::new(&raw_data[offset..]);
550 let _seq_len = cursor.read_u32_le()?;
551 let heap_addr = cursor.read_offset(offset_size)?;
552 let obj_index = cursor.read_u32_le()?;
553
554 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
555 return Ok(String::new());
556 }
557
558 let mut heap_cursor = Cursor::new(file_data);
559 heap_cursor.set_position(heap_addr);
560 let collection = GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size)?;
561
562 match collection.get_object(obj_index as u16) {
563 Some(obj) => decode_string(&obj.data, padding, encoding),
564 None => Ok(String::new()),
565 }
566}
567
568pub(crate) fn read_one_vlen_string_storage(
569 raw_data: &[u8],
570 offset: usize,
571 storage: &dyn Storage,
572 offset_size: u8,
573 length_size: u8,
574 padding: StringPadding,
575 encoding: StringEncoding,
576) -> Result<String> {
577 let mut cursor = Cursor::new(&raw_data[offset..]);
578 let _seq_len = cursor.read_u32_le()?;
579 let heap_addr = cursor.read_offset(offset_size)?;
580 let obj_index = cursor.read_u32_le()?;
581
582 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
583 return Ok(String::new());
584 }
585
586 let collection =
587 GlobalHeapCollection::parse_at_storage(storage, heap_addr, offset_size, length_size)?;
588 match collection.get_object(obj_index as u16) {
589 Some(obj) => decode_string(&obj.data, padding, encoding),
590 None => Ok(String::new()),
591 }
592}
593
594pub(crate) fn decode_string(
599 bytes: &[u8],
600 padding: StringPadding,
601 _encoding: StringEncoding,
602) -> Result<String> {
603 let trimmed = match padding {
604 StringPadding::NullTerminate => {
605 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
606 &bytes[..end]
607 }
608 StringPadding::NullPad => {
609 let end = bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
610 &bytes[..end]
611 }
612 StringPadding::SpacePad => {
613 let end = bytes.iter().rposition(|&b| b != b' ').map_or(0, |i| i + 1);
614 &bytes[..end]
615 }
616 };
617
618 String::from_utf8(trimmed.to_vec())
619 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
620}
621
622fn is_byte_vlen(base: &Datatype) -> bool {
623 matches!(base, Datatype::FixedPoint { size: 1, .. })
624}
625
626pub(crate) fn decode_varlen_byte_string(bytes: &[u8]) -> Result<String> {
627 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
628 String::from_utf8(bytes[..end].to_vec())
629 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
630}
631
632pub(crate) fn resolve_vlen_bytes(
633 raw_data: &[u8],
634 file_data: &[u8],
635 offset_size: u8,
636) -> Option<Vec<u8>> {
637 if raw_data.len() < 4 + offset_size as usize + 4 {
638 return None;
639 }
640
641 let mut cursor = Cursor::new(raw_data);
642 let seq_len = cursor.read_u32_le().ok()? as usize;
643 let heap_addr = cursor.read_offset(offset_size).ok()?;
644 let obj_index = cursor.read_u32_le().ok()? as u16;
645
646 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
647 return Some(Vec::new());
648 }
649
650 let mut heap_cursor = Cursor::new(file_data);
651 heap_cursor.set_position(heap_addr);
652 let collection =
653 GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size).ok()?;
654 let object = collection.get_object(obj_index)?;
655 Some(object.data[..object.data.len().min(seq_len)].to_vec())
656}
657
658pub(crate) fn resolve_vlen_bytes_storage(
659 raw_data: &[u8],
660 storage: &dyn Storage,
661 offset_size: u8,
662 length_size: u8,
663) -> Option<Vec<u8>> {
664 if raw_data.len() < 4 + offset_size as usize + 4 {
665 return None;
666 }
667
668 let mut cursor = Cursor::new(raw_data);
669 let seq_len = cursor.read_u32_le().ok()? as usize;
670 let heap_addr = cursor.read_offset(offset_size).ok()?;
671 let obj_index = cursor.read_u32_le().ok()? as u16;
672
673 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
674 return Some(Vec::new());
675 }
676
677 let collection =
678 GlobalHeapCollection::parse_at_storage(storage, heap_addr, offset_size, length_size)
679 .ok()?;
680 let object = collection.get_object(obj_index)?;
681 Some(object.data[..object.data.len().min(seq_len)].to_vec())
682}
683
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ByteOrder;
    use std::f64::consts::PI;

    /// Assembles an `Attribute` from its four parts to keep the tests compact.
    fn make_attr(name: &str, datatype: Datatype, shape: Vec<u64>, raw_data: Vec<u8>) -> Attribute {
        Attribute {
            name: name.to_string(),
            datatype,
            shape,
            raw_data,
        }
    }

    /// A little-endian f64 scalar round-trips through `read_scalar`.
    #[test]
    fn test_scalar_f64_attribute() {
        let value: f64 = PI;
        let attr = make_attr(
            "pi",
            Datatype::FloatingPoint {
                size: 8,
                byte_order: ByteOrder::LittleEndian,
            },
            vec![],
            value.to_le_bytes().to_vec(),
        );
        let val = attr.read_scalar::<f64>().unwrap();
        assert!((val - PI).abs() < 1e-10);
    }

    /// Four packed little-endian i32 values are read back in order by `read_1d`.
    #[test]
    fn test_1d_i32_attribute() {
        let values = [1i32, 2, 3, 4];
        let raw_data: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
        let attr = make_attr(
            "data",
            Datatype::FixedPoint {
                size: 4,
                signed: true,
                byte_order: ByteOrder::LittleEndian,
            },
            vec![4],
            raw_data,
        );
        let result = attr.read_1d::<i32>().unwrap();
        assert_eq!(result, vec![1, 2, 3, 4]);
    }

    /// Trailing NUL padding is removed from a fixed-size string.
    #[test]
    fn test_string_attribute() {
        let attr = make_attr(
            "units",
            Datatype::String {
                size: StringSize::Fixed(10),
                encoding: StringEncoding::Ascii,
                padding: StringPadding::NullPad,
            },
            vec![],
            b"meters\0\0\0\0".to_vec(),
        );
        assert_eq!(attr.read_string().unwrap(), "meters");
    }

    /// A VarLen-of-bytes payload is read as plain text.
    #[test]
    fn test_varlen_byte_string_attribute() {
        let attr = make_attr(
            "name",
            Datatype::VarLen {
                base: Box::new(Datatype::FixedPoint {
                    size: 1,
                    signed: false,
                    byte_order: ByteOrder::LittleEndian,
                }),
            },
            vec![],
            b"test_dataset".to_vec(),
        );
        assert_eq!(attr.read_string().unwrap(), "test_dataset");
    }

    /// A signed 32-bit integer is widened to f64 by `read_as_f64`.
    #[test]
    fn test_read_as_f64_from_int() {
        let attr = make_attr(
            "count",
            Datatype::FixedPoint {
                size: 4,
                signed: true,
                byte_order: ByteOrder::LittleEndian,
            },
            vec![],
            42i32.to_le_bytes().to_vec(),
        );
        let val = attr.read_as_f64().unwrap();
        assert!((val - 42.0).abs() < 1e-10);
    }
}