1use crate::error::{Error, Result};
2use crate::fractal_heap::FractalHeap;
3use crate::global_heap::GlobalHeapCollection;
4use crate::io::Cursor;
5use crate::messages::attribute::AttributeMessage;
6use crate::messages::attribute_info::AttributeInfoMessage;
7use crate::messages::dataspace::DataspaceType;
8use crate::messages::datatype::{Datatype, StringEncoding, StringPadding, StringSize};
9use crate::messages::HdfMessage;
10use crate::object_header::ObjectHeader;
11use crate::storage::Storage;
12use crate::{btree_v2, messages};
13
14#[derive(Debug, Clone)]
16pub struct Attribute {
17 pub name: String,
18 pub datatype: Datatype,
19 pub shape: Vec<u64>,
20 pub raw_data: Vec<u8>,
21}
22
23impl Attribute {
24 pub fn from_message(msg: AttributeMessage) -> Self {
26 Self::from_message_with_context(msg, None, 0)
27 }
28
29 pub fn from_message_with_context(
32 msg: AttributeMessage,
33 file_data: Option<&[u8]>,
34 offset_size: u8,
35 ) -> Self {
36 let shape = match msg.dataspace.dataspace_type {
37 DataspaceType::Scalar => vec![],
38 DataspaceType::Null => vec![0],
39 DataspaceType::Simple => msg.dataspace.dims.clone(),
40 };
41 let raw_data =
42 if let (Some(file_data), Datatype::VarLen { base }) = (file_data, &msg.datatype) {
43 if is_byte_vlen(base) && shape.is_empty() {
44 resolve_vlen_bytes(&msg.raw_data, file_data, offset_size)
45 .unwrap_or_else(|| msg.raw_data.clone())
46 } else {
47 msg.raw_data.clone()
48 }
49 } else {
50 msg.raw_data.clone()
51 };
52 Attribute {
53 name: msg.name,
54 datatype: msg.datatype,
55 shape,
56 raw_data,
57 }
58 }
59
60 pub fn num_elements(&self) -> u64 {
62 if self.shape.is_empty() {
63 1 } else {
65 self.shape.iter().product()
66 }
67 }
68
69 pub fn read_scalar<T: crate::datatype_api::H5Type>(&self) -> Result<T> {
71 T::from_bytes(&self.raw_data, &self.datatype)
72 }
73
74 pub fn read_1d<T: crate::datatype_api::H5Type>(&self) -> Result<Vec<T>> {
76 let elem_size = T::element_size(&self.datatype);
77 let n = self.num_elements() as usize;
78 let mut result = Vec::with_capacity(n);
79 for i in 0..n {
80 let start = i * elem_size;
81 let end = start + elem_size;
82 if end > self.raw_data.len() {
83 return Err(Error::InvalidData(format!(
84 "attribute data too short: need {} bytes, have {}",
85 end,
86 self.raw_data.len()
87 )));
88 }
89 result.push(T::from_bytes(&self.raw_data[start..end], &self.datatype)?);
90 }
91 Ok(result)
92 }
93
94 pub fn read_string(&self) -> Result<String> {
99 match &self.datatype {
100 Datatype::VarLen { base } if is_byte_vlen(base) => {
101 decode_varlen_byte_string(&self.raw_data)
102 }
103 Datatype::String {
104 size,
105 encoding,
106 padding,
107 } => match size {
108 StringSize::Fixed(len) => {
109 let len = *len as usize;
110 let bytes = if self.raw_data.len() < len {
111 &self.raw_data
112 } else {
113 &self.raw_data[..len]
114 };
115 decode_string(bytes, *padding, *encoding)
116 }
117 StringSize::Variable => {
118 if self.raw_data.len() >= 12 {
122 let trimmed = match padding {
124 StringPadding::NullTerminate => {
125 let end = self
126 .raw_data
127 .iter()
128 .position(|&b| b == 0)
129 .unwrap_or(self.raw_data.len());
130 &self.raw_data[..end]
131 }
132 _ => &self.raw_data,
133 };
134 if let Ok(s) = String::from_utf8(trimmed.to_vec()) {
135 if s.chars()
136 .all(|c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
137 {
138 return Ok(s);
139 }
140 }
141 }
142 decode_string(&self.raw_data, *padding, *encoding)
143 }
144 },
145 _ => Err(Error::TypeMismatch {
146 expected: "String".into(),
147 actual: format!("{:?}", self.datatype),
148 }),
149 }
150 }
151
152 pub fn read_vlen_string(&self, file_data: &[u8], offset_size: u8) -> Result<String> {
157 match &self.datatype {
158 Datatype::String {
159 size: StringSize::Variable,
160 encoding,
161 padding,
162 } => {
163 let ref_size = 4 + offset_size as usize + 4; if self.raw_data.len() < ref_size {
165 return decode_string(&self.raw_data, *padding, *encoding);
167 }
168 let bytes = read_one_vlen_string(
169 &self.raw_data,
170 0,
171 file_data,
172 offset_size,
173 *padding,
174 *encoding,
175 )?;
176 Ok(bytes)
177 }
178 Datatype::String {
179 size: StringSize::Fixed(_),
180 ..
181 } => self.read_string(),
182 _ => Err(Error::TypeMismatch {
183 expected: "String".into(),
184 actual: format!("{:?}", self.datatype),
185 }),
186 }
187 }
188
189 pub fn read_vlen_strings(&self, file_data: &[u8], offset_size: u8) -> Result<Vec<String>> {
191 match &self.datatype {
192 Datatype::String {
193 size: StringSize::Variable,
194 encoding,
195 padding,
196 } => {
197 let ref_size = 4 + offset_size as usize + 4;
198 let n = self.num_elements() as usize;
199 let mut result = Vec::with_capacity(n);
200 for i in 0..n {
201 let offset = i * ref_size;
202 if offset + ref_size > self.raw_data.len() {
203 break;
204 }
205 result.push(read_one_vlen_string(
206 &self.raw_data,
207 offset,
208 file_data,
209 offset_size,
210 *padding,
211 *encoding,
212 )?);
213 }
214 Ok(result)
215 }
216 Datatype::String {
217 size: StringSize::Fixed(_),
218 ..
219 } => self.read_strings(),
220 _ => Err(Error::TypeMismatch {
221 expected: "String array".into(),
222 actual: format!("{:?}", self.datatype),
223 }),
224 }
225 }
226
227 pub fn read_strings(&self) -> Result<Vec<String>> {
229 match &self.datatype {
230 Datatype::String {
231 size: StringSize::Fixed(len),
232 encoding,
233 padding,
234 } => {
235 let len = *len as usize;
236 let n = self.num_elements() as usize;
237 let mut result = Vec::with_capacity(n);
238 for i in 0..n {
239 let start = i * len;
240 let end = (start + len).min(self.raw_data.len());
241 if start >= self.raw_data.len() {
242 break;
243 }
244 result.push(decode_string(
245 &self.raw_data[start..end],
246 *padding,
247 *encoding,
248 )?);
249 }
250 Ok(result)
251 }
252 _ => Err(Error::TypeMismatch {
253 expected: "String array".into(),
254 actual: format!("{:?}", self.datatype),
255 }),
256 }
257 }
258
259 pub fn read_as_f64(&self) -> Result<f64> {
261 match &self.datatype {
262 Datatype::FloatingPoint { size, .. } => {
263 let val: f64 = match size {
264 4 => {
265 let v = self.read_scalar::<f32>()?;
266 v as f64
267 }
268 8 => self.read_scalar::<f64>()?,
269 _ => {
270 return Err(Error::TypeMismatch {
271 expected: "f32 or f64".into(),
272 actual: format!("FloatingPoint(size={})", size),
273 })
274 }
275 };
276 Ok(val)
277 }
278 Datatype::FixedPoint { size, signed, .. } => {
279 let val = match (size, signed) {
280 (1, true) => self.read_scalar::<i8>()? as f64,
281 (1, false) => self.read_scalar::<u8>()? as f64,
282 (2, true) => self.read_scalar::<i16>()? as f64,
283 (2, false) => self.read_scalar::<u16>()? as f64,
284 (4, true) => self.read_scalar::<i32>()? as f64,
285 (4, false) => self.read_scalar::<u32>()? as f64,
286 (8, true) => self.read_scalar::<i64>()? as f64,
287 (8, false) => self.read_scalar::<u64>()? as f64,
288 _ => {
289 return Err(Error::TypeMismatch {
290 expected: "numeric".into(),
291 actual: format!("FixedPoint(size={})", size),
292 })
293 }
294 };
295 Ok(val)
296 }
297 _ => Err(Error::TypeMismatch {
298 expected: "numeric".into(),
299 actual: format!("{:?}", self.datatype),
300 }),
301 }
302 }
303}
304
305#[allow(dead_code)]
306pub(crate) fn collect_attribute_messages(
307 header: &ObjectHeader,
308 file_data: &[u8],
309 offset_size: u8,
310 length_size: u8,
311) -> Result<Vec<AttributeMessage>> {
312 let mut attributes = Vec::new();
313 let mut attribute_info = None;
314
315 for msg in &header.messages {
316 match msg {
317 HdfMessage::Attribute(attr) => attributes.push(attr.clone()),
318 HdfMessage::AttributeInfo(info) => attribute_info = Some(info.clone()),
319 _ => {}
320 }
321 }
322
323 if let Some(info) = attribute_info {
324 attributes.extend(load_dense_attribute_messages(
325 &info,
326 file_data,
327 offset_size,
328 length_size,
329 )?);
330 }
331
332 Ok(attributes)
333}
334
335pub(crate) fn collect_attribute_messages_storage(
336 header: &ObjectHeader,
337 storage: &dyn Storage,
338 offset_size: u8,
339 length_size: u8,
340) -> Result<Vec<AttributeMessage>> {
341 let mut attributes = Vec::new();
342 let mut attribute_info = None;
343
344 for msg in &header.messages {
345 match msg {
346 HdfMessage::Attribute(attr) => attributes.push(attr.clone()),
347 HdfMessage::AttributeInfo(info) => attribute_info = Some(info.clone()),
348 _ => {}
349 }
350 }
351
352 if let Some(info) = attribute_info {
353 attributes.extend(load_dense_attribute_messages_storage(
354 &info,
355 storage,
356 offset_size,
357 length_size,
358 )?);
359 }
360
361 Ok(attributes)
362}
363
364#[allow(dead_code)]
365fn load_dense_attribute_messages(
366 info: &AttributeInfoMessage,
367 file_data: &[u8],
368 offset_size: u8,
369 length_size: u8,
370) -> Result<Vec<AttributeMessage>> {
371 if Cursor::is_undefined_offset(info.fractal_heap_address, offset_size) {
372 return Ok(Vec::new());
373 }
374
375 let mut heap_cursor = Cursor::new(file_data);
376 heap_cursor.set_position(info.fractal_heap_address);
377 let heap = FractalHeap::parse(&mut heap_cursor, offset_size, length_size)?;
378
379 let records =
380 load_dense_attribute_records(info, file_data, offset_size, length_size).unwrap_or_default();
381
382 let mut attributes = Vec::new();
383 for record in records {
384 let heap_id = match record {
385 btree_v2::BTreeV2Record::AttributeNameHash { heap_id, .. }
386 | btree_v2::BTreeV2Record::AttributeCreationOrder { heap_id, .. } => heap_id,
387 _ => continue,
388 };
389
390 let managed_bytes =
391 match heap.get_managed_object(&heap_id, file_data, offset_size, length_size) {
392 Ok(bytes) => bytes,
393 Err(_) => continue,
394 };
395
396 let mut attr_cursor = Cursor::new(&managed_bytes);
397 if let Ok(attr) = messages::attribute::parse(
398 &mut attr_cursor,
399 offset_size,
400 length_size,
401 managed_bytes.len(),
402 ) {
403 attributes.push(attr);
404 }
405 }
406
407 Ok(attributes)
408}
409
410fn load_dense_attribute_messages_storage(
411 info: &AttributeInfoMessage,
412 storage: &dyn Storage,
413 offset_size: u8,
414 length_size: u8,
415) -> Result<Vec<AttributeMessage>> {
416 if Cursor::is_undefined_offset(info.fractal_heap_address, offset_size) {
417 return Ok(Vec::new());
418 }
419
420 let heap = FractalHeap::parse_at_storage(
421 storage,
422 info.fractal_heap_address,
423 offset_size,
424 length_size,
425 )?;
426
427 let records = load_dense_attribute_records_storage(info, storage, offset_size, length_size)
428 .unwrap_or_default();
429
430 let mut attributes = Vec::new();
431 for record in records {
432 let heap_id = match record {
433 btree_v2::BTreeV2Record::AttributeNameHash { heap_id, .. }
434 | btree_v2::BTreeV2Record::AttributeCreationOrder { heap_id, .. } => heap_id,
435 _ => continue,
436 };
437
438 let managed_bytes =
439 match heap.get_managed_object_storage(&heap_id, storage, offset_size, length_size) {
440 Ok(bytes) => bytes,
441 Err(_) => continue,
442 };
443
444 let mut attr_cursor = Cursor::new(&managed_bytes);
445 if let Ok(attr) = messages::attribute::parse(
446 &mut attr_cursor,
447 offset_size,
448 length_size,
449 managed_bytes.len(),
450 ) {
451 attributes.push(attr);
452 }
453 }
454
455 Ok(attributes)
456}
457
458#[allow(dead_code)]
459fn load_dense_attribute_records(
460 info: &AttributeInfoMessage,
461 file_data: &[u8],
462 offset_size: u8,
463 length_size: u8,
464) -> Result<Vec<btree_v2::BTreeV2Record>> {
465 let mut addrs = vec![info.btree_name_index_address];
466 if let Some(creation_order_addr) = info.btree_creation_order_address {
467 addrs.push(creation_order_addr);
468 }
469
470 for addr in addrs {
471 if Cursor::is_undefined_offset(addr, offset_size) {
472 continue;
473 }
474
475 let mut btree_cursor = Cursor::new(file_data);
476 btree_cursor.set_position(addr);
477 let header =
478 match btree_v2::BTreeV2Header::parse(&mut btree_cursor, offset_size, length_size) {
479 Ok(header) => header,
480 Err(_) => continue,
481 };
482
483 if let Ok(records) = btree_v2::collect_btree_v2_records(
484 file_data,
485 &header,
486 offset_size,
487 length_size,
488 None,
489 &[],
490 None,
491 ) {
492 return Ok(records);
493 }
494 }
495
496 Ok(Vec::new())
497}
498
499fn load_dense_attribute_records_storage(
500 info: &AttributeInfoMessage,
501 storage: &dyn Storage,
502 offset_size: u8,
503 length_size: u8,
504) -> Result<Vec<btree_v2::BTreeV2Record>> {
505 let mut addrs = vec![info.btree_name_index_address];
506 if let Some(creation_order_addr) = info.btree_creation_order_address {
507 addrs.push(creation_order_addr);
508 }
509
510 for addr in addrs {
511 if Cursor::is_undefined_offset(addr, offset_size) {
512 continue;
513 }
514
515 let header = match btree_v2::BTreeV2Header::parse_at_storage(
516 storage,
517 addr,
518 offset_size,
519 length_size,
520 ) {
521 Ok(header) => header,
522 Err(_) => continue,
523 };
524
525 if let Ok(records) = btree_v2::collect_btree_v2_records_storage(
526 storage,
527 &header,
528 offset_size,
529 length_size,
530 None,
531 &[],
532 None,
533 ) {
534 return Ok(records);
535 }
536 }
537
538 Ok(Vec::new())
539}
540
541pub(crate) fn read_one_vlen_string(
543 raw_data: &[u8],
544 offset: usize,
545 file_data: &[u8],
546 offset_size: u8,
547 padding: StringPadding,
548 encoding: StringEncoding,
549) -> Result<String> {
550 let mut cursor = Cursor::new(&raw_data[offset..]);
551 let _seq_len = cursor.read_u32_le()?;
552 let heap_addr = cursor.read_offset(offset_size)?;
553 let obj_index = cursor.read_u32_le()?;
554
555 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
556 return Ok(String::new());
557 }
558
559 let mut heap_cursor = Cursor::new(file_data);
560 heap_cursor.set_position(heap_addr);
561 let collection = GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size)?;
562
563 match collection.get_object(obj_index as u16) {
564 Some(obj) => decode_string(&obj.data, padding, encoding),
565 None => Ok(String::new()),
566 }
567}
568
569pub(crate) fn read_one_vlen_string_storage(
570 raw_data: &[u8],
571 offset: usize,
572 storage: &dyn Storage,
573 offset_size: u8,
574 length_size: u8,
575 padding: StringPadding,
576 encoding: StringEncoding,
577) -> Result<String> {
578 let mut cursor = Cursor::new(&raw_data[offset..]);
579 let _seq_len = cursor.read_u32_le()?;
580 let heap_addr = cursor.read_offset(offset_size)?;
581 let obj_index = cursor.read_u32_le()?;
582
583 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
584 return Ok(String::new());
585 }
586
587 let collection =
588 GlobalHeapCollection::parse_at_storage(storage, heap_addr, offset_size, length_size)?;
589 match collection.get_object(obj_index as u16) {
590 Some(obj) => decode_string(&obj.data, padding, encoding),
591 None => Ok(String::new()),
592 }
593}
594
595pub(crate) fn decode_string(
600 bytes: &[u8],
601 padding: StringPadding,
602 _encoding: StringEncoding,
603) -> Result<String> {
604 let trimmed = match padding {
605 StringPadding::NullTerminate => {
606 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
607 &bytes[..end]
608 }
609 StringPadding::NullPad => {
610 let end = bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
611 &bytes[..end]
612 }
613 StringPadding::SpacePad => {
614 let end = bytes.iter().rposition(|&b| b != b' ').map_or(0, |i| i + 1);
615 &bytes[..end]
616 }
617 };
618
619 String::from_utf8(trimmed.to_vec())
620 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
621}
622
623fn is_byte_vlen(base: &Datatype) -> bool {
624 matches!(base, Datatype::FixedPoint { size: 1, .. })
625}
626
627pub(crate) fn decode_varlen_byte_string(bytes: &[u8]) -> Result<String> {
628 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
629 String::from_utf8(bytes[..end].to_vec())
630 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
631}
632
633pub(crate) fn resolve_vlen_bytes(
634 raw_data: &[u8],
635 file_data: &[u8],
636 offset_size: u8,
637) -> Option<Vec<u8>> {
638 if raw_data.len() < 4 + offset_size as usize + 4 {
639 return None;
640 }
641
642 let mut cursor = Cursor::new(raw_data);
643 let seq_len = cursor.read_u32_le().ok()? as usize;
644 let heap_addr = cursor.read_offset(offset_size).ok()?;
645 let obj_index = cursor.read_u32_le().ok()? as u16;
646
647 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
648 return Some(Vec::new());
649 }
650
651 let mut heap_cursor = Cursor::new(file_data);
652 heap_cursor.set_position(heap_addr);
653 let collection =
654 GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size).ok()?;
655 let object = collection.get_object(obj_index)?;
656 Some(object.data[..object.data.len().min(seq_len)].to_vec())
657}
658
659pub(crate) fn resolve_vlen_bytes_storage(
660 raw_data: &[u8],
661 storage: &dyn Storage,
662 offset_size: u8,
663 length_size: u8,
664) -> Option<Vec<u8>> {
665 if raw_data.len() < 4 + offset_size as usize + 4 {
666 return None;
667 }
668
669 let mut cursor = Cursor::new(raw_data);
670 let seq_len = cursor.read_u32_le().ok()? as usize;
671 let heap_addr = cursor.read_offset(offset_size).ok()?;
672 let obj_index = cursor.read_u32_le().ok()? as u16;
673
674 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
675 return Some(Vec::new());
676 }
677
678 let collection =
679 GlobalHeapCollection::parse_at_storage(storage, heap_addr, offset_size, length_size)
680 .ok()?;
681 let object = collection.get_object(obj_index)?;
682 Some(object.data[..object.data.len().min(seq_len)].to_vec())
683}
684
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ByteOrder;
    use std::f64::consts::PI;

    /// Little-endian signed 32-bit integer datatype shared by the integer tests.
    fn i32_le() -> Datatype {
        Datatype::FixedPoint {
            size: 4,
            signed: true,
            byte_order: ByteOrder::LittleEndian,
        }
    }

    /// Builds an attribute from its four parts.
    fn attribute(name: &str, datatype: Datatype, shape: Vec<u64>, raw_data: Vec<u8>) -> Attribute {
        Attribute {
            name: name.to_string(),
            datatype,
            shape,
            raw_data,
        }
    }

    // A scalar f64 payload round-trips through `read_scalar`.
    #[test]
    fn test_scalar_f64_attribute() {
        let datatype = Datatype::FloatingPoint {
            size: 8,
            byte_order: ByteOrder::LittleEndian,
        };
        let attr = attribute("pi", datatype, vec![], PI.to_le_bytes().to_vec());
        let val = attr.read_scalar::<f64>().unwrap();
        assert!((val - PI).abs() < 1e-10);
    }

    // Four packed i32 values are decoded in order by `read_1d`.
    #[test]
    fn test_1d_i32_attribute() {
        let raw_data: Vec<u8> = [1i32, 2, 3, 4]
            .iter()
            .flat_map(|v| v.to_le_bytes().to_vec())
            .collect();
        let attr = attribute("data", i32_le(), vec![4], raw_data);
        let result = attr.read_1d::<i32>().unwrap();
        assert_eq!(result, vec![1, 2, 3, 4]);
    }

    // Trailing NULs of a null-padded fixed-size string are removed.
    #[test]
    fn test_string_attribute() {
        let datatype = Datatype::String {
            size: StringSize::Fixed(10),
            encoding: StringEncoding::Ascii,
            padding: StringPadding::NullPad,
        };
        let attr = attribute("units", datatype, vec![], b"meters\0\0\0\0".to_vec());
        assert_eq!(attr.read_string().unwrap(), "meters");
    }

    // A variable-length sequence of single bytes is readable as a string.
    #[test]
    fn test_varlen_byte_string_attribute() {
        let datatype = Datatype::VarLen {
            base: Box::new(Datatype::FixedPoint {
                size: 1,
                signed: false,
                byte_order: ByteOrder::LittleEndian,
            }),
        };
        let attr = attribute("name", datatype, vec![], b"test_dataset".to_vec());
        assert_eq!(attr.read_string().unwrap(), "test_dataset");
    }

    // Integer payloads are widened to f64 by `read_as_f64`.
    #[test]
    fn test_read_as_f64_from_int() {
        let attr = attribute("count", i32_le(), vec![], 42i32.to_le_bytes().to_vec());
        let val = attr.read_as_f64().unwrap();
        assert!((val - 42.0).abs() < 1e-10);
    }
}