1use crate::error::{Error, Result};
2use crate::fractal_heap::FractalHeap;
3use crate::global_heap::GlobalHeapCollection;
4use crate::io::Cursor;
5use crate::messages::attribute::AttributeMessage;
6use crate::messages::attribute_info::AttributeInfoMessage;
7use crate::messages::dataspace::DataspaceType;
8use crate::messages::datatype::{Datatype, StringEncoding, StringPadding, StringSize};
9use crate::messages::HdfMessage;
10use crate::object_header::ObjectHeader;
11use crate::{btree_v2, messages};
12
/// An attribute in decoded form: its name, element datatype, shape and raw bytes.
///
/// Values are usually built from an [`AttributeMessage`] through
/// `Attribute::from_message` or `Attribute::from_message_with_context`, but
/// every field is public so a value can also be assembled directly.
#[derive(Debug, Clone)]
pub struct Attribute {
    /// Attribute name, taken verbatim from the attribute message.
    pub name: String,
    /// Datatype of the stored elements, taken from the attribute message.
    pub datatype: Datatype,
    /// Dimensions of the attribute. When built from a message this is empty for
    /// a scalar dataspace, `[0]` for a null dataspace, and the dataspace's
    /// dimensions for a simple dataspace.
    pub shape: Vec<u64>,
    /// Element bytes. When built from a message these are the message's raw
    /// bytes, except for a scalar variable-length byte sequence constructed
    /// with file data, where they are the bytes returned by
    /// `resolve_vlen_bytes` (if that resolution succeeds).
    pub raw_data: Vec<u8>,
}
21
22impl Attribute {
23 pub fn from_message(msg: AttributeMessage) -> Self {
25 Self::from_message_with_context(msg, None, 0)
26 }
27
28 pub fn from_message_with_context(
31 msg: AttributeMessage,
32 file_data: Option<&[u8]>,
33 offset_size: u8,
34 ) -> Self {
35 let shape = match msg.dataspace.dataspace_type {
36 DataspaceType::Scalar => vec![],
37 DataspaceType::Null => vec![0],
38 DataspaceType::Simple => msg.dataspace.dims.clone(),
39 };
40 let raw_data =
41 if let (Some(file_data), Datatype::VarLen { base }) = (file_data, &msg.datatype) {
42 if is_byte_vlen(base) && shape.is_empty() {
43 resolve_vlen_bytes(&msg.raw_data, file_data, offset_size)
44 .unwrap_or_else(|| msg.raw_data.clone())
45 } else {
46 msg.raw_data.clone()
47 }
48 } else {
49 msg.raw_data.clone()
50 };
51 Attribute {
52 name: msg.name,
53 datatype: msg.datatype,
54 shape,
55 raw_data,
56 }
57 }
58
59 pub fn num_elements(&self) -> u64 {
61 if self.shape.is_empty() {
62 1 } else {
64 self.shape.iter().product()
65 }
66 }
67
68 pub fn read_scalar<T: crate::datatype_api::H5Type>(&self) -> Result<T> {
70 T::from_bytes(&self.raw_data, &self.datatype)
71 }
72
73 pub fn read_1d<T: crate::datatype_api::H5Type>(&self) -> Result<Vec<T>> {
75 let elem_size = T::element_size(&self.datatype);
76 let n = self.num_elements() as usize;
77 let mut result = Vec::with_capacity(n);
78 for i in 0..n {
79 let start = i * elem_size;
80 let end = start + elem_size;
81 if end > self.raw_data.len() {
82 return Err(Error::InvalidData(format!(
83 "attribute data too short: need {} bytes, have {}",
84 end,
85 self.raw_data.len()
86 )));
87 }
88 result.push(T::from_bytes(&self.raw_data[start..end], &self.datatype)?);
89 }
90 Ok(result)
91 }
92
93 pub fn read_string(&self) -> Result<String> {
98 match &self.datatype {
99 Datatype::VarLen { base } if is_byte_vlen(base) => {
100 decode_varlen_byte_string(&self.raw_data)
101 }
102 Datatype::String {
103 size,
104 encoding,
105 padding,
106 } => match size {
107 StringSize::Fixed(len) => {
108 let len = *len as usize;
109 let bytes = if self.raw_data.len() < len {
110 &self.raw_data
111 } else {
112 &self.raw_data[..len]
113 };
114 decode_string(bytes, *padding, *encoding)
115 }
116 StringSize::Variable => {
117 if self.raw_data.len() >= 12 {
121 let trimmed = match padding {
123 StringPadding::NullTerminate => {
124 let end = self
125 .raw_data
126 .iter()
127 .position(|&b| b == 0)
128 .unwrap_or(self.raw_data.len());
129 &self.raw_data[..end]
130 }
131 _ => &self.raw_data,
132 };
133 if let Ok(s) = String::from_utf8(trimmed.to_vec()) {
134 if s.chars()
135 .all(|c| !c.is_control() || c == '\n' || c == '\r' || c == '\t')
136 {
137 return Ok(s);
138 }
139 }
140 }
141 decode_string(&self.raw_data, *padding, *encoding)
142 }
143 },
144 _ => Err(Error::TypeMismatch {
145 expected: "String".into(),
146 actual: format!("{:?}", self.datatype),
147 }),
148 }
149 }
150
151 pub fn read_vlen_string(&self, file_data: &[u8], offset_size: u8) -> Result<String> {
156 match &self.datatype {
157 Datatype::String {
158 size: StringSize::Variable,
159 encoding,
160 padding,
161 } => {
162 let ref_size = 4 + offset_size as usize + 4; if self.raw_data.len() < ref_size {
164 return decode_string(&self.raw_data, *padding, *encoding);
166 }
167 let bytes = read_one_vlen_string(
168 &self.raw_data,
169 0,
170 file_data,
171 offset_size,
172 *padding,
173 *encoding,
174 )?;
175 Ok(bytes)
176 }
177 Datatype::String {
178 size: StringSize::Fixed(_),
179 ..
180 } => self.read_string(),
181 _ => Err(Error::TypeMismatch {
182 expected: "String".into(),
183 actual: format!("{:?}", self.datatype),
184 }),
185 }
186 }
187
188 pub fn read_vlen_strings(&self, file_data: &[u8], offset_size: u8) -> Result<Vec<String>> {
190 match &self.datatype {
191 Datatype::String {
192 size: StringSize::Variable,
193 encoding,
194 padding,
195 } => {
196 let ref_size = 4 + offset_size as usize + 4;
197 let n = self.num_elements() as usize;
198 let mut result = Vec::with_capacity(n);
199 for i in 0..n {
200 let offset = i * ref_size;
201 if offset + ref_size > self.raw_data.len() {
202 break;
203 }
204 result.push(read_one_vlen_string(
205 &self.raw_data,
206 offset,
207 file_data,
208 offset_size,
209 *padding,
210 *encoding,
211 )?);
212 }
213 Ok(result)
214 }
215 Datatype::String {
216 size: StringSize::Fixed(_),
217 ..
218 } => self.read_strings(),
219 _ => Err(Error::TypeMismatch {
220 expected: "String array".into(),
221 actual: format!("{:?}", self.datatype),
222 }),
223 }
224 }
225
226 pub fn read_strings(&self) -> Result<Vec<String>> {
228 match &self.datatype {
229 Datatype::String {
230 size: StringSize::Fixed(len),
231 encoding,
232 padding,
233 } => {
234 let len = *len as usize;
235 let n = self.num_elements() as usize;
236 let mut result = Vec::with_capacity(n);
237 for i in 0..n {
238 let start = i * len;
239 let end = (start + len).min(self.raw_data.len());
240 if start >= self.raw_data.len() {
241 break;
242 }
243 result.push(decode_string(
244 &self.raw_data[start..end],
245 *padding,
246 *encoding,
247 )?);
248 }
249 Ok(result)
250 }
251 _ => Err(Error::TypeMismatch {
252 expected: "String array".into(),
253 actual: format!("{:?}", self.datatype),
254 }),
255 }
256 }
257
258 pub fn read_as_f64(&self) -> Result<f64> {
260 match &self.datatype {
261 Datatype::FloatingPoint { size, .. } => {
262 let val: f64 = match size {
263 4 => {
264 let v = self.read_scalar::<f32>()?;
265 v as f64
266 }
267 8 => self.read_scalar::<f64>()?,
268 _ => {
269 return Err(Error::TypeMismatch {
270 expected: "f32 or f64".into(),
271 actual: format!("FloatingPoint(size={})", size),
272 })
273 }
274 };
275 Ok(val)
276 }
277 Datatype::FixedPoint { size, signed, .. } => {
278 let val = match (size, signed) {
279 (1, true) => self.read_scalar::<i8>()? as f64,
280 (1, false) => self.read_scalar::<u8>()? as f64,
281 (2, true) => self.read_scalar::<i16>()? as f64,
282 (2, false) => self.read_scalar::<u16>()? as f64,
283 (4, true) => self.read_scalar::<i32>()? as f64,
284 (4, false) => self.read_scalar::<u32>()? as f64,
285 (8, true) => self.read_scalar::<i64>()? as f64,
286 (8, false) => self.read_scalar::<u64>()? as f64,
287 _ => {
288 return Err(Error::TypeMismatch {
289 expected: "numeric".into(),
290 actual: format!("FixedPoint(size={})", size),
291 })
292 }
293 };
294 Ok(val)
295 }
296 _ => Err(Error::TypeMismatch {
297 expected: "numeric".into(),
298 actual: format!("{:?}", self.datatype),
299 }),
300 }
301 }
302}
303
304pub(crate) fn collect_attribute_messages(
305 header: &ObjectHeader,
306 file_data: &[u8],
307 offset_size: u8,
308 length_size: u8,
309) -> Result<Vec<AttributeMessage>> {
310 let mut attributes = Vec::new();
311 let mut attribute_info = None;
312
313 for msg in &header.messages {
314 match msg {
315 HdfMessage::Attribute(attr) => attributes.push(attr.clone()),
316 HdfMessage::AttributeInfo(info) => attribute_info = Some(info.clone()),
317 _ => {}
318 }
319 }
320
321 if let Some(info) = attribute_info {
322 attributes.extend(load_dense_attribute_messages(
323 &info,
324 file_data,
325 offset_size,
326 length_size,
327 )?);
328 }
329
330 Ok(attributes)
331}
332
333fn load_dense_attribute_messages(
334 info: &AttributeInfoMessage,
335 file_data: &[u8],
336 offset_size: u8,
337 length_size: u8,
338) -> Result<Vec<AttributeMessage>> {
339 if Cursor::is_undefined_offset(info.fractal_heap_address, offset_size) {
340 return Ok(Vec::new());
341 }
342
343 let mut heap_cursor = Cursor::new(file_data);
344 heap_cursor.set_position(info.fractal_heap_address);
345 let heap = FractalHeap::parse(&mut heap_cursor, offset_size, length_size)?;
346
347 let records =
348 load_dense_attribute_records(info, file_data, offset_size, length_size).unwrap_or_default();
349
350 let mut attributes = Vec::new();
351 for record in records {
352 let heap_id = match record {
353 btree_v2::BTreeV2Record::AttributeNameHash { heap_id, .. }
354 | btree_v2::BTreeV2Record::AttributeCreationOrder { heap_id, .. } => heap_id,
355 _ => continue,
356 };
357
358 let managed_bytes =
359 match heap.get_managed_object(&heap_id, file_data, offset_size, length_size) {
360 Ok(bytes) => bytes,
361 Err(_) => continue,
362 };
363
364 let mut attr_cursor = Cursor::new(&managed_bytes);
365 if let Ok(attr) = messages::attribute::parse(
366 &mut attr_cursor,
367 offset_size,
368 length_size,
369 managed_bytes.len(),
370 ) {
371 attributes.push(attr);
372 }
373 }
374
375 Ok(attributes)
376}
377
378fn load_dense_attribute_records(
379 info: &AttributeInfoMessage,
380 file_data: &[u8],
381 offset_size: u8,
382 length_size: u8,
383) -> Result<Vec<btree_v2::BTreeV2Record>> {
384 let mut addrs = vec![info.btree_name_index_address];
385 if let Some(creation_order_addr) = info.btree_creation_order_address {
386 addrs.push(creation_order_addr);
387 }
388
389 for addr in addrs {
390 if Cursor::is_undefined_offset(addr, offset_size) {
391 continue;
392 }
393
394 let mut btree_cursor = Cursor::new(file_data);
395 btree_cursor.set_position(addr);
396 let header =
397 match btree_v2::BTreeV2Header::parse(&mut btree_cursor, offset_size, length_size) {
398 Ok(header) => header,
399 Err(_) => continue,
400 };
401
402 if let Ok(records) = btree_v2::collect_btree_v2_records(
403 file_data,
404 &header,
405 offset_size,
406 length_size,
407 None,
408 &[],
409 None,
410 ) {
411 return Ok(records);
412 }
413 }
414
415 Ok(Vec::new())
416}
417
418pub(crate) fn read_one_vlen_string(
420 raw_data: &[u8],
421 offset: usize,
422 file_data: &[u8],
423 offset_size: u8,
424 padding: StringPadding,
425 encoding: StringEncoding,
426) -> Result<String> {
427 let mut cursor = Cursor::new(&raw_data[offset..]);
428 let _seq_len = cursor.read_u32_le()?;
429 let heap_addr = cursor.read_offset(offset_size)?;
430 let obj_index = cursor.read_u32_le()?;
431
432 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
433 return Ok(String::new());
434 }
435
436 let mut heap_cursor = Cursor::new(file_data);
437 heap_cursor.set_position(heap_addr);
438 let collection = GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size)?;
439
440 match collection.get_object(obj_index as u16) {
441 Some(obj) => decode_string(&obj.data, padding, encoding),
442 None => Ok(String::new()),
443 }
444}
445
446pub(crate) fn decode_string(
451 bytes: &[u8],
452 padding: StringPadding,
453 _encoding: StringEncoding,
454) -> Result<String> {
455 let trimmed = match padding {
456 StringPadding::NullTerminate => {
457 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
458 &bytes[..end]
459 }
460 StringPadding::NullPad => {
461 let end = bytes.iter().rposition(|&b| b != 0).map_or(0, |i| i + 1);
462 &bytes[..end]
463 }
464 StringPadding::SpacePad => {
465 let end = bytes.iter().rposition(|&b| b != b' ').map_or(0, |i| i + 1);
466 &bytes[..end]
467 }
468 };
469
470 String::from_utf8(trimmed.to_vec())
471 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
472}
473
474fn is_byte_vlen(base: &Datatype) -> bool {
475 matches!(base, Datatype::FixedPoint { size: 1, .. })
476}
477
478pub(crate) fn decode_varlen_byte_string(bytes: &[u8]) -> Result<String> {
479 let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
480 String::from_utf8(bytes[..end].to_vec())
481 .map_err(|e| Error::InvalidData(format!("invalid string data: {e}")))
482}
483
484pub(crate) fn resolve_vlen_bytes(
485 raw_data: &[u8],
486 file_data: &[u8],
487 offset_size: u8,
488) -> Option<Vec<u8>> {
489 if raw_data.len() < 4 + offset_size as usize + 4 {
490 return None;
491 }
492
493 let mut cursor = Cursor::new(raw_data);
494 let seq_len = cursor.read_u32_le().ok()? as usize;
495 let heap_addr = cursor.read_offset(offset_size).ok()?;
496 let obj_index = cursor.read_u32_le().ok()? as u16;
497
498 if Cursor::is_undefined_offset(heap_addr, offset_size) || obj_index == 0 {
499 return Some(Vec::new());
500 }
501
502 let mut heap_cursor = Cursor::new(file_data);
503 heap_cursor.set_position(heap_addr);
504 let collection =
505 GlobalHeapCollection::parse(&mut heap_cursor, offset_size, offset_size).ok()?;
506 let object = collection.get_object(obj_index)?;
507 Some(object.data[..object.data.len().min(seq_len)].to_vec())
508}
509
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ByteOrder;
    use std::f64::consts::PI;

    /// Assembles an [`Attribute`] from its four parts.
    fn attribute(name: &str, datatype: Datatype, shape: Vec<u64>, raw_data: Vec<u8>) -> Attribute {
        Attribute {
            name: name.to_string(),
            datatype,
            shape,
            raw_data,
        }
    }

    /// A scalar little-endian f64 is read back unchanged.
    #[test]
    fn test_scalar_f64_attribute() {
        let datatype = Datatype::FloatingPoint {
            size: 8,
            byte_order: ByteOrder::LittleEndian,
        };
        let attr = attribute("pi", datatype, vec![], PI.to_le_bytes().to_vec());

        let decoded = attr.read_scalar::<f64>().unwrap();
        assert!((decoded - PI).abs() < 1e-10);
    }

    /// Four packed little-endian i32 values are read back in order.
    #[test]
    fn test_1d_i32_attribute() {
        let expected = [1i32, 2, 3, 4];
        let packed: Vec<u8> = expected.iter().flat_map(|v| v.to_le_bytes()).collect();
        let datatype = Datatype::FixedPoint {
            size: 4,
            signed: true,
            byte_order: ByteOrder::LittleEndian,
        };
        let attr = attribute("data", datatype, vec![4], packed);

        let decoded = attr.read_1d::<i32>().unwrap();
        assert_eq!(decoded, vec![1, 2, 3, 4]);
    }

    /// A NUL-padded fixed-size string loses its trailing padding.
    #[test]
    fn test_string_attribute() {
        let datatype = Datatype::String {
            size: StringSize::Fixed(10),
            encoding: StringEncoding::Ascii,
            padding: StringPadding::NullPad,
        };
        let attr = attribute("units", datatype, vec![], b"meters\0\0\0\0".to_vec());

        assert_eq!(attr.read_string().unwrap(), "meters");
    }

    /// A variable-length sequence of single bytes is read as text.
    #[test]
    fn test_varlen_byte_string_attribute() {
        let byte_type = Datatype::FixedPoint {
            size: 1,
            signed: false,
            byte_order: ByteOrder::LittleEndian,
        };
        let datatype = Datatype::VarLen {
            base: Box::new(byte_type),
        };
        let attr = attribute("name", datatype, vec![], b"test_dataset".to_vec());

        assert_eq!(attr.read_string().unwrap(), "test_dataset");
    }

    /// A signed 32-bit integer attribute converts to the matching f64.
    #[test]
    fn test_read_as_f64_from_int() {
        let datatype = Datatype::FixedPoint {
            size: 4,
            signed: true,
            byte_order: ByteOrder::LittleEndian,
        };
        let attr = attribute("count", datatype, vec![], 42i32.to_le_bytes().to_vec());

        let converted = attr.read_as_f64().unwrap();
        assert!((converted - 42.0).abs() < 1e-10);
    }
}