1use std::sync::Arc;
2
3use bytes::Bytes;
4
5use crate::endian::HDF5Reader;
6use crate::error::Result;
7use crate::heap;
8use crate::messages::dataspace::DataspaceMessage;
9use crate::messages::datatype::{ByteOrder, DataType, StringPadding};
10use crate::reader::AsyncFileReader;
11
/// Decoded payload of an attribute.
///
/// Numeric variants store one entry per decoded element, so a scalar numeric
/// attribute is represented as a vector of length one.
#[derive(Debug, Clone, PartialEq)]
pub enum AttributeValue {
    /// Signed 8-bit integers.
    I8(Vec<i8>),
    /// Signed 16-bit integers.
    I16(Vec<i16>),
    /// Signed 32-bit integers.
    I32(Vec<i32>),
    /// Signed 64-bit integers.
    I64(Vec<i64>),
    /// Unsigned 8-bit integers.
    U8(Vec<u8>),
    /// Unsigned 16-bit integers.
    U16(Vec<u16>),
    /// Unsigned 32-bit integers.
    U32(Vec<u32>),
    /// Unsigned 64-bit integers.
    U64(Vec<u64>),
    /// 32-bit floating-point values.
    F32(Vec<f32>),
    /// 64-bit floating-point values.
    F64(Vec<f64>),
    /// A single text value.
    String(String),
    /// Bytes that were not interpreted as any of the typed variants.
    Raw(Vec<u8>),
}

impl AttributeValue {
    /// Reports whether this value holds exactly one element.
    ///
    /// Numeric variants are scalar when their vector has length one;
    /// `String` and `Raw` are always treated as scalar.
    pub fn is_scalar(&self) -> bool {
        let element_count = match self {
            Self::I8(items) => items.len(),
            Self::I16(items) => items.len(),
            Self::I32(items) => items.len(),
            Self::I64(items) => items.len(),
            Self::U8(items) => items.len(),
            Self::U16(items) => items.len(),
            Self::U32(items) => items.len(),
            Self::U64(items) => items.len(),
            Self::F32(items) => items.len(),
            Self::F64(items) => items.len(),
            Self::String(_) | Self::Raw(_) => return true,
        };
        element_count == 1
    }

    /// Returns the value as an `i32` when it is a single-element `I32`.
    ///
    /// No conversion from other numeric variants is attempted.
    pub fn as_i32(&self) -> Option<i32> {
        if let Self::I32(items) = self {
            if let [only] = items.as_slice() {
                return Some(*only);
            }
        }
        None
    }

    /// Returns the value as an `i64` when it is a single-element `I64`.
    ///
    /// No conversion from other numeric variants is attempted.
    pub fn as_i64(&self) -> Option<i64> {
        if let Self::I64(items) = self {
            if let [only] = items.as_slice() {
                return Some(*only);
            }
        }
        None
    }

    /// Returns the value as an `f32` when it is a single-element `F32`.
    ///
    /// No conversion from other numeric variants is attempted.
    pub fn as_f32(&self) -> Option<f32> {
        if let Self::F32(items) = self {
            if let [only] = items.as_slice() {
                return Some(*only);
            }
        }
        None
    }

    /// Returns the value as an `f64` when it is a single-element `F64`.
    ///
    /// No conversion from other numeric variants is attempted.
    pub fn as_f64(&self) -> Option<f64> {
        if let Self::F64(items) = self {
            if let [only] = items.as_slice() {
                return Some(*only);
            }
        }
        None
    }

    /// Borrows the text when this is a `String` value, otherwise `None`.
    pub fn as_str(&self) -> Option<&str> {
        if let Self::String(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }
}
100
101#[derive(Debug, Clone)]
103pub struct Attribute {
104 pub name: String,
106 pub value: AttributeValue,
108}
109
110#[derive(Debug, Clone)]
114pub struct AttributeMessage {
115 pub name: String,
117 pub dtype: DataType,
119 pub dataspace: DataspaceMessage,
121 pub raw_value: Bytes,
123}
124
125impl AttributeMessage {
126 pub fn decode(&self) -> AttributeValue {
128 let n = num_elements(&self.dataspace.dimensions) as usize;
129 let raw = &self.raw_value;
130
131 match &self.dtype {
132 DataType::FixedPoint {
133 size,
134 signed,
135 byte_order,
136 ..
137 } => decode_fixed_point(raw, *size, *signed, *byte_order, n),
138
139 DataType::FloatingPoint {
140 size, byte_order, ..
141 } => decode_floating_point(raw, *size, *byte_order, n),
142
143 DataType::String { size, padding, .. } => {
144 let s = if *size == 0 {
145 String::new()
147 } else {
148 let end = (*size as usize).min(raw.len());
149 let bytes = &raw[..end];
150 let s = String::from_utf8_lossy(bytes);
151 match padding {
152 StringPadding::NullTerminate => {
153 s.split('\0').next().unwrap_or("").to_string()
154 }
155 StringPadding::NullPad => s.trim_end_matches('\0').to_string(),
156 StringPadding::SpacePad => s.trim_end().to_string(),
157 }
158 };
159 AttributeValue::String(s)
160 }
161
162 DataType::Enum { base_type, .. } => {
164 match base_type.as_ref() {
166 DataType::FixedPoint {
167 size,
168 signed,
169 byte_order,
170 ..
171 } => decode_fixed_point(raw, *size, *signed, *byte_order, n),
172 _ => AttributeValue::Raw(raw.to_vec()),
173 }
174 }
175
176 _ => AttributeValue::Raw(raw.to_vec()),
177 }
178 }
179
180 pub fn to_attribute(&self) -> Attribute {
182 Attribute {
183 name: self.name.clone(),
184 value: self.decode(),
185 }
186 }
187
188 pub async fn decode_with_reader(
193 &self,
194 reader: &Arc<dyn AsyncFileReader>,
195 size_of_offsets: u8,
196 size_of_lengths: u8,
197 ) -> Result<AttributeValue> {
198 match &self.dtype {
199 DataType::VarLen {
200 is_string: true, ..
201 } => {
202 let raw = &self.raw_value;
204 if raw.len() < 4 + size_of_offsets as usize + 4 {
205 return Ok(AttributeValue::String(String::new()));
206 }
207 let mut r = HDF5Reader::with_sizes(
208 self.raw_value.clone(),
209 size_of_offsets,
210 size_of_lengths,
211 );
212 let _seq_len = r.read_u32()?;
213 let collection_addr = r.read_offset()?;
214 let object_index = r.read_u32()?;
215
216 if HDF5Reader::is_undef_addr(collection_addr, size_of_offsets) {
218 return Ok(AttributeValue::String(String::new()));
219 }
220
221 let obj_data = heap::global::read_global_heap_object(
222 reader,
223 collection_addr,
224 object_index,
225 size_of_offsets,
226 size_of_lengths,
227 )
228 .await?;
229
230 let s = String::from_utf8_lossy(&obj_data);
231 let s = s.split('\0').next().unwrap_or("").to_string();
233 Ok(AttributeValue::String(s))
234 }
235 _ => Ok(self.decode()),
236 }
237 }
238
239 pub async fn to_attribute_resolved(
241 &self,
242 reader: &Arc<dyn AsyncFileReader>,
243 size_of_offsets: u8,
244 size_of_lengths: u8,
245 ) -> Result<Attribute> {
246 let value = self
247 .decode_with_reader(reader, size_of_offsets, size_of_lengths)
248 .await?;
249 Ok(Attribute {
250 name: self.name.clone(),
251 value,
252 })
253 }
254
255 pub fn parse(data: &Bytes, size_of_offsets: u8, size_of_lengths: u8) -> Result<Self> {
257 let mut r = HDF5Reader::with_sizes(data.clone(), size_of_offsets, size_of_lengths);
258
259 let version = r.read_u8()?;
260
261 match version {
262 1 => Self::parse_v1(&mut r, data, size_of_offsets, size_of_lengths),
263 2 => Self::parse_v2(&mut r, data, size_of_offsets, size_of_lengths),
264 3 => Self::parse_v3(&mut r, data, size_of_offsets, size_of_lengths),
265 _ => {
266 Ok(Self {
268 name: String::new(),
269 dtype: DataType::Opaque {
270 size: 0,
271 tag: String::new(),
272 },
273 dataspace: DataspaceMessage {
274 rank: 0,
275 dataspace_type: 0,
276 dimensions: vec![],
277 max_dimensions: None,
278 },
279 raw_value: Bytes::new(),
280 })
281 }
282 }
283 }
284
285 fn parse_v1(
286 r: &mut HDF5Reader,
287 data: &Bytes,
288 _size_of_offsets: u8,
289 size_of_lengths: u8,
290 ) -> Result<Self> {
291 r.skip(1); let name_size = r.read_u16()? as usize;
293 let datatype_size = r.read_u16()? as usize;
294 let dataspace_size = r.read_u16()? as usize;
295
296 let name_bytes = r.read_bytes(name_size)?;
298 let name = String::from_utf8_lossy(&name_bytes)
299 .trim_end_matches('\0')
300 .to_string();
301 r.skip_field_padding(name_size, 8);
302
303 let (dtype, dataspace, raw_value) = parse_dtype_dataspace_value(
304 r,
305 data,
306 datatype_size,
307 dataspace_size,
308 size_of_lengths,
309 true,
310 )?;
311
312 Ok(Self {
313 name,
314 dtype,
315 dataspace,
316 raw_value,
317 })
318 }
319
320 fn parse_v2(
321 r: &mut HDF5Reader,
322 data: &Bytes,
323 _size_of_offsets: u8,
324 size_of_lengths: u8,
325 ) -> Result<Self> {
326 let _flags = r.read_u8()?;
328 let name_size = r.read_u16()? as usize;
329 let datatype_size = r.read_u16()? as usize;
330 let dataspace_size = r.read_u16()? as usize;
331
332 let name_bytes = r.read_bytes(name_size)?;
333 let name = String::from_utf8_lossy(&name_bytes)
334 .trim_end_matches('\0')
335 .to_string();
336
337 let (dtype, dataspace, raw_value) = parse_dtype_dataspace_value(
338 r,
339 data,
340 datatype_size,
341 dataspace_size,
342 size_of_lengths,
343 false,
344 )?;
345
346 Ok(Self {
347 name,
348 dtype,
349 dataspace,
350 raw_value,
351 })
352 }
353
354 fn parse_v3(
355 r: &mut HDF5Reader,
356 data: &Bytes,
357 _size_of_offsets: u8,
358 size_of_lengths: u8,
359 ) -> Result<Self> {
360 let flags = r.read_u8()?;
362 let name_size = r.read_u16()? as usize;
363 let datatype_size = r.read_u16()? as usize;
364 let dataspace_size = r.read_u16()? as usize;
365
366 let _charset = if flags & 0x10 != 0 { r.read_u8()? } else { 0 };
367
368 let name_bytes = r.read_bytes(name_size)?;
369 let name = String::from_utf8_lossy(&name_bytes)
370 .trim_end_matches('\0')
371 .to_string();
372
373 let (dtype, dataspace, raw_value) = parse_dtype_dataspace_value(
374 r,
375 data,
376 datatype_size,
377 dataspace_size,
378 size_of_lengths,
379 false,
380 )?;
381
382 Ok(Self {
383 name,
384 dtype,
385 dataspace,
386 raw_value,
387 })
388 }
389}
390
/// Returns the number of elements described by `dimensions`.
///
/// The extents are multiplied together, saturating at `u64::MAX`. An empty
/// dimension list yields 1, and a product of zero is also reported as 1, so
/// the result is never zero.
fn num_elements(dimensions: &[u64]) -> u64 {
    let mut total: u64 = 1;
    for &extent in dimensions {
        total = total.saturating_mul(extent);
    }
    if total == 0 {
        1
    } else {
        total
    }
}
400
401fn extract_raw_value(
404 r: &HDF5Reader,
405 data: &Bytes,
406 dataspace: &DataspaceMessage,
407 dtype: &DataType,
408) -> Bytes {
409 let n = num_elements(&dataspace.dimensions);
410 let value_size = n.saturating_mul(dtype.size() as u64) as usize;
411 let val_start = r.position() as usize;
412 if val_start + value_size <= data.len() {
413 data.slice(val_start..val_start + value_size)
414 } else {
415 Bytes::new()
416 }
417}
418
419fn parse_dtype_dataspace_value(
423 r: &mut HDF5Reader,
424 data: &Bytes,
425 datatype_size: usize,
426 dataspace_size: usize,
427 size_of_lengths: u8,
428 pad_to_8: bool,
429) -> Result<(DataType, DataspaceMessage, Bytes)> {
430 let dt_start = r.position() as usize;
431 let dt_bytes = data.slice(dt_start..dt_start + datatype_size);
432 let dtype = DataType::parse(&dt_bytes)?;
433 r.skip(datatype_size as u64);
434 if pad_to_8 {
435 r.skip_field_padding(datatype_size, 8);
436 }
437
438 let ds_start = r.position() as usize;
439 let ds_bytes = data.slice(ds_start..ds_start + dataspace_size);
440 let dataspace = DataspaceMessage::parse(&ds_bytes, size_of_lengths)?;
441 r.skip(dataspace_size as u64);
442 if pad_to_8 {
443 r.skip_field_padding(dataspace_size, 8);
444 }
445
446 let raw_value = extract_raw_value(r, data, &dataspace, &dtype);
447 Ok((dtype, dataspace, raw_value))
448}
449
/// Decodes up to `$n` values of type `$ty` from the byte slice `$raw`.
///
/// The input is split into complete `$width`-byte chunks (a trailing partial
/// chunk is ignored) and each chunk is interpreted as little-endian when
/// `$is_le` is true, big-endian otherwise. Evaluates to a `Vec<$ty>`.
macro_rules! decode_numeric {
    ($raw:expr, $n:expr, $is_le:expr, $width:expr, $ty:ty) => {{
        let mut values: Vec<$ty> = Vec::new();
        for chunk in $raw.chunks_exact($width).take($n) {
            // `chunks_exact` guarantees the chunk is exactly `$width` bytes.
            let mut word = [0u8; $width];
            word.copy_from_slice(chunk);
            values.push(if $is_le {
                <$ty>::from_le_bytes(word)
            } else {
                <$ty>::from_be_bytes(word)
            });
        }
        values
    }};
}
467
468fn decode_fixed_point(
470 raw: &[u8],
471 size: u32,
472 signed: bool,
473 byte_order: ByteOrder,
474 n: usize,
475) -> AttributeValue {
476 let is_le = matches!(byte_order, ByteOrder::LittleEndian);
477 match (size, signed) {
478 (1, true) => AttributeValue::I8(raw.iter().take(n).map(|&b| b as i8).collect()),
479 (1, false) => AttributeValue::U8(raw.iter().take(n).copied().collect()),
480 (2, true) => AttributeValue::I16(decode_numeric!(raw, n, is_le, 2, i16)),
481 (2, false) => AttributeValue::U16(decode_numeric!(raw, n, is_le, 2, u16)),
482 (4, true) => AttributeValue::I32(decode_numeric!(raw, n, is_le, 4, i32)),
483 (4, false) => AttributeValue::U32(decode_numeric!(raw, n, is_le, 4, u32)),
484 (8, true) => AttributeValue::I64(decode_numeric!(raw, n, is_le, 8, i64)),
485 (8, false) => AttributeValue::U64(decode_numeric!(raw, n, is_le, 8, u64)),
486 _ => AttributeValue::Raw(raw.to_vec()),
487 }
488}
489
490fn decode_floating_point(raw: &[u8], size: u32, byte_order: ByteOrder, n: usize) -> AttributeValue {
492 let is_le = matches!(byte_order, ByteOrder::LittleEndian);
493 match size {
494 4 => AttributeValue::F32(decode_numeric!(raw, n, is_le, 4, f32)),
495 8 => AttributeValue::F64(decode_numeric!(raw, n, is_le, 8, f64)),
496 _ => AttributeValue::Raw(raw.to_vec()),
497 }
498}