1use hdf5_reader::{ByteOrder, Dataset, Datatype, StringPadding, StringSize, VarLenKind};
4use ndarray::{ArrayD, IxDyn};
5
6use crate::error::{Error, Result};
7use crate::types::{NcIntegerValue, NcType};
8
/// A decoded enum value: the stored integer together with the name of the
/// enum member it corresponds to, when one exists.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NcEnumValue {
    /// Integer decoded from the value's bytes using the enum's base type.
    pub value: NcIntegerValue,
    /// Name of the first member whose decoded value equals `value`;
    /// `None` when no member matches.
    pub member: Option<String>,
}
17
/// One decoded member of a compound value.
#[derive(Debug, Clone, PartialEq)]
pub struct NcCompoundValueField {
    /// Member name, copied from the compound datatype definition.
    pub name: String,
    /// Decoded value of the member.
    pub value: NcValue,
}
24
/// A decoded fixed-size array value.
#[derive(Debug, Clone, PartialEq)]
pub struct NcArrayValue {
    /// Array dimensions, copied from the array datatype.
    pub dims: Vec<u64>,
    /// Decoded elements as a flat list, in the order they appear in the
    /// value's bytes; its length is the product of `dims`.
    pub values: Vec<NcValue>,
}
31
/// An owned, fully decoded value of any supported type.
///
/// Scalar integer variants are produced from the matching `NcIntegerValue`
/// widths; the remaining variants wrap strings and user-defined types.
#[derive(Debug, Clone, PartialEq)]
pub enum NcValue {
    /// Signed 8-bit integer.
    Byte(i8),
    /// A single `u8` payload.
    // NOTE(review): nothing in this file constructs `Char`; presumably it is
    // used for NetCDF character data elsewhere — confirm.
    Char(u8),
    /// Signed 16-bit integer.
    Short(i16),
    /// Signed 32-bit integer.
    Int(i32),
    /// 32-bit floating-point number.
    Float(f32),
    /// 64-bit floating-point number.
    Double(f64),
    /// Unsigned 8-bit integer.
    UByte(u8),
    /// Unsigned 16-bit integer.
    UShort(u16),
    /// Unsigned 32-bit integer.
    UInt(u32),
    /// Signed 64-bit integer.
    Int64(i64),
    /// Unsigned 64-bit integer.
    UInt64(u64),
    /// Text decoded from a fixed-length or variable-length string.
    String(String),
    /// Enum value with its integer and optional member name.
    Enum(NcEnumValue),
    /// Raw bytes of an opaque value.
    Opaque(Vec<u8>),
    /// Compound value as a list of named, decoded members.
    Compound(Vec<NcCompoundValueField>),
    /// Fixed-size array value.
    Array(NcArrayValue),
    /// Variable-length sequence of decoded elements.
    VLen(Vec<NcValue>),
}
53
/// A borrowed, not-yet-decoded view of a single value.
///
/// The view pairs the value's raw bytes with the datatype that describes them
/// and the dataset they came from, so callers can decode only what they need.
#[derive(Clone, Copy)]
pub struct NcValueView<'a> {
    // Dataset the value belongs to; needed to size and resolve
    // variable-length references.
    dataset: &'a Dataset,
    // Datatype describing how `bytes` is laid out.
    dtype: &'a Datatype,
    // Raw bytes of this one value.
    bytes: &'a [u8],
}
65
66impl<'a> NcValueView<'a> {
67 pub(crate) fn new(dataset: &'a Dataset, dtype: &'a Datatype, bytes: &'a [u8]) -> Self {
68 Self {
69 dataset,
70 dtype,
71 bytes,
72 }
73 }
74
75 pub fn nc_type(&self) -> Result<NcType> {
77 crate::nc4::types::hdf5_to_nc_type(self.dtype)
78 }
79
80 pub fn to_owned_value(&self) -> Result<NcValue> {
82 decode_value(self.dataset, self.dtype, self.bytes)
83 }
84
85 pub fn integer(&self) -> Result<NcIntegerValue> {
87 match self.dtype {
88 Datatype::FixedPoint {
89 size,
90 signed,
91 byte_order,
92 } => crate::nc4::types::decode_fixed_point_integer(
93 self.bytes,
94 *size,
95 *signed,
96 *byte_order,
97 ),
98 Datatype::Enum { base, .. } => crate::nc4::types::decode_enum_integer(base, self.bytes),
99 other => Err(Error::TypeMismatch {
100 expected: "integer or enum value".to_string(),
101 actual: format!("{other:?}"),
102 }),
103 }
104 }
105
106 pub fn f32(&self) -> Result<f32> {
108 match self.dtype {
109 Datatype::FloatingPoint {
110 size: 4,
111 byte_order,
112 } => Ok(f32::from_ne_bytes(read_ordered_bytes::<4>(
113 self.bytes,
114 *byte_order,
115 )?)),
116 other => Err(Error::TypeMismatch {
117 expected: "f32".to_string(),
118 actual: format!("{other:?}"),
119 }),
120 }
121 }
122
123 pub fn f64(&self) -> Result<f64> {
125 match self.dtype {
126 Datatype::FloatingPoint {
127 size: 8,
128 byte_order,
129 } => Ok(f64::from_ne_bytes(read_ordered_bytes::<8>(
130 self.bytes,
131 *byte_order,
132 )?)),
133 other => Err(Error::TypeMismatch {
134 expected: "f64".to_string(),
135 actual: format!("{other:?}"),
136 }),
137 }
138 }
139
140 pub fn enum_value(&self) -> Result<NcEnumValue> {
142 match self.dtype {
143 Datatype::Enum { base, members } => {
144 let value = crate::nc4::types::decode_enum_integer(base, self.bytes)?;
145 let mut member_name = None;
146 for member in members {
147 if crate::nc4::types::decode_enum_integer(base, &member.value)? == value {
148 member_name = Some(member.name.clone());
149 break;
150 }
151 }
152 Ok(NcEnumValue {
153 value,
154 member: member_name,
155 })
156 }
157 other => Err(Error::TypeMismatch {
158 expected: "enum value".to_string(),
159 actual: format!("{other:?}"),
160 }),
161 }
162 }
163
164 pub fn opaque_bytes(&self) -> Result<&'a [u8]> {
166 match self.dtype {
167 Datatype::Opaque { size, .. } => {
168 let size = checked_usize(*size as u64, "opaque byte size")?;
169 require_len(self.bytes, size, "opaque value")?;
170 Ok(&self.bytes[..size])
171 }
172 other => Err(Error::TypeMismatch {
173 expected: "opaque value".to_string(),
174 actual: format!("{other:?}"),
175 }),
176 }
177 }
178
179 pub fn compound_field(&self, name: &str) -> Result<NcValueView<'a>> {
181 match self.dtype {
182 Datatype::Compound { fields, .. } => {
183 let field = fields
184 .iter()
185 .find(|field| field.name == name)
186 .ok_or_else(|| {
187 Error::InvalidData(format!("compound field not found: {name}"))
188 })?;
189 let start = checked_usize(field.byte_offset as u64, "compound field offset")?;
190 let len = value_size(self.dataset, &field.datatype)?;
191 let end = checked_add_usize(start, len, "compound field end")?;
192 require_len(self.bytes, end, "compound value")?;
193 Ok(NcValueView::new(
194 self.dataset,
195 &field.datatype,
196 &self.bytes[start..end],
197 ))
198 }
199 other => Err(Error::TypeMismatch {
200 expected: "compound value".to_string(),
201 actual: format!("{other:?}"),
202 }),
203 }
204 }
205
206 pub fn compound_fields(&self) -> Result<Vec<NcCompoundFieldView<'a>>> {
208 match self.dtype {
209 Datatype::Compound { fields, .. } => fields
210 .iter()
211 .map(|field| {
212 let start = checked_usize(field.byte_offset as u64, "compound field offset")?;
213 let len = value_size(self.dataset, &field.datatype)?;
214 let end = checked_add_usize(start, len, "compound field end")?;
215 require_len(self.bytes, end, "compound value")?;
216 Ok(NcCompoundFieldView {
217 name: &field.name,
218 value: NcValueView::new(
219 self.dataset,
220 &field.datatype,
221 &self.bytes[start..end],
222 ),
223 })
224 })
225 .collect(),
226 other => Err(Error::TypeMismatch {
227 expected: "compound value".to_string(),
228 actual: format!("{other:?}"),
229 }),
230 }
231 }
232
233 pub fn array_elements(&self) -> Result<Vec<NcValueView<'a>>> {
235 match self.dtype {
236 Datatype::Array { base, dims } => {
237 let count = checked_product_u64(dims, "array element count")?;
238 let elem_size = value_size(self.dataset, base)?;
239 let total = checked_mul_usize(count, elem_size, "array byte size")?;
240 require_len(self.bytes, total, "array value")?;
241 let mut values = Vec::with_capacity(count);
242 for index in 0..count {
243 let start = checked_mul_usize(index, elem_size, "array element offset")?;
244 let end = checked_add_usize(start, elem_size, "array element end")?;
245 values.push(NcValueView::new(
246 self.dataset,
247 base,
248 &self.bytes[start..end],
249 ));
250 }
251 Ok(values)
252 }
253 other => Err(Error::TypeMismatch {
254 expected: "array value".to_string(),
255 actual: format!("{other:?}"),
256 }),
257 }
258 }
259
260 pub fn vlen_values(&self) -> Result<Vec<NcValue>> {
262 match self.dtype {
263 Datatype::VarLen {
264 kind: VarLenKind::String,
265 ..
266 } => Err(Error::TypeMismatch {
267 expected: "non-string vlen value".to_string(),
268 actual: format!("{:?}", self.dtype),
269 }),
270 Datatype::VarLen { base, .. } => decode_vlen_values(self.dataset, base, self.bytes),
271 other => Err(Error::TypeMismatch {
272 expected: "vlen value".to_string(),
273 actual: format!("{other:?}"),
274 }),
275 }
276 }
277}
278
/// A borrowed view of one member of a compound value.
#[derive(Clone, Copy)]
pub struct NcCompoundFieldView<'a> {
    /// Member name, borrowed from the compound datatype definition.
    pub name: &'a str,
    /// View of the member's bytes, typed with the member's datatype.
    pub value: NcValueView<'a>,
}
285
286pub(crate) fn read_dataset_with_decoder<T, F>(
287 dataset: &Dataset,
288 mut decoder: F,
289) -> Result<ArrayD<T>>
290where
291 F: FnMut(NcValueView<'_>) -> Result<T>,
292{
293 let raw = dataset.read_raw_bytes()?;
294 let count = checked_usize(dataset.num_elements(), "NetCDF-4 variable element count")?;
295 let elem_size = value_size(dataset, dataset.dtype())?;
296 let total = checked_mul_usize(count, elem_size, "NetCDF-4 variable byte size")?;
297 require_len(&raw, total, "NetCDF-4 variable data")?;
298
299 let mut values = Vec::with_capacity(count);
300 for index in 0..count {
301 let start = checked_mul_usize(index, elem_size, "NetCDF-4 element byte offset")?;
302 let end = checked_add_usize(start, elem_size, "NetCDF-4 element byte end")?;
303 values.push(decoder(NcValueView::new(
304 dataset,
305 dataset.dtype(),
306 &raw[start..end],
307 ))?);
308 }
309
310 let shape = dataset
311 .shape()
312 .iter()
313 .map(|&dim| checked_usize(dim, "NetCDF-4 variable dimension"))
314 .collect::<Result<Vec<_>>>()?;
315 ArrayD::from_shape_vec(IxDyn(&shape), values)
316 .map_err(|err| Error::InvalidData(format!("array shape error: {err}")))
317}
318
319pub(crate) fn read_dataset_values(dataset: &Dataset) -> Result<ArrayD<NcValue>> {
320 read_dataset_with_decoder(dataset, |value| value.to_owned_value())
321}
322
323fn decode_value(dataset: &Dataset, dtype: &Datatype, bytes: &[u8]) -> Result<NcValue> {
324 match dtype {
325 Datatype::FixedPoint {
326 size,
327 signed,
328 byte_order,
329 } => integer_to_value(crate::nc4::types::decode_fixed_point_integer(
330 bytes,
331 *size,
332 *signed,
333 *byte_order,
334 )?),
335 Datatype::FloatingPoint {
336 size: 4,
337 byte_order,
338 } => Ok(NcValue::Float(f32::from_ne_bytes(read_ordered_bytes::<4>(
339 bytes,
340 *byte_order,
341 )?))),
342 Datatype::FloatingPoint {
343 size: 8,
344 byte_order,
345 } => Ok(NcValue::Double(f64::from_ne_bytes(
346 read_ordered_bytes::<8>(bytes, *byte_order)?,
347 ))),
348 Datatype::FloatingPoint { size, .. } => Err(Error::InvalidData(format!(
349 "unsupported floating-point size {size}"
350 ))),
351 Datatype::String {
352 size: StringSize::Fixed(len),
353 padding,
354 ..
355 } => {
356 let len = checked_usize(*len as u64, "fixed string length")?;
357 require_len(bytes, len, "fixed string value")?;
358 Ok(NcValue::String(decode_string_bytes(
359 &bytes[..len],
360 *padding,
361 )?))
362 }
363 Datatype::String {
364 size: StringSize::Variable,
365 padding,
366 ..
367 } => {
368 let raw = dataset.resolve_vlen_reference_bytes(bytes, 1)?;
369 Ok(NcValue::String(decode_string_bytes(&raw, *padding)?))
370 }
371 Datatype::Enum { .. } => Ok(NcValue::Enum(
372 NcValueView::new(dataset, dtype, bytes).enum_value()?,
373 )),
374 Datatype::Opaque { size, .. } => {
375 let size = checked_usize(*size as u64, "opaque byte size")?;
376 require_len(bytes, size, "opaque value")?;
377 Ok(NcValue::Opaque(bytes[..size].to_vec()))
378 }
379 Datatype::Compound { fields, .. } => {
380 let mut decoded = Vec::with_capacity(fields.len());
381 for field in fields {
382 let start = checked_usize(field.byte_offset as u64, "compound field offset")?;
383 let len = value_size(dataset, &field.datatype)?;
384 let end = checked_add_usize(start, len, "compound field end")?;
385 require_len(bytes, end, "compound value")?;
386 decoded.push(NcCompoundValueField {
387 name: field.name.clone(),
388 value: decode_value(dataset, &field.datatype, &bytes[start..end])?,
389 });
390 }
391 Ok(NcValue::Compound(decoded))
392 }
393 Datatype::Array { base, dims } => {
394 let count = checked_product_u64(dims, "array element count")?;
395 let elem_size = value_size(dataset, base)?;
396 let total = checked_mul_usize(count, elem_size, "array byte size")?;
397 require_len(bytes, total, "array value")?;
398 let mut values = Vec::with_capacity(count);
399 for index in 0..count {
400 let start = checked_mul_usize(index, elem_size, "array element offset")?;
401 let end = checked_add_usize(start, elem_size, "array element end")?;
402 values.push(decode_value(dataset, base, &bytes[start..end])?);
403 }
404 Ok(NcValue::Array(NcArrayValue {
405 dims: dims.clone(),
406 values,
407 }))
408 }
409 Datatype::VarLen {
410 base,
411 kind: VarLenKind::String,
412 padding,
413 ..
414 } if matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. }) => {
415 let raw = dataset.resolve_vlen_reference_bytes(bytes, 1)?;
416 Ok(NcValue::String(decode_string_bytes(&raw, *padding)?))
417 }
418 Datatype::VarLen { base, .. } => {
419 Ok(NcValue::VLen(decode_vlen_values(dataset, base, bytes)?))
420 }
421 other => Err(Error::InvalidData(format!(
422 "unsupported NetCDF-4 user-defined datatype: {other:?}"
423 ))),
424 }
425}
426
427fn decode_vlen_values(
428 dataset: &Dataset,
429 base: &Datatype,
430 reference: &[u8],
431) -> Result<Vec<NcValue>> {
432 let elem_size = value_size(dataset, base)?;
433 let raw = dataset.resolve_vlen_reference_bytes(reference, elem_size)?;
434 if elem_size == 0 {
435 return Err(Error::InvalidData(
436 "vlen base type has zero byte size".to_string(),
437 ));
438 }
439 if raw.len() % elem_size != 0 {
440 return Err(Error::InvalidData(format!(
441 "vlen payload has {} bytes, not a multiple of element size {}",
442 raw.len(),
443 elem_size
444 )));
445 }
446
447 let count = raw.len() / elem_size;
448 let mut values = Vec::with_capacity(count);
449 for index in 0..count {
450 let start = checked_mul_usize(index, elem_size, "vlen element offset")?;
451 let end = checked_add_usize(start, elem_size, "vlen element end")?;
452 values.push(decode_value(dataset, base, &raw[start..end])?);
453 }
454 Ok(values)
455}
456
457fn integer_to_value(value: NcIntegerValue) -> Result<NcValue> {
458 Ok(match value {
459 NcIntegerValue::I8(value) => NcValue::Byte(value),
460 NcIntegerValue::U8(value) => NcValue::UByte(value),
461 NcIntegerValue::I16(value) => NcValue::Short(value),
462 NcIntegerValue::U16(value) => NcValue::UShort(value),
463 NcIntegerValue::I32(value) => NcValue::Int(value),
464 NcIntegerValue::U32(value) => NcValue::UInt(value),
465 NcIntegerValue::I64(value) => NcValue::Int64(value),
466 NcIntegerValue::U64(value) => NcValue::UInt64(value),
467 })
468}
469
470fn value_size(dataset: &Dataset, dtype: &Datatype) -> Result<usize> {
471 match dtype {
472 Datatype::String {
473 size: StringSize::Variable,
474 ..
475 }
476 | Datatype::VarLen { .. } => Ok(dataset.vlen_reference_size()),
477 Datatype::Array { base, dims } => {
478 let count = checked_product_u64(dims, "array element count")?;
479 let elem_size = value_size(dataset, base)?;
480 checked_mul_usize(count, elem_size, "array byte size")
481 }
482 Datatype::Enum { base, .. } => value_size(dataset, base),
483 Datatype::FixedPoint { size, .. }
484 | Datatype::FloatingPoint { size, .. }
485 | Datatype::Bitfield { size, .. }
486 | Datatype::Reference { size, .. } => Ok(*size as usize),
487 Datatype::String {
488 size: StringSize::Fixed(len),
489 ..
490 } => Ok(*len as usize),
491 Datatype::Compound { size, .. } | Datatype::Opaque { size, .. } => Ok(*size as usize),
492 }
493}
494
495fn decode_string_bytes(bytes: &[u8], padding: StringPadding) -> Result<String> {
496 let trimmed = match padding {
497 StringPadding::NullTerminate => {
498 let end = bytes
499 .iter()
500 .position(|&byte| byte == 0)
501 .unwrap_or(bytes.len());
502 &bytes[..end]
503 }
504 StringPadding::NullPad => {
505 let end = bytes
506 .iter()
507 .rposition(|&byte| byte != 0)
508 .map_or(0, |idx| idx + 1);
509 &bytes[..end]
510 }
511 StringPadding::SpacePad => {
512 let end = bytes
513 .iter()
514 .rposition(|&byte| byte != b' ')
515 .map_or(0, |idx| idx + 1);
516 &bytes[..end]
517 }
518 };
519 String::from_utf8(trimmed.to_vec())
520 .map_err(|err| Error::InvalidData(format!("invalid string data: {err}")))
521}
522
523fn read_ordered_bytes<const N: usize>(bytes: &[u8], byte_order: ByteOrder) -> Result<[u8; N]> {
524 require_len(bytes, N, "numeric value")?;
525 let mut out = [0u8; N];
526 out.copy_from_slice(&bytes[..N]);
527 #[cfg(target_endian = "little")]
528 if byte_order == ByteOrder::BigEndian {
529 out.reverse();
530 }
531 #[cfg(target_endian = "big")]
532 if byte_order == ByteOrder::LittleEndian {
533 out.reverse();
534 }
535 Ok(out)
536}
537
538fn require_len(bytes: &[u8], needed: usize, context: &str) -> Result<()> {
539 if bytes.len() < needed {
540 return Err(Error::InvalidData(format!(
541 "{context} too short: need {needed} bytes, have {}",
542 bytes.len()
543 )));
544 }
545 Ok(())
546}
547
548fn checked_usize(value: u64, context: &str) -> Result<usize> {
549 usize::try_from(value)
550 .map_err(|_| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
551}
552
553fn checked_add_usize(left: usize, right: usize, context: &str) -> Result<usize> {
554 left.checked_add(right)
555 .ok_or_else(|| Error::InvalidData(format!("{context} overflowed usize")))
556}
557
558fn checked_mul_usize(left: usize, right: usize, context: &str) -> Result<usize> {
559 left.checked_mul(right)
560 .ok_or_else(|| Error::InvalidData(format!("{context} overflowed usize")))
561}
562
563fn checked_product_u64(values: &[u64], context: &str) -> Result<usize> {
564 let mut product = 1usize;
565 for &value in values {
566 product = checked_mul_usize(product, checked_usize(value, context)?, context)?;
567 }
568 Ok(product)
569}