1use ndarray::ArrayD;
7
8use crate::error::{Error, Result};
9use crate::types::{
10 checked_mul_u64, checked_shape_elements, checked_usize_from_u64, NcType, NcVariable,
11};
12
13use super::data::{self, compute_record_stride, NcReadType};
14use super::ClassicFile;
15
/// One dimension of a selection after it has been checked against the
/// variable's shape.
#[derive(Clone, Debug)]
enum ResolvedClassicSelectionDim {
    /// A single coordinate along the dimension.
    Index(u64),
    /// A strided run of coordinates along the dimension.
    Slice {
        /// First selected coordinate.
        start: u64,
        /// Distance between consecutive selected coordinates.
        step: u64,
        /// Number of selected coordinates.
        count: usize,
        /// Flag recorded when the slice was resolved; `true` marks a slice
        /// that spans the whole dimension with a step of one.
        is_full_unit_stride: bool,
    },
}

impl ResolvedClassicSelectionDim {
    /// Return `true` only for a `Slice` whose `is_full_unit_stride` flag is
    /// set; an `Index` never qualifies.
    fn is_full_unit_stride(&self) -> bool {
        match self {
            Self::Slice {
                is_full_unit_stride,
                ..
            } => *is_full_unit_stride,
            Self::Index(_) => false,
        }
    }
}
38
/// A complete selection over a variable, resolved against its shape.
#[derive(Clone, Debug)]
struct ResolvedClassicSelection {
    /// One resolved entry per variable dimension, in dimension order.
    dims: Vec<ResolvedClassicSelectionDim>,
    /// Extent of each `Slice` dimension; `Index` dimensions contribute no
    /// entry.
    result_shape: Vec<usize>,
    /// Product of the `Slice` counts (1 when there are none).
    result_elements: usize,
}
45
/// Loop-invariant state shared by every level of the recursive block reader.
struct BlockReadContext<'a> {
    /// Resolved selection, one entry per dimension.
    dims: &'a [ResolvedClassicSelectionDim],
    /// Element stride of each dimension (multiplied by a coordinate to get a
    /// logical element offset).
    strides: &'a [u64],
    /// Raw bytes the values are decoded from.
    file_data: &'a [u8],
    /// Variable name, used only in error messages.
    var_name: &'a str,
    /// Byte offset in `file_data` that logical element offset 0 maps to.
    base_offset: usize,
    /// Number of elements decoded per contiguous block.
    block_elements: usize,
    /// Size in bytes of one contiguous block.
    block_bytes: usize,
    /// Size in bytes of one element.
    elem_size: u64,
}
56
/// Loop-invariant state for reading the same inner selection out of several
/// records of a record variable.
struct RecordSliceContext<'a> {
    /// Raw bytes the values are decoded from.
    file_data: &'a [u8],
    /// Variable name, used only in error messages.
    var_name: &'a str,
    /// Byte offset in `file_data` of the variable's data for record 0.
    base_offset: usize,
    /// Byte distance between the starts of consecutive records.
    record_stride: usize,
    /// Variable shape with the leading (record) dimension removed.
    inner_shape: &'a [u64],
    /// Selection over the non-record dimensions, applied to each record.
    inner_resolved: &'a ResolvedClassicSelection,
}
65
66impl ClassicFile {
67 pub fn read_variable<T: NcReadType>(&self, name: &str) -> Result<ArrayD<T>> {
72 let var = self.find_variable(name)?;
73
74 let expected = T::nc_type();
76 if var.dtype != expected {
77 return Err(Error::TypeMismatch {
78 expected: format!("{:?}", expected),
79 actual: format!("{:?}", var.dtype),
80 });
81 }
82
83 let file_data = self.data.as_slice();
84
85 if var.is_record_var {
86 let record_stride = compute_record_stride(&self.root_group.variables);
87 data::read_record_variable(file_data, var, self.numrecs, record_stride)
88 } else {
89 data::read_non_record_variable(file_data, var)
90 }
91 }
92
93 pub fn read_variable_as_f64(&self, name: &str) -> Result<ArrayD<f64>> {
98 let var = self.find_variable(name)?;
99 let file_data = self.data.as_slice();
100
101 match var.dtype {
102 NcType::Byte => {
103 let arr = self.read_typed_variable::<i8>(var, file_data)?;
104 Ok(arr.mapv(|v| v as f64))
105 }
106 NcType::Short => {
107 let arr = self.read_typed_variable::<i16>(var, file_data)?;
108 Ok(arr.mapv(|v| v as f64))
109 }
110 NcType::Int => {
111 let arr = self.read_typed_variable::<i32>(var, file_data)?;
112 Ok(arr.mapv(|v| v as f64))
113 }
114 NcType::Float => {
115 let arr = self.read_typed_variable::<f32>(var, file_data)?;
116 Ok(arr.mapv(|v| v as f64))
117 }
118 NcType::Double => self.read_typed_variable::<f64>(var, file_data),
119 NcType::UByte => {
120 let arr = self.read_typed_variable::<u8>(var, file_data)?;
121 Ok(arr.mapv(|v| v as f64))
122 }
123 NcType::UShort => {
124 let arr = self.read_typed_variable::<u16>(var, file_data)?;
125 Ok(arr.mapv(|v| v as f64))
126 }
127 NcType::UInt => {
128 let arr = self.read_typed_variable::<u32>(var, file_data)?;
129 Ok(arr.mapv(|v| v as f64))
130 }
131 NcType::Int64 => {
132 let arr = self.read_typed_variable::<i64>(var, file_data)?;
133 Ok(arr.mapv(|v| v as f64))
134 }
135 NcType::UInt64 => {
136 let arr = self.read_typed_variable::<u64>(var, file_data)?;
137 Ok(arr.mapv(|v| v as f64))
138 }
139 NcType::Char => Err(Error::TypeMismatch {
140 expected: "numeric type".to_string(),
141 actual: "Char".to_string(),
142 }),
143 NcType::String => Err(Error::TypeMismatch {
144 expected: "numeric type".to_string(),
145 actual: "String".to_string(),
146 }),
147 _ => Err(Error::TypeMismatch {
148 expected: "numeric type".to_string(),
149 actual: format!("{:?}", var.dtype),
150 }),
151 }
152 }
153
154 pub fn read_variable_as_string(&self, name: &str) -> Result<String> {
156 let mut strings = self.read_variable_as_strings(name)?;
157 match strings.len() {
158 1 => Ok(strings.swap_remove(0)),
159 0 => Err(Error::InvalidData(format!(
160 "variable '{}' contains no string elements",
161 name
162 ))),
163 count => Err(Error::InvalidData(format!(
164 "variable '{}' contains {count} strings; use read_variable_as_strings()",
165 name
166 ))),
167 }
168 }
169
170 pub fn read_variable_as_strings(&self, name: &str) -> Result<Vec<String>> {
175 let var = self.find_variable(name)?;
176 if var.dtype != NcType::Char {
177 return Err(Error::TypeMismatch {
178 expected: "Char".to_string(),
179 actual: format!("{:?}", var.dtype),
180 });
181 }
182
183 let file_data = self.data.as_slice();
184 let arr = self.read_typed_variable::<u8>(var, file_data)?;
185 let bytes: Vec<u8> = arr.iter().copied().collect();
186 decode_char_variable_strings(var, &bytes)
187 }
188
189 pub fn read_variable_slice<T: NcReadType>(
194 &self,
195 name: &str,
196 selection: &crate::types::NcSliceInfo,
197 ) -> Result<ArrayD<T>> {
198 let var = self.find_variable(name)?;
199 let expected = T::nc_type();
200 if var.dtype != expected {
201 return Err(Error::TypeMismatch {
202 expected: format!("{:?}", expected),
203 actual: format!("{:?}", var.dtype),
204 });
205 }
206 let file_data = self.data.as_slice();
207 let resolved = resolve_classic_selection(
208 var,
209 selection,
210 if var.is_record_var { self.numrecs } else { 0 },
211 )?;
212
213 if !var.is_record_var {
214 return read_non_record_variable_slice_direct(file_data, var, &resolved);
215 }
216
217 let record_stride = compute_record_stride(&self.root_group.variables);
218 read_record_variable_slice_direct(file_data, var, self.numrecs, record_stride, &resolved)
219 }
220
221 pub fn read_variable_slice_as_f64(
223 &self,
224 name: &str,
225 selection: &crate::types::NcSliceInfo,
226 ) -> Result<ArrayD<f64>> {
227 let var = self.find_variable(name)?;
228
229 macro_rules! slice_promoted {
230 ($ty:ty) => {{
231 let sliced = self.read_variable_slice::<$ty>(name, selection)?;
232 Ok(sliced.mapv(|v| v as f64))
233 }};
234 }
235
236 match var.dtype {
237 NcType::Byte => slice_promoted!(i8),
238 NcType::Short => slice_promoted!(i16),
239 NcType::Int => slice_promoted!(i32),
240 NcType::Float => slice_promoted!(f32),
241 NcType::Double => slice_promoted!(f64),
242 NcType::UByte => slice_promoted!(u8),
243 NcType::UShort => slice_promoted!(u16),
244 NcType::UInt => slice_promoted!(u32),
245 NcType::Int64 => slice_promoted!(i64),
246 NcType::UInt64 => slice_promoted!(u64),
247 NcType::Char => Err(Error::TypeMismatch {
248 expected: "numeric type".to_string(),
249 actual: "Char".to_string(),
250 }),
251 _ => Err(Error::TypeMismatch {
252 expected: "numeric type".to_string(),
253 actual: format!("{:?}", var.dtype),
254 }),
255 }
256 }
257
258 fn find_variable(&self, name: &str) -> Result<&NcVariable> {
260 self.root_group
261 .variables
262 .iter()
263 .find(|v| v.name == name)
264 .ok_or_else(|| Error::VariableNotFound(name.to_string()))
265 }
266
267 fn read_typed_variable<T: NcReadType>(
269 &self,
270 var: &NcVariable,
271 file_data: &[u8],
272 ) -> Result<ArrayD<T>> {
273 if var.is_record_var {
274 let record_stride = compute_record_stride(&self.root_group.variables);
275 data::read_record_variable(file_data, var, self.numrecs, record_stride)
276 } else {
277 data::read_non_record_variable(file_data, var)
278 }
279 }
280}
281
282fn decode_char_variable_strings(var: &NcVariable, bytes: &[u8]) -> Result<Vec<String>> {
283 let shape = var.shape();
284 if shape.len() <= 1 {
285 return Ok(vec![decode_char_string(bytes)]);
286 }
287
288 let string_len = checked_usize_from_u64(
289 *shape
290 .last()
291 .ok_or_else(|| Error::InvalidData("char variable missing string axis".into()))?,
292 "char string length",
293 )?;
294 let string_count_u64 = checked_shape_elements(&shape[..shape.len() - 1], "char string count")?;
295 let string_count = checked_usize_from_u64(string_count_u64, "char string count")?;
296 let expected_bytes = string_count.checked_mul(string_len).ok_or_else(|| {
297 Error::InvalidData("char string byte count exceeds platform usize".to_string())
298 })?;
299
300 if bytes.len() < expected_bytes {
301 return Err(Error::InvalidData(format!(
302 "char variable '{}' data too short: need {} bytes, have {}",
303 var.name,
304 expected_bytes,
305 bytes.len()
306 )));
307 }
308
309 if string_len == 0 {
310 return Ok(vec![String::new(); string_count]);
311 }
312
313 Ok(bytes[..expected_bytes]
314 .chunks_exact(string_len)
315 .map(decode_char_string)
316 .collect())
317}
318
/// Decode one fixed-width char field: drop trailing NUL bytes, then convert
/// to a `String`, replacing invalid UTF-8 with U+FFFD.
///
/// NUL bytes that are followed by other content are kept.
fn decode_char_string(bytes: &[u8]) -> String {
    // A NUL byte always decodes to U+0000 on its own, so trimming the bytes
    // before decoding gives the same text as trimming the decoded string.
    let content_len = bytes
        .iter()
        .rposition(|&byte| byte != 0)
        .map_or(0, |last| last + 1);
    String::from_utf8_lossy(&bytes[..content_len]).into_owned()
}
324
325fn variable_shape_for_selection(var: &NcVariable, numrecs: u64) -> Vec<u64> {
326 let mut shape = var.shape();
327 if var.is_record_var && !shape.is_empty() {
328 shape[0] = numrecs;
329 }
330 shape
331}
332
333fn row_major_strides(shape: &[u64], context: &str) -> Result<Vec<u64>> {
334 let ndim = shape.len();
335 if ndim == 0 {
336 return Ok(Vec::new());
337 }
338
339 let mut strides = vec![1u64; ndim];
340 for i in (0..ndim - 1).rev() {
341 strides[i] = checked_mul_u64(strides[i + 1], shape[i + 1], context)?;
342 }
343 Ok(strides)
344}
345
346fn resolve_classic_selection(
347 var: &NcVariable,
348 selection: &crate::types::NcSliceInfo,
349 numrecs: u64,
350) -> Result<ResolvedClassicSelection> {
351 use crate::types::NcSliceInfoElem;
352
353 let shape = variable_shape_for_selection(var, numrecs);
354 if selection.selections.len() != shape.len() {
355 return Err(Error::InvalidData(format!(
356 "selection has {} dimensions but variable '{}' has {}",
357 selection.selections.len(),
358 var.name,
359 shape.len()
360 )));
361 }
362
363 let mut dims = Vec::with_capacity(shape.len());
364 let mut result_shape = Vec::new();
365 let mut result_elements = 1usize;
366
367 for (dim, (sel, &dim_size)) in selection.selections.iter().zip(shape.iter()).enumerate() {
368 match sel {
369 NcSliceInfoElem::Index(idx) => {
370 if *idx >= dim_size {
371 return Err(Error::InvalidData(format!(
372 "index {} out of bounds for dimension {} (size {})",
373 idx, dim, dim_size
374 )));
375 }
376 dims.push(ResolvedClassicSelectionDim::Index(*idx));
377 }
378 NcSliceInfoElem::Slice { start, end, step } => {
379 if *step == 0 {
380 return Err(Error::InvalidData("slice step cannot be 0".to_string()));
381 }
382 if *start > dim_size {
383 return Err(Error::InvalidData(format!(
384 "slice start {} out of bounds for dimension {} (size {})",
385 start, dim, dim_size
386 )));
387 }
388
389 let actual_end = if *end == u64::MAX {
390 dim_size
391 } else {
392 (*end).min(dim_size)
393 };
394 let count_u64 = if *start >= actual_end {
395 0
396 } else {
397 (actual_end - *start).div_ceil(*step)
398 };
399 let count = checked_usize_from_u64(count_u64, "classic slice result dimension")?;
400
401 result_shape.push(count);
402 result_elements = result_elements.checked_mul(count).ok_or_else(|| {
403 Error::InvalidData(
404 "classic slice result element count exceeds platform usize".to_string(),
405 )
406 })?;
407 dims.push(ResolvedClassicSelectionDim::Slice {
408 start: *start,
409 step: *step,
410 count,
411 is_full_unit_stride: *start == 0 && actual_end == dim_size && *step == 1,
412 });
413 }
414 }
415 }
416
417 Ok(ResolvedClassicSelection {
418 dims,
419 result_shape,
420 result_elements,
421 })
422}
423
424fn read_non_record_variable_slice_direct<T: NcReadType>(
425 file_data: &[u8],
426 var: &NcVariable,
427 resolved: &ResolvedClassicSelection,
428) -> Result<ArrayD<T>> {
429 let shape = variable_shape_for_selection(var, 0);
430 let base_offset = checked_usize_from_u64(var.data_offset, "classic slice data offset")?;
431 build_array_from_contiguous_selection::<T>(file_data, &var.name, base_offset, &shape, resolved)
432}
433
434fn read_record_variable_slice_direct<T: NcReadType>(
435 file_data: &[u8],
436 var: &NcVariable,
437 numrecs: u64,
438 record_stride: u64,
439 resolved: &ResolvedClassicSelection,
440) -> Result<ArrayD<T>> {
441 use ndarray::IxDyn;
442
443 if resolved.result_elements == 0 {
444 return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), Vec::new())
445 .map_err(|e| Error::InvalidData(format!("failed to create array: {e}")));
446 }
447
448 let shape = variable_shape_for_selection(var, numrecs);
449 let inner_shape = &shape[1..];
450 let inner_dims = resolved.dims[1..].to_vec();
451 let inner_resolved = ResolvedClassicSelection {
452 result_shape: selection_result_shape(&inner_dims),
453 result_elements: selection_result_elements(&inner_dims)?,
454 dims: inner_dims,
455 };
456 let base_offset = checked_usize_from_u64(var.data_offset, "classic slice data offset")?;
457 let record_stride = checked_usize_from_u64(record_stride, "classic record stride")?;
458 let mut values = Vec::with_capacity(resolved.result_elements);
459 let context = RecordSliceContext {
460 file_data,
461 var_name: &var.name,
462 base_offset,
463 record_stride,
464 inner_shape,
465 inner_resolved: &inner_resolved,
466 };
467
468 match &resolved.dims[0] {
469 ResolvedClassicSelectionDim::Index(record) => {
470 append_one_record_slice::<T>(&context, *record, &mut values)?
471 }
472 ResolvedClassicSelectionDim::Slice {
473 start, step, count, ..
474 } => {
475 for ordinal in 0..*count {
476 let record = start
477 .checked_add(checked_mul_u64(
478 ordinal as u64,
479 *step,
480 "classic record slice coordinate",
481 )?)
482 .ok_or_else(|| {
483 Error::InvalidData(
484 "classic record slice coordinate exceeds u64".to_string(),
485 )
486 })?;
487 append_one_record_slice::<T>(&context, record, &mut values)?;
488 }
489 }
490 }
491
492 debug_assert_eq!(values.len(), resolved.result_elements);
493 ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), values)
494 .map_err(|e| Error::InvalidData(format!("failed to create array: {e}")))
495}
496
497fn selection_result_shape(dims: &[ResolvedClassicSelectionDim]) -> Vec<usize> {
498 dims.iter()
499 .filter_map(|dim| match dim {
500 ResolvedClassicSelectionDim::Index(_) => None,
501 ResolvedClassicSelectionDim::Slice { count, .. } => Some(*count),
502 })
503 .collect()
504}
505
506fn selection_result_elements(dims: &[ResolvedClassicSelectionDim]) -> Result<usize> {
507 let mut elements = 1usize;
508 for dim in dims {
509 if let ResolvedClassicSelectionDim::Slice { count, .. } = dim {
510 elements = elements.checked_mul(*count).ok_or_else(|| {
511 Error::InvalidData(
512 "classic slice result element count exceeds platform usize".to_string(),
513 )
514 })?;
515 }
516 }
517 Ok(elements)
518}
519
520fn build_array_from_contiguous_selection<T: NcReadType>(
521 file_data: &[u8],
522 var_name: &str,
523 base_offset: usize,
524 shape: &[u64],
525 resolved: &ResolvedClassicSelection,
526) -> Result<ArrayD<T>> {
527 use ndarray::IxDyn;
528
529 let values =
530 read_contiguous_selection_values::<T>(file_data, var_name, base_offset, shape, resolved)?;
531 ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), values)
532 .map_err(|e| Error::InvalidData(format!("failed to create array: {e}")))
533}
534
535fn read_contiguous_selection_values<T: NcReadType>(
536 file_data: &[u8],
537 var_name: &str,
538 base_offset: usize,
539 shape: &[u64],
540 resolved: &ResolvedClassicSelection,
541) -> Result<Vec<T>> {
542 if resolved.result_elements == 0 {
543 return Ok(Vec::new());
544 }
545
546 let strides = row_major_strides(shape, "classic slice stride")?;
547 let tail_start = resolved
548 .dims
549 .iter()
550 .rposition(|dim| !dim.is_full_unit_stride())
551 .map_or(0, |idx| idx + 1);
552 let block_elements_u64 = checked_shape_elements(
553 &shape[tail_start..],
554 "classic slice contiguous block element count",
555 )?;
556 let block_elements = checked_usize_from_u64(
557 block_elements_u64,
558 "classic slice contiguous block element count",
559 )?;
560 let block_bytes = checked_usize_from_u64(
561 checked_mul_u64(
562 block_elements_u64,
563 T::element_size() as u64,
564 "classic slice contiguous block size in bytes",
565 )?,
566 "classic slice contiguous block size in bytes",
567 )?;
568
569 let mut values = Vec::with_capacity(resolved.result_elements);
570 let context = BlockReadContext {
571 dims: &resolved.dims,
572 strides: &strides,
573 file_data,
574 var_name,
575 base_offset,
576 block_elements,
577 block_bytes,
578 elem_size: T::element_size() as u64,
579 };
580 read_selected_blocks_recursive::<T>(0, tail_start, 0, &context, &mut values)?;
581 Ok(values)
582}
583
584fn append_one_record_slice<T: NcReadType>(
585 context: &RecordSliceContext<'_>,
586 record: u64,
587 values: &mut Vec<T>,
588) -> Result<()> {
589 let record = checked_usize_from_u64(record, "classic record index")?;
590 let record_offset = context
591 .base_offset
592 .checked_add(record.checked_mul(context.record_stride).ok_or_else(|| {
593 Error::InvalidData("classic record byte offset exceeds platform usize".to_string())
594 })?)
595 .ok_or_else(|| {
596 Error::InvalidData("classic record byte offset exceeds platform usize".to_string())
597 })?;
598 let mut decoded = read_contiguous_selection_values::<T>(
599 context.file_data,
600 context.var_name,
601 record_offset,
602 context.inner_shape,
603 context.inner_resolved,
604 )?;
605 values.append(&mut decoded);
606 Ok(())
607}
608
609fn read_selected_blocks_recursive<T: NcReadType>(
610 level: usize,
611 tail_start: usize,
612 current_offset: u64,
613 context: &BlockReadContext<'_>,
614 values: &mut Vec<T>,
615) -> Result<()> {
616 if level == tail_start {
617 let byte_offset = checked_usize_from_u64(
618 checked_mul_u64(
619 current_offset,
620 context.elem_size,
621 "classic slice element byte offset",
622 )?,
623 "classic slice element byte offset",
624 )?;
625 let start = context
626 .base_offset
627 .checked_add(byte_offset)
628 .ok_or_else(|| {
629 Error::InvalidData("classic slice byte offset exceeds platform usize".to_string())
630 })?;
631 let end = start.checked_add(context.block_bytes).ok_or_else(|| {
632 Error::InvalidData("classic slice byte range exceeds platform usize".to_string())
633 })?;
634 if end > context.file_data.len() {
635 return Err(Error::InvalidData(format!(
636 "variable '{}' slice data extends beyond file",
637 context.var_name
638 )));
639 }
640
641 let mut decoded =
642 T::decode_bulk_be(&context.file_data[start..end], context.block_elements)?;
643 values.append(&mut decoded);
644 return Ok(());
645 }
646
647 match &context.dims[level] {
648 ResolvedClassicSelectionDim::Index(idx) => read_selected_blocks_recursive::<T>(
649 level + 1,
650 tail_start,
651 current_offset
652 .checked_add(checked_mul_u64(
653 *idx,
654 context.strides[level],
655 "classic slice logical element offset",
656 )?)
657 .ok_or_else(|| {
658 Error::InvalidData(
659 "classic slice logical element offset exceeds u64".to_string(),
660 )
661 })?,
662 context,
663 values,
664 ),
665 ResolvedClassicSelectionDim::Slice {
666 start, step, count, ..
667 } => {
668 let start = *start;
669 let step = *step;
670 let count = *count;
671 for ordinal in 0..count {
672 let coord = start
673 .checked_add(checked_mul_u64(
674 ordinal as u64,
675 step,
676 "classic slice coordinate",
677 )?)
678 .ok_or_else(|| {
679 Error::InvalidData("classic slice coordinate exceeds u64".to_string())
680 })?;
681 read_selected_blocks_recursive::<T>(
682 level + 1,
683 tail_start,
684 current_offset
685 .checked_add(checked_mul_u64(
686 coord,
687 context.strides[level],
688 "classic slice logical element offset",
689 )?)
690 .ok_or_else(|| {
691 Error::InvalidData(
692 "classic slice logical element offset exceeds u64".to_string(),
693 )
694 })?,
695 context,
696 values,
697 )?;
698 }
699 Ok(())
700 }
701 }
702}
703
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NcDimension;

    /// Build a non-record `Char` variable named "chars" with one fixed-size
    /// dimension per entry of `shape`.
    fn char_variable(shape: &[u64]) -> NcVariable {
        let dimensions = shape
            .iter()
            .enumerate()
            .map(|(i, &size)| NcDimension {
                name: format!("d{i}"),
                size,
                is_unlimited: false,
            })
            .collect();

        NcVariable {
            name: "chars".to_string(),
            dimensions,
            dtype: NcType::Char,
            attributes: vec![],
            data_offset: 0,
            _data_size: 0,
            is_record_var: false,
            record_size: 0,
        }
    }

    /// A 1-D char variable decodes to a single string.
    #[test]
    fn test_decode_char_variable_strings_1d() {
        let decoded = decode_char_variable_strings(&char_variable(&[5]), b"alpha").unwrap();
        assert_eq!(decoded, vec!["alpha"]);
    }

    /// A 2-D char variable decodes to one string per row, with NUL padding
    /// removed.
    #[test]
    fn test_decode_char_variable_strings_2d() {
        let decoded =
            decode_char_variable_strings(&char_variable(&[2, 5]), b"alphabeta\0").unwrap();
        assert_eq!(decoded, vec!["alpha", "beta"]);
    }
}