1use ndarray::ArrayD;
7
8use crate::error::{Error, Result};
9use crate::types::{
10 checked_mul_u64, checked_shape_elements, checked_usize_from_u64, NcType, NcVariable,
11};
12
13use super::data::{self, compute_record_stride, NcReadType};
14use super::ClassicFile;
15
/// One dimension of a selection after it has been validated against the
/// variable's shape.
#[derive(Clone, Debug)]
enum ResolvedClassicSelectionDim {
    /// A single coordinate along the dimension. Index dimensions do not
    /// contribute an axis to the result shape.
    Index(u64),
    /// A strided run of coordinates; contributes one axis of length `count`
    /// to the result shape.
    Slice {
        /// First selected coordinate.
        start: u64,
        /// Distance between consecutive selected coordinates. The resolver in
        /// this file rejects a step of 0.
        step: u64,
        /// Number of selected coordinates; 0 for an empty slice.
        count: usize,
        /// `true` when the slice starts at 0, has step 1, and ends at the
        /// dimension size, i.e. it selects the dimension in full.
        is_full_unit_stride: bool,
    },
}
26
27impl ResolvedClassicSelectionDim {
28 fn is_full_unit_stride(&self) -> bool {
29 matches!(
30 self,
31 Self::Slice {
32 is_full_unit_stride: true,
33 ..
34 }
35 )
36 }
37}
38
/// A whole selection after validation: one resolved entry per variable
/// dimension plus the shape of the array the selection produces.
#[derive(Clone, Debug)]
struct ResolvedClassicSelection {
    /// Resolved selection for every dimension of the variable, in order.
    dims: Vec<ResolvedClassicSelectionDim>,
    /// Length of each result axis: the `count` of every `Slice` dimension, in
    /// order (`Index` dimensions are dropped).
    result_shape: Vec<usize>,
    /// Product of the `Slice` counts; 1 when every dimension is an `Index`.
    result_elements: usize,
}
45
/// Loop-invariant inputs shared by every level of the recursive block reader.
struct BlockReadContext<'a> {
    /// Raw bytes of the file being read.
    file_data: &'a [u8],
    /// Variable name, used only in error messages.
    var_name: &'a str,
    /// Byte offset into `file_data` that logical element offset 0 maps to.
    base_offset: usize,
    /// Precomputed strides and contiguous-block geometry for the selection.
    plan: &'a ContiguousSelectionPlan,
}
52
/// Precomputed geometry for reading a selection as a series of contiguous
/// blocks.
struct ContiguousSelectionPlan {
    /// Resolved selection, one entry per dimension of the planned shape.
    dims: Vec<ResolvedClassicSelectionDim>,
    /// Row-major stride of each dimension, measured in elements.
    strides: Vec<u64>,
    /// Index of the first dimension after the last one that is not a full
    /// unit-stride slice (0 when every dimension is). Dimensions from here on
    /// are read together as a single block.
    tail_start: usize,
    /// Elements per block: the product of the shape from `tail_start` onward.
    block_elements: usize,
    /// Size of one block in bytes (`block_elements` times `elem_size`).
    block_bytes: usize,
    /// Size of one stored element in bytes, as reported by `T::element_size()`.
    elem_size: u64,
}
61
/// Loop-invariant inputs for reading the selected part of individual records
/// of a record variable.
struct RecordSliceContext<'a> {
    /// Raw bytes of the file being read.
    file_data: &'a [u8],
    /// Variable name, used only in error messages.
    var_name: &'a str,
    /// Byte offset of the variable's data for record 0.
    base_offset: usize,
    /// Byte distance between the starts of consecutive records; record `r`
    /// is read from `base_offset + r * record_stride`.
    record_stride: usize,
    /// Selection over the non-record dimensions (everything after dimension 0).
    inner_resolved: &'a ResolvedClassicSelection,
    /// Block-read plan for `inner_resolved`, shared by every record.
    inner_plan: &'a ContiguousSelectionPlan,
}
70
71impl ClassicFile {
72 pub fn read_variable<T: NcReadType>(&self, name: &str) -> Result<ArrayD<T>> {
77 let var = self.find_variable(name)?;
78
79 let expected = T::nc_type();
81 if var.dtype != expected {
82 return Err(Error::TypeMismatch {
83 expected: format!("{:?}", expected),
84 actual: format!("{:?}", var.dtype),
85 });
86 }
87
88 let file_data = self.data.as_slice();
89
90 if var.is_record_var {
91 let record_stride = compute_record_stride(&self.root_group.variables);
92 data::read_record_variable(file_data, var, self.numrecs, record_stride)
93 } else {
94 data::read_non_record_variable(file_data, var)
95 }
96 }
97
98 pub fn read_variable_as_f64(&self, name: &str) -> Result<ArrayD<f64>> {
103 let var = self.find_variable(name)?;
104 let file_data = self.data.as_slice();
105
106 match var.dtype {
107 NcType::Byte => {
108 let arr = self.read_typed_variable::<i8>(var, file_data)?;
109 Ok(arr.mapv(|v| v as f64))
110 }
111 NcType::Short => {
112 let arr = self.read_typed_variable::<i16>(var, file_data)?;
113 Ok(arr.mapv(|v| v as f64))
114 }
115 NcType::Int => {
116 let arr = self.read_typed_variable::<i32>(var, file_data)?;
117 Ok(arr.mapv(|v| v as f64))
118 }
119 NcType::Float => {
120 let arr = self.read_typed_variable::<f32>(var, file_data)?;
121 Ok(arr.mapv(|v| v as f64))
122 }
123 NcType::Double => self.read_typed_variable::<f64>(var, file_data),
124 NcType::UByte => {
125 let arr = self.read_typed_variable::<u8>(var, file_data)?;
126 Ok(arr.mapv(|v| v as f64))
127 }
128 NcType::UShort => {
129 let arr = self.read_typed_variable::<u16>(var, file_data)?;
130 Ok(arr.mapv(|v| v as f64))
131 }
132 NcType::UInt => {
133 let arr = self.read_typed_variable::<u32>(var, file_data)?;
134 Ok(arr.mapv(|v| v as f64))
135 }
136 NcType::Int64 => {
137 let arr = self.read_typed_variable::<i64>(var, file_data)?;
138 Ok(arr.mapv(|v| v as f64))
139 }
140 NcType::UInt64 => {
141 let arr = self.read_typed_variable::<u64>(var, file_data)?;
142 Ok(arr.mapv(|v| v as f64))
143 }
144 NcType::Char => Err(Error::TypeMismatch {
145 expected: "numeric type".to_string(),
146 actual: "Char".to_string(),
147 }),
148 NcType::String => Err(Error::TypeMismatch {
149 expected: "numeric type".to_string(),
150 actual: "String".to_string(),
151 }),
152 _ => Err(Error::TypeMismatch {
153 expected: "numeric type".to_string(),
154 actual: format!("{:?}", var.dtype),
155 }),
156 }
157 }
158
159 pub fn read_variable_as_string(&self, name: &str) -> Result<String> {
161 let mut strings = self.read_variable_as_strings(name)?;
162 match strings.len() {
163 1 => Ok(strings.swap_remove(0)),
164 0 => Err(Error::InvalidData(format!(
165 "variable '{}' contains no string elements",
166 name
167 ))),
168 count => Err(Error::InvalidData(format!(
169 "variable '{}' contains {count} strings; use read_variable_as_strings()",
170 name
171 ))),
172 }
173 }
174
175 pub fn read_variable_as_strings(&self, name: &str) -> Result<Vec<String>> {
180 let var = self.find_variable(name)?;
181 if var.dtype != NcType::Char {
182 return Err(Error::TypeMismatch {
183 expected: "Char".to_string(),
184 actual: format!("{:?}", var.dtype),
185 });
186 }
187
188 let file_data = self.data.as_slice();
189 let arr = self.read_typed_variable::<u8>(var, file_data)?;
190 let bytes: Vec<u8> = arr.iter().copied().collect();
191 decode_char_variable_strings(var, &bytes)
192 }
193
194 pub fn read_variable_slice<T: NcReadType>(
199 &self,
200 name: &str,
201 selection: &crate::types::NcSliceInfo,
202 ) -> Result<ArrayD<T>> {
203 let var = self.find_variable(name)?;
204 let expected = T::nc_type();
205 if var.dtype != expected {
206 return Err(Error::TypeMismatch {
207 expected: format!("{:?}", expected),
208 actual: format!("{:?}", var.dtype),
209 });
210 }
211 let file_data = self.data.as_slice();
212 let resolved = resolve_classic_selection(
213 var,
214 selection,
215 if var.is_record_var { self.numrecs } else { 0 },
216 )?;
217
218 if !var.is_record_var {
219 return read_non_record_variable_slice_direct(file_data, var, &resolved);
220 }
221
222 let record_stride = compute_record_stride(&self.root_group.variables);
223 read_record_variable_slice_direct(file_data, var, self.numrecs, record_stride, &resolved)
224 }
225
226 pub fn read_variable_slice_as_f64(
228 &self,
229 name: &str,
230 selection: &crate::types::NcSliceInfo,
231 ) -> Result<ArrayD<f64>> {
232 let var = self.find_variable(name)?;
233
234 macro_rules! slice_promoted {
235 ($ty:ty) => {{
236 let sliced = self.read_variable_slice::<$ty>(name, selection)?;
237 Ok(sliced.mapv(|v| v as f64))
238 }};
239 }
240
241 match var.dtype {
242 NcType::Byte => slice_promoted!(i8),
243 NcType::Short => slice_promoted!(i16),
244 NcType::Int => slice_promoted!(i32),
245 NcType::Float => slice_promoted!(f32),
246 NcType::Double => slice_promoted!(f64),
247 NcType::UByte => slice_promoted!(u8),
248 NcType::UShort => slice_promoted!(u16),
249 NcType::UInt => slice_promoted!(u32),
250 NcType::Int64 => slice_promoted!(i64),
251 NcType::UInt64 => slice_promoted!(u64),
252 NcType::Char => Err(Error::TypeMismatch {
253 expected: "numeric type".to_string(),
254 actual: "Char".to_string(),
255 }),
256 _ => Err(Error::TypeMismatch {
257 expected: "numeric type".to_string(),
258 actual: format!("{:?}", var.dtype),
259 }),
260 }
261 }
262
263 fn find_variable(&self, name: &str) -> Result<&NcVariable> {
265 self.root_group
266 .variables
267 .iter()
268 .find(|v| v.name == name)
269 .ok_or_else(|| Error::VariableNotFound(name.to_string()))
270 }
271
272 fn read_typed_variable<T: NcReadType>(
274 &self,
275 var: &NcVariable,
276 file_data: &[u8],
277 ) -> Result<ArrayD<T>> {
278 if var.is_record_var {
279 let record_stride = compute_record_stride(&self.root_group.variables);
280 data::read_record_variable(file_data, var, self.numrecs, record_stride)
281 } else {
282 data::read_non_record_variable(file_data, var)
283 }
284 }
285}
286
287fn decode_char_variable_strings(var: &NcVariable, bytes: &[u8]) -> Result<Vec<String>> {
288 let shape = var.shape();
289 if shape.len() <= 1 {
290 return Ok(vec![decode_char_string(bytes)]);
291 }
292
293 let string_len = checked_usize_from_u64(
294 *shape
295 .last()
296 .ok_or_else(|| Error::InvalidData("char variable missing string axis".into()))?,
297 "char string length",
298 )?;
299 let string_count_u64 = checked_shape_elements(&shape[..shape.len() - 1], "char string count")?;
300 let string_count = checked_usize_from_u64(string_count_u64, "char string count")?;
301 let expected_bytes = string_count.checked_mul(string_len).ok_or_else(|| {
302 Error::InvalidData("char string byte count exceeds platform usize".to_string())
303 })?;
304
305 if bytes.len() < expected_bytes {
306 return Err(Error::InvalidData(format!(
307 "char variable '{}' data too short: need {} bytes, have {}",
308 var.name,
309 expected_bytes,
310 bytes.len()
311 )));
312 }
313
314 if string_len == 0 {
315 return Ok(vec![String::new(); string_count]);
316 }
317
318 Ok(bytes[..expected_bytes]
319 .chunks_exact(string_len)
320 .map(decode_char_string)
321 .collect())
322}
323
/// Decodes one fixed-width character field: trailing NUL padding is removed
/// and invalid UTF-8 sequences are replaced with U+FFFD. NUL bytes that are
/// followed by other data are kept.
fn decode_char_string(bytes: &[u8]) -> String {
    // A NUL byte always decodes to exactly one '\0' char, so trimming the
    // padding on the byte level gives the same text as trimming afterwards.
    let content_len = bytes
        .iter()
        .rposition(|&byte| byte != 0)
        .map_or(0, |last| last + 1);
    String::from_utf8_lossy(&bytes[..content_len]).into_owned()
}
329
330fn variable_shape_for_selection(var: &NcVariable, numrecs: u64) -> Vec<u64> {
331 let mut shape = var.shape();
332 if var.is_record_var && !shape.is_empty() {
333 shape[0] = numrecs;
334 }
335 shape
336}
337
338fn row_major_strides(shape: &[u64], context: &str) -> Result<Vec<u64>> {
339 let ndim = shape.len();
340 if ndim == 0 {
341 return Ok(Vec::new());
342 }
343
344 let mut strides = vec![1u64; ndim];
345 for i in (0..ndim - 1).rev() {
346 strides[i] = checked_mul_u64(strides[i + 1], shape[i + 1], context)?;
347 }
348 Ok(strides)
349}
350
351fn resolve_classic_selection(
352 var: &NcVariable,
353 selection: &crate::types::NcSliceInfo,
354 numrecs: u64,
355) -> Result<ResolvedClassicSelection> {
356 use crate::types::NcSliceInfoElem;
357
358 let shape = variable_shape_for_selection(var, numrecs);
359 if selection.selections.len() != shape.len() {
360 return Err(Error::InvalidData(format!(
361 "selection has {} dimensions but variable '{}' has {}",
362 selection.selections.len(),
363 var.name,
364 shape.len()
365 )));
366 }
367
368 let mut dims = Vec::with_capacity(shape.len());
369 let mut result_shape = Vec::new();
370 let mut result_elements = 1usize;
371
372 for (dim, (sel, &dim_size)) in selection.selections.iter().zip(shape.iter()).enumerate() {
373 match sel {
374 NcSliceInfoElem::Index(idx) => {
375 if *idx >= dim_size {
376 return Err(Error::InvalidData(format!(
377 "index {} out of bounds for dimension {} (size {})",
378 idx, dim, dim_size
379 )));
380 }
381 dims.push(ResolvedClassicSelectionDim::Index(*idx));
382 }
383 NcSliceInfoElem::Slice { start, end, step } => {
384 if *step == 0 {
385 return Err(Error::InvalidData("slice step cannot be 0".to_string()));
386 }
387 if *start > dim_size {
388 return Err(Error::InvalidData(format!(
389 "slice start {} out of bounds for dimension {} (size {})",
390 start, dim, dim_size
391 )));
392 }
393
394 let actual_end = if *end == u64::MAX {
395 dim_size
396 } else {
397 (*end).min(dim_size)
398 };
399 let count_u64 = if *start >= actual_end {
400 0
401 } else {
402 (actual_end - *start).div_ceil(*step)
403 };
404 let count = checked_usize_from_u64(count_u64, "classic slice result dimension")?;
405
406 result_shape.push(count);
407 result_elements = result_elements.checked_mul(count).ok_or_else(|| {
408 Error::InvalidData(
409 "classic slice result element count exceeds platform usize".to_string(),
410 )
411 })?;
412 dims.push(ResolvedClassicSelectionDim::Slice {
413 start: *start,
414 step: *step,
415 count,
416 is_full_unit_stride: *start == 0 && actual_end == dim_size && *step == 1,
417 });
418 }
419 }
420 }
421
422 Ok(ResolvedClassicSelection {
423 dims,
424 result_shape,
425 result_elements,
426 })
427}
428
429fn read_non_record_variable_slice_direct<T: NcReadType>(
430 file_data: &[u8],
431 var: &NcVariable,
432 resolved: &ResolvedClassicSelection,
433) -> Result<ArrayD<T>> {
434 let shape = variable_shape_for_selection(var, 0);
435 let base_offset = checked_usize_from_u64(var.data_offset, "classic slice data offset")?;
436 build_array_from_contiguous_selection::<T>(file_data, &var.name, base_offset, &shape, resolved)
437}
438
439fn read_record_variable_slice_direct<T: NcReadType>(
440 file_data: &[u8],
441 var: &NcVariable,
442 numrecs: u64,
443 record_stride: u64,
444 resolved: &ResolvedClassicSelection,
445) -> Result<ArrayD<T>> {
446 use ndarray::IxDyn;
447
448 if resolved.result_elements == 0 {
449 return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), Vec::new())
450 .map_err(|e| Error::InvalidData(format!("failed to create array: {e}")));
451 }
452
453 let shape = variable_shape_for_selection(var, numrecs);
454 let inner_shape = &shape[1..];
455 let inner_dims = resolved.dims[1..].to_vec();
456 let inner_resolved = ResolvedClassicSelection {
457 result_shape: selection_result_shape(&inner_dims),
458 result_elements: selection_result_elements(&inner_dims)?,
459 dims: inner_dims,
460 };
461 let inner_plan = build_contiguous_selection_plan::<T>(inner_shape, &inner_resolved.dims)?;
462 let base_offset = checked_usize_from_u64(var.data_offset, "classic slice data offset")?;
463 let record_stride = checked_usize_from_u64(record_stride, "classic record stride")?;
464 let mut values = Vec::with_capacity(resolved.result_elements);
465 let context = RecordSliceContext {
466 file_data,
467 var_name: &var.name,
468 base_offset,
469 record_stride,
470 inner_resolved: &inner_resolved,
471 inner_plan: &inner_plan,
472 };
473
474 match &resolved.dims[0] {
475 ResolvedClassicSelectionDim::Index(record) => {
476 append_one_record_slice::<T>(&context, *record, &mut values)?
477 }
478 ResolvedClassicSelectionDim::Slice {
479 start, step, count, ..
480 } => {
481 for ordinal in 0..*count {
482 let record = start
483 .checked_add(checked_mul_u64(
484 ordinal as u64,
485 *step,
486 "classic record slice coordinate",
487 )?)
488 .ok_or_else(|| {
489 Error::InvalidData(
490 "classic record slice coordinate exceeds u64".to_string(),
491 )
492 })?;
493 append_one_record_slice::<T>(&context, record, &mut values)?;
494 }
495 }
496 }
497
498 debug_assert_eq!(values.len(), resolved.result_elements);
499 ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), values)
500 .map_err(|e| Error::InvalidData(format!("failed to create array: {e}")))
501}
502
503fn selection_result_shape(dims: &[ResolvedClassicSelectionDim]) -> Vec<usize> {
504 dims.iter()
505 .filter_map(|dim| match dim {
506 ResolvedClassicSelectionDim::Index(_) => None,
507 ResolvedClassicSelectionDim::Slice { count, .. } => Some(*count),
508 })
509 .collect()
510}
511
512fn selection_result_elements(dims: &[ResolvedClassicSelectionDim]) -> Result<usize> {
513 let mut elements = 1usize;
514 for dim in dims {
515 if let ResolvedClassicSelectionDim::Slice { count, .. } = dim {
516 elements = elements.checked_mul(*count).ok_or_else(|| {
517 Error::InvalidData(
518 "classic slice result element count exceeds platform usize".to_string(),
519 )
520 })?;
521 }
522 }
523 Ok(elements)
524}
525
526fn build_array_from_contiguous_selection<T: NcReadType>(
527 file_data: &[u8],
528 var_name: &str,
529 base_offset: usize,
530 shape: &[u64],
531 resolved: &ResolvedClassicSelection,
532) -> Result<ArrayD<T>> {
533 use ndarray::IxDyn;
534
535 let plan = build_contiguous_selection_plan::<T>(shape, &resolved.dims)?;
536 let values = read_contiguous_selection_values_with_plan::<T>(
537 file_data,
538 var_name,
539 base_offset,
540 &plan,
541 resolved.result_elements,
542 )?;
543 ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), values)
544 .map_err(|e| Error::InvalidData(format!("failed to create array: {e}")))
545}
546
547fn build_contiguous_selection_plan<T: NcReadType>(
548 shape: &[u64],
549 dims: &[ResolvedClassicSelectionDim],
550) -> Result<ContiguousSelectionPlan> {
551 let strides = row_major_strides(shape, "classic slice stride")?;
552 let tail_start = dims
553 .iter()
554 .rposition(|dim| !dim.is_full_unit_stride())
555 .map_or(0, |idx| idx + 1);
556 let block_elements_u64 = checked_shape_elements(
557 &shape[tail_start..],
558 "classic slice contiguous block element count",
559 )?;
560 let block_elements = checked_usize_from_u64(
561 block_elements_u64,
562 "classic slice contiguous block element count",
563 )?;
564 let block_bytes = checked_usize_from_u64(
565 checked_mul_u64(
566 block_elements_u64,
567 T::element_size() as u64,
568 "classic slice contiguous block size in bytes",
569 )?,
570 "classic slice contiguous block size in bytes",
571 )?;
572
573 Ok(ContiguousSelectionPlan {
574 dims: dims.to_vec(),
575 strides,
576 tail_start,
577 block_elements,
578 block_bytes,
579 elem_size: T::element_size() as u64,
580 })
581}
582
583fn read_contiguous_selection_values_with_plan<T: NcReadType>(
584 file_data: &[u8],
585 var_name: &str,
586 base_offset: usize,
587 plan: &ContiguousSelectionPlan,
588 result_elements: usize,
589) -> Result<Vec<T>> {
590 if result_elements == 0 {
591 return Ok(Vec::new());
592 }
593
594 let mut values = Vec::with_capacity(result_elements);
595 let context = BlockReadContext {
596 file_data,
597 var_name,
598 base_offset,
599 plan,
600 };
601 read_selected_blocks_recursive::<T>(0, 0, &context, &mut values)?;
602 Ok(values)
603}
604
605fn append_one_record_slice<T: NcReadType>(
606 context: &RecordSliceContext<'_>,
607 record: u64,
608 values: &mut Vec<T>,
609) -> Result<()> {
610 let record = checked_usize_from_u64(record, "classic record index")?;
611 let record_offset = context
612 .base_offset
613 .checked_add(record.checked_mul(context.record_stride).ok_or_else(|| {
614 Error::InvalidData("classic record byte offset exceeds platform usize".to_string())
615 })?)
616 .ok_or_else(|| {
617 Error::InvalidData("classic record byte offset exceeds platform usize".to_string())
618 })?;
619 let mut decoded = read_contiguous_selection_values_with_plan::<T>(
620 context.file_data,
621 context.var_name,
622 record_offset,
623 context.inner_plan,
624 context.inner_resolved.result_elements,
625 )?;
626 values.append(&mut decoded);
627 Ok(())
628}
629
630fn read_selected_blocks_recursive<T: NcReadType>(
631 level: usize,
632 current_offset: u64,
633 context: &BlockReadContext<'_>,
634 values: &mut Vec<T>,
635) -> Result<()> {
636 if level == context.plan.tail_start {
637 let byte_offset = checked_usize_from_u64(
638 checked_mul_u64(
639 current_offset,
640 context.plan.elem_size,
641 "classic slice element byte offset",
642 )?,
643 "classic slice element byte offset",
644 )?;
645 let start = context
646 .base_offset
647 .checked_add(byte_offset)
648 .ok_or_else(|| {
649 Error::InvalidData("classic slice byte offset exceeds platform usize".to_string())
650 })?;
651 let end = start.checked_add(context.plan.block_bytes).ok_or_else(|| {
652 Error::InvalidData("classic slice byte range exceeds platform usize".to_string())
653 })?;
654 if end > context.file_data.len() {
655 return Err(Error::InvalidData(format!(
656 "variable '{}' slice data extends beyond file",
657 context.var_name
658 )));
659 }
660
661 let mut decoded =
662 T::decode_bulk_be(&context.file_data[start..end], context.plan.block_elements)?;
663 values.append(&mut decoded);
664 return Ok(());
665 }
666
667 match &context.plan.dims[level] {
668 ResolvedClassicSelectionDim::Index(idx) => read_selected_blocks_recursive::<T>(
669 level + 1,
670 current_offset
671 .checked_add(checked_mul_u64(
672 *idx,
673 context.plan.strides[level],
674 "classic slice logical element offset",
675 )?)
676 .ok_or_else(|| {
677 Error::InvalidData(
678 "classic slice logical element offset exceeds u64".to_string(),
679 )
680 })?,
681 context,
682 values,
683 ),
684 ResolvedClassicSelectionDim::Slice {
685 start, step, count, ..
686 } => {
687 let start = *start;
688 let step = *step;
689 let count = *count;
690 for ordinal in 0..count {
691 let coord = start
692 .checked_add(checked_mul_u64(
693 ordinal as u64,
694 step,
695 "classic slice coordinate",
696 )?)
697 .ok_or_else(|| {
698 Error::InvalidData("classic slice coordinate exceeds u64".to_string())
699 })?;
700 read_selected_blocks_recursive::<T>(
701 level + 1,
702 current_offset
703 .checked_add(checked_mul_u64(
704 coord,
705 context.plan.strides[level],
706 "classic slice logical element offset",
707 )?)
708 .ok_or_else(|| {
709 Error::InvalidData(
710 "classic slice logical element offset exceeds u64".to_string(),
711 )
712 })?,
713 context,
714 values,
715 )?;
716 }
717 Ok(())
718 }
719 }
720}
721
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NcDimension;

    /// Builds a non-record `Char` variable with the given shape and no data.
    fn char_variable(shape: &[u64]) -> NcVariable {
        NcVariable {
            name: "chars".to_string(),
            dimensions: shape
                .iter()
                .enumerate()
                .map(|(i, &size)| NcDimension {
                    name: format!("d{i}"),
                    size,
                    is_unlimited: false,
                })
                .collect(),
            dtype: NcType::Char,
            attributes: vec![],
            data_offset: 0,
            _data_size: 0,
            is_record_var: false,
            record_size: 0,
        }
    }

    #[test]
    fn test_decode_char_variable_strings_1d() {
        let var = char_variable(&[5]);
        let strings = decode_char_variable_strings(&var, b"alpha").unwrap();
        assert_eq!(strings, vec!["alpha"]);
    }

    #[test]
    fn test_decode_char_variable_strings_2d() {
        let var = char_variable(&[2, 5]);
        let strings = decode_char_variable_strings(&var, b"alphabeta\0").unwrap();
        assert_eq!(strings, vec!["alpha", "beta"]);
    }

    #[test]
    fn test_decode_char_variable_strings_1d_trims_nul_padding() {
        let var = char_variable(&[8]);
        let strings = decode_char_variable_strings(&var, b"abc\0\0\0\0\0").unwrap();
        assert_eq!(strings, vec!["abc"]);
    }

    #[test]
    fn test_decode_char_variable_strings_zero_length_strings() {
        // A string axis of length 0 still yields one empty string per row.
        let var = char_variable(&[3, 0]);
        let strings = decode_char_variable_strings(&var, b"").unwrap();
        assert_eq!(strings, vec!["", "", ""]);
    }

    #[test]
    fn test_decode_char_variable_strings_ignores_surplus_bytes() {
        let var = char_variable(&[2, 3]);
        let strings = decode_char_variable_strings(&var, b"abcdefXYZ").unwrap();
        assert_eq!(strings, vec!["abc", "def"]);
    }

    #[test]
    fn test_decode_char_variable_strings_rejects_short_data() {
        let var = char_variable(&[2, 5]);
        let result = decode_char_variable_strings(&var, b"alpha");
        assert!(matches!(result, Err(Error::InvalidData(_))));
    }

    #[test]
    fn test_decode_char_string_replaces_invalid_utf8() {
        assert_eq!(decode_char_string(&[0xff, b'a', 0]), "\u{FFFD}a");
        assert_eq!(decode_char_string(b"a\0b\0\0"), "a\0b");
    }
}