Skip to main content

netcdf_reader/classic/
data.rs

1//! Data reading for classic (CDF-1/2/5) NetCDF files.
2//!
3//! Two layout types:
4//! - **Non-record variables**: contiguous data at the offset stored in the variable header.
5//! - **Record variables**: data is interleaved across records. Each record contains one
6//!   slice from every record variable, in the order they appear in the header. The total
7//!   record size is the sum of all record variables' vsize values (each padded to 4-byte
8//!   boundary in CDF-1/2).
9
10use ndarray::{ArrayD, IxDyn};
11#[cfg(feature = "rayon")]
12use rayon::prelude::*;
13
14use crate::error::{Error, Result};
15use crate::types::{NcType, NcVariable};
16
17use super::storage::ClassicStorage;
18
19/// Trait for types that can be read from classic NetCDF data.
20pub trait NcReadType: Clone + Default + Send + 'static {
21    /// The NetCDF type this Rust type corresponds to.
22    fn nc_type() -> NcType;
23
24    /// Read a single element from big-endian bytes.
25    fn from_be_bytes(bytes: &[u8]) -> Result<Self>;
26
27    /// Size in bytes of one element.
28    fn element_size() -> usize;
29
30    /// Bulk decode `count` elements from a contiguous big-endian byte slice.
31    ///
32    /// Default implementation falls back to per-element decoding. Types with
33    /// multi-byte elements override this with an optimized bulk path using
34    /// `chunks_exact` + byte-swap (on LE hosts) or `copy_nonoverlapping`
35    /// (on BE hosts).
36    fn decode_bulk_be(raw: &[u8], count: usize) -> Result<Vec<Self>> {
37        let elem_size = Self::element_size();
38        let needed = count.checked_mul(elem_size).ok_or_else(|| {
39            Error::InvalidData("classic decode byte count exceeds platform usize".to_string())
40        })?;
41        if raw.len() < needed {
42            return Err(Error::InvalidData(format!(
43                "need {} bytes for {} elements, got {}",
44                needed,
45                count,
46                raw.len()
47            )));
48        }
49        let mut values = Vec::with_capacity(count);
50        for i in 0..count {
51            let start = i * elem_size;
52            values.push(Self::from_be_bytes(&raw[start..start + elem_size])?);
53        }
54        Ok(values)
55    }
56
57    /// Bulk decode elements from a contiguous big-endian byte slice into a
58    /// caller-provided destination buffer.
59    fn decode_bulk_be_into(raw: &[u8], dst: &mut [Self]) -> Result<()> {
60        let elem_size = Self::element_size();
61        let needed = dst.len().checked_mul(elem_size).ok_or_else(|| {
62            Error::InvalidData("classic decode byte count exceeds platform usize".to_string())
63        })?;
64        if raw.len() < needed {
65            return Err(Error::InvalidData(format!(
66                "need {} bytes for {} elements, got {}",
67                needed,
68                dst.len(),
69                raw.len()
70            )));
71        }
72        for (out, chunk) in dst.iter_mut().zip(raw[..needed].chunks_exact(elem_size)) {
73            *out = Self::from_be_bytes(chunk)?;
74        }
75        Ok(())
76    }
77}
78
// Implements `NcReadType` for a primitive numeric type.
//
// Arguments:
// - `$ty`: the Rust element type.
// - `$nc_type`: the `NcType` value returned by `nc_type()`.
// - `$size`: the encoded element width in bytes. The big-endian fast paths
//   below copy raw bytes into `$ty` storage, so `$size` must equal
//   `size_of::<$ty>()` for every invocation.
macro_rules! impl_nc_read_type {
    ($ty:ty, $nc_type:expr, $size:expr) => {
        impl NcReadType for $ty {
            fn nc_type() -> NcType {
                $nc_type
            }

            // Decodes a single element from the first `$size` bytes of `bytes`;
            // any trailing bytes are ignored.
            fn from_be_bytes(bytes: &[u8]) -> Result<Self> {
                if bytes.len() < $size {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {}, got {}",
                        $size,
                        stringify!($ty),
                        bytes.len()
                    )));
                }
                let mut arr = [0u8; $size];
                arr.copy_from_slice(&bytes[..$size]);
                // The argument is a fixed-size array, so this is the primitive's
                // own `from_be_bytes`, not a recursive call into this trait method.
                Ok(<$ty>::from_be_bytes(arr))
            }

            fn element_size() -> usize {
                $size
            }

            // Bulk override: validates the input length once, then decodes
            // `count` elements from the front of `raw`.
            fn decode_bulk_be(raw: &[u8], count: usize) -> Result<Vec<Self>> {
                let total_bytes = count.checked_mul($size).ok_or_else(|| {
                    Error::InvalidData(
                        "classic decode byte count exceeds platform usize".to_string(),
                    )
                })?;
                if raw.len() < total_bytes {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {} elements of {}, got {}",
                        total_bytes,
                        count,
                        stringify!($ty),
                        raw.len()
                    )));
                }
                // Only the bytes that belong to the requested elements are decoded.
                let bytes = &raw[..total_bytes];
                #[cfg(target_endian = "big")]
                {
                    // Native BE: the file byte order matches the host, so the
                    // payload can be copied verbatim.
                    let mut values = Vec::<$ty>::with_capacity(count);
                    // SAFETY: `values` has capacity for `count` elements, i.e.
                    // `total_bytes` bytes provided `$size == size_of::<$ty>()`;
                    // `bytes` is exactly `total_bytes` long; the freshly
                    // allocated vector cannot overlap `raw`. The length is set
                    // only after all `count` elements have been written.
                    // NOTE(review): also relies on every bit pattern being a
                    // valid `$ty`, which holds for the integer and float types
                    // this macro is invoked with in this file.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            bytes.as_ptr(),
                            values.as_mut_ptr() as *mut u8,
                            total_bytes,
                        );
                        values.set_len(count);
                    }
                    Ok(values)
                }
                #[cfg(target_endian = "little")]
                {
                    // LE host reading BE data: chunks_exact + byte-swap.
                    Ok(bytes
                        .chunks_exact($size)
                        .map(|chunk| {
                            let mut arr = [0u8; $size];
                            arr.copy_from_slice(chunk);
                            <$ty>::from_be_bytes(arr)
                        })
                        .collect())
                }
            }

            // Bulk override: validates the input length once, then fills every
            // slot of `dst` from the front of `raw`.
            fn decode_bulk_be_into(raw: &[u8], dst: &mut [Self]) -> Result<()> {
                let total_bytes = dst.len().checked_mul($size).ok_or_else(|| {
                    Error::InvalidData(
                        "classic decode byte count exceeds platform usize".to_string(),
                    )
                })?;
                if raw.len() < total_bytes {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {} elements of {}, got {}",
                        total_bytes,
                        dst.len(),
                        stringify!($ty),
                        raw.len()
                    )));
                }
                let bytes = &raw[..total_bytes];
                #[cfg(target_endian = "big")]
                {
                    // SAFETY: `dst` spans `dst.len()` elements, i.e.
                    // `total_bytes` bytes provided `$size == size_of::<$ty>()`;
                    // `bytes` is exactly `total_bytes` long; `dst` is an
                    // exclusive borrow, so it cannot overlap the shared `raw`.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            bytes.as_ptr(),
                            dst.as_mut_ptr() as *mut u8,
                            total_bytes,
                        );
                    }
                    Ok(())
                }
                #[cfg(target_endian = "little")]
                {
                    // LE host: byte-swap each element while copying it out.
                    for (out, chunk) in dst.iter_mut().zip(bytes.chunks_exact($size)) {
                        let mut arr = [0u8; $size];
                        arr.copy_from_slice(chunk);
                        *out = <$ty>::from_be_bytes(arr);
                    }
                    Ok(())
                }
            }
        }
    };
}
188
// `NcReadType` implementations for the supported numeric element types.
// Each invocation pairs a Rust primitive with its `NcType` tag and its
// encoded width in bytes (which equals the primitive's in-memory size).
impl_nc_read_type!(i8, NcType::Byte, 1);
impl_nc_read_type!(i16, NcType::Short, 2);
impl_nc_read_type!(i32, NcType::Int, 4);
impl_nc_read_type!(f32, NcType::Float, 4);
impl_nc_read_type!(f64, NcType::Double, 8);
impl_nc_read_type!(u8, NcType::UByte, 1);
impl_nc_read_type!(u16, NcType::UShort, 2);
impl_nc_read_type!(u32, NcType::UInt, 4);
impl_nc_read_type!(i64, NcType::Int64, 8);
impl_nc_read_type!(u64, NcType::UInt64, 8);
199
200/// Read the entire data for a non-record variable into an ndarray.
201///
202/// The data is located at a contiguous region starting at `var.data_offset`
203/// with total size `var.data_size`.
204pub fn read_non_record_variable<T: NcReadType>(
205    file_data: &[u8],
206    var: &NcVariable,
207) -> Result<ArrayD<T>> {
208    if var.is_record_var {
209        return Err(Error::InvalidData(
210            "use read_record_variable for record variables".to_string(),
211        ));
212    }
213
214    let offset = crate::types::checked_usize_from_u64(var.data_offset, "variable data offset")?;
215    let total_elements = checked_non_record_element_count(var)?;
216    let elem_size = T::element_size();
217    let total_bytes = total_elements.checked_mul(elem_size).ok_or_else(|| {
218        Error::InvalidData(format!(
219            "variable '{}' size in bytes exceeds platform usize",
220            var.name
221        ))
222    })?;
223
224    let end = offset.checked_add(total_bytes).ok_or_else(|| {
225        Error::InvalidData(format!(
226            "variable '{}' byte range exceeds platform usize",
227            var.name
228        ))
229    })?;
230    if end > file_data.len() {
231        return Err(Error::InvalidData(format!(
232            "variable '{}' data extends beyond file: offset={}, size={}, file_len={}",
233            var.name,
234            offset,
235            total_bytes,
236            file_data.len()
237        )));
238    }
239
240    let data_slice = &file_data[offset..end];
241    let values = T::decode_bulk_be(data_slice, total_elements)?;
242
243    let shape: Vec<usize> = var
244        .shape()
245        .iter()
246        .map(|&s| crate::types::checked_usize_from_u64(s, "variable dimension"))
247        .collect::<Result<Vec<_>>>()?;
248    if shape.is_empty() {
249        // Scalar variable.
250        ArrayD::from_shape_vec(IxDyn(&[]), values)
251    } else {
252        ArrayD::from_shape_vec(IxDyn(&shape), values)
253    }
254    .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
255}
256
257/// Read the entire data for a non-record variable from range-backed storage.
258pub(crate) fn read_non_record_variable_from_storage<T: NcReadType>(
259    storage: &ClassicStorage,
260    var: &NcVariable,
261) -> Result<ArrayD<T>> {
262    if var.is_record_var {
263        return Err(Error::InvalidData(
264            "use read_record_variable_from_storage for record variables".to_string(),
265        ));
266    }
267
268    let total_elements = checked_non_record_element_count(var)?;
269    let total_bytes = variable_data_bytes::<T>(var.name.as_str(), total_elements)?;
270    let data = storage.read_range(var.data_offset, total_bytes)?;
271    let values = T::decode_bulk_be(data.as_ref(), total_elements)?;
272    let shape = checked_variable_shape(var)?;
273
274    ArrayD::from_shape_vec(IxDyn(&shape), values)
275        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
276}
277
/// Decode a whole non-record variable, splitting large reads into chunks
/// handled by `read_contiguous_range_parallel`.
///
/// Falls back to [`read_non_record_variable_from_storage`] when the variable
/// is empty or smaller than the storage's parallel-read threshold.
#[cfg(feature = "rayon")]
pub(crate) fn read_non_record_variable_parallel_from_storage<T: NcReadType>(
    storage: &ClassicStorage,
    var: &NcVariable,
) -> Result<ArrayD<T>> {
    if var.is_record_var {
        return Err(Error::InvalidData(
            "use read_record_variable_parallel_from_storage for record variables".to_string(),
        ));
    }

    let element_count = checked_non_record_element_count(var)?;
    let byte_len = variable_data_bytes::<T>(var.name.as_str(), element_count)?;
    let policy = storage.parallel_read_policy();

    // Parallelism only pays off for non-empty reads at or above the threshold.
    let worth_splitting = element_count != 0 && byte_len >= policy.min_bytes;
    if !worth_splitting {
        return read_non_record_variable_from_storage(storage, var);
    }

    let decoded = read_contiguous_range_parallel::<T>(
        storage,
        var.data_offset,
        element_count,
        policy.target_chunk_bytes,
    )?;
    let dims = checked_variable_shape(var)?;

    match ArrayD::from_shape_vec(IxDyn(&dims), decoded) {
        Ok(array) => Ok(array),
        Err(e) => Err(Error::InvalidData(format!(
            "failed to create array: {}",
            e
        ))),
    }
}
308
309/// Read the entire data for a non-record variable into a caller-provided buffer.
310pub fn read_non_record_variable_into<T: NcReadType>(
311    file_data: &[u8],
312    var: &NcVariable,
313    dst: &mut [T],
314) -> Result<()> {
315    if var.is_record_var {
316        return Err(Error::InvalidData(
317            "use read_record_variable_into for record variables".to_string(),
318        ));
319    }
320
321    let total_elements = checked_non_record_element_count(var)?;
322    if dst.len() != total_elements {
323        return Err(Error::InvalidData(format!(
324            "destination has {} elements, variable '{}' requires {}",
325            dst.len(),
326            var.name,
327            total_elements
328        )));
329    }
330
331    let offset = crate::types::checked_usize_from_u64(var.data_offset, "variable data offset")?;
332    let elem_size = T::element_size();
333    let total_bytes = total_elements.checked_mul(elem_size).ok_or_else(|| {
334        Error::InvalidData(format!(
335            "variable '{}' size in bytes exceeds platform usize",
336            var.name
337        ))
338    })?;
339
340    let end = offset.checked_add(total_bytes).ok_or_else(|| {
341        Error::InvalidData(format!(
342            "variable '{}' byte range exceeds platform usize",
343            var.name
344        ))
345    })?;
346    if end > file_data.len() {
347        return Err(Error::InvalidData(format!(
348            "variable '{}' data extends beyond file: offset={}, size={}, file_len={}",
349            var.name,
350            offset,
351            total_bytes,
352            file_data.len()
353        )));
354    }
355
356    T::decode_bulk_be_into(&file_data[offset..end], dst)
357}
358
359/// Read a non-record variable from range-backed storage into a caller-provided buffer.
360pub(crate) fn read_non_record_variable_into_from_storage<T: NcReadType>(
361    storage: &ClassicStorage,
362    var: &NcVariable,
363    dst: &mut [T],
364) -> Result<()> {
365    if var.is_record_var {
366        return Err(Error::InvalidData(
367            "use read_record_variable_into_from_storage for record variables".to_string(),
368        ));
369    }
370
371    let total_elements = checked_non_record_element_count(var)?;
372    if dst.len() != total_elements {
373        return Err(Error::InvalidData(format!(
374            "destination has {} elements, variable '{}' requires {}",
375            dst.len(),
376            var.name,
377            total_elements
378        )));
379    }
380
381    let total_bytes = variable_data_bytes::<T>(var.name.as_str(), total_elements)?;
382    let data = storage.read_range(var.data_offset, total_bytes)?;
383    T::decode_bulk_be_into(data.as_ref(), dst)
384}
385
386/// Read the entire data for a record variable into an ndarray.
387///
388/// Record variables are interleaved: for each of `numrecs` records, every record
389/// variable contributes `record_size` bytes (padded to 4-byte alignment for CDF-1/2).
390/// The `record_stride` is the total size of one record across all record variables.
391///
392/// Parameters:
393/// - `file_data`: the raw file bytes
394/// - `var`: the record variable to read
395/// - `numrecs`: number of records (from the file header)
396/// - `record_stride`: total bytes per record (sum of all record variables' padded vsizes)
397pub fn read_record_variable<T: NcReadType>(
398    file_data: &[u8],
399    var: &NcVariable,
400    numrecs: u64,
401    record_stride: u64,
402) -> Result<ArrayD<T>> {
403    if !var.is_record_var {
404        return Err(Error::InvalidData(
405            "use read_non_record_variable for non-record variables".to_string(),
406        ));
407    }
408
409    let elem_size = T::element_size();
410    let base_offset =
411        crate::types::checked_usize_from_u64(var.data_offset, "record variable data offset")?;
412    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
413    let record_stride_usize = crate::types::checked_usize_from_u64(record_stride, "record stride")?;
414
415    // Shape: the first dimension is the unlimited dimension, replaced by numrecs.
416    let mut shape: Vec<usize> = var
417        .shape()
418        .iter()
419        .map(|&s| crate::types::checked_usize_from_u64(s, "record variable dimension"))
420        .collect::<Result<Vec<_>>>()?;
421    if shape.is_empty() {
422        return Err(Error::InvalidData(
423            "record variable must have at least one dimension".to_string(),
424        ));
425    }
426    shape[0] = numrecs_usize;
427
428    // Number of elements per record (product of all dims except the first).
429    let elements_per_record: usize = shape[1..].iter().product::<usize>().max(1);
430    let bytes_per_record = elements_per_record.checked_mul(elem_size).ok_or_else(|| {
431        Error::InvalidData(format!(
432            "record variable '{}' bytes per record exceed platform usize",
433            var.name
434        ))
435    })?;
436    let total_elements = numrecs_usize
437        .checked_mul(elements_per_record)
438        .ok_or_else(|| {
439            Error::InvalidData(format!(
440                "record variable '{}' element count exceeds platform usize",
441                var.name
442            ))
443        })?;
444
445    let mut values = Vec::with_capacity(total_elements);
446
447    for rec in 0..numrecs_usize {
448        let rec_offset = base_offset
449            .checked_add(rec.checked_mul(record_stride_usize).ok_or_else(|| {
450                Error::InvalidData(format!(
451                    "record variable '{}' byte offset exceeds platform usize",
452                    var.name
453                ))
454            })?)
455            .ok_or_else(|| {
456                Error::InvalidData(format!(
457                    "record variable '{}' byte offset exceeds platform usize",
458                    var.name
459                ))
460            })?;
461        let rec_end = rec_offset.checked_add(bytes_per_record).ok_or_else(|| {
462            Error::InvalidData(format!(
463                "record variable '{}' record range exceeds platform usize",
464                var.name
465            ))
466        })?;
467        if rec_end > file_data.len() {
468            return Err(Error::InvalidData(format!(
469                "record {} for variable '{}' extends beyond file",
470                rec, var.name
471            )));
472        }
473        let rec_slice = &file_data[rec_offset..rec_end];
474        let rec_values = T::decode_bulk_be(rec_slice, elements_per_record)?;
475        values.extend(rec_values);
476    }
477
478    ArrayD::from_shape_vec(IxDyn(&shape), values)
479        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
480}
481
482/// Read the entire data for a record variable from range-backed storage.
483pub(crate) fn read_record_variable_from_storage<T: NcReadType>(
484    storage: &ClassicStorage,
485    var: &NcVariable,
486    numrecs: u64,
487    record_stride: u64,
488) -> Result<ArrayD<T>> {
489    if !var.is_record_var {
490        return Err(Error::InvalidData(
491            "use read_non_record_variable_from_storage for non-record variables".to_string(),
492        ));
493    }
494
495    let shape = checked_record_shape(var, numrecs)?;
496    let elements_per_record = checked_record_elements_per_record(var)?;
497    let bytes_per_record = variable_data_bytes::<T>(var.name.as_str(), elements_per_record)?;
498    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
499    let total_elements = numrecs_usize
500        .checked_mul(elements_per_record)
501        .ok_or_else(|| {
502            Error::InvalidData(format!(
503                "record variable '{}' element count exceeds platform usize",
504                var.name
505            ))
506        })?;
507    let mut values = Vec::with_capacity(total_elements);
508
509    for rec in 0..numrecs {
510        let rec_offset = record_byte_offset(var, rec, record_stride)?;
511        let rec_slice = storage.read_range(rec_offset, bytes_per_record)?;
512        let rec_values = T::decode_bulk_be(rec_slice.as_ref(), elements_per_record)?;
513        values.extend(rec_values);
514    }
515
516    ArrayD::from_shape_vec(IxDyn(&shape), values)
517        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
518}
519
/// Read the entire data for a record variable using Rayon for large reads.
///
/// The output buffer is split into chunks of whole records; each chunk is
/// filled independently through `read_record_chunk_into`. The read falls back
/// to [`read_record_variable_from_storage`] when the variable is empty, has at
/// most one record, or is smaller than the storage's parallel-read threshold.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `var` is not a record variable or when a
/// count or size does not fit in `usize`; shape, storage and decode errors
/// are propagated.
#[cfg(feature = "rayon")]
pub(crate) fn read_record_variable_parallel_from_storage<T: NcReadType>(
    storage: &ClassicStorage,
    var: &NcVariable,
    numrecs: u64,
    record_stride: u64,
) -> Result<ArrayD<T>> {
    if !var.is_record_var {
        return Err(Error::InvalidData(
            "use read_non_record_variable_parallel_from_storage for non-record variables"
                .to_string(),
        ));
    }

    let shape = checked_record_shape(var, numrecs)?;
    let elements_per_record = checked_record_elements_per_record(var)?;
    let bytes_per_record = variable_data_bytes::<T>(var.name.as_str(), elements_per_record)?;
    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
    let total_elements = numrecs_usize
        .checked_mul(elements_per_record)
        .ok_or_else(|| {
            Error::InvalidData(format!(
                "record variable '{}' element count exceeds platform usize",
                var.name
            ))
        })?;
    let logical_bytes = numrecs_usize.checked_mul(bytes_per_record).ok_or_else(|| {
        Error::InvalidData(format!(
            "record variable '{}' logical byte count exceeds platform usize",
            var.name
        ))
    })?;
    let policy = storage.parallel_read_policy();
    // `total_elements == 0` must take the sequential path: with zero elements
    // per record the chunk size below would be zero, and `par_chunks_mut(0)`
    // panics. The byte threshold alone does not cover this when
    // `policy.min_bytes` is zero.
    if logical_bytes < policy.min_bytes || numrecs_usize <= 1 || total_elements == 0 {
        return read_record_variable_from_storage(storage, var, numrecs, record_stride);
    }

    // Chunks always contain a whole number of records (at least one), sized to
    // approximate the policy's target chunk size.
    let records_per_chunk = (policy.target_chunk_bytes / bytes_per_record.max(1)).max(1);
    let elements_per_chunk = records_per_chunk
        .checked_mul(elements_per_record)
        .ok_or_else(|| {
            Error::InvalidData(
                "classic record chunk element count exceeds platform usize".to_string(),
            )
        })?;
    let mut values = vec![T::default(); total_elements];
    let chunk_plan = RecordChunkReadPlan {
        var,
        record_stride,
        elements_per_record,
        bytes_per_record,
    };
    values
        .par_chunks_mut(elements_per_chunk)
        .enumerate()
        .try_for_each(|(chunk, dst)| {
            let first_record = chunk.checked_mul(records_per_chunk).ok_or_else(|| {
                Error::InvalidData("classic record chunk offset exceeds platform usize".to_string())
            })?;
            // The final chunk may hold fewer records than `records_per_chunk`.
            let records = dst.len().checked_div(elements_per_record).ok_or_else(|| {
                Error::InvalidData("classic record elements per record is zero".to_string())
            })?;
            read_record_chunk_into::<T>(storage, &chunk_plan, first_record as u64, records, dst)
        })?;

    ArrayD::from_shape_vec(IxDyn(&shape), values)
        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
}
589
590/// Read the entire data for a record variable into a caller-provided buffer.
591pub fn read_record_variable_into<T: NcReadType>(
592    file_data: &[u8],
593    var: &NcVariable,
594    numrecs: u64,
595    record_stride: u64,
596    dst: &mut [T],
597) -> Result<()> {
598    if !var.is_record_var {
599        return Err(Error::InvalidData(
600            "use read_non_record_variable_into for non-record variables".to_string(),
601        ));
602    }
603
604    let elem_size = T::element_size();
605    let base_offset =
606        crate::types::checked_usize_from_u64(var.data_offset, "record variable data offset")?;
607    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
608    let record_stride_usize = crate::types::checked_usize_from_u64(record_stride, "record stride")?;
609
610    if var.dimensions.is_empty() {
611        return Err(Error::InvalidData(
612            "record variable must have at least one dimension".to_string(),
613        ));
614    }
615
616    let elements_per_record = checked_record_elements_per_record(var)?;
617    let bytes_per_record = elements_per_record.checked_mul(elem_size).ok_or_else(|| {
618        Error::InvalidData(format!(
619            "record variable '{}' bytes per record exceed platform usize",
620            var.name
621        ))
622    })?;
623    let total_elements = numrecs_usize
624        .checked_mul(elements_per_record)
625        .ok_or_else(|| {
626            Error::InvalidData(format!(
627                "record variable '{}' element count exceeds platform usize",
628                var.name
629            ))
630        })?;
631    if dst.len() != total_elements {
632        return Err(Error::InvalidData(format!(
633            "destination has {} elements, variable '{}' requires {}",
634            dst.len(),
635            var.name,
636            total_elements
637        )));
638    }
639
640    for rec in 0..numrecs_usize {
641        let rec_offset = base_offset
642            .checked_add(rec.checked_mul(record_stride_usize).ok_or_else(|| {
643                Error::InvalidData(format!(
644                    "record variable '{}' byte offset exceeds platform usize",
645                    var.name
646                ))
647            })?)
648            .ok_or_else(|| {
649                Error::InvalidData(format!(
650                    "record variable '{}' byte offset exceeds platform usize",
651                    var.name
652                ))
653            })?;
654        let rec_end = rec_offset.checked_add(bytes_per_record).ok_or_else(|| {
655            Error::InvalidData(format!(
656                "record variable '{}' record range exceeds platform usize",
657                var.name
658            ))
659        })?;
660        if rec_end > file_data.len() {
661            return Err(Error::InvalidData(format!(
662                "record {} for variable '{}' extends beyond file",
663                rec, var.name
664            )));
665        }
666
667        let dst_start = rec.checked_mul(elements_per_record).ok_or_else(|| {
668            Error::InvalidData(format!(
669                "record variable '{}' destination offset exceeds platform usize",
670                var.name
671            ))
672        })?;
673        let dst_end = dst_start.checked_add(elements_per_record).ok_or_else(|| {
674            Error::InvalidData(format!(
675                "record variable '{}' destination range exceeds platform usize",
676                var.name
677            ))
678        })?;
679        T::decode_bulk_be_into(
680            &file_data[rec_offset..rec_end],
681            &mut dst[dst_start..dst_end],
682        )?;
683    }
684
685    Ok(())
686}
687
688/// Read a record variable from range-backed storage into a caller-provided buffer.
689pub(crate) fn read_record_variable_into_from_storage<T: NcReadType>(
690    storage: &ClassicStorage,
691    var: &NcVariable,
692    numrecs: u64,
693    record_stride: u64,
694    dst: &mut [T],
695) -> Result<()> {
696    if !var.is_record_var {
697        return Err(Error::InvalidData(
698            "use read_non_record_variable_into_from_storage for non-record variables".to_string(),
699        ));
700    }
701
702    if var.dimensions.is_empty() {
703        return Err(Error::InvalidData(
704            "record variable must have at least one dimension".to_string(),
705        ));
706    }
707
708    let elements_per_record = checked_record_elements_per_record(var)?;
709    let bytes_per_record = variable_data_bytes::<T>(var.name.as_str(), elements_per_record)?;
710    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
711    let total_elements = numrecs_usize
712        .checked_mul(elements_per_record)
713        .ok_or_else(|| {
714            Error::InvalidData(format!(
715                "record variable '{}' element count exceeds platform usize",
716                var.name
717            ))
718        })?;
719    if dst.len() != total_elements {
720        return Err(Error::InvalidData(format!(
721            "destination has {} elements, variable '{}' requires {}",
722            dst.len(),
723            var.name,
724            total_elements
725        )));
726    }
727
728    for rec in 0..numrecs {
729        let rec_offset = record_byte_offset(var, rec, record_stride)?;
730        let rec_slice = storage.read_range(rec_offset, bytes_per_record)?;
731        let dst_start = crate::types::checked_usize_from_u64(rec, "record index")?
732            .checked_mul(elements_per_record)
733            .ok_or_else(|| {
734                Error::InvalidData(format!(
735                    "record variable '{}' destination offset exceeds platform usize",
736                    var.name
737                ))
738            })?;
739        let dst_end = dst_start.checked_add(elements_per_record).ok_or_else(|| {
740            Error::InvalidData(format!(
741                "record variable '{}' destination range exceeds platform usize",
742                var.name
743            ))
744        })?;
745        T::decode_bulk_be_into(rec_slice.as_ref(), &mut dst[dst_start..dst_end])?;
746    }
747
748    Ok(())
749}
750
751/// Compute the record stride: total bytes per record across all record variables.
752///
753/// Each record variable's per-record contribution is its `record_size` (already stored
754/// as vsize from the header), padded to 4-byte boundary.
755pub fn compute_record_stride(variables: &[NcVariable]) -> u64 {
756    variables
757        .iter()
758        .filter(|v| v.is_record_var)
759        .map(|v| {
760            let size = v.record_size;
761            // Pad each variable's per-record size to 4-byte boundary.
762            let rem = size % 4;
763            if rem == 0 {
764                size
765            } else {
766                size + (4 - rem)
767            }
768        })
769        .sum()
770}
771
772fn checked_non_record_element_count(var: &NcVariable) -> Result<usize> {
773    let mut total = 1u64;
774    for dim in &var.dimensions {
775        total = total.checked_mul(dim.size).ok_or_else(|| {
776            Error::InvalidData("variable element count overflows u64".to_string())
777        })?;
778    }
779    crate::types::checked_usize_from_u64(total, "variable element count")
780}
781
782pub(crate) fn checked_variable_shape(var: &NcVariable) -> Result<Vec<usize>> {
783    var.shape()
784        .iter()
785        .map(|&s| crate::types::checked_usize_from_u64(s, "variable dimension"))
786        .collect::<Result<Vec<_>>>()
787}
788
789fn checked_record_shape(var: &NcVariable, numrecs: u64) -> Result<Vec<usize>> {
790    let mut shape: Vec<usize> = var
791        .shape()
792        .iter()
793        .map(|&s| crate::types::checked_usize_from_u64(s, "record variable dimension"))
794        .collect::<Result<Vec<_>>>()?;
795    if shape.is_empty() {
796        return Err(Error::InvalidData(
797            "record variable must have at least one dimension".to_string(),
798        ));
799    }
800    shape[0] = crate::types::checked_usize_from_u64(numrecs, "record count")?;
801    Ok(shape)
802}
803
804fn checked_record_elements_per_record(var: &NcVariable) -> Result<usize> {
805    let mut elements = 1usize;
806    for dim in var.dimensions.iter().skip(1) {
807        let size = crate::types::checked_usize_from_u64(dim.size, "record variable dimension")?;
808        elements = elements.checked_mul(size).ok_or_else(|| {
809            Error::InvalidData(format!(
810                "record variable '{}' elements per record exceed platform usize",
811                var.name
812            ))
813        })?;
814    }
815    Ok(elements)
816}
817
818pub(crate) fn variable_data_bytes<T: NcReadType>(
819    var_name: &str,
820    element_count: usize,
821) -> Result<usize> {
822    element_count.checked_mul(T::element_size()).ok_or_else(|| {
823        Error::InvalidData(format!(
824            "variable '{var_name}' size in bytes exceeds platform usize"
825        ))
826    })
827}
828
829pub(crate) fn record_byte_offset(var: &NcVariable, record: u64, record_stride: u64) -> Result<u64> {
830    var.data_offset
831        .checked_add(record.checked_mul(record_stride).ok_or_else(|| {
832            Error::InvalidData(format!(
833                "record variable '{}' byte offset exceeds u64",
834                var.name
835            ))
836        })?)
837        .ok_or_else(|| {
838            Error::InvalidData(format!(
839                "record variable '{}' byte offset exceeds u64",
840                var.name
841            ))
842        })
843}
844
/// Read `total_elements` values of `T` stored back to back starting at `base_offset`,
/// decoding fixed-size chunks in parallel.
///
/// The output vector is split into chunks of about `target_chunk_bytes` bytes (never
/// fewer than one element). Each chunk is fetched with its own `storage.read_range`
/// call and decoded straight into its part of the output via `decode_bulk_be_into`.
///
/// # Errors
/// Returns `Error::InvalidData` when a chunk offset or byte count overflows, and
/// propagates errors from `read_range` and `decode_bulk_be_into`.
#[cfg(feature = "rayon")]
pub(crate) fn read_contiguous_range_parallel<T: NcReadType>(
    storage: &ClassicStorage,
    base_offset: u64,
    total_elements: usize,
    target_chunk_bytes: usize,
) -> Result<Vec<T>> {
    if total_elements == 0 {
        return Ok(Vec::new());
    }

    let width = T::element_size();
    // Clamp twice: the divisor must not be zero, and a chunk holds at least one element.
    let chunk_len = std::cmp::max(target_chunk_bytes / std::cmp::max(width, 1), 1);

    let mut decoded = vec![T::default(); total_elements];
    decoded
        .par_chunks_mut(chunk_len)
        .enumerate()
        .try_for_each(|(index, slot)| {
            // Position of this chunk, first in elements and then in bytes.
            let first_element = index.checked_mul(chunk_len).ok_or_else(|| {
                Error::InvalidData(
                    "classic parallel chunk offset exceeds platform usize".to_string(),
                )
            })?;
            let relative = crate::types::checked_mul_u64(
                first_element as u64,
                width as u64,
                "classic parallel byte offset",
            )?;
            let absolute = base_offset.checked_add(relative).ok_or_else(|| {
                Error::InvalidData("classic parallel byte offset exceeds u64".to_string())
            })?;

            // The final chunk may be shorter, so size the read from the slot itself.
            let span = variable_data_bytes::<T>("parallel chunk", slot.len())?;
            let raw = storage.read_range(absolute, span)?;
            T::decode_bulk_be_into(raw.as_ref(), slot)
        })?;

    Ok(decoded)
}
883
/// Per-variable parameters shared by every chunk read of one record variable.
#[cfg(feature = "rayon")]
struct RecordChunkReadPlan<'a> {
    /// Record variable being read; supplies the name used in error messages and the
    /// base offset consumed by `record_byte_offset`.
    var: &'a NcVariable,
    /// Byte distance between the starts of two consecutive records.
    record_stride: u64,
    /// Number of elements one record contributes to the destination slice.
    elements_per_record: usize,
    /// Number of bytes read from storage for one record of this variable.
    bytes_per_record: usize,
}
891
/// Decode `records` consecutive records of `plan.var`, starting at record
/// `first_record`, into `dst`.
///
/// `dst` must hold exactly `records * plan.elements_per_record` elements. When the
/// record stride equals the variable's bytes per record, the records are adjacent in
/// storage and are fetched with a single `read_range` call; otherwise each record is
/// read and decoded separately.
///
/// # Errors
/// Returns `Error::InvalidData` on a destination length mismatch or arithmetic
/// overflow, and propagates errors from `record_byte_offset`, `read_range` and
/// `decode_bulk_be_into`.
#[cfg(feature = "rayon")]
fn read_record_chunk_into<T: NcReadType>(
    storage: &ClassicStorage,
    plan: &RecordChunkReadPlan<'_>,
    first_record: u64,
    records: usize,
    dst: &mut [T],
) -> Result<()> {
    if records == 0 {
        return Ok(());
    }

    let per_record = plan.elements_per_record;

    let wanted = match records.checked_mul(per_record) {
        Some(count) => count,
        None => {
            return Err(Error::InvalidData(format!(
                "record variable '{}' chunk element count exceeds platform usize",
                plan.var.name
            )))
        }
    };
    if dst.len() != wanted {
        return Err(Error::InvalidData(format!(
            "record variable '{}' chunk destination has {} elements, expected {}",
            plan.var.name,
            dst.len(),
            wanted
        )));
    }

    let records_are_adjacent = plan.record_stride == plan.bytes_per_record as u64;
    if records_are_adjacent {
        // Nothing sits between this variable's records: one read covers the chunk.
        let start = record_byte_offset(plan.var, first_record, plan.record_stride)?;
        let span = match records.checked_mul(plan.bytes_per_record) {
            Some(bytes) => bytes,
            None => {
                return Err(Error::InvalidData(format!(
                    "record variable '{}' chunk byte count exceeds platform usize",
                    plan.var.name
                )))
            }
        };
        let raw = storage.read_range(start, span)?;
        T::decode_bulk_be_into(raw.as_ref(), dst)
    } else {
        // The stride differs from this variable's record size: read record by record.
        for ordinal in 0..records {
            let record = match first_record.checked_add(ordinal as u64) {
                Some(index) => index,
                None => {
                    return Err(Error::InvalidData(
                        "classic record index exceeds u64".to_string(),
                    ))
                }
            };
            let start = record_byte_offset(plan.var, record, plan.record_stride)?;
            let raw = storage.read_range(start, plan.bytes_per_record)?;

            let slot_start = match ordinal.checked_mul(per_record) {
                Some(position) => position,
                None => {
                    return Err(Error::InvalidData(format!(
                        "record variable '{}' chunk destination offset exceeds platform usize",
                        plan.var.name
                    )))
                }
            };
            let slot_end = match slot_start.checked_add(per_record) {
                Some(position) => position,
                None => {
                    return Err(Error::InvalidData(format!(
                        "record variable '{}' chunk destination range exceeds platform usize",
                        plan.var.name
                    )))
                }
            };
            T::decode_bulk_be_into(raw.as_ref(), &mut dst[slot_start..slot_end])?;
        }
        Ok(())
    }
}
959
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NcDimension;

    /// Fixed-size dimension named `name` with `size` entries.
    fn fixed_dim(name: &str, size: u64) -> NcDimension {
        NcDimension {
            name: name.to_string(),
            size,
            is_unlimited: false,
        }
    }

    /// Unlimited (record) dimension; its stored size is 0.
    fn unlimited_dim(name: &str) -> NcDimension {
        NcDimension {
            name: name.to_string(),
            size: 0,
            is_unlimited: true,
        }
    }

    /// Non-record variable with no attributes and `_data_size` 0; tests that need a
    /// different `_data_size` override it with struct-update syntax.
    fn fixed_var(
        name: &str,
        dimensions: Vec<NcDimension>,
        dtype: NcType,
        data_offset: u64,
    ) -> NcVariable {
        NcVariable {
            name: name.to_string(),
            dimensions,
            dtype,
            attributes: vec![],
            data_offset,
            _data_size: 0,
            is_record_var: false,
            record_size: 0,
        }
    }

    /// Record variable with no attributes and `_data_size` 0.
    fn record_var(
        name: &str,
        dimensions: Vec<NcDimension>,
        dtype: NcType,
        data_offset: u64,
        record_size: u64,
    ) -> NcVariable {
        NcVariable {
            name: name.to_string(),
            dimensions,
            dtype,
            attributes: vec![],
            data_offset,
            _data_size: 0,
            is_record_var: true,
            record_size,
        }
    }

    /// Record variable "temp" with shape [time, x] where x = 2: 8 bytes per record.
    fn temp_record_var(data_offset: u64) -> NcVariable {
        record_var(
            "temp",
            vec![unlimited_dim("time"), fixed_dim("x", 2)],
            NcType::Float,
            data_offset,
            8,
        )
    }

    /// Zero-filled buffer of `len` bytes holding `values` as consecutive big-endian
    /// floats starting at byte `base`.
    fn buffer_with_f32s(len: usize, base: usize, values: &[f32]) -> Vec<u8> {
        let mut buf = vec![0u8; len];
        for (slot, value) in buf[base..].chunks_exact_mut(4).zip(values) {
            slot.copy_from_slice(&value.to_be_bytes());
        }
        buf
    }

    #[test]
    fn read_non_record_1d_float() {
        // Fake file with 3 floats starting at offset 100.
        let file_data = buffer_with_f32s(200, 100, &[1.0, 2.0, 3.0]);
        let var = NcVariable {
            _data_size: 12,
            ..fixed_var("temp", vec![fixed_dim("x", 3)], NcType::Float, 100)
        };

        let arr: ArrayD<f32> = read_non_record_variable(&file_data, &var).unwrap();
        assert_eq!(arr.shape(), &[3]);
        assert_eq!(arr[[0]], 1.0f32);
        assert_eq!(arr[[1]], 2.0f32);
        assert_eq!(arr[[2]], 3.0f32);
    }

    #[test]
    fn non_record_variable_into_copies_values() {
        let values = [1.0f32, 2.0f32, 3.0f32];
        let file_data = buffer_with_f32s(200, 100, &values);
        let var = NcVariable {
            _data_size: 12,
            ..fixed_var("temp", vec![fixed_dim("x", 3)], NcType::Float, 100)
        };

        let mut dst = [0.0f32; 3];
        read_non_record_variable_into(&file_data, &var, &mut dst).unwrap();
        assert_eq!(dst, values);
    }

    #[test]
    fn read_non_record_2d_int() {
        // 2x3 array of i32 at offset 0.
        let values = [10i32, 20, 30, 40, 50, 60];
        let mut file_data = Vec::new();
        for value in values.iter() {
            file_data.extend_from_slice(&value.to_be_bytes());
        }

        let var = NcVariable {
            _data_size: 24,
            ..fixed_var(
                "grid",
                vec![fixed_dim("y", 2), fixed_dim("x", 3)],
                NcType::Int,
                0,
            )
        };

        let arr: ArrayD<i32> = read_non_record_variable(&file_data, &var).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr[[0, 0]], 10);
        assert_eq!(arr[[0, 2]], 30);
        assert_eq!(arr[[1, 0]], 40);
        assert_eq!(arr[[1, 2]], 60);
    }

    #[test]
    fn read_non_record_variable_into_rejects_wrong_destination_len() {
        let var = NcVariable {
            _data_size: 12,
            ..fixed_var("grid", vec![fixed_dim("x", 3)], NcType::Float, 0)
        };

        // Two slots for a three-element variable.
        let mut dst = [0.0f32; 2];
        let err = read_non_record_variable_into(&[0; 12], &var, &mut dst).unwrap_err();
        assert!(matches!(err, Error::InvalidData(_)));
    }

    #[test]
    fn record_stride_sums_padded_record_variables() {
        let vars = vec![
            // 5 floats: 20 bytes, already 4-aligned.
            record_var("a", vec![], NcType::Float, 0, 20),
            // 3 shorts: 6 bytes, padded to 8.
            record_var("b", vec![], NcType::Short, 0, 6),
            // Not a record variable, so it must be excluded.
            NcVariable {
                _data_size: 100,
                ..fixed_var("c", vec![], NcType::Double, 0)
            },
        ];
        // a: 20, b: 6 -> 8, total 28.
        assert_eq!(compute_record_stride(&vars), 28);
    }

    #[test]
    fn record_variable_reads_all_records() {
        // Single record variable "temp" with shape [time, x] where x=2.
        // 3 records of 2 floats each = 8 bytes per record, stored back to back from
        // offset 100 because the record stride is also 8.
        let file_data = buffer_with_f32s(200, 100, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
        let var = temp_record_var(100);

        let arr: ArrayD<f32> = read_record_variable(&file_data, &var, 3, 8).unwrap();
        assert_eq!(arr.shape(), &[3, 2]);
        assert_eq!(arr[[0, 0]], 1.0);
        assert_eq!(arr[[0, 1]], 2.0);
        assert_eq!(arr[[1, 0]], 3.0);
        assert_eq!(arr[[2, 1]], 6.0);
    }

    #[test]
    fn record_variable_into_copies_values() {
        let file_data = buffer_with_f32s(200, 100, &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
        let var = temp_record_var(100);

        let mut dst = [0.0f32; 6];
        read_record_variable_into(&file_data, &var, 3, 8, &mut dst).unwrap();
        assert_eq!(dst, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    #[test]
    fn read_record_variable_into_rejects_wrong_destination_len() {
        let var = temp_record_var(0);

        // Three records of two elements need six slots; five must be rejected.
        let mut dst = [0.0f32; 5];
        let err = read_record_variable_into(&[0; 24], &var, 3, 8, &mut dst).unwrap_err();
        assert!(matches!(err, Error::InvalidData(_)));
    }

    #[test]
    fn read_non_record_variable_rejects_element_count_overflow() {
        // u64::MAX * 2 cannot be represented as an element count.
        let var = fixed_var(
            "huge",
            vec![fixed_dim("y", u64::MAX), fixed_dim("x", 2)],
            NcType::Float,
            0,
        );

        let err = read_non_record_variable::<f32>(&[], &var).unwrap_err();
        assert!(matches!(err, Error::InvalidData(_)));
    }

    #[test]
    fn read_record_variable_rejects_record_offset_overflow() {
        // The data offset already sits at the very top of the u64 range.
        let var = record_var(
            "huge_record",
            vec![unlimited_dim("time"), fixed_dim("x", 1)],
            NcType::Float,
            u64::MAX,
            4,
        );

        let err = read_record_variable::<f32>(&[], &var, 1, 4).unwrap_err();
        assert!(matches!(err, Error::InvalidData(_)));
    }
}