Skip to main content

netcdf_reader/classic/
data.rs

1//! Data reading for classic (CDF-1/2/5) NetCDF files.
2//!
3//! Two layout types:
4//! - **Non-record variables**: contiguous data at the offset stored in the variable header.
5//! - **Record variables**: data is interleaved across records. Each record contains one
6//!   slice from every record variable, in the order they appear in the header. The total
7//!   record size is the sum of all record variables' vsize values (each padded to 4-byte
8//!   boundary in CDF-1/2).
9
10use ndarray::{ArrayD, IxDyn};
11#[cfg(feature = "rayon")]
12use rayon::prelude::*;
13
14use crate::error::{Error, Result};
15use crate::types::{NcType, NcVariable};
16
17use super::storage::ClassicStorage;
18
19/// Trait for types that can be read from classic NetCDF data.
20pub trait NcReadType: Clone + Default + Send + 'static {
21    /// The NetCDF type this Rust type corresponds to.
22    fn nc_type() -> NcType;
23
24    /// Read a single element from big-endian bytes.
25    fn from_be_bytes(bytes: &[u8]) -> Result<Self>;
26
27    /// Size in bytes of one element.
28    fn element_size() -> usize;
29
30    /// Bulk decode `count` elements from a contiguous big-endian byte slice.
31    ///
32    /// Default implementation falls back to per-element decoding. Types with
33    /// multi-byte elements override this with an optimized bulk path using
34    /// `chunks_exact` + byte-swap (on LE hosts) or `copy_nonoverlapping`
35    /// (on BE hosts).
36    fn decode_bulk_be(raw: &[u8], count: usize) -> Result<Vec<Self>> {
37        let elem_size = Self::element_size();
38        let needed = count.checked_mul(elem_size).ok_or_else(|| {
39            Error::InvalidData("classic decode byte count exceeds platform usize".to_string())
40        })?;
41        if raw.len() < needed {
42            return Err(Error::InvalidData(format!(
43                "need {} bytes for {} elements, got {}",
44                needed,
45                count,
46                raw.len()
47            )));
48        }
49        let mut values = Vec::with_capacity(count);
50        for i in 0..count {
51            let start = i * elem_size;
52            values.push(Self::from_be_bytes(&raw[start..start + elem_size])?);
53        }
54        Ok(values)
55    }
56
57    /// Bulk decode elements from a contiguous big-endian byte slice into a
58    /// caller-provided destination buffer.
59    fn decode_bulk_be_into(raw: &[u8], dst: &mut [Self]) -> Result<()> {
60        let elem_size = Self::element_size();
61        let needed = dst.len().checked_mul(elem_size).ok_or_else(|| {
62            Error::InvalidData("classic decode byte count exceeds platform usize".to_string())
63        })?;
64        if raw.len() < needed {
65            return Err(Error::InvalidData(format!(
66                "need {} bytes for {} elements, got {}",
67                needed,
68                dst.len(),
69                raw.len()
70            )));
71        }
72        for (out, chunk) in dst.iter_mut().zip(raw[..needed].chunks_exact(elem_size)) {
73            *out = Self::from_be_bytes(chunk)?;
74        }
75        Ok(())
76    }
77}
78
/// Implements `NcReadType` for a numeric type.
///
/// Arguments, in order: the Rust type, the matching `NcType` value, and the
/// encoded element width in bytes. The width must equal the byte width of the
/// Rust type, because it is used as the array length passed to the type's
/// inherent `from_be_bytes`.
macro_rules! impl_nc_read_type {
    ($ty:ty, $nc_type:expr, $size:expr) => {
        impl NcReadType for $ty {
            fn nc_type() -> NcType {
                $nc_type
            }

            fn from_be_bytes(bytes: &[u8]) -> Result<Self> {
                // Only the leading `$size` bytes are consumed; extra bytes are ignored.
                match bytes.get(..$size) {
                    Some(head) => {
                        let mut buf = [0u8; $size];
                        buf.copy_from_slice(head);
                        Ok(<$ty>::from_be_bytes(buf))
                    }
                    None => Err(Error::InvalidData(format!(
                        "need {} bytes for {}, got {}",
                        $size,
                        stringify!($ty),
                        bytes.len()
                    ))),
                }
            }

            fn element_size() -> usize {
                $size
            }

            fn decode_bulk_be(raw: &[u8], count: usize) -> Result<Vec<Self>> {
                let byte_len = match count.checked_mul($size) {
                    Some(n) => n,
                    None => {
                        return Err(Error::InvalidData(
                            "classic decode byte count exceeds platform usize".to_string(),
                        ));
                    }
                };
                if byte_len > raw.len() {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {} elements of {}, got {}",
                        byte_len,
                        count,
                        stringify!($ty),
                        raw.len()
                    )));
                }
                let payload = &raw[..byte_len];
                #[cfg(target_endian = "big")]
                {
                    // The file byte order matches the host, so the payload is copied verbatim.
                    let mut out = Vec::<$ty>::with_capacity(count);
                    // SAFETY: `out` has capacity for `count` elements, i.e. `byte_len`
                    // bytes given that `$size` is the width of `$ty`; `payload` is
                    // exactly `byte_len` bytes; the freshly allocated buffer cannot
                    // overlap `payload`. The macro is intended for primitive numeric
                    // types, for which every bit pattern is a valid value.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            payload.as_ptr(),
                            out.as_mut_ptr() as *mut u8,
                            byte_len,
                        );
                        out.set_len(count);
                    }
                    Ok(out)
                }
                #[cfg(target_endian = "little")]
                {
                    // Little-endian host: every element needs a byte swap.
                    let mut out = Vec::<$ty>::with_capacity(count);
                    out.extend(payload.chunks_exact($size).map(|word| {
                        let mut buf = [0u8; $size];
                        buf.copy_from_slice(word);
                        <$ty>::from_be_bytes(buf)
                    }));
                    Ok(out)
                }
            }

            fn decode_bulk_be_into(raw: &[u8], dst: &mut [Self]) -> Result<()> {
                let byte_len = match dst.len().checked_mul($size) {
                    Some(n) => n,
                    None => {
                        return Err(Error::InvalidData(
                            "classic decode byte count exceeds platform usize".to_string(),
                        ));
                    }
                };
                if byte_len > raw.len() {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {} elements of {}, got {}",
                        byte_len,
                        dst.len(),
                        stringify!($ty),
                        raw.len()
                    )));
                }
                let payload = &raw[..byte_len];
                #[cfg(target_endian = "big")]
                {
                    // SAFETY: `dst` spans `dst.len() * $size == byte_len` bytes given
                    // that `$size` is the width of `$ty`; `payload` is exactly
                    // `byte_len` bytes; a shared and an exclusive borrow cannot
                    // alias. The macro is intended for primitive numeric types, for
                    // which every bit pattern is a valid value.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            payload.as_ptr(),
                            dst.as_mut_ptr() as *mut u8,
                            byte_len,
                        );
                    }
                    Ok(())
                }
                #[cfg(target_endian = "little")]
                {
                    for (word, slot) in payload.chunks_exact($size).zip(dst.iter_mut()) {
                        let mut buf = [0u8; $size];
                        buf.copy_from_slice(word);
                        *slot = <$ty>::from_be_bytes(buf);
                    }
                    Ok(())
                }
            }
        }
    };
}
188
189impl_nc_read_type!(i8, NcType::Byte, 1);
190impl_nc_read_type!(i16, NcType::Short, 2);
191impl_nc_read_type!(i32, NcType::Int, 4);
192impl_nc_read_type!(f32, NcType::Float, 4);
193impl_nc_read_type!(f64, NcType::Double, 8);
194impl_nc_read_type!(u8, NcType::UByte, 1);
195impl_nc_read_type!(u16, NcType::UShort, 2);
196impl_nc_read_type!(u32, NcType::UInt, 4);
197impl_nc_read_type!(i64, NcType::Int64, 8);
198impl_nc_read_type!(u64, NcType::UInt64, 8);
199
200/// Read the entire data for a non-record variable into an ndarray.
201///
202/// The data is located at a contiguous region starting at `var.data_offset`
203/// with total size `var.data_size`.
204pub fn read_non_record_variable<T: NcReadType>(
205    file_data: &[u8],
206    var: &NcVariable,
207) -> Result<ArrayD<T>> {
208    if var.is_record_var {
209        return Err(Error::InvalidData(
210            "use read_record_variable for record variables".to_string(),
211        ));
212    }
213
214    let offset = crate::types::checked_usize_from_u64(var.data_offset, "variable data offset")?;
215    let total_elements = checked_non_record_element_count(var)?;
216    let elem_size = T::element_size();
217    let total_bytes = total_elements.checked_mul(elem_size).ok_or_else(|| {
218        Error::InvalidData(format!(
219            "variable '{}' size in bytes exceeds platform usize",
220            var.name
221        ))
222    })?;
223
224    let end = offset.checked_add(total_bytes).ok_or_else(|| {
225        Error::InvalidData(format!(
226            "variable '{}' byte range exceeds platform usize",
227            var.name
228        ))
229    })?;
230    if end > file_data.len() {
231        return Err(Error::InvalidData(format!(
232            "variable '{}' data extends beyond file: offset={}, size={}, file_len={}",
233            var.name,
234            offset,
235            total_bytes,
236            file_data.len()
237        )));
238    }
239
240    let data_slice = &file_data[offset..end];
241    let values = T::decode_bulk_be(data_slice, total_elements)?;
242
243    let shape: Vec<usize> = var
244        .shape()
245        .iter()
246        .map(|&s| crate::types::checked_usize_from_u64(s, "variable dimension"))
247        .collect::<Result<Vec<_>>>()?;
248    if shape.is_empty() {
249        // Scalar variable.
250        ArrayD::from_shape_vec(IxDyn(&[]), values)
251    } else {
252        ArrayD::from_shape_vec(IxDyn(&shape), values)
253    }
254    .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
255}
256
257/// Read the entire data for a non-record variable from range-backed storage.
258pub(crate) fn read_non_record_variable_from_storage<T: NcReadType>(
259    storage: &ClassicStorage,
260    var: &NcVariable,
261) -> Result<ArrayD<T>> {
262    if var.is_record_var {
263        return Err(Error::InvalidData(
264            "use read_record_variable_from_storage for record variables".to_string(),
265        ));
266    }
267
268    let total_elements = checked_non_record_element_count(var)?;
269    let total_bytes = variable_data_bytes::<T>(var.name.as_str(), total_elements)?;
270    let data = storage.read_range(var.data_offset, total_bytes)?;
271    let values = T::decode_bulk_be(data.as_ref(), total_elements)?;
272    let shape = checked_variable_shape(var)?;
273
274    ArrayD::from_shape_vec(IxDyn(&shape), values)
275        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
276}
277
/// Reads a whole non-record variable through range-backed storage, switching
/// to a chunked parallel read once the variable is large enough.
///
/// Variables with no elements, or with fewer bytes than the storage's
/// `parallel_read_policy().min_bytes`, are delegated to
/// [`read_non_record_variable_from_storage`].
///
/// # Errors
///
/// Returns `Error::InvalidData` when `var` is a record variable, when the
/// element or byte count does not fit the platform `usize`, or when the array
/// cannot be built. Errors from the underlying reads are propagated.
#[cfg(feature = "rayon")]
pub(crate) fn read_non_record_variable_parallel_from_storage<T: NcReadType>(
    storage: &ClassicStorage,
    var: &NcVariable,
) -> Result<ArrayD<T>> {
    if var.is_record_var {
        return Err(Error::InvalidData(
            "use read_record_variable_parallel_from_storage for record variables".to_string(),
        ));
    }

    let element_count = checked_non_record_element_count(var)?;
    let byte_len = variable_data_bytes::<T>(var.name.as_str(), element_count)?;

    let policy = storage.parallel_read_policy();
    let prefer_sequential = byte_len < policy.min_bytes || element_count == 0;
    if prefer_sequential {
        return read_non_record_variable_from_storage(storage, var);
    }

    let decoded = read_contiguous_range_parallel::<T>(
        storage,
        var.data_offset,
        element_count,
        policy.target_chunk_bytes,
    )?;

    let dims = checked_variable_shape(var)?;
    match ArrayD::from_shape_vec(IxDyn(&dims), decoded) {
        Ok(array) => Ok(array),
        Err(e) => Err(Error::InvalidData(format!("failed to create array: {}", e))),
    }
}
308
309/// Read the entire data for a non-record variable into a caller-provided buffer.
310pub fn read_non_record_variable_into<T: NcReadType>(
311    file_data: &[u8],
312    var: &NcVariable,
313    dst: &mut [T],
314) -> Result<()> {
315    if var.is_record_var {
316        return Err(Error::InvalidData(
317            "use read_record_variable_into for record variables".to_string(),
318        ));
319    }
320
321    let total_elements = checked_non_record_element_count(var)?;
322    if dst.len() != total_elements {
323        return Err(Error::InvalidData(format!(
324            "destination has {} elements, variable '{}' requires {}",
325            dst.len(),
326            var.name,
327            total_elements
328        )));
329    }
330
331    let offset = crate::types::checked_usize_from_u64(var.data_offset, "variable data offset")?;
332    let elem_size = T::element_size();
333    let total_bytes = total_elements.checked_mul(elem_size).ok_or_else(|| {
334        Error::InvalidData(format!(
335            "variable '{}' size in bytes exceeds platform usize",
336            var.name
337        ))
338    })?;
339
340    let end = offset.checked_add(total_bytes).ok_or_else(|| {
341        Error::InvalidData(format!(
342            "variable '{}' byte range exceeds platform usize",
343            var.name
344        ))
345    })?;
346    if end > file_data.len() {
347        return Err(Error::InvalidData(format!(
348            "variable '{}' data extends beyond file: offset={}, size={}, file_len={}",
349            var.name,
350            offset,
351            total_bytes,
352            file_data.len()
353        )));
354    }
355
356    T::decode_bulk_be_into(&file_data[offset..end], dst)
357}
358
359/// Read a non-record variable from range-backed storage into a caller-provided buffer.
360pub(crate) fn read_non_record_variable_into_from_storage<T: NcReadType>(
361    storage: &ClassicStorage,
362    var: &NcVariable,
363    dst: &mut [T],
364) -> Result<()> {
365    if var.is_record_var {
366        return Err(Error::InvalidData(
367            "use read_record_variable_into_from_storage for record variables".to_string(),
368        ));
369    }
370
371    let total_elements = checked_non_record_element_count(var)?;
372    if dst.len() != total_elements {
373        return Err(Error::InvalidData(format!(
374            "destination has {} elements, variable '{}' requires {}",
375            dst.len(),
376            var.name,
377            total_elements
378        )));
379    }
380
381    let total_bytes = variable_data_bytes::<T>(var.name.as_str(), total_elements)?;
382    let data = storage.read_range(var.data_offset, total_bytes)?;
383    T::decode_bulk_be_into(data.as_ref(), dst)
384}
385
386/// Read the entire data for a record variable into an ndarray.
387///
388/// Record variables are interleaved: for each of `numrecs` records, every record
389/// variable contributes `record_size` bytes (padded to 4-byte alignment for CDF-1/2).
390/// The `record_stride` is the total size of one record across all record variables.
391///
392/// Parameters:
393/// - `file_data`: the raw file bytes
394/// - `var`: the record variable to read
395/// - `numrecs`: number of records (from the file header)
396/// - `record_stride`: total bytes per record (sum of all record variables' padded vsizes)
397pub fn read_record_variable<T: NcReadType>(
398    file_data: &[u8],
399    var: &NcVariable,
400    numrecs: u64,
401    record_stride: u64,
402) -> Result<ArrayD<T>> {
403    if !var.is_record_var {
404        return Err(Error::InvalidData(
405            "use read_non_record_variable for non-record variables".to_string(),
406        ));
407    }
408
409    let elem_size = T::element_size();
410    let base_offset =
411        crate::types::checked_usize_from_u64(var.data_offset, "record variable data offset")?;
412    let record_stride_usize = crate::types::checked_usize_from_u64(record_stride, "record stride")?;
413
414    // Shape: the first dimension is the unlimited dimension, replaced by numrecs.
415    let shape = checked_record_shape(var, numrecs)?;
416    let numrecs_usize = shape[0];
417
418    // Number of elements per record (product of all dims except the first).
419    let elements_per_record = checked_record_elements_per_record(var)?;
420    let bytes_per_record = elements_per_record.checked_mul(elem_size).ok_or_else(|| {
421        Error::InvalidData(format!(
422            "record variable '{}' bytes per record exceed platform usize",
423            var.name
424        ))
425    })?;
426    let total_elements = numrecs_usize
427        .checked_mul(elements_per_record)
428        .ok_or_else(|| {
429            Error::InvalidData(format!(
430                "record variable '{}' element count exceeds platform usize",
431                var.name
432            ))
433        })?;
434
435    let mut values = Vec::with_capacity(total_elements);
436
437    for rec in 0..numrecs_usize {
438        let rec_offset = base_offset
439            .checked_add(rec.checked_mul(record_stride_usize).ok_or_else(|| {
440                Error::InvalidData(format!(
441                    "record variable '{}' byte offset exceeds platform usize",
442                    var.name
443                ))
444            })?)
445            .ok_or_else(|| {
446                Error::InvalidData(format!(
447                    "record variable '{}' byte offset exceeds platform usize",
448                    var.name
449                ))
450            })?;
451        let rec_end = rec_offset.checked_add(bytes_per_record).ok_or_else(|| {
452            Error::InvalidData(format!(
453                "record variable '{}' record range exceeds platform usize",
454                var.name
455            ))
456        })?;
457        if rec_end > file_data.len() {
458            return Err(Error::InvalidData(format!(
459                "record {} for variable '{}' extends beyond file",
460                rec, var.name
461            )));
462        }
463        let rec_slice = &file_data[rec_offset..rec_end];
464        let rec_values = T::decode_bulk_be(rec_slice, elements_per_record)?;
465        values.extend(rec_values);
466    }
467
468    ArrayD::from_shape_vec(IxDyn(&shape), values)
469        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
470}
471
472/// Read the entire data for a record variable from range-backed storage.
473pub(crate) fn read_record_variable_from_storage<T: NcReadType>(
474    storage: &ClassicStorage,
475    var: &NcVariable,
476    numrecs: u64,
477    record_stride: u64,
478) -> Result<ArrayD<T>> {
479    if !var.is_record_var {
480        return Err(Error::InvalidData(
481            "use read_non_record_variable_from_storage for non-record variables".to_string(),
482        ));
483    }
484
485    let shape = checked_record_shape(var, numrecs)?;
486    let elements_per_record = checked_record_elements_per_record(var)?;
487    let bytes_per_record = variable_data_bytes::<T>(var.name.as_str(), elements_per_record)?;
488    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
489    let total_elements = numrecs_usize
490        .checked_mul(elements_per_record)
491        .ok_or_else(|| {
492            Error::InvalidData(format!(
493                "record variable '{}' element count exceeds platform usize",
494                var.name
495            ))
496        })?;
497    let mut values = Vec::with_capacity(total_elements);
498
499    for rec in 0..numrecs {
500        let rec_offset = record_byte_offset(var, rec, record_stride)?;
501        let rec_slice = storage.read_range(rec_offset, bytes_per_record)?;
502        let rec_values = T::decode_bulk_be(rec_slice.as_ref(), elements_per_record)?;
503        values.extend(rec_values);
504    }
505
506    ArrayD::from_shape_vec(IxDyn(&shape), values)
507        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
508}
509
/// Read the entire data for a record variable using Rayon for large reads.
///
/// The output buffer is split into chunks of whole records (sized from the
/// storage's `parallel_read_policy().target_chunk_bytes`), and each chunk is
/// filled by `read_record_chunk_into` on the Rayon pool.
///
/// The call is delegated to [`read_record_variable_from_storage`] when the
/// variable is smaller than the policy's `min_bytes`, has at most one record,
/// or holds no elements at all.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `var` is not a record variable, has no
/// dimensions, or when a size or offset computation overflows. Errors from
/// the per-chunk reads are propagated.
#[cfg(feature = "rayon")]
pub(crate) fn read_record_variable_parallel_from_storage<T: NcReadType>(
    storage: &ClassicStorage,
    var: &NcVariable,
    numrecs: u64,
    record_stride: u64,
) -> Result<ArrayD<T>> {
    if !var.is_record_var {
        return Err(Error::InvalidData(
            "use read_non_record_variable_parallel_from_storage for non-record variables"
                .to_string(),
        ));
    }

    let shape = checked_record_shape(var, numrecs)?;
    let elements_per_record = checked_record_elements_per_record(var)?;
    let bytes_per_record = variable_data_bytes::<T>(var.name.as_str(), elements_per_record)?;
    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
    let total_elements = numrecs_usize
        .checked_mul(elements_per_record)
        .ok_or_else(|| {
            Error::InvalidData(format!(
                "record variable '{}' element count exceeds platform usize",
                var.name
            ))
        })?;
    let logical_bytes = numrecs_usize.checked_mul(bytes_per_record).ok_or_else(|| {
        Error::InvalidData(format!(
            "record variable '{}' logical byte count exceeds platform usize",
            var.name
        ))
    })?;
    let policy = storage.parallel_read_policy();
    // The `total_elements == 0` guard matters when a non-record dimension has
    // size zero: `elements_per_chunk` below would then be zero, and
    // `par_chunks_mut(0)` panics. With `policy.min_bytes == 0` the size test
    // alone would not catch that case.
    if logical_bytes < policy.min_bytes || numrecs_usize <= 1 || total_elements == 0 {
        return read_record_variable_from_storage(storage, var, numrecs, record_stride);
    }

    // Always at least one record per chunk, even when a single record is
    // larger than the target chunk size.
    let records_per_chunk = (policy.target_chunk_bytes / bytes_per_record.max(1)).max(1);
    let elements_per_chunk = records_per_chunk
        .checked_mul(elements_per_record)
        .ok_or_else(|| {
            Error::InvalidData(
                "classic record chunk element count exceeds platform usize".to_string(),
            )
        })?;
    let mut values = vec![T::default(); total_elements];
    let chunk_plan = RecordChunkReadPlan {
        var,
        record_stride,
        elements_per_record,
        bytes_per_record,
    };
    values
        .par_chunks_mut(elements_per_chunk)
        .enumerate()
        .try_for_each(|(chunk, dst)| {
            let first_record = chunk.checked_mul(records_per_chunk).ok_or_else(|| {
                Error::InvalidData("classic record chunk offset exceeds platform usize".to_string())
            })?;
            // The final chunk may cover fewer records than `records_per_chunk`.
            let records = dst.len().checked_div(elements_per_record).ok_or_else(|| {
                Error::InvalidData("classic record elements per record is zero".to_string())
            })?;
            read_record_chunk_into::<T>(storage, &chunk_plan, first_record as u64, records, dst)
        })?;

    ArrayD::from_shape_vec(IxDyn(&shape), values)
        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
}
579
580/// Read the entire data for a record variable into a caller-provided buffer.
581pub fn read_record_variable_into<T: NcReadType>(
582    file_data: &[u8],
583    var: &NcVariable,
584    numrecs: u64,
585    record_stride: u64,
586    dst: &mut [T],
587) -> Result<()> {
588    if !var.is_record_var {
589        return Err(Error::InvalidData(
590            "use read_non_record_variable_into for non-record variables".to_string(),
591        ));
592    }
593
594    let elem_size = T::element_size();
595    let base_offset =
596        crate::types::checked_usize_from_u64(var.data_offset, "record variable data offset")?;
597    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
598    let record_stride_usize = crate::types::checked_usize_from_u64(record_stride, "record stride")?;
599
600    if var.dimensions.is_empty() {
601        return Err(Error::InvalidData(
602            "record variable must have at least one dimension".to_string(),
603        ));
604    }
605
606    let elements_per_record = checked_record_elements_per_record(var)?;
607    let bytes_per_record = elements_per_record.checked_mul(elem_size).ok_or_else(|| {
608        Error::InvalidData(format!(
609            "record variable '{}' bytes per record exceed platform usize",
610            var.name
611        ))
612    })?;
613    let total_elements = numrecs_usize
614        .checked_mul(elements_per_record)
615        .ok_or_else(|| {
616            Error::InvalidData(format!(
617                "record variable '{}' element count exceeds platform usize",
618                var.name
619            ))
620        })?;
621    if dst.len() != total_elements {
622        return Err(Error::InvalidData(format!(
623            "destination has {} elements, variable '{}' requires {}",
624            dst.len(),
625            var.name,
626            total_elements
627        )));
628    }
629
630    for rec in 0..numrecs_usize {
631        let rec_offset = base_offset
632            .checked_add(rec.checked_mul(record_stride_usize).ok_or_else(|| {
633                Error::InvalidData(format!(
634                    "record variable '{}' byte offset exceeds platform usize",
635                    var.name
636                ))
637            })?)
638            .ok_or_else(|| {
639                Error::InvalidData(format!(
640                    "record variable '{}' byte offset exceeds platform usize",
641                    var.name
642                ))
643            })?;
644        let rec_end = rec_offset.checked_add(bytes_per_record).ok_or_else(|| {
645            Error::InvalidData(format!(
646                "record variable '{}' record range exceeds platform usize",
647                var.name
648            ))
649        })?;
650        if rec_end > file_data.len() {
651            return Err(Error::InvalidData(format!(
652                "record {} for variable '{}' extends beyond file",
653                rec, var.name
654            )));
655        }
656
657        let dst_start = rec.checked_mul(elements_per_record).ok_or_else(|| {
658            Error::InvalidData(format!(
659                "record variable '{}' destination offset exceeds platform usize",
660                var.name
661            ))
662        })?;
663        let dst_end = dst_start.checked_add(elements_per_record).ok_or_else(|| {
664            Error::InvalidData(format!(
665                "record variable '{}' destination range exceeds platform usize",
666                var.name
667            ))
668        })?;
669        T::decode_bulk_be_into(
670            &file_data[rec_offset..rec_end],
671            &mut dst[dst_start..dst_end],
672        )?;
673    }
674
675    Ok(())
676}
677
678/// Read a record variable from range-backed storage into a caller-provided buffer.
679pub(crate) fn read_record_variable_into_from_storage<T: NcReadType>(
680    storage: &ClassicStorage,
681    var: &NcVariable,
682    numrecs: u64,
683    record_stride: u64,
684    dst: &mut [T],
685) -> Result<()> {
686    if !var.is_record_var {
687        return Err(Error::InvalidData(
688            "use read_non_record_variable_into_from_storage for non-record variables".to_string(),
689        ));
690    }
691
692    if var.dimensions.is_empty() {
693        return Err(Error::InvalidData(
694            "record variable must have at least one dimension".to_string(),
695        ));
696    }
697
698    let elements_per_record = checked_record_elements_per_record(var)?;
699    let bytes_per_record = variable_data_bytes::<T>(var.name.as_str(), elements_per_record)?;
700    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
701    let total_elements = numrecs_usize
702        .checked_mul(elements_per_record)
703        .ok_or_else(|| {
704            Error::InvalidData(format!(
705                "record variable '{}' element count exceeds platform usize",
706                var.name
707            ))
708        })?;
709    if dst.len() != total_elements {
710        return Err(Error::InvalidData(format!(
711            "destination has {} elements, variable '{}' requires {}",
712            dst.len(),
713            var.name,
714            total_elements
715        )));
716    }
717
718    for rec in 0..numrecs {
719        let rec_offset = record_byte_offset(var, rec, record_stride)?;
720        let rec_slice = storage.read_range(rec_offset, bytes_per_record)?;
721        let dst_start = crate::types::checked_usize_from_u64(rec, "record index")?
722            .checked_mul(elements_per_record)
723            .ok_or_else(|| {
724                Error::InvalidData(format!(
725                    "record variable '{}' destination offset exceeds platform usize",
726                    var.name
727                ))
728            })?;
729        let dst_end = dst_start.checked_add(elements_per_record).ok_or_else(|| {
730            Error::InvalidData(format!(
731                "record variable '{}' destination range exceeds platform usize",
732                var.name
733            ))
734        })?;
735        T::decode_bulk_be_into(rec_slice.as_ref(), &mut dst[dst_start..dst_end])?;
736    }
737
738    Ok(())
739}
740
741/// Compute the record stride: total bytes per record across all record variables.
742///
743/// Each record variable's per-record contribution is its `record_size` (already stored
744/// as vsize from the header), padded to 4-byte boundary.
745pub fn compute_record_stride(variables: &[NcVariable]) -> Result<u64> {
746    variables
747        .iter()
748        .filter(|v| v.is_record_var)
749        .try_fold(0u64, |stride, v| {
750            let size = v.record_size;
751            // Pad each variable's per-record size to 4-byte boundary.
752            let rem = size % 4;
753            let padded = if rem == 0 {
754                size
755            } else {
756                size.checked_add(4 - rem).ok_or_else(|| {
757                    Error::InvalidData(format!(
758                        "record variable '{}' padded record size exceeds u64",
759                        v.name
760                    ))
761                })?
762            };
763            stride
764                .checked_add(padded)
765                .ok_or_else(|| Error::InvalidData("record stride exceeds u64".to_string()))
766        })
767}
768
769fn checked_non_record_element_count(var: &NcVariable) -> Result<usize> {
770    let mut total = 1u64;
771    for dim in &var.dimensions {
772        total = total.checked_mul(dim.size).ok_or_else(|| {
773            Error::InvalidData("variable element count overflows u64".to_string())
774        })?;
775    }
776    crate::types::checked_usize_from_u64(total, "variable element count")
777}
778
779pub(crate) fn checked_variable_shape(var: &NcVariable) -> Result<Vec<usize>> {
780    var.shape()
781        .iter()
782        .map(|&s| crate::types::checked_usize_from_u64(s, "variable dimension"))
783        .collect::<Result<Vec<_>>>()
784}
785
786fn checked_record_shape(var: &NcVariable, numrecs: u64) -> Result<Vec<usize>> {
787    let mut shape: Vec<usize> = var
788        .shape()
789        .iter()
790        .map(|&s| crate::types::checked_usize_from_u64(s, "record variable dimension"))
791        .collect::<Result<Vec<_>>>()?;
792    if shape.is_empty() {
793        return Err(Error::InvalidData(
794            "record variable must have at least one dimension".to_string(),
795        ));
796    }
797    shape[0] = crate::types::checked_usize_from_u64(numrecs, "record count")?;
798    Ok(shape)
799}
800
801fn checked_record_elements_per_record(var: &NcVariable) -> Result<usize> {
802    let mut elements = 1usize;
803    for dim in var.dimensions.iter().skip(1) {
804        let size = crate::types::checked_usize_from_u64(dim.size, "record variable dimension")?;
805        elements = elements.checked_mul(size).ok_or_else(|| {
806            Error::InvalidData(format!(
807                "record variable '{}' elements per record exceed platform usize",
808                var.name
809            ))
810        })?;
811    }
812    Ok(elements)
813}
814
815pub(crate) fn variable_data_bytes<T: NcReadType>(
816    var_name: &str,
817    element_count: usize,
818) -> Result<usize> {
819    element_count.checked_mul(T::element_size()).ok_or_else(|| {
820        Error::InvalidData(format!(
821            "variable '{var_name}' size in bytes exceeds platform usize"
822        ))
823    })
824}
825
826pub(crate) fn record_byte_offset(var: &NcVariable, record: u64, record_stride: u64) -> Result<u64> {
827    var.data_offset
828        .checked_add(record.checked_mul(record_stride).ok_or_else(|| {
829            Error::InvalidData(format!(
830                "record variable '{}' byte offset exceeds u64",
831                var.name
832            ))
833        })?)
834        .ok_or_else(|| {
835            Error::InvalidData(format!(
836                "record variable '{}' byte offset exceeds u64",
837                var.name
838            ))
839        })
840}
841
/// Reads `total_elements` contiguous big-endian values of type `T` starting at
/// `base_offset`, decoding fixed-size chunks in parallel with rayon.
///
/// The output vector is pre-filled with `T::default()` and split into chunks
/// of roughly `target_chunk_bytes` bytes (always at least one element). Each
/// chunk reads its own byte range from `storage` and decodes it in place.
///
/// Returns an empty vector without touching `storage` when `total_elements`
/// is zero.
///
/// # Errors
///
/// Returns [`Error::InvalidData`] when a chunk's element or byte offset
/// overflows. Errors from `storage.read_range` and from
/// `T::decode_bulk_be_into` are passed through.
#[cfg(feature = "rayon")]
pub(crate) fn read_contiguous_range_parallel<T: NcReadType>(
    storage: &ClassicStorage,
    base_offset: u64,
    total_elements: usize,
    target_chunk_bytes: usize,
) -> Result<Vec<T>> {
    if total_elements == 0 {
        return Ok(Vec::new());
    }

    let width = T::element_size();
    // Guard against a zero element size and against a byte budget smaller
    // than one element: a chunk always holds at least one element.
    let chunk_len = (target_chunk_bytes / width.max(1)).max(1);

    let mut decoded = vec![T::default(); total_elements];
    decoded
        .par_chunks_mut(chunk_len)
        .enumerate()
        .try_for_each(|(index, slot)| {
            // Index of the first element covered by this chunk.
            let first_element = index.checked_mul(chunk_len).ok_or_else(|| {
                Error::InvalidData(
                    "classic parallel chunk offset exceeds platform usize".to_string(),
                )
            })?;
            let relative = crate::types::checked_mul_u64(
                first_element as u64,
                width as u64,
                "classic parallel byte offset",
            )?;
            let absolute = base_offset.checked_add(relative).ok_or_else(|| {
                Error::InvalidData("classic parallel byte offset exceeds u64".to_string())
            })?;
            // The final chunk may be shorter, so size the read from the slot.
            let byte_len = variable_data_bytes::<T>("parallel chunk", slot.len())?;
            let raw = storage.read_range(absolute, byte_len)?;
            T::decode_bulk_be_into(raw.as_ref(), slot)
        })?;

    Ok(decoded)
}
880
/// Per-variable parameters shared by every chunk read of one record variable.
///
/// Consumed by `read_record_chunk_into`, which uses it to locate each record
/// in storage and to size the reads and the destination slices.
#[cfg(feature = "rayon")]
struct RecordChunkReadPlan<'a> {
    /// Variable being read; supplies `data_offset` for offset computation and
    /// `name` for error messages.
    var: &'a NcVariable,
    /// Byte distance between the starts of consecutive records.
    record_stride: u64,
    /// Number of elements one record contributes to the destination slice.
    elements_per_record: usize,
    /// Number of bytes read from storage for one record of this variable.
    bytes_per_record: usize,
}
888
/// Decodes `records` consecutive records of one record variable, starting at
/// `first_record`, into `dst`.
///
/// `dst` must hold exactly `records * plan.elements_per_record` elements.
/// When the record stride equals the variable's per-record byte size the
/// records are adjacent in storage and are fetched with a single read;
/// otherwise each record is read and decoded on its own.
///
/// Does nothing when `records` is zero.
///
/// # Errors
///
/// Returns [`Error::InvalidData`] when `dst` has the wrong length or when any
/// count or offset computation overflows. Errors from `record_byte_offset`,
/// `storage.read_range` and `T::decode_bulk_be_into` are passed through.
#[cfg(feature = "rayon")]
fn read_record_chunk_into<T: NcReadType>(
    storage: &ClassicStorage,
    plan: &RecordChunkReadPlan<'_>,
    first_record: u64,
    records: usize,
    dst: &mut [T],
) -> Result<()> {
    if records == 0 {
        return Ok(());
    }

    let per_record = plan.elements_per_record;
    let name = &plan.var.name;

    let wanted = match records.checked_mul(per_record) {
        Some(count) => count,
        None => {
            return Err(Error::InvalidData(format!(
                "record variable '{}' chunk element count exceeds platform usize",
                name
            )));
        }
    };
    if dst.len() != wanted {
        return Err(Error::InvalidData(format!(
            "record variable '{}' chunk destination has {} elements, expected {}",
            name,
            dst.len(),
            wanted
        )));
    }

    // Stride == per-record size means nothing sits between this variable's
    // records, so the whole chunk is one contiguous byte range.
    let contiguous = plan.record_stride == plan.bytes_per_record as u64;
    if contiguous {
        let start = record_byte_offset(plan.var, first_record, plan.record_stride)?;
        let span = match records.checked_mul(plan.bytes_per_record) {
            Some(bytes) => bytes,
            None => {
                return Err(Error::InvalidData(format!(
                    "record variable '{}' chunk byte count exceeds platform usize",
                    name
                )));
            }
        };
        let raw = storage.read_range(start, span)?;
        return T::decode_bulk_be_into(raw.as_ref(), dst);
    }

    // Interleaved layout: fetch and decode one record at a time.
    for ordinal in 0..records {
        let record = match first_record.checked_add(ordinal as u64) {
            Some(index) => index,
            None => {
                return Err(Error::InvalidData(
                    "classic record index exceeds u64".to_string(),
                ));
            }
        };
        let start = record_byte_offset(plan.var, record, plan.record_stride)?;
        let raw = storage.read_range(start, plan.bytes_per_record)?;

        let lo = ordinal.checked_mul(per_record).ok_or_else(|| {
            Error::InvalidData(format!(
                "record variable '{}' chunk destination offset exceeds platform usize",
                name
            ))
        })?;
        let hi = lo.checked_add(per_record).ok_or_else(|| {
            Error::InvalidData(format!(
                "record variable '{}' chunk destination range exceeds platform usize",
                name
            ))
        })?;
        T::decode_bulk_be_into(raw.as_ref(), &mut dst[lo..hi])?;
    }
    Ok(())
}
956
957#[cfg(test)]
958mod tests {
959    use super::*;
960    use crate::types::NcDimension;
961
962    #[test]
963    fn read_non_record_1d_float() {
964        // Create a fake file with 3 floats starting at offset 100.
965        let mut file_data = vec![0u8; 200];
966        let values = [1.0f32, 2.0f32, 3.0f32];
967        for (i, &v) in values.iter().enumerate() {
968            let bytes = v.to_be_bytes();
969            file_data[100 + i * 4..100 + i * 4 + 4].copy_from_slice(&bytes);
970        }
971
972        let var = NcVariable {
973            name: "temp".to_string(),
974            dimensions: vec![NcDimension {
975                name: "x".to_string(),
976                size: 3,
977                is_unlimited: false,
978            }],
979            dtype: NcType::Float,
980            attributes: vec![],
981            data_offset: 100,
982            _data_size: 12,
983            is_record_var: false,
984            record_size: 0,
985        };
986
987        let arr: ArrayD<f32> = read_non_record_variable(&file_data, &var).unwrap();
988        assert_eq!(arr.shape(), &[3]);
989        assert_eq!(arr[[0]], 1.0f32);
990        assert_eq!(arr[[1]], 2.0f32);
991        assert_eq!(arr[[2]], 3.0f32);
992    }
993
994    #[test]
995    fn non_record_variable_into_copies_values() {
996        let mut file_data = vec![0u8; 200];
997        let values = [1.0f32, 2.0f32, 3.0f32];
998        for (i, &v) in values.iter().enumerate() {
999            file_data[100 + i * 4..100 + i * 4 + 4].copy_from_slice(&v.to_be_bytes());
1000        }
1001
1002        let var = NcVariable {
1003            name: "temp".to_string(),
1004            dimensions: vec![NcDimension {
1005                name: "x".to_string(),
1006                size: 3,
1007                is_unlimited: false,
1008            }],
1009            dtype: NcType::Float,
1010            attributes: vec![],
1011            data_offset: 100,
1012            _data_size: 12,
1013            is_record_var: false,
1014            record_size: 0,
1015        };
1016
1017        let mut dst = [0.0f32; 3];
1018        read_non_record_variable_into(&file_data, &var, &mut dst).unwrap();
1019        assert_eq!(dst, values);
1020    }
1021
1022    #[test]
1023    fn read_non_record_2d_int() {
1024        // 2x3 array of i32 at offset 0
1025        let values: Vec<i32> = vec![10, 20, 30, 40, 50, 60];
1026        let mut file_data = Vec::new();
1027        for &v in &values {
1028            file_data.extend_from_slice(&v.to_be_bytes());
1029        }
1030
1031        let var = NcVariable {
1032            name: "grid".to_string(),
1033            dimensions: vec![
1034                NcDimension {
1035                    name: "y".to_string(),
1036                    size: 2,
1037                    is_unlimited: false,
1038                },
1039                NcDimension {
1040                    name: "x".to_string(),
1041                    size: 3,
1042                    is_unlimited: false,
1043                },
1044            ],
1045            dtype: NcType::Int,
1046            attributes: vec![],
1047            data_offset: 0,
1048            _data_size: 24,
1049            is_record_var: false,
1050            record_size: 0,
1051        };
1052
1053        let arr: ArrayD<i32> = read_non_record_variable(&file_data, &var).unwrap();
1054        assert_eq!(arr.shape(), &[2, 3]);
1055        assert_eq!(arr[[0, 0]], 10);
1056        assert_eq!(arr[[0, 2]], 30);
1057        assert_eq!(arr[[1, 0]], 40);
1058        assert_eq!(arr[[1, 2]], 60);
1059    }
1060
1061    #[test]
1062    fn read_non_record_variable_into_rejects_wrong_destination_len() {
1063        let var = NcVariable {
1064            name: "grid".to_string(),
1065            dimensions: vec![NcDimension {
1066                name: "x".to_string(),
1067                size: 3,
1068                is_unlimited: false,
1069            }],
1070            dtype: NcType::Float,
1071            attributes: vec![],
1072            data_offset: 0,
1073            _data_size: 12,
1074            is_record_var: false,
1075            record_size: 0,
1076        };
1077
1078        let mut dst = [0.0f32; 2];
1079        let err = read_non_record_variable_into(&[0; 12], &var, &mut dst).unwrap_err();
1080        assert!(matches!(err, Error::InvalidData(_)));
1081    }
1082
1083    #[test]
1084    fn record_stride_sums_padded_record_variables() {
1085        let vars = vec![
1086            NcVariable {
1087                name: "a".to_string(),
1088                dimensions: vec![],
1089                dtype: NcType::Float,
1090                attributes: vec![],
1091                data_offset: 0,
1092                _data_size: 0,
1093                is_record_var: true,
1094                record_size: 20, // 5 floats
1095            },
1096            NcVariable {
1097                name: "b".to_string(),
1098                dimensions: vec![],
1099                dtype: NcType::Short,
1100                attributes: vec![],
1101                data_offset: 0,
1102                _data_size: 0,
1103                is_record_var: true,
1104                record_size: 6, // 3 shorts -> padded to 8
1105            },
1106            NcVariable {
1107                name: "c".to_string(),
1108                dimensions: vec![],
1109                dtype: NcType::Double,
1110                attributes: vec![],
1111                data_offset: 0,
1112                _data_size: 100,
1113                is_record_var: false, // not a record var, should be excluded
1114                record_size: 0,
1115            },
1116        ];
1117        // a: 20 (already 4-aligned), b: 6 -> 8 = total 28
1118        assert_eq!(compute_record_stride(&vars).unwrap(), 28);
1119    }
1120
1121    #[test]
1122    fn record_stride_rejects_padded_size_overflow() {
1123        let vars = vec![NcVariable {
1124            name: "huge".to_string(),
1125            dimensions: vec![],
1126            dtype: NcType::Byte,
1127            attributes: vec![],
1128            data_offset: 0,
1129            _data_size: 0,
1130            is_record_var: true,
1131            record_size: u64::MAX,
1132        }];
1133
1134        let err = compute_record_stride(&vars).unwrap_err();
1135        assert!(matches!(err, Error::InvalidData(_)));
1136    }
1137
1138    #[test]
1139    fn record_stride_rejects_sum_overflow() {
1140        let vars = vec![
1141            NcVariable {
1142                name: "a".to_string(),
1143                dimensions: vec![],
1144                dtype: NcType::Byte,
1145                attributes: vec![],
1146                data_offset: 0,
1147                _data_size: 0,
1148                is_record_var: true,
1149                record_size: u64::MAX - 7,
1150            },
1151            NcVariable {
1152                name: "b".to_string(),
1153                dimensions: vec![],
1154                dtype: NcType::Byte,
1155                attributes: vec![],
1156                data_offset: 0,
1157                _data_size: 0,
1158                is_record_var: true,
1159                record_size: 8,
1160            },
1161        ];
1162
1163        let err = compute_record_stride(&vars).unwrap_err();
1164        assert!(matches!(err, Error::InvalidData(_)));
1165    }
1166
1167    #[test]
1168    fn record_variable_reads_all_records() {
1169        // Single record variable "temp" with shape [time, x] where x=2.
1170        // 3 records, each with 2 floats = 8 bytes per record.
1171        // Record stride = 8 (only one record var, already 4-aligned).
1172        let mut file_data = vec![0u8; 200];
1173        let base = 100usize;
1174        let record_values: Vec<Vec<f32>> = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
1175        for (rec, vals) in record_values.iter().enumerate() {
1176            for (i, &v) in vals.iter().enumerate() {
1177                let offset = base + rec * 8 + i * 4;
1178                file_data[offset..offset + 4].copy_from_slice(&v.to_be_bytes());
1179            }
1180        }
1181
1182        let var = NcVariable {
1183            name: "temp".to_string(),
1184            dimensions: vec![
1185                NcDimension {
1186                    name: "time".to_string(),
1187                    size: 0, // unlimited
1188                    is_unlimited: true,
1189                },
1190                NcDimension {
1191                    name: "x".to_string(),
1192                    size: 2,
1193                    is_unlimited: false,
1194                },
1195            ],
1196            dtype: NcType::Float,
1197            attributes: vec![],
1198            data_offset: 100,
1199            _data_size: 0,
1200            is_record_var: true,
1201            record_size: 8,
1202        };
1203
1204        let arr: ArrayD<f32> = read_record_variable(&file_data, &var, 3, 8).unwrap();
1205        assert_eq!(arr.shape(), &[3, 2]);
1206        assert_eq!(arr[[0, 0]], 1.0);
1207        assert_eq!(arr[[0, 1]], 2.0);
1208        assert_eq!(arr[[1, 0]], 3.0);
1209        assert_eq!(arr[[2, 1]], 6.0);
1210    }
1211
1212    #[test]
1213    fn record_variable_into_copies_values() {
1214        let mut file_data = vec![0u8; 200];
1215        let base = 100usize;
1216        let record_values: Vec<Vec<f32>> = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
1217        for (rec, vals) in record_values.iter().enumerate() {
1218            for (i, &v) in vals.iter().enumerate() {
1219                let offset = base + rec * 8 + i * 4;
1220                file_data[offset..offset + 4].copy_from_slice(&v.to_be_bytes());
1221            }
1222        }
1223
1224        let var = NcVariable {
1225            name: "temp".to_string(),
1226            dimensions: vec![
1227                NcDimension {
1228                    name: "time".to_string(),
1229                    size: 0,
1230                    is_unlimited: true,
1231                },
1232                NcDimension {
1233                    name: "x".to_string(),
1234                    size: 2,
1235                    is_unlimited: false,
1236                },
1237            ],
1238            dtype: NcType::Float,
1239            attributes: vec![],
1240            data_offset: 100,
1241            _data_size: 0,
1242            is_record_var: true,
1243            record_size: 8,
1244        };
1245
1246        let mut dst = [0.0f32; 6];
1247        read_record_variable_into(&file_data, &var, 3, 8, &mut dst).unwrap();
1248        assert_eq!(dst, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
1249    }
1250
1251    #[test]
1252    fn read_record_variable_into_rejects_wrong_destination_len() {
1253        let var = NcVariable {
1254            name: "temp".to_string(),
1255            dimensions: vec![
1256                NcDimension {
1257                    name: "time".to_string(),
1258                    size: 0,
1259                    is_unlimited: true,
1260                },
1261                NcDimension {
1262                    name: "x".to_string(),
1263                    size: 2,
1264                    is_unlimited: false,
1265                },
1266            ],
1267            dtype: NcType::Float,
1268            attributes: vec![],
1269            data_offset: 0,
1270            _data_size: 0,
1271            is_record_var: true,
1272            record_size: 8,
1273        };
1274
1275        let mut dst = [0.0f32; 5];
1276        let err = read_record_variable_into(&[0; 24], &var, 3, 8, &mut dst).unwrap_err();
1277        assert!(matches!(err, Error::InvalidData(_)));
1278    }
1279
1280    #[test]
1281    fn read_non_record_variable_rejects_element_count_overflow() {
1282        let var = NcVariable {
1283            name: "huge".to_string(),
1284            dimensions: vec![
1285                NcDimension {
1286                    name: "y".to_string(),
1287                    size: u64::MAX,
1288                    is_unlimited: false,
1289                },
1290                NcDimension {
1291                    name: "x".to_string(),
1292                    size: 2,
1293                    is_unlimited: false,
1294                },
1295            ],
1296            dtype: NcType::Float,
1297            attributes: vec![],
1298            data_offset: 0,
1299            _data_size: 0,
1300            is_record_var: false,
1301            record_size: 0,
1302        };
1303
1304        let err = read_non_record_variable::<f32>(&[], &var).unwrap_err();
1305        assert!(matches!(err, Error::InvalidData(_)));
1306    }
1307
1308    #[test]
1309    fn read_record_variable_rejects_elements_per_record_overflow() {
1310        let var = NcVariable {
1311            name: "huge_record".to_string(),
1312            dimensions: vec![
1313                NcDimension {
1314                    name: "time".to_string(),
1315                    size: 0,
1316                    is_unlimited: true,
1317                },
1318                NcDimension {
1319                    name: "y".to_string(),
1320                    size: usize::MAX as u64,
1321                    is_unlimited: false,
1322                },
1323                NcDimension {
1324                    name: "x".to_string(),
1325                    size: 2,
1326                    is_unlimited: false,
1327                },
1328            ],
1329            dtype: NcType::Float,
1330            attributes: vec![],
1331            data_offset: 0,
1332            _data_size: 0,
1333            is_record_var: true,
1334            record_size: 4,
1335        };
1336
1337        let err = read_record_variable::<f32>(&[], &var, 1, 4).unwrap_err();
1338        assert!(matches!(err, Error::InvalidData(_)));
1339    }
1340
1341    #[test]
1342    fn read_record_variable_rejects_record_offset_overflow() {
1343        let var = NcVariable {
1344            name: "huge_record".to_string(),
1345            dimensions: vec![
1346                NcDimension {
1347                    name: "time".to_string(),
1348                    size: 0,
1349                    is_unlimited: true,
1350                },
1351                NcDimension {
1352                    name: "x".to_string(),
1353                    size: 1,
1354                    is_unlimited: false,
1355                },
1356            ],
1357            dtype: NcType::Float,
1358            attributes: vec![],
1359            data_offset: u64::MAX,
1360            _data_size: 0,
1361            is_record_var: true,
1362            record_size: 4,
1363        };
1364
1365        let err = read_record_variable::<f32>(&[], &var, 1, 4).unwrap_err();
1366        assert!(matches!(err, Error::InvalidData(_)));
1367    }
1368}