// netcdf_reader/classic/data.rs
1//! Data reading for classic (CDF-1/2/5) NetCDF files.
2//!
3//! Two layout types:
4//! - **Non-record variables**: contiguous data at the offset stored in the variable header.
5//! - **Record variables**: data is interleaved across records. Each record contains one
6//!   slice from every record variable, in the order they appear in the header. The total
7//!   record size is the sum of all record variables' vsize values (each padded to 4-byte
8//!   boundary in CDF-1/2).
9
10use ndarray::{ArrayD, IxDyn};
11
12use crate::error::{Error, Result};
13use crate::types::{NcType, NcVariable};
14
15/// Trait for types that can be read from classic NetCDF data.
16pub trait NcReadType: Clone + Default + Send + 'static {
17    /// The NetCDF type this Rust type corresponds to.
18    fn nc_type() -> NcType;
19
20    /// Read a single element from big-endian bytes.
21    fn from_be_bytes(bytes: &[u8]) -> Result<Self>;
22
23    /// Size in bytes of one element.
24    fn element_size() -> usize;
25
26    /// Bulk decode `count` elements from a contiguous big-endian byte slice.
27    ///
28    /// Default implementation falls back to per-element decoding. Types with
29    /// multi-byte elements override this with an optimized bulk path using
30    /// `chunks_exact` + byte-swap (on LE hosts) or `copy_nonoverlapping`
31    /// (on BE hosts).
32    fn decode_bulk_be(raw: &[u8], count: usize) -> Result<Vec<Self>> {
33        let elem_size = Self::element_size();
34        let needed = count * elem_size;
35        if raw.len() < needed {
36            return Err(Error::InvalidData(format!(
37                "need {} bytes for {} elements, got {}",
38                needed,
39                count,
40                raw.len()
41            )));
42        }
43        let mut values = Vec::with_capacity(count);
44        for i in 0..count {
45            let start = i * elem_size;
46            values.push(Self::from_be_bytes(&raw[start..start + elem_size])?);
47        }
48        Ok(values)
49    }
50}
51
/// Implements `NcReadType` for a primitive numeric type.
///
/// `$ty` is the Rust type, `$nc_type` the corresponding `NcType` variant,
/// and `$size` the element size in bytes (must equal `size_of::<$ty>()`).
macro_rules! impl_nc_read_type {
    ($ty:ty, $nc_type:expr, $size:expr) => {
        impl NcReadType for $ty {
            fn nc_type() -> NcType {
                $nc_type
            }

            fn from_be_bytes(bytes: &[u8]) -> Result<Self> {
                if bytes.len() < $size {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {}, got {}",
                        $size,
                        stringify!($ty),
                        bytes.len()
                    )));
                }
                let mut arr = [0u8; $size];
                arr.copy_from_slice(&bytes[..$size]);
                Ok(<$ty>::from_be_bytes(arr))
            }

            fn element_size() -> usize {
                $size
            }

            fn decode_bulk_be(raw: &[u8], count: usize) -> Result<Vec<Self>> {
                // Fix: `count * $size` was unchecked; a hostile element count
                // could wrap `usize` and slice the wrong byte range. Use
                // checked multiplication and report InvalidData instead.
                let total_bytes = count.checked_mul($size).ok_or_else(|| {
                    Error::InvalidData(format!(
                        "byte count for {} elements of {} overflows usize",
                        count,
                        stringify!($ty)
                    ))
                })?;
                if raw.len() < total_bytes {
                    return Err(Error::InvalidData(format!(
                        "need {} bytes for {} elements of {}, got {}",
                        total_bytes,
                        count,
                        stringify!($ty),
                        raw.len()
                    )));
                }
                let bytes = &raw[..total_bytes];
                #[cfg(target_endian = "big")]
                {
                    // Native BE: memcpy is safe for any element size.
                    let mut values = Vec::<$ty>::with_capacity(count);
                    // SAFETY: `values` owns freshly allocated capacity for
                    // `count` elements (= `total_bytes` bytes), `bytes` is
                    // exactly `total_bytes` long, the two regions cannot
                    // overlap, and every bit pattern is a valid value for
                    // these primitive numeric types.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            bytes.as_ptr(),
                            values.as_mut_ptr() as *mut u8,
                            total_bytes,
                        );
                        values.set_len(count);
                    }
                    Ok(values)
                }
                #[cfg(target_endian = "little")]
                {
                    // LE host reading BE data: chunks_exact + byte-swap.
                    Ok(bytes
                        .chunks_exact($size)
                        .map(|chunk| {
                            let mut arr = [0u8; $size];
                            arr.copy_from_slice(chunk);
                            <$ty>::from_be_bytes(arr)
                        })
                        .collect())
                }
            }
        }
    };
}
119
120impl_nc_read_type!(i8, NcType::Byte, 1);
121impl_nc_read_type!(i16, NcType::Short, 2);
122impl_nc_read_type!(i32, NcType::Int, 4);
123impl_nc_read_type!(f32, NcType::Float, 4);
124impl_nc_read_type!(f64, NcType::Double, 8);
125impl_nc_read_type!(u8, NcType::UByte, 1);
126impl_nc_read_type!(u16, NcType::UShort, 2);
127impl_nc_read_type!(u32, NcType::UInt, 4);
128impl_nc_read_type!(i64, NcType::Int64, 8);
129impl_nc_read_type!(u64, NcType::UInt64, 8);
130
131/// Read the entire data for a non-record variable into an ndarray.
132///
133/// The data is located at a contiguous region starting at `var.data_offset`
134/// with total size `var.data_size`.
135pub fn read_non_record_variable<T: NcReadType>(
136    file_data: &[u8],
137    var: &NcVariable,
138) -> Result<ArrayD<T>> {
139    if var.is_record_var {
140        return Err(Error::InvalidData(
141            "use read_record_variable for record variables".to_string(),
142        ));
143    }
144
145    let offset = crate::types::checked_usize_from_u64(var.data_offset, "variable data offset")?;
146    let total_elements = crate::types::checked_usize_from_u64(
147        var.checked_num_elements()?,
148        "variable element count",
149    )?;
150    let elem_size = T::element_size();
151    let total_bytes = total_elements.checked_mul(elem_size).ok_or_else(|| {
152        Error::InvalidData(format!(
153            "variable '{}' size in bytes exceeds platform usize",
154            var.name
155        ))
156    })?;
157
158    let end = offset.checked_add(total_bytes).ok_or_else(|| {
159        Error::InvalidData(format!(
160            "variable '{}' byte range exceeds platform usize",
161            var.name
162        ))
163    })?;
164    if end > file_data.len() {
165        return Err(Error::InvalidData(format!(
166            "variable '{}' data extends beyond file: offset={}, size={}, file_len={}",
167            var.name,
168            offset,
169            total_bytes,
170            file_data.len()
171        )));
172    }
173
174    let data_slice = &file_data[offset..end];
175    let values = T::decode_bulk_be(data_slice, total_elements)?;
176
177    let shape: Vec<usize> = var
178        .shape()
179        .iter()
180        .map(|&s| crate::types::checked_usize_from_u64(s, "variable dimension"))
181        .collect::<Result<Vec<_>>>()?;
182    if shape.is_empty() {
183        // Scalar variable.
184        ArrayD::from_shape_vec(IxDyn(&[]), values)
185    } else {
186        ArrayD::from_shape_vec(IxDyn(&shape), values)
187    }
188    .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
189}
190
191/// Read the entire data for a record variable into an ndarray.
192///
193/// Record variables are interleaved: for each of `numrecs` records, every record
194/// variable contributes `record_size` bytes (padded to 4-byte alignment for CDF-1/2).
195/// The `record_stride` is the total size of one record across all record variables.
196///
197/// Parameters:
198/// - `file_data`: the raw file bytes
199/// - `var`: the record variable to read
200/// - `numrecs`: number of records (from the file header)
201/// - `record_stride`: total bytes per record (sum of all record variables' padded vsizes)
202pub fn read_record_variable<T: NcReadType>(
203    file_data: &[u8],
204    var: &NcVariable,
205    numrecs: u64,
206    record_stride: u64,
207) -> Result<ArrayD<T>> {
208    if !var.is_record_var {
209        return Err(Error::InvalidData(
210            "use read_non_record_variable for non-record variables".to_string(),
211        ));
212    }
213
214    let elem_size = T::element_size();
215    let base_offset =
216        crate::types::checked_usize_from_u64(var.data_offset, "record variable data offset")?;
217    let numrecs_usize = crate::types::checked_usize_from_u64(numrecs, "record count")?;
218    let record_stride_usize = crate::types::checked_usize_from_u64(record_stride, "record stride")?;
219
220    // Shape: the first dimension is the unlimited dimension, replaced by numrecs.
221    let mut shape: Vec<usize> = var
222        .shape()
223        .iter()
224        .map(|&s| crate::types::checked_usize_from_u64(s, "record variable dimension"))
225        .collect::<Result<Vec<_>>>()?;
226    if shape.is_empty() {
227        return Err(Error::InvalidData(
228            "record variable must have at least one dimension".to_string(),
229        ));
230    }
231    shape[0] = numrecs_usize;
232
233    // Number of elements per record (product of all dims except the first).
234    let elements_per_record: usize = shape[1..].iter().product::<usize>().max(1);
235    let bytes_per_record = elements_per_record.checked_mul(elem_size).ok_or_else(|| {
236        Error::InvalidData(format!(
237            "record variable '{}' bytes per record exceed platform usize",
238            var.name
239        ))
240    })?;
241    let total_elements = numrecs_usize
242        .checked_mul(elements_per_record)
243        .ok_or_else(|| {
244            Error::InvalidData(format!(
245                "record variable '{}' element count exceeds platform usize",
246                var.name
247            ))
248        })?;
249
250    let mut values = Vec::with_capacity(total_elements);
251
252    for rec in 0..numrecs_usize {
253        let rec_offset = base_offset
254            .checked_add(rec.checked_mul(record_stride_usize).ok_or_else(|| {
255                Error::InvalidData(format!(
256                    "record variable '{}' byte offset exceeds platform usize",
257                    var.name
258                ))
259            })?)
260            .ok_or_else(|| {
261                Error::InvalidData(format!(
262                    "record variable '{}' byte offset exceeds platform usize",
263                    var.name
264                ))
265            })?;
266        let rec_end = rec_offset.checked_add(bytes_per_record).ok_or_else(|| {
267            Error::InvalidData(format!(
268                "record variable '{}' record range exceeds platform usize",
269                var.name
270            ))
271        })?;
272        if rec_end > file_data.len() {
273            return Err(Error::InvalidData(format!(
274                "record {} for variable '{}' extends beyond file",
275                rec, var.name
276            )));
277        }
278        let rec_slice = &file_data[rec_offset..rec_end];
279        let rec_values = T::decode_bulk_be(rec_slice, elements_per_record)?;
280        values.extend(rec_values);
281    }
282
283    ArrayD::from_shape_vec(IxDyn(&shape), values)
284        .map_err(|e| Error::InvalidData(format!("failed to create array: {}", e)))
285}
286
287/// Compute the record stride: total bytes per record across all record variables.
288///
289/// Each record variable's per-record contribution is its `record_size` (already stored
290/// as vsize from the header), padded to 4-byte boundary.
291pub fn compute_record_stride(variables: &[NcVariable]) -> u64 {
292    variables
293        .iter()
294        .filter(|v| v.is_record_var)
295        .map(|v| {
296            let size = v.record_size;
297            // Pad each variable's per-record size to 4-byte boundary.
298            let rem = size % 4;
299            if rem == 0 {
300                size
301            } else {
302                size + (4 - rem)
303            }
304        })
305        .sum()
306}
307
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::NcDimension;

    // Non-record read, 1-D: three f32 values written big-endian at a
    // non-zero offset, recovered with the correct shape and values.
    #[test]
    fn test_read_non_record_1d_float() {
        // Create a fake file with 3 floats starting at offset 100.
        let mut file_data = vec![0u8; 200];
        let values = [1.0f32, 2.0f32, 3.0f32];
        for (i, &v) in values.iter().enumerate() {
            let bytes = v.to_be_bytes();
            file_data[100 + i * 4..100 + i * 4 + 4].copy_from_slice(&bytes);
        }

        let var = NcVariable {
            name: "temp".to_string(),
            dimensions: vec![NcDimension {
                name: "x".to_string(),
                size: 3,
                is_unlimited: false,
            }],
            dtype: NcType::Float,
            attributes: vec![],
            data_offset: 100,
            _data_size: 12,
            is_record_var: false,
            record_size: 0,
        };

        let arr: ArrayD<f32> = read_non_record_variable(&file_data, &var).unwrap();
        assert_eq!(arr.shape(), &[3]);
        assert_eq!(arr[[0]], 1.0f32);
        assert_eq!(arr[[1]], 2.0f32);
        assert_eq!(arr[[2]], 3.0f32);
    }

    // Non-record read, 2-D: row-major i32 grid decoded from big-endian
    // bytes at offset 0, checked at the corners.
    #[test]
    fn test_read_non_record_2d_int() {
        // 2x3 array of i32 at offset 0
        let values: Vec<i32> = vec![10, 20, 30, 40, 50, 60];
        let mut file_data = Vec::new();
        for &v in &values {
            file_data.extend_from_slice(&v.to_be_bytes());
        }

        let var = NcVariable {
            name: "grid".to_string(),
            dimensions: vec![
                NcDimension {
                    name: "y".to_string(),
                    size: 2,
                    is_unlimited: false,
                },
                NcDimension {
                    name: "x".to_string(),
                    size: 3,
                    is_unlimited: false,
                },
            ],
            dtype: NcType::Int,
            attributes: vec![],
            data_offset: 0,
            _data_size: 24,
            is_record_var: false,
            record_size: 0,
        };

        let arr: ArrayD<i32> = read_non_record_variable(&file_data, &var).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr[[0, 0]], 10);
        assert_eq!(arr[[0, 2]], 30);
        assert_eq!(arr[[1, 0]], 40);
        assert_eq!(arr[[1, 2]], 60);
    }

    // Stride = sum of record variables' per-record sizes padded to 4 bytes;
    // non-record variables are excluded from the sum.
    #[test]
    fn test_compute_record_stride() {
        let vars = vec![
            NcVariable {
                name: "a".to_string(),
                dimensions: vec![],
                dtype: NcType::Float,
                attributes: vec![],
                data_offset: 0,
                _data_size: 0,
                is_record_var: true,
                record_size: 20, // 5 floats
            },
            NcVariable {
                name: "b".to_string(),
                dimensions: vec![],
                dtype: NcType::Short,
                attributes: vec![],
                data_offset: 0,
                _data_size: 0,
                is_record_var: true,
                record_size: 6, // 3 shorts -> padded to 8
            },
            NcVariable {
                name: "c".to_string(),
                dimensions: vec![],
                dtype: NcType::Double,
                attributes: vec![],
                data_offset: 100,
                _data_size: 100,
                is_record_var: false, // not a record var, should be excluded
                record_size: 0,
            },
        ];
        // a: 20 (already 4-aligned), b: 6 -> 8 = total 28
        assert_eq!(compute_record_stride(&vars), 28);
    }

    // Record read: records laid out back-to-back with stride 8, unlimited
    // first dimension replaced by numrecs in the result shape.
    #[test]
    fn test_read_record_variable() {
        // Single record variable "temp" with shape [time, x] where x=2.
        // 3 records, each with 2 floats = 8 bytes per record.
        // Record stride = 8 (only one record var, already 4-aligned).
        let mut file_data = vec![0u8; 200];
        let base = 100usize;
        let record_values: Vec<Vec<f32>> = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
        for (rec, vals) in record_values.iter().enumerate() {
            for (i, &v) in vals.iter().enumerate() {
                let offset = base + rec * 8 + i * 4;
                file_data[offset..offset + 4].copy_from_slice(&v.to_be_bytes());
            }
        }

        let var = NcVariable {
            name: "temp".to_string(),
            dimensions: vec![
                NcDimension {
                    name: "time".to_string(),
                    size: 0, // unlimited
                    is_unlimited: true,
                },
                NcDimension {
                    name: "x".to_string(),
                    size: 2,
                    is_unlimited: false,
                },
            ],
            dtype: NcType::Float,
            attributes: vec![],
            data_offset: 100,
            _data_size: 0,
            is_record_var: true,
            record_size: 8,
        };

        let arr: ArrayD<f32> = read_record_variable(&file_data, &var, 3, 8).unwrap();
        assert_eq!(arr.shape(), &[3, 2]);
        assert_eq!(arr[[0, 0]], 1.0);
        assert_eq!(arr[[0, 1]], 2.0);
        assert_eq!(arr[[1, 0]], 3.0);
        assert_eq!(arr[[2, 1]], 6.0);
    }

    // Overflow hardening: u64::MAX * 2 elements cannot fit in any usize, so
    // the read must fail with InvalidData rather than panic or wrap.
    #[test]
    fn test_read_non_record_variable_rejects_element_count_overflow() {
        let var = NcVariable {
            name: "huge".to_string(),
            dimensions: vec![
                NcDimension {
                    name: "y".to_string(),
                    size: u64::MAX,
                    is_unlimited: false,
                },
                NcDimension {
                    name: "x".to_string(),
                    size: 2,
                    is_unlimited: false,
                },
            ],
            dtype: NcType::Float,
            attributes: vec![],
            data_offset: 0,
            _data_size: 0,
            is_record_var: false,
            record_size: 0,
        };

        let err = read_non_record_variable::<f32>(&[], &var).unwrap_err();
        assert!(matches!(err, Error::InvalidData(_)));
    }

    // Overflow hardening: a data_offset of u64::MAX makes any record's byte
    // range overflow; the read must fail with InvalidData rather than panic.
    #[test]
    fn test_read_record_variable_rejects_record_offset_overflow() {
        let var = NcVariable {
            name: "huge_record".to_string(),
            dimensions: vec![
                NcDimension {
                    name: "time".to_string(),
                    size: 0,
                    is_unlimited: true,
                },
                NcDimension {
                    name: "x".to_string(),
                    size: 1,
                    is_unlimited: false,
                },
            ],
            dtype: NcType::Float,
            attributes: vec![],
            data_offset: u64::MAX,
            _data_size: 0,
            is_record_var: true,
            record_size: 4,
        };

        let err = read_record_variable::<f32>(&[], &var, 1, 4).unwrap_err();
        assert!(matches!(err, Error::InvalidData(_)));
    }
}