Skip to main content

netcdf_reader/
lib.rs

1//! Pure-Rust NetCDF file reader.
2//!
3//! Supports:
4//! - **CDF-1** (classic): `CDF\x01` magic
5//! - **CDF-2** (64-bit offset): `CDF\x02` magic
6//! - **CDF-5** (64-bit data): `CDF\x05` magic
7//! - **NetCDF-4** (HDF5-backed): `\x89HDF\r\n\x1a\n` magic (requires `netcdf4` feature)
8//!
9//! PnetCDF-produced CDF-1/2/5 files are supported as files. NetCDF-C files
10//! created through parallel NetCDF-4/HDF5 APIs are supported when the final
11//! HDF5 file uses supported HDF5 features. This crate does not implement
12//! MPI communicators, `nc_open_par`, `nc_create_par`, collective/independent
13//! access modes, PnetCDF subfiling, or write APIs.
14//!
15//! # Example
16//!
17//! ```no_run
18//! use netcdf_reader::NcFile;
19//!
20//! let file = NcFile::open("example.nc").unwrap();
21//! println!("format: {:?}", file.format());
22//! for var in file.variables().unwrap() {
23//!     println!("  variable: {} shape={:?}", var.name(), var.shape());
24//! }
25//! ```
26
27pub mod classic;
28pub mod error;
29pub mod masked;
30pub mod types;
31pub mod unpack;
32
33#[cfg(feature = "netcdf4")]
34pub mod nc4;
35#[cfg(feature = "netcdf4")]
36pub mod user_defined;
37
38#[cfg(feature = "cf")]
39pub mod cf;
40
41pub use error::{Error, Result};
42#[cfg(feature = "netcdf4")]
43pub use hdf5_reader::storage::DynStorage;
44#[cfg(feature = "netcdf4")]
45pub use hdf5_reader::{
46    BlockCacheStats, BlockCacheStorage, BytesStorage, ChunkCacheStats, DatasetChunk,
47    DatasetChunkIterator, ExternalFileResolver, ExternalLinkResolver, FileStorage,
48    FilesystemExternalFileResolver, FilesystemExternalLinkResolver, MmapStorage,
49    RangeRequestStorage, Storage, StorageBuffer,
50};
51pub use types::*;
52#[cfg(feature = "netcdf4")]
53pub use user_defined::{
54    NcArrayValue, NcCompoundFieldView, NcCompoundValueField, NcEnumValue, NcValue, NcValueView,
55};
56
57use std::fs::File;
58use std::io::Read;
59use std::path::Path;
60#[cfg(feature = "netcdf4")]
61use std::sync::Arc;
62
63use memmap2::Mmap;
64use ndarray::ArrayD;
65#[cfg(feature = "rayon")]
66use rayon::ThreadPool;
67
/// Trait alias for types readable from both classic and NetCDF-4 files.
///
/// This unifies `classic::data::NcReadType` (for CDF-1/2/5) and
/// `hdf5_reader::H5Type` (for NetCDF-4/HDF5) so that `NcFile::read_variable`
/// works across all formats with a single type parameter.
///
/// The trait has no methods of its own; it only bundles the two bounds.
#[cfg(feature = "netcdf4")]
pub trait NcReadable: classic::data::NcReadType + hdf5_reader::H5Type {}
// Blanket impl: any type satisfying both bounds is automatically `NcReadable`.
#[cfg(feature = "netcdf4")]
impl<T: classic::data::NcReadType + hdf5_reader::H5Type> NcReadable for T {}

/// Trait alias for types readable from NetCDF files.
///
/// Without the `netcdf4` feature only the `classic::data::NcReadType`
/// bound is required, since no HDF5-backed reader is compiled in.
#[cfg(not(feature = "netcdf4"))]
pub trait NcReadable: classic::data::NcReadType {}
// Blanket impl: any `NcReadType` is automatically `NcReadable`.
#[cfg(not(feature = "netcdf4"))]
impl<T: classic::data::NcReadType> NcReadable for T {}
82
/// NetCDF file format.
///
/// The first four variants are what `detect_format` can report from the
/// magic bytes alone. `Nc4Classic` is only assigned after an HDF5-backed
/// file has been opened and reports that it uses the classic data model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NcFormat {
    /// CDF-1 classic format (`CDF\x01` magic).
    Classic,
    /// CDF-2 64-bit offset format (`CDF\x02` magic).
    Offset64,
    /// CDF-5 64-bit data format (`CDF\x05` magic).
    Cdf5,
    /// NetCDF-4 (HDF5-backed, `\x89HDF\r\n\x1a\n` magic).
    Nc4,
    /// NetCDF-4 classic model (HDF5-backed, restricted data model).
    Nc4Classic,
}
97
/// NetCDF-4 metadata reconstruction policy.
///
/// Carried in `NcOpenOptions::metadata_mode`. The derived `Default` is
/// [`NcMetadataMode::Strict`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NcMetadataMode {
    /// Fail the open if NetCDF-4 metadata cannot be reconstructed exactly.
    #[default]
    Strict,
    /// Allow heuristic reconstruction for malformed or partially-supported files.
    Lossy,
}
107
/// An opened NetCDF file.
///
/// Wraps either a classic (CDF-1/2/5) reader or, with the `netcdf4`
/// feature, an HDF5-backed reader, and forwards each call to whichever
/// backend is present.
pub struct NcFile {
    /// Format reported by [`NcFile::format`]; for HDF5-backed files this is
    /// refined to `Nc4Classic` when the file uses the classic model.
    format: NcFormat,
    /// The backend that actually serves metadata and data reads.
    inner: NcFileInner,
}
113
/// Backend storage for an [`NcFile`]: one variant per reader implementation.
enum NcFileInner {
    /// Reader for the classic CDF-1/2/5 family.
    Classic(classic::ClassicFile),
    /// Reader for HDF5-backed NetCDF-4 files; boxed so the enum holds only a
    /// pointer for this variant.
    #[cfg(feature = "netcdf4")]
    Nc4(Box<nc4::Nc4File>),
}
119
/// HDF5 magic bytes: `\x89HDF\r\n\x1a\n`
///
/// Compared against the first eight bytes of the input by `detect_format`
/// to recognise HDF5-backed (NetCDF-4) files.
const HDF5_MAGIC: [u8; 8] = [0x89, b'H', b'D', b'F', 0x0D, 0x0A, 0x1A, 0x0A];
122
123/// Detect the NetCDF format from the first bytes of a file.
124fn detect_format(data: &[u8]) -> Result<NcFormat> {
125    if data.len() < 4 {
126        return Err(Error::InvalidMagic);
127    }
128
129    // Check for CDF magic: "CDF" followed by version byte.
130    if data[0] == b'C' && data[1] == b'D' && data[2] == b'F' {
131        return match data[3] {
132            1 => Ok(NcFormat::Classic),
133            2 => Ok(NcFormat::Offset64),
134            5 => Ok(NcFormat::Cdf5),
135            v => Err(Error::UnsupportedVersion(v)),
136        };
137    }
138
139    // Check for HDF5 magic (8 bytes).
140    if data.len() >= 8 && data[..8] == HDF5_MAGIC {
141        return Ok(NcFormat::Nc4);
142    }
143
144    Err(Error::InvalidMagic)
145}
146
/// Read up to eight leading bytes from `reader` for format sniffing.
///
/// Returns the buffer together with the number of bytes actually filled;
/// bytes past that count are zero. Short reads are retried until the buffer
/// is full or the reader reports end of input.
///
/// # Errors
///
/// Any I/O error other than `ErrorKind::Interrupted` is returned as-is.
/// `Interrupted` is non-fatal per the `Read::read` contract, so the read is
/// simply retried instead of failing the whole open.
fn read_magic_prefix(reader: &mut impl Read) -> std::io::Result<([u8; 8], usize)> {
    let mut magic = [0u8; 8];
    let mut filled = 0;
    while filled < magic.len() {
        match reader.read(&mut magic[filled..]) {
            // End of input: the file is shorter than the buffer.
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        }
    }
    Ok((magic, filled))
}
159
160#[cfg(feature = "cf")]
/// Return the group portion of a `/`-separated variable path.
///
/// Leading and trailing slashes are ignored. A path with no remaining
/// separator names something in the root group, which is spelled `""`.
fn parent_group_path(path: &str) -> &str {
    match path.trim_matches('/').rsplit_once('/') {
        Some((parent, _leaf)) => parent,
        None => "",
    }
}
168
169impl NcFile {
170    /// Open a NetCDF file from a path.
171    ///
172    /// The format is auto-detected from the file's magic bytes.
173    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
174        Self::open_with_options(path, NcOpenOptions::default())
175    }
176
177    /// Open a NetCDF file from in-memory bytes.
178    ///
179    /// The format is auto-detected from the magic bytes.
180    pub fn from_bytes(data: &[u8]) -> Result<Self> {
181        Self::from_bytes_with_options(data, NcOpenOptions::default())
182    }
183
184    /// Open a NetCDF file from a custom random-access storage backend.
185    ///
186    /// Both NetCDF-4 and classic CDF-1/2/5 files stay range-backed.
187    #[cfg(feature = "netcdf4")]
188    pub fn from_storage(storage: DynStorage) -> Result<Self> {
189        Self::from_storage_with_options(storage, NcOpenOptions::default())
190    }
191
192    /// Open a NetCDF file from a custom random-access storage backend with custom options.
193    #[cfg(feature = "netcdf4")]
194    pub fn from_storage_with_options(storage: DynStorage, options: NcOpenOptions) -> Result<Self> {
195        let magic_len = storage.len().min(HDF5_MAGIC.len() as u64) as usize;
196        let magic = storage.read_range(0, magic_len)?;
197        let format = detect_format(magic.as_ref())?;
198
199        match format {
200            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
201                let classic = classic::ClassicFile::from_storage(storage, format)?;
202                Ok(NcFile {
203                    format,
204                    inner: NcFileInner::Classic(classic),
205                })
206            }
207            NcFormat::Nc4 | NcFormat::Nc4Classic => {
208                let nc4 = nc4::Nc4File::from_storage_with_options(storage, options)?;
209                let actual_format = if nc4.is_classic_model() {
210                    NcFormat::Nc4Classic
211                } else {
212                    NcFormat::Nc4
213                };
214                Ok(NcFile {
215                    format: actual_format,
216                    inner: NcFileInner::Nc4(Box::new(nc4)),
217                })
218            }
219        }
220    }
221
222    /// Open a NetCDF file from in-memory bytes with custom options.
223    ///
224    /// NC4 options are applied when the payload is HDF5-backed.
225    pub fn from_bytes_with_options(data: &[u8], options: NcOpenOptions) -> Result<Self> {
226        let format = detect_format(data)?;
227
228        match format {
229            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
230                let classic = classic::ClassicFile::from_bytes(data, format)?;
231                Ok(NcFile {
232                    format,
233                    inner: NcFileInner::Classic(classic),
234                })
235            }
236            NcFormat::Nc4 | NcFormat::Nc4Classic => {
237                #[cfg(feature = "netcdf4")]
238                {
239                    let nc4 = nc4::Nc4File::from_bytes_with_options(data, options)?;
240                    let actual_format = if nc4.is_classic_model() {
241                        NcFormat::Nc4Classic
242                    } else {
243                        NcFormat::Nc4
244                    };
245                    Ok(NcFile {
246                        format: actual_format,
247                        inner: NcFileInner::Nc4(Box::new(nc4)),
248                    })
249                }
250                #[cfg(not(feature = "netcdf4"))]
251                {
252                    let _ = options;
253                    Err(Error::Nc4NotEnabled)
254                }
255            }
256        }
257    }
258
259    /// The detected file format.
260    pub fn format(&self) -> NcFormat {
261        self.format
262    }
263
264    /// The root group of the file.
265    ///
266    /// Classic files have a single implicit root group containing all
267    /// dimensions, variables, and global attributes. NetCDF-4 files
268    /// can have nested sub-groups.
269    pub fn root_group(&self) -> Result<&NcGroup> {
270        match &self.inner {
271            NcFileInner::Classic(c) => Ok(c.root_group()),
272            #[cfg(feature = "netcdf4")]
273            NcFileInner::Nc4(n) => n.root_group(),
274        }
275    }
276
277    /// Convenience: dimensions in the root group.
278    pub fn dimensions(&self) -> Result<&[NcDimension]> {
279        match &self.inner {
280            NcFileInner::Classic(c) => Ok(&c.root_group().dimensions),
281            #[cfg(feature = "netcdf4")]
282            NcFileInner::Nc4(n) => n.dimensions(),
283        }
284    }
285
286    /// Convenience: variables in the root group.
287    pub fn variables(&self) -> Result<&[NcVariable]> {
288        match &self.inner {
289            NcFileInner::Classic(c) => Ok(&c.root_group().variables),
290            #[cfg(feature = "netcdf4")]
291            NcFileInner::Nc4(n) => n.variables(),
292        }
293    }
294
295    /// Convenience: global attributes (attributes of the root group).
296    pub fn global_attributes(&self) -> Result<&[NcAttribute]> {
297        match &self.inner {
298            NcFileInner::Classic(c) => Ok(&c.root_group().attributes),
299            #[cfg(feature = "netcdf4")]
300            NcFileInner::Nc4(n) => n.global_attributes(),
301        }
302    }
303
304    /// Find a group by path relative to the root group.
305    pub fn group(&self, path: &str) -> Result<&NcGroup> {
306        match &self.inner {
307            NcFileInner::Classic(c) => c
308                .root_group()
309                .group(path)
310                .ok_or_else(|| Error::GroupNotFound(path.to_string())),
311            #[cfg(feature = "netcdf4")]
312            NcFileInner::Nc4(n) => n.group(path),
313        }
314    }
315
316    /// Find a variable by name or path relative to the root group.
317    pub fn variable(&self, name: &str) -> Result<&NcVariable> {
318        match &self.inner {
319            NcFileInner::Classic(c) => c
320                .root_group()
321                .variable(name)
322                .ok_or_else(|| Error::VariableNotFound(name.to_string())),
323            #[cfg(feature = "netcdf4")]
324            NcFileInner::Nc4(n) => n.variable(name),
325        }
326    }
327
328    /// Find a dimension by name or path relative to the root group.
329    pub fn dimension(&self, name: &str) -> Result<&NcDimension> {
330        match &self.inner {
331            NcFileInner::Classic(c) => c
332                .root_group()
333                .dimension(name)
334                .ok_or_else(|| Error::DimensionNotFound(name.to_string())),
335            #[cfg(feature = "netcdf4")]
336            NcFileInner::Nc4(n) => n.dimension(name),
337        }
338    }
339
340    /// Find the coordinate variable for a dimension name or path.
341    pub fn coordinate_variable(&self, name: &str) -> Result<&NcVariable> {
342        self.root_group()?
343            .coordinate_variable(name)
344            .ok_or_else(|| Error::VariableNotFound(format!("coordinate variable for {name}")))
345    }
346
347    /// Discover CF axes from coordinate variables in a group.
348    #[cfg(feature = "cf")]
349    pub fn cf_coordinate_axes(&self, group_path: &str) -> Result<Vec<cf::CfCoordinateAxis<'_>>> {
350        let group = self.group(group_path)?;
351        Ok(cf::discover_coordinate_axes(group))
352    }
353
354    /// Discover CF axes used by a variable from its coordinate variables.
355    #[cfg(feature = "cf")]
356    pub fn cf_variable_axes(&self, name: &str) -> Result<Vec<cf::CfCoordinateAxis<'_>>> {
357        let variable = self.variable(name)?;
358        let group = self.group(parent_group_path(name))?;
359        Ok(cf::discover_variable_axes(variable, group))
360    }
361
362    /// Discover CF time coordinate variables in a group.
363    #[cfg(feature = "cf")]
364    pub fn cf_time_coordinates(&self, group_path: &str) -> Result<Vec<cf::CfTimeCoordinate<'_>>> {
365        let group = self.group(group_path)?;
366        cf::discover_time_coordinates(group)
367    }
368
369    /// Discover the CF time coordinate used by a variable, if one exists.
370    #[cfg(feature = "cf")]
371    pub fn cf_variable_time_coordinate(
372        &self,
373        name: &str,
374    ) -> Result<Option<cf::CfTimeCoordinate<'_>>> {
375        let variable = self.variable(name)?;
376        let group = self.group(parent_group_path(name))?;
377        cf::discover_variable_time_coordinate(variable, group)
378    }
379
380    /// Find a group attribute by name or path relative to the root group.
381    pub fn global_attribute(&self, name: &str) -> Result<&NcAttribute> {
382        match &self.inner {
383            NcFileInner::Classic(c) => c
384                .root_group()
385                .attribute(name)
386                .ok_or_else(|| Error::AttributeNotFound(name.to_string())),
387            #[cfg(feature = "netcdf4")]
388            NcFileInner::Nc4(n) => n.global_attribute(name),
389        }
390    }
391
392    /// Read a variable's data as a typed array.
393    ///
394    /// Works for both classic (CDF-1/2/5) and NetCDF-4 files. NetCDF-4 nested
395    /// variables can be addressed with paths like `group/subgroup/var`. The type
396    /// parameter `T` must implement `NcReadable`, which is satisfied by:
397    /// `i8, u8, i16, u16, i32, u32, i64, u64, f32, f64`.
398    pub fn read_variable<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
399        match &self.inner {
400            NcFileInner::Classic(c) => c.read_variable::<T>(name),
401            #[cfg(feature = "netcdf4")]
402            NcFileInner::Nc4(n) => Ok(n.read_variable::<T>(name)?),
403        }
404    }
405
406    /// Read a variable into a caller-provided typed buffer.
407    pub fn read_variable_into<T: NcReadable>(&self, name: &str, dst: &mut [T]) -> Result<()> {
408        match &self.inner {
409            NcFileInner::Classic(c) => c.read_variable_into::<T>(name, dst),
410            #[cfg(feature = "netcdf4")]
411            NcFileInner::Nc4(n) => Ok(n.read_variable_into::<T>(name, dst)?),
412        }
413    }
414
415    /// Read a NetCDF-4 variable as logical raw bytes in HDF5 datatype byte order.
416    #[cfg(feature = "netcdf4")]
417    pub fn read_variable_raw_bytes(&self, name: &str) -> Result<Vec<u8>> {
418        match &self.inner {
419            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
420                expected: "NetCDF-4 variable".to_string(),
421                actual: "classic NetCDF variable".to_string(),
422            }),
423            NcFileInner::Nc4(n) => Ok(n.read_variable_raw_bytes(name)?),
424        }
425    }
426
427    /// Read a NetCDF-4 variable as logical raw bytes into a caller-provided buffer.
428    #[cfg(feature = "netcdf4")]
429    pub fn read_variable_raw_bytes_into(&self, name: &str, dst: &mut [u8]) -> Result<()> {
430        match &self.inner {
431            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
432                expected: "NetCDF-4 variable".to_string(),
433                actual: "classic NetCDF variable".to_string(),
434            }),
435            NcFileInner::Nc4(n) => Ok(n.read_variable_raw_bytes_into(name, dst)?),
436        }
437    }
438
439    /// Read a NetCDF-4 variable as logical raw bytes with numeric fields in native endian.
440    #[cfg(feature = "netcdf4")]
441    pub fn read_variable_native_bytes(&self, name: &str) -> Result<Vec<u8>> {
442        match &self.inner {
443            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
444                expected: "NetCDF-4 variable".to_string(),
445                actual: "classic NetCDF variable".to_string(),
446            }),
447            NcFileInner::Nc4(n) => Ok(n.read_variable_native_bytes(name)?),
448        }
449    }
450
451    /// Read native-endian logical raw bytes for a NetCDF-4 variable into a buffer.
452    #[cfg(feature = "netcdf4")]
453    pub fn read_variable_native_bytes_into(&self, name: &str, dst: &mut [u8]) -> Result<()> {
454        match &self.inner {
455            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
456                expected: "NetCDF-4 variable".to_string(),
457                actual: "classic NetCDF variable".to_string(),
458            }),
459            NcFileInner::Nc4(n) => Ok(n.read_variable_native_bytes_into(name, dst)?),
460        }
461    }
462
463    /// Iterate decoded HDF5 chunks for a NetCDF-4 variable.
464    #[cfg(feature = "netcdf4")]
465    pub fn iter_variable_chunks(&self, name: &str) -> Result<DatasetChunkIterator> {
466        match &self.inner {
467            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
468                expected: "NetCDF-4 chunked variable".to_string(),
469                actual: "classic NetCDF variable".to_string(),
470            }),
471            NcFileInner::Nc4(n) => Ok(n.iter_variable_chunks(name)?),
472        }
473    }
474
475    /// Return chunk-cache statistics for NetCDF-4 files.
476    #[cfg(feature = "netcdf4")]
477    pub fn chunk_cache_stats(&self) -> Option<ChunkCacheStats> {
478        match &self.inner {
479            NcFileInner::Classic(_) => None,
480            NcFileInner::Nc4(n) => Some(n.chunk_cache_stats()),
481        }
482    }
483
484    /// Read a variable using internal chunk-level parallelism when available.
485    ///
486    /// Classic formats fall back to `read_variable`.
487    #[cfg(feature = "rayon")]
488    pub fn read_variable_parallel<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
489        match &self.inner {
490            NcFileInner::Classic(c) => c.read_variable_parallel::<T>(name),
491            #[cfg(feature = "netcdf4")]
492            NcFileInner::Nc4(n) => Ok(n.read_variable_parallel::<T>(name)?),
493        }
494    }
495
496    /// Read a variable using the provided Rayon thread pool when available.
497    ///
498    /// Classic formats fall back to `read_variable`.
499    #[cfg(feature = "rayon")]
500    pub fn read_variable_in_pool<T: NcReadable>(
501        &self,
502        name: &str,
503        pool: &ThreadPool,
504    ) -> Result<ArrayD<T>> {
505        match &self.inner {
506            NcFileInner::Classic(c) => pool.install(|| c.read_variable_parallel::<T>(name)),
507            #[cfg(feature = "netcdf4")]
508            NcFileInner::Nc4(n) => Ok(n.read_variable_in_pool::<T>(name, pool)?),
509        }
510    }
511
512    /// Access the underlying classic file (for reading data).
513    ///
514    /// Returns `None` if this is a NetCDF-4 file.
515    pub fn as_classic(&self) -> Option<&classic::ClassicFile> {
516        match &self.inner {
517            NcFileInner::Classic(c) => Some(c),
518            #[cfg(feature = "netcdf4")]
519            NcFileInner::Nc4(_) => None,
520        }
521    }
522
523    /// Read a variable with automatic type promotion to f64.
524    ///
525    /// Reads in the native storage type (i8, i16, i32, f32, f64, u8, etc.)
526    /// and promotes all values to f64. This avoids the `TypeMismatch` error
527    /// that `read_variable::<f64>` produces for non-f64 variables.
528    pub fn read_variable_as_f64(&self, name: &str) -> Result<ArrayD<f64>> {
529        match &self.inner {
530            NcFileInner::Classic(c) => c.read_variable_as_f64(name),
531            #[cfg(feature = "netcdf4")]
532            NcFileInner::Nc4(n) => n.read_variable_as_f64(name),
533        }
534    }
535
536    /// Read a string variable as a single string.
537    ///
538    /// Use [`NcFile::read_variable_as_strings`] when the variable contains
539    /// multiple string elements.
540    pub fn read_variable_as_string(&self, name: &str) -> Result<String> {
541        match &self.inner {
542            NcFileInner::Classic(c) => c.read_variable_as_string(name),
543            #[cfg(feature = "netcdf4")]
544            NcFileInner::Nc4(n) => n.read_variable_as_string(name),
545        }
546    }
547
548    /// Read a string or char variable as a flat vector of strings.
549    ///
550    /// Classic char arrays interpret the last dimension as the string length
551    /// and flatten the leading dimensions.
552    pub fn read_variable_as_strings(&self, name: &str) -> Result<Vec<String>> {
553        match &self.inner {
554            NcFileInner::Classic(c) => c.read_variable_as_strings(name),
555            #[cfg(feature = "netcdf4")]
556            NcFileInner::Nc4(n) => n.read_variable_as_strings(name),
557        }
558    }
559
560    /// Read a NetCDF-4 user-defined variable into dynamic values.
561    ///
562    /// This supports enum, opaque, compound, fixed-size array, and non-string
563    /// vlen datatypes. Primitive values nested inside those types are decoded
564    /// according to the HDF5 datatype byte order.
565    #[cfg(feature = "netcdf4")]
566    pub fn read_variable_user_defined(&self, name: &str) -> Result<ArrayD<NcValue>> {
567        match &self.inner {
568            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
569                expected: "NetCDF-4 user-defined variable".to_string(),
570                actual: "classic NetCDF variable".to_string(),
571            }),
572            NcFileInner::Nc4(n) => n.read_variable_user_defined(name),
573        }
574    }
575
576    /// Read a NetCDF-4 user-defined variable through a caller-provided decoder.
577    ///
578    /// The decoder receives one [`NcValueView`] per logical variable element,
579    /// allowing direct construction of application structs without allocating
580    /// an intermediate [`NcValue`] tree.
581    #[cfg(feature = "netcdf4")]
582    pub fn read_variable_user_defined_with<T, F>(&self, name: &str, decoder: F) -> Result<ArrayD<T>>
583    where
584        F: FnMut(NcValueView<'_>) -> Result<T>,
585    {
586        match &self.inner {
587            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
588                expected: "NetCDF-4 user-defined variable".to_string(),
589                actual: "classic NetCDF variable".to_string(),
590            }),
591            NcFileInner::Nc4(n) => n.read_variable_user_defined_with(name, decoder),
592        }
593    }
594
595    /// Read a variable and apply `scale_factor`/`add_offset` unpacking.
596    ///
597    /// Returns `actual = stored * scale_factor + add_offset`.
598    /// If neither attribute is present, returns the raw data as f64.
599    /// Uses type-promoting read so it works with any numeric storage type.
600    pub fn read_variable_unpacked(&self, name: &str) -> Result<ArrayD<f64>> {
601        let var = self.variable(name)?;
602        let params = unpack::UnpackParams::from_variable(var);
603        let mut data = self.read_variable_as_f64(name)?;
604        if let Some(p) = params {
605            p.apply(&mut data);
606        }
607        Ok(data)
608    }
609
610    /// Read a variable, replace `_FillValue`/`missing_value` with NaN,
611    /// and mask values outside `valid_min`/`valid_max`/`valid_range`.
612    /// Uses type-promoting read so it works with any numeric storage type.
613    pub fn read_variable_masked(&self, name: &str) -> Result<ArrayD<f64>> {
614        let var = self.variable(name)?;
615        let params = masked::MaskParams::from_variable(var);
616        let mut data = self.read_variable_as_f64(name)?;
617        if let Some(p) = params {
618            p.apply(&mut data);
619        }
620        Ok(data)
621    }
622
623    /// Read a variable with both masking and unpacking (CF spec order).
624    ///
625    /// Order: read → mask fill/missing → unpack (scale+offset).
626    /// Uses type-promoting read so it works with any numeric storage type.
627    pub fn read_variable_unpacked_masked(&self, name: &str) -> Result<ArrayD<f64>> {
628        let var = self.variable(name)?;
629        let mask_params = masked::MaskParams::from_variable(var);
630        let unpack_params = unpack::UnpackParams::from_variable(var);
631        let mut data = self.read_variable_as_f64(name)?;
632        if let Some(p) = mask_params {
633            p.apply(&mut data);
634        }
635        if let Some(p) = unpack_params {
636            p.apply(&mut data);
637        }
638        Ok(data)
639    }
640
641    // ----- Slice API -----
642
643    /// Read a slice (hyperslab) of a variable as a typed array.
644    pub fn read_variable_slice<T: NcReadable>(
645        &self,
646        name: &str,
647        selection: &NcSliceInfo,
648    ) -> Result<ArrayD<T>> {
649        match &self.inner {
650            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
651            #[cfg(feature = "netcdf4")]
652            NcFileInner::Nc4(n) => Ok(n.read_variable_slice::<T>(name, selection)?),
653        }
654    }
655
656    /// Read a slice (hyperslab) using chunk-level parallelism when available.
657    ///
658    /// For NetCDF-4 chunked datasets, overlapping chunks are decompressed in
659    /// parallel via Rayon. Classic formats fall back to `read_variable_slice`.
660    #[cfg(feature = "rayon")]
661    pub fn read_variable_slice_parallel<T: NcReadable>(
662        &self,
663        name: &str,
664        selection: &NcSliceInfo,
665    ) -> Result<ArrayD<T>> {
666        match &self.inner {
667            NcFileInner::Classic(c) => c.read_variable_slice_parallel::<T>(name, selection),
668            #[cfg(feature = "netcdf4")]
669            NcFileInner::Nc4(n) => Ok(n.read_variable_slice_parallel::<T>(name, selection)?),
670        }
671    }
672
673    /// Read a slice of a variable with automatic type promotion to f64.
674    pub fn read_variable_slice_as_f64(
675        &self,
676        name: &str,
677        selection: &NcSliceInfo,
678    ) -> Result<ArrayD<f64>> {
679        match &self.inner {
680            NcFileInner::Classic(c) => c.read_variable_slice_as_f64(name, selection),
681            #[cfg(feature = "netcdf4")]
682            NcFileInner::Nc4(n) => n.read_variable_slice_as_f64(name, selection),
683        }
684    }
685
686    /// Read a slice with `scale_factor`/`add_offset` unpacking.
687    pub fn read_variable_slice_unpacked(
688        &self,
689        name: &str,
690        selection: &NcSliceInfo,
691    ) -> Result<ArrayD<f64>> {
692        let var = self.variable(name)?;
693        let params = unpack::UnpackParams::from_variable(var);
694        let mut data = self.read_variable_slice_as_f64(name, selection)?;
695        if let Some(p) = params {
696            p.apply(&mut data);
697        }
698        Ok(data)
699    }
700
701    /// Read a slice with fill/missing value masking.
702    pub fn read_variable_slice_masked(
703        &self,
704        name: &str,
705        selection: &NcSliceInfo,
706    ) -> Result<ArrayD<f64>> {
707        let var = self.variable(name)?;
708        let params = masked::MaskParams::from_variable(var);
709        let mut data = self.read_variable_slice_as_f64(name, selection)?;
710        if let Some(p) = params {
711            p.apply(&mut data);
712        }
713        Ok(data)
714    }
715
716    /// Read a slice with both masking and unpacking (CF spec order).
717    pub fn read_variable_slice_unpacked_masked(
718        &self,
719        name: &str,
720        selection: &NcSliceInfo,
721    ) -> Result<ArrayD<f64>> {
722        let var = self.variable(name)?;
723        let mask_params = masked::MaskParams::from_variable(var);
724        let unpack_params = unpack::UnpackParams::from_variable(var);
725        let mut data = self.read_variable_slice_as_f64(name, selection)?;
726        if let Some(p) = mask_params {
727            p.apply(&mut data);
728        }
729        if let Some(p) = unpack_params {
730            p.apply(&mut data);
731        }
732        Ok(data)
733    }
734
735    // ----- Lazy Slice Iterator -----
736
737    /// Create an iterator that yields one slice per index along a given dimension.
738    ///
739    /// Each call to `next()` reads one slice using the slice API. This is
740    /// useful for iterating time steps, levels, etc. without loading the
741    /// entire dataset into memory.
742    pub fn iter_slices<T: NcReadable>(
743        &self,
744        name: &str,
745        dim: usize,
746    ) -> Result<NcSliceIterator<'_, T>> {
747        let var = self.variable(name)?;
748        let ndim = var.ndim();
749        if dim >= ndim {
750            return Err(Error::InvalidData(format!(
751                "dimension index {} out of range for {}-dimensional variable '{}'",
752                dim, ndim, name
753            )));
754        }
755        let dim_size = var.dimensions[dim].size;
756        Ok(NcSliceIterator {
757            file: self,
758            name: name.to_string(),
759            dim,
760            dim_size,
761            current: 0,
762            ndim,
763            _marker: std::marker::PhantomData,
764        })
765    }
766}
767
/// Configuration options for opening a NetCDF file.
///
/// Start from [`NcOpenOptions::default`] and override individual fields.
/// The resolver and filter fields exist only when the `netcdf4` feature is
/// enabled.
pub struct NcOpenOptions {
    /// Maximum bytes for the chunk cache (NC4 only). Default: 64 MiB.
    pub chunk_cache_bytes: usize,
    /// Maximum number of chunk cache slots (NC4 only). Default: 521.
    pub chunk_cache_slots: usize,
    /// NetCDF-4 metadata reconstruction policy. Default: strict.
    pub metadata_mode: NcMetadataMode,
    /// Custom filter registry (NC4 only). Default: `None`.
    #[cfg(feature = "netcdf4")]
    pub filter_registry: Option<hdf5_reader::FilterRegistry>,
    /// Resolver for HDF5 external raw data files (NC4 only). If `None`,
    /// external raw data files are not resolved. Default: `None`.
    #[cfg(feature = "netcdf4")]
    pub external_file_resolver: Option<Arc<dyn hdf5_reader::ExternalFileResolver>>,
    /// Resolver for HDF5 external links (NC4 only). Default: `None`.
    #[cfg(feature = "netcdf4")]
    pub external_link_resolver: Option<Arc<dyn hdf5_reader::ExternalLinkResolver>>,
}
787
788impl Default for NcOpenOptions {
789    fn default() -> Self {
790        NcOpenOptions {
791            chunk_cache_bytes: 64 * 1024 * 1024,
792            chunk_cache_slots: 521,
793            metadata_mode: NcMetadataMode::Strict,
794            #[cfg(feature = "netcdf4")]
795            filter_registry: None,
796            #[cfg(feature = "netcdf4")]
797            external_file_resolver: None,
798            #[cfg(feature = "netcdf4")]
799            external_link_resolver: None,
800        }
801    }
802}
803
804impl NcFile {
805    /// Open a NetCDF file with custom options.
806    pub fn open_with_options(path: impl AsRef<Path>, options: NcOpenOptions) -> Result<Self> {
807        let path = path.as_ref();
808        let mut file = File::open(path)?;
809        let (magic, n) = read_magic_prefix(&mut file)?;
810        let format = detect_format(&magic[..n])?;
811
812        match format {
813            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
814                let file = File::open(path)?;
815                // SAFETY: read-only mapping; caller must not modify the file concurrently.
816                let mmap = unsafe { Mmap::map(&file)? };
817                let classic = classic::ClassicFile::from_mmap(mmap, format)?;
818                Ok(NcFile {
819                    format,
820                    inner: NcFileInner::Classic(classic),
821                })
822            }
823            NcFormat::Nc4 | NcFormat::Nc4Classic => {
824                #[cfg(feature = "netcdf4")]
825                {
826                    let metadata_mode = options.metadata_mode;
827                    let hdf5 = hdf5_reader::Hdf5File::open_with_options(
828                        path,
829                        hdf5_reader::OpenOptions {
830                            chunk_cache_bytes: options.chunk_cache_bytes,
831                            chunk_cache_slots: options.chunk_cache_slots,
832                            filter_registry: options.filter_registry,
833                            external_file_resolver: options.external_file_resolver,
834                            external_link_resolver: options.external_link_resolver,
835                        },
836                    )?;
837                    let nc4 = nc4::Nc4File::from_hdf5(hdf5, metadata_mode)?;
838                    let actual_format = if nc4.is_classic_model() {
839                        NcFormat::Nc4Classic
840                    } else {
841                        NcFormat::Nc4
842                    };
843                    Ok(NcFile {
844                        format: actual_format,
845                        inner: NcFileInner::Nc4(Box::new(nc4)),
846                    })
847                }
848                #[cfg(not(feature = "netcdf4"))]
849                {
850                    let _ = options;
851                    Err(Error::Nc4NotEnabled)
852                }
853            }
854        }
855    }
856}
857
/// Lazy iterator over slices of a variable along a given dimension.
///
/// Each step reads one slice through `NcFile::read_variable_slice`, selecting
/// a single index along `dim` and the full extent of every other dimension.
/// Nothing is read until the iterator is advanced.
pub struct NcSliceIterator<'f, T: NcReadable> {
    /// File the slices are read from.
    file: &'f NcFile,
    /// Name of the variable being sliced.
    name: String,
    /// Position of the dimension being iterated over.
    dim: usize,
    /// Number of slices to yield; iteration ends once `current` reaches it.
    dim_size: u64,
    /// Index along `dim` of the next slice to read.
    current: u64,
    /// Number of selection entries built for each read.
    ndim: usize,
    /// Ties the element type `T` to the iterator without storing a value.
    _marker: std::marker::PhantomData<T>,
}
868
869impl<'f, T: NcReadable> Iterator for NcSliceIterator<'f, T> {
870    type Item = Result<ArrayD<T>>;
871
872    fn next(&mut self) -> Option<Self::Item> {
873        if self.current >= self.dim_size {
874            return None;
875        }
876        let mut selections = Vec::with_capacity(self.ndim);
877        for d in 0..self.ndim {
878            if d == self.dim {
879                selections.push(NcSliceInfoElem::Index(self.current));
880            } else {
881                selections.push(NcSliceInfoElem::Slice {
882                    start: 0,
883                    end: u64::MAX,
884                    step: 1,
885                });
886            }
887        }
888        let selection = NcSliceInfo { selections };
889        self.current += 1;
890        Some(self.file.read_variable_slice::<T>(&self.name, &selection))
891    }
892
893    fn size_hint(&self) -> (usize, Option<usize>) {
894        let remaining_u64 = self.dim_size.saturating_sub(self.current);
895        let remaining = remaining_u64.min(usize::MAX as u64) as usize;
896        (remaining, Some(remaining))
897    }
898}
899
900#[cfg(test)]
901mod tests {
902    use super::*;
903    #[cfg(feature = "netcdf4")]
904    use std::sync::Arc;
905    #[cfg(feature = "netcdf4")]
906    use std::sync::Mutex;
907
908    #[test]
909    fn detect_cdf1() {
910        let data = b"CDF\x01rest_of_file";
911        assert_eq!(detect_format(data).unwrap(), NcFormat::Classic);
912    }
913
914    #[test]
915    fn detect_cdf2() {
916        let data = b"CDF\x02rest_of_file";
917        assert_eq!(detect_format(data).unwrap(), NcFormat::Offset64);
918    }
919
920    #[test]
921    fn detect_cdf5() {
922        let data = b"CDF\x05rest_of_file";
923        assert_eq!(detect_format(data).unwrap(), NcFormat::Cdf5);
924    }
925
926    #[test]
927    fn detect_hdf5() {
928        let mut data = vec![0x89, b'H', b'D', b'F', 0x0D, 0x0A, 0x1A, 0x0A];
929        data.extend_from_slice(b"rest_of_file");
930        assert_eq!(detect_format(&data).unwrap(), NcFormat::Nc4);
931    }
932
933    #[test]
934    fn detect_invalid_magic() {
935        let data = b"XXXX";
936        assert!(matches!(
937            detect_format(data).unwrap_err(),
938            Error::InvalidMagic
939        ));
940    }
941
942    #[test]
943    fn detect_unsupported_version() {
944        let data = b"CDF\x03";
945        assert!(matches!(
946            detect_format(data).unwrap_err(),
947            Error::UnsupportedVersion(3)
948        ));
949    }
950
951    #[test]
952    fn detect_too_short() {
953        let data = b"CD";
954        assert!(matches!(
955            detect_format(data).unwrap_err(),
956            Error::InvalidMagic
957        ));
958    }
959
960    #[test]
961    fn from_bytes_minimal_cdf1() {
962        // Minimal valid CDF-1 file: magic + numrecs + absent dim/att/var lists.
963        let mut data = Vec::new();
964        data.extend_from_slice(b"CDF\x01");
965        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
966                                                     // dim_list: ABSENT
967        data.extend_from_slice(&0u32.to_be_bytes()); // tag = 0
968        data.extend_from_slice(&0u32.to_be_bytes()); // count = 0
969                                                     // att_list: ABSENT
970        data.extend_from_slice(&0u32.to_be_bytes());
971        data.extend_from_slice(&0u32.to_be_bytes());
972        // var_list: ABSENT
973        data.extend_from_slice(&0u32.to_be_bytes());
974        data.extend_from_slice(&0u32.to_be_bytes());
975
976        let file = NcFile::from_bytes(&data).unwrap();
977        assert_eq!(file.format(), NcFormat::Classic);
978        assert!(file.dimensions().unwrap().is_empty());
979        assert!(file.variables().unwrap().is_empty());
980        assert!(file.global_attributes().unwrap().is_empty());
981    }
982
    #[cfg(feature = "netcdf4")]
    /// In-memory storage used by the tests to observe which byte ranges a
    /// reader requests: every `read_range` call is logged before it is served.
    struct CountingStorage {
        /// Backing bytes served to readers.
        data: Arc<[u8]>,
        /// `(offset, len)` of each `read_range` call, in call order.
        reads: Mutex<Vec<(u64, usize)>>,
    }
988
989    #[cfg(feature = "netcdf4")]
990    impl CountingStorage {
991        fn new(data: Vec<u8>) -> Self {
992            Self {
993                data: Arc::<[u8]>::from(data),
994                reads: Mutex::new(Vec::new()),
995            }
996        }
997
998        fn reads(&self) -> Vec<(u64, usize)> {
999            self.reads.lock().unwrap().clone()
1000        }
1001    }
1002
1003    #[cfg(feature = "netcdf4")]
1004    impl hdf5_reader::Storage for CountingStorage {
1005        fn len(&self) -> u64 {
1006            self.data.len() as u64
1007        }
1008
1009        fn read_range(
1010            &self,
1011            offset: u64,
1012            len: usize,
1013        ) -> hdf5_reader::error::Result<hdf5_reader::StorageBuffer> {
1014            self.reads.lock().unwrap().push((offset, len));
1015            let start = usize::try_from(offset)
1016                .map_err(|_| hdf5_reader::error::Error::OffsetOutOfBounds(offset))?;
1017            let end = start
1018                .checked_add(len)
1019                .ok_or(hdf5_reader::error::Error::OffsetOutOfBounds(offset))?;
1020            if end > self.data.len() {
1021                return Err(hdf5_reader::error::Error::UnexpectedEof {
1022                    offset,
1023                    needed: len as u64,
1024                    available: self.len().saturating_sub(offset),
1025                });
1026            }
1027            Ok(hdf5_reader::StorageBuffer::from_vec(
1028                self.data[start..end].to_vec(),
1029            ))
1030        }
1031    }
1032
1033    #[cfg(feature = "netcdf4")]
1034    fn classic_large_offset_fixture() -> Vec<u8> {
1035        fn write_name(buf: &mut Vec<u8>, name: &str) {
1036            buf.extend_from_slice(&(name.len() as u32).to_be_bytes());
1037            buf.extend_from_slice(name.as_bytes());
1038            while buf.len() % 4 != 0 {
1039                buf.push(0);
1040            }
1041        }
1042
1043        const DATA_OFFSET: usize = 70 * 1024;
1044        let mut buf = Vec::new();
1045        buf.extend_from_slice(b"CDF\x01");
1046        buf.extend_from_slice(&0u32.to_be_bytes());
1047        buf.extend_from_slice(&0x0000_000Au32.to_be_bytes());
1048        buf.extend_from_slice(&1u32.to_be_bytes());
1049        write_name(&mut buf, "x");
1050        buf.extend_from_slice(&4u32.to_be_bytes());
1051        buf.extend_from_slice(&0u32.to_be_bytes());
1052        buf.extend_from_slice(&0u32.to_be_bytes());
1053        buf.extend_from_slice(&0x0000_000Bu32.to_be_bytes());
1054        buf.extend_from_slice(&1u32.to_be_bytes());
1055        write_name(&mut buf, "data");
1056        buf.extend_from_slice(&1u32.to_be_bytes());
1057        buf.extend_from_slice(&0u32.to_be_bytes());
1058        buf.extend_from_slice(&0u32.to_be_bytes());
1059        buf.extend_from_slice(&0u32.to_be_bytes());
1060        buf.extend_from_slice(&4u32.to_be_bytes());
1061        buf.extend_from_slice(&16u32.to_be_bytes());
1062        buf.extend_from_slice(&(DATA_OFFSET as u32).to_be_bytes());
1063
1064        buf.resize(DATA_OFFSET, 0);
1065        for value in [10i32, 20, 30, 40] {
1066            buf.extend_from_slice(&value.to_be_bytes());
1067        }
1068        buf
1069    }
1070
1071    fn write_cdf1_name(buf: &mut Vec<u8>, name: &str) {
1072        buf.extend_from_slice(&(name.len() as u32).to_be_bytes());
1073        buf.extend_from_slice(name.as_bytes());
1074        while buf.len() % 4 != 0 {
1075            buf.push(0);
1076        }
1077    }
1078
1079    fn write_cdf5_count(buf: &mut Vec<u8>, value: u64) {
1080        buf.extend_from_slice(&value.to_be_bytes());
1081    }
1082
1083    fn write_cdf5_name(buf: &mut Vec<u8>, name: &str) {
1084        write_cdf5_count(buf, name.len() as u64);
1085        buf.extend_from_slice(name.as_bytes());
1086        while buf.len() % 4 != 0 {
1087            buf.push(0);
1088        }
1089    }
1090
1091    fn cdf5_huge_dimension_fixture() -> Vec<u8> {
1092        let mut buf = Vec::new();
1093        buf.extend_from_slice(b"CDF\x05");
1094        write_cdf5_count(&mut buf, 0);
1095
1096        buf.extend_from_slice(&0x0000_000Au32.to_be_bytes());
1097        write_cdf5_count(&mut buf, 1);
1098        write_cdf5_name(&mut buf, "n");
1099        write_cdf5_count(&mut buf, u64::MAX);
1100
1101        buf.extend_from_slice(&0u32.to_be_bytes());
1102        write_cdf5_count(&mut buf, 0);
1103
1104        buf.extend_from_slice(&0x0000_000Bu32.to_be_bytes());
1105        write_cdf5_count(&mut buf, 1);
1106        write_cdf5_name(&mut buf, "big");
1107        write_cdf5_count(&mut buf, 1);
1108        write_cdf5_count(&mut buf, 0);
1109        buf.extend_from_slice(&0u32.to_be_bytes());
1110        write_cdf5_count(&mut buf, 0);
1111        buf.extend_from_slice(&4u32.to_be_bytes());
1112        write_cdf5_count(&mut buf, 4);
1113        let offset_pos = buf.len();
1114        buf.extend_from_slice(&0u64.to_be_bytes());
1115
1116        let data_offset = buf.len() as u64;
1117        buf[offset_pos..offset_pos + 8].copy_from_slice(&data_offset.to_be_bytes());
1118        buf.extend_from_slice(&123i32.to_be_bytes());
1119        buf
1120    }
1121
1122    fn subfiling_marker_fixture() -> Vec<u8> {
1123        let mut buf = Vec::new();
1124        buf.extend_from_slice(b"CDF\x01");
1125        buf.extend_from_slice(&0u32.to_be_bytes());
1126        buf.extend_from_slice(&0u32.to_be_bytes());
1127        buf.extend_from_slice(&0u32.to_be_bytes());
1128
1129        buf.extend_from_slice(&0x0000_000Cu32.to_be_bytes());
1130        buf.extend_from_slice(&1u32.to_be_bytes());
1131        write_cdf1_name(&mut buf, "_PnetCDF_SubFiling_enabled");
1132        buf.extend_from_slice(&4u32.to_be_bytes());
1133        buf.extend_from_slice(&1u32.to_be_bytes());
1134        buf.extend_from_slice(&1i32.to_be_bytes());
1135
1136        buf.extend_from_slice(&0u32.to_be_bytes());
1137        buf.extend_from_slice(&0u32.to_be_bytes());
1138        buf
1139    }
1140
1141    fn streaming_cdf1_record_fixture(values: &[i32], trailing: &[u8]) -> Vec<u8> {
1142        let mut buf = Vec::new();
1143        buf.extend_from_slice(b"CDF\x01");
1144        buf.extend_from_slice(&u32::MAX.to_be_bytes());
1145
1146        buf.extend_from_slice(&0x0000_000Au32.to_be_bytes());
1147        buf.extend_from_slice(&1u32.to_be_bytes());
1148        write_cdf1_name(&mut buf, "time");
1149        buf.extend_from_slice(&0u32.to_be_bytes());
1150
1151        buf.extend_from_slice(&0u32.to_be_bytes());
1152        buf.extend_from_slice(&0u32.to_be_bytes());
1153
1154        buf.extend_from_slice(&0x0000_000Bu32.to_be_bytes());
1155        buf.extend_from_slice(&1u32.to_be_bytes());
1156        write_cdf1_name(&mut buf, "temp");
1157        buf.extend_from_slice(&1u32.to_be_bytes());
1158        buf.extend_from_slice(&0u32.to_be_bytes());
1159        buf.extend_from_slice(&0u32.to_be_bytes());
1160        buf.extend_from_slice(&0u32.to_be_bytes());
1161        buf.extend_from_slice(&4u32.to_be_bytes());
1162        buf.extend_from_slice(&4u32.to_be_bytes());
1163        let offset_pos = buf.len();
1164        buf.extend_from_slice(&0u32.to_be_bytes());
1165
1166        let data_offset = buf.len() as u32;
1167        buf[offset_pos..offset_pos + 4].copy_from_slice(&data_offset.to_be_bytes());
1168
1169        for value in values {
1170            buf.extend_from_slice(&value.to_be_bytes());
1171        }
1172        buf.extend_from_slice(trailing);
1173        buf
1174    }
1175
1176    fn streaming_cdf2_record_fixture(values: &[i32]) -> Vec<u8> {
1177        let mut buf = Vec::new();
1178        buf.extend_from_slice(b"CDF\x02");
1179        buf.extend_from_slice(&u32::MAX.to_be_bytes());
1180
1181        buf.extend_from_slice(&0x0000_000Au32.to_be_bytes());
1182        buf.extend_from_slice(&1u32.to_be_bytes());
1183        write_cdf1_name(&mut buf, "time");
1184        buf.extend_from_slice(&0u32.to_be_bytes());
1185
1186        buf.extend_from_slice(&0u32.to_be_bytes());
1187        buf.extend_from_slice(&0u32.to_be_bytes());
1188
1189        buf.extend_from_slice(&0x0000_000Bu32.to_be_bytes());
1190        buf.extend_from_slice(&1u32.to_be_bytes());
1191        write_cdf1_name(&mut buf, "temp");
1192        buf.extend_from_slice(&1u32.to_be_bytes());
1193        buf.extend_from_slice(&0u32.to_be_bytes());
1194        buf.extend_from_slice(&0u32.to_be_bytes());
1195        buf.extend_from_slice(&0u32.to_be_bytes());
1196        buf.extend_from_slice(&4u32.to_be_bytes());
1197        buf.extend_from_slice(&4u32.to_be_bytes());
1198        let offset_pos = buf.len();
1199        buf.extend_from_slice(&0u64.to_be_bytes());
1200
1201        let data_offset = buf.len() as u64;
1202        buf[offset_pos..offset_pos + 8].copy_from_slice(&data_offset.to_be_bytes());
1203
1204        for value in values {
1205            buf.extend_from_slice(&value.to_be_bytes());
1206        }
1207        buf
1208    }
1209
1210    fn streaming_cdf1_two_record_var_fixture(records: &[(i32, i16)]) -> Vec<u8> {
1211        let mut buf = Vec::new();
1212        buf.extend_from_slice(b"CDF\x01");
1213        buf.extend_from_slice(&u32::MAX.to_be_bytes());
1214
1215        buf.extend_from_slice(&0x0000_000Au32.to_be_bytes());
1216        buf.extend_from_slice(&1u32.to_be_bytes());
1217        write_cdf1_name(&mut buf, "time");
1218        buf.extend_from_slice(&0u32.to_be_bytes());
1219
1220        buf.extend_from_slice(&0u32.to_be_bytes());
1221        buf.extend_from_slice(&0u32.to_be_bytes());
1222
1223        buf.extend_from_slice(&0x0000_000Bu32.to_be_bytes());
1224        buf.extend_from_slice(&2u32.to_be_bytes());
1225
1226        write_cdf1_name(&mut buf, "temp");
1227        buf.extend_from_slice(&1u32.to_be_bytes());
1228        buf.extend_from_slice(&0u32.to_be_bytes());
1229        buf.extend_from_slice(&0u32.to_be_bytes());
1230        buf.extend_from_slice(&0u32.to_be_bytes());
1231        buf.extend_from_slice(&4u32.to_be_bytes());
1232        buf.extend_from_slice(&4u32.to_be_bytes());
1233        let temp_offset_pos = buf.len();
1234        buf.extend_from_slice(&0u32.to_be_bytes());
1235
1236        write_cdf1_name(&mut buf, "flag");
1237        buf.extend_from_slice(&1u32.to_be_bytes());
1238        buf.extend_from_slice(&0u32.to_be_bytes());
1239        buf.extend_from_slice(&0u32.to_be_bytes());
1240        buf.extend_from_slice(&0u32.to_be_bytes());
1241        buf.extend_from_slice(&3u32.to_be_bytes());
1242        buf.extend_from_slice(&2u32.to_be_bytes());
1243        let flag_offset_pos = buf.len();
1244        buf.extend_from_slice(&0u32.to_be_bytes());
1245
1246        let data_offset = buf.len() as u32;
1247        let flag_offset = data_offset + 4;
1248        buf[temp_offset_pos..temp_offset_pos + 4].copy_from_slice(&data_offset.to_be_bytes());
1249        buf[flag_offset_pos..flag_offset_pos + 4].copy_from_slice(&flag_offset.to_be_bytes());
1250
1251        for &(temp, flag) in records {
1252            buf.extend_from_slice(&temp.to_be_bytes());
1253            buf.extend_from_slice(&flag.to_be_bytes());
1254            buf.extend_from_slice(&[0, 0]);
1255        }
1256        buf
1257    }
1258
1259    #[test]
1260    fn cdf5_huge_dimension_can_slice_but_full_read_errors_cleanly() {
1261        let file = NcFile::from_bytes(&cdf5_huge_dimension_fixture()).unwrap();
1262        let selection = NcSliceInfo {
1263            selections: vec![NcSliceInfoElem::Index(0)],
1264        };
1265
1266        let sliced: ndarray::ArrayD<i32> = file.read_variable_slice("big", &selection).unwrap();
1267        assert_eq!(sliced.as_slice().unwrap(), &[123]);
1268
1269        let err = file.read_variable::<i32>("big").unwrap_err();
1270        assert!(matches!(err, Error::InvalidData(_)));
1271    }
1272
1273    #[test]
1274    fn classic_subfiling_marker_returns_unsupported_feature() {
1275        let err = match NcFile::from_bytes(&subfiling_marker_fixture()) {
1276            Ok(_) => panic!("subfiling marker should be rejected"),
1277            Err(err) => err,
1278        };
1279        assert!(matches!(
1280            err,
1281            Error::UnsupportedFeature(message) if message.contains("PnetCDF subfiling")
1282        ));
1283    }
1284
1285    #[test]
1286    fn streaming_cdf1_numrecs_are_derived_from_file_length() {
1287        let file = NcFile::from_bytes(&streaming_cdf1_record_fixture(&[10, 20, 30], &[])).unwrap();
1288
1289        assert_eq!(file.as_classic().unwrap().numrecs(), 3);
1290        assert_eq!(file.dimension("time").unwrap().size, 3);
1291        assert_eq!(file.variable("temp").unwrap().shape(), vec![3]);
1292
1293        let values: ndarray::ArrayD<i32> = file.read_variable("temp").unwrap();
1294        assert_eq!(values.as_slice().unwrap(), &[10, 20, 30]);
1295    }
1296
1297    #[test]
1298    fn streaming_cdf1_numrecs_ignore_trailing_partial_record() {
1299        let file =
1300            NcFile::from_bytes(&streaming_cdf1_record_fixture(&[10, 20], &[0xAA, 0xBB])).unwrap();
1301
1302        assert_eq!(file.as_classic().unwrap().numrecs(), 2);
1303        assert_eq!(file.variable("temp").unwrap().shape(), vec![2]);
1304
1305        let values: ndarray::ArrayD<i32> = file.read_variable("temp").unwrap();
1306        assert_eq!(values.as_slice().unwrap(), &[10, 20]);
1307    }
1308
1309    #[test]
1310    fn streaming_cdf2_numrecs_are_derived_from_file_length() {
1311        let file = NcFile::from_bytes(&streaming_cdf2_record_fixture(&[10, 20, 30])).unwrap();
1312
1313        assert_eq!(file.format(), NcFormat::Offset64);
1314        assert_eq!(file.as_classic().unwrap().numrecs(), 3);
1315        assert_eq!(file.dimension("time").unwrap().size, 3);
1316        assert_eq!(file.variable("temp").unwrap().shape(), vec![3]);
1317
1318        let values: ndarray::ArrayD<i32> = file.read_variable("temp").unwrap();
1319        assert_eq!(values.as_slice().unwrap(), &[10, 20, 30]);
1320    }
1321
1322    #[test]
1323    fn streaming_cdf1_numrecs_use_full_stride_for_multiple_record_variables() {
1324        let file = NcFile::from_bytes(&streaming_cdf1_two_record_var_fixture(&[
1325            (10, 1),
1326            (20, 2),
1327            (30, 3),
1328        ]))
1329        .unwrap();
1330
1331        assert_eq!(file.as_classic().unwrap().numrecs(), 3);
1332        assert_eq!(file.variable("temp").unwrap().shape(), vec![3]);
1333        assert_eq!(file.variable("flag").unwrap().shape(), vec![3]);
1334
1335        let temps: ndarray::ArrayD<i32> = file.read_variable("temp").unwrap();
1336        let flags: ndarray::ArrayD<i16> = file.read_variable("flag").unwrap();
1337        assert_eq!(temps.as_slice().unwrap(), &[10, 20, 30]);
1338        assert_eq!(flags.as_slice().unwrap(), &[1, 2, 3]);
1339    }
1340
1341    #[cfg(feature = "netcdf4")]
1342    #[test]
1343    fn streaming_cdf1_from_storage_derives_numrecs() {
1344        let file = NcFile::from_storage(Arc::new(BytesStorage::new(
1345            streaming_cdf1_record_fixture(&[10, 20, 30], &[]),
1346        )))
1347        .unwrap();
1348
1349        assert_eq!(file.as_classic().unwrap().numrecs(), 3);
1350        assert_eq!(file.dimension("time").unwrap().size, 3);
1351
1352        let values: ndarray::ArrayD<i32> = file.read_variable("temp").unwrap();
1353        assert_eq!(values.as_slice().unwrap(), &[10, 20, 30]);
1354    }
1355
1356    #[cfg(feature = "netcdf4")]
1357    #[test]
1358    fn classic_from_storage_keeps_open_range_backed() {
1359        let data = classic_large_offset_fixture();
1360        let full_len = data.len();
1361        let storage = Arc::new(CountingStorage::new(data));
1362
1363        let file = NcFile::from_storage(storage.clone()).unwrap();
1364        assert_eq!(file.format(), NcFormat::Classic);
1365        assert!(!storage
1366            .reads()
1367            .iter()
1368            .any(|&(offset, len)| offset == 0 && len == full_len));
1369
1370        let values: ndarray::ArrayD<i32> = file.read_variable("data").unwrap();
1371        assert_eq!(values.as_slice().unwrap(), &[10, 20, 30, 40]);
1372        assert!(storage
1373            .reads()
1374            .iter()
1375            .any(|&(offset, len)| { offset == (70 * 1024) as u64 && len == 16 }));
1376    }
1377
1378    #[cfg(feature = "netcdf4")]
1379    #[test]
1380    fn classic_from_storage_slice_reads_planned_range() {
1381        let storage = Arc::new(CountingStorage::new(classic_large_offset_fixture()));
1382        let file = NcFile::from_storage(storage.clone()).unwrap();
1383        let selection = NcSliceInfo {
1384            selections: vec![NcSliceInfoElem::Slice {
1385                start: 1,
1386                end: 3,
1387                step: 1,
1388            }],
1389        };
1390
1391        let values: ndarray::ArrayD<i32> = file.read_variable_slice("data", &selection).unwrap();
1392        assert_eq!(values.as_slice().unwrap(), &[20, 30]);
1393        assert!(storage
1394            .reads()
1395            .iter()
1396            .any(|&(offset, len)| { offset == (70 * 1024 + 4) as u64 && len == 8 }));
1397    }
1398
1399    #[cfg(feature = "netcdf4")]
1400    #[test]
1401    fn from_storage_minimal_cdf1() {
1402        // Minimal valid CDF-1 file: magic + numrecs + absent dim/att/var lists.
1403        let mut data = Vec::new();
1404        data.extend_from_slice(b"CDF\x01");
1405        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
1406                                                     // dim_list: ABSENT
1407        data.extend_from_slice(&0u32.to_be_bytes()); // tag = 0
1408        data.extend_from_slice(&0u32.to_be_bytes()); // count = 0
1409                                                     // att_list: ABSENT
1410        data.extend_from_slice(&0u32.to_be_bytes());
1411        data.extend_from_slice(&0u32.to_be_bytes());
1412        // var_list: ABSENT
1413        data.extend_from_slice(&0u32.to_be_bytes());
1414        data.extend_from_slice(&0u32.to_be_bytes());
1415
1416        let file = NcFile::from_storage(Arc::new(BytesStorage::new(data))).unwrap();
1417        assert_eq!(file.format(), NcFormat::Classic);
1418        assert!(file.dimensions().unwrap().is_empty());
1419        assert!(file.variables().unwrap().is_empty());
1420        assert!(file.global_attributes().unwrap().is_empty());
1421    }
1422
1423    #[cfg(feature = "netcdf4")]
1424    #[test]
1425    fn from_storage_short_input_reports_invalid_magic() {
1426        let err = NcFile::from_storage(Arc::new(BytesStorage::new(vec![b'C', b'D'])))
1427            .err()
1428            .expect("short storage should not parse as NetCDF");
1429        assert!(matches!(err, Error::InvalidMagic));
1430    }
1431
1432    #[test]
1433    fn from_bytes_cdf1_with_data() {
1434        // Build a CDF-1 file with one dimension, one global attribute, and one variable.
1435        let mut data = Vec::new();
1436        data.extend_from_slice(b"CDF\x01");
1437        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
1438
1439        // dim_list: 1 dimension "x" with size 3
1440        data.extend_from_slice(&0x0000_000Au32.to_be_bytes()); // NC_DIMENSION tag
1441        data.extend_from_slice(&1u32.to_be_bytes()); // nelems = 1
1442                                                     // name "x": length=1, "x", 3 bytes padding
1443        data.extend_from_slice(&1u32.to_be_bytes());
1444        data.push(b'x');
1445        data.extend_from_slice(&[0, 0, 0]); // padding to 4
1446                                            // dim size
1447        data.extend_from_slice(&3u32.to_be_bytes());
1448
1449        // att_list: 1 attribute "title" = "test"
1450        data.extend_from_slice(&0x0000_000Cu32.to_be_bytes()); // NC_ATTRIBUTE tag
1451        data.extend_from_slice(&1u32.to_be_bytes()); // nelems = 1
1452                                                     // name "title"
1453        data.extend_from_slice(&5u32.to_be_bytes());
1454        data.extend_from_slice(b"title");
1455        data.extend_from_slice(&[0, 0, 0]); // padding
1456                                            // nc_type = NC_CHAR = 2
1457        data.extend_from_slice(&2u32.to_be_bytes());
1458        // nvalues = 4
1459        data.extend_from_slice(&4u32.to_be_bytes());
1460        data.extend_from_slice(b"test"); // exactly 4 bytes, no padding needed
1461
1462        // var_list: 1 variable "vals" with dim x, type float
1463        data.extend_from_slice(&0x0000_000Bu32.to_be_bytes()); // NC_VARIABLE tag
1464        data.extend_from_slice(&1u32.to_be_bytes()); // nelems = 1
1465                                                     // name "vals"
1466        data.extend_from_slice(&4u32.to_be_bytes());
1467        data.extend_from_slice(b"vals");
1468        // ndims = 1
1469        data.extend_from_slice(&1u32.to_be_bytes());
1470        // dimid = 0
1471        data.extend_from_slice(&0u32.to_be_bytes());
1472        // att_list: absent
1473        data.extend_from_slice(&0u32.to_be_bytes());
1474        data.extend_from_slice(&0u32.to_be_bytes());
1475        // nc_type = NC_FLOAT = 5
1476        data.extend_from_slice(&5u32.to_be_bytes());
1477        // vsize = 12 (3 floats * 4 bytes)
1478        data.extend_from_slice(&12u32.to_be_bytes());
1479        // begin (offset): we'll put data right after this header
1480        let data_offset = data.len() as u32 + 4; // +4 for this field itself
1481        data.extend_from_slice(&data_offset.to_be_bytes());
1482
1483        // Now append the variable data: 3 floats
1484        data.extend_from_slice(&1.5f32.to_be_bytes());
1485        data.extend_from_slice(&2.5f32.to_be_bytes());
1486        data.extend_from_slice(&3.5f32.to_be_bytes());
1487
1488        let file = NcFile::from_bytes(&data).unwrap();
1489        assert_eq!(file.format(), NcFormat::Classic);
1490        assert_eq!(file.dimensions().unwrap().len(), 1);
1491        assert_eq!(file.dimensions().unwrap()[0].name, "x");
1492        assert_eq!(file.dimensions().unwrap()[0].size, 3);
1493
1494        assert_eq!(file.global_attributes().unwrap().len(), 1);
1495        assert_eq!(file.global_attributes().unwrap()[0].name, "title");
1496        assert_eq!(
1497            file.global_attributes().unwrap()[0]
1498                .value
1499                .as_string()
1500                .unwrap(),
1501            "test"
1502        );
1503
1504        assert_eq!(file.variables().unwrap().len(), 1);
1505        let var = file.variable("vals").unwrap();
1506        assert_eq!(var.dtype(), &NcType::Float);
1507        assert_eq!(var.shape(), vec![3]);
1508
1509        // Read the actual data through the classic file.
1510        let classic = file.as_classic().unwrap();
1511        let arr: ndarray::ArrayD<f32> = classic.read_variable("vals").unwrap();
1512        assert_eq!(arr.shape(), &[3]);
1513        assert_eq!(arr[[0]], 1.5f32);
1514        assert_eq!(arr[[1]], 2.5f32);
1515        assert_eq!(arr[[2]], 3.5f32);
1516    }
1517
1518    #[test]
1519    fn variable_not_found() {
1520        let mut data = Vec::new();
1521        data.extend_from_slice(b"CDF\x01");
1522        data.extend_from_slice(&0u32.to_be_bytes());
1523        // All absent.
1524        data.extend_from_slice(&0u32.to_be_bytes());
1525        data.extend_from_slice(&0u32.to_be_bytes());
1526        data.extend_from_slice(&0u32.to_be_bytes());
1527        data.extend_from_slice(&0u32.to_be_bytes());
1528        data.extend_from_slice(&0u32.to_be_bytes());
1529        data.extend_from_slice(&0u32.to_be_bytes());
1530
1531        let file = NcFile::from_bytes(&data).unwrap();
1532        assert!(matches!(
1533            file.variable("nonexistent").unwrap_err(),
1534            Error::VariableNotFound(_)
1535        ));
1536    }
1537
1538    #[test]
1539    fn group_not_found() {
1540        let mut data = Vec::new();
1541        data.extend_from_slice(b"CDF\x01");
1542        data.extend_from_slice(&0u32.to_be_bytes());
1543        data.extend_from_slice(&0u32.to_be_bytes());
1544        data.extend_from_slice(&0u32.to_be_bytes());
1545        data.extend_from_slice(&0u32.to_be_bytes());
1546        data.extend_from_slice(&0u32.to_be_bytes());
1547        data.extend_from_slice(&0u32.to_be_bytes());
1548        data.extend_from_slice(&0u32.to_be_bytes());
1549
1550        let file = NcFile::from_bytes(&data).unwrap();
1551        assert!(matches!(
1552            file.group("nonexistent").unwrap_err(),
1553            Error::GroupNotFound(_)
1554        ));
1555    }
1556}