Skip to main content

netcdf_reader/
lib.rs

1//! Pure-Rust NetCDF file reader.
2//!
3//! Supports:
4//! - **CDF-1** (classic): `CDF\x01` magic
5//! - **CDF-2** (64-bit offset): `CDF\x02` magic
6//! - **CDF-5** (64-bit data): `CDF\x05` magic
7//! - **NetCDF-4** (HDF5-backed): `\x89HDF\r\n\x1a\n` magic (requires `netcdf4` feature)
8//!
9//! # Example
10//!
11//! ```no_run
12//! use netcdf_reader::NcFile;
13//!
14//! let file = NcFile::open("example.nc").unwrap();
15//! println!("format: {:?}", file.format());
16//! for var in file.variables().unwrap() {
17//!     println!("  variable: {} shape={:?}", var.name(), var.shape());
18//! }
19//! ```
20
21pub mod classic;
22pub mod error;
23pub mod masked;
24pub mod types;
25pub mod unpack;
26
27#[cfg(feature = "netcdf4")]
28pub mod nc4;
29#[cfg(feature = "netcdf4")]
30pub mod user_defined;
31
32#[cfg(feature = "cf")]
33pub mod cf;
34
35pub use error::{Error, Result};
36#[cfg(feature = "netcdf4")]
37pub use hdf5_reader::storage::DynStorage;
38#[cfg(feature = "netcdf4")]
39pub use hdf5_reader::{
40    BlockCacheStats, BlockCacheStorage, BytesStorage, ChunkCacheStats, DatasetChunk,
41    DatasetChunkIterator, ExternalFileResolver, ExternalLinkResolver, FileStorage,
42    FilesystemExternalFileResolver, FilesystemExternalLinkResolver, MmapStorage,
43    RangeRequestStorage, Storage, StorageBuffer,
44};
45pub use types::*;
46#[cfg(feature = "netcdf4")]
47pub use user_defined::{
48    NcArrayValue, NcCompoundFieldView, NcCompoundValueField, NcEnumValue, NcValue, NcValueView,
49};
50
51use std::fs::File;
52use std::io::Read;
53use std::path::Path;
54#[cfg(feature = "netcdf4")]
55use std::sync::Arc;
56
57use memmap2::Mmap;
58use ndarray::ArrayD;
59#[cfg(feature = "rayon")]
60use rayon::ThreadPool;
61
62/// Trait alias for types readable from both classic and NetCDF-4 files.
63///
64/// This unifies `classic::data::NcReadType` (for CDF-1/2/5) and
65/// `hdf5_reader::H5Type` (for NetCDF-4/HDF5) so that `NcFile::read_variable`
66/// works across all formats with a single type parameter.
67#[cfg(feature = "netcdf4")]
68pub trait NcReadable: classic::data::NcReadType + hdf5_reader::H5Type {}
69#[cfg(feature = "netcdf4")]
70impl<T: classic::data::NcReadType + hdf5_reader::H5Type> NcReadable for T {}
71
72#[cfg(not(feature = "netcdf4"))]
73pub trait NcReadable: classic::data::NcReadType {}
74#[cfg(not(feature = "netcdf4"))]
75impl<T: classic::data::NcReadType> NcReadable for T {}
76
/// NetCDF file format.
///
/// The first three variants are the classic on-disk formats identified by a
/// `CDF` magic; the last two are HDF5-backed. The type is `Copy`, comparable,
/// and hashable, so it can be used directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NcFormat {
    /// CDF-1 classic format.
    Classic,
    /// CDF-2 64-bit offset format.
    Offset64,
    /// CDF-5 64-bit data format.
    Cdf5,
    /// NetCDF-4 (HDF5-backed).
    Nc4,
    /// NetCDF-4 classic model (HDF5-backed, restricted data model).
    Nc4Classic,
}
91
/// NetCDF-4 metadata reconstruction policy.
///
/// Defaults to [`NcMetadataMode::Strict`]. The type is `Copy`, comparable,
/// and hashable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum NcMetadataMode {
    /// Fail the open if NetCDF-4 metadata cannot be reconstructed exactly.
    #[default]
    Strict,
    /// Allow heuristic reconstruction for malformed or partially-supported files.
    Lossy,
}
101
/// An opened NetCDF file.
///
/// Pairs the detected [`NcFormat`] with the format-specific backend that the
/// accessor and read methods dispatch to.
pub struct NcFile {
    /// Format returned by [`NcFile::format`].
    format: NcFormat,
    /// Classic or (with the `netcdf4` feature) HDF5-backed implementation.
    inner: NcFileInner,
}
107
/// Format-specific backend behind an [`NcFile`].
enum NcFileInner {
    /// Classic-format file (CDF-1, CDF-2, or CDF-5).
    Classic(classic::ClassicFile),
    /// HDF5-backed NetCDF-4 file; only present with the `netcdf4` feature.
    // NOTE(review): presumably boxed to keep the enum small — confirm against `Nc4File`'s size.
    #[cfg(feature = "netcdf4")]
    Nc4(Box<nc4::Nc4File>),
}
113
/// HDF5 format signature: the eight bytes `\x89HDF\r\n\x1a\n` that open an
/// HDF5-backed (NetCDF-4) file.
const HDF5_MAGIC: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
116
117/// Detect the NetCDF format from the first bytes of a file.
118fn detect_format(data: &[u8]) -> Result<NcFormat> {
119    if data.len() < 4 {
120        return Err(Error::InvalidMagic);
121    }
122
123    // Check for CDF magic: "CDF" followed by version byte.
124    if data[0] == b'C' && data[1] == b'D' && data[2] == b'F' {
125        return match data[3] {
126            1 => Ok(NcFormat::Classic),
127            2 => Ok(NcFormat::Offset64),
128            5 => Ok(NcFormat::Cdf5),
129            v => Err(Error::UnsupportedVersion(v)),
130        };
131    }
132
133    // Check for HDF5 magic (8 bytes).
134    if data.len() >= 8 && data[..8] == HDF5_MAGIC {
135        return Ok(NcFormat::Nc4);
136    }
137
138    Err(Error::InvalidMagic)
139}
140
/// Read up to eight leading bytes from `reader`.
///
/// Returns the buffer together with the number of bytes actually filled;
/// bytes past that count are zero. Fewer than eight bytes are returned only
/// when the reader reaches end-of-input first.
///
/// # Errors
///
/// Propagates any I/O error from the reader, except
/// `ErrorKind::Interrupted`, which is retried (matching the behaviour of
/// `Read::read_exact`) so a signal arriving mid-read does not fail the open.
fn read_magic_prefix(reader: &mut impl Read) -> std::io::Result<([u8; 8], usize)> {
    let mut magic = [0u8; 8];
    let mut filled = 0;
    while filled < magic.len() {
        match reader.read(&mut magic[filled..]) {
            // End of input: report the short prefix as-is.
            Ok(0) => break,
            Ok(n) => filled += n,
            // A transient interruption carries no data loss; just try again.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        }
    }
    Ok((magic, filled))
}
153
/// Return the group portion of a slash-separated variable path.
///
/// Leading and trailing `/` are stripped first; everything before the last
/// remaining `/` is the parent. A path with no separator has the empty
/// string as its parent.
#[cfg(feature = "cf")]
fn parent_group_path(path: &str) -> &str {
    let normalized = path.trim_matches('/');
    match normalized.rfind('/') {
        Some(separator) => &normalized[..separator],
        None => "",
    }
}
162
163impl NcFile {
164    /// Open a NetCDF file from a path.
165    ///
166    /// The format is auto-detected from the file's magic bytes.
167    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
168        Self::open_with_options(path, NcOpenOptions::default())
169    }
170
171    /// Open a NetCDF file from in-memory bytes.
172    ///
173    /// The format is auto-detected from the magic bytes.
174    pub fn from_bytes(data: &[u8]) -> Result<Self> {
175        Self::from_bytes_with_options(data, NcOpenOptions::default())
176    }
177
178    /// Open a NetCDF file from a custom random-access storage backend.
179    ///
180    /// NetCDF-4 files stay fully range-backed. Classic formats are read from
181    /// the provided storage into an owned buffer.
182    #[cfg(feature = "netcdf4")]
183    pub fn from_storage(storage: DynStorage) -> Result<Self> {
184        Self::from_storage_with_options(storage, NcOpenOptions::default())
185    }
186
187    /// Open a NetCDF file from a custom random-access storage backend with custom options.
188    #[cfg(feature = "netcdf4")]
189    pub fn from_storage_with_options(storage: DynStorage, options: NcOpenOptions) -> Result<Self> {
190        let magic_len = storage.len().min(HDF5_MAGIC.len() as u64) as usize;
191        let magic = storage.read_range(0, magic_len)?;
192        let format = detect_format(magic.as_ref())?;
193
194        match format {
195            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
196                let len = usize::try_from(storage.len()).map_err(|_| {
197                    Error::InvalidData(
198                        "classic storage length exceeds platform usize capacity".into(),
199                    )
200                })?;
201                let bytes = storage.read_range(0, len)?;
202                let classic = classic::ClassicFile::from_bytes(bytes.as_ref(), format)?;
203                Ok(NcFile {
204                    format,
205                    inner: NcFileInner::Classic(classic),
206                })
207            }
208            NcFormat::Nc4 | NcFormat::Nc4Classic => {
209                let nc4 = nc4::Nc4File::from_storage_with_options(storage, options)?;
210                let actual_format = if nc4.is_classic_model() {
211                    NcFormat::Nc4Classic
212                } else {
213                    NcFormat::Nc4
214                };
215                Ok(NcFile {
216                    format: actual_format,
217                    inner: NcFileInner::Nc4(Box::new(nc4)),
218                })
219            }
220        }
221    }
222
223    /// Open a NetCDF file from in-memory bytes with custom options.
224    ///
225    /// NC4 options are applied when the payload is HDF5-backed.
226    pub fn from_bytes_with_options(data: &[u8], options: NcOpenOptions) -> Result<Self> {
227        let format = detect_format(data)?;
228
229        match format {
230            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
231                let classic = classic::ClassicFile::from_bytes(data, format)?;
232                Ok(NcFile {
233                    format,
234                    inner: NcFileInner::Classic(classic),
235                })
236            }
237            NcFormat::Nc4 | NcFormat::Nc4Classic => {
238                #[cfg(feature = "netcdf4")]
239                {
240                    let nc4 = nc4::Nc4File::from_bytes_with_options(data, options)?;
241                    let actual_format = if nc4.is_classic_model() {
242                        NcFormat::Nc4Classic
243                    } else {
244                        NcFormat::Nc4
245                    };
246                    Ok(NcFile {
247                        format: actual_format,
248                        inner: NcFileInner::Nc4(Box::new(nc4)),
249                    })
250                }
251                #[cfg(not(feature = "netcdf4"))]
252                {
253                    let _ = options;
254                    Err(Error::Nc4NotEnabled)
255                }
256            }
257        }
258    }
259
260    /// The detected file format.
261    pub fn format(&self) -> NcFormat {
262        self.format
263    }
264
265    /// The root group of the file.
266    ///
267    /// Classic files have a single implicit root group containing all
268    /// dimensions, variables, and global attributes. NetCDF-4 files
269    /// can have nested sub-groups.
270    pub fn root_group(&self) -> Result<&NcGroup> {
271        match &self.inner {
272            NcFileInner::Classic(c) => Ok(c.root_group()),
273            #[cfg(feature = "netcdf4")]
274            NcFileInner::Nc4(n) => n.root_group(),
275        }
276    }
277
278    /// Convenience: dimensions in the root group.
279    pub fn dimensions(&self) -> Result<&[NcDimension]> {
280        match &self.inner {
281            NcFileInner::Classic(c) => Ok(&c.root_group().dimensions),
282            #[cfg(feature = "netcdf4")]
283            NcFileInner::Nc4(n) => n.dimensions(),
284        }
285    }
286
287    /// Convenience: variables in the root group.
288    pub fn variables(&self) -> Result<&[NcVariable]> {
289        match &self.inner {
290            NcFileInner::Classic(c) => Ok(&c.root_group().variables),
291            #[cfg(feature = "netcdf4")]
292            NcFileInner::Nc4(n) => n.variables(),
293        }
294    }
295
296    /// Convenience: global attributes (attributes of the root group).
297    pub fn global_attributes(&self) -> Result<&[NcAttribute]> {
298        match &self.inner {
299            NcFileInner::Classic(c) => Ok(&c.root_group().attributes),
300            #[cfg(feature = "netcdf4")]
301            NcFileInner::Nc4(n) => n.global_attributes(),
302        }
303    }
304
305    /// Find a group by path relative to the root group.
306    pub fn group(&self, path: &str) -> Result<&NcGroup> {
307        match &self.inner {
308            NcFileInner::Classic(c) => c
309                .root_group()
310                .group(path)
311                .ok_or_else(|| Error::GroupNotFound(path.to_string())),
312            #[cfg(feature = "netcdf4")]
313            NcFileInner::Nc4(n) => n.group(path),
314        }
315    }
316
317    /// Find a variable by name or path relative to the root group.
318    pub fn variable(&self, name: &str) -> Result<&NcVariable> {
319        match &self.inner {
320            NcFileInner::Classic(c) => c
321                .root_group()
322                .variable(name)
323                .ok_or_else(|| Error::VariableNotFound(name.to_string())),
324            #[cfg(feature = "netcdf4")]
325            NcFileInner::Nc4(n) => n.variable(name),
326        }
327    }
328
329    /// Find a dimension by name or path relative to the root group.
330    pub fn dimension(&self, name: &str) -> Result<&NcDimension> {
331        match &self.inner {
332            NcFileInner::Classic(c) => c
333                .root_group()
334                .dimension(name)
335                .ok_or_else(|| Error::DimensionNotFound(name.to_string())),
336            #[cfg(feature = "netcdf4")]
337            NcFileInner::Nc4(n) => n.dimension(name),
338        }
339    }
340
341    /// Find the coordinate variable for a dimension name or path.
342    pub fn coordinate_variable(&self, name: &str) -> Result<&NcVariable> {
343        self.root_group()?
344            .coordinate_variable(name)
345            .ok_or_else(|| Error::VariableNotFound(format!("coordinate variable for {name}")))
346    }
347
348    /// Discover CF axes from coordinate variables in a group.
349    #[cfg(feature = "cf")]
350    pub fn cf_coordinate_axes(&self, group_path: &str) -> Result<Vec<cf::CfCoordinateAxis<'_>>> {
351        let group = self.group(group_path)?;
352        Ok(cf::discover_coordinate_axes(group))
353    }
354
355    /// Discover CF axes used by a variable from its coordinate variables.
356    #[cfg(feature = "cf")]
357    pub fn cf_variable_axes(&self, name: &str) -> Result<Vec<cf::CfCoordinateAxis<'_>>> {
358        let variable = self.variable(name)?;
359        let group = self.group(parent_group_path(name))?;
360        Ok(cf::discover_variable_axes(variable, group))
361    }
362
363    /// Discover CF time coordinate variables in a group.
364    #[cfg(feature = "cf")]
365    pub fn cf_time_coordinates(&self, group_path: &str) -> Result<Vec<cf::CfTimeCoordinate<'_>>> {
366        let group = self.group(group_path)?;
367        cf::discover_time_coordinates(group)
368    }
369
370    /// Discover the CF time coordinate used by a variable, if one exists.
371    #[cfg(feature = "cf")]
372    pub fn cf_variable_time_coordinate(
373        &self,
374        name: &str,
375    ) -> Result<Option<cf::CfTimeCoordinate<'_>>> {
376        let variable = self.variable(name)?;
377        let group = self.group(parent_group_path(name))?;
378        cf::discover_variable_time_coordinate(variable, group)
379    }
380
381    /// Find a group attribute by name or path relative to the root group.
382    pub fn global_attribute(&self, name: &str) -> Result<&NcAttribute> {
383        match &self.inner {
384            NcFileInner::Classic(c) => c
385                .root_group()
386                .attribute(name)
387                .ok_or_else(|| Error::AttributeNotFound(name.to_string())),
388            #[cfg(feature = "netcdf4")]
389            NcFileInner::Nc4(n) => n.global_attribute(name),
390        }
391    }
392
393    /// Read a variable's data as a typed array.
394    ///
395    /// Works for both classic (CDF-1/2/5) and NetCDF-4 files. NetCDF-4 nested
396    /// variables can be addressed with paths like `group/subgroup/var`. The type
397    /// parameter `T` must implement `NcReadable`, which is satisfied by:
398    /// `i8, u8, i16, u16, i32, u32, i64, u64, f32, f64`.
399    pub fn read_variable<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
400        match &self.inner {
401            NcFileInner::Classic(c) => c.read_variable::<T>(name),
402            #[cfg(feature = "netcdf4")]
403            NcFileInner::Nc4(n) => Ok(n.read_variable::<T>(name)?),
404        }
405    }
406
407    /// Read a variable into a caller-provided typed buffer.
408    pub fn read_variable_into<T: NcReadable>(&self, name: &str, dst: &mut [T]) -> Result<()> {
409        match &self.inner {
410            NcFileInner::Classic(c) => c.read_variable_into::<T>(name, dst),
411            #[cfg(feature = "netcdf4")]
412            NcFileInner::Nc4(n) => Ok(n.read_variable_into::<T>(name, dst)?),
413        }
414    }
415
416    /// Read a NetCDF-4 variable as logical raw bytes in HDF5 datatype byte order.
417    #[cfg(feature = "netcdf4")]
418    pub fn read_variable_raw_bytes(&self, name: &str) -> Result<Vec<u8>> {
419        match &self.inner {
420            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
421                expected: "NetCDF-4 variable".to_string(),
422                actual: "classic NetCDF variable".to_string(),
423            }),
424            NcFileInner::Nc4(n) => Ok(n.read_variable_raw_bytes(name)?),
425        }
426    }
427
428    /// Read a NetCDF-4 variable as logical raw bytes into a caller-provided buffer.
429    #[cfg(feature = "netcdf4")]
430    pub fn read_variable_raw_bytes_into(&self, name: &str, dst: &mut [u8]) -> Result<()> {
431        match &self.inner {
432            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
433                expected: "NetCDF-4 variable".to_string(),
434                actual: "classic NetCDF variable".to_string(),
435            }),
436            NcFileInner::Nc4(n) => Ok(n.read_variable_raw_bytes_into(name, dst)?),
437        }
438    }
439
440    /// Read a NetCDF-4 variable as logical raw bytes with numeric fields in native endian.
441    #[cfg(feature = "netcdf4")]
442    pub fn read_variable_native_bytes(&self, name: &str) -> Result<Vec<u8>> {
443        match &self.inner {
444            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
445                expected: "NetCDF-4 variable".to_string(),
446                actual: "classic NetCDF variable".to_string(),
447            }),
448            NcFileInner::Nc4(n) => Ok(n.read_variable_native_bytes(name)?),
449        }
450    }
451
452    /// Read native-endian logical raw bytes for a NetCDF-4 variable into a buffer.
453    #[cfg(feature = "netcdf4")]
454    pub fn read_variable_native_bytes_into(&self, name: &str, dst: &mut [u8]) -> Result<()> {
455        match &self.inner {
456            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
457                expected: "NetCDF-4 variable".to_string(),
458                actual: "classic NetCDF variable".to_string(),
459            }),
460            NcFileInner::Nc4(n) => Ok(n.read_variable_native_bytes_into(name, dst)?),
461        }
462    }
463
464    /// Iterate decoded HDF5 chunks for a NetCDF-4 variable.
465    #[cfg(feature = "netcdf4")]
466    pub fn iter_variable_chunks(&self, name: &str) -> Result<DatasetChunkIterator> {
467        match &self.inner {
468            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
469                expected: "NetCDF-4 chunked variable".to_string(),
470                actual: "classic NetCDF variable".to_string(),
471            }),
472            NcFileInner::Nc4(n) => Ok(n.iter_variable_chunks(name)?),
473        }
474    }
475
476    /// Return chunk-cache statistics for NetCDF-4 files.
477    #[cfg(feature = "netcdf4")]
478    pub fn chunk_cache_stats(&self) -> Option<ChunkCacheStats> {
479        match &self.inner {
480            NcFileInner::Classic(_) => None,
481            NcFileInner::Nc4(n) => Some(n.chunk_cache_stats()),
482        }
483    }
484
485    /// Read a variable using internal chunk-level parallelism when available.
486    ///
487    /// Classic formats fall back to `read_variable`.
488    #[cfg(feature = "rayon")]
489    pub fn read_variable_parallel<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
490        match &self.inner {
491            NcFileInner::Classic(c) => c.read_variable::<T>(name),
492            #[cfg(feature = "netcdf4")]
493            NcFileInner::Nc4(n) => Ok(n.read_variable_parallel::<T>(name)?),
494        }
495    }
496
497    /// Read a variable using the provided Rayon thread pool when available.
498    ///
499    /// Classic formats fall back to `read_variable`.
500    #[cfg(feature = "rayon")]
501    pub fn read_variable_in_pool<T: NcReadable>(
502        &self,
503        name: &str,
504        pool: &ThreadPool,
505    ) -> Result<ArrayD<T>> {
506        match &self.inner {
507            NcFileInner::Classic(c) => c.read_variable::<T>(name),
508            #[cfg(feature = "netcdf4")]
509            NcFileInner::Nc4(n) => Ok(n.read_variable_in_pool::<T>(name, pool)?),
510        }
511    }
512
513    /// Access the underlying classic file (for reading data).
514    ///
515    /// Returns `None` if this is a NetCDF-4 file.
516    pub fn as_classic(&self) -> Option<&classic::ClassicFile> {
517        match &self.inner {
518            NcFileInner::Classic(c) => Some(c),
519            #[cfg(feature = "netcdf4")]
520            NcFileInner::Nc4(_) => None,
521        }
522    }
523
524    /// Read a variable with automatic type promotion to f64.
525    ///
526    /// Reads in the native storage type (i8, i16, i32, f32, f64, u8, etc.)
527    /// and promotes all values to f64. This avoids the `TypeMismatch` error
528    /// that `read_variable::<f64>` produces for non-f64 variables.
529    pub fn read_variable_as_f64(&self, name: &str) -> Result<ArrayD<f64>> {
530        match &self.inner {
531            NcFileInner::Classic(c) => c.read_variable_as_f64(name),
532            #[cfg(feature = "netcdf4")]
533            NcFileInner::Nc4(n) => n.read_variable_as_f64(name),
534        }
535    }
536
537    /// Read a string variable as a single string.
538    ///
539    /// Use [`NcFile::read_variable_as_strings`] when the variable contains
540    /// multiple string elements.
541    pub fn read_variable_as_string(&self, name: &str) -> Result<String> {
542        match &self.inner {
543            NcFileInner::Classic(c) => c.read_variable_as_string(name),
544            #[cfg(feature = "netcdf4")]
545            NcFileInner::Nc4(n) => n.read_variable_as_string(name),
546        }
547    }
548
549    /// Read a string or char variable as a flat vector of strings.
550    ///
551    /// Classic char arrays interpret the last dimension as the string length
552    /// and flatten the leading dimensions.
553    pub fn read_variable_as_strings(&self, name: &str) -> Result<Vec<String>> {
554        match &self.inner {
555            NcFileInner::Classic(c) => c.read_variable_as_strings(name),
556            #[cfg(feature = "netcdf4")]
557            NcFileInner::Nc4(n) => n.read_variable_as_strings(name),
558        }
559    }
560
561    /// Read a NetCDF-4 user-defined variable into dynamic values.
562    ///
563    /// This supports enum, opaque, compound, fixed-size array, and non-string
564    /// vlen datatypes. Primitive values nested inside those types are decoded
565    /// according to the HDF5 datatype byte order.
566    #[cfg(feature = "netcdf4")]
567    pub fn read_variable_user_defined(&self, name: &str) -> Result<ArrayD<NcValue>> {
568        match &self.inner {
569            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
570                expected: "NetCDF-4 user-defined variable".to_string(),
571                actual: "classic NetCDF variable".to_string(),
572            }),
573            NcFileInner::Nc4(n) => n.read_variable_user_defined(name),
574        }
575    }
576
577    /// Read a NetCDF-4 user-defined variable through a caller-provided decoder.
578    ///
579    /// The decoder receives one [`NcValueView`] per logical variable element,
580    /// allowing direct construction of application structs without allocating
581    /// an intermediate [`NcValue`] tree.
582    #[cfg(feature = "netcdf4")]
583    pub fn read_variable_user_defined_with<T, F>(&self, name: &str, decoder: F) -> Result<ArrayD<T>>
584    where
585        F: FnMut(NcValueView<'_>) -> Result<T>,
586    {
587        match &self.inner {
588            NcFileInner::Classic(_) => Err(Error::TypeMismatch {
589                expected: "NetCDF-4 user-defined variable".to_string(),
590                actual: "classic NetCDF variable".to_string(),
591            }),
592            NcFileInner::Nc4(n) => n.read_variable_user_defined_with(name, decoder),
593        }
594    }
595
596    /// Read a variable and apply `scale_factor`/`add_offset` unpacking.
597    ///
598    /// Returns `actual = stored * scale_factor + add_offset`.
599    /// If neither attribute is present, returns the raw data as f64.
600    /// Uses type-promoting read so it works with any numeric storage type.
601    pub fn read_variable_unpacked(&self, name: &str) -> Result<ArrayD<f64>> {
602        let var = self.variable(name)?;
603        let params = unpack::UnpackParams::from_variable(var);
604        let mut data = self.read_variable_as_f64(name)?;
605        if let Some(p) = params {
606            p.apply(&mut data);
607        }
608        Ok(data)
609    }
610
611    /// Read a variable, replace `_FillValue`/`missing_value` with NaN,
612    /// and mask values outside `valid_min`/`valid_max`/`valid_range`.
613    /// Uses type-promoting read so it works with any numeric storage type.
614    pub fn read_variable_masked(&self, name: &str) -> Result<ArrayD<f64>> {
615        let var = self.variable(name)?;
616        let params = masked::MaskParams::from_variable(var);
617        let mut data = self.read_variable_as_f64(name)?;
618        if let Some(p) = params {
619            p.apply(&mut data);
620        }
621        Ok(data)
622    }
623
624    /// Read a variable with both masking and unpacking (CF spec order).
625    ///
626    /// Order: read → mask fill/missing → unpack (scale+offset).
627    /// Uses type-promoting read so it works with any numeric storage type.
628    pub fn read_variable_unpacked_masked(&self, name: &str) -> Result<ArrayD<f64>> {
629        let var = self.variable(name)?;
630        let mask_params = masked::MaskParams::from_variable(var);
631        let unpack_params = unpack::UnpackParams::from_variable(var);
632        let mut data = self.read_variable_as_f64(name)?;
633        if let Some(p) = mask_params {
634            p.apply(&mut data);
635        }
636        if let Some(p) = unpack_params {
637            p.apply(&mut data);
638        }
639        Ok(data)
640    }
641
642    // ----- Slice API -----
643
644    /// Read a slice (hyperslab) of a variable as a typed array.
645    pub fn read_variable_slice<T: NcReadable>(
646        &self,
647        name: &str,
648        selection: &NcSliceInfo,
649    ) -> Result<ArrayD<T>> {
650        match &self.inner {
651            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
652            #[cfg(feature = "netcdf4")]
653            NcFileInner::Nc4(n) => Ok(n.read_variable_slice::<T>(name, selection)?),
654        }
655    }
656
657    /// Read a slice (hyperslab) using chunk-level parallelism when available.
658    ///
659    /// For NetCDF-4 chunked datasets, overlapping chunks are decompressed in
660    /// parallel via Rayon. Classic formats fall back to `read_variable_slice`.
661    #[cfg(feature = "rayon")]
662    pub fn read_variable_slice_parallel<T: NcReadable>(
663        &self,
664        name: &str,
665        selection: &NcSliceInfo,
666    ) -> Result<ArrayD<T>> {
667        match &self.inner {
668            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
669            #[cfg(feature = "netcdf4")]
670            NcFileInner::Nc4(n) => Ok(n.read_variable_slice_parallel::<T>(name, selection)?),
671        }
672    }
673
674    /// Read a slice of a variable with automatic type promotion to f64.
675    pub fn read_variable_slice_as_f64(
676        &self,
677        name: &str,
678        selection: &NcSliceInfo,
679    ) -> Result<ArrayD<f64>> {
680        match &self.inner {
681            NcFileInner::Classic(c) => c.read_variable_slice_as_f64(name, selection),
682            #[cfg(feature = "netcdf4")]
683            NcFileInner::Nc4(n) => n.read_variable_slice_as_f64(name, selection),
684        }
685    }
686
687    /// Read a slice with `scale_factor`/`add_offset` unpacking.
688    pub fn read_variable_slice_unpacked(
689        &self,
690        name: &str,
691        selection: &NcSliceInfo,
692    ) -> Result<ArrayD<f64>> {
693        let var = self.variable(name)?;
694        let params = unpack::UnpackParams::from_variable(var);
695        let mut data = self.read_variable_slice_as_f64(name, selection)?;
696        if let Some(p) = params {
697            p.apply(&mut data);
698        }
699        Ok(data)
700    }
701
702    /// Read a slice with fill/missing value masking.
703    pub fn read_variable_slice_masked(
704        &self,
705        name: &str,
706        selection: &NcSliceInfo,
707    ) -> Result<ArrayD<f64>> {
708        let var = self.variable(name)?;
709        let params = masked::MaskParams::from_variable(var);
710        let mut data = self.read_variable_slice_as_f64(name, selection)?;
711        if let Some(p) = params {
712            p.apply(&mut data);
713        }
714        Ok(data)
715    }
716
717    /// Read a slice with both masking and unpacking (CF spec order).
718    pub fn read_variable_slice_unpacked_masked(
719        &self,
720        name: &str,
721        selection: &NcSliceInfo,
722    ) -> Result<ArrayD<f64>> {
723        let var = self.variable(name)?;
724        let mask_params = masked::MaskParams::from_variable(var);
725        let unpack_params = unpack::UnpackParams::from_variable(var);
726        let mut data = self.read_variable_slice_as_f64(name, selection)?;
727        if let Some(p) = mask_params {
728            p.apply(&mut data);
729        }
730        if let Some(p) = unpack_params {
731            p.apply(&mut data);
732        }
733        Ok(data)
734    }
735
736    // ----- Lazy Slice Iterator -----
737
738    /// Create an iterator that yields one slice per index along a given dimension.
739    ///
740    /// Each call to `next()` reads one slice using the slice API. This is
741    /// useful for iterating time steps, levels, etc. without loading the
742    /// entire dataset into memory.
743    pub fn iter_slices<T: NcReadable>(
744        &self,
745        name: &str,
746        dim: usize,
747    ) -> Result<NcSliceIterator<'_, T>> {
748        let var = self.variable(name)?;
749        let ndim = var.ndim();
750        if dim >= ndim {
751            return Err(Error::InvalidData(format!(
752                "dimension index {} out of range for {}-dimensional variable '{}'",
753                dim, ndim, name
754            )));
755        }
756        let dim_size = var.dimensions[dim].size;
757        Ok(NcSliceIterator {
758            file: self,
759            name: name.to_string(),
760            dim,
761            dim_size,
762            current: 0,
763            ndim,
764            _marker: std::marker::PhantomData,
765        })
766    }
767}
768
/// Configuration options for opening a NetCDF file.
///
/// Construct with `NcOpenOptions::default()` and override individual fields.
/// Most settings only affect HDF5-backed (NetCDF-4) files; the fields gated
/// on the `netcdf4` feature do not exist when that feature is disabled.
pub struct NcOpenOptions {
    /// Maximum bytes for the chunk cache (NC4 only). Default: 64 MiB.
    pub chunk_cache_bytes: usize,
    /// Maximum number of chunk cache slots (NC4 only). Default: 521.
    pub chunk_cache_slots: usize,
    /// NetCDF-4 metadata reconstruction policy. Default: strict.
    pub metadata_mode: NcMetadataMode,
    /// Custom filter registry (NC4 only). Default: `None`.
    #[cfg(feature = "netcdf4")]
    pub filter_registry: Option<hdf5_reader::FilterRegistry>,
    /// Resolver for HDF5 external raw data files (NC4 only). Default: `None`.
    #[cfg(feature = "netcdf4")]
    pub external_file_resolver: Option<Arc<dyn hdf5_reader::ExternalFileResolver>>,
    /// Resolver for HDF5 external links (NC4 only). Default: `None`.
    #[cfg(feature = "netcdf4")]
    pub external_link_resolver: Option<Arc<dyn hdf5_reader::ExternalLinkResolver>>,
}
787
788impl Default for NcOpenOptions {
789    fn default() -> Self {
790        NcOpenOptions {
791            chunk_cache_bytes: 64 * 1024 * 1024,
792            chunk_cache_slots: 521,
793            metadata_mode: NcMetadataMode::Strict,
794            #[cfg(feature = "netcdf4")]
795            filter_registry: None,
796            #[cfg(feature = "netcdf4")]
797            external_file_resolver: None,
798            #[cfg(feature = "netcdf4")]
799            external_link_resolver: None,
800        }
801    }
802}
803
804impl NcFile {
805    /// Open a NetCDF file with custom options.
806    pub fn open_with_options(path: impl AsRef<Path>, options: NcOpenOptions) -> Result<Self> {
807        let path = path.as_ref();
808        let mut file = File::open(path)?;
809        let (magic, n) = read_magic_prefix(&mut file)?;
810        let format = detect_format(&magic[..n])?;
811
812        match format {
813            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
814                let file = File::open(path)?;
815                // SAFETY: read-only mapping; caller must not modify the file concurrently.
816                let mmap = unsafe { Mmap::map(&file)? };
817                let classic = classic::ClassicFile::from_mmap(mmap, format)?;
818                Ok(NcFile {
819                    format,
820                    inner: NcFileInner::Classic(classic),
821                })
822            }
823            NcFormat::Nc4 | NcFormat::Nc4Classic => {
824                #[cfg(feature = "netcdf4")]
825                {
826                    let metadata_mode = options.metadata_mode;
827                    let hdf5 = hdf5_reader::Hdf5File::open_with_options(
828                        path,
829                        hdf5_reader::OpenOptions {
830                            chunk_cache_bytes: options.chunk_cache_bytes,
831                            chunk_cache_slots: options.chunk_cache_slots,
832                            filter_registry: options.filter_registry,
833                            external_file_resolver: options.external_file_resolver,
834                            external_link_resolver: options.external_link_resolver,
835                        },
836                    )?;
837                    let nc4 = nc4::Nc4File::from_hdf5(hdf5, metadata_mode)?;
838                    let actual_format = if nc4.is_classic_model() {
839                        NcFormat::Nc4Classic
840                    } else {
841                        NcFormat::Nc4
842                    };
843                    Ok(NcFile {
844                        format: actual_format,
845                        inner: NcFileInner::Nc4(Box::new(nc4)),
846                    })
847                }
848                #[cfg(not(feature = "netcdf4"))]
849                {
850                    let _ = options;
851                    Err(Error::Nc4NotEnabled)
852                }
853            }
854        }
855    }
856}
857
858/// Lazy iterator over slices of a variable along a given dimension.
859pub struct NcSliceIterator<'f, T: NcReadable> {
860    file: &'f NcFile,
861    name: String,
862    dim: usize,
863    dim_size: u64,
864    current: u64,
865    ndim: usize,
866    _marker: std::marker::PhantomData<T>,
867}
868
869impl<'f, T: NcReadable> Iterator for NcSliceIterator<'f, T> {
870    type Item = Result<ArrayD<T>>;
871
872    fn next(&mut self) -> Option<Self::Item> {
873        if self.current >= self.dim_size {
874            return None;
875        }
876        let mut selections = Vec::with_capacity(self.ndim);
877        for d in 0..self.ndim {
878            if d == self.dim {
879                selections.push(NcSliceInfoElem::Index(self.current));
880            } else {
881                selections.push(NcSliceInfoElem::Slice {
882                    start: 0,
883                    end: u64::MAX,
884                    step: 1,
885                });
886            }
887        }
888        let selection = NcSliceInfo { selections };
889        self.current += 1;
890        Some(self.file.read_variable_slice::<T>(&self.name, &selection))
891    }
892
893    fn size_hint(&self) -> (usize, Option<usize>) {
894        let remaining_u64 = self.dim_size.saturating_sub(self.current);
895        let remaining = remaining_u64.min(usize::MAX as u64) as usize;
896        (remaining, Some(remaining))
897    }
898}
899
#[cfg(test)]
mod tests {
    //! Unit tests for magic detection and for opening small hand-built
    //! classic (CDF-1) images from memory.

    use super::*;
    #[cfg(feature = "netcdf4")]
    use std::sync::Arc;

    /// Appends `value` to `buf` as a big-endian 32-bit word.
    fn put_u32(buf: &mut Vec<u8>, value: u32) {
        buf.extend_from_slice(&value.to_be_bytes());
    }

    /// Appends `bytes` to `buf`, then zero-pads so that the number of bytes
    /// written is a multiple of 4.
    fn put_padded(buf: &mut Vec<u8>, bytes: &[u8]) {
        buf.extend_from_slice(bytes);
        let pad = (4 - bytes.len() % 4) % 4;
        buf.extend_from_slice(&[0u8; 3][..pad]);
    }

    /// Appends a name: its byte length as a 32-bit word, then the padded bytes.
    fn put_name(buf: &mut Vec<u8>, name: &str) {
        put_u32(buf, name.len() as u32);
        put_padded(buf, name.as_bytes());
    }

    /// Builds a minimal valid CDF-1 image: magic + numrecs + absent
    /// dim/att/var lists.
    fn minimal_cdf1() -> Vec<u8> {
        let mut data = Vec::new();
        data.extend_from_slice(b"CDF\x01");
        put_u32(&mut data, 0); // numrecs = 0
        // dim_list, att_list, var_list: each ABSENT (tag = 0, count = 0).
        for _ in 0..3 {
            put_u32(&mut data, 0); // tag = 0
            put_u32(&mut data, 0); // count = 0
        }
        data
    }

    #[test]
    fn test_detect_cdf1() {
        let data = b"CDF\x01rest_of_file";
        assert_eq!(detect_format(data).unwrap(), NcFormat::Classic);
    }

    #[test]
    fn test_detect_cdf2() {
        let data = b"CDF\x02rest_of_file";
        assert_eq!(detect_format(data).unwrap(), NcFormat::Offset64);
    }

    #[test]
    fn test_detect_cdf5() {
        let data = b"CDF\x05rest_of_file";
        assert_eq!(detect_format(data).unwrap(), NcFormat::Cdf5);
    }

    #[test]
    fn test_detect_hdf5() {
        let mut data = vec![0x89, b'H', b'D', b'F', 0x0D, 0x0A, 0x1A, 0x0A];
        data.extend_from_slice(b"rest_of_file");
        assert_eq!(detect_format(&data).unwrap(), NcFormat::Nc4);
    }

    #[test]
    fn test_detect_invalid_magic() {
        let data = b"XXXX";
        assert!(matches!(
            detect_format(data).unwrap_err(),
            Error::InvalidMagic
        ));
    }

    #[test]
    fn test_detect_unsupported_version() {
        let data = b"CDF\x03";
        assert!(matches!(
            detect_format(data).unwrap_err(),
            Error::UnsupportedVersion(3)
        ));
    }

    #[test]
    fn test_detect_too_short() {
        let data = b"CD";
        assert!(matches!(
            detect_format(data).unwrap_err(),
            Error::InvalidMagic
        ));
    }

    #[test]
    fn test_from_bytes_minimal_cdf1() {
        let data = minimal_cdf1();

        let file = NcFile::from_bytes(&data).unwrap();
        assert_eq!(file.format(), NcFormat::Classic);
        assert!(file.dimensions().unwrap().is_empty());
        assert!(file.variables().unwrap().is_empty());
        assert!(file.global_attributes().unwrap().is_empty());
    }

    #[cfg(feature = "netcdf4")]
    #[test]
    fn test_from_storage_minimal_cdf1() {
        let data = minimal_cdf1();

        let file = NcFile::from_storage(Arc::new(BytesStorage::new(data))).unwrap();
        assert_eq!(file.format(), NcFormat::Classic);
        assert!(file.dimensions().unwrap().is_empty());
        assert!(file.variables().unwrap().is_empty());
        assert!(file.global_attributes().unwrap().is_empty());
    }

    #[cfg(feature = "netcdf4")]
    #[test]
    fn test_from_storage_short_input_reports_invalid_magic() {
        let err = NcFile::from_storage(Arc::new(BytesStorage::new(vec![b'C', b'D'])))
            .err()
            .expect("short storage should not parse as NetCDF");
        assert!(matches!(err, Error::InvalidMagic));
    }

    #[test]
    fn test_from_bytes_cdf1_with_data() {
        // Build a CDF-1 file with one dimension, one global attribute, and one variable.
        let mut data = Vec::new();
        data.extend_from_slice(b"CDF\x01");
        put_u32(&mut data, 0); // numrecs = 0

        // dim_list: 1 dimension "x" with size 3
        put_u32(&mut data, 0x0000_000A); // NC_DIMENSION tag
        put_u32(&mut data, 1); // nelems = 1
        put_name(&mut data, "x"); // length = 1, "x", 3 bytes padding
        put_u32(&mut data, 3); // dim size

        // att_list: 1 attribute "title" = "test"
        put_u32(&mut data, 0x0000_000C); // NC_ATTRIBUTE tag
        put_u32(&mut data, 1); // nelems = 1
        put_name(&mut data, "title"); // length = 5, "title", 3 bytes padding
        put_u32(&mut data, 2); // nc_type = NC_CHAR = 2
        put_u32(&mut data, 4); // nvalues = 4
        put_padded(&mut data, b"test"); // exactly 4 bytes, no padding needed

        // var_list: 1 variable "vals" with dim x, type float
        put_u32(&mut data, 0x0000_000B); // NC_VARIABLE tag
        put_u32(&mut data, 1); // nelems = 1
        put_name(&mut data, "vals"); // length = 4, "vals", no padding
        put_u32(&mut data, 1); // ndims = 1
        put_u32(&mut data, 0); // dimid = 0
        // att_list: absent
        put_u32(&mut data, 0);
        put_u32(&mut data, 0);
        put_u32(&mut data, 5); // nc_type = NC_FLOAT = 5
        put_u32(&mut data, 12); // vsize = 12 (3 floats * 4 bytes)
        // begin (offset): the data goes right after this header.
        let data_offset = data.len() as u32 + 4; // +4 for this field itself
        put_u32(&mut data, data_offset);

        // Now append the variable data: 3 floats
        for value in &[1.5f32, 2.5, 3.5] {
            data.extend_from_slice(&value.to_be_bytes());
        }

        let file = NcFile::from_bytes(&data).unwrap();
        assert_eq!(file.format(), NcFormat::Classic);
        assert_eq!(file.dimensions().unwrap().len(), 1);
        assert_eq!(file.dimensions().unwrap()[0].name, "x");
        assert_eq!(file.dimensions().unwrap()[0].size, 3);

        assert_eq!(file.global_attributes().unwrap().len(), 1);
        assert_eq!(file.global_attributes().unwrap()[0].name, "title");
        assert_eq!(
            file.global_attributes().unwrap()[0]
                .value
                .as_string()
                .unwrap(),
            "test"
        );

        assert_eq!(file.variables().unwrap().len(), 1);
        let var = file.variable("vals").unwrap();
        assert_eq!(var.dtype(), &NcType::Float);
        assert_eq!(var.shape(), vec![3]);

        // Read the actual data through the classic file.
        let classic = file.as_classic().unwrap();
        let arr: ndarray::ArrayD<f32> = classic.read_variable("vals").unwrap();
        assert_eq!(arr.shape(), &[3]);
        assert_eq!(arr[[0]], 1.5f32);
        assert_eq!(arr[[1]], 2.5f32);
        assert_eq!(arr[[2]], 3.5f32);
    }

    #[test]
    fn test_variable_not_found() {
        let data = minimal_cdf1();

        let file = NcFile::from_bytes(&data).unwrap();
        assert!(matches!(
            file.variable("nonexistent").unwrap_err(),
            Error::VariableNotFound(_)
        ));
    }

    #[test]
    fn test_group_not_found() {
        let data = minimal_cdf1();

        let file = NcFile::from_bytes(&data).unwrap();
        assert!(matches!(
            file.group("nonexistent").unwrap_err(),
            Error::GroupNotFound(_)
        ));
    }
}