Skip to main content

netcdf_reader/
lib.rs

1//! Pure-Rust NetCDF file reader.
2//!
3//! Supports:
4//! - **CDF-1** (classic): `CDF\x01` magic
5//! - **CDF-2** (64-bit offset): `CDF\x02` magic
6//! - **CDF-5** (64-bit data): `CDF\x05` magic
7//! - **NetCDF-4** (HDF5-backed): `\x89HDF\r\n\x1a\n` magic (requires `netcdf4` feature)
8//!
9//! # Example
10//!
11//! ```no_run
12//! use netcdf_reader::NcFile;
13//!
14//! let file = NcFile::open("example.nc").unwrap();
15//! println!("format: {:?}", file.format());
16//! for var in file.variables().unwrap() {
17//!     println!("  variable: {} shape={:?}", var.name(), var.shape());
18//! }
19//! ```
20
21pub mod classic;
22pub mod error;
23pub mod masked;
24pub mod types;
25pub mod unpack;
26
27#[cfg(feature = "netcdf4")]
28pub mod nc4;
29
30#[cfg(feature = "cf")]
31pub mod cf;
32
33pub use error::{Error, Result};
34#[cfg(feature = "netcdf4")]
35pub use hdf5_reader::storage::DynStorage;
36#[cfg(feature = "netcdf4")]
37pub use hdf5_reader::{BytesStorage, FileStorage, MmapStorage, Storage, StorageBuffer};
38pub use types::*;
39
40use std::fs::File;
41use std::io::Read;
42use std::path::Path;
43
44use memmap2::Mmap;
45use ndarray::ArrayD;
46#[cfg(feature = "rayon")]
47use rayon::ThreadPool;
48
49/// Trait alias for types readable from both classic and NetCDF-4 files.
50///
51/// This unifies `classic::data::NcReadType` (for CDF-1/2/5) and
52/// `hdf5_reader::H5Type` (for NetCDF-4/HDF5) so that `NcFile::read_variable`
53/// works across all formats with a single type parameter.
54#[cfg(feature = "netcdf4")]
55pub trait NcReadable: classic::data::NcReadType + hdf5_reader::H5Type {}
56#[cfg(feature = "netcdf4")]
57impl<T: classic::data::NcReadType + hdf5_reader::H5Type> NcReadable for T {}
58
59#[cfg(not(feature = "netcdf4"))]
60pub trait NcReadable: classic::data::NcReadType {}
61#[cfg(not(feature = "netcdf4"))]
62impl<T: classic::data::NcReadType> NcReadable for T {}
63
/// On-disk flavour of a NetCDF file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NcFormat {
    /// Classic CDF-1 layout.
    Classic,
    /// CDF-2 layout (64-bit offsets).
    Offset64,
    /// CDF-5 layout (64-bit data).
    Cdf5,
    /// NetCDF-4, stored inside an HDF5 container.
    Nc4,
    /// NetCDF-4 restricted to the classic data model (still HDF5-backed).
    Nc4Classic,
}
78
/// Policy controlling how NetCDF-4 metadata is reconstructed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum NcMetadataMode {
    /// Refuse to open the file unless its NetCDF-4 metadata can be rebuilt exactly.
    /// This is the default.
    #[default]
    Strict,
    /// Permit heuristic reconstruction for malformed or partially-supported files.
    Lossy,
}
88
89/// An opened NetCDF file.
90pub struct NcFile {
91    format: NcFormat,
92    inner: NcFileInner,
93}
94
95enum NcFileInner {
96    Classic(classic::ClassicFile),
97    #[cfg(feature = "netcdf4")]
98    Nc4(Box<nc4::Nc4File>),
99}
100
/// The eight-byte HDF5 signature: `\x89HDF\r\n\x1a\n`.
const HDF5_MAGIC: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
103
104/// Detect the NetCDF format from the first bytes of a file.
105fn detect_format(data: &[u8]) -> Result<NcFormat> {
106    if data.len() < 4 {
107        return Err(Error::InvalidMagic);
108    }
109
110    // Check for CDF magic: "CDF" followed by version byte.
111    if data[0] == b'C' && data[1] == b'D' && data[2] == b'F' {
112        return match data[3] {
113            1 => Ok(NcFormat::Classic),
114            2 => Ok(NcFormat::Offset64),
115            5 => Ok(NcFormat::Cdf5),
116            v => Err(Error::UnsupportedVersion(v)),
117        };
118    }
119
120    // Check for HDF5 magic (8 bytes).
121    if data.len() >= 8 && data[..8] == HDF5_MAGIC {
122        return Ok(NcFormat::Nc4);
123    }
124
125    Err(Error::InvalidMagic)
126}
127
/// Read up to eight leading bytes from `reader` for magic-number detection.
///
/// Returns the buffer together with the number of bytes actually filled.
/// A count below eight means the reader reported end-of-input first; the
/// unfilled tail of the buffer is zero.
///
/// Reads that fail with [`std::io::ErrorKind::Interrupted`] are retried, as
/// `Read::read_exact` does, so a signal arriving during the probe does not
/// abort the open.
///
/// # Errors
///
/// Any other I/O error from `reader` is returned unchanged.
fn read_magic_prefix(reader: &mut impl Read) -> std::io::Result<([u8; 8], usize)> {
    let mut magic = [0u8; 8];
    let mut read_len = 0;
    while read_len < magic.len() {
        match reader.read(&mut magic[read_len..]) {
            // End of input: hand back the short prefix.
            Ok(0) => break,
            Ok(n) => read_len += n,
            // Transient by contract; try the same read again.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok((magic, read_len))
}
140
141#[cfg(feature = "cf")]
/// Return the group portion of a `/`-separated variable path.
///
/// Leading and trailing slashes are stripped first. Everything before the
/// last remaining `/` is returned, or `""` when the path has no group
/// component.
fn parent_group_path(path: &str) -> &str {
    let trimmed = path.trim_matches('/');
    match trimmed.rfind('/') {
        Some(separator) => &trimmed[..separator],
        None => "",
    }
}
149
150impl NcFile {
151    /// Open a NetCDF file from a path.
152    ///
153    /// The format is auto-detected from the file's magic bytes.
154    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
155        Self::open_with_options(path, NcOpenOptions::default())
156    }
157
158    /// Open a NetCDF file from in-memory bytes.
159    ///
160    /// The format is auto-detected from the magic bytes.
161    pub fn from_bytes(data: &[u8]) -> Result<Self> {
162        Self::from_bytes_with_options(data, NcOpenOptions::default())
163    }
164
165    /// Open a NetCDF file from a custom random-access storage backend.
166    ///
167    /// NetCDF-4 files stay fully range-backed. Classic formats are read from
168    /// the provided storage into an owned buffer.
169    #[cfg(feature = "netcdf4")]
170    pub fn from_storage(storage: DynStorage) -> Result<Self> {
171        Self::from_storage_with_options(storage, NcOpenOptions::default())
172    }
173
174    /// Open a NetCDF file from a custom random-access storage backend with custom options.
175    #[cfg(feature = "netcdf4")]
176    pub fn from_storage_with_options(storage: DynStorage, options: NcOpenOptions) -> Result<Self> {
177        let magic_len = storage.len().min(HDF5_MAGIC.len() as u64) as usize;
178        let magic = storage.read_range(0, magic_len)?;
179        let format = detect_format(magic.as_ref())?;
180
181        match format {
182            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
183                let len = usize::try_from(storage.len()).map_err(|_| {
184                    Error::InvalidData(
185                        "classic storage length exceeds platform usize capacity".into(),
186                    )
187                })?;
188                let bytes = storage.read_range(0, len)?;
189                let classic = classic::ClassicFile::from_bytes(bytes.as_ref(), format)?;
190                Ok(NcFile {
191                    format,
192                    inner: NcFileInner::Classic(classic),
193                })
194            }
195            NcFormat::Nc4 | NcFormat::Nc4Classic => {
196                let nc4 = nc4::Nc4File::from_storage_with_options(storage, options)?;
197                let actual_format = if nc4.is_classic_model() {
198                    NcFormat::Nc4Classic
199                } else {
200                    NcFormat::Nc4
201                };
202                Ok(NcFile {
203                    format: actual_format,
204                    inner: NcFileInner::Nc4(Box::new(nc4)),
205                })
206            }
207        }
208    }
209
210    /// Open a NetCDF file from in-memory bytes with custom options.
211    ///
212    /// NC4 options are applied when the payload is HDF5-backed.
213    pub fn from_bytes_with_options(data: &[u8], options: NcOpenOptions) -> Result<Self> {
214        let format = detect_format(data)?;
215
216        match format {
217            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
218                let classic = classic::ClassicFile::from_bytes(data, format)?;
219                Ok(NcFile {
220                    format,
221                    inner: NcFileInner::Classic(classic),
222                })
223            }
224            NcFormat::Nc4 | NcFormat::Nc4Classic => {
225                #[cfg(feature = "netcdf4")]
226                {
227                    let nc4 = nc4::Nc4File::from_bytes_with_options(data, options)?;
228                    let actual_format = if nc4.is_classic_model() {
229                        NcFormat::Nc4Classic
230                    } else {
231                        NcFormat::Nc4
232                    };
233                    Ok(NcFile {
234                        format: actual_format,
235                        inner: NcFileInner::Nc4(Box::new(nc4)),
236                    })
237                }
238                #[cfg(not(feature = "netcdf4"))]
239                {
240                    let _ = options;
241                    Err(Error::Nc4NotEnabled)
242                }
243            }
244        }
245    }
246
247    /// The detected file format.
248    pub fn format(&self) -> NcFormat {
249        self.format
250    }
251
252    /// The root group of the file.
253    ///
254    /// Classic files have a single implicit root group containing all
255    /// dimensions, variables, and global attributes. NetCDF-4 files
256    /// can have nested sub-groups.
257    pub fn root_group(&self) -> Result<&NcGroup> {
258        match &self.inner {
259            NcFileInner::Classic(c) => Ok(c.root_group()),
260            #[cfg(feature = "netcdf4")]
261            NcFileInner::Nc4(n) => n.root_group(),
262        }
263    }
264
265    /// Convenience: dimensions in the root group.
266    pub fn dimensions(&self) -> Result<&[NcDimension]> {
267        match &self.inner {
268            NcFileInner::Classic(c) => Ok(&c.root_group().dimensions),
269            #[cfg(feature = "netcdf4")]
270            NcFileInner::Nc4(n) => n.dimensions(),
271        }
272    }
273
274    /// Convenience: variables in the root group.
275    pub fn variables(&self) -> Result<&[NcVariable]> {
276        match &self.inner {
277            NcFileInner::Classic(c) => Ok(&c.root_group().variables),
278            #[cfg(feature = "netcdf4")]
279            NcFileInner::Nc4(n) => n.variables(),
280        }
281    }
282
283    /// Convenience: global attributes (attributes of the root group).
284    pub fn global_attributes(&self) -> Result<&[NcAttribute]> {
285        match &self.inner {
286            NcFileInner::Classic(c) => Ok(&c.root_group().attributes),
287            #[cfg(feature = "netcdf4")]
288            NcFileInner::Nc4(n) => n.global_attributes(),
289        }
290    }
291
292    /// Find a group by path relative to the root group.
293    pub fn group(&self, path: &str) -> Result<&NcGroup> {
294        match &self.inner {
295            NcFileInner::Classic(c) => c
296                .root_group()
297                .group(path)
298                .ok_or_else(|| Error::GroupNotFound(path.to_string())),
299            #[cfg(feature = "netcdf4")]
300            NcFileInner::Nc4(n) => n.group(path),
301        }
302    }
303
304    /// Find a variable by name or path relative to the root group.
305    pub fn variable(&self, name: &str) -> Result<&NcVariable> {
306        match &self.inner {
307            NcFileInner::Classic(c) => c
308                .root_group()
309                .variable(name)
310                .ok_or_else(|| Error::VariableNotFound(name.to_string())),
311            #[cfg(feature = "netcdf4")]
312            NcFileInner::Nc4(n) => n.variable(name),
313        }
314    }
315
316    /// Find a dimension by name or path relative to the root group.
317    pub fn dimension(&self, name: &str) -> Result<&NcDimension> {
318        match &self.inner {
319            NcFileInner::Classic(c) => c
320                .root_group()
321                .dimension(name)
322                .ok_or_else(|| Error::DimensionNotFound(name.to_string())),
323            #[cfg(feature = "netcdf4")]
324            NcFileInner::Nc4(n) => n.dimension(name),
325        }
326    }
327
328    /// Find the coordinate variable for a dimension name or path.
329    pub fn coordinate_variable(&self, name: &str) -> Result<&NcVariable> {
330        self.root_group()?
331            .coordinate_variable(name)
332            .ok_or_else(|| Error::VariableNotFound(format!("coordinate variable for {name}")))
333    }
334
335    /// Discover CF axes from coordinate variables in a group.
336    #[cfg(feature = "cf")]
337    pub fn cf_coordinate_axes(&self, group_path: &str) -> Result<Vec<cf::CfCoordinateAxis<'_>>> {
338        let group = self.group(group_path)?;
339        Ok(cf::discover_coordinate_axes(group))
340    }
341
342    /// Discover CF axes used by a variable from its coordinate variables.
343    #[cfg(feature = "cf")]
344    pub fn cf_variable_axes(&self, name: &str) -> Result<Vec<cf::CfCoordinateAxis<'_>>> {
345        let variable = self.variable(name)?;
346        let group = self.group(parent_group_path(name))?;
347        Ok(cf::discover_variable_axes(variable, group))
348    }
349
350    /// Discover CF time coordinate variables in a group.
351    #[cfg(feature = "cf")]
352    pub fn cf_time_coordinates(&self, group_path: &str) -> Result<Vec<cf::CfTimeCoordinate<'_>>> {
353        let group = self.group(group_path)?;
354        cf::discover_time_coordinates(group)
355    }
356
357    /// Discover the CF time coordinate used by a variable, if one exists.
358    #[cfg(feature = "cf")]
359    pub fn cf_variable_time_coordinate(
360        &self,
361        name: &str,
362    ) -> Result<Option<cf::CfTimeCoordinate<'_>>> {
363        let variable = self.variable(name)?;
364        let group = self.group(parent_group_path(name))?;
365        cf::discover_variable_time_coordinate(variable, group)
366    }
367
368    /// Find a group attribute by name or path relative to the root group.
369    pub fn global_attribute(&self, name: &str) -> Result<&NcAttribute> {
370        match &self.inner {
371            NcFileInner::Classic(c) => c
372                .root_group()
373                .attribute(name)
374                .ok_or_else(|| Error::AttributeNotFound(name.to_string())),
375            #[cfg(feature = "netcdf4")]
376            NcFileInner::Nc4(n) => n.global_attribute(name),
377        }
378    }
379
380    /// Read a variable's data as a typed array.
381    ///
382    /// Works for both classic (CDF-1/2/5) and NetCDF-4 files. NetCDF-4 nested
383    /// variables can be addressed with paths like `group/subgroup/var`. The type
384    /// parameter `T` must implement `NcReadable`, which is satisfied by:
385    /// `i8, u8, i16, u16, i32, u32, i64, u64, f32, f64`.
386    pub fn read_variable<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
387        match &self.inner {
388            NcFileInner::Classic(c) => c.read_variable::<T>(name),
389            #[cfg(feature = "netcdf4")]
390            NcFileInner::Nc4(n) => Ok(n.read_variable::<T>(name)?),
391        }
392    }
393
394    /// Read a variable using internal chunk-level parallelism when available.
395    ///
396    /// Classic formats fall back to `read_variable`.
397    #[cfg(feature = "rayon")]
398    pub fn read_variable_parallel<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
399        match &self.inner {
400            NcFileInner::Classic(c) => c.read_variable::<T>(name),
401            #[cfg(feature = "netcdf4")]
402            NcFileInner::Nc4(n) => Ok(n.read_variable_parallel::<T>(name)?),
403        }
404    }
405
406    /// Read a variable using the provided Rayon thread pool when available.
407    ///
408    /// Classic formats fall back to `read_variable`.
409    #[cfg(feature = "rayon")]
410    pub fn read_variable_in_pool<T: NcReadable>(
411        &self,
412        name: &str,
413        pool: &ThreadPool,
414    ) -> Result<ArrayD<T>> {
415        match &self.inner {
416            NcFileInner::Classic(c) => c.read_variable::<T>(name),
417            #[cfg(feature = "netcdf4")]
418            NcFileInner::Nc4(n) => Ok(n.read_variable_in_pool::<T>(name, pool)?),
419        }
420    }
421
422    /// Access the underlying classic file (for reading data).
423    ///
424    /// Returns `None` if this is a NetCDF-4 file.
425    pub fn as_classic(&self) -> Option<&classic::ClassicFile> {
426        match &self.inner {
427            NcFileInner::Classic(c) => Some(c),
428            #[cfg(feature = "netcdf4")]
429            NcFileInner::Nc4(_) => None,
430        }
431    }
432
433    /// Read a variable with automatic type promotion to f64.
434    ///
435    /// Reads in the native storage type (i8, i16, i32, f32, f64, u8, etc.)
436    /// and promotes all values to f64. This avoids the `TypeMismatch` error
437    /// that `read_variable::<f64>` produces for non-f64 variables.
438    pub fn read_variable_as_f64(&self, name: &str) -> Result<ArrayD<f64>> {
439        match &self.inner {
440            NcFileInner::Classic(c) => c.read_variable_as_f64(name),
441            #[cfg(feature = "netcdf4")]
442            NcFileInner::Nc4(n) => n.read_variable_as_f64(name),
443        }
444    }
445
446    /// Read a string variable as a single string.
447    ///
448    /// Use [`NcFile::read_variable_as_strings`] when the variable contains
449    /// multiple string elements.
450    pub fn read_variable_as_string(&self, name: &str) -> Result<String> {
451        match &self.inner {
452            NcFileInner::Classic(c) => c.read_variable_as_string(name),
453            #[cfg(feature = "netcdf4")]
454            NcFileInner::Nc4(n) => n.read_variable_as_string(name),
455        }
456    }
457
458    /// Read a string or char variable as a flat vector of strings.
459    ///
460    /// Classic char arrays interpret the last dimension as the string length
461    /// and flatten the leading dimensions.
462    pub fn read_variable_as_strings(&self, name: &str) -> Result<Vec<String>> {
463        match &self.inner {
464            NcFileInner::Classic(c) => c.read_variable_as_strings(name),
465            #[cfg(feature = "netcdf4")]
466            NcFileInner::Nc4(n) => n.read_variable_as_strings(name),
467        }
468    }
469
470    /// Read a variable and apply `scale_factor`/`add_offset` unpacking.
471    ///
472    /// Returns `actual = stored * scale_factor + add_offset`.
473    /// If neither attribute is present, returns the raw data as f64.
474    /// Uses type-promoting read so it works with any numeric storage type.
475    pub fn read_variable_unpacked(&self, name: &str) -> Result<ArrayD<f64>> {
476        let var = self.variable(name)?;
477        let params = unpack::UnpackParams::from_variable(var);
478        let mut data = self.read_variable_as_f64(name)?;
479        if let Some(p) = params {
480            p.apply(&mut data);
481        }
482        Ok(data)
483    }
484
485    /// Read a variable, replace `_FillValue`/`missing_value` with NaN,
486    /// and mask values outside `valid_min`/`valid_max`/`valid_range`.
487    /// Uses type-promoting read so it works with any numeric storage type.
488    pub fn read_variable_masked(&self, name: &str) -> Result<ArrayD<f64>> {
489        let var = self.variable(name)?;
490        let params = masked::MaskParams::from_variable(var);
491        let mut data = self.read_variable_as_f64(name)?;
492        if let Some(p) = params {
493            p.apply(&mut data);
494        }
495        Ok(data)
496    }
497
498    /// Read a variable with both masking and unpacking (CF spec order).
499    ///
500    /// Order: read → mask fill/missing → unpack (scale+offset).
501    /// Uses type-promoting read so it works with any numeric storage type.
502    pub fn read_variable_unpacked_masked(&self, name: &str) -> Result<ArrayD<f64>> {
503        let var = self.variable(name)?;
504        let mask_params = masked::MaskParams::from_variable(var);
505        let unpack_params = unpack::UnpackParams::from_variable(var);
506        let mut data = self.read_variable_as_f64(name)?;
507        if let Some(p) = mask_params {
508            p.apply(&mut data);
509        }
510        if let Some(p) = unpack_params {
511            p.apply(&mut data);
512        }
513        Ok(data)
514    }
515
516    // ----- Slice API -----
517
518    /// Read a slice (hyperslab) of a variable as a typed array.
519    pub fn read_variable_slice<T: NcReadable>(
520        &self,
521        name: &str,
522        selection: &NcSliceInfo,
523    ) -> Result<ArrayD<T>> {
524        match &self.inner {
525            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
526            #[cfg(feature = "netcdf4")]
527            NcFileInner::Nc4(n) => Ok(n.read_variable_slice::<T>(name, selection)?),
528        }
529    }
530
531    /// Read a slice (hyperslab) using chunk-level parallelism when available.
532    ///
533    /// For NetCDF-4 chunked datasets, overlapping chunks are decompressed in
534    /// parallel via Rayon. Classic formats fall back to `read_variable_slice`.
535    #[cfg(feature = "rayon")]
536    pub fn read_variable_slice_parallel<T: NcReadable>(
537        &self,
538        name: &str,
539        selection: &NcSliceInfo,
540    ) -> Result<ArrayD<T>> {
541        match &self.inner {
542            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
543            #[cfg(feature = "netcdf4")]
544            NcFileInner::Nc4(n) => Ok(n.read_variable_slice_parallel::<T>(name, selection)?),
545        }
546    }
547
548    /// Read a slice of a variable with automatic type promotion to f64.
549    pub fn read_variable_slice_as_f64(
550        &self,
551        name: &str,
552        selection: &NcSliceInfo,
553    ) -> Result<ArrayD<f64>> {
554        match &self.inner {
555            NcFileInner::Classic(c) => c.read_variable_slice_as_f64(name, selection),
556            #[cfg(feature = "netcdf4")]
557            NcFileInner::Nc4(n) => n.read_variable_slice_as_f64(name, selection),
558        }
559    }
560
561    /// Read a slice with `scale_factor`/`add_offset` unpacking.
562    pub fn read_variable_slice_unpacked(
563        &self,
564        name: &str,
565        selection: &NcSliceInfo,
566    ) -> Result<ArrayD<f64>> {
567        let var = self.variable(name)?;
568        let params = unpack::UnpackParams::from_variable(var);
569        let mut data = self.read_variable_slice_as_f64(name, selection)?;
570        if let Some(p) = params {
571            p.apply(&mut data);
572        }
573        Ok(data)
574    }
575
576    /// Read a slice with fill/missing value masking.
577    pub fn read_variable_slice_masked(
578        &self,
579        name: &str,
580        selection: &NcSliceInfo,
581    ) -> Result<ArrayD<f64>> {
582        let var = self.variable(name)?;
583        let params = masked::MaskParams::from_variable(var);
584        let mut data = self.read_variable_slice_as_f64(name, selection)?;
585        if let Some(p) = params {
586            p.apply(&mut data);
587        }
588        Ok(data)
589    }
590
591    /// Read a slice with both masking and unpacking (CF spec order).
592    pub fn read_variable_slice_unpacked_masked(
593        &self,
594        name: &str,
595        selection: &NcSliceInfo,
596    ) -> Result<ArrayD<f64>> {
597        let var = self.variable(name)?;
598        let mask_params = masked::MaskParams::from_variable(var);
599        let unpack_params = unpack::UnpackParams::from_variable(var);
600        let mut data = self.read_variable_slice_as_f64(name, selection)?;
601        if let Some(p) = mask_params {
602            p.apply(&mut data);
603        }
604        if let Some(p) = unpack_params {
605            p.apply(&mut data);
606        }
607        Ok(data)
608    }
609
610    // ----- Lazy Slice Iterator -----
611
612    /// Create an iterator that yields one slice per index along a given dimension.
613    ///
614    /// Each call to `next()` reads one slice using the slice API. This is
615    /// useful for iterating time steps, levels, etc. without loading the
616    /// entire dataset into memory.
617    pub fn iter_slices<T: NcReadable>(
618        &self,
619        name: &str,
620        dim: usize,
621    ) -> Result<NcSliceIterator<'_, T>> {
622        let var = self.variable(name)?;
623        let ndim = var.ndim();
624        if dim >= ndim {
625            return Err(Error::InvalidData(format!(
626                "dimension index {} out of range for {}-dimensional variable '{}'",
627                dim, ndim, name
628            )));
629        }
630        let dim_size = var.dimensions[dim].size;
631        Ok(NcSliceIterator {
632            file: self,
633            name: name.to_string(),
634            dim,
635            dim_size,
636            current: 0,
637            ndim,
638            _marker: std::marker::PhantomData,
639        })
640    }
641}
642
643/// Configuration options for opening a NetCDF file.
644pub struct NcOpenOptions {
645    /// Maximum bytes for the chunk cache (NC4 only). Default: 64 MiB.
646    pub chunk_cache_bytes: usize,
647    /// Maximum number of chunk cache slots (NC4 only). Default: 521.
648    pub chunk_cache_slots: usize,
649    /// NetCDF-4 metadata reconstruction policy. Default: strict.
650    pub metadata_mode: NcMetadataMode,
651    /// Custom filter registry (NC4 only).
652    #[cfg(feature = "netcdf4")]
653    pub filter_registry: Option<hdf5_reader::FilterRegistry>,
654}
655
656impl Default for NcOpenOptions {
657    fn default() -> Self {
658        NcOpenOptions {
659            chunk_cache_bytes: 64 * 1024 * 1024,
660            chunk_cache_slots: 521,
661            metadata_mode: NcMetadataMode::Strict,
662            #[cfg(feature = "netcdf4")]
663            filter_registry: None,
664        }
665    }
666}
667
668impl NcFile {
669    /// Open a NetCDF file with custom options.
670    pub fn open_with_options(path: impl AsRef<Path>, options: NcOpenOptions) -> Result<Self> {
671        let path = path.as_ref();
672        let mut file = File::open(path)?;
673        let (magic, n) = read_magic_prefix(&mut file)?;
674        let format = detect_format(&magic[..n])?;
675
676        match format {
677            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
678                let file = File::open(path)?;
679                // SAFETY: read-only mapping; caller must not modify the file concurrently.
680                let mmap = unsafe { Mmap::map(&file)? };
681                let classic = classic::ClassicFile::from_mmap(mmap, format)?;
682                Ok(NcFile {
683                    format,
684                    inner: NcFileInner::Classic(classic),
685                })
686            }
687            NcFormat::Nc4 | NcFormat::Nc4Classic => {
688                #[cfg(feature = "netcdf4")]
689                {
690                    let hdf5 = hdf5_reader::Hdf5File::open_with_options(
691                        path,
692                        hdf5_reader::OpenOptions {
693                            chunk_cache_bytes: options.chunk_cache_bytes,
694                            chunk_cache_slots: options.chunk_cache_slots,
695                            filter_registry: options.filter_registry,
696                            ..Default::default()
697                        },
698                    )?;
699                    let nc4 = nc4::Nc4File::from_hdf5(hdf5, options.metadata_mode)?;
700                    let actual_format = if nc4.is_classic_model() {
701                        NcFormat::Nc4Classic
702                    } else {
703                        NcFormat::Nc4
704                    };
705                    Ok(NcFile {
706                        format: actual_format,
707                        inner: NcFileInner::Nc4(Box::new(nc4)),
708                    })
709                }
710                #[cfg(not(feature = "netcdf4"))]
711                {
712                    let _ = options;
713                    Err(Error::Nc4NotEnabled)
714                }
715            }
716        }
717    }
718}
719
720/// Lazy iterator over slices of a variable along a given dimension.
721pub struct NcSliceIterator<'f, T: NcReadable> {
722    file: &'f NcFile,
723    name: String,
724    dim: usize,
725    dim_size: u64,
726    current: u64,
727    ndim: usize,
728    _marker: std::marker::PhantomData<T>,
729}
730
731impl<'f, T: NcReadable> Iterator for NcSliceIterator<'f, T> {
732    type Item = Result<ArrayD<T>>;
733
734    fn next(&mut self) -> Option<Self::Item> {
735        if self.current >= self.dim_size {
736            return None;
737        }
738        let mut selections = Vec::with_capacity(self.ndim);
739        for d in 0..self.ndim {
740            if d == self.dim {
741                selections.push(NcSliceInfoElem::Index(self.current));
742            } else {
743                selections.push(NcSliceInfoElem::Slice {
744                    start: 0,
745                    end: u64::MAX,
746                    step: 1,
747                });
748            }
749        }
750        let selection = NcSliceInfo { selections };
751        self.current += 1;
752        Some(self.file.read_variable_slice::<T>(&self.name, &selection))
753    }
754
755    fn size_hint(&self) -> (usize, Option<usize>) {
756        let remaining_u64 = self.dim_size.saturating_sub(self.current);
757        let remaining = remaining_u64.min(usize::MAX as u64) as usize;
758        (remaining, Some(remaining))
759    }
760}
761
762#[cfg(test)]
763mod tests {
764    use super::*;
765    #[cfg(feature = "netcdf4")]
766    use std::sync::Arc;
767
768    #[test]
769    fn test_detect_cdf1() {
770        let data = b"CDF\x01rest_of_file";
771        assert_eq!(detect_format(data).unwrap(), NcFormat::Classic);
772    }
773
774    #[test]
775    fn test_detect_cdf2() {
776        let data = b"CDF\x02rest_of_file";
777        assert_eq!(detect_format(data).unwrap(), NcFormat::Offset64);
778    }
779
780    #[test]
781    fn test_detect_cdf5() {
782        let data = b"CDF\x05rest_of_file";
783        assert_eq!(detect_format(data).unwrap(), NcFormat::Cdf5);
784    }
785
786    #[test]
787    fn test_detect_hdf5() {
788        let mut data = vec![0x89, b'H', b'D', b'F', 0x0D, 0x0A, 0x1A, 0x0A];
789        data.extend_from_slice(b"rest_of_file");
790        assert_eq!(detect_format(&data).unwrap(), NcFormat::Nc4);
791    }
792
793    #[test]
794    fn test_detect_invalid_magic() {
795        let data = b"XXXX";
796        assert!(matches!(
797            detect_format(data).unwrap_err(),
798            Error::InvalidMagic
799        ));
800    }
801
802    #[test]
803    fn test_detect_unsupported_version() {
804        let data = b"CDF\x03";
805        assert!(matches!(
806            detect_format(data).unwrap_err(),
807            Error::UnsupportedVersion(3)
808        ));
809    }
810
811    #[test]
812    fn test_detect_too_short() {
813        let data = b"CD";
814        assert!(matches!(
815            detect_format(data).unwrap_err(),
816            Error::InvalidMagic
817        ));
818    }
819
820    #[test]
821    fn test_from_bytes_minimal_cdf1() {
822        // Minimal valid CDF-1 file: magic + numrecs + absent dim/att/var lists.
823        let mut data = Vec::new();
824        data.extend_from_slice(b"CDF\x01");
825        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
826                                                     // dim_list: ABSENT
827        data.extend_from_slice(&0u32.to_be_bytes()); // tag = 0
828        data.extend_from_slice(&0u32.to_be_bytes()); // count = 0
829                                                     // att_list: ABSENT
830        data.extend_from_slice(&0u32.to_be_bytes());
831        data.extend_from_slice(&0u32.to_be_bytes());
832        // var_list: ABSENT
833        data.extend_from_slice(&0u32.to_be_bytes());
834        data.extend_from_slice(&0u32.to_be_bytes());
835
836        let file = NcFile::from_bytes(&data).unwrap();
837        assert_eq!(file.format(), NcFormat::Classic);
838        assert!(file.dimensions().unwrap().is_empty());
839        assert!(file.variables().unwrap().is_empty());
840        assert!(file.global_attributes().unwrap().is_empty());
841    }
842
843    #[cfg(feature = "netcdf4")]
844    #[test]
845    fn test_from_storage_minimal_cdf1() {
846        // Minimal valid CDF-1 file: magic + numrecs + absent dim/att/var lists.
847        let mut data = Vec::new();
848        data.extend_from_slice(b"CDF\x01");
849        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
850                                                     // dim_list: ABSENT
851        data.extend_from_slice(&0u32.to_be_bytes()); // tag = 0
852        data.extend_from_slice(&0u32.to_be_bytes()); // count = 0
853                                                     // att_list: ABSENT
854        data.extend_from_slice(&0u32.to_be_bytes());
855        data.extend_from_slice(&0u32.to_be_bytes());
856        // var_list: ABSENT
857        data.extend_from_slice(&0u32.to_be_bytes());
858        data.extend_from_slice(&0u32.to_be_bytes());
859
860        let file = NcFile::from_storage(Arc::new(BytesStorage::new(data))).unwrap();
861        assert_eq!(file.format(), NcFormat::Classic);
862        assert!(file.dimensions().unwrap().is_empty());
863        assert!(file.variables().unwrap().is_empty());
864        assert!(file.global_attributes().unwrap().is_empty());
865    }
866
867    #[cfg(feature = "netcdf4")]
868    #[test]
869    fn test_from_storage_short_input_reports_invalid_magic() {
870        let err = NcFile::from_storage(Arc::new(BytesStorage::new(vec![b'C', b'D'])))
871            .err()
872            .expect("short storage should not parse as NetCDF");
873        assert!(matches!(err, Error::InvalidMagic));
874    }
875
876    #[test]
877    fn test_from_bytes_cdf1_with_data() {
878        // Build a CDF-1 file with one dimension, one global attribute, and one variable.
879        let mut data = Vec::new();
880        data.extend_from_slice(b"CDF\x01");
881        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
882
883        // dim_list: 1 dimension "x" with size 3
884        data.extend_from_slice(&0x0000_000Au32.to_be_bytes()); // NC_DIMENSION tag
885        data.extend_from_slice(&1u32.to_be_bytes()); // nelems = 1
886                                                     // name "x": length=1, "x", 3 bytes padding
887        data.extend_from_slice(&1u32.to_be_bytes());
888        data.push(b'x');
889        data.extend_from_slice(&[0, 0, 0]); // padding to 4
890                                            // dim size
891        data.extend_from_slice(&3u32.to_be_bytes());
892
893        // att_list: 1 attribute "title" = "test"
894        data.extend_from_slice(&0x0000_000Cu32.to_be_bytes()); // NC_ATTRIBUTE tag
895        data.extend_from_slice(&1u32.to_be_bytes()); // nelems = 1
896                                                     // name "title"
897        data.extend_from_slice(&5u32.to_be_bytes());
898        data.extend_from_slice(b"title");
899        data.extend_from_slice(&[0, 0, 0]); // padding
900                                            // nc_type = NC_CHAR = 2
901        data.extend_from_slice(&2u32.to_be_bytes());
902        // nvalues = 4
903        data.extend_from_slice(&4u32.to_be_bytes());
904        data.extend_from_slice(b"test"); // exactly 4 bytes, no padding needed
905
906        // var_list: 1 variable "vals" with dim x, type float
907        data.extend_from_slice(&0x0000_000Bu32.to_be_bytes()); // NC_VARIABLE tag
908        data.extend_from_slice(&1u32.to_be_bytes()); // nelems = 1
909                                                     // name "vals"
910        data.extend_from_slice(&4u32.to_be_bytes());
911        data.extend_from_slice(b"vals");
912        // ndims = 1
913        data.extend_from_slice(&1u32.to_be_bytes());
914        // dimid = 0
915        data.extend_from_slice(&0u32.to_be_bytes());
916        // att_list: absent
917        data.extend_from_slice(&0u32.to_be_bytes());
918        data.extend_from_slice(&0u32.to_be_bytes());
919        // nc_type = NC_FLOAT = 5
920        data.extend_from_slice(&5u32.to_be_bytes());
921        // vsize = 12 (3 floats * 4 bytes)
922        data.extend_from_slice(&12u32.to_be_bytes());
923        // begin (offset): we'll put data right after this header
924        let data_offset = data.len() as u32 + 4; // +4 for this field itself
925        data.extend_from_slice(&data_offset.to_be_bytes());
926
927        // Now append the variable data: 3 floats
928        data.extend_from_slice(&1.5f32.to_be_bytes());
929        data.extend_from_slice(&2.5f32.to_be_bytes());
930        data.extend_from_slice(&3.5f32.to_be_bytes());
931
932        let file = NcFile::from_bytes(&data).unwrap();
933        assert_eq!(file.format(), NcFormat::Classic);
934        assert_eq!(file.dimensions().unwrap().len(), 1);
935        assert_eq!(file.dimensions().unwrap()[0].name, "x");
936        assert_eq!(file.dimensions().unwrap()[0].size, 3);
937
938        assert_eq!(file.global_attributes().unwrap().len(), 1);
939        assert_eq!(file.global_attributes().unwrap()[0].name, "title");
940        assert_eq!(
941            file.global_attributes().unwrap()[0]
942                .value
943                .as_string()
944                .unwrap(),
945            "test"
946        );
947
948        assert_eq!(file.variables().unwrap().len(), 1);
949        let var = file.variable("vals").unwrap();
950        assert_eq!(var.dtype(), &NcType::Float);
951        assert_eq!(var.shape(), vec![3]);
952
953        // Read the actual data through the classic file.
954        let classic = file.as_classic().unwrap();
955        let arr: ndarray::ArrayD<f32> = classic.read_variable("vals").unwrap();
956        assert_eq!(arr.shape(), &[3]);
957        assert_eq!(arr[[0]], 1.5f32);
958        assert_eq!(arr[[1]], 2.5f32);
959        assert_eq!(arr[[2]], 3.5f32);
960    }
961
962    #[test]
963    fn test_variable_not_found() {
964        let mut data = Vec::new();
965        data.extend_from_slice(b"CDF\x01");
966        data.extend_from_slice(&0u32.to_be_bytes());
967        // All absent.
968        data.extend_from_slice(&0u32.to_be_bytes());
969        data.extend_from_slice(&0u32.to_be_bytes());
970        data.extend_from_slice(&0u32.to_be_bytes());
971        data.extend_from_slice(&0u32.to_be_bytes());
972        data.extend_from_slice(&0u32.to_be_bytes());
973        data.extend_from_slice(&0u32.to_be_bytes());
974
975        let file = NcFile::from_bytes(&data).unwrap();
976        assert!(matches!(
977            file.variable("nonexistent").unwrap_err(),
978            Error::VariableNotFound(_)
979        ));
980    }
981
982    #[test]
983    fn test_group_not_found() {
984        let mut data = Vec::new();
985        data.extend_from_slice(b"CDF\x01");
986        data.extend_from_slice(&0u32.to_be_bytes());
987        data.extend_from_slice(&0u32.to_be_bytes());
988        data.extend_from_slice(&0u32.to_be_bytes());
989        data.extend_from_slice(&0u32.to_be_bytes());
990        data.extend_from_slice(&0u32.to_be_bytes());
991        data.extend_from_slice(&0u32.to_be_bytes());
992        data.extend_from_slice(&0u32.to_be_bytes());
993
994        let file = NcFile::from_bytes(&data).unwrap();
995        assert!(matches!(
996            file.group("nonexistent").unwrap_err(),
997            Error::GroupNotFound(_)
998        ));
999    }
1000}