Skip to main content

netcdf_reader/
lib.rs

1//! Pure-Rust NetCDF file reader.
2//!
3//! Supports:
4//! - **CDF-1** (classic): `CDF\x01` magic
5//! - **CDF-2** (64-bit offset): `CDF\x02` magic
6//! - **CDF-5** (64-bit data): `CDF\x05` magic
7//! - **NetCDF-4** (HDF5-backed): `\x89HDF\r\n\x1a\n` magic (requires `netcdf4` feature)
8//!
9//! # Example
10//!
11//! ```no_run
12//! use netcdf_reader::NcFile;
13//!
14//! let file = NcFile::open("example.nc").unwrap();
15//! println!("format: {:?}", file.format());
16//! for var in file.variables().unwrap() {
17//!     println!("  variable: {} shape={:?}", var.name(), var.shape());
18//! }
19//! ```
20
21pub mod classic;
22pub mod error;
23pub mod masked;
24pub mod types;
25pub mod unpack;
26
27#[cfg(feature = "netcdf4")]
28pub mod nc4;
29
30#[cfg(feature = "cf")]
31pub mod cf;
32
33pub use error::{Error, Result};
34#[cfg(feature = "netcdf4")]
35pub use hdf5_reader::storage::DynStorage;
36#[cfg(feature = "netcdf4")]
37pub use hdf5_reader::{BytesStorage, FileStorage, MmapStorage, Storage, StorageBuffer};
38pub use types::*;
39
40use std::fs::File;
41use std::io::Read;
42use std::path::Path;
43
44use memmap2::Mmap;
45use ndarray::ArrayD;
46#[cfg(feature = "rayon")]
47use rayon::ThreadPool;
48
49/// Trait alias for types readable from both classic and NetCDF-4 files.
50///
51/// This unifies `classic::data::NcReadType` (for CDF-1/2/5) and
52/// `hdf5_reader::H5Type` (for NetCDF-4/HDF5) so that `NcFile::read_variable`
53/// works across all formats with a single type parameter.
54#[cfg(feature = "netcdf4")]
55pub trait NcReadable: classic::data::NcReadType + hdf5_reader::H5Type {}
56#[cfg(feature = "netcdf4")]
57impl<T: classic::data::NcReadType + hdf5_reader::H5Type> NcReadable for T {}
58
59#[cfg(not(feature = "netcdf4"))]
60pub trait NcReadable: classic::data::NcReadType {}
61#[cfg(not(feature = "netcdf4"))]
62impl<T: classic::data::NcReadType> NcReadable for T {}
63
/// On-disk flavour of a NetCDF file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NcFormat {
    /// Classic format (CDF-1).
    Classic,
    /// 64-bit offset format (CDF-2).
    Offset64,
    /// 64-bit data format (CDF-5).
    Cdf5,
    /// HDF5-backed NetCDF-4.
    Nc4,
    /// HDF5-backed NetCDF-4 restricted to the classic data model.
    Nc4Classic,
}
78
/// Policy for reconstructing NetCDF-4 metadata.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum NcMetadataMode {
    /// Refuse to open a file whose NetCDF-4 metadata cannot be rebuilt exactly.
    ///
    /// This is the default mode.
    #[default]
    Strict,
    /// Permit heuristic reconstruction for malformed or partially-supported files.
    Lossy,
}
88
89/// An opened NetCDF file.
90pub struct NcFile {
91    format: NcFormat,
92    inner: NcFileInner,
93}
94
95enum NcFileInner {
96    Classic(classic::ClassicFile),
97    #[cfg(feature = "netcdf4")]
98    Nc4(Box<nc4::Nc4File>),
99}
100
/// Eight-byte HDF5 file signature: `\x89HDF\r\n\x1a\n`.
const HDF5_MAGIC: [u8; 8] = *b"\x89HDF\r\n\x1a\n";
103
104/// Detect the NetCDF format from the first bytes of a file.
105fn detect_format(data: &[u8]) -> Result<NcFormat> {
106    if data.len() < 4 {
107        return Err(Error::InvalidMagic);
108    }
109
110    // Check for CDF magic: "CDF" followed by version byte.
111    if data[0] == b'C' && data[1] == b'D' && data[2] == b'F' {
112        return match data[3] {
113            1 => Ok(NcFormat::Classic),
114            2 => Ok(NcFormat::Offset64),
115            5 => Ok(NcFormat::Cdf5),
116            v => Err(Error::UnsupportedVersion(v)),
117        };
118    }
119
120    // Check for HDF5 magic (8 bytes).
121    if data.len() >= 8 && data[..8] == HDF5_MAGIC {
122        return Ok(NcFormat::Nc4);
123    }
124
125    Err(Error::InvalidMagic)
126}
127
/// Read up to eight leading bytes from `reader` for format detection.
///
/// Returns the buffer together with the number of bytes actually filled; the
/// count is below eight only when the reader reached end-of-input first.
/// Short reads are accumulated, and reads failing with
/// `ErrorKind::Interrupted` are retried, since that error is documented as
/// non-fatal for `Read::read`.
///
/// # Errors
///
/// Any I/O error other than `Interrupted` is returned unchanged.
fn read_magic_prefix(reader: &mut impl Read) -> std::io::Result<([u8; 8], usize)> {
    let mut magic = [0u8; 8];
    let mut filled = 0;
    while filled < magic.len() {
        match reader.read(&mut magic[filled..]) {
            // End of input: report however many bytes were collected.
            Ok(0) => break,
            Ok(n) => filled += n,
            // A signal interrupted the call before any data arrived; retry.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok((magic, filled))
}
140
141impl NcFile {
142    /// Open a NetCDF file from a path.
143    ///
144    /// The format is auto-detected from the file's magic bytes.
145    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
146        Self::open_with_options(path, NcOpenOptions::default())
147    }
148
149    /// Open a NetCDF file from in-memory bytes.
150    ///
151    /// The format is auto-detected from the magic bytes.
152    pub fn from_bytes(data: &[u8]) -> Result<Self> {
153        Self::from_bytes_with_options(data, NcOpenOptions::default())
154    }
155
156    /// Open a NetCDF file from a custom random-access storage backend.
157    ///
158    /// NetCDF-4 files stay fully range-backed. Classic formats are read from
159    /// the provided storage into an owned buffer.
160    #[cfg(feature = "netcdf4")]
161    pub fn from_storage(storage: DynStorage) -> Result<Self> {
162        Self::from_storage_with_options(storage, NcOpenOptions::default())
163    }
164
165    /// Open a NetCDF file from a custom random-access storage backend with custom options.
166    #[cfg(feature = "netcdf4")]
167    pub fn from_storage_with_options(storage: DynStorage, options: NcOpenOptions) -> Result<Self> {
168        let magic_len = storage.len().min(HDF5_MAGIC.len() as u64) as usize;
169        let magic = storage.read_range(0, magic_len)?;
170        let format = detect_format(magic.as_ref())?;
171
172        match format {
173            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
174                let len = usize::try_from(storage.len()).map_err(|_| {
175                    Error::InvalidData(
176                        "classic storage length exceeds platform usize capacity".into(),
177                    )
178                })?;
179                let bytes = storage.read_range(0, len)?;
180                let classic = classic::ClassicFile::from_bytes(bytes.as_ref(), format)?;
181                Ok(NcFile {
182                    format,
183                    inner: NcFileInner::Classic(classic),
184                })
185            }
186            NcFormat::Nc4 | NcFormat::Nc4Classic => {
187                let nc4 = nc4::Nc4File::from_storage_with_options(storage, options)?;
188                let actual_format = if nc4.is_classic_model() {
189                    NcFormat::Nc4Classic
190                } else {
191                    NcFormat::Nc4
192                };
193                Ok(NcFile {
194                    format: actual_format,
195                    inner: NcFileInner::Nc4(Box::new(nc4)),
196                })
197            }
198        }
199    }
200
201    /// Open a NetCDF file from in-memory bytes with custom options.
202    ///
203    /// NC4 options are applied when the payload is HDF5-backed.
204    pub fn from_bytes_with_options(data: &[u8], options: NcOpenOptions) -> Result<Self> {
205        let format = detect_format(data)?;
206
207        match format {
208            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
209                let classic = classic::ClassicFile::from_bytes(data, format)?;
210                Ok(NcFile {
211                    format,
212                    inner: NcFileInner::Classic(classic),
213                })
214            }
215            NcFormat::Nc4 | NcFormat::Nc4Classic => {
216                #[cfg(feature = "netcdf4")]
217                {
218                    let nc4 = nc4::Nc4File::from_bytes_with_options(data, options)?;
219                    let actual_format = if nc4.is_classic_model() {
220                        NcFormat::Nc4Classic
221                    } else {
222                        NcFormat::Nc4
223                    };
224                    Ok(NcFile {
225                        format: actual_format,
226                        inner: NcFileInner::Nc4(Box::new(nc4)),
227                    })
228                }
229                #[cfg(not(feature = "netcdf4"))]
230                {
231                    let _ = options;
232                    Err(Error::Nc4NotEnabled)
233                }
234            }
235        }
236    }
237
238    /// The detected file format.
239    pub fn format(&self) -> NcFormat {
240        self.format
241    }
242
243    /// The root group of the file.
244    ///
245    /// Classic files have a single implicit root group containing all
246    /// dimensions, variables, and global attributes. NetCDF-4 files
247    /// can have nested sub-groups.
248    pub fn root_group(&self) -> Result<&NcGroup> {
249        match &self.inner {
250            NcFileInner::Classic(c) => Ok(c.root_group()),
251            #[cfg(feature = "netcdf4")]
252            NcFileInner::Nc4(n) => n.root_group(),
253        }
254    }
255
256    /// Convenience: dimensions in the root group.
257    pub fn dimensions(&self) -> Result<&[NcDimension]> {
258        match &self.inner {
259            NcFileInner::Classic(c) => Ok(&c.root_group().dimensions),
260            #[cfg(feature = "netcdf4")]
261            NcFileInner::Nc4(n) => n.dimensions(),
262        }
263    }
264
265    /// Convenience: variables in the root group.
266    pub fn variables(&self) -> Result<&[NcVariable]> {
267        match &self.inner {
268            NcFileInner::Classic(c) => Ok(&c.root_group().variables),
269            #[cfg(feature = "netcdf4")]
270            NcFileInner::Nc4(n) => n.variables(),
271        }
272    }
273
274    /// Convenience: global attributes (attributes of the root group).
275    pub fn global_attributes(&self) -> Result<&[NcAttribute]> {
276        match &self.inner {
277            NcFileInner::Classic(c) => Ok(&c.root_group().attributes),
278            #[cfg(feature = "netcdf4")]
279            NcFileInner::Nc4(n) => n.global_attributes(),
280        }
281    }
282
283    /// Find a group by path relative to the root group.
284    pub fn group(&self, path: &str) -> Result<&NcGroup> {
285        match &self.inner {
286            NcFileInner::Classic(c) => c
287                .root_group()
288                .group(path)
289                .ok_or_else(|| Error::GroupNotFound(path.to_string())),
290            #[cfg(feature = "netcdf4")]
291            NcFileInner::Nc4(n) => n.group(path),
292        }
293    }
294
295    /// Find a variable by name or path relative to the root group.
296    pub fn variable(&self, name: &str) -> Result<&NcVariable> {
297        match &self.inner {
298            NcFileInner::Classic(c) => c
299                .root_group()
300                .variable(name)
301                .ok_or_else(|| Error::VariableNotFound(name.to_string())),
302            #[cfg(feature = "netcdf4")]
303            NcFileInner::Nc4(n) => n.variable(name),
304        }
305    }
306
307    /// Find a dimension by name or path relative to the root group.
308    pub fn dimension(&self, name: &str) -> Result<&NcDimension> {
309        match &self.inner {
310            NcFileInner::Classic(c) => c
311                .root_group()
312                .dimension(name)
313                .ok_or_else(|| Error::DimensionNotFound(name.to_string())),
314            #[cfg(feature = "netcdf4")]
315            NcFileInner::Nc4(n) => n.dimension(name),
316        }
317    }
318
319    /// Find a group attribute by name or path relative to the root group.
320    pub fn global_attribute(&self, name: &str) -> Result<&NcAttribute> {
321        match &self.inner {
322            NcFileInner::Classic(c) => c
323                .root_group()
324                .attribute(name)
325                .ok_or_else(|| Error::AttributeNotFound(name.to_string())),
326            #[cfg(feature = "netcdf4")]
327            NcFileInner::Nc4(n) => n.global_attribute(name),
328        }
329    }
330
331    /// Read a variable's data as a typed array.
332    ///
333    /// Works for both classic (CDF-1/2/5) and NetCDF-4 files. NetCDF-4 nested
334    /// variables can be addressed with paths like `group/subgroup/var`. The type
335    /// parameter `T` must implement `NcReadable`, which is satisfied by:
336    /// `i8, u8, i16, u16, i32, u32, i64, u64, f32, f64`.
337    pub fn read_variable<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
338        match &self.inner {
339            NcFileInner::Classic(c) => c.read_variable::<T>(name),
340            #[cfg(feature = "netcdf4")]
341            NcFileInner::Nc4(n) => Ok(n.read_variable::<T>(name)?),
342        }
343    }
344
345    /// Read a variable using internal chunk-level parallelism when available.
346    ///
347    /// Classic formats fall back to `read_variable`.
348    #[cfg(feature = "rayon")]
349    pub fn read_variable_parallel<T: NcReadable>(&self, name: &str) -> Result<ArrayD<T>> {
350        match &self.inner {
351            NcFileInner::Classic(c) => c.read_variable::<T>(name),
352            #[cfg(feature = "netcdf4")]
353            NcFileInner::Nc4(n) => Ok(n.read_variable_parallel::<T>(name)?),
354        }
355    }
356
357    /// Read a variable using the provided Rayon thread pool when available.
358    ///
359    /// Classic formats fall back to `read_variable`.
360    #[cfg(feature = "rayon")]
361    pub fn read_variable_in_pool<T: NcReadable>(
362        &self,
363        name: &str,
364        pool: &ThreadPool,
365    ) -> Result<ArrayD<T>> {
366        match &self.inner {
367            NcFileInner::Classic(c) => c.read_variable::<T>(name),
368            #[cfg(feature = "netcdf4")]
369            NcFileInner::Nc4(n) => Ok(n.read_variable_in_pool::<T>(name, pool)?),
370        }
371    }
372
373    /// Access the underlying classic file (for reading data).
374    ///
375    /// Returns `None` if this is a NetCDF-4 file.
376    pub fn as_classic(&self) -> Option<&classic::ClassicFile> {
377        match &self.inner {
378            NcFileInner::Classic(c) => Some(c),
379            #[cfg(feature = "netcdf4")]
380            NcFileInner::Nc4(_) => None,
381        }
382    }
383
384    /// Read a variable with automatic type promotion to f64.
385    ///
386    /// Reads in the native storage type (i8, i16, i32, f32, f64, u8, etc.)
387    /// and promotes all values to f64. This avoids the `TypeMismatch` error
388    /// that `read_variable::<f64>` produces for non-f64 variables.
389    pub fn read_variable_as_f64(&self, name: &str) -> Result<ArrayD<f64>> {
390        match &self.inner {
391            NcFileInner::Classic(c) => c.read_variable_as_f64(name),
392            #[cfg(feature = "netcdf4")]
393            NcFileInner::Nc4(n) => n.read_variable_as_f64(name),
394        }
395    }
396
397    /// Read a string variable as a single string.
398    ///
399    /// Use [`NcFile::read_variable_as_strings`] when the variable contains
400    /// multiple string elements.
401    pub fn read_variable_as_string(&self, name: &str) -> Result<String> {
402        match &self.inner {
403            NcFileInner::Classic(c) => c.read_variable_as_string(name),
404            #[cfg(feature = "netcdf4")]
405            NcFileInner::Nc4(n) => n.read_variable_as_string(name),
406        }
407    }
408
409    /// Read a string or char variable as a flat vector of strings.
410    ///
411    /// Classic char arrays interpret the last dimension as the string length
412    /// and flatten the leading dimensions.
413    pub fn read_variable_as_strings(&self, name: &str) -> Result<Vec<String>> {
414        match &self.inner {
415            NcFileInner::Classic(c) => c.read_variable_as_strings(name),
416            #[cfg(feature = "netcdf4")]
417            NcFileInner::Nc4(n) => n.read_variable_as_strings(name),
418        }
419    }
420
421    /// Read a variable and apply `scale_factor`/`add_offset` unpacking.
422    ///
423    /// Returns `actual = stored * scale_factor + add_offset`.
424    /// If neither attribute is present, returns the raw data as f64.
425    /// Uses type-promoting read so it works with any numeric storage type.
426    pub fn read_variable_unpacked(&self, name: &str) -> Result<ArrayD<f64>> {
427        let var = self.variable(name)?;
428        let params = unpack::UnpackParams::from_variable(var);
429        let mut data = self.read_variable_as_f64(name)?;
430        if let Some(p) = params {
431            p.apply(&mut data);
432        }
433        Ok(data)
434    }
435
436    /// Read a variable, replace `_FillValue`/`missing_value` with NaN,
437    /// and mask values outside `valid_min`/`valid_max`/`valid_range`.
438    /// Uses type-promoting read so it works with any numeric storage type.
439    pub fn read_variable_masked(&self, name: &str) -> Result<ArrayD<f64>> {
440        let var = self.variable(name)?;
441        let params = masked::MaskParams::from_variable(var);
442        let mut data = self.read_variable_as_f64(name)?;
443        if let Some(p) = params {
444            p.apply(&mut data);
445        }
446        Ok(data)
447    }
448
449    /// Read a variable with both masking and unpacking (CF spec order).
450    ///
451    /// Order: read → mask fill/missing → unpack (scale+offset).
452    /// Uses type-promoting read so it works with any numeric storage type.
453    pub fn read_variable_unpacked_masked(&self, name: &str) -> Result<ArrayD<f64>> {
454        let var = self.variable(name)?;
455        let mask_params = masked::MaskParams::from_variable(var);
456        let unpack_params = unpack::UnpackParams::from_variable(var);
457        let mut data = self.read_variable_as_f64(name)?;
458        if let Some(p) = mask_params {
459            p.apply(&mut data);
460        }
461        if let Some(p) = unpack_params {
462            p.apply(&mut data);
463        }
464        Ok(data)
465    }
466
467    // ----- Slice API -----
468
469    /// Read a slice (hyperslab) of a variable as a typed array.
470    pub fn read_variable_slice<T: NcReadable>(
471        &self,
472        name: &str,
473        selection: &NcSliceInfo,
474    ) -> Result<ArrayD<T>> {
475        match &self.inner {
476            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
477            #[cfg(feature = "netcdf4")]
478            NcFileInner::Nc4(n) => Ok(n.read_variable_slice::<T>(name, selection)?),
479        }
480    }
481
482    /// Read a slice (hyperslab) using chunk-level parallelism when available.
483    ///
484    /// For NetCDF-4 chunked datasets, overlapping chunks are decompressed in
485    /// parallel via Rayon. Classic formats fall back to `read_variable_slice`.
486    #[cfg(feature = "rayon")]
487    pub fn read_variable_slice_parallel<T: NcReadable>(
488        &self,
489        name: &str,
490        selection: &NcSliceInfo,
491    ) -> Result<ArrayD<T>> {
492        match &self.inner {
493            NcFileInner::Classic(c) => c.read_variable_slice::<T>(name, selection),
494            #[cfg(feature = "netcdf4")]
495            NcFileInner::Nc4(n) => Ok(n.read_variable_slice_parallel::<T>(name, selection)?),
496        }
497    }
498
499    /// Read a slice of a variable with automatic type promotion to f64.
500    pub fn read_variable_slice_as_f64(
501        &self,
502        name: &str,
503        selection: &NcSliceInfo,
504    ) -> Result<ArrayD<f64>> {
505        match &self.inner {
506            NcFileInner::Classic(c) => c.read_variable_slice_as_f64(name, selection),
507            #[cfg(feature = "netcdf4")]
508            NcFileInner::Nc4(n) => n.read_variable_slice_as_f64(name, selection),
509        }
510    }
511
512    /// Read a slice with `scale_factor`/`add_offset` unpacking.
513    pub fn read_variable_slice_unpacked(
514        &self,
515        name: &str,
516        selection: &NcSliceInfo,
517    ) -> Result<ArrayD<f64>> {
518        let var = self.variable(name)?;
519        let params = unpack::UnpackParams::from_variable(var);
520        let mut data = self.read_variable_slice_as_f64(name, selection)?;
521        if let Some(p) = params {
522            p.apply(&mut data);
523        }
524        Ok(data)
525    }
526
527    /// Read a slice with fill/missing value masking.
528    pub fn read_variable_slice_masked(
529        &self,
530        name: &str,
531        selection: &NcSliceInfo,
532    ) -> Result<ArrayD<f64>> {
533        let var = self.variable(name)?;
534        let params = masked::MaskParams::from_variable(var);
535        let mut data = self.read_variable_slice_as_f64(name, selection)?;
536        if let Some(p) = params {
537            p.apply(&mut data);
538        }
539        Ok(data)
540    }
541
542    /// Read a slice with both masking and unpacking (CF spec order).
543    pub fn read_variable_slice_unpacked_masked(
544        &self,
545        name: &str,
546        selection: &NcSliceInfo,
547    ) -> Result<ArrayD<f64>> {
548        let var = self.variable(name)?;
549        let mask_params = masked::MaskParams::from_variable(var);
550        let unpack_params = unpack::UnpackParams::from_variable(var);
551        let mut data = self.read_variable_slice_as_f64(name, selection)?;
552        if let Some(p) = mask_params {
553            p.apply(&mut data);
554        }
555        if let Some(p) = unpack_params {
556            p.apply(&mut data);
557        }
558        Ok(data)
559    }
560
561    // ----- Lazy Slice Iterator -----
562
563    /// Create an iterator that yields one slice per index along a given dimension.
564    ///
565    /// Each call to `next()` reads one slice using the slice API. This is
566    /// useful for iterating time steps, levels, etc. without loading the
567    /// entire dataset into memory.
568    pub fn iter_slices<T: NcReadable>(
569        &self,
570        name: &str,
571        dim: usize,
572    ) -> Result<NcSliceIterator<'_, T>> {
573        let var = self.variable(name)?;
574        let ndim = var.ndim();
575        if dim >= ndim {
576            return Err(Error::InvalidData(format!(
577                "dimension index {} out of range for {}-dimensional variable '{}'",
578                dim, ndim, name
579            )));
580        }
581        let dim_size = var.dimensions[dim].size;
582        Ok(NcSliceIterator {
583            file: self,
584            name: name.to_string(),
585            dim,
586            dim_size,
587            current: 0,
588            ndim,
589            _marker: std::marker::PhantomData,
590        })
591    }
592}
593
594/// Configuration options for opening a NetCDF file.
595pub struct NcOpenOptions {
596    /// Maximum bytes for the chunk cache (NC4 only). Default: 64 MiB.
597    pub chunk_cache_bytes: usize,
598    /// Maximum number of chunk cache slots (NC4 only). Default: 521.
599    pub chunk_cache_slots: usize,
600    /// NetCDF-4 metadata reconstruction policy. Default: strict.
601    pub metadata_mode: NcMetadataMode,
602    /// Custom filter registry (NC4 only).
603    #[cfg(feature = "netcdf4")]
604    pub filter_registry: Option<hdf5_reader::FilterRegistry>,
605}
606
607impl Default for NcOpenOptions {
608    fn default() -> Self {
609        NcOpenOptions {
610            chunk_cache_bytes: 64 * 1024 * 1024,
611            chunk_cache_slots: 521,
612            metadata_mode: NcMetadataMode::Strict,
613            #[cfg(feature = "netcdf4")]
614            filter_registry: None,
615        }
616    }
617}
618
619impl NcFile {
620    /// Open a NetCDF file with custom options.
621    pub fn open_with_options(path: impl AsRef<Path>, options: NcOpenOptions) -> Result<Self> {
622        let path = path.as_ref();
623        let mut file = File::open(path)?;
624        let (magic, n) = read_magic_prefix(&mut file)?;
625        let format = detect_format(&magic[..n])?;
626
627        match format {
628            NcFormat::Classic | NcFormat::Offset64 | NcFormat::Cdf5 => {
629                let file = File::open(path)?;
630                // SAFETY: read-only mapping; caller must not modify the file concurrently.
631                let mmap = unsafe { Mmap::map(&file)? };
632                let classic = classic::ClassicFile::from_mmap(mmap, format)?;
633                Ok(NcFile {
634                    format,
635                    inner: NcFileInner::Classic(classic),
636                })
637            }
638            NcFormat::Nc4 | NcFormat::Nc4Classic => {
639                #[cfg(feature = "netcdf4")]
640                {
641                    let hdf5 = hdf5_reader::Hdf5File::open_with_options(
642                        path,
643                        hdf5_reader::OpenOptions {
644                            chunk_cache_bytes: options.chunk_cache_bytes,
645                            chunk_cache_slots: options.chunk_cache_slots,
646                            filter_registry: options.filter_registry,
647                        },
648                    )?;
649                    let nc4 = nc4::Nc4File::from_hdf5(hdf5, options.metadata_mode)?;
650                    let actual_format = if nc4.is_classic_model() {
651                        NcFormat::Nc4Classic
652                    } else {
653                        NcFormat::Nc4
654                    };
655                    Ok(NcFile {
656                        format: actual_format,
657                        inner: NcFileInner::Nc4(Box::new(nc4)),
658                    })
659                }
660                #[cfg(not(feature = "netcdf4"))]
661                {
662                    let _ = options;
663                    Err(Error::Nc4NotEnabled)
664                }
665            }
666        }
667    }
668}
669
670/// Lazy iterator over slices of a variable along a given dimension.
671pub struct NcSliceIterator<'f, T: NcReadable> {
672    file: &'f NcFile,
673    name: String,
674    dim: usize,
675    dim_size: u64,
676    current: u64,
677    ndim: usize,
678    _marker: std::marker::PhantomData<T>,
679}
680
681impl<'f, T: NcReadable> Iterator for NcSliceIterator<'f, T> {
682    type Item = Result<ArrayD<T>>;
683
684    fn next(&mut self) -> Option<Self::Item> {
685        if self.current >= self.dim_size {
686            return None;
687        }
688        let mut selections = Vec::with_capacity(self.ndim);
689        for d in 0..self.ndim {
690            if d == self.dim {
691                selections.push(NcSliceInfoElem::Index(self.current));
692            } else {
693                selections.push(NcSliceInfoElem::Slice {
694                    start: 0,
695                    end: u64::MAX,
696                    step: 1,
697                });
698            }
699        }
700        let selection = NcSliceInfo { selections };
701        self.current += 1;
702        Some(self.file.read_variable_slice::<T>(&self.name, &selection))
703    }
704
705    fn size_hint(&self) -> (usize, Option<usize>) {
706        let remaining_u64 = self.dim_size.saturating_sub(self.current);
707        let remaining = remaining_u64.min(usize::MAX as u64) as usize;
708        (remaining, Some(remaining))
709    }
710}
711
712#[cfg(test)]
713mod tests {
714    use super::*;
715    #[cfg(feature = "netcdf4")]
716    use std::sync::Arc;
717
718    #[test]
719    fn test_detect_cdf1() {
720        let data = b"CDF\x01rest_of_file";
721        assert_eq!(detect_format(data).unwrap(), NcFormat::Classic);
722    }
723
724    #[test]
725    fn test_detect_cdf2() {
726        let data = b"CDF\x02rest_of_file";
727        assert_eq!(detect_format(data).unwrap(), NcFormat::Offset64);
728    }
729
730    #[test]
731    fn test_detect_cdf5() {
732        let data = b"CDF\x05rest_of_file";
733        assert_eq!(detect_format(data).unwrap(), NcFormat::Cdf5);
734    }
735
736    #[test]
737    fn test_detect_hdf5() {
738        let mut data = vec![0x89, b'H', b'D', b'F', 0x0D, 0x0A, 0x1A, 0x0A];
739        data.extend_from_slice(b"rest_of_file");
740        assert_eq!(detect_format(&data).unwrap(), NcFormat::Nc4);
741    }
742
743    #[test]
744    fn test_detect_invalid_magic() {
745        let data = b"XXXX";
746        assert!(matches!(
747            detect_format(data).unwrap_err(),
748            Error::InvalidMagic
749        ));
750    }
751
752    #[test]
753    fn test_detect_unsupported_version() {
754        let data = b"CDF\x03";
755        assert!(matches!(
756            detect_format(data).unwrap_err(),
757            Error::UnsupportedVersion(3)
758        ));
759    }
760
761    #[test]
762    fn test_detect_too_short() {
763        let data = b"CD";
764        assert!(matches!(
765            detect_format(data).unwrap_err(),
766            Error::InvalidMagic
767        ));
768    }
769
770    #[test]
771    fn test_from_bytes_minimal_cdf1() {
772        // Minimal valid CDF-1 file: magic + numrecs + absent dim/att/var lists.
773        let mut data = Vec::new();
774        data.extend_from_slice(b"CDF\x01");
775        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
776                                                     // dim_list: ABSENT
777        data.extend_from_slice(&0u32.to_be_bytes()); // tag = 0
778        data.extend_from_slice(&0u32.to_be_bytes()); // count = 0
779                                                     // att_list: ABSENT
780        data.extend_from_slice(&0u32.to_be_bytes());
781        data.extend_from_slice(&0u32.to_be_bytes());
782        // var_list: ABSENT
783        data.extend_from_slice(&0u32.to_be_bytes());
784        data.extend_from_slice(&0u32.to_be_bytes());
785
786        let file = NcFile::from_bytes(&data).unwrap();
787        assert_eq!(file.format(), NcFormat::Classic);
788        assert!(file.dimensions().unwrap().is_empty());
789        assert!(file.variables().unwrap().is_empty());
790        assert!(file.global_attributes().unwrap().is_empty());
791    }
792
793    #[cfg(feature = "netcdf4")]
794    #[test]
795    fn test_from_storage_minimal_cdf1() {
796        // Minimal valid CDF-1 file: magic + numrecs + absent dim/att/var lists.
797        let mut data = Vec::new();
798        data.extend_from_slice(b"CDF\x01");
799        data.extend_from_slice(&0u32.to_be_bytes()); // numrecs = 0
800                                                     // dim_list: ABSENT
801        data.extend_from_slice(&0u32.to_be_bytes()); // tag = 0
802        data.extend_from_slice(&0u32.to_be_bytes()); // count = 0
803                                                     // att_list: ABSENT
804        data.extend_from_slice(&0u32.to_be_bytes());
805        data.extend_from_slice(&0u32.to_be_bytes());
806        // var_list: ABSENT
807        data.extend_from_slice(&0u32.to_be_bytes());
808        data.extend_from_slice(&0u32.to_be_bytes());
809
810        let file = NcFile::from_storage(Arc::new(BytesStorage::new(data))).unwrap();
811        assert_eq!(file.format(), NcFormat::Classic);
812        assert!(file.dimensions().unwrap().is_empty());
813        assert!(file.variables().unwrap().is_empty());
814        assert!(file.global_attributes().unwrap().is_empty());
815    }
816
817    #[cfg(feature = "netcdf4")]
818    #[test]
819    fn test_from_storage_short_input_reports_invalid_magic() {
820        let err = NcFile::from_storage(Arc::new(BytesStorage::new(vec![b'C', b'D'])))
821            .err()
822            .expect("short storage should not parse as NetCDF");
823        assert!(matches!(err, Error::InvalidMagic));
824    }
825
/// Builds a CDF-1 byte stream by hand (one dimension, one global attribute,
/// one float variable) and checks that metadata and data are read back.
#[test]
fn test_from_bytes_cdf1_with_data() {
    // Appends `word` to `buf` as a big-endian 32-bit value.
    fn put_u32(buf: &mut Vec<u8>, word: u32) {
        buf.extend_from_slice(&word.to_be_bytes());
    }

    // Values stored in the variable; reused below to verify the read.
    let samples = [1.5f32, 2.5f32, 3.5f32];

    let mut bytes = Vec::new();
    bytes.extend_from_slice(b"CDF\x01");
    put_u32(&mut bytes, 0); // numrecs = 0

    // dim_list: 1 dimension "x" with size 3
    put_u32(&mut bytes, 0x0000_000A); // NC_DIMENSION tag
    put_u32(&mut bytes, 1); // nelems = 1
    put_u32(&mut bytes, 1); // name length = 1
    bytes.extend_from_slice(b"x\0\0\0"); // "x" followed by 3 bytes of padding
    put_u32(&mut bytes, 3); // dim size

    // att_list: 1 attribute "title" = "test"
    put_u32(&mut bytes, 0x0000_000C); // NC_ATTRIBUTE tag
    put_u32(&mut bytes, 1); // nelems = 1
    put_u32(&mut bytes, 5); // name length = 5
    bytes.extend_from_slice(b"title\0\0\0"); // "title" followed by 3 bytes of padding
    put_u32(&mut bytes, 2); // nc_type = NC_CHAR = 2
    put_u32(&mut bytes, 4); // nvalues = 4
    bytes.extend_from_slice(b"test"); // exactly 4 bytes, no padding needed

    // var_list: 1 variable "vals" with dim x, type float
    put_u32(&mut bytes, 0x0000_000B); // NC_VARIABLE tag
    put_u32(&mut bytes, 1); // nelems = 1
    put_u32(&mut bytes, 4); // name length = 4
    bytes.extend_from_slice(b"vals");
    put_u32(&mut bytes, 1); // ndims = 1
    put_u32(&mut bytes, 0); // dimid = 0
    put_u32(&mut bytes, 0); // att_list: absent (tag)
    put_u32(&mut bytes, 0); // att_list: absent (count)
    put_u32(&mut bytes, 5); // nc_type = NC_FLOAT = 5
    put_u32(&mut bytes, 12); // vsize = 12 (3 floats * 4 bytes)

    // begin (offset): the data starts right after this 4-byte field.
    let begin = bytes.len() as u32 + 4;
    put_u32(&mut bytes, begin);

    // Variable data: the three floats, big-endian.
    for sample in samples.iter() {
        bytes.extend_from_slice(&sample.to_be_bytes());
    }

    let file = NcFile::from_bytes(&bytes).unwrap();
    assert_eq!(file.format(), NcFormat::Classic);

    let dims = file.dimensions().unwrap();
    assert_eq!(dims.len(), 1);
    assert_eq!(dims[0].name, "x");
    assert_eq!(dims[0].size, 3);

    let attrs = file.global_attributes().unwrap();
    assert_eq!(attrs.len(), 1);
    assert_eq!(attrs[0].name, "title");
    assert_eq!(attrs[0].value.as_string().unwrap(), "test");

    assert_eq!(file.variables().unwrap().len(), 1);
    let var = file.variable("vals").unwrap();
    assert_eq!(var.dtype(), &NcType::Float);
    assert_eq!(var.shape(), vec![3]);

    // Read the actual data through the classic file.
    let classic = file.as_classic().unwrap();
    let arr: ndarray::ArrayD<f32> = classic.read_variable("vals").unwrap();
    assert_eq!(arr.shape(), &[3]);
    for (idx, expected) in samples.iter().enumerate() {
        assert_eq!(arr[[idx]], *expected);
    }
}
911
/// Looking up a variable in a file with no variables yields
/// `Error::VariableNotFound`.
#[test]
fn test_variable_not_found() {
    // CDF-1 magic followed by seven zero words: the word after the magic,
    // then a zero (tag, count) pair for each of the three header lists.
    let mut header = b"CDF\x01".to_vec();
    header.extend_from_slice(&[0u8; 7 * 4]);

    let file = NcFile::from_bytes(&header).unwrap();

    let lookup = file.variable("nonexistent");
    assert!(matches!(lookup.unwrap_err(), Error::VariableNotFound(_)));
}
931
/// Looking up a group in a classic file yields `Error::GroupNotFound`.
#[test]
fn test_group_not_found() {
    // Number of zero bytes after the magic: seven big-endian u32 words.
    const ZERO_TAIL_LEN: usize = 7 * 4;

    let mut header: Vec<u8> = Vec::from(&b"CDF\x01"[..]);
    let full_len = header.len() + ZERO_TAIL_LEN;
    header.resize(full_len, 0);

    let file = NcFile::from_bytes(&header).unwrap();

    let lookup = file.group("nonexistent");
    assert!(matches!(lookup.unwrap_err(), Error::GroupNotFound(_)));
}
950}