netcdf-reader 0.6.0

Pure-Rust NetCDF-3 classic and NetCDF-4 (HDF5-backed) file reader
Documentation
//! Classic (CDF-1/2/5) NetCDF file format support.
//!
//! This module handles the original NetCDF binary format (CDF-1 classic, CDF-2
//! 64-bit offset, and CDF-5 64-bit data). All multi-byte values are big-endian.

pub mod data;
pub mod header;
pub(crate) mod storage;
pub mod types;
pub mod variable;

use std::fs::File;
use std::path::Path;

use memmap2::Mmap;

use crate::error::Result;
use crate::types::NcGroup;
use crate::NcFormat;

use storage::ClassicStorage;

/// An opened classic-format NetCDF file (CDF-1, CDF-2, or CDF-5).
///
/// Instances are created through the `ClassicFile` constructors, which parse
/// the header once and keep the backing storage alongside the parsed metadata.
pub struct ClassicFile {
    /// Format variant the caller supplied when opening the file.
    pub(crate) format: NcFormat,
    /// Root group built from the header's dimensions, variables, and global
    /// attributes; it never contains sub-groups.
    pub(crate) root_group: NcGroup,
    /// Backing storage (memory map, owned bytes, or range-based backend).
    pub(crate) storage: ClassicStorage,
    /// Record count copied from the parsed header.
    pub(crate) numrecs: u64,
}

impl ClassicFile {
    /// Open a classic NetCDF file from disk using memory-mapping.
    pub fn open(path: &Path, format: NcFormat) -> Result<Self> {
        let file = File::open(path)?;
        // SAFETY: read-only mapping; caller must not modify the file concurrently.
        let mmap = unsafe { Mmap::map(&file)? };
        let header = header::parse_header(&mmap, format)?;
        reject_unsupported_classic_features(&header)?;
        let storage = ClassicStorage::from_mmap(mmap);

        let root_group = NcGroup {
            name: "/".to_string(),
            dimensions: header.dimensions,
            variables: header.variables,
            attributes: header.global_attributes,
            groups: Vec::new(), // Classic format has no sub-groups.
        };

        Ok(ClassicFile {
            format,
            root_group,
            storage,
            numrecs: header.numrecs,
        })
    }

    /// Open a classic NetCDF file from in-memory bytes.
    pub fn from_bytes(bytes: &[u8], format: NcFormat) -> Result<Self> {
        let header = header::parse_header(bytes, format)?;
        reject_unsupported_classic_features(&header)?;
        let storage = ClassicStorage::from_bytes(bytes.to_vec());

        let root_group = NcGroup {
            name: "/".to_string(),
            dimensions: header.dimensions,
            variables: header.variables,
            attributes: header.global_attributes,
            groups: Vec::new(),
        };

        Ok(ClassicFile {
            format,
            root_group,
            storage,
            numrecs: header.numrecs,
        })
    }

    /// Open a classic NetCDF file from an existing memory map (avoids double mmap).
    pub fn from_mmap(mmap: Mmap, format: NcFormat) -> Result<Self> {
        let header = header::parse_header(&mmap, format)?;
        reject_unsupported_classic_features(&header)?;
        let storage = ClassicStorage::from_mmap(mmap);

        let root_group = NcGroup {
            name: "/".to_string(),
            dimensions: header.dimensions,
            variables: header.variables,
            attributes: header.global_attributes,
            groups: Vec::new(),
        };

        Ok(ClassicFile {
            format,
            root_group,
            storage,
            numrecs: header.numrecs,
        })
    }

    /// Open a classic NetCDF file from a random-access storage backend.
    #[cfg(feature = "netcdf4")]
    pub fn from_storage(
        storage: hdf5_reader::storage::DynStorage,
        format: NcFormat,
    ) -> Result<Self> {
        let storage = ClassicStorage::from_range(storage);
        let header = parse_header_from_storage(&storage, format)?;
        reject_unsupported_classic_features(&header)?;

        let root_group = NcGroup {
            name: "/".to_string(),
            dimensions: header.dimensions,
            variables: header.variables,
            attributes: header.global_attributes,
            groups: Vec::new(),
        };

        Ok(ClassicFile {
            format,
            root_group,
            storage,
            numrecs: header.numrecs,
        })
    }

    /// The file format (Classic, Offset64, or Cdf5).
    pub fn format(&self) -> NcFormat {
        self.format
    }

    /// The root group containing all dimensions, variables, and global attributes.
    pub fn root_group(&self) -> &NcGroup {
        &self.root_group
    }

    /// Number of records in the unlimited dimension.
    pub fn numrecs(&self) -> u64 {
        self.numrecs
    }
}

fn reject_unsupported_classic_features(header: &header::ClassicHeader) -> Result<()> {
    let has_subfiling_marker = header
        .global_attributes
        .iter()
        .any(|attr| is_subfiling_attribute_name(&attr.name))
        || header.variables.iter().any(|var| {
            var.attributes
                .iter()
                .any(|attr| is_subfiling_attribute_name(&attr.name))
        });

    if has_subfiling_marker {
        return Err(crate::Error::UnsupportedFeature(
            "PnetCDF subfiling datasets require a virtual multi-file storage adapter".to_string(),
        ));
    }

    Ok(())
}

/// Report whether an attribute name begins with a subfiling marker prefix.
///
/// The comparison is ASCII case-insensitive and checks only the start of the
/// name, so `"SubFiling_count"` matches while `"my_subfiling"` does not.
fn is_subfiling_attribute_name(name: &str) -> bool {
    const MARKER_PREFIXES: [&str; 2] = ["_pnetcdf_subfiling", "subfiling"];

    let raw = name.as_bytes();
    MARKER_PREFIXES.iter().any(|prefix| {
        let wanted = prefix.as_bytes();
        // Compare bytes so no lowercased copy of the name is needed.
        raw.len() >= wanted.len() && raw[..wanted.len()].eq_ignore_ascii_case(wanted)
    })
}

/// Parse the classic header through a storage backend, growing the read window
/// until the header fits.
///
/// Starts with the storage's suggested prefix length. Whenever parsing stops
/// with `UnexpectedEof` and the prefix was shorter than the whole storage, the
/// request is doubled and parsing is retried. Any other error, or an
/// `UnexpectedEof` once the prefix already spans the storage, is returned.
#[cfg(feature = "netcdf4")]
fn parse_header_from_storage(
    storage: &ClassicStorage,
    format: NcFormat,
) -> Result<header::ClassicHeader> {
    let mut request = storage.initial_header_len();

    loop {
        let window = storage.read_header_prefix(request)?;
        let fetched = window.len();

        let failure = match header::parse_header(window.as_ref(), format) {
            Ok(parsed) => return Ok(parsed),
            Err(err) => err,
        };

        // Only a truncated read that did not yet cover the storage is worth retrying.
        let truncated = matches!(failure, crate::Error::UnexpectedEof { .. });
        if !(truncated && (fetched as u64) < storage.len()) {
            return Err(failure);
        }

        // `max(1)` keeps the doubling from stalling on an empty prefix.
        request = fetched.max(1).saturating_mul(2);
    }
}