Skip to main content

netcdf_reader/classic/
header.rs

1//! Parse the NetCDF classic (CDF-1/2/5) binary header.
2//!
3//! The classic header is a sequence of big-endian fields describing dimensions,
4//! global attributes, and variables. All multi-byte integers are big-endian.
5//! Strings are padded to 4-byte alignment. CDF-5 uses 8-byte counts and sizes
6//! where CDF-1/2 use 4-byte values.
7
8use crate::error::{Error, Result};
9use crate::types::{
10    checked_mul_u64, checked_usize_from_u64, NcAttrValue, NcAttribute, NcDimension, NcType,
11    NcVariable,
12};
13use crate::NcFormat;
14
15#[cfg(test)]
16use super::types::pad_to_4;
17use super::types::{nc_type_from_code, padding_to_4};
18
// Tags that introduce each list in the classic header.

/// Tag written in place of a list that is not present; a zero count follows it.
const ABSENT: u32 = 0;
/// Tag that opens the dimension list.
const NC_DIMENSION: u32 = 0x0A;
/// Tag that opens the variable list.
const NC_VARIABLE: u32 = 0x0B;
/// Tag that opens an attribute list.
const NC_ATTRIBUTE: u32 = 0x0C;

/// `numrecs` value (all bits set) marking a streaming, indeterminate record count.
const STREAMING: u32 = u32::MAX;
27
/// Result of parsing a classic NetCDF header.
pub struct ClassicHeader {
    /// Entries of the header's dimension list, in file order.
    pub dimensions: Vec<NcDimension>,
    /// Attributes from the header-level (global) attribute list.
    pub global_attributes: Vec<NcAttribute>,
    /// Entries of the header's variable list, in file order.
    pub variables: Vec<NcVariable>,
    /// Record count read from the header; stored as 0 when a CDF-1/2 header
    /// carries the streaming sentinel.
    pub numrecs: u64,
}
35
36/// A cursor for reading big-endian data from a byte slice.
37struct Cursor<'a> {
38    data: &'a [u8],
39    pos: usize,
40}
41
42impl<'a> Cursor<'a> {
43    fn new(data: &'a [u8]) -> Self {
44        Cursor { data, pos: 0 }
45    }
46
47    fn remaining(&self) -> usize {
48        self.data.len().saturating_sub(self.pos)
49    }
50
51    fn ensure(&self, n: usize) -> Result<()> {
52        if self.remaining() < n {
53            Err(Error::UnexpectedEof {
54                offset: self.pos as u64,
55                needed: n as u64,
56                available: self.remaining() as u64,
57            })
58        } else {
59            Ok(())
60        }
61    }
62
63    #[allow(dead_code)]
64    fn read_u8(&mut self) -> Result<u8> {
65        self.ensure(1)?;
66        let v = self.data[self.pos];
67        self.pos += 1;
68        Ok(v)
69    }
70
71    fn read_u16_be(&mut self) -> Result<u16> {
72        self.ensure(2)?;
73        let v = u16::from_be_bytes([self.data[self.pos], self.data[self.pos + 1]]);
74        self.pos += 2;
75        Ok(v)
76    }
77
78    fn read_u32_be(&mut self) -> Result<u32> {
79        self.ensure(4)?;
80        let v = u32::from_be_bytes([
81            self.data[self.pos],
82            self.data[self.pos + 1],
83            self.data[self.pos + 2],
84            self.data[self.pos + 3],
85        ]);
86        self.pos += 4;
87        Ok(v)
88    }
89
90    fn read_i32_be(&mut self) -> Result<i32> {
91        self.ensure(4)?;
92        let v = i32::from_be_bytes([
93            self.data[self.pos],
94            self.data[self.pos + 1],
95            self.data[self.pos + 2],
96            self.data[self.pos + 3],
97        ]);
98        self.pos += 4;
99        Ok(v)
100    }
101
102    fn read_u64_be(&mut self) -> Result<u64> {
103        self.ensure(8)?;
104        let v = u64::from_be_bytes([
105            self.data[self.pos],
106            self.data[self.pos + 1],
107            self.data[self.pos + 2],
108            self.data[self.pos + 3],
109            self.data[self.pos + 4],
110            self.data[self.pos + 5],
111            self.data[self.pos + 6],
112            self.data[self.pos + 7],
113        ]);
114        self.pos += 8;
115        Ok(v)
116    }
117
118    fn read_i64_be(&mut self) -> Result<i64> {
119        self.ensure(8)?;
120        let v = i64::from_be_bytes([
121            self.data[self.pos],
122            self.data[self.pos + 1],
123            self.data[self.pos + 2],
124            self.data[self.pos + 3],
125            self.data[self.pos + 4],
126            self.data[self.pos + 5],
127            self.data[self.pos + 6],
128            self.data[self.pos + 7],
129        ]);
130        self.pos += 8;
131        Ok(v)
132    }
133
134    fn read_f32_be(&mut self) -> Result<f32> {
135        self.ensure(4)?;
136        let v = f32::from_be_bytes([
137            self.data[self.pos],
138            self.data[self.pos + 1],
139            self.data[self.pos + 2],
140            self.data[self.pos + 3],
141        ]);
142        self.pos += 4;
143        Ok(v)
144    }
145
146    fn read_f64_be(&mut self) -> Result<f64> {
147        self.ensure(8)?;
148        let v = f64::from_be_bytes([
149            self.data[self.pos],
150            self.data[self.pos + 1],
151            self.data[self.pos + 2],
152            self.data[self.pos + 3],
153            self.data[self.pos + 4],
154            self.data[self.pos + 5],
155            self.data[self.pos + 6],
156            self.data[self.pos + 7],
157        ]);
158        self.pos += 8;
159        Ok(v)
160    }
161
162    fn read_bytes(&mut self, n: usize) -> Result<&'a [u8]> {
163        self.ensure(n)?;
164        let slice = &self.data[self.pos..self.pos + n];
165        self.pos += n;
166        Ok(slice)
167    }
168
169    fn skip(&mut self, n: usize) -> Result<()> {
170        self.ensure(n)?;
171        self.pos += n;
172        Ok(())
173    }
174
175    /// Read a count field: 4 bytes for CDF-1/2, 8 bytes for CDF-5.
176    fn read_count(&mut self, format: NcFormat) -> Result<u64> {
177        match format {
178            NcFormat::Cdf5 => self.read_u64_be(),
179            _ => self.read_u32_be().map(|v| v as u64),
180        }
181    }
182
183    /// Read a padded name: 4-byte length, then chars, then padding to 4-byte boundary.
184    /// The name length prefix is always 4 bytes for CDF-1/2 and 8 bytes for CDF-5.
185    fn read_name(&mut self, format: NcFormat) -> Result<String> {
186        let len = checked_usize_from_u64(self.read_count(format)?, "classic name length")?;
187        let bytes = self.read_bytes(len)?;
188        let padded_len = checked_pad_to_4(len, "classic name length")?;
189        let pad = padded_len - len;
190        if pad > 0 {
191            self.skip(pad)?;
192        }
193        String::from_utf8(bytes.to_vec())
194            .map_err(|e| Error::InvalidData(format!("invalid UTF-8 name: {}", e)))
195    }
196}
197
198fn checked_pad_to_4(len: usize, context: &str) -> Result<usize> {
199    len.checked_add(padding_to_4(len)).ok_or_else(|| {
200        Error::InvalidData(format!("{context} padded length exceeds platform usize"))
201    })
202}
203
204fn read_list_count(
205    cur: &mut Cursor<'_>,
206    format: NcFormat,
207    min_bytes_per_entry: u64,
208    context: &str,
209) -> Result<usize> {
210    let raw = cur.read_count(format)?;
211    let count = checked_usize_from_u64(raw, context)?;
212    let min_needed = checked_mul_u64(raw, min_bytes_per_entry, context)?;
213    if min_needed > cur.remaining() as u64 {
214        return Err(Error::UnexpectedEof {
215            offset: cur.pos as u64,
216            needed: min_needed,
217            available: cur.remaining() as u64,
218        });
219    }
220    Ok(count)
221}
222
223/// Parse a complete classic NetCDF header from raw file bytes.
224///
225/// The `format` parameter must be one of `Classic`, `Offset64`, or `Cdf5`
226/// (the caller has already read and validated the magic bytes).
227pub fn parse_header(data: &[u8], format: NcFormat) -> Result<ClassicHeader> {
228    // Skip past the 4-byte magic (already validated by caller).
229    let mut cur = Cursor::new(data);
230    cur.skip(4)?;
231
232    // numrecs: 4 bytes for CDF-1/2, 8 bytes for CDF-5.
233    let numrecs_raw = cur.read_count(format)?;
234    let is_streaming = format != NcFormat::Cdf5 && (numrecs_raw as u32) == STREAMING;
235    let numrecs = if is_streaming { 0 } else { numrecs_raw };
236
237    // dim_list
238    let mut dimensions = parse_dim_list(&mut cur, format)?;
239
240    // att_list (global attributes)
241    let global_attributes = parse_att_list(&mut cur, format)?;
242
243    // var_list
244    let mut variables = parse_var_list(&mut cur, format, &dimensions)?;
245
246    if numrecs > 0 {
247        apply_unlimited_dimension_size(&mut dimensions, &mut variables, numrecs);
248    }
249
250    Ok(ClassicHeader {
251        dimensions,
252        global_attributes,
253        variables,
254        numrecs,
255    })
256}
257
258pub(crate) fn has_streaming_numrecs(data: &[u8], format: NcFormat) -> bool {
259    if format == NcFormat::Cdf5 {
260        return false;
261    }
262
263    let Some(bytes) = data.get(4..8) else {
264        return false;
265    };
266
267    u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) == STREAMING
268}
269
270/// Parse the dimension list.
271fn parse_dim_list(cur: &mut Cursor<'_>, format: NcFormat) -> Result<Vec<NcDimension>> {
272    let tag = cur.read_u32_be()?;
273
274    if tag == ABSENT {
275        // ABSENT is a zero tag followed by a zero count.
276        let _zero = cur.read_count(format)?;
277        return Ok(Vec::new());
278    }
279
280    if tag != NC_DIMENSION {
281        return Err(Error::InvalidData(format!(
282            "expected NC_DIMENSION tag (0x{:08X}), got 0x{:08X}",
283            NC_DIMENSION, tag
284        )));
285    }
286
287    let nelems = read_list_count(cur, format, 16, "dimension count")?;
288    let mut dims = Vec::with_capacity(nelems);
289
290    for _ in 0..nelems {
291        let name = cur.read_name(format)?;
292        let size = cur.read_count(format)?;
293        // A dimension with size 0 is the unlimited (record) dimension.
294        let is_unlimited = size == 0;
295        dims.push(NcDimension {
296            name,
297            size,
298            is_unlimited,
299        });
300    }
301
302    Ok(dims)
303}
304
305/// Parse an attribute list (used for both global and variable attributes).
306fn parse_att_list(cur: &mut Cursor<'_>, format: NcFormat) -> Result<Vec<NcAttribute>> {
307    let tag = cur.read_u32_be()?;
308
309    if tag == ABSENT {
310        let _zero = cur.read_count(format)?;
311        return Ok(Vec::new());
312    }
313
314    if tag != NC_ATTRIBUTE {
315        return Err(Error::InvalidData(format!(
316            "expected NC_ATTRIBUTE tag (0x{:08X}), got 0x{:08X}",
317            NC_ATTRIBUTE, tag
318        )));
319    }
320
321    let nelems = read_list_count(cur, format, 12, "attribute count")?;
322    let mut attrs = Vec::with_capacity(nelems);
323
324    for _ in 0..nelems {
325        let name = cur.read_name(format)?;
326        let nc_type = cur.read_u32_be()?;
327        let nvalues = checked_usize_from_u64(cur.read_count(format)?, "attribute value count")?;
328        let value = read_attr_values(cur, nc_type, nvalues, format)?;
329
330        attrs.push(NcAttribute { name, value });
331    }
332
333    Ok(attrs)
334}
335
336/// Read attribute values of the given type and count.
337/// Values are padded to a 4-byte boundary in the file.
338fn read_attr_values(
339    cur: &mut Cursor<'_>,
340    nc_type: u32,
341    nvalues: usize,
342    _format: NcFormat,
343) -> Result<NcAttrValue> {
344    let typ = nc_type_from_code(nc_type)?;
345    let elem_size = typ.size()?;
346    let raw_bytes = nvalues.checked_mul(elem_size).ok_or_else(|| {
347        Error::InvalidData("classic attribute byte count exceeds platform usize".to_string())
348    })?;
349    let padded = checked_pad_to_4(raw_bytes, "classic attribute byte count")?;
350
351    match typ {
352        NcType::Byte => {
353            let bytes = cur.read_bytes(raw_bytes)?;
354            let values: Vec<i8> = bytes.iter().map(|&b| b as i8).collect();
355            cur.skip(padded - raw_bytes)?;
356            Ok(NcAttrValue::Bytes(values))
357        }
358        NcType::Char => {
359            let bytes = cur.read_bytes(raw_bytes)?;
360            // Trim trailing null bytes (common in NetCDF char attributes).
361            let s = String::from_utf8_lossy(bytes);
362            let trimmed = s.trim_end_matches('\0').to_string();
363            cur.skip(padded - raw_bytes)?;
364            Ok(NcAttrValue::Chars(trimmed))
365        }
366        NcType::Short => {
367            let mut values = Vec::with_capacity(nvalues);
368            for _ in 0..nvalues {
369                values.push(cur.read_u16_be()? as i16);
370            }
371            let pad = padded - raw_bytes;
372            cur.skip(pad)?;
373            Ok(NcAttrValue::Shorts(values))
374        }
375        NcType::Int => {
376            let mut values = Vec::with_capacity(nvalues);
377            for _ in 0..nvalues {
378                values.push(cur.read_i32_be()?);
379            }
380            Ok(NcAttrValue::Ints(values))
381        }
382        NcType::Float => {
383            let mut values = Vec::with_capacity(nvalues);
384            for _ in 0..nvalues {
385                values.push(cur.read_f32_be()?);
386            }
387            Ok(NcAttrValue::Floats(values))
388        }
389        NcType::Double => {
390            let mut values = Vec::with_capacity(nvalues);
391            for _ in 0..nvalues {
392                values.push(cur.read_f64_be()?);
393            }
394            Ok(NcAttrValue::Doubles(values))
395        }
396        NcType::UByte => {
397            let bytes = cur.read_bytes(raw_bytes)?;
398            cur.skip(padded - raw_bytes)?;
399            Ok(NcAttrValue::UBytes(bytes.to_vec()))
400        }
401        NcType::UShort => {
402            let mut values = Vec::with_capacity(nvalues);
403            for _ in 0..nvalues {
404                values.push(cur.read_u16_be()?);
405            }
406            let pad = padded - raw_bytes;
407            cur.skip(pad)?;
408            Ok(NcAttrValue::UShorts(values))
409        }
410        NcType::UInt => {
411            let mut values = Vec::with_capacity(nvalues);
412            for _ in 0..nvalues {
413                values.push(cur.read_u32_be()?);
414            }
415            Ok(NcAttrValue::UInts(values))
416        }
417        NcType::Int64 => {
418            let mut values = Vec::with_capacity(nvalues);
419            for _ in 0..nvalues {
420                values.push(cur.read_i64_be()?);
421            }
422            Ok(NcAttrValue::Int64s(values))
423        }
424        NcType::UInt64 => {
425            let mut values = Vec::with_capacity(nvalues);
426            for _ in 0..nvalues {
427                values.push(cur.read_u64_be()?);
428            }
429            Ok(NcAttrValue::UInt64s(values))
430        }
431        NcType::String
432        | NcType::Enum { .. }
433        | NcType::Compound { .. }
434        | NcType::Opaque { .. }
435        | NcType::Array { .. }
436        | NcType::VLen { .. } => Err(Error::InvalidData(format!(
437            "{:?} is not valid in classic format attributes",
438            typ
439        ))),
440    }
441}
442
443/// Parse the variable list.
444fn parse_var_list(
445    cur: &mut Cursor<'_>,
446    format: NcFormat,
447    dims: &[NcDimension],
448) -> Result<Vec<NcVariable>> {
449    let tag = cur.read_u32_be()?;
450
451    if tag == ABSENT {
452        let _zero = cur.read_count(format)?;
453        return Ok(Vec::new());
454    }
455
456    if tag != NC_VARIABLE {
457        return Err(Error::InvalidData(format!(
458            "expected NC_VARIABLE tag (0x{:08X}), got 0x{:08X}",
459            NC_VARIABLE, tag
460        )));
461    }
462
463    let nelems = read_list_count(cur, format, 28, "variable count")?;
464    let mut vars = Vec::with_capacity(nelems);
465
466    for _ in 0..nelems {
467        let name = cur.read_name(format)?;
468
469        // Number of dimensions for this variable.
470        let ndims = checked_usize_from_u64(cur.read_count(format)?, "variable dimension count")?;
471
472        // Dimension IDs are NON_NEG values and widen to 64 bits in CDF-5.
473        let mut var_dims = Vec::with_capacity(ndims);
474        let mut is_record_var = false;
475        for _ in 0..ndims {
476            let dimid = checked_usize_from_u64(cur.read_count(format)?, "dimension id")?;
477            if dimid >= dims.len() {
478                return Err(Error::InvalidData(format!(
479                    "variable '{}' references dimension index {} but only {} dimensions exist",
480                    name,
481                    dimid,
482                    dims.len()
483                )));
484            }
485            if dims[dimid].is_unlimited {
486                is_record_var = true;
487            }
488            var_dims.push(dims[dimid].clone());
489        }
490
491        // Variable attributes.
492        let attributes = parse_att_list(cur, format)?;
493
494        // nc_type (always 4 bytes).
495        let nc_type_code = cur.read_u32_be()?;
496        let dtype = nc_type_from_code(nc_type_code)?;
497
498        // vsize: the size of one record's worth of data for this variable,
499        // or the total size for non-record variables.
500        // 4 bytes for CDF-1/2, 8 bytes for CDF-5.
501        let vsize = cur.read_count(format)?;
502
503        // begin (data offset): 4 bytes for CDF-1, 8 bytes for CDF-2/5.
504        let data_offset = match format {
505            NcFormat::Classic => cur.read_u32_be()? as u64,
506            NcFormat::Offset64 | NcFormat::Cdf5 => cur.read_u64_be()?,
507            _ => unreachable!("classic parser only handles CDF-1/2/5"),
508        };
509
510        // Compute record_size (the per-record slice size).
511        let record_size = if is_record_var { vsize } else { 0 };
512
513        // For non-record variables, data_size = vsize.
514        // For record variables, data_size = vsize * numrecs (computed at read time).
515        let data_size = if is_record_var { 0 } else { vsize };
516
517        vars.push(NcVariable {
518            name,
519            dimensions: var_dims,
520            dtype,
521            attributes,
522            data_offset,
523            _data_size: data_size,
524            is_record_var,
525            record_size,
526        });
527    }
528
529    Ok(vars)
530}
531
532pub(crate) fn apply_unlimited_dimension_size(
533    dimensions: &mut [NcDimension],
534    variables: &mut [NcVariable],
535    numrecs: u64,
536) {
537    for dim in dimensions.iter_mut().filter(|dim| dim.is_unlimited) {
538        dim.size = numrecs;
539    }
540
541    for variable in variables {
542        for dim in variable
543            .dimensions
544            .iter_mut()
545            .filter(|dim| dim.is_unlimited)
546        {
547            dim.size = numrecs;
548        }
549    }
550}
551
552#[cfg(test)]
553mod tests {
554    use super::*;
555    use crate::NcFormat;
556
557    /// Build a minimal CDF-1 file header in memory.
558    /// This helper constructs valid header bytes for testing.
559    fn build_cdf1_header(
560        dims: &[(&str, u32)],
561        attrs: &[(&str, u32, &[u8])], // (name, nc_type, raw_value_bytes)
562        vars: &[(&str, &[u32], u32, u32, u32)], // (name, dimids, nc_type, vsize, offset)
563        numrecs: u32,
564    ) -> Vec<u8> {
565        let mut buf = Vec::new();
566
567        // Magic: CDF\x01
568        buf.extend_from_slice(b"CDF\x01");
569
570        // numrecs (4 bytes)
571        buf.extend_from_slice(&numrecs.to_be_bytes());
572
573        // dim_list
574        if dims.is_empty() {
575            // ABSENT
576            buf.extend_from_slice(&ABSENT.to_be_bytes());
577            buf.extend_from_slice(&0u32.to_be_bytes());
578        } else {
579            buf.extend_from_slice(&NC_DIMENSION.to_be_bytes());
580            buf.extend_from_slice(&(dims.len() as u32).to_be_bytes());
581            for (name, size) in dims {
582                write_name_cdf1(&mut buf, name);
583                buf.extend_from_slice(&size.to_be_bytes());
584            }
585        }
586
587        // att_list (global)
588        write_att_list_cdf1(&mut buf, attrs);
589
590        // var_list
591        if vars.is_empty() {
592            buf.extend_from_slice(&ABSENT.to_be_bytes());
593            buf.extend_from_slice(&0u32.to_be_bytes());
594        } else {
595            buf.extend_from_slice(&NC_VARIABLE.to_be_bytes());
596            buf.extend_from_slice(&(vars.len() as u32).to_be_bytes());
597            for (name, dimids, nc_type, vsize, offset) in vars {
598                write_name_cdf1(&mut buf, name);
599                // ndims
600                buf.extend_from_slice(&(dimids.len() as u32).to_be_bytes());
601                // dimids
602                for &did in *dimids {
603                    buf.extend_from_slice(&did.to_be_bytes());
604                }
605                // att_list (empty for test vars)
606                buf.extend_from_slice(&ABSENT.to_be_bytes());
607                buf.extend_from_slice(&0u32.to_be_bytes());
608                // nc_type
609                buf.extend_from_slice(&nc_type.to_be_bytes());
610                // vsize
611                buf.extend_from_slice(&vsize.to_be_bytes());
612                // begin (offset) -- 4 bytes for CDF-1
613                buf.extend_from_slice(&offset.to_be_bytes());
614            }
615        }
616
617        buf
618    }
619
620    fn write_name_cdf1(buf: &mut Vec<u8>, name: &str) {
621        let name_bytes = name.as_bytes();
622        buf.extend_from_slice(&(name_bytes.len() as u32).to_be_bytes());
623        buf.extend_from_slice(name_bytes);
624        let pad = pad_to_4(name_bytes.len()) - name_bytes.len();
625        for _ in 0..pad {
626            buf.push(0);
627        }
628    }
629
630    fn write_att_list_cdf1(buf: &mut Vec<u8>, attrs: &[(&str, u32, &[u8])]) {
631        if attrs.is_empty() {
632            buf.extend_from_slice(&ABSENT.to_be_bytes());
633            buf.extend_from_slice(&0u32.to_be_bytes());
634            return;
635        }
636        buf.extend_from_slice(&NC_ATTRIBUTE.to_be_bytes());
637        buf.extend_from_slice(&(attrs.len() as u32).to_be_bytes());
638        for (name, nc_type, value_bytes) in attrs {
639            write_name_cdf1(buf, name);
640            buf.extend_from_slice(&nc_type.to_be_bytes());
641            // For simplicity, nvalues = 1 element (caller provides exactly one element's bytes)
642            let elem_size = match nc_type {
643                1 => 1, // byte
644                2 => 1, // char
645                3 => 2, // short
646                4 => 4, // int
647                5 => 4, // float
648                6 => 8, // double
649                _ => 1,
650            };
651            let nvalues = value_bytes.len() / elem_size;
652            buf.extend_from_slice(&(nvalues as u32).to_be_bytes());
653            buf.extend_from_slice(value_bytes);
654            let pad = pad_to_4(value_bytes.len()) - value_bytes.len();
655            for _ in 0..pad {
656                buf.push(0);
657            }
658        }
659    }
660
661    fn write_count_cdf5(buf: &mut Vec<u8>, value: u64) {
662        buf.extend_from_slice(&value.to_be_bytes());
663    }
664
665    fn write_name_cdf5(buf: &mut Vec<u8>, name: &str) {
666        let name_bytes = name.as_bytes();
667        write_count_cdf5(buf, name_bytes.len() as u64);
668        buf.extend_from_slice(name_bytes);
669        let pad = pad_to_4(name_bytes.len()) - name_bytes.len();
670        for _ in 0..pad {
671            buf.push(0);
672        }
673    }
674
675    fn build_cdf5_header(
676        dims: &[(&str, u64)],
677        vars: &[(&str, &[u64], u32, u64, u64)],
678        numrecs: u64,
679    ) -> Vec<u8> {
680        let mut buf = Vec::new();
681        buf.extend_from_slice(b"CDF\x05");
682        write_count_cdf5(&mut buf, numrecs);
683
684        if dims.is_empty() {
685            buf.extend_from_slice(&ABSENT.to_be_bytes());
686            write_count_cdf5(&mut buf, 0);
687        } else {
688            buf.extend_from_slice(&NC_DIMENSION.to_be_bytes());
689            write_count_cdf5(&mut buf, dims.len() as u64);
690            for (name, size) in dims {
691                write_name_cdf5(&mut buf, name);
692                write_count_cdf5(&mut buf, *size);
693            }
694        }
695
696        buf.extend_from_slice(&ABSENT.to_be_bytes());
697        write_count_cdf5(&mut buf, 0);
698
699        if vars.is_empty() {
700            buf.extend_from_slice(&ABSENT.to_be_bytes());
701            write_count_cdf5(&mut buf, 0);
702        } else {
703            buf.extend_from_slice(&NC_VARIABLE.to_be_bytes());
704            write_count_cdf5(&mut buf, vars.len() as u64);
705            for (name, dimids, nc_type, vsize, offset) in vars {
706                write_name_cdf5(&mut buf, name);
707                write_count_cdf5(&mut buf, dimids.len() as u64);
708                for dimid in *dimids {
709                    write_count_cdf5(&mut buf, *dimid);
710                }
711                buf.extend_from_slice(&ABSENT.to_be_bytes());
712                write_count_cdf5(&mut buf, 0);
713                buf.extend_from_slice(&nc_type.to_be_bytes());
714                write_count_cdf5(&mut buf, *vsize);
715                buf.extend_from_slice(&offset.to_be_bytes());
716            }
717        }
718
719        buf
720    }
721
722    #[test]
723    fn empty_header() {
724        let data = build_cdf1_header(&[], &[], &[], 0);
725        let header = parse_header(&data, NcFormat::Classic).unwrap();
726        assert!(header.dimensions.is_empty());
727        assert!(header.global_attributes.is_empty());
728        assert!(header.variables.is_empty());
729        assert_eq!(header.numrecs, 0);
730    }
731
732    #[test]
733    fn dimensions() {
734        let data = build_cdf1_header(
735            &[("x", 10), ("y", 20), ("time", 0)], // time is unlimited
736            &[],
737            &[],
738            5,
739        );
740        let header = parse_header(&data, NcFormat::Classic).unwrap();
741        assert_eq!(header.dimensions.len(), 3);
742
743        assert_eq!(header.dimensions[0].name, "x");
744        assert_eq!(header.dimensions[0].size, 10);
745        assert!(!header.dimensions[0].is_unlimited);
746
747        assert_eq!(header.dimensions[1].name, "y");
748        assert_eq!(header.dimensions[1].size, 20);
749        assert!(!header.dimensions[1].is_unlimited);
750
751        assert_eq!(header.dimensions[2].name, "time");
752        assert_eq!(header.dimensions[2].size, 5);
753        assert!(header.dimensions[2].is_unlimited);
754
755        assert_eq!(header.numrecs, 5);
756    }
757
758    #[test]
759    fn global_attributes() {
760        // One NC_INT attribute with value 42.
761        let value_bytes = 42i32.to_be_bytes();
762        let data = build_cdf1_header(
763            &[],
764            &[("answer", 4, &value_bytes)], // NC_INT = 4
765            &[],
766            0,
767        );
768        let header = parse_header(&data, NcFormat::Classic).unwrap();
769        assert_eq!(header.global_attributes.len(), 1);
770        assert_eq!(header.global_attributes[0].name, "answer");
771        if let NcAttrValue::Ints(ref v) = header.global_attributes[0].value {
772            assert_eq!(v, &[42]);
773        } else {
774            panic!("expected Ints attribute");
775        }
776    }
777
778    #[test]
779    fn char_attribute() {
780        let text = b"hello";
781        let data = build_cdf1_header(
782            &[],
783            &[("greeting", 2, text)], // NC_CHAR = 2
784            &[],
785            0,
786        );
787        let header = parse_header(&data, NcFormat::Classic).unwrap();
788        assert_eq!(header.global_attributes.len(), 1);
789        assert_eq!(header.global_attributes[0].name, "greeting");
790        if let NcAttrValue::Chars(ref s) = header.global_attributes[0].value {
791            assert_eq!(s, "hello");
792        } else {
793            panic!("expected Chars attribute");
794        }
795    }
796
797    #[test]
798    fn variables() {
799        let data = build_cdf1_header(
800            &[("x", 10), ("y", 20)],
801            &[],
802            &[
803                ("temperature", &[0, 1], 5, 800, 200), // float, dimids=[x,y]
804                ("pressure", &[0, 1], 6, 1600, 1000),  // double, dimids=[x,y]
805            ],
806            0,
807        );
808        let header = parse_header(&data, NcFormat::Classic).unwrap();
809        assert_eq!(header.variables.len(), 2);
810
811        let temp = &header.variables[0];
812        assert_eq!(temp.name, "temperature");
813        assert_eq!(temp.dtype, NcType::Float);
814        assert_eq!(temp.dimensions.len(), 2);
815        assert_eq!(temp.dimensions[0].name, "x");
816        assert_eq!(temp.dimensions[1].name, "y");
817        assert_eq!(temp.data_offset, 200);
818        assert_eq!(temp._data_size, 800);
819        assert!(!temp.is_record_var);
820
821        let pres = &header.variables[1];
822        assert_eq!(pres.name, "pressure");
823        assert_eq!(pres.dtype, NcType::Double);
824        assert_eq!(pres.data_offset, 1000);
825        assert_eq!(pres._data_size, 1600);
826    }
827
828    #[test]
829    fn record_variable() {
830        let data = build_cdf1_header(
831            &[("time", 0), ("x", 5)], // time is unlimited
832            &[],
833            &[
834                // record variable: first dim is unlimited
835                ("values", &[0, 1], 5, 20, 100), // float, vsize=5*4=20 per record
836            ],
837            10, // 10 records
838        );
839        let header = parse_header(&data, NcFormat::Classic).unwrap();
840        assert_eq!(header.numrecs, 10);
841        assert_eq!(header.variables.len(), 1);
842
843        let var = &header.variables[0];
844        assert_eq!(var.name, "values");
845        assert!(var.is_record_var);
846        assert_eq!(var.record_size, 20);
847        assert_eq!(var._data_size, 0); // data_size=0 for record vars (computed at read time)
848        assert_eq!(var.shape(), vec![10, 5]);
849    }
850
851    #[test]
852    fn cdf2_offset64() {
853        // Build a CDF-2 header manually.
854        // CDF-2 is mostly the same as CDF-1 but the data offset (begin) field is 8 bytes.
855        let mut buf = Vec::new();
856        buf.extend_from_slice(b"CDF\x02");
857        // numrecs (4 bytes)
858        buf.extend_from_slice(&0u32.to_be_bytes());
859        // dim_list: one dimension "x" with size 100
860        buf.extend_from_slice(&NC_DIMENSION.to_be_bytes());
861        buf.extend_from_slice(&1u32.to_be_bytes());
862        write_name_cdf1(&mut buf, "x");
863        buf.extend_from_slice(&100u32.to_be_bytes());
864        // att_list: absent
865        buf.extend_from_slice(&ABSENT.to_be_bytes());
866        buf.extend_from_slice(&0u32.to_be_bytes());
867        // var_list: one variable
868        buf.extend_from_slice(&NC_VARIABLE.to_be_bytes());
869        buf.extend_from_slice(&1u32.to_be_bytes());
870        write_name_cdf1(&mut buf, "data");
871        buf.extend_from_slice(&1u32.to_be_bytes()); // ndims=1
872        buf.extend_from_slice(&0u32.to_be_bytes()); // dimid=0
873                                                    // att_list: absent
874        buf.extend_from_slice(&ABSENT.to_be_bytes());
875        buf.extend_from_slice(&0u32.to_be_bytes());
876        // nc_type = NC_FLOAT = 5
877        buf.extend_from_slice(&5u32.to_be_bytes());
878        // vsize (4 bytes for CDF-2)
879        buf.extend_from_slice(&400u32.to_be_bytes());
880        // begin (8 bytes for CDF-2!)
881        let offset: u64 = 0x1_0000_0000; // > 4 GB offset to test 64-bit
882        buf.extend_from_slice(&offset.to_be_bytes());
883
884        let header = parse_header(&buf, NcFormat::Offset64).unwrap();
885        assert_eq!(header.variables.len(), 1);
886        assert_eq!(header.variables[0].data_offset, 0x1_0000_0000);
887        assert_eq!(header.variables[0]._data_size, 400);
888    }
889
890    #[test]
891    fn cdf5_uses_64_bit_counts_for_var_metadata() {
892        let data = build_cdf5_header(
893            &[("n", 4)],
894            &[
895                ("ubyte_var", &[0], 7, 4, 128),
896                ("int64_var", &[0], 10, 32, 256),
897            ],
898            0,
899        );
900
901        let header = parse_header(&data, NcFormat::Cdf5).unwrap();
902        assert_eq!(header.variables.len(), 2);
903        assert_eq!(header.variables[0].name, "ubyte_var");
904        assert_eq!(header.variables[0].dtype, NcType::UByte);
905        assert_eq!(header.variables[0].dimensions[0].name, "n");
906        assert_eq!(header.variables[1].name, "int64_var");
907        assert_eq!(header.variables[1].dtype, NcType::Int64);
908        assert_eq!(header.variables[1].data_offset, 256);
909    }
910
911    #[test]
912    fn unlimited_dimension_size_tracks_numrecs() {
913        let data = build_cdf1_header(
914            &[("time", 0), ("x", 5)],
915            &[],
916            &[("series", &[0, 1], 6, 40, 128)],
917            3,
918        );
919
920        let header = parse_header(&data, NcFormat::Classic).unwrap();
921        assert_eq!(header.dimensions[0].size, 3);
922        assert_eq!(header.variables[0].shape(), vec![3, 5]);
923    }
924
925    #[test]
926    fn double_attribute() {
927        let pi = std::f64::consts::PI;
928        let value_bytes = pi.to_be_bytes();
929        let data = build_cdf1_header(
930            &[],
931            &[("pi", 6, &value_bytes)], // NC_DOUBLE = 6
932            &[],
933            0,
934        );
935        let header = parse_header(&data, NcFormat::Classic).unwrap();
936        assert_eq!(header.global_attributes.len(), 1);
937        if let NcAttrValue::Doubles(ref v) = header.global_attributes[0].value {
938            assert_eq!(v.len(), 1);
939            assert!((v[0] - pi).abs() < 1e-15);
940        } else {
941            panic!("expected Doubles attribute");
942        }
943    }
944
945    #[test]
946    fn short_attribute_with_padding() {
947        // NC_SHORT (2 bytes) with 3 values = 6 bytes, padded to 8.
948        let mut value_bytes = Vec::new();
949        value_bytes.extend_from_slice(&1i16.to_be_bytes());
950        value_bytes.extend_from_slice(&2i16.to_be_bytes());
951        value_bytes.extend_from_slice(&3i16.to_be_bytes());
952        // The build helper will add padding.
953
954        let mut buf = Vec::new();
955        buf.extend_from_slice(b"CDF\x01");
956        buf.extend_from_slice(&0u32.to_be_bytes()); // numrecs
957                                                    // dim_list: absent
958        buf.extend_from_slice(&ABSENT.to_be_bytes());
959        buf.extend_from_slice(&0u32.to_be_bytes());
960        // att_list: one short attribute with 3 values
961        buf.extend_from_slice(&NC_ATTRIBUTE.to_be_bytes());
962        buf.extend_from_slice(&1u32.to_be_bytes());
963        write_name_cdf1(&mut buf, "vals");
964        buf.extend_from_slice(&3u32.to_be_bytes()); // NC_SHORT
965        buf.extend_from_slice(&3u32.to_be_bytes()); // nvalues=3
966        buf.extend_from_slice(&value_bytes);
967        // Pad to 4-byte boundary: 6 bytes -> 2 bytes padding
968        buf.extend_from_slice(&[0, 0]);
969        // var_list: absent
970        buf.extend_from_slice(&ABSENT.to_be_bytes());
971        buf.extend_from_slice(&0u32.to_be_bytes());
972
973        let header = parse_header(&buf, NcFormat::Classic).unwrap();
974        if let NcAttrValue::Shorts(ref v) = header.global_attributes[0].value {
975            assert_eq!(v, &[1, 2, 3]);
976        } else {
977            panic!("expected Shorts attribute");
978        }
979    }
980
981    #[test]
982    fn name_padding() {
983        // Names with lengths 1, 2, 3, 4, 5 to test all padding cases.
984        let data = build_cdf1_header(
985            &[("a", 1), ("ab", 2), ("abc", 3), ("abcd", 4), ("abcde", 5)],
986            &[],
987            &[],
988            0,
989        );
990        let header = parse_header(&data, NcFormat::Classic).unwrap();
991        assert_eq!(header.dimensions.len(), 5);
992        assert_eq!(header.dimensions[0].name, "a");
993        assert_eq!(header.dimensions[1].name, "ab");
994        assert_eq!(header.dimensions[2].name, "abc");
995        assert_eq!(header.dimensions[3].name, "abcd");
996        assert_eq!(header.dimensions[4].name, "abcde");
997    }
998
999    #[test]
1000    fn invalid_dimension_reference() {
1001        // Variable referencing a non-existent dimension.
1002        let data = build_cdf1_header(
1003            &[("x", 10)], // only dim 0 exists
1004            &[],
1005            &[("bad_var", &[5], 4, 40, 100)], // dimid=5 is out of range
1006            0,
1007        );
1008        let result = parse_header(&data, NcFormat::Classic);
1009        assert!(result.is_err());
1010    }
1011
1012    #[test]
1013    fn byte_attribute() {
1014        let value_bytes: &[u8] = &[0xFF]; // -1 as i8
1015        let data = build_cdf1_header(
1016            &[],
1017            &[("flag", 1, value_bytes)], // NC_BYTE = 1
1018            &[],
1019            0,
1020        );
1021        let header = parse_header(&data, NcFormat::Classic).unwrap();
1022        if let NcAttrValue::Bytes(ref v) = header.global_attributes[0].value {
1023            assert_eq!(v, &[-1i8]);
1024        } else {
1025            panic!("expected Bytes attribute");
1026        }
1027    }
1028
1029    #[test]
1030    fn float_attribute() {
1031        let val = std::f32::consts::PI;
1032        let value_bytes = val.to_be_bytes();
1033        let data = build_cdf1_header(
1034            &[],
1035            &[("pi_approx", 5, &value_bytes)], // NC_FLOAT = 5
1036            &[],
1037            0,
1038        );
1039        let header = parse_header(&data, NcFormat::Classic).unwrap();
1040        if let NcAttrValue::Floats(ref v) = header.global_attributes[0].value {
1041            assert_eq!(v.len(), 1);
1042            assert!((v[0] - std::f32::consts::PI).abs() < 1e-6);
1043        } else {
1044            panic!("expected Floats attribute");
1045        }
1046    }
1047
1048    #[test]
1049    fn multiple_global_attributes() {
1050        let int_val = 100i32.to_be_bytes();
1051        let float_val = 2.5f32.to_be_bytes();
1052        let data = build_cdf1_header(
1053            &[],
1054            &[("count", 4, &int_val), ("scale", 5, &float_val)],
1055            &[],
1056            0,
1057        );
1058        let header = parse_header(&data, NcFormat::Classic).unwrap();
1059        assert_eq!(header.global_attributes.len(), 2);
1060        assert_eq!(header.global_attributes[0].name, "count");
1061        assert_eq!(header.global_attributes[1].name, "scale");
1062    }
1063}