Skip to main content

netcdf_reader/classic/
header.rs

1//! Parse the NetCDF classic (CDF-1/2/5) binary header.
2//!
3//! The classic header is a sequence of big-endian fields describing dimensions,
4//! global attributes, and variables. All multi-byte integers are big-endian.
5//! Strings are padded to 4-byte alignment. CDF-5 uses 8-byte counts and sizes
6//! where CDF-1/2 use 4-byte values.
7
8use crate::error::{Error, Result};
9use crate::types::{NcAttrValue, NcAttribute, NcDimension, NcType, NcVariable};
10use crate::NcFormat;
11
12use super::types::{nc_type_from_code, pad_to_4};
13
// Header tag constants.
// Each list in the header (dimensions, attributes, variables) begins with a
// 4-byte big-endian tag that the parser compares against these values.

/// Tag of an empty list; the parser expects a (zero) count field after it.
const ABSENT: u32 = 0x0000_0000;
/// Tag that must introduce a non-empty dimension list.
const NC_DIMENSION: u32 = 0x0000_000A;
/// Tag that must introduce a non-empty variable list.
const NC_VARIABLE: u32 = 0x0000_000B;
/// Tag that must introduce a non-empty attribute list (global or per-variable).
const NC_ATTRIBUTE: u32 = 0x0000_000C;

/// Streaming (indeterminate) record count sentinel.
/// `parse_header` compares the 4-byte `numrecs` field against this value.
const STREAMING: u32 = 0xFFFF_FFFF;
22
/// Result of parsing a classic NetCDF header.
///
/// Produced by `parse_header`; all fields are owned copies of the header
/// contents, so the value does not borrow from the input bytes.
pub struct ClassicHeader {
    /// Dimensions in the order they appear in the header's dimension list.
    /// When `numrecs` is non-zero, the size of every unlimited dimension is
    /// set to `numrecs`.
    pub dimensions: Vec<NcDimension>,
    /// Global (file-level) attributes, in header order.
    pub global_attributes: Vec<NcAttribute>,
    /// Variables in header order, each carrying clones of its dimensions.
    pub variables: Vec<NcVariable>,
    /// Record count read from the header; the `STREAMING` sentinel is
    /// reported as 0.
    pub numrecs: u64,
}
30
31/// A cursor for reading big-endian data from a byte slice.
32struct Cursor<'a> {
33    data: &'a [u8],
34    pos: usize,
35}
36
37impl<'a> Cursor<'a> {
38    fn new(data: &'a [u8]) -> Self {
39        Cursor { data, pos: 0 }
40    }
41
42    fn remaining(&self) -> usize {
43        self.data.len().saturating_sub(self.pos)
44    }
45
46    fn ensure(&self, n: usize) -> Result<()> {
47        if self.remaining() < n {
48            Err(Error::InvalidData(format!(
49                "unexpected end of header at offset {}: need {} bytes, have {}",
50                self.pos,
51                n,
52                self.remaining()
53            )))
54        } else {
55            Ok(())
56        }
57    }
58
59    #[allow(dead_code)]
60    fn read_u8(&mut self) -> Result<u8> {
61        self.ensure(1)?;
62        let v = self.data[self.pos];
63        self.pos += 1;
64        Ok(v)
65    }
66
67    fn read_u16_be(&mut self) -> Result<u16> {
68        self.ensure(2)?;
69        let v = u16::from_be_bytes([self.data[self.pos], self.data[self.pos + 1]]);
70        self.pos += 2;
71        Ok(v)
72    }
73
74    fn read_u32_be(&mut self) -> Result<u32> {
75        self.ensure(4)?;
76        let v = u32::from_be_bytes([
77            self.data[self.pos],
78            self.data[self.pos + 1],
79            self.data[self.pos + 2],
80            self.data[self.pos + 3],
81        ]);
82        self.pos += 4;
83        Ok(v)
84    }
85
86    fn read_i32_be(&mut self) -> Result<i32> {
87        self.ensure(4)?;
88        let v = i32::from_be_bytes([
89            self.data[self.pos],
90            self.data[self.pos + 1],
91            self.data[self.pos + 2],
92            self.data[self.pos + 3],
93        ]);
94        self.pos += 4;
95        Ok(v)
96    }
97
98    fn read_u64_be(&mut self) -> Result<u64> {
99        self.ensure(8)?;
100        let v = u64::from_be_bytes([
101            self.data[self.pos],
102            self.data[self.pos + 1],
103            self.data[self.pos + 2],
104            self.data[self.pos + 3],
105            self.data[self.pos + 4],
106            self.data[self.pos + 5],
107            self.data[self.pos + 6],
108            self.data[self.pos + 7],
109        ]);
110        self.pos += 8;
111        Ok(v)
112    }
113
114    fn read_i64_be(&mut self) -> Result<i64> {
115        self.ensure(8)?;
116        let v = i64::from_be_bytes([
117            self.data[self.pos],
118            self.data[self.pos + 1],
119            self.data[self.pos + 2],
120            self.data[self.pos + 3],
121            self.data[self.pos + 4],
122            self.data[self.pos + 5],
123            self.data[self.pos + 6],
124            self.data[self.pos + 7],
125        ]);
126        self.pos += 8;
127        Ok(v)
128    }
129
130    fn read_f32_be(&mut self) -> Result<f32> {
131        self.ensure(4)?;
132        let v = f32::from_be_bytes([
133            self.data[self.pos],
134            self.data[self.pos + 1],
135            self.data[self.pos + 2],
136            self.data[self.pos + 3],
137        ]);
138        self.pos += 4;
139        Ok(v)
140    }
141
142    fn read_f64_be(&mut self) -> Result<f64> {
143        self.ensure(8)?;
144        let v = f64::from_be_bytes([
145            self.data[self.pos],
146            self.data[self.pos + 1],
147            self.data[self.pos + 2],
148            self.data[self.pos + 3],
149            self.data[self.pos + 4],
150            self.data[self.pos + 5],
151            self.data[self.pos + 6],
152            self.data[self.pos + 7],
153        ]);
154        self.pos += 8;
155        Ok(v)
156    }
157
158    fn read_bytes(&mut self, n: usize) -> Result<&'a [u8]> {
159        self.ensure(n)?;
160        let slice = &self.data[self.pos..self.pos + n];
161        self.pos += n;
162        Ok(slice)
163    }
164
165    fn skip(&mut self, n: usize) -> Result<()> {
166        self.ensure(n)?;
167        self.pos += n;
168        Ok(())
169    }
170
171    /// Read a count field: 4 bytes for CDF-1/2, 8 bytes for CDF-5.
172    fn read_count(&mut self, format: NcFormat) -> Result<u64> {
173        match format {
174            NcFormat::Cdf5 => self.read_u64_be(),
175            _ => self.read_u32_be().map(|v| v as u64),
176        }
177    }
178
179    /// Read a padded name: 4-byte length, then chars, then padding to 4-byte boundary.
180    /// The name length prefix is always 4 bytes for CDF-1/2 and 8 bytes for CDF-5.
181    fn read_name(&mut self, format: NcFormat) -> Result<String> {
182        let len = self.read_count(format)? as usize;
183        let bytes = self.read_bytes(len)?;
184        let padded_len = pad_to_4(len);
185        let pad = padded_len - len;
186        if pad > 0 {
187            self.skip(pad)?;
188        }
189        String::from_utf8(bytes.to_vec())
190            .map_err(|e| Error::InvalidData(format!("invalid UTF-8 name: {}", e)))
191    }
192}
193
194/// Parse a complete classic NetCDF header from raw file bytes.
195///
196/// The `format` parameter must be one of `Classic`, `Offset64`, or `Cdf5`
197/// (the caller has already read and validated the magic bytes).
198pub fn parse_header(data: &[u8], format: NcFormat) -> Result<ClassicHeader> {
199    // Skip past the 4-byte magic (already validated by caller).
200    let mut cur = Cursor::new(data);
201    cur.skip(4)?;
202
203    // numrecs: 4 bytes for CDF-1/2, 8 bytes for CDF-5.
204    let numrecs_raw = cur.read_count(format)?;
205    let numrecs = if format != NcFormat::Cdf5 && (numrecs_raw as u32) == STREAMING {
206        0 // Treat streaming as 0 records (will be updated when data is read)
207    } else {
208        numrecs_raw
209    };
210
211    // dim_list
212    let mut dimensions = parse_dim_list(&mut cur, format)?;
213
214    // att_list (global attributes)
215    let global_attributes = parse_att_list(&mut cur, format)?;
216
217    // var_list
218    let mut variables = parse_var_list(&mut cur, format, &dimensions)?;
219
220    if numrecs > 0 {
221        apply_unlimited_dimension_size(&mut dimensions, &mut variables, numrecs);
222    }
223
224    Ok(ClassicHeader {
225        dimensions,
226        global_attributes,
227        variables,
228        numrecs,
229    })
230}
231
232/// Parse the dimension list.
233fn parse_dim_list(cur: &mut Cursor<'_>, format: NcFormat) -> Result<Vec<NcDimension>> {
234    let tag = cur.read_u32_be()?;
235
236    if tag == ABSENT {
237        // ABSENT is a zero tag followed by a zero count.
238        let _zero = cur.read_count(format)?;
239        return Ok(Vec::new());
240    }
241
242    if tag != NC_DIMENSION {
243        return Err(Error::InvalidData(format!(
244            "expected NC_DIMENSION tag (0x{:08X}), got 0x{:08X}",
245            NC_DIMENSION, tag
246        )));
247    }
248
249    let nelems = cur.read_count(format)? as usize;
250    let mut dims = Vec::with_capacity(nelems);
251
252    for _ in 0..nelems {
253        let name = cur.read_name(format)?;
254        let size = cur.read_count(format)?;
255        // A dimension with size 0 is the unlimited (record) dimension.
256        let is_unlimited = size == 0;
257        dims.push(NcDimension {
258            name,
259            size,
260            is_unlimited,
261        });
262    }
263
264    Ok(dims)
265}
266
267/// Parse an attribute list (used for both global and variable attributes).
268fn parse_att_list(cur: &mut Cursor<'_>, format: NcFormat) -> Result<Vec<NcAttribute>> {
269    let tag = cur.read_u32_be()?;
270
271    if tag == ABSENT {
272        let _zero = cur.read_count(format)?;
273        return Ok(Vec::new());
274    }
275
276    if tag != NC_ATTRIBUTE {
277        return Err(Error::InvalidData(format!(
278            "expected NC_ATTRIBUTE tag (0x{:08X}), got 0x{:08X}",
279            NC_ATTRIBUTE, tag
280        )));
281    }
282
283    let nelems = cur.read_count(format)? as usize;
284    let mut attrs = Vec::with_capacity(nelems);
285
286    for _ in 0..nelems {
287        let name = cur.read_name(format)?;
288        let nc_type = cur.read_u32_be()?;
289        let nvalues = cur.read_count(format)? as usize;
290        let value = read_attr_values(cur, nc_type, nvalues, format)?;
291
292        attrs.push(NcAttribute { name, value });
293    }
294
295    Ok(attrs)
296}
297
298/// Read attribute values of the given type and count.
299/// Values are padded to a 4-byte boundary in the file.
300fn read_attr_values(
301    cur: &mut Cursor<'_>,
302    nc_type: u32,
303    nvalues: usize,
304    _format: NcFormat,
305) -> Result<NcAttrValue> {
306    let typ = nc_type_from_code(nc_type)?;
307    let elem_size = typ.size();
308    let raw_bytes = nvalues * elem_size;
309    let padded = pad_to_4(raw_bytes);
310
311    match typ {
312        NcType::Byte => {
313            let bytes = cur.read_bytes(raw_bytes)?;
314            let values: Vec<i8> = bytes.iter().map(|&b| b as i8).collect();
315            cur.skip(padded - raw_bytes)?;
316            Ok(NcAttrValue::Bytes(values))
317        }
318        NcType::Char => {
319            let bytes = cur.read_bytes(raw_bytes)?;
320            // Trim trailing null bytes (common in NetCDF char attributes).
321            let s = String::from_utf8_lossy(bytes);
322            let trimmed = s.trim_end_matches('\0').to_string();
323            cur.skip(padded - raw_bytes)?;
324            Ok(NcAttrValue::Chars(trimmed))
325        }
326        NcType::Short => {
327            let mut values = Vec::with_capacity(nvalues);
328            for _ in 0..nvalues {
329                values.push(cur.read_u16_be()? as i16);
330            }
331            let pad = padded - raw_bytes;
332            cur.skip(pad)?;
333            Ok(NcAttrValue::Shorts(values))
334        }
335        NcType::Int => {
336            let mut values = Vec::with_capacity(nvalues);
337            for _ in 0..nvalues {
338                values.push(cur.read_i32_be()?);
339            }
340            Ok(NcAttrValue::Ints(values))
341        }
342        NcType::Float => {
343            let mut values = Vec::with_capacity(nvalues);
344            for _ in 0..nvalues {
345                values.push(cur.read_f32_be()?);
346            }
347            Ok(NcAttrValue::Floats(values))
348        }
349        NcType::Double => {
350            let mut values = Vec::with_capacity(nvalues);
351            for _ in 0..nvalues {
352                values.push(cur.read_f64_be()?);
353            }
354            Ok(NcAttrValue::Doubles(values))
355        }
356        NcType::UByte => {
357            let bytes = cur.read_bytes(raw_bytes)?;
358            cur.skip(padded - raw_bytes)?;
359            Ok(NcAttrValue::UBytes(bytes.to_vec()))
360        }
361        NcType::UShort => {
362            let mut values = Vec::with_capacity(nvalues);
363            for _ in 0..nvalues {
364                values.push(cur.read_u16_be()?);
365            }
366            let pad = padded - raw_bytes;
367            cur.skip(pad)?;
368            Ok(NcAttrValue::UShorts(values))
369        }
370        NcType::UInt => {
371            let mut values = Vec::with_capacity(nvalues);
372            for _ in 0..nvalues {
373                values.push(cur.read_u32_be()?);
374            }
375            Ok(NcAttrValue::UInts(values))
376        }
377        NcType::Int64 => {
378            let mut values = Vec::with_capacity(nvalues);
379            for _ in 0..nvalues {
380                values.push(cur.read_i64_be()?);
381            }
382            Ok(NcAttrValue::Int64s(values))
383        }
384        NcType::UInt64 => {
385            let mut values = Vec::with_capacity(nvalues);
386            for _ in 0..nvalues {
387                values.push(cur.read_u64_be()?);
388            }
389            Ok(NcAttrValue::UInt64s(values))
390        }
391        NcType::String
392        | NcType::Compound { .. }
393        | NcType::Opaque { .. }
394        | NcType::Array { .. }
395        | NcType::VLen { .. } => Err(Error::InvalidData(format!(
396            "{:?} is not valid in classic format attributes",
397            typ
398        ))),
399    }
400}
401
402/// Parse the variable list.
403fn parse_var_list(
404    cur: &mut Cursor<'_>,
405    format: NcFormat,
406    dims: &[NcDimension],
407) -> Result<Vec<NcVariable>> {
408    let tag = cur.read_u32_be()?;
409
410    if tag == ABSENT {
411        let _zero = cur.read_count(format)?;
412        return Ok(Vec::new());
413    }
414
415    if tag != NC_VARIABLE {
416        return Err(Error::InvalidData(format!(
417            "expected NC_VARIABLE tag (0x{:08X}), got 0x{:08X}",
418            NC_VARIABLE, tag
419        )));
420    }
421
422    let nelems = cur.read_count(format)? as usize;
423    let mut vars = Vec::with_capacity(nelems);
424
425    for _ in 0..nelems {
426        let name = cur.read_name(format)?;
427
428        // Number of dimensions for this variable.
429        let ndims = cur.read_count(format)? as usize;
430
431        // Dimension IDs are NON_NEG values and widen to 64 bits in CDF-5.
432        let mut var_dims = Vec::with_capacity(ndims);
433        let mut is_record_var = false;
434        for _ in 0..ndims {
435            let dimid = cur.read_count(format)? as usize;
436            if dimid >= dims.len() {
437                return Err(Error::InvalidData(format!(
438                    "variable '{}' references dimension index {} but only {} dimensions exist",
439                    name,
440                    dimid,
441                    dims.len()
442                )));
443            }
444            if dims[dimid].is_unlimited {
445                is_record_var = true;
446            }
447            var_dims.push(dims[dimid].clone());
448        }
449
450        // Variable attributes.
451        let attributes = parse_att_list(cur, format)?;
452
453        // nc_type (always 4 bytes).
454        let nc_type_code = cur.read_u32_be()?;
455        let dtype = nc_type_from_code(nc_type_code)?;
456
457        // vsize: the size of one record's worth of data for this variable,
458        // or the total size for non-record variables.
459        // 4 bytes for CDF-1/2, 8 bytes for CDF-5.
460        let vsize = cur.read_count(format)?;
461
462        // begin (data offset): 4 bytes for CDF-1, 8 bytes for CDF-2/5.
463        let data_offset = match format {
464            NcFormat::Classic => cur.read_u32_be()? as u64,
465            NcFormat::Offset64 | NcFormat::Cdf5 => cur.read_u64_be()?,
466            _ => unreachable!("classic parser only handles CDF-1/2/5"),
467        };
468
469        // Compute record_size (the per-record slice size).
470        let record_size = if is_record_var { vsize } else { 0 };
471
472        // For non-record variables, data_size = vsize.
473        // For record variables, data_size = vsize * numrecs (computed at read time).
474        let data_size = if is_record_var { 0 } else { vsize };
475
476        vars.push(NcVariable {
477            name,
478            dimensions: var_dims,
479            dtype,
480            attributes,
481            data_offset,
482            _data_size: data_size,
483            is_record_var,
484            record_size,
485        });
486    }
487
488    Ok(vars)
489}
490
491fn apply_unlimited_dimension_size(
492    dimensions: &mut [NcDimension],
493    variables: &mut [NcVariable],
494    numrecs: u64,
495) {
496    for dim in dimensions.iter_mut().filter(|dim| dim.is_unlimited) {
497        dim.size = numrecs;
498    }
499
500    for variable in variables {
501        for dim in variable
502            .dimensions
503            .iter_mut()
504            .filter(|dim| dim.is_unlimited)
505        {
506            dim.size = numrecs;
507        }
508    }
509}
510
511#[cfg(test)]
512mod tests {
513    use super::*;
514    use crate::NcFormat;
515
516    /// Build a minimal CDF-1 file header in memory.
517    /// This helper constructs valid header bytes for testing.
518    fn build_cdf1_header(
519        dims: &[(&str, u32)],
520        attrs: &[(&str, u32, &[u8])], // (name, nc_type, raw_value_bytes)
521        vars: &[(&str, &[u32], u32, u32, u32)], // (name, dimids, nc_type, vsize, offset)
522        numrecs: u32,
523    ) -> Vec<u8> {
524        let mut buf = Vec::new();
525
526        // Magic: CDF\x01
527        buf.extend_from_slice(b"CDF\x01");
528
529        // numrecs (4 bytes)
530        buf.extend_from_slice(&numrecs.to_be_bytes());
531
532        // dim_list
533        if dims.is_empty() {
534            // ABSENT
535            buf.extend_from_slice(&ABSENT.to_be_bytes());
536            buf.extend_from_slice(&0u32.to_be_bytes());
537        } else {
538            buf.extend_from_slice(&NC_DIMENSION.to_be_bytes());
539            buf.extend_from_slice(&(dims.len() as u32).to_be_bytes());
540            for (name, size) in dims {
541                write_name_cdf1(&mut buf, name);
542                buf.extend_from_slice(&size.to_be_bytes());
543            }
544        }
545
546        // att_list (global)
547        write_att_list_cdf1(&mut buf, attrs);
548
549        // var_list
550        if vars.is_empty() {
551            buf.extend_from_slice(&ABSENT.to_be_bytes());
552            buf.extend_from_slice(&0u32.to_be_bytes());
553        } else {
554            buf.extend_from_slice(&NC_VARIABLE.to_be_bytes());
555            buf.extend_from_slice(&(vars.len() as u32).to_be_bytes());
556            for (name, dimids, nc_type, vsize, offset) in vars {
557                write_name_cdf1(&mut buf, name);
558                // ndims
559                buf.extend_from_slice(&(dimids.len() as u32).to_be_bytes());
560                // dimids
561                for &did in *dimids {
562                    buf.extend_from_slice(&did.to_be_bytes());
563                }
564                // att_list (empty for test vars)
565                buf.extend_from_slice(&ABSENT.to_be_bytes());
566                buf.extend_from_slice(&0u32.to_be_bytes());
567                // nc_type
568                buf.extend_from_slice(&nc_type.to_be_bytes());
569                // vsize
570                buf.extend_from_slice(&vsize.to_be_bytes());
571                // begin (offset) -- 4 bytes for CDF-1
572                buf.extend_from_slice(&offset.to_be_bytes());
573            }
574        }
575
576        buf
577    }
578
579    fn write_name_cdf1(buf: &mut Vec<u8>, name: &str) {
580        let name_bytes = name.as_bytes();
581        buf.extend_from_slice(&(name_bytes.len() as u32).to_be_bytes());
582        buf.extend_from_slice(name_bytes);
583        let pad = pad_to_4(name_bytes.len()) - name_bytes.len();
584        for _ in 0..pad {
585            buf.push(0);
586        }
587    }
588
589    fn write_att_list_cdf1(buf: &mut Vec<u8>, attrs: &[(&str, u32, &[u8])]) {
590        if attrs.is_empty() {
591            buf.extend_from_slice(&ABSENT.to_be_bytes());
592            buf.extend_from_slice(&0u32.to_be_bytes());
593            return;
594        }
595        buf.extend_from_slice(&NC_ATTRIBUTE.to_be_bytes());
596        buf.extend_from_slice(&(attrs.len() as u32).to_be_bytes());
597        for (name, nc_type, value_bytes) in attrs {
598            write_name_cdf1(buf, name);
599            buf.extend_from_slice(&nc_type.to_be_bytes());
600            // For simplicity, nvalues = 1 element (caller provides exactly one element's bytes)
601            let elem_size = match nc_type {
602                1 => 1, // byte
603                2 => 1, // char
604                3 => 2, // short
605                4 => 4, // int
606                5 => 4, // float
607                6 => 8, // double
608                _ => 1,
609            };
610            let nvalues = value_bytes.len() / elem_size;
611            buf.extend_from_slice(&(nvalues as u32).to_be_bytes());
612            buf.extend_from_slice(value_bytes);
613            let pad = pad_to_4(value_bytes.len()) - value_bytes.len();
614            for _ in 0..pad {
615                buf.push(0);
616            }
617        }
618    }
619
620    fn write_count_cdf5(buf: &mut Vec<u8>, value: u64) {
621        buf.extend_from_slice(&value.to_be_bytes());
622    }
623
624    fn write_name_cdf5(buf: &mut Vec<u8>, name: &str) {
625        let name_bytes = name.as_bytes();
626        write_count_cdf5(buf, name_bytes.len() as u64);
627        buf.extend_from_slice(name_bytes);
628        let pad = pad_to_4(name_bytes.len()) - name_bytes.len();
629        for _ in 0..pad {
630            buf.push(0);
631        }
632    }
633
634    fn build_cdf5_header(
635        dims: &[(&str, u64)],
636        vars: &[(&str, &[u64], u32, u64, u64)],
637        numrecs: u64,
638    ) -> Vec<u8> {
639        let mut buf = Vec::new();
640        buf.extend_from_slice(b"CDF\x05");
641        write_count_cdf5(&mut buf, numrecs);
642
643        if dims.is_empty() {
644            buf.extend_from_slice(&ABSENT.to_be_bytes());
645            write_count_cdf5(&mut buf, 0);
646        } else {
647            buf.extend_from_slice(&NC_DIMENSION.to_be_bytes());
648            write_count_cdf5(&mut buf, dims.len() as u64);
649            for (name, size) in dims {
650                write_name_cdf5(&mut buf, name);
651                write_count_cdf5(&mut buf, *size);
652            }
653        }
654
655        buf.extend_from_slice(&ABSENT.to_be_bytes());
656        write_count_cdf5(&mut buf, 0);
657
658        if vars.is_empty() {
659            buf.extend_from_slice(&ABSENT.to_be_bytes());
660            write_count_cdf5(&mut buf, 0);
661        } else {
662            buf.extend_from_slice(&NC_VARIABLE.to_be_bytes());
663            write_count_cdf5(&mut buf, vars.len() as u64);
664            for (name, dimids, nc_type, vsize, offset) in vars {
665                write_name_cdf5(&mut buf, name);
666                write_count_cdf5(&mut buf, dimids.len() as u64);
667                for dimid in *dimids {
668                    write_count_cdf5(&mut buf, *dimid);
669                }
670                buf.extend_from_slice(&ABSENT.to_be_bytes());
671                write_count_cdf5(&mut buf, 0);
672                buf.extend_from_slice(&nc_type.to_be_bytes());
673                write_count_cdf5(&mut buf, *vsize);
674                buf.extend_from_slice(&offset.to_be_bytes());
675            }
676        }
677
678        buf
679    }
680
681    #[test]
682    fn test_empty_header() {
683        let data = build_cdf1_header(&[], &[], &[], 0);
684        let header = parse_header(&data, NcFormat::Classic).unwrap();
685        assert!(header.dimensions.is_empty());
686        assert!(header.global_attributes.is_empty());
687        assert!(header.variables.is_empty());
688        assert_eq!(header.numrecs, 0);
689    }
690
691    #[test]
692    fn test_dimensions() {
693        let data = build_cdf1_header(
694            &[("x", 10), ("y", 20), ("time", 0)], // time is unlimited
695            &[],
696            &[],
697            5,
698        );
699        let header = parse_header(&data, NcFormat::Classic).unwrap();
700        assert_eq!(header.dimensions.len(), 3);
701
702        assert_eq!(header.dimensions[0].name, "x");
703        assert_eq!(header.dimensions[0].size, 10);
704        assert!(!header.dimensions[0].is_unlimited);
705
706        assert_eq!(header.dimensions[1].name, "y");
707        assert_eq!(header.dimensions[1].size, 20);
708        assert!(!header.dimensions[1].is_unlimited);
709
710        assert_eq!(header.dimensions[2].name, "time");
711        assert_eq!(header.dimensions[2].size, 5);
712        assert!(header.dimensions[2].is_unlimited);
713
714        assert_eq!(header.numrecs, 5);
715    }
716
717    #[test]
718    fn test_global_attributes() {
719        // One NC_INT attribute with value 42.
720        let value_bytes = 42i32.to_be_bytes();
721        let data = build_cdf1_header(
722            &[],
723            &[("answer", 4, &value_bytes)], // NC_INT = 4
724            &[],
725            0,
726        );
727        let header = parse_header(&data, NcFormat::Classic).unwrap();
728        assert_eq!(header.global_attributes.len(), 1);
729        assert_eq!(header.global_attributes[0].name, "answer");
730        if let NcAttrValue::Ints(ref v) = header.global_attributes[0].value {
731            assert_eq!(v, &[42]);
732        } else {
733            panic!("expected Ints attribute");
734        }
735    }
736
737    #[test]
738    fn test_char_attribute() {
739        let text = b"hello";
740        let data = build_cdf1_header(
741            &[],
742            &[("greeting", 2, text)], // NC_CHAR = 2
743            &[],
744            0,
745        );
746        let header = parse_header(&data, NcFormat::Classic).unwrap();
747        assert_eq!(header.global_attributes.len(), 1);
748        assert_eq!(header.global_attributes[0].name, "greeting");
749        if let NcAttrValue::Chars(ref s) = header.global_attributes[0].value {
750            assert_eq!(s, "hello");
751        } else {
752            panic!("expected Chars attribute");
753        }
754    }
755
756    #[test]
757    fn test_variables() {
758        let data = build_cdf1_header(
759            &[("x", 10), ("y", 20)],
760            &[],
761            &[
762                ("temperature", &[0, 1], 5, 800, 200), // float, dimids=[x,y]
763                ("pressure", &[0, 1], 6, 1600, 1000),  // double, dimids=[x,y]
764            ],
765            0,
766        );
767        let header = parse_header(&data, NcFormat::Classic).unwrap();
768        assert_eq!(header.variables.len(), 2);
769
770        let temp = &header.variables[0];
771        assert_eq!(temp.name, "temperature");
772        assert_eq!(temp.dtype, NcType::Float);
773        assert_eq!(temp.dimensions.len(), 2);
774        assert_eq!(temp.dimensions[0].name, "x");
775        assert_eq!(temp.dimensions[1].name, "y");
776        assert_eq!(temp.data_offset, 200);
777        assert_eq!(temp._data_size, 800);
778        assert!(!temp.is_record_var);
779
780        let pres = &header.variables[1];
781        assert_eq!(pres.name, "pressure");
782        assert_eq!(pres.dtype, NcType::Double);
783        assert_eq!(pres.data_offset, 1000);
784        assert_eq!(pres._data_size, 1600);
785    }
786
787    #[test]
788    fn test_record_variable() {
789        let data = build_cdf1_header(
790            &[("time", 0), ("x", 5)], // time is unlimited
791            &[],
792            &[
793                // record variable: first dim is unlimited
794                ("values", &[0, 1], 5, 20, 100), // float, vsize=5*4=20 per record
795            ],
796            10, // 10 records
797        );
798        let header = parse_header(&data, NcFormat::Classic).unwrap();
799        assert_eq!(header.numrecs, 10);
800        assert_eq!(header.variables.len(), 1);
801
802        let var = &header.variables[0];
803        assert_eq!(var.name, "values");
804        assert!(var.is_record_var);
805        assert_eq!(var.record_size, 20);
806        assert_eq!(var._data_size, 0); // data_size=0 for record vars (computed at read time)
807        assert_eq!(var.shape(), vec![10, 5]);
808    }
809
810    #[test]
811    fn test_cdf2_offset64() {
812        // Build a CDF-2 header manually.
813        // CDF-2 is mostly the same as CDF-1 but the data offset (begin) field is 8 bytes.
814        let mut buf = Vec::new();
815        buf.extend_from_slice(b"CDF\x02");
816        // numrecs (4 bytes)
817        buf.extend_from_slice(&0u32.to_be_bytes());
818        // dim_list: one dimension "x" with size 100
819        buf.extend_from_slice(&NC_DIMENSION.to_be_bytes());
820        buf.extend_from_slice(&1u32.to_be_bytes());
821        write_name_cdf1(&mut buf, "x");
822        buf.extend_from_slice(&100u32.to_be_bytes());
823        // att_list: absent
824        buf.extend_from_slice(&ABSENT.to_be_bytes());
825        buf.extend_from_slice(&0u32.to_be_bytes());
826        // var_list: one variable
827        buf.extend_from_slice(&NC_VARIABLE.to_be_bytes());
828        buf.extend_from_slice(&1u32.to_be_bytes());
829        write_name_cdf1(&mut buf, "data");
830        buf.extend_from_slice(&1u32.to_be_bytes()); // ndims=1
831        buf.extend_from_slice(&0u32.to_be_bytes()); // dimid=0
832                                                    // att_list: absent
833        buf.extend_from_slice(&ABSENT.to_be_bytes());
834        buf.extend_from_slice(&0u32.to_be_bytes());
835        // nc_type = NC_FLOAT = 5
836        buf.extend_from_slice(&5u32.to_be_bytes());
837        // vsize (4 bytes for CDF-2)
838        buf.extend_from_slice(&400u32.to_be_bytes());
839        // begin (8 bytes for CDF-2!)
840        let offset: u64 = 0x1_0000_0000; // > 4 GB offset to test 64-bit
841        buf.extend_from_slice(&offset.to_be_bytes());
842
843        let header = parse_header(&buf, NcFormat::Offset64).unwrap();
844        assert_eq!(header.variables.len(), 1);
845        assert_eq!(header.variables[0].data_offset, 0x1_0000_0000);
846        assert_eq!(header.variables[0]._data_size, 400);
847    }
848
849    #[test]
850    fn test_cdf5_uses_64_bit_counts_for_var_metadata() {
851        let data = build_cdf5_header(
852            &[("n", 4)],
853            &[
854                ("ubyte_var", &[0], 7, 4, 128),
855                ("int64_var", &[0], 10, 32, 256),
856            ],
857            0,
858        );
859
860        let header = parse_header(&data, NcFormat::Cdf5).unwrap();
861        assert_eq!(header.variables.len(), 2);
862        assert_eq!(header.variables[0].name, "ubyte_var");
863        assert_eq!(header.variables[0].dtype, NcType::UByte);
864        assert_eq!(header.variables[0].dimensions[0].name, "n");
865        assert_eq!(header.variables[1].name, "int64_var");
866        assert_eq!(header.variables[1].dtype, NcType::Int64);
867        assert_eq!(header.variables[1].data_offset, 256);
868    }
869
870    #[test]
871    fn test_unlimited_dimension_size_tracks_numrecs() {
872        let data = build_cdf1_header(
873            &[("time", 0), ("x", 5)],
874            &[],
875            &[("series", &[0, 1], 6, 40, 128)],
876            3,
877        );
878
879        let header = parse_header(&data, NcFormat::Classic).unwrap();
880        assert_eq!(header.dimensions[0].size, 3);
881        assert_eq!(header.variables[0].shape(), vec![3, 5]);
882    }
883
884    #[test]
885    fn test_double_attribute() {
886        let pi = std::f64::consts::PI;
887        let value_bytes = pi.to_be_bytes();
888        let data = build_cdf1_header(
889            &[],
890            &[("pi", 6, &value_bytes)], // NC_DOUBLE = 6
891            &[],
892            0,
893        );
894        let header = parse_header(&data, NcFormat::Classic).unwrap();
895        assert_eq!(header.global_attributes.len(), 1);
896        if let NcAttrValue::Doubles(ref v) = header.global_attributes[0].value {
897            assert_eq!(v.len(), 1);
898            assert!((v[0] - pi).abs() < 1e-15);
899        } else {
900            panic!("expected Doubles attribute");
901        }
902    }
903
904    #[test]
905    fn test_short_attribute_with_padding() {
906        // NC_SHORT (2 bytes) with 3 values = 6 bytes, padded to 8.
907        let mut value_bytes = Vec::new();
908        value_bytes.extend_from_slice(&1i16.to_be_bytes());
909        value_bytes.extend_from_slice(&2i16.to_be_bytes());
910        value_bytes.extend_from_slice(&3i16.to_be_bytes());
911        // The build helper will add padding.
912
913        let mut buf = Vec::new();
914        buf.extend_from_slice(b"CDF\x01");
915        buf.extend_from_slice(&0u32.to_be_bytes()); // numrecs
916                                                    // dim_list: absent
917        buf.extend_from_slice(&ABSENT.to_be_bytes());
918        buf.extend_from_slice(&0u32.to_be_bytes());
919        // att_list: one short attribute with 3 values
920        buf.extend_from_slice(&NC_ATTRIBUTE.to_be_bytes());
921        buf.extend_from_slice(&1u32.to_be_bytes());
922        write_name_cdf1(&mut buf, "vals");
923        buf.extend_from_slice(&3u32.to_be_bytes()); // NC_SHORT
924        buf.extend_from_slice(&3u32.to_be_bytes()); // nvalues=3
925        buf.extend_from_slice(&value_bytes);
926        // Pad to 4-byte boundary: 6 bytes -> 2 bytes padding
927        buf.extend_from_slice(&[0, 0]);
928        // var_list: absent
929        buf.extend_from_slice(&ABSENT.to_be_bytes());
930        buf.extend_from_slice(&0u32.to_be_bytes());
931
932        let header = parse_header(&buf, NcFormat::Classic).unwrap();
933        if let NcAttrValue::Shorts(ref v) = header.global_attributes[0].value {
934            assert_eq!(v, &[1, 2, 3]);
935        } else {
936            panic!("expected Shorts attribute");
937        }
938    }
939
940    #[test]
941    fn test_name_padding() {
942        // Names with lengths 1, 2, 3, 4, 5 to test all padding cases.
943        let data = build_cdf1_header(
944            &[("a", 1), ("ab", 2), ("abc", 3), ("abcd", 4), ("abcde", 5)],
945            &[],
946            &[],
947            0,
948        );
949        let header = parse_header(&data, NcFormat::Classic).unwrap();
950        assert_eq!(header.dimensions.len(), 5);
951        assert_eq!(header.dimensions[0].name, "a");
952        assert_eq!(header.dimensions[1].name, "ab");
953        assert_eq!(header.dimensions[2].name, "abc");
954        assert_eq!(header.dimensions[3].name, "abcd");
955        assert_eq!(header.dimensions[4].name, "abcde");
956    }
957
958    #[test]
959    fn test_invalid_dimension_reference() {
960        // Variable referencing a non-existent dimension.
961        let data = build_cdf1_header(
962            &[("x", 10)], // only dim 0 exists
963            &[],
964            &[("bad_var", &[5], 4, 40, 100)], // dimid=5 is out of range
965            0,
966        );
967        let result = parse_header(&data, NcFormat::Classic);
968        assert!(result.is_err());
969    }
970
971    #[test]
972    fn test_byte_attribute() {
973        let value_bytes: &[u8] = &[0xFF]; // -1 as i8
974        let data = build_cdf1_header(
975            &[],
976            &[("flag", 1, value_bytes)], // NC_BYTE = 1
977            &[],
978            0,
979        );
980        let header = parse_header(&data, NcFormat::Classic).unwrap();
981        if let NcAttrValue::Bytes(ref v) = header.global_attributes[0].value {
982            assert_eq!(v, &[-1i8]);
983        } else {
984            panic!("expected Bytes attribute");
985        }
986    }
987
988    #[test]
989    fn test_float_attribute() {
990        let val = std::f32::consts::PI;
991        let value_bytes = val.to_be_bytes();
992        let data = build_cdf1_header(
993            &[],
994            &[("pi_approx", 5, &value_bytes)], // NC_FLOAT = 5
995            &[],
996            0,
997        );
998        let header = parse_header(&data, NcFormat::Classic).unwrap();
999        if let NcAttrValue::Floats(ref v) = header.global_attributes[0].value {
1000            assert_eq!(v.len(), 1);
1001            assert!((v[0] - std::f32::consts::PI).abs() < 1e-6);
1002        } else {
1003            panic!("expected Floats attribute");
1004        }
1005    }
1006
1007    #[test]
1008    fn test_multiple_global_attributes() {
1009        let int_val = 100i32.to_be_bytes();
1010        let float_val = 2.5f32.to_be_bytes();
1011        let data = build_cdf1_header(
1012            &[],
1013            &[("count", 4, &int_val), ("scale", 5, &float_val)],
1014            &[],
1015            0,
1016        );
1017        let header = parse_header(&data, NcFormat::Classic).unwrap();
1018        assert_eq!(header.global_attributes.len(), 2);
1019        assert_eq!(header.global_attributes[0].name, "count");
1020        assert_eq!(header.global_attributes[1].name, "scale");
1021    }
1022}