Skip to main content

hdf5_reader/messages/
dataspace.rs

1//! HDF5 Dataspace message (type 0x0001).
2//!
3//! A dataspace describes the shape of a dataset: scalar, null, or simple
4//! (one or more dimensions with current and optional maximum sizes).
5
6use crate::error::{Error, Result};
7use crate::io::Cursor;
8
/// Unlimited dimension sentinel value.
///
/// Equal to `u64::MAX`. An entry of `DataspaceMessage::max_dims` holding this
/// value marks that dimension as having no upper bound.
pub const UNLIMITED: u64 = u64::MAX;
11
/// The type of dataspace.
///
/// Version 2 messages store the type explicitly as a byte (0 = scalar,
/// 1 = simple, 2 = null). Version 1 messages have no type byte, so the parser
/// derives the type from the rank: rank 0 is `Scalar`, anything else `Simple`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataspaceType {
    /// Contains no data elements at all.
    Null,
    /// A single data element (rank 0).
    Scalar,
    /// A regular N-dimensional array.
    Simple,
}
22
/// Parsed dataspace message.
///
/// When produced by the parser in this module, `dims` (and `max_dims`, when
/// present) contain exactly `rank` entries. The fields are public, so values
/// built by hand are not guaranteed to uphold that relationship.
#[derive(Debug, Clone)]
pub struct DataspaceMessage {
    /// Number of dimensions (0 for scalar).
    pub rank: u8,
    /// Current dimension sizes (`rank` elements).
    pub dims: Vec<u64>,
    /// Optional maximum dimension sizes (`rank` elements). `UNLIMITED` = unlimited.
    ///
    /// `None` when the message's flags do not have the "maximum sizes present"
    /// bit (0x01) set.
    pub max_dims: Option<Vec<u64>>,
    /// The dataspace type.
    pub dataspace_type: DataspaceType,
}
35
36impl DataspaceMessage {
37    /// Total number of elements in the dataspace (product of current dimension sizes).
38    pub fn num_elements(&self) -> Result<u64> {
39        if self.dims.is_empty() {
40            return Ok(match self.dataspace_type {
41                DataspaceType::Scalar => 1,
42                _ => 0,
43            });
44        }
45        self.dims.iter().try_fold(1u64, |acc, &dim| {
46            acc.checked_mul(dim).ok_or_else(|| {
47                Error::InvalidData("dataspace element count overflows u64".to_string())
48            })
49        })
50    }
51}
52
53/// Parse a dataspace message.
54///
55/// `length_size` is needed for version 1 where dimensions are stored using
56/// the file-global length size.
57pub fn parse(
58    cursor: &mut Cursor<'_>,
59    _offset_size: u8,
60    length_size: u8,
61    msg_size: usize,
62) -> Result<DataspaceMessage> {
63    let start = cursor.position();
64    let version = cursor.read_u8()?;
65
66    match version {
67        1 => parse_v1(cursor, length_size),
68        2 => parse_v2(cursor, length_size),
69        v => Err(Error::UnsupportedDataspaceVersion(v)),
70    }
71    .and_then(|msg| {
72        // Advance past any remaining bytes in the message
73        let consumed = (cursor.position() - start) as usize;
74        if consumed < msg_size {
75            cursor.skip(msg_size - consumed)?;
76        }
77        Ok(msg)
78    })
79}
80
81/// Version 1 dataspace message.
82fn parse_v1(cursor: &mut Cursor<'_>, length_size: u8) -> Result<DataspaceMessage> {
83    let rank = cursor.read_u8()?;
84    let flags = cursor.read_u8()?;
85    let _reserved = cursor.read_u8()?; // reserved
86    let _reserved2 = cursor.read_u32_le()?; // reserved
87
88    let has_max_dims = (flags & 0x01) != 0;
89    // Bit 1 was "permutation index present" in v1 but is never actually set
90    // in practice. We skip it if the flag is set.
91    let has_permutation = (flags & 0x02) != 0;
92
93    let dataspace_type = if rank == 0 {
94        DataspaceType::Scalar
95    } else {
96        DataspaceType::Simple
97    };
98
99    let mut dims = Vec::with_capacity(rank as usize);
100    for _ in 0..rank {
101        dims.push(cursor.read_length(length_size)?);
102    }
103
104    let max_dims = if has_max_dims {
105        let mut md = Vec::with_capacity(rank as usize);
106        for _ in 0..rank {
107            md.push(cursor.read_length(length_size)?);
108        }
109        Some(md)
110    } else {
111        None
112    };
113
114    if has_permutation {
115        // Skip permutation indices — each is `length_size` bytes.
116        for _ in 0..rank {
117            cursor.read_length(length_size)?;
118        }
119    }
120
121    Ok(DataspaceMessage {
122        rank,
123        dims,
124        max_dims,
125        dataspace_type,
126    })
127}
128
129/// Version 2 dataspace message.
130fn parse_v2(cursor: &mut Cursor<'_>, length_size: u8) -> Result<DataspaceMessage> {
131    let rank = cursor.read_u8()?;
132    let flags = cursor.read_u8()?;
133    let ds_type_byte = cursor.read_u8()?;
134
135    let has_max_dims = (flags & 0x01) != 0;
136
137    let dataspace_type = match ds_type_byte {
138        0 => DataspaceType::Scalar,
139        1 => DataspaceType::Simple,
140        2 => DataspaceType::Null,
141        _ => {
142            return Err(Error::InvalidData(format!(
143                "unknown dataspace type: {}",
144                ds_type_byte
145            )))
146        }
147    };
148
149    let mut dims = Vec::with_capacity(rank as usize);
150    for _ in 0..rank {
151        dims.push(cursor.read_length(length_size)?);
152    }
153
154    let max_dims = if has_max_dims {
155        let mut md = Vec::with_capacity(rank as usize);
156        for _ in 0..rank {
157            md.push(cursor.read_length(length_size)?);
158        }
159        Some(md)
160    } else {
161        None
162    };
163
164    Ok(DataspaceMessage {
165        rank,
166        dims,
167        max_dims,
168        dataspace_type,
169    })
170}
171
#[cfg(test)]
mod tests {
    //! Unit tests for dataspace message parsing and element counting.
    //!
    //! Each test builds the raw message bytes by hand (starting at the
    //! version byte) and passes the full buffer length as `msg_size`.

    use super::*;

    #[test]
    fn parse_v1_scalar() {
        // Version 1, rank=0 (scalar), flags=0, reserved bytes
        let data = [
            0x01, // version
            0x00, // rank
            0x00, // flags
            0x00, // reserved
            0x00, 0x00, 0x00, 0x00, // reserved u32
        ];
        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 0);
        assert_eq!(msg.dataspace_type, DataspaceType::Scalar);
        assert!(msg.dims.is_empty());
        assert!(msg.max_dims.is_none());
        assert_eq!(msg.num_elements().unwrap(), 1);
    }

    #[test]
    fn parse_v1_simple_2d() {
        // Version 1, rank=2, flags=0x01 (has max dims), 8-byte lengths
        let mut data = vec![
            0x01, // version
            0x02, // rank
            0x01, // flags (has max dims)
            0x00, // reserved
            0x00, 0x00, 0x00, 0x00, // reserved u32
        ];
        // dim[0] = 10
        data.extend_from_slice(&10u64.to_le_bytes());
        // dim[1] = 20
        data.extend_from_slice(&20u64.to_le_bytes());
        // max_dim[0] = 100
        data.extend_from_slice(&100u64.to_le_bytes());
        // max_dim[1] = UNLIMITED
        data.extend_from_slice(&u64::MAX.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 2);
        assert_eq!(msg.dims, vec![10, 20]);
        assert_eq!(msg.max_dims.as_ref().unwrap(), &vec![100, UNLIMITED]);
        assert_eq!(msg.dataspace_type, DataspaceType::Simple);
        assert_eq!(msg.num_elements().unwrap(), 200);
    }

    #[test]
    fn parse_v1_skips_permutation_indices() {
        // Version 1, rank=1, flags=0x02 (permutation indices present, no max dims)
        let mut data = vec![
            0x01, // version
            0x01, // rank
            0x02, // flags (has permutation indices)
            0x00, // reserved
            0x00, 0x00, 0x00, 0x00, // reserved u32
        ];
        // dim[0] = 7
        data.extend_from_slice(&7u64.to_le_bytes());
        // permutation[0] = 0 (consumed but not stored)
        data.extend_from_slice(&0u64.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 1);
        assert_eq!(msg.dims, vec![7]);
        assert!(msg.max_dims.is_none());
        assert_eq!(msg.dataspace_type, DataspaceType::Simple);
        // The permutation index must have been consumed.
        assert_eq!(cursor.position() as usize, data.len());
    }

    #[test]
    fn parse_v2_simple_1d() {
        // Version 2, rank=1, flags=0x00, type=1 (simple), 4-byte lengths
        let mut data = vec![
            0x02, // version
            0x01, // rank
            0x00, // flags
            0x01, // type = simple
        ];
        // dim[0] = 42
        data.extend_from_slice(&42u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 4, 4, data.len()).unwrap();
        assert_eq!(msg.rank, 1);
        assert_eq!(msg.dims, vec![42]);
        assert!(msg.max_dims.is_none());
        assert_eq!(msg.dataspace_type, DataspaceType::Simple);
        assert_eq!(msg.num_elements().unwrap(), 42);
    }

    #[test]
    fn parse_v2_null() {
        let data = [
            0x02, // version
            0x00, // rank
            0x00, // flags
            0x02, // type = null
        ];
        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.dataspace_type, DataspaceType::Null);
        assert_eq!(msg.num_elements().unwrap(), 0);
    }

    #[test]
    fn parse_v2_with_max_dims() {
        let mut data = vec![
            0x02, // version
            0x03, // rank = 3
            0x01, // flags = has max dims
            0x01, // type = simple
        ];
        // current dims: 5, 10, 15
        for &d in &[5u64, 10, 15] {
            data.extend_from_slice(&d.to_le_bytes());
        }
        // max dims: 50, 100, UNLIMITED
        for &d in &[50u64, 100, u64::MAX] {
            data.extend_from_slice(&d.to_le_bytes());
        }

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 3);
        assert_eq!(msg.dims, vec![5, 10, 15]);
        let md = msg.max_dims.clone().unwrap();
        assert_eq!(md, vec![50, 100, UNLIMITED]);
        assert_eq!(msg.num_elements().unwrap(), 750);
    }

    #[test]
    fn parse_v2_rejects_unknown_type() {
        let data = [
            0x02, // version
            0x00, // rank
            0x00, // flags
            0x03, // type = not a defined dataspace type
        ];
        let mut cursor = Cursor::new(&data);
        assert!(parse(&mut cursor, 8, 8, data.len()).is_err());
    }

    #[test]
    fn parse_skips_trailing_padding() {
        // Version 2, rank=1, 4-byte lengths, followed by 4 bytes of padding
        // that belong to the message but carry no dataspace content.
        let mut data = vec![
            0x02, // version
            0x01, // rank
            0x00, // flags
            0x01, // type = simple
        ];
        data.extend_from_slice(&9u32.to_le_bytes());
        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00]);

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 4, 4, data.len()).unwrap();
        assert_eq!(msg.dims, vec![9]);
        assert_eq!(cursor.position() as usize, data.len());
    }

    #[test]
    fn unsupported_version() {
        let data = [0x03, 0x00, 0x00, 0x00];
        let mut cursor = Cursor::new(&data);
        assert!(parse(&mut cursor, 8, 8, data.len()).is_err());
    }

    #[test]
    fn num_elements_rejects_overflow() {
        let msg = DataspaceMessage {
            rank: 2,
            dims: vec![u64::MAX, 2],
            max_dims: None,
            dataspace_type: DataspaceType::Simple,
        };

        let err = msg.num_elements().unwrap_err();
        assert!(err.to_string().contains("element count"));
    }
}