Skip to main content

hdf5_reader/messages/
dataspace.rs

//! HDF5 Dataspace message (type 0x0001).
//!
//! A dataspace describes the shape of a dataset: scalar, null, or simple
//! (one or more dimensions with current and optional maximum sizes).
5
6use crate::error::{Error, Result};
7use crate::io::Cursor;
8
/// Sentinel stored in a maximum-dimension slot to mark that dimension as
/// unlimited (all bits set).
pub const UNLIMITED: u64 = u64::MAX;
11
/// The type of dataspace.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataspaceType {
    /// Contains no data elements at all.
    Null,
    /// A single data element (rank 0).
    Scalar,
    /// A regular N-dimensional array.
    Simple,
}

/// Parsed dataspace message.
#[derive(Debug, Clone)]
pub struct DataspaceMessage {
    /// Number of dimensions (0 for scalar).
    pub rank: u8,
    /// Current dimension sizes (`rank` elements).
    pub dims: Vec<u64>,
    /// Optional maximum dimension sizes (`rank` elements). `UNLIMITED` = unlimited.
    pub max_dims: Option<Vec<u64>>,
    /// The dataspace type.
    pub dataspace_type: DataspaceType,
}

impl DataspaceMessage {
    /// Total number of elements in the dataspace (product of current dimension sizes).
    ///
    /// * A `Null` dataspace always has 0 elements, whatever `dims` contains.
    /// * With empty `dims`, a `Scalar` dataspace has 1 element and a `Simple`
    ///   one has 0.
    /// * Otherwise the result is the product of `dims`. Dimension sizes come
    ///   straight from the file, so the product saturates at `u64::MAX`
    ///   instead of overflowing (which would panic in debug builds and wrap
    ///   to a misleadingly small count in release builds).
    pub fn num_elements(&self) -> u64 {
        if self.dataspace_type == DataspaceType::Null {
            return 0;
        }
        if self.dims.is_empty() {
            return match self.dataspace_type {
                DataspaceType::Scalar => 1,
                DataspaceType::Null | DataspaceType::Simple => 0,
            };
        }
        // A zero-sized dimension still yields 0 after saturation, because
        // `u64::MAX.saturating_mul(0) == 0`.
        self.dims
            .iter()
            .fold(1u64, |total, &extent| total.saturating_mul(extent))
    }
}
48
49/// Parse a dataspace message.
50///
51/// `length_size` is needed for version 1 where dimensions are stored using
52/// the file-global length size.
53pub fn parse(
54    cursor: &mut Cursor<'_>,
55    _offset_size: u8,
56    length_size: u8,
57    msg_size: usize,
58) -> Result<DataspaceMessage> {
59    let start = cursor.position();
60    let version = cursor.read_u8()?;
61
62    match version {
63        1 => parse_v1(cursor, length_size),
64        2 => parse_v2(cursor, length_size),
65        v => Err(Error::UnsupportedDataspaceVersion(v)),
66    }
67    .and_then(|msg| {
68        // Advance past any remaining bytes in the message
69        let consumed = (cursor.position() - start) as usize;
70        if consumed < msg_size {
71            cursor.skip(msg_size - consumed)?;
72        }
73        Ok(msg)
74    })
75}
76
77/// Version 1 dataspace message.
78fn parse_v1(cursor: &mut Cursor<'_>, length_size: u8) -> Result<DataspaceMessage> {
79    let rank = cursor.read_u8()?;
80    let flags = cursor.read_u8()?;
81    let _reserved = cursor.read_u8()?; // reserved
82    let _reserved2 = cursor.read_u32_le()?; // reserved
83
84    let has_max_dims = (flags & 0x01) != 0;
85    // Bit 1 was "permutation index present" in v1 but is never actually set
86    // in practice. We skip it if the flag is set.
87    let has_permutation = (flags & 0x02) != 0;
88
89    let dataspace_type = if rank == 0 {
90        DataspaceType::Scalar
91    } else {
92        DataspaceType::Simple
93    };
94
95    let mut dims = Vec::with_capacity(rank as usize);
96    for _ in 0..rank {
97        dims.push(cursor.read_length(length_size)?);
98    }
99
100    let max_dims = if has_max_dims {
101        let mut md = Vec::with_capacity(rank as usize);
102        for _ in 0..rank {
103            md.push(cursor.read_length(length_size)?);
104        }
105        Some(md)
106    } else {
107        None
108    };
109
110    if has_permutation {
111        // Skip permutation indices — each is `length_size` bytes.
112        for _ in 0..rank {
113            cursor.read_length(length_size)?;
114        }
115    }
116
117    Ok(DataspaceMessage {
118        rank,
119        dims,
120        max_dims,
121        dataspace_type,
122    })
123}
124
125/// Version 2 dataspace message.
126fn parse_v2(cursor: &mut Cursor<'_>, length_size: u8) -> Result<DataspaceMessage> {
127    let rank = cursor.read_u8()?;
128    let flags = cursor.read_u8()?;
129    let ds_type_byte = cursor.read_u8()?;
130
131    let has_max_dims = (flags & 0x01) != 0;
132
133    let dataspace_type = match ds_type_byte {
134        0 => DataspaceType::Scalar,
135        1 => DataspaceType::Simple,
136        2 => DataspaceType::Null,
137        _ => {
138            return Err(Error::InvalidData(format!(
139                "unknown dataspace type: {}",
140                ds_type_byte
141            )))
142        }
143    };
144
145    let mut dims = Vec::with_capacity(rank as usize);
146    for _ in 0..rank {
147        dims.push(cursor.read_length(length_size)?);
148    }
149
150    let max_dims = if has_max_dims {
151        let mut md = Vec::with_capacity(rank as usize);
152        for _ in 0..rank {
153            md.push(cursor.read_length(length_size)?);
154        }
155        Some(md)
156    } else {
157        None
158    };
159
160    Ok(DataspaceMessage {
161        rank,
162        dims,
163        max_dims,
164        dataspace_type,
165    })
166}
167
#[cfg(test)]
mod tests {
    //! Unit tests that feed hand-built byte buffers through `parse`.

    use super::*;

    #[test]
    fn test_parse_v1_scalar() {
        // Version 1, rank=0 (scalar), flags=0, reserved bytes
        let data = [
            0x01, // version
            0x00, // rank
            0x00, // flags
            0x00, // reserved
            0x00, 0x00, 0x00, 0x00, // reserved u32
        ];
        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 0);
        assert_eq!(msg.dataspace_type, DataspaceType::Scalar);
        assert!(msg.dims.is_empty());
        assert!(msg.max_dims.is_none());
        assert_eq!(msg.num_elements(), 1);
    }

    #[test]
    fn test_parse_v1_simple_2d() {
        // Version 1, rank=2, flags=0x01 (has max dims), 8-byte lengths
        let mut data = vec![
            0x01, // version
            0x02, // rank
            0x01, // flags (has max dims)
            0x00, // reserved
            0x00, 0x00, 0x00, 0x00, // reserved u32
        ];
        // dim[0] = 10
        data.extend_from_slice(&10u64.to_le_bytes());
        // dim[1] = 20
        data.extend_from_slice(&20u64.to_le_bytes());
        // max_dim[0] = 100
        data.extend_from_slice(&100u64.to_le_bytes());
        // max_dim[1] = UNLIMITED
        data.extend_from_slice(&u64::MAX.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 2);
        assert_eq!(msg.dims, vec![10, 20]);
        assert_eq!(msg.max_dims.as_ref().unwrap(), &vec![100, UNLIMITED]);
        assert_eq!(msg.dataspace_type, DataspaceType::Simple);
        assert_eq!(msg.num_elements(), 200);
    }

    #[test]
    fn test_parse_v2_simple_1d() {
        // Version 2, rank=1, flags=0x00, type=1 (simple), 4-byte lengths
        let mut data = vec![
            0x02, // version
            0x01, // rank
            0x00, // flags
            0x01, // type = simple
        ];
        // dim[0] = 42
        data.extend_from_slice(&42u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 4, 4, data.len()).unwrap();
        assert_eq!(msg.rank, 1);
        assert_eq!(msg.dims, vec![42]);
        assert!(msg.max_dims.is_none());
        assert_eq!(msg.dataspace_type, DataspaceType::Simple);
    }

    #[test]
    fn test_parse_v2_null() {
        let data = [
            0x02, // version
            0x00, // rank
            0x00, // flags
            0x02, // type = null
        ];
        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.dataspace_type, DataspaceType::Null);
        assert_eq!(msg.num_elements(), 0);
    }

    #[test]
    fn test_parse_v2_with_max_dims() {
        let mut data = vec![
            0x02, // version
            0x03, // rank = 3
            0x01, // flags = has max dims
            0x01, // type = simple
        ];
        // current dims: 5, 10, 15
        for &d in &[5u64, 10, 15] {
            data.extend_from_slice(&d.to_le_bytes());
        }
        // max dims: 50, 100, UNLIMITED
        for &d in &[50u64, 100, u64::MAX] {
            data.extend_from_slice(&d.to_le_bytes());
        }

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.rank, 3);
        assert_eq!(msg.dims, vec![5, 10, 15]);
        let md = msg.max_dims.clone().unwrap();
        assert_eq!(md, vec![50, 100, UNLIMITED]);
        assert_eq!(msg.num_elements(), 750);
    }

    #[test]
    fn test_parse_v2_unknown_type() {
        let data = [
            0x02, // version
            0x00, // rank
            0x00, // flags
            0x03, // type = not a defined dataspace type
        ];
        let mut cursor = Cursor::new(&data);
        assert!(parse(&mut cursor, 8, 8, data.len()).is_err());
    }

    #[test]
    fn test_trailing_bytes_are_skipped() {
        // A v2 scalar needs only 4 bytes; the remaining 4 are padding that
        // `parse` must step over so the cursor ends at the message boundary.
        let data = [
            0x02, // version
            0x00, // rank
            0x00, // flags
            0x00, // type = scalar
            0x00, 0x00, 0x00, 0x00, // padding
        ];
        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.dataspace_type, DataspaceType::Scalar);
        assert_eq!(cursor.position() as usize, data.len());
    }

    #[test]
    fn test_unsupported_version() {
        let data = [0x03, 0x00, 0x00, 0x00];
        let mut cursor = Cursor::new(&data);
        // Pin the specific error rather than accepting any failure.
        assert!(matches!(
            parse(&mut cursor, 8, 8, data.len()),
            Err(Error::UnsupportedDataspaceVersion(3))
        ));
    }
}
285}