Skip to main content

hdf5_reader/messages/
attribute.rs

1//! HDF5 Attribute message (type 0x000C).
2//!
3//! Attributes are small named data items attached to any HDF5 object.
4//! Each attribute has a name, datatype, dataspace, and raw data.
5
6use crate::error::{Error, Result};
7use crate::io::Cursor;
8
9use super::dataspace::{self, DataspaceMessage};
10use super::datatype::{self, Datatype};
11
12/// Parsed attribute message.
13#[derive(Debug, Clone)]
14pub struct AttributeMessage {
15    /// Attribute name.
16    pub name: String,
17    /// Datatype of the attribute value.
18    pub datatype: Datatype,
19    /// Dataspace describing the shape of the attribute data.
20    pub dataspace: DataspaceMessage,
21    /// Raw attribute data (un-decoded bytes).
22    pub raw_data: Vec<u8>,
23}
24
25/// Parse an attribute message.
26///
27/// Attribute messages embed inline datatype and dataspace descriptions
28/// followed by the raw data bytes.
29pub fn parse(
30    cursor: &mut Cursor<'_>,
31    offset_size: u8,
32    length_size: u8,
33    msg_size: usize,
34) -> Result<AttributeMessage> {
35    let start = cursor.position();
36    let version = cursor.read_u8()?;
37
38    let result = match version {
39        1 => parse_v1(cursor, offset_size, length_size),
40        2 => parse_v2(cursor, offset_size, length_size),
41        3 => parse_v3(cursor, offset_size, length_size),
42        v => Err(Error::UnsupportedAttributeVersion(v)),
43    };
44
45    result.and_then(|msg| {
46        let consumed = (cursor.position() - start) as usize;
47        if consumed < msg_size {
48            cursor.skip(msg_size - consumed)?;
49        }
50        Ok(msg)
51    })
52}
53
54fn parse_v1(cursor: &mut Cursor<'_>, offset_size: u8, length_size: u8) -> Result<AttributeMessage> {
55    let _reserved = cursor.read_u8()?;
56    let name_size = cursor.read_u16_le()? as usize;
57    let datatype_size = cursor.read_u16_le()? as usize;
58    let dataspace_size = cursor.read_u16_le()? as usize;
59
60    // Name — padded to 8-byte boundary
61    let name = cursor.read_fixed_string(name_size)?;
62    let name_padded = (name_size + 7) & !7;
63    if name_padded > name_size {
64        cursor.skip(name_padded - name_size)?;
65    }
66
67    // Datatype — padded to 8
68    let dt_msg = datatype::parse(cursor, datatype_size)?;
69    let dt_consumed = datatype_size; // parse() already handles its own size
70    let dt_padded = (dt_consumed + 7) & !7;
71    if dt_padded > dt_consumed {
72        cursor.skip(dt_padded - dt_consumed)?;
73    }
74
75    // Dataspace — padded to 8
76    let ds_msg = dataspace::parse(cursor, offset_size, length_size, dataspace_size)?;
77    let ds_consumed = dataspace_size;
78    let ds_padded = (ds_consumed + 7) & !7;
79    if ds_padded > ds_consumed {
80        cursor.skip(ds_padded - ds_consumed)?;
81    }
82
83    // Raw data — remaining bytes are the attribute data
84    let data_size = ds_msg.num_elements() as usize * dt_msg.size as usize;
85    let raw_data = if data_size > 0 {
86        cursor.read_bytes(data_size)?.to_vec()
87    } else {
88        vec![]
89    };
90
91    Ok(AttributeMessage {
92        name,
93        datatype: dt_msg.datatype,
94        dataspace: ds_msg,
95        raw_data,
96    })
97}
98
99fn parse_v2(cursor: &mut Cursor<'_>, offset_size: u8, length_size: u8) -> Result<AttributeMessage> {
100    let _flags = cursor.read_u8()?;
101    let name_size = cursor.read_u16_le()? as usize;
102    let datatype_size = cursor.read_u16_le()? as usize;
103    let dataspace_size = cursor.read_u16_le()? as usize;
104
105    // Name — NOT padded in v2
106    let name = cursor.read_fixed_string(name_size)?;
107
108    // Datatype
109    let dt_msg = datatype::parse(cursor, datatype_size)?;
110
111    // Dataspace
112    let ds_msg = dataspace::parse(cursor, offset_size, length_size, dataspace_size)?;
113
114    // Raw data
115    let data_size = ds_msg.num_elements() as usize * dt_msg.size as usize;
116    let raw_data = if data_size > 0 {
117        cursor.read_bytes(data_size)?.to_vec()
118    } else {
119        vec![]
120    };
121
122    Ok(AttributeMessage {
123        name,
124        datatype: dt_msg.datatype,
125        dataspace: ds_msg,
126        raw_data,
127    })
128}
129
130fn parse_v3(cursor: &mut Cursor<'_>, offset_size: u8, length_size: u8) -> Result<AttributeMessage> {
131    let flags = cursor.read_u8()?;
132    let name_size = cursor.read_u16_le()? as usize;
133    let datatype_size = cursor.read_u16_le()? as usize;
134    let dataspace_size = cursor.read_u16_le()? as usize;
135    let _name_encoding = cursor.read_u8()?;
136
137    if (flags & 0x03) != 0 {
138        return Err(Error::InvalidData(
139            "shared datatype/dataspace in attribute v3 is not supported".to_string(),
140        ));
141    }
142
143    // Name — NOT padded in v3
144    let name = cursor.read_fixed_string(name_size)?;
145
146    // Datatype
147    let dt_msg = datatype::parse(cursor, datatype_size)?;
148
149    // Dataspace
150    let ds_msg = dataspace::parse(cursor, offset_size, length_size, dataspace_size)?;
151
152    // Raw data
153    let data_size = ds_msg.num_elements() as usize * dt_msg.size as usize;
154    let raw_data = if data_size > 0 {
155        cursor.read_bytes(data_size)?.to_vec()
156    } else {
157        vec![]
158    };
159
160    Ok(AttributeMessage {
161        name,
162        datatype: dt_msg.datatype,
163        dataspace: ds_msg,
164        raw_data,
165    })
166}
167
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ByteOrder;
    use crate::messages::dataspace::DataspaceType;

    /// Build a minimal u32-LE datatype description
    /// (12 bytes total: 4 class/version/flags + 4 size + 4 properties).
    fn u32_le_datatype() -> Vec<u8> {
        let mut buf = Vec::new();
        // class=0, version=1, flags=0 (LE unsigned)
        let class_word: u32 = 0x01 << 4;
        buf.extend_from_slice(&class_word.to_le_bytes());
        // size=4
        buf.extend_from_slice(&4u32.to_le_bytes());
        // properties: offset=0, precision=32
        buf.extend_from_slice(&0u16.to_le_bytes());
        buf.extend_from_slice(&32u16.to_le_bytes());
        buf
    }

    /// Build a scalar dataspace (v2, rank=0, type=scalar).
    fn scalar_dataspace() -> Vec<u8> {
        vec![0x02, 0x00, 0x00, 0x00]
    }

    #[test]
    fn test_parse_v1_scalar_u32_attr() {
        let dt = u32_le_datatype();
        let ds = scalar_dataspace();

        let mut data = vec![
            0x01, // version 1
            0x00, // reserved
        ];
        // name size = 5 ("temp\0")
        data.extend_from_slice(&5u16.to_le_bytes());
        // datatype size
        data.extend_from_slice(&(dt.len() as u16).to_le_bytes());
        // dataspace size
        data.extend_from_slice(&(ds.len() as u16).to_le_bytes());

        // Name "temp\0" padded to 8 bytes
        data.extend_from_slice(b"temp\0\0\0\0");

        // Datatype (12 bytes), padded to 16
        data.extend_from_slice(&dt);
        data.extend_from_slice(&[0u8; 4]); // padding to 16

        // Dataspace (4 bytes), padded to 8
        data.extend_from_slice(&ds);
        data.extend_from_slice(&[0u8; 4]); // padding to 8

        // Raw data: 1 scalar element * 4 bytes
        data.extend_from_slice(&42u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.name, "temp");
        assert_eq!(msg.dataspace.dataspace_type, DataspaceType::Scalar);
        assert_eq!(msg.raw_data, 42u32.to_le_bytes());
        match &msg.datatype {
            Datatype::FixedPoint {
                size: 4,
                signed: false,
                byte_order: ByteOrder::LittleEndian,
            } => {}
            other => panic!("unexpected datatype: {:?}", other),
        }
    }

    #[test]
    fn test_parse_v2_scalar_attr() {
        let dt = u32_le_datatype();
        let ds = scalar_dataspace();

        let mut data = vec![
            0x02, // version 2
            0x00, // flags: nothing shared
        ];
        // name size = 3 ("v2\0")
        data.extend_from_slice(&3u16.to_le_bytes());
        data.extend_from_slice(&(dt.len() as u16).to_le_bytes());
        data.extend_from_slice(&(ds.len() as u16).to_le_bytes());

        // Name, datatype and dataspace follow each other without padding in v2
        data.extend_from_slice(b"v2\0");
        data.extend_from_slice(&dt);
        data.extend_from_slice(&ds);

        // Raw data
        data.extend_from_slice(&17u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.name, "v2");
        assert_eq!(msg.dataspace.dataspace_type, DataspaceType::Scalar);
        assert_eq!(msg.raw_data, 17u32.to_le_bytes());
    }

    #[test]
    fn test_parse_v3_scalar_attr() {
        let dt = u32_le_datatype();
        let ds = scalar_dataspace();

        let mut data = vec![
            0x03, // version 3
            0x00, // flags
        ];
        // name size = 4 ("abc\0")
        data.extend_from_slice(&4u16.to_le_bytes());
        data.extend_from_slice(&(dt.len() as u16).to_le_bytes());
        data.extend_from_slice(&(ds.len() as u16).to_le_bytes());
        data.push(0x00); // ASCII name encoding

        // Name (not padded in v3)
        data.extend_from_slice(b"abc\0");

        // Datatype
        data.extend_from_slice(&dt);

        // Dataspace
        data.extend_from_slice(&ds);

        // Raw data
        data.extend_from_slice(&99u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.name, "abc");
        assert_eq!(msg.dataspace.dataspace_type, DataspaceType::Scalar);
        assert_eq!(msg.raw_data, 99u32.to_le_bytes());
    }

    #[test]
    fn test_parse_v3_utf8_name_attr() {
        let dt = u32_le_datatype();
        let ds = scalar_dataspace();

        let mut data = vec![
            0x03, // version 3
            0x00, // flags
        ];
        data.extend_from_slice(&2u16.to_le_bytes()); // "x\0"
        data.extend_from_slice(&(dt.len() as u16).to_le_bytes());
        data.extend_from_slice(&(ds.len() as u16).to_le_bytes());
        data.push(0x01); // UTF-8 name encoding
        data.extend_from_slice(b"x\0");
        data.extend_from_slice(&dt);
        data.extend_from_slice(&ds);
        data.extend_from_slice(&7u32.to_le_bytes());

        let mut cursor = Cursor::new(&data);
        let msg = parse(&mut cursor, 8, 8, data.len()).unwrap();
        assert_eq!(msg.name, "x");
        assert_eq!(msg.dataspace.dataspace_type, DataspaceType::Scalar);
        assert_eq!(msg.raw_data, 7u32.to_le_bytes());
    }

    #[test]
    fn test_parse_v3_rejects_shared_datatype() {
        let dt = u32_le_datatype();
        let ds = scalar_dataspace();

        let mut data = vec![
            0x03, // version 3
            0x01, // flags: bit 0 set => shared datatype
        ];
        data.extend_from_slice(&2u16.to_le_bytes()); // "x\0"
        data.extend_from_slice(&(dt.len() as u16).to_le_bytes());
        data.extend_from_slice(&(ds.len() as u16).to_le_bytes());
        data.push(0x00); // ASCII name encoding
        data.extend_from_slice(b"x\0");

        let mut cursor = Cursor::new(&data);
        let result = parse(&mut cursor, 8, 8, data.len());
        assert!(matches!(result, Err(Error::InvalidData(_))));
    }

    #[test]
    fn test_parse_rejects_unknown_version() {
        let data = vec![0x09]; // no such attribute message version

        let mut cursor = Cursor::new(&data);
        let result = parse(&mut cursor, 8, 8, data.len());
        assert!(matches!(
            result,
            Err(Error::UnsupportedAttributeVersion(9))
        ));
    }
}