Skip to main content

rust_hdf5/format/messages/
attribute.rs

1//! Attribute message (type 0x0C) -- describes an attribute attached to an object.
2//!
3//! Binary layout (version 3, no shared datatypes):
4//!   Byte 0:    version = 3
5//!   Byte 1:    flags (0 for non-shared)
6//!   Bytes 2-3: name_size (u16 LE, including null terminator)
7//!   Bytes 4-5: datatype_size (u16 LE)
8//!   Bytes 6-7: dataspace_size (u16 LE)
9//!   Byte 8:    name character set encoding (0=ASCII, 1=UTF-8)
10//!   <name: name_size bytes, null-terminated>
11//!   <encoded datatype message: datatype_size bytes>
12//!   <encoded dataspace message: dataspace_size bytes>
13//!   <raw attribute data>
14
15use crate::format::messages::dataspace::DataspaceMessage;
16use crate::format::messages::datatype::DatatypeMessage;
17use crate::format::{FormatContext, FormatError, FormatResult};
18
/// Attribute message version written by `encode`; also the highest version
/// that `decode` accepts (it accepts every version from 1 up to this value).
const ATTR_VERSION: u8 = 3;
20
/// An HDF5 attribute message: a named value attached to an object.
///
/// The struct holds the decoded parts of the message; `encode` and `decode`
/// convert between this representation and the on-disk byte layout.
#[derive(Debug, Clone, PartialEq)]
pub struct AttributeMessage {
    /// Attribute name, stored without the trailing null terminator
    /// (`encode` appends it, `decode` strips it).
    pub name: String,
    /// Datatype of the attribute value.
    pub datatype: DatatypeMessage,
    /// Dataspace (scalar or simple).
    pub dataspace: DataspaceMessage,
    /// Raw attribute value bytes, written verbatim after the encoded dataspace.
    pub data: Vec<u8>,
}
33
34impl AttributeMessage {
35    /// Create a scalar string attribute with the given name and value.
36    ///
37    /// Uses a null-terminated UTF-8 fixed-length string datatype with
38    /// size = value.len() + 1 (for the null terminator), and a scalar
39    /// dataspace.
40    pub fn scalar_string(name: &str, value: &str) -> Self {
41        let str_size = (value.len() + 1) as u32; // +1 for null terminator
42        let datatype = DatatypeMessage::fixed_string_utf8(str_size);
43        let dataspace = DataspaceMessage::scalar();
44
45        // Data: string bytes + null terminator
46        let mut data = Vec::with_capacity(str_size as usize);
47        data.extend_from_slice(value.as_bytes());
48        data.push(0); // null terminator
49
50        Self {
51            name: name.to_string(),
52            datatype,
53            dataspace,
54            data,
55        }
56    }
57
58    /// Create a scalar numeric attribute with raw bytes as value.
59    pub fn scalar_numeric(name: &str, datatype: DatatypeMessage, data: Vec<u8>) -> Self {
60        Self {
61            name: name.to_string(),
62            datatype,
63            dataspace: DataspaceMessage::scalar(),
64            data,
65        }
66    }
67
68    /// Encode the attribute message into a byte vector.
69    ///
70    /// The result is the raw payload for an object header message of type
71    /// 0x0C (MSG_ATTRIBUTE). It does NOT include the object header message
72    /// envelope (type, size, flags bytes); that is handled by the caller.
73    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
74        let encoded_dt = self.datatype.encode(ctx);
75        let encoded_ds = self.dataspace.encode(ctx);
76
77        // Name with null terminator
78        let name_bytes = self.name.as_bytes();
79        let name_size = name_bytes.len() + 1; // +1 for null terminator
80
81        // Total: 9 (header) + name_size + datatype_size + dataspace_size + data_size
82        let total = 9 + name_size + encoded_dt.len() + encoded_ds.len() + self.data.len();
83        let mut buf = Vec::with_capacity(total);
84
85        // Byte 0: version
86        buf.push(ATTR_VERSION);
87
88        // Byte 1: flags (0 = non-shared)
89        buf.push(0x00);
90
91        // Bytes 2-3: name size (u16 LE)
92        buf.extend_from_slice(&(name_size as u16).to_le_bytes());
93
94        // Bytes 4-5: datatype size (u16 LE)
95        buf.extend_from_slice(&(encoded_dt.len() as u16).to_le_bytes());
96
97        // Bytes 6-7: dataspace size (u16 LE)
98        buf.extend_from_slice(&(encoded_ds.len() as u16).to_le_bytes());
99
100        // Byte 8: name character set encoding (1 = UTF-8)
101        buf.push(0x01);
102
103        // Name (null-terminated)
104        buf.extend_from_slice(name_bytes);
105        buf.push(0x00);
106
107        // Encoded datatype
108        buf.extend_from_slice(&encoded_dt);
109
110        // Encoded dataspace
111        buf.extend_from_slice(&encoded_ds);
112
113        // Raw data
114        buf.extend_from_slice(&self.data);
115
116        debug_assert_eq!(buf.len(), total);
117        buf
118    }
119
120    /// Decode an attribute message from a byte buffer.
121    ///
122    /// Supports versions 1, 2, and 3:
123    /// - v1: 8-byte header, each field padded to 8-byte alignment
124    /// - v2: 8-byte header, no alignment padding
125    /// - v3: 9-byte header (adds charset byte), no alignment padding
126    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<(Self, usize)> {
127        if buf.len() < 8 {
128            return Err(FormatError::BufferTooShort {
129                needed: 8,
130                available: buf.len(),
131            });
132        }
133
134        let version = buf[0];
135        if !(1..=ATTR_VERSION).contains(&version) {
136            return Err(FormatError::InvalidVersion(version));
137        }
138
139        // flags at buf[1]
140        let name_size = u16::from_le_bytes([buf[2], buf[3]]) as usize;
141        let datatype_size = u16::from_le_bytes([buf[4], buf[5]]) as usize;
142        let dataspace_size = u16::from_le_bytes([buf[6], buf[7]]) as usize;
143
144        let mut pos = if version >= 3 {
145            // v3 has charset byte at offset 8
146            9
147        } else {
148            // v1, v2: no charset byte
149            8
150        };
151
152        // v1 pads each field to 8-byte alignment
153        let align = if version == 1 { 8 } else { 1 };
154
155        // Name
156        let needed = pos + name_size;
157        if buf.len() < needed {
158            return Err(FormatError::BufferTooShort {
159                needed,
160                available: buf.len(),
161            });
162        }
163        // Strip trailing null
164        let name_end = if name_size > 0 && buf[pos + name_size - 1] == 0 {
165            pos + name_size - 1
166        } else {
167            pos + name_size
168        };
169        let name = String::from_utf8_lossy(&buf[pos..name_end]).to_string();
170        pos += name_size;
171        // v1 alignment
172        if align > 1 {
173            pos = (pos + align - 1) & !(align - 1);
174        }
175
176        // Datatype
177        let needed = pos + datatype_size;
178        if buf.len() < needed {
179            return Err(FormatError::BufferTooShort {
180                needed,
181                available: buf.len(),
182            });
183        }
184        let (datatype, _) = DatatypeMessage::decode(&buf[pos..pos + datatype_size], ctx)?;
185        pos += datatype_size;
186        if align > 1 {
187            pos = (pos + align - 1) & !(align - 1);
188        }
189
190        // Dataspace
191        let needed = pos + dataspace_size;
192        if buf.len() < needed {
193            return Err(FormatError::BufferTooShort {
194                needed,
195                available: buf.len(),
196            });
197        }
198        let (dataspace, _) = DataspaceMessage::decode(&buf[pos..pos + dataspace_size], ctx)?;
199        pos += dataspace_size;
200        if align > 1 {
201            pos = (pos + align - 1) & !(align - 1);
202        }
203
204        // Data: remaining bytes = datatype.element_size() * number_of_elements
205        let num_elements: u64 = if dataspace.dims.is_empty() {
206            1 // scalar
207        } else {
208            dataspace.dims.iter().product()
209        };
210        let data_size = (num_elements * datatype.element_size() as u64) as usize;
211        let needed = pos + data_size;
212        if buf.len() < needed {
213            return Err(FormatError::BufferTooShort {
214                needed,
215                available: buf.len(),
216            });
217        }
218        let data = buf[pos..pos + data_size].to_vec();
219        pos += data_size;
220
221        Ok((
222            Self {
223                name,
224                datatype,
225                dataspace,
226                data,
227            },
228            pos,
229        ))
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Format context with 8-byte addresses and sizes, used by every test.
    fn ctx() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Encode `msg`, decode the bytes again, and hand back the encoded bytes,
    /// the decoded message, and the number of bytes the decoder consumed.
    fn roundtrip(msg: &AttributeMessage) -> (Vec<u8>, AttributeMessage, usize) {
        let bytes = msg.encode(&ctx());
        let (decoded, consumed) = AttributeMessage::decode(&bytes, &ctx()).unwrap();
        (bytes, decoded, consumed)
    }

    #[test]
    fn scalar_string_roundtrip() {
        let original = AttributeMessage::scalar_string("my_attr", "hello");
        let (bytes, decoded, consumed) = roundtrip(&original);
        assert_eq!(consumed, bytes.len());
        assert_eq!(decoded.name, "my_attr");
        assert_eq!(decoded.data, b"hello\0");
        assert_eq!(decoded, original);
    }

    #[test]
    fn scalar_string_empty() {
        let original = AttributeMessage::scalar_string("empty", "");
        let (bytes, decoded, consumed) = roundtrip(&original);
        assert_eq!(consumed, bytes.len());
        assert_eq!(decoded.name, "empty");
        assert_eq!(decoded.data, b"\0");
        assert_eq!(decoded, original);
    }

    #[test]
    fn version_is_three() {
        let bytes = AttributeMessage::scalar_string("test", "val").encode(&ctx());
        assert_eq!(bytes[0], 3);
    }

    #[test]
    fn decode_buffer_too_short() {
        // Four bytes cannot even hold the fixed header.
        let truncated = [0u8; 4];
        let err = AttributeMessage::decode(&truncated, &ctx()).unwrap_err();
        assert!(
            matches!(err, FormatError::BufferTooShort { .. }),
            "unexpected error: {:?}",
            err
        );
    }

    #[test]
    fn decode_bad_version() {
        let mut bytes = AttributeMessage::scalar_string("x", "y").encode(&ctx());
        bytes[0] = 0; // invalid version
        let err = AttributeMessage::decode(&bytes, &ctx()).unwrap_err();
        assert!(
            matches!(err, FormatError::InvalidVersion(0)),
            "unexpected error: {:?}",
            err
        );
    }

    #[test]
    fn scalar_string_utf8_content() {
        let original = AttributeMessage::scalar_string("desc", "caf\u{00e9}");
        let (_, decoded, _) = roundtrip(&original);
        assert_eq!(decoded.name, "desc");
        // "caf\u{e9}" is 5 bytes in UTF-8 + null = 6
        assert_eq!(decoded.data.len(), 6);
        assert_eq!(&decoded.data[..5], "caf\u{00e9}".as_bytes());
        assert_eq!(decoded.data[5], 0);
    }
}