Skip to main content

rust_hdf5/format/messages/
attribute.rs

1//! Attribute message (type 0x0C) -- describes an attribute attached to an object.
2//!
3//! Binary layout (version 3, no shared datatypes):
4//!   Byte 0:    version = 3
5//!   Byte 1:    flags (0 for non-shared)
6//!   Bytes 2-3: name_size (u16 LE, including null terminator)
7//!   Bytes 4-5: datatype_size (u16 LE)
8//!   Bytes 6-7: dataspace_size (u16 LE)
9//!   Byte 8:    name character set encoding (0=ASCII, 1=UTF-8)
10//!   <name: name_size bytes, null-terminated>
11//!   <encoded datatype message: datatype_size bytes>
12//!   <encoded dataspace message: dataspace_size bytes>
13//!   <raw attribute data>
14
15use crate::format::messages::dataspace::DataspaceMessage;
16use crate::format::messages::datatype::DatatypeMessage;
17use crate::format::{FormatContext, FormatError, FormatResult};
18
/// Attribute message version written by `AttributeMessage::encode`; also the
/// highest version `AttributeMessage::decode` accepts.
const ATTR_VERSION: u8 = 3;
20
21/// An HDF5 attribute message.
22#[derive(Debug, Clone, PartialEq)]
23pub struct AttributeMessage {
24    /// Attribute name.
25    pub name: String,
26    /// Datatype of the attribute value.
27    pub datatype: DatatypeMessage,
28    /// Dataspace (scalar or simple).
29    pub dataspace: DataspaceMessage,
30    /// Raw attribute value data.
31    pub data: Vec<u8>,
32}
33
34impl AttributeMessage {
35    /// Create a scalar string attribute with the given name and value.
36    ///
37    /// Uses a null-terminated UTF-8 fixed-length string datatype with
38    /// size = value.len() + 1 (for the null terminator), and a scalar
39    /// dataspace.
40    pub fn scalar_string(name: &str, value: &str) -> Self {
41        let str_size = (value.len() + 1) as u32; // +1 for null terminator
42        let datatype = DatatypeMessage::fixed_string_utf8(str_size);
43        let dataspace = DataspaceMessage::scalar();
44
45        // Data: string bytes + null terminator
46        let mut data = Vec::with_capacity(str_size as usize);
47        data.extend_from_slice(value.as_bytes());
48        data.push(0); // null terminator
49
50        Self {
51            name: name.to_string(),
52            datatype,
53            dataspace,
54            data,
55        }
56    }
57
58    /// Create a scalar numeric attribute with raw bytes as value.
59    pub fn scalar_numeric(name: &str, datatype: DatatypeMessage, data: Vec<u8>) -> Self {
60        Self {
61            name: name.to_string(),
62            datatype,
63            dataspace: DataspaceMessage::scalar(),
64            data,
65        }
66    }
67
68    /// Encode the attribute message into a byte vector.
69    ///
70    /// The result is the raw payload for an object header message of type
71    /// 0x0C (MSG_ATTRIBUTE). It does NOT include the object header message
72    /// envelope (type, size, flags bytes); that is handled by the caller.
73    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
74        let encoded_dt = self.datatype.encode(ctx);
75        let encoded_ds = self.dataspace.encode(ctx);
76
77        // Name with null terminator
78        let name_bytes = self.name.as_bytes();
79        let name_size = name_bytes.len() + 1; // +1 for null terminator
80
81        // Total: 9 (header) + name_size + datatype_size + dataspace_size + data_size
82        let total = 9 + name_size + encoded_dt.len() + encoded_ds.len() + self.data.len();
83        let mut buf = Vec::with_capacity(total);
84
85        // Byte 0: version
86        buf.push(ATTR_VERSION);
87
88        // Byte 1: flags (0 = non-shared)
89        buf.push(0x00);
90
91        // Bytes 2-3: name size (u16 LE)
92        buf.extend_from_slice(&(name_size as u16).to_le_bytes());
93
94        // Bytes 4-5: datatype size (u16 LE)
95        buf.extend_from_slice(&(encoded_dt.len() as u16).to_le_bytes());
96
97        // Bytes 6-7: dataspace size (u16 LE)
98        buf.extend_from_slice(&(encoded_ds.len() as u16).to_le_bytes());
99
100        // Byte 8: name character set encoding (1 = UTF-8)
101        buf.push(0x01);
102
103        // Name (null-terminated)
104        buf.extend_from_slice(name_bytes);
105        buf.push(0x00);
106
107        // Encoded datatype
108        buf.extend_from_slice(&encoded_dt);
109
110        // Encoded dataspace
111        buf.extend_from_slice(&encoded_ds);
112
113        // Raw data
114        buf.extend_from_slice(&self.data);
115
116        debug_assert_eq!(buf.len(), total);
117        buf
118    }
119
120    /// Decode an attribute message from a byte buffer.
121    ///
122    /// Supports versions 1, 2, and 3:
123    /// - v1: 8-byte header, each field padded to 8-byte alignment
124    /// - v2: 8-byte header, no alignment padding
125    /// - v3: 9-byte header (adds charset byte), no alignment padding
126    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<(Self, usize)> {
127        if buf.len() < 8 {
128            return Err(FormatError::BufferTooShort {
129                needed: 8,
130                available: buf.len(),
131            });
132        }
133
134        let version = buf[0];
135        if !(1..=ATTR_VERSION).contains(&version) {
136            return Err(FormatError::InvalidVersion(version));
137        }
138
139        // flags at buf[1]
140        let name_size = u16::from_le_bytes([buf[2], buf[3]]) as usize;
141        let datatype_size = u16::from_le_bytes([buf[4], buf[5]]) as usize;
142        let dataspace_size = u16::from_le_bytes([buf[6], buf[7]]) as usize;
143
144        let mut pos = if version >= 3 {
145            // v3 has charset byte at offset 8
146            9
147        } else {
148            // v1, v2: no charset byte
149            8
150        };
151
152        // v1 pads each field to 8-byte alignment
153        let align = if version == 1 { 8 } else { 1 };
154
155        // Name
156        let needed = pos + name_size;
157        if buf.len() < needed {
158            return Err(FormatError::BufferTooShort {
159                needed,
160                available: buf.len(),
161            });
162        }
163        // Strip trailing null
164        let name_end = if name_size > 0 && buf[pos + name_size - 1] == 0 {
165            pos + name_size - 1
166        } else {
167            pos + name_size
168        };
169        let name = String::from_utf8_lossy(&buf[pos..name_end]).to_string();
170        pos += name_size;
171        // v1 alignment
172        if align > 1 {
173            pos = (pos + align - 1) & !(align - 1);
174        }
175
176        // Datatype
177        let needed = pos + datatype_size;
178        if buf.len() < needed {
179            return Err(FormatError::BufferTooShort {
180                needed,
181                available: buf.len(),
182            });
183        }
184        let (datatype, _) = DatatypeMessage::decode(&buf[pos..pos + datatype_size], ctx)?;
185        pos += datatype_size;
186        if align > 1 {
187            pos = (pos + align - 1) & !(align - 1);
188        }
189
190        // Dataspace
191        let needed = pos + dataspace_size;
192        if buf.len() < needed {
193            return Err(FormatError::BufferTooShort {
194                needed,
195                available: buf.len(),
196            });
197        }
198        let (dataspace, _) = DataspaceMessage::decode(&buf[pos..pos + dataspace_size], ctx)?;
199        pos += dataspace_size;
200        if align > 1 {
201            pos = (pos + align - 1) & !(align - 1);
202        }
203
204        // Data: remaining bytes = datatype.element_size() * number_of_elements
205        let num_elements: u64 = if dataspace.dims.is_empty() {
206            1 // scalar
207        } else {
208            // dims are file-derived; saturate so a crafted attribute with
209            // absurd dimensions is rejected by the buffer check below
210            // instead of overflowing.
211            dataspace
212                .dims
213                .iter()
214                .fold(1u64, |acc, &d| acc.saturating_mul(d))
215        };
216        let data_size = num_elements
217            .saturating_mul(datatype.element_size() as u64)
218            .min(usize::MAX as u64) as usize;
219        let needed = pos.saturating_add(data_size);
220        if buf.len() < needed {
221            return Err(FormatError::BufferTooShort {
222                needed,
223                available: buf.len(),
224            });
225        }
226        let data = buf[pos..pos + data_size].to_vec();
227        pos += data_size;
228
229        Ok((
230            Self {
231                name,
232                datatype,
233                dataspace,
234                data,
235            },
236            pos,
237        ))
238    }
239}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Format context with 8-byte addresses and sizes, used by every test.
    fn ctx() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Encode `msg`, decode the bytes again, and hand back the encoded
    /// bytes, the decoded message and the number of bytes consumed.
    fn roundtrip(msg: &AttributeMessage) -> (Vec<u8>, AttributeMessage, usize) {
        let bytes = msg.encode(&ctx());
        let (decoded, consumed) = AttributeMessage::decode(&bytes, &ctx()).unwrap();
        (bytes, decoded, consumed)
    }

    #[test]
    fn scalar_string_roundtrip() {
        let original = AttributeMessage::scalar_string("my_attr", "hello");
        let (bytes, decoded, consumed) = roundtrip(&original);
        assert_eq!(consumed, bytes.len());
        assert_eq!(decoded.name, "my_attr");
        assert_eq!(decoded.data, b"hello\0");
        assert_eq!(decoded, original);
    }

    #[test]
    fn scalar_string_empty() {
        let original = AttributeMessage::scalar_string("empty", "");
        let (bytes, decoded, consumed) = roundtrip(&original);
        assert_eq!(consumed, bytes.len());
        assert_eq!(decoded.name, "empty");
        // An empty string is stored as just its null terminator.
        assert_eq!(decoded.data, b"\0");
        assert_eq!(decoded, original);
    }

    #[test]
    fn version_is_three() {
        let bytes = AttributeMessage::scalar_string("test", "val").encode(&ctx());
        assert_eq!(bytes[0], 3);
    }

    #[test]
    fn decode_buffer_too_short() {
        // Four bytes cannot hold even the fixed 8-byte header.
        let short = [0u8; 4];
        let err = AttributeMessage::decode(&short, &ctx()).unwrap_err();
        assert!(
            matches!(err, FormatError::BufferTooShort { .. }),
            "unexpected error: {:?}",
            err
        );
    }

    #[test]
    fn decode_bad_version() {
        let mut bytes = AttributeMessage::scalar_string("x", "y").encode(&ctx());
        bytes[0] = 0; // invalid version
        let err = AttributeMessage::decode(&bytes, &ctx()).unwrap_err();
        assert!(
            matches!(err, FormatError::InvalidVersion(0)),
            "unexpected error: {:?}",
            err
        );
    }

    #[test]
    fn scalar_string_utf8_content() {
        let text = "caf\u{00e9}";
        let original = AttributeMessage::scalar_string("desc", text);
        let (_, decoded, _) = roundtrip(&original);
        assert_eq!(decoded.name, "desc");
        // "caf\u{e9}" is 5 bytes in UTF-8 + null = 6
        assert_eq!(decoded.data.len(), 6);
        assert_eq!(&decoded.data[..5], text.as_bytes());
        assert_eq!(decoded.data[5], 0);
    }
}