Skip to main content

rust_hdf5/format/messages/
link.rs

//! Link message (type 0x06) — encodes a single link within a group.
//!
//! Binary layout (version 1):
//!   Byte 0: version = 1
//!   Byte 1: flags
//!     bits 0-1: size of name-length field (0=1B, 1=2B, 2=4B, 3=8B)
//!     bit 2:    creation order present
//!     bit 3:    link type present
//!     bit 4:    charset field present
//!   [if bit 3]: link_type u8 (0=hard, 1=soft, 64=external, 65-255=user-defined)
//!   [if bit 2]: creation_order, 8 bytes LE (a 64-bit value in the HDF5 format)
//!   [if bit 4]: charset u8 (0=ASCII, 1=UTF-8)
//!   name_length: 1/2/4/8 bytes per bits 0-1
//!   name:        name_length bytes (UTF-8)
//!   [hard link]:  address (sizeof_addr bytes)
//!   [soft link]:  target_length u16 LE + target string
17
18use crate::format::{FormatContext, FormatError, FormatResult};
19
/// The only link-message version this module reads and writes.
const VERSION: u8 = 1;

/// Flags byte, bits 0-1: width code of the name-length field.
const FLAG_NAME_LEN_MASK: u8 = 0b0000_0011;
/// Flags byte, bit 2: a creation-order field is present.
const FLAG_CREATION_ORDER: u8 = 1 << 2;
/// Flags byte, bit 3: a link-type field is present.
const FLAG_LINK_TYPE: u8 = 1 << 3;
/// Flags byte, bit 4: a charset field is present.
const FLAG_CHARSET: u8 = 1 << 4;

/// Link-type code of a hard link.
const LINK_TYPE_HARD: u8 = 0;
/// Link-type code of a soft link.
const LINK_TYPE_SOFT: u8 = 1;
29
/// What a link points at.
///
/// Equality is total (every field is `Eq`), so the type derives `Eq` alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LinkTarget {
    /// Hard link — points to an object header at `address`.
    Hard { address: u64 },
    /// Soft link — points to a path string.
    Soft { target: String },
}

/// Link message payload: the link's name together with its target.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkMessage {
    /// Name of the link within its group.
    pub name: String,
    /// Object address (hard link) or path (soft link) the name resolves to.
    pub target: LinkTarget,
}
45
46impl LinkMessage {
47    /// Create a hard link.
48    pub fn hard(name: &str, address: u64) -> Self {
49        Self {
50            name: name.to_string(),
51            target: LinkTarget::Hard { address },
52        }
53    }
54
55    /// Create a soft link.
56    pub fn soft(name: &str, target: &str) -> Self {
57        Self {
58            name: name.to_string(),
59            target: LinkTarget::Soft {
60                target: target.to_string(),
61            },
62        }
63    }
64
65    // ------------------------------------------------------------------ encode
66
67    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
68        let name_bytes = self.name.as_bytes();
69        let name_len = name_bytes.len();
70        let name_len_size = min_bytes_for_value(name_len as u64);
71        let name_len_code = match name_len_size {
72            1 => 0u8,
73            2 => 1,
74            4 => 2,
75            _ => 3, // 8
76        };
77
78        let link_type = match &self.target {
79            LinkTarget::Hard { .. } => LINK_TYPE_HARD,
80            LinkTarget::Soft { .. } => LINK_TYPE_SOFT,
81        };
82
83        // Always store link type so that soft links are correctly identified.
84        let mut flags: u8 = name_len_code & FLAG_NAME_LEN_MASK;
85        flags |= FLAG_LINK_TYPE; // always include link type for clarity
86        flags |= FLAG_CHARSET; // always include charset (UTF-8)
87
88        let mut buf = Vec::with_capacity(32);
89        buf.push(VERSION);
90        buf.push(flags);
91
92        // link type
93        buf.push(link_type);
94
95        // charset: 1 = UTF-8
96        buf.push(1u8);
97
98        // name length
99        match name_len_size {
100            1 => buf.push(name_len as u8),
101            2 => buf.extend_from_slice(&(name_len as u16).to_le_bytes()),
102            4 => buf.extend_from_slice(&(name_len as u32).to_le_bytes()),
103            _ => buf.extend_from_slice(&(name_len as u64).to_le_bytes()),
104        }
105
106        // name
107        buf.extend_from_slice(name_bytes);
108
109        // link info
110        match &self.target {
111            LinkTarget::Hard { address } => {
112                let sa = ctx.sizeof_addr as usize;
113                buf.extend_from_slice(&address.to_le_bytes()[..sa]);
114            }
115            LinkTarget::Soft { target } => {
116                let tbytes = target.as_bytes();
117                buf.extend_from_slice(&(tbytes.len() as u16).to_le_bytes());
118                buf.extend_from_slice(tbytes);
119            }
120        }
121
122        buf
123    }
124
125    // ------------------------------------------------------------------ decode
126
127    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<(Self, usize)> {
128        if buf.len() < 2 {
129            return Err(FormatError::BufferTooShort {
130                needed: 2,
131                available: buf.len(),
132            });
133        }
134
135        let version = buf[0];
136        if version != VERSION {
137            return Err(FormatError::InvalidVersion(version));
138        }
139
140        let flags = buf[1];
141        let name_len_code = flags & FLAG_NAME_LEN_MASK;
142        let has_creation_order = (flags & FLAG_CREATION_ORDER) != 0;
143        let has_link_type = (flags & FLAG_LINK_TYPE) != 0;
144        let has_charset = (flags & FLAG_CHARSET) != 0;
145
146        let mut pos = 2;
147
148        // link type
149        let link_type = if has_link_type {
150            check_len(buf, pos, 1)?;
151            let lt = buf[pos];
152            pos += 1;
153            lt
154        } else {
155            LINK_TYPE_HARD // default
156        };
157
158        // creation order
159        if has_creation_order {
160            check_len(buf, pos, 4)?;
161            // skip creation order (u32) — we don't store it in the struct
162            pos += 4;
163        }
164
165        // charset
166        if has_charset {
167            check_len(buf, pos, 1)?;
168            // skip charset byte
169            pos += 1;
170        }
171
172        // name length
173        let name_len_size: usize = match name_len_code {
174            0 => 1,
175            1 => 2,
176            2 => 4,
177            _ => 8,
178        };
179        check_len(buf, pos, name_len_size)?;
180        let name_len = read_uint(&buf[pos..], name_len_size) as usize;
181        pos += name_len_size;
182
183        // name
184        check_len(buf, pos, name_len)?;
185        let name = std::str::from_utf8(&buf[pos..pos + name_len])
186            .map_err(|e| FormatError::InvalidData(format!("invalid UTF-8 link name: {}", e)))?
187            .to_string();
188        pos += name_len;
189
190        // target
191        let target = match link_type {
192            LINK_TYPE_HARD => {
193                let sa = ctx.sizeof_addr as usize;
194                check_len(buf, pos, sa)?;
195                let address = read_uint(&buf[pos..], sa);
196                pos += sa;
197                LinkTarget::Hard { address }
198            }
199            LINK_TYPE_SOFT => {
200                check_len(buf, pos, 2)?;
201                let tlen = u16::from_le_bytes([buf[pos], buf[pos + 1]]) as usize;
202                pos += 2;
203                check_len(buf, pos, tlen)?;
204                let target = std::str::from_utf8(&buf[pos..pos + tlen])
205                    .map_err(|e| {
206                        FormatError::InvalidData(format!("invalid UTF-8 soft link target: {}", e))
207                    })?
208                    .to_string();
209                pos += tlen;
210                LinkTarget::Soft { target }
211            }
212            other => {
213                return Err(FormatError::UnsupportedFeature(format!(
214                    "link type {}",
215                    other
216                )));
217            }
218        };
219
220        Ok((Self { name, target }, pos))
221    }
222}
223
224// ========================================================================= helpers
225
226fn check_len(buf: &[u8], pos: usize, need: usize) -> FormatResult<()> {
227    if buf.len() < pos + need {
228        Err(FormatError::BufferTooShort {
229            needed: pos + need,
230            available: buf.len(),
231        })
232    } else {
233        Ok(())
234    }
235}
236
/// Decode the first `n` bytes of `buf` as a little-endian unsigned integer, zero-extended
/// to `u64`.
///
/// Panics if `n` is greater than 8 or if `buf` holds fewer than `n` bytes.
fn read_uint(buf: &[u8], n: usize) -> u64 {
    let mut word = [0u8; 8];
    // Fill the low-order bytes; the untouched high bytes stay zero.
    for (dst, src) in word[..n].iter_mut().zip(&buf[..n]) {
        *dst = *src;
    }
    u64::from_le_bytes(word)
}
243
/// Smallest field width in bytes — one of 1, 2, 4 or 8 — that can hold `v`.
fn min_bytes_for_value(v: u64) -> usize {
    match v {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        0x1_0000..=0xFFFF_FFFF => 4,
        _ => 8,
    }
}
256
257// ======================================================================= tests
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Context with 8-byte addresses and sizes.
    fn ctx8() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Context with 4-byte addresses and sizes.
    fn ctx4() -> FormatContext {
        FormatContext {
            sizeof_addr: 4,
            sizeof_size: 4,
        }
    }

    /// Encode `msg`, decode the bytes again, and return
    /// `(decoded message, bytes consumed by decode, encoded length)`.
    fn roundtrip(msg: &LinkMessage, ctx: &FormatContext) -> (LinkMessage, usize, usize) {
        let bytes = msg.encode(ctx);
        let (decoded, consumed) = LinkMessage::decode(&bytes, ctx).unwrap();
        (decoded, consumed, bytes.len())
    }

    #[test]
    fn roundtrip_hard_link() {
        let msg = LinkMessage::hard("dataset1", 0x1000);
        let (decoded, consumed, encoded_len) = roundtrip(&msg, &ctx8());
        assert_eq!(consumed, encoded_len);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_hard_link_ctx4() {
        let msg = LinkMessage::hard("grp", 0x2000);
        let (decoded, consumed, encoded_len) = roundtrip(&msg, &ctx4());
        assert_eq!(consumed, encoded_len);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_soft_link() {
        let msg = LinkMessage::soft("alias", "/group/dataset");
        let (decoded, consumed, encoded_len) = roundtrip(&msg, &ctx8());
        assert_eq!(consumed, encoded_len);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_empty_name() {
        // Edge case: a zero-length name.
        let msg = LinkMessage::hard("", 0x100);
        let (decoded, _, _) = roundtrip(&msg, &ctx8());
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_long_name() {
        // A name longer than 255 bytes needs the 2-byte name-length field.
        let long_name = "a".repeat(300);
        let msg = LinkMessage::hard(&long_name, 0xABCD);
        let (decoded, consumed, encoded_len) = roundtrip(&msg, &ctx8());
        assert_eq!(consumed, encoded_len);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_unicode_name() {
        let msg = LinkMessage::hard("日本語データ", 0x4000);
        let (decoded, _, _) = roundtrip(&msg, &ctx8());
        assert_eq!(decoded, msg);
    }

    #[test]
    fn decode_bad_version() {
        // Version 2 is not supported.
        let err = LinkMessage::decode(&[2u8, 0], &ctx8()).unwrap_err();
        assert!(
            matches!(err, FormatError::InvalidVersion(2)),
            "unexpected error: {:?}",
            err
        );
    }

    #[test]
    fn decode_buffer_too_short() {
        let err = LinkMessage::decode(&[1u8], &ctx8()).unwrap_err();
        assert!(
            matches!(err, FormatError::BufferTooShort { .. }),
            "unexpected error: {:?}",
            err
        );
    }

    #[test]
    fn version_byte() {
        let encoded = LinkMessage::hard("x", 0).encode(&ctx8());
        assert_eq!(encoded[0], 1);
    }

    #[test]
    fn min_bytes_for_value_checks() {
        let expectations: [(u64, usize); 7] = [
            (0, 1),
            (255, 1),
            (256, 2),
            (65535, 2),
            (65536, 4),
            (u32::MAX as u64, 4),
            (u32::MAX as u64 + 1, 8),
        ];
        for &(value, width) in expectations.iter() {
            assert_eq!(min_bytes_for_value(value), width);
        }
    }
}