Skip to main content

rust_hdf5/format/messages/
link.rs

1//! Link message (type 0x06) — encodes a single link within a group.
2//!
3//! Binary layout (version 1):
4//!   Byte 0: version = 1
5//!   Byte 1: flags
6//!     bits 0-1: size of name-length field (0=1B, 1=2B, 2=4B, 3=8B)
7//!     bit 2:    creation order present
8//!     bit 3:    link type present
9//!     bit 4:    charset field present
10//!   [if bit 3]: link_type u8 (0=hard, 1=soft, 64=external, 65-255=user-defined)
11//!   [if bit 2]: creation_order i64 LE
12//!   [if bit 4]: charset u8 (0=ASCII, 1=UTF-8)
13//!   name_length: 1/2/4/8 bytes per bits 0-1
14//!   name:        name_length bytes (UTF-8)
15//!   [hard link]:  address (sizeof_addr bytes)
16//!   [soft link]:  target_length u16 LE + target string
17
18use crate::format::bytes::read_le_uint as read_uint;
19use crate::format::{FormatContext, FormatError, FormatResult};
20
/// The only link message version this module reads or writes.
const VERSION: u8 = 1;

/// Flags bits 0-1: code selecting the width of the name-length field.
const FLAG_NAME_LEN_MASK: u8 = 0b0000_0011;
/// Flags bit 2: a creation-order field is present.
const FLAG_CREATION_ORDER: u8 = 1 << 2;
/// Flags bit 3: a link-type field is present.
const FLAG_LINK_TYPE: u8 = 1 << 3;
/// Flags bit 4: a charset field is present.
const FLAG_CHARSET: u8 = 1 << 4;

/// Link-type value for hard links.
const LINK_TYPE_HARD: u8 = 0;
/// Link-type value for soft links.
const LINK_TYPE_SOFT: u8 = 1;
30
/// What a link points at.
///
/// Both variants hold plain data (`u64` / `String`), so the type is also
/// `Eq` and `Hash` and can be used as a set element or map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum LinkTarget {
    /// Hard link — points to an object header at `address`.
    Hard { address: u64 },
    /// Soft link — points to a path string.
    Soft { target: String },
}
39
40/// Link message payload.
41#[derive(Debug, Clone, PartialEq)]
42pub struct LinkMessage {
43    pub name: String,
44    pub target: LinkTarget,
45}
46
47impl LinkMessage {
48    /// Create a hard link.
49    pub fn hard(name: &str, address: u64) -> Self {
50        Self {
51            name: name.to_string(),
52            target: LinkTarget::Hard { address },
53        }
54    }
55
56    /// Create a soft link.
57    pub fn soft(name: &str, target: &str) -> Self {
58        Self {
59            name: name.to_string(),
60            target: LinkTarget::Soft {
61                target: target.to_string(),
62            },
63        }
64    }
65
66    // ------------------------------------------------------------------ encode
67
68    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
69        let name_bytes = self.name.as_bytes();
70        let name_len = name_bytes.len();
71        let name_len_size = min_bytes_for_value(name_len as u64);
72        let name_len_code = match name_len_size {
73            1 => 0u8,
74            2 => 1,
75            4 => 2,
76            _ => 3, // 8
77        };
78
79        let link_type = match &self.target {
80            LinkTarget::Hard { .. } => LINK_TYPE_HARD,
81            LinkTarget::Soft { .. } => LINK_TYPE_SOFT,
82        };
83
84        // Always store link type so that soft links are correctly identified.
85        let mut flags: u8 = name_len_code & FLAG_NAME_LEN_MASK;
86        flags |= FLAG_LINK_TYPE; // always include link type for clarity
87        flags |= FLAG_CHARSET; // always include charset (UTF-8)
88
89        let mut buf = Vec::with_capacity(32);
90        buf.push(VERSION);
91        buf.push(flags);
92
93        // link type
94        buf.push(link_type);
95
96        // charset: 1 = UTF-8
97        buf.push(1u8);
98
99        // name length
100        match name_len_size {
101            1 => buf.push(name_len as u8),
102            2 => buf.extend_from_slice(&(name_len as u16).to_le_bytes()),
103            4 => buf.extend_from_slice(&(name_len as u32).to_le_bytes()),
104            _ => buf.extend_from_slice(&(name_len as u64).to_le_bytes()),
105        }
106
107        // name
108        buf.extend_from_slice(name_bytes);
109
110        // link info
111        match &self.target {
112            LinkTarget::Hard { address } => {
113                let sa = ctx.sizeof_addr as usize;
114                buf.extend_from_slice(&address.to_le_bytes()[..sa]);
115            }
116            LinkTarget::Soft { target } => {
117                let tbytes = target.as_bytes();
118                buf.extend_from_slice(&(tbytes.len() as u16).to_le_bytes());
119                buf.extend_from_slice(tbytes);
120            }
121        }
122
123        buf
124    }
125
126    // ------------------------------------------------------------------ decode
127
128    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<(Self, usize)> {
129        if buf.len() < 2 {
130            return Err(FormatError::BufferTooShort {
131                needed: 2,
132                available: buf.len(),
133            });
134        }
135
136        let version = buf[0];
137        if version != VERSION {
138            return Err(FormatError::InvalidVersion(version));
139        }
140
141        let flags = buf[1];
142        let name_len_code = flags & FLAG_NAME_LEN_MASK;
143        let has_creation_order = (flags & FLAG_CREATION_ORDER) != 0;
144        let has_link_type = (flags & FLAG_LINK_TYPE) != 0;
145        let has_charset = (flags & FLAG_CHARSET) != 0;
146
147        let mut pos = 2;
148
149        // link type
150        let link_type = if has_link_type {
151            check_len(buf, pos, 1)?;
152            let lt = buf[pos];
153            pos += 1;
154            lt
155        } else {
156            LINK_TYPE_HARD // default
157        };
158
159        // creation order — an 8-byte signed integer (H5Olink.c INT64DECODE),
160        // not 4. We don't store it, but the width must be skipped exactly.
161        if has_creation_order {
162            check_len(buf, pos, 8)?;
163            pos += 8;
164        }
165
166        // charset
167        if has_charset {
168            check_len(buf, pos, 1)?;
169            // skip charset byte
170            pos += 1;
171        }
172
173        // name length
174        let name_len_size: usize = match name_len_code {
175            0 => 1,
176            1 => 2,
177            2 => 4,
178            _ => 8,
179        };
180        check_len(buf, pos, name_len_size)?;
181        let name_len = read_uint(&buf[pos..], name_len_size) as usize;
182        pos += name_len_size;
183
184        // name
185        check_len(buf, pos, name_len)?;
186        let name = std::str::from_utf8(&buf[pos..pos + name_len])
187            .map_err(|e| FormatError::InvalidData(format!("invalid UTF-8 link name: {}", e)))?
188            .to_string();
189        pos += name_len;
190
191        // target
192        let target = match link_type {
193            LINK_TYPE_HARD => {
194                let sa = ctx.sizeof_addr as usize;
195                check_len(buf, pos, sa)?;
196                let address = read_uint(&buf[pos..], sa);
197                pos += sa;
198                LinkTarget::Hard { address }
199            }
200            LINK_TYPE_SOFT => {
201                check_len(buf, pos, 2)?;
202                let tlen = u16::from_le_bytes([buf[pos], buf[pos + 1]]) as usize;
203                pos += 2;
204                check_len(buf, pos, tlen)?;
205                let target = std::str::from_utf8(&buf[pos..pos + tlen])
206                    .map_err(|e| {
207                        FormatError::InvalidData(format!("invalid UTF-8 soft link target: {}", e))
208                    })?
209                    .to_string();
210                pos += tlen;
211                LinkTarget::Soft { target }
212            }
213            other => {
214                return Err(FormatError::UnsupportedFeature(format!(
215                    "link type {}",
216                    other
217                )));
218            }
219        };
220
221        Ok((Self { name, target }, pos))
222    }
223}
224
225// ========================================================================= helpers
226
227fn check_len(buf: &[u8], pos: usize, need: usize) -> FormatResult<()> {
228    // `need` can be a file-derived length up to 8 bytes wide; a checked add
229    // ensures `pos + need` cannot wrap to a small value that spuriously
230    // passes the bound check (and then panics a slice in the caller).
231    match pos.checked_add(need) {
232        Some(end) if end <= buf.len() => Ok(()),
233        _ => Err(FormatError::BufferTooShort {
234            needed: pos.saturating_add(need),
235            available: buf.len(),
236        }),
237    }
238}
239
/// Returns the narrowest field width in bytes — one of 1, 2, 4 or 8 — that
/// can hold `v` as an unsigned integer.
fn min_bytes_for_value(v: u64) -> usize {
    match v {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        0x1_0000..=0xFFFF_FFFF => 4,
        _ => 8,
    }
}
252
253// ======================================================================= tests
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Context with 8-byte addresses and sizes.
    fn ctx8() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Context with 4-byte addresses and sizes.
    fn ctx4() -> FormatContext {
        FormatContext {
            sizeof_addr: 4,
            sizeof_size: 4,
        }
    }

    // -------------------------------------------------------------- roundtrips

    #[test]
    fn roundtrip_hard_link() {
        let msg = LinkMessage::hard("dataset1", 0x1000);
        let encoded = msg.encode(&ctx8());
        let (decoded, consumed) = LinkMessage::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(consumed, encoded.len());
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_hard_link_ctx4() {
        let msg = LinkMessage::hard("grp", 0x2000);
        let encoded = msg.encode(&ctx4());
        let (decoded, consumed) = LinkMessage::decode(&encoded, &ctx4()).unwrap();
        assert_eq!(consumed, encoded.len());
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_soft_link() {
        let msg = LinkMessage::soft("alias", "/group/dataset");
        let encoded = msg.encode(&ctx8());
        let (decoded, consumed) = LinkMessage::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(consumed, encoded.len());
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_empty_name() {
        // edge case: empty name
        let msg = LinkMessage::hard("", 0x100);
        let encoded = msg.encode(&ctx8());
        let (decoded, _) = LinkMessage::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_long_name() {
        // name longer than 255 bytes triggers 2-byte name length
        let long_name: String = "a".repeat(300);
        let msg = LinkMessage::hard(&long_name, 0xABCD);
        let encoded = msg.encode(&ctx8());
        let (decoded, consumed) = LinkMessage::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(consumed, encoded.len());
        assert_eq!(decoded, msg);
    }

    #[test]
    fn roundtrip_unicode_name() {
        let msg = LinkMessage::hard("日本語データ", 0x4000);
        let encoded = msg.encode(&ctx8());
        let (decoded, _) = LinkMessage::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(decoded, msg);
    }

    // ------------------------------------------------------------------ encode

    #[test]
    fn version_byte() {
        let encoded = LinkMessage::hard("x", 0).encode(&ctx8());
        assert_eq!(encoded[0], 1);
    }

    #[test]
    fn encode_hard_link_layout() {
        // version, flags, link type, charset, name length, name, 4-byte address
        let encoded = LinkMessage::hard("x", 0x0102).encode(&ctx4());
        assert_eq!(
            encoded,
            vec![
                VERSION,
                FLAG_LINK_TYPE | FLAG_CHARSET,
                LINK_TYPE_HARD,
                1,
                1,
                b'x',
                0x02,
                0x01,
                0,
                0
            ]
        );
    }

    #[test]
    fn encode_soft_link_layout() {
        // version, flags, link type, charset, name length, name, u16 length, target
        let encoded = LinkMessage::soft("s", "/t").encode(&ctx8());
        assert_eq!(
            encoded,
            vec![
                VERSION,
                FLAG_LINK_TYPE | FLAG_CHARSET,
                LINK_TYPE_SOFT,
                1,
                1,
                b's',
                2,
                0,
                b'/',
                b't'
            ]
        );
    }

    // ------------------------------------------------------------------ decode

    #[test]
    fn decode_bad_version() {
        let buf = [2u8, 0]; // version 2 unsupported
        let err = LinkMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::InvalidVersion(2) => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_buffer_too_short() {
        let buf = [1u8];
        let err = LinkMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::BufferTooShort { .. } => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_skips_creation_order() {
        // The 8-byte creation-order field sits between link type and name length.
        let mut buf = vec![VERSION, FLAG_LINK_TYPE | FLAG_CREATION_ORDER, LINK_TYPE_HARD];
        buf.extend_from_slice(&7i64.to_le_bytes());
        buf.push(1);
        buf.push(b'a');
        buf.extend_from_slice(&0x1234u64.to_le_bytes());

        let (decoded, consumed) = LinkMessage::decode(&buf, &ctx8()).unwrap();
        assert_eq!(consumed, buf.len());
        assert_eq!(decoded, LinkMessage::hard("a", 0x1234));
    }

    #[test]
    fn decode_defaults_to_hard_link_without_type_field() {
        // flags = 0: no link type, no creation order, no charset, 1-byte name length
        let buf = [VERSION, 0, 1, b'x', 0x10, 0, 0, 0];
        let (decoded, consumed) = LinkMessage::decode(&buf, &ctx4()).unwrap();
        assert_eq!(consumed, buf.len());
        assert_eq!(decoded, LinkMessage::hard("x", 0x10));
    }

    #[test]
    fn decode_ignores_trailing_bytes() {
        let msg = LinkMessage::soft("alias", "/a/b");
        let mut buf = msg.encode(&ctx8());
        let message_len = buf.len();
        buf.extend_from_slice(&[0xAA, 0xBB]);

        let (decoded, consumed) = LinkMessage::decode(&buf, &ctx8()).unwrap();
        assert_eq!(consumed, message_len);
        assert_eq!(decoded, msg);
    }

    #[test]
    fn decode_unsupported_link_type() {
        // link type 64 is neither hard nor soft
        let buf = [VERSION, FLAG_LINK_TYPE, 64, 1, b'e'];
        let err = LinkMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::UnsupportedFeature(_) => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_truncated_name() {
        // name length says 5 but only 2 name bytes follow
        let buf = [VERSION, FLAG_LINK_TYPE, LINK_TYPE_HARD, 5, b'a', b'b'];
        let err = LinkMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::BufferTooShort {
                needed: 9,
                available: 6,
            } => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_invalid_utf8_name() {
        let buf = [VERSION, FLAG_LINK_TYPE, LINK_TYPE_HARD, 1, 0xFF];
        let err = LinkMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::InvalidData(_) => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    // ----------------------------------------------------------------- helpers

    #[test]
    fn min_bytes_for_value_checks() {
        assert_eq!(min_bytes_for_value(0), 1);
        assert_eq!(min_bytes_for_value(255), 1);
        assert_eq!(min_bytes_for_value(256), 2);
        assert_eq!(min_bytes_for_value(65535), 2);
        assert_eq!(min_bytes_for_value(65536), 4);
        assert_eq!(min_bytes_for_value(u32::MAX as u64), 4);
        assert_eq!(min_bytes_for_value(u32::MAX as u64 + 1), 8);
    }

    #[test]
    fn check_len_rejects_overflowing_range() {
        let buf = [0u8; 4];
        assert!(check_len(&buf, 0, 4).is_ok());
        assert!(check_len(&buf, 2, 3).is_err());
        // pos + need would wrap; must be reported as too short, not accepted
        assert!(check_len(&buf, 1, usize::MAX).is_err());
    }
}