Skip to main content

rust_hdf5/format/
local_heap.rs

//! Local heap decode (for reading legacy HDF5 files).
//!
//! The local heap is used by v0/v1 groups to store link names as
//! null-terminated strings. The heap header lives at a known address and
//! points to a contiguous data block.
//!
//! Header layout:
//! ```text
//! "HEAP" (4 bytes)
//! version: 1 byte (0)
//! reserved: 3 bytes
//! data_size: sizeof_size bytes LE
//! free_list_offset: sizeof_size bytes LE (all bits set = none; 0xFFFFFFFFFFFFFFFF when sizeof_size is 8)
//! data_addr: sizeof_addr bytes LE
//! ```
16
17use crate::format::bytes::read_le_uint as read_uint;
18use crate::format::{FormatError, FormatResult};
19
/// Magic bytes (`"HEAP"`) that a local heap header must start with.
pub const LOCAL_HEAP_SIGNATURE: [u8; 4] = [b'H', b'E', b'A', b'P'];
22
/// In-memory form of the fields stored in a local heap header.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct LocalHeapHeader {
    /// Size of the heap's data segment, as recorded in the header.
    pub data_size: u64,
    /// Offset, within the data segment, of the first free block.
    ///
    /// `u64::MAX` marks "no free block" when the size field is 8 bytes wide.
    /// Narrower fields are zero-extended by `decode`, so their all-ones marker
    /// is not widened to `u64::MAX` (e.g. it stays `0xFFFF_FFFF` for 4 bytes).
    pub free_list_offset: u64,
    /// File address at which the data segment starts.
    pub data_addr: u64,
}
33
34impl LocalHeapHeader {
35    /// Decode a local heap header from `buf`.
36    ///
37    /// `sizeof_addr` and `sizeof_size` come from the superblock.
38    pub fn decode(buf: &[u8], sizeof_addr: usize, sizeof_size: usize) -> FormatResult<Self> {
39        let min_size = 4 + 1 + 3 + sizeof_size * 2 + sizeof_addr;
40        if buf.len() < min_size {
41            return Err(FormatError::BufferTooShort {
42                needed: min_size,
43                available: buf.len(),
44            });
45        }
46
47        if buf[0..4] != LOCAL_HEAP_SIGNATURE {
48            return Err(FormatError::InvalidSignature);
49        }
50
51        let version = buf[4];
52        if version != 0 {
53            return Err(FormatError::InvalidVersion(version));
54        }
55
56        // buf[5..8] reserved
57        let mut pos = 8;
58
59        let data_size = read_uint(&buf[pos..], sizeof_size);
60        pos += sizeof_size;
61
62        let free_list_offset = read_uint(&buf[pos..], sizeof_size);
63        pos += sizeof_size;
64
65        let data_addr = read_uint(&buf[pos..], sizeof_addr);
66
67        Ok(LocalHeapHeader {
68            data_size,
69            free_list_offset,
70            data_addr,
71        })
72    }
73}
74
75/// Look up a null-terminated string in the heap data block by offset.
76///
77/// `heap_data` is the raw bytes of the local heap data segment.
78/// `offset` is the byte offset into that segment where the string starts.
79pub fn local_heap_get_string(heap_data: &[u8], offset: u64) -> FormatResult<String> {
80    let start = offset as usize;
81    if start >= heap_data.len() {
82        return Err(FormatError::InvalidData(format!(
83            "local heap offset {} out of range (heap size {})",
84            offset,
85            heap_data.len()
86        )));
87    }
88
89    // Find the null terminator
90    let end = heap_data[start..]
91        .iter()
92        .position(|&b| b == 0)
93        .map(|p| start + p)
94        .unwrap_or(heap_data.len());
95
96    String::from_utf8(heap_data[start..end].to_vec())
97        .map_err(|e| FormatError::InvalidData(format!("invalid UTF-8 in local heap string: {}", e)))
98}
99
100// ======================================================================= tests
101
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialise a version-0 heap header, keeping only the low `sizeof_size` /
    /// `sizeof_addr` little-endian bytes of each field.
    fn build_heap_header(
        data_size: u64,
        free_list_offset: u64,
        data_addr: u64,
        sizeof_addr: usize,
        sizeof_size: usize,
    ) -> Vec<u8> {
        let data_size = data_size.to_le_bytes();
        let free_list_offset = free_list_offset.to_le_bytes();
        let data_addr = data_addr.to_le_bytes();

        [
            &LOCAL_HEAP_SIGNATURE[..],
            &[0u8][..],    // version
            &[0u8; 3][..], // reserved
            &data_size[..sizeof_size],
            &free_list_offset[..sizeof_size],
            &data_addr[..sizeof_addr],
        ]
        .concat()
    }

    #[test]
    fn decode_basic() {
        let bytes = build_heap_header(128, u64::MAX, 0x1000, 8, 8);
        let expected = LocalHeapHeader {
            data_size: 128,
            free_list_offset: u64::MAX,
            data_addr: 0x1000,
        };
        assert_eq!(LocalHeapHeader::decode(&bytes, 8, 8).unwrap(), expected);
    }

    #[test]
    fn decode_4byte() {
        let bytes = build_heap_header(64, 0xFFFFFFFF, 0x800, 4, 4);
        let expected = LocalHeapHeader {
            data_size: 64,
            free_list_offset: 0xFFFFFFFF,
            data_addr: 0x800,
        };
        assert_eq!(LocalHeapHeader::decode(&bytes, 4, 4).unwrap(), expected);
    }

    #[test]
    fn decode_bad_sig() {
        let mut bytes = build_heap_header(64, 0, 0x800, 8, 8);
        bytes[0] = b'X';
        let err = LocalHeapHeader::decode(&bytes, 8, 8).unwrap_err();
        assert!(matches!(err, FormatError::InvalidSignature));
    }

    #[test]
    fn decode_bad_version() {
        let mut bytes = build_heap_header(64, 0, 0x800, 8, 8);
        bytes[4] = 1;
        let err = LocalHeapHeader::decode(&bytes, 8, 8).unwrap_err();
        assert!(matches!(err, FormatError::InvalidVersion(1)));
    }

    #[test]
    fn decode_too_short() {
        let bytes = [0u8; 4];
        let err = LocalHeapHeader::decode(&bytes, 8, 8).unwrap_err();
        assert!(matches!(err, FormatError::BufferTooShort { .. }));
    }

    #[test]
    fn get_string_basic() {
        // Offset 0 holds the empty string, followed by "hello" at 1 and "world" at 7.
        let data: &[u8] = b"\0hello\0world\0";

        for (offset, expected) in [(0u64, ""), (1, "hello"), (7, "world")] {
            assert_eq!(local_heap_get_string(data, offset).unwrap(), expected);
        }
    }

    #[test]
    fn get_string_out_of_range() {
        let data = b"hello\0";
        let err = local_heap_get_string(data, 100).unwrap_err();
        assert!(matches!(err, FormatError::InvalidData(_)));
    }
}