// rust_hdf5/format/local_heap.rs

//! Local heap decode (for reading legacy HDF5 files).
//!
//! The local heap is used by v0/v1 groups to store link names as
//! null-terminated strings. The heap header lives at a known address and
//! points to a contiguous data block.
//!
//! Header layout:
//! ```text
//! "HEAP" (4 bytes)
//! version: 1 byte (0)
//! reserved: 3 bytes
//! data_size: sizeof_size bytes LE
//! free_list_offset: sizeof_size bytes LE (0xFFFFFFFFFFFFFFFF = none)
//! data_addr: sizeof_addr bytes LE
//! ```

use crate::format::{FormatError, FormatResult};

/// The 4-byte local heap signature (`"HEAP"`) expected at the very start of
/// a local heap header.
pub const LOCAL_HEAP_SIGNATURE: [u8; 4] = *b"HEAP";

/// Decoded local heap header.
///
/// Each field is read from a little-endian on-disk field that may be narrower
/// than eight bytes; narrower values are zero-extended to `u64`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocalHeapHeader {
    /// Total size of the data segment.
    pub data_size: u64,
    /// Offset into the data segment of the first free block.
    ///
    /// The header layout gives `0xFFFFFFFFFFFFFFFF` as the "none" marker.
    /// Because narrower fields are zero-extended rather than sign-extended,
    /// this equals `u64::MAX` only when the on-disk field is eight bytes
    /// wide; an all-ones 4-byte field decodes to `0xFFFF_FFFF`.
    pub free_list_offset: u64,
    /// File address of the data segment.
    pub data_addr: u64,
}

33impl LocalHeapHeader {
34    /// Decode a local heap header from `buf`.
35    ///
36    /// `sizeof_addr` and `sizeof_size` come from the superblock.
37    pub fn decode(buf: &[u8], sizeof_addr: usize, sizeof_size: usize) -> FormatResult<Self> {
38        let min_size = 4 + 1 + 3 + sizeof_size * 2 + sizeof_addr;
39        if buf.len() < min_size {
40            return Err(FormatError::BufferTooShort {
41                needed: min_size,
42                available: buf.len(),
43            });
44        }
45
46        if buf[0..4] != LOCAL_HEAP_SIGNATURE {
47            return Err(FormatError::InvalidSignature);
48        }
49
50        let version = buf[4];
51        if version != 0 {
52            return Err(FormatError::InvalidVersion(version));
53        }
54
55        // buf[5..8] reserved
56        let mut pos = 8;
57
58        let data_size = read_uint(&buf[pos..], sizeof_size);
59        pos += sizeof_size;
60
61        let free_list_offset = read_uint(&buf[pos..], sizeof_size);
62        pos += sizeof_size;
63
64        let data_addr = read_uint(&buf[pos..], sizeof_addr);
65
66        Ok(LocalHeapHeader {
67            data_size,
68            free_list_offset,
69            data_addr,
70        })
71    }
72}
73
74/// Look up a null-terminated string in the heap data block by offset.
75///
76/// `heap_data` is the raw bytes of the local heap data segment.
77/// `offset` is the byte offset into that segment where the string starts.
78pub fn local_heap_get_string(heap_data: &[u8], offset: u64) -> FormatResult<String> {
79    let start = offset as usize;
80    if start >= heap_data.len() {
81        return Err(FormatError::InvalidData(format!(
82            "local heap offset {} out of range (heap size {})",
83            offset,
84            heap_data.len()
85        )));
86    }
87
88    // Find the null terminator
89    let end = heap_data[start..]
90        .iter()
91        .position(|&b| b == 0)
92        .map(|p| start + p)
93        .unwrap_or(heap_data.len());
94
95    String::from_utf8(heap_data[start..end].to_vec())
96        .map_err(|e| FormatError::InvalidData(format!("invalid UTF-8 in local heap string: {}", e)))
97}
98
/// Read the first `n` bytes of `buf` as a little-endian unsigned integer,
/// zero-extended to `u64`.
///
/// # Panics
///
/// Panics if `n > 8` or if `buf` holds fewer than `n` bytes.
fn read_uint(buf: &[u8], n: usize) -> u64 {
    // Copy into the low-order bytes of a zeroed 8-byte scratch array; the
    // untouched high-order bytes provide the zero extension.
    let mut le_bytes = [0u8; 8];
    le_bytes[..n].copy_from_slice(&buf[..n]);
    u64::from_le_bytes(le_bytes)
}

// ======================================================================= tests

#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize a version-0 local heap header using the given field widths.
    fn build_heap_header(
        data_size: u64,
        free_list_offset: u64,
        data_addr: u64,
        sizeof_addr: usize,
        sizeof_size: usize,
    ) -> Vec<u8> {
        let mut buf = Vec::new();
        buf.extend_from_slice(&LOCAL_HEAP_SIGNATURE);
        buf.push(0); // version
        buf.extend_from_slice(&[0u8; 3]); // reserved
        buf.extend_from_slice(&data_size.to_le_bytes()[..sizeof_size]);
        buf.extend_from_slice(&free_list_offset.to_le_bytes()[..sizeof_size]);
        buf.extend_from_slice(&data_addr.to_le_bytes()[..sizeof_addr]);
        buf
    }

    #[test]
    fn decode_basic() {
        let buf = build_heap_header(128, u64::MAX, 0x1000, 8, 8);
        let hdr = LocalHeapHeader::decode(&buf, 8, 8).unwrap();
        assert_eq!(hdr.data_size, 128);
        assert_eq!(hdr.free_list_offset, u64::MAX);
        assert_eq!(hdr.data_addr, 0x1000);
    }

    #[test]
    fn decode_4byte() {
        let buf = build_heap_header(64, 0xFFFFFFFF, 0x800, 4, 4);
        let hdr = LocalHeapHeader::decode(&buf, 4, 4).unwrap();
        assert_eq!(hdr.data_size, 64);
        assert_eq!(hdr.free_list_offset, 0xFFFFFFFF);
        assert_eq!(hdr.data_addr, 0x800);
    }

    #[test]
    fn decode_mixed_widths() {
        // 8-byte addresses with 4-byte sizes: the fields must not bleed into
        // each other.
        let buf = build_heap_header(0x20, 0x8, 0x1_0000_0000, 8, 4);
        let hdr = LocalHeapHeader::decode(&buf, 8, 4).unwrap();
        assert_eq!(hdr.data_size, 0x20);
        assert_eq!(hdr.free_list_offset, 0x8);
        assert_eq!(hdr.data_addr, 0x1_0000_0000);
    }

    #[test]
    fn decode_ignores_trailing_bytes() {
        let exact = build_heap_header(128, 16, 0x1000, 8, 8);
        let mut padded = exact.clone();
        padded.extend_from_slice(&[0xAA; 16]);
        assert_eq!(
            LocalHeapHeader::decode(&padded, 8, 8).unwrap(),
            LocalHeapHeader::decode(&exact, 8, 8).unwrap()
        );
    }

    #[test]
    fn decode_bad_sig() {
        let mut buf = build_heap_header(64, 0, 0x800, 8, 8);
        buf[0] = b'X';
        assert!(matches!(
            LocalHeapHeader::decode(&buf, 8, 8).unwrap_err(),
            FormatError::InvalidSignature
        ));
    }

    #[test]
    fn decode_bad_version() {
        let mut buf = build_heap_header(64, 0, 0x800, 8, 8);
        buf[4] = 1;
        assert!(matches!(
            LocalHeapHeader::decode(&buf, 8, 8).unwrap_err(),
            FormatError::InvalidVersion(1)
        ));
    }

    #[test]
    fn decode_too_short() {
        let buf = [0u8; 4];
        assert!(matches!(
            LocalHeapHeader::decode(&buf, 8, 8).unwrap_err(),
            FormatError::BufferTooShort { .. }
        ));
    }

    #[test]
    fn decode_one_byte_short() {
        let mut buf = build_heap_header(64, 0, 0x800, 8, 8);
        buf.pop();
        assert!(matches!(
            LocalHeapHeader::decode(&buf, 8, 8).unwrap_err(),
            FormatError::BufferTooShort {
                needed: 32,
                available: 31
            }
        ));
    }

    #[test]
    fn get_string_basic() {
        let mut data = Vec::new();
        data.extend_from_slice(b"\0"); // offset 0: empty
        data.extend_from_slice(b"hello\0");
        data.extend_from_slice(b"world\0");

        assert_eq!(local_heap_get_string(&data, 0).unwrap(), "");
        assert_eq!(local_heap_get_string(&data, 1).unwrap(), "hello");
        assert_eq!(local_heap_get_string(&data, 7).unwrap(), "world");
    }

    #[test]
    fn get_string_unterminated() {
        // No NUL after the offset: the string runs to the end of the heap.
        let data = b"abc";
        assert_eq!(local_heap_get_string(data, 1).unwrap(), "bc");
    }

    #[test]
    fn get_string_invalid_utf8() {
        let data = [0xFFu8, 0x00];
        assert!(matches!(
            local_heap_get_string(&data, 0).unwrap_err(),
            FormatError::InvalidData(_)
        ));
    }

    #[test]
    fn get_string_out_of_range() {
        let data = b"hello\0";
        assert!(matches!(
            local_heap_get_string(data, 100).unwrap_err(),
            FormatError::InvalidData(_)
        ));
    }

    #[test]
    fn get_string_offset_at_end() {
        // An offset equal to the heap length is already out of range.
        let data = b"hello\0";
        assert!(matches!(
            local_heap_get_string(data, 6).unwrap_err(),
            FormatError::InvalidData(_)
        ));
    }
}