Skip to main content

hdf5_reader/
local_heap.rs

1//! HDF5 Local Heap (HEAP).
2//!
3//! A local heap stores small, variable-length data — most commonly the link
4//! names referenced by symbol table entries in old-style (v1) groups. Each
5//! group has its own local heap pointed to by the Symbol Table message or the
6//! root symbol table entry's scratch-pad.
7//!
8//! The heap header stores metadata while the actual string data lives in a
9//! contiguous data segment elsewhere in the file.
10
11use crate::error::{Error, Result};
12use crate::io::Cursor;
13use crate::storage::Storage;
14
/// Magic bytes expected at the very start of a local heap header: ASCII `HEAP`.
const HEAP_SIGNATURE: [u8; 4] = [b'H', b'E', b'A', b'P'];
17
/// Header of an HDF5 local heap.
///
/// Only the header fields are kept here; the string bytes themselves remain
/// in the file's data segment, which starts at `data_segment_address`.
///
/// The type is plain data (three `u64` fields), so it can be cloned and
/// compared by value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocalHeap {
    /// Size in bytes of the data segment.
    pub data_segment_size: u64,
    /// Offset to the head of the free-list within the data segment
    /// (relative to the start of the data segment).
    pub free_list_offset: u64,
    /// Absolute file address of the data segment.
    pub data_segment_address: u64,
}
32
33impl LocalHeap {
34    /// Parse a local heap header at the current cursor position.
35    ///
36    /// Format:
37    /// - Signature: `HEAP` (4 bytes)
38    /// - Version: 0 (1 byte)
39    /// - Reserved: 3 bytes
40    /// - Data segment size (`length_size` bytes)
41    /// - Offset to head of free list (`length_size` bytes)
42    /// - Data segment address (`offset_size` bytes)
43    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
44        let sig = cursor.read_bytes(4)?;
45        if sig != HEAP_SIGNATURE {
46            return Err(Error::InvalidLocalHeapSignature);
47        }
48
49        let version = cursor.read_u8()?;
50        if version != 0 {
51            return Err(Error::UnsupportedLocalHeapVersion(version));
52        }
53
54        // Reserved 3 bytes
55        cursor.skip(3)?;
56
57        let data_segment_size = cursor.read_length(length_size)?;
58        let free_list_offset = cursor.read_length(length_size)?;
59        let data_segment_address = cursor.read_offset(offset_size)?;
60
61        Ok(LocalHeap {
62            data_segment_size,
63            free_list_offset,
64            data_segment_address,
65        })
66    }
67
68    /// Parse a local heap header from random-access storage.
69    pub fn parse_at_storage(
70        storage: &dyn Storage,
71        address: u64,
72        offset_size: u8,
73        length_size: u8,
74    ) -> Result<Self> {
75        let header_len = 4
76            + 1
77            + 3
78            + usize::from(length_size)
79            + usize::from(length_size)
80            + usize::from(offset_size);
81        let bytes = storage.read_range(address, header_len)?;
82        let mut cursor = Cursor::new(bytes.as_ref());
83        Self::parse(&mut cursor, offset_size, length_size)
84    }
85
86    /// Read a null-terminated string at the given offset within the heap's
87    /// data segment.
88    ///
89    /// `offset` is relative to the start of the data segment (as stored in
90    /// `SymbolTableEntry::link_name_offset`). `file_data` must be the entire
91    /// file (or at least the portion containing the data segment).
92    pub fn get_string(&self, offset: u64, file_data: &[u8]) -> Result<String> {
93        let abs = self
94            .data_segment_address
95            .checked_add(offset)
96            .ok_or(Error::OffsetOutOfBounds(offset))?;
97        let start = abs as usize;
98
99        if start >= file_data.len() {
100            return Err(Error::OffsetOutOfBounds(abs));
101        }
102
103        // Find the null terminator within the data segment bounds.
104        let segment_end = (self.data_segment_address as usize)
105            .saturating_add(self.data_segment_size as usize)
106            .min(file_data.len());
107
108        let search_region = &file_data[start..segment_end];
109        let null_pos = search_region.iter().position(|&b| b == 0).ok_or_else(|| {
110            Error::InvalidData("local heap string missing null terminator".into())
111        })?;
112
113        let s = std::str::from_utf8(&search_region[..null_pos])
114            .map_err(|e| Error::InvalidData(format!("invalid UTF-8 in local heap string: {e}")))?;
115
116        Ok(s.to_string())
117    }
118
119    /// Read a null-terminated string from random-access storage.
120    pub fn get_string_storage(&self, offset: u64, storage: &dyn Storage) -> Result<String> {
121        if offset >= self.data_segment_size {
122            return Err(Error::OffsetOutOfBounds(offset));
123        }
124
125        let available = self
126            .data_segment_size
127            .checked_sub(offset)
128            .ok_or(Error::OffsetOutOfBounds(offset))?;
129        let len = usize::try_from(available).map_err(|_| {
130            Error::InvalidData("local heap string region exceeds platform usize capacity".into())
131        })?;
132        let abs = self
133            .data_segment_address
134            .checked_add(offset)
135            .ok_or(Error::OffsetOutOfBounds(offset))?;
136        let bytes = storage.read_range(abs, len)?;
137        let null_pos = bytes.iter().position(|&b| b == 0).ok_or_else(|| {
138            Error::InvalidData("local heap string missing null terminator".into())
139        })?;
140        let s = std::str::from_utf8(&bytes[..null_pos])
141            .map_err(|e| Error::InvalidData(format!("invalid UTF-8 in local heap string: {e}")))?;
142        Ok(s.to_string())
143    }
144}
145
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize a version-0 local heap header whose length and offset fields
    /// are all 8 bytes wide, little-endian.
    fn build_heap_header(
        data_segment_size: u64,
        free_list_offset: u64,
        data_segment_address: u64,
    ) -> Vec<u8> {
        let mut buf = Vec::with_capacity(4 + 1 + 3 + 3 * 8);
        buf.extend_from_slice(b"HEAP");
        buf.push(0); // version
        buf.extend_from_slice(&[0; 3]); // reserved
        for field in [data_segment_size, free_list_offset, data_segment_address] {
            buf.extend_from_slice(&field.to_le_bytes());
        }
        buf
    }

    /// A 200-byte file image holding `hello\0world\0` at address 100, plus a
    /// heap header describing a 100-byte data segment that starts there.
    fn hello_world_file() -> (LocalHeap, Vec<u8>) {
        let seg_start = 100usize;
        let mut file_data = vec![0u8; 200];
        file_data[seg_start..seg_start + 12].copy_from_slice(b"hello\0world\0");

        let heap = LocalHeap {
            data_segment_size: 100,
            free_list_offset: 50,
            data_segment_address: seg_start as u64,
        };
        (heap, file_data)
    }

    #[test]
    fn test_parse_local_heap() {
        let data = build_heap_header(256, 128, 0x2000);

        let mut cursor = Cursor::new(&data);
        let heap = LocalHeap::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(heap.data_segment_size, 256);
        assert_eq!(heap.free_list_offset, 128);
        assert_eq!(heap.data_segment_address, 0x2000);
    }

    #[test]
    fn test_parse_local_heap_4byte() {
        let mut buf = Vec::new();
        buf.extend_from_slice(b"HEAP");
        buf.push(0); // version
        buf.extend_from_slice(&[0; 3]); // reserved
        buf.extend_from_slice(&64u32.to_le_bytes()); // data segment size
        buf.extend_from_slice(&32u32.to_le_bytes()); // free list offset
        buf.extend_from_slice(&0x400u32.to_le_bytes()); // data segment address

        let mut cursor = Cursor::new(&buf);
        let heap = LocalHeap::parse(&mut cursor, 4, 4).unwrap();

        assert_eq!(heap.data_segment_size, 64);
        assert_eq!(heap.free_list_offset, 32);
        assert_eq!(heap.data_segment_address, 0x400);
    }

    #[test]
    fn test_bad_signature() {
        let mut data = build_heap_header(256, 128, 0x2000);
        data[0] = b'X'; // corrupt signature
        let mut cursor = Cursor::new(&data);
        assert!(matches!(
            LocalHeap::parse(&mut cursor, 8, 8),
            Err(Error::InvalidLocalHeapSignature)
        ));
    }

    #[test]
    fn test_bad_version() {
        let mut data = build_heap_header(256, 128, 0x2000);
        data[4] = 1; // version 1 (unsupported)
        let mut cursor = Cursor::new(&data);
        assert!(matches!(
            LocalHeap::parse(&mut cursor, 8, 8),
            Err(Error::UnsupportedLocalHeapVersion(1))
        ));
    }

    #[test]
    fn test_get_string() {
        let (heap, file_data) = hello_world_file();

        assert_eq!(heap.get_string(0, &file_data).unwrap(), "hello");
        assert_eq!(heap.get_string(6, &file_data).unwrap(), "world");
    }

    #[test]
    fn test_get_string_mid_string_and_empty() {
        let (heap, file_data) = hello_world_file();

        // An offset inside a string yields its tail.
        assert_eq!(heap.get_string(1, &file_data).unwrap(), "ello");
        // An offset that lands on a terminator yields the empty string.
        assert_eq!(heap.get_string(5, &file_data).unwrap(), "");
    }

    #[test]
    fn test_get_string_out_of_bounds() {
        let file_data = vec![0u8; 50];
        let heap = LocalHeap {
            data_segment_size: 100,
            free_list_offset: 0,
            data_segment_address: 100, // beyond file_data
        };
        // The error carries the absolute address that fell outside the data.
        assert!(matches!(
            heap.get_string(0, &file_data),
            Err(Error::OffsetOutOfBounds(100))
        ));
    }

    #[test]
    fn test_get_string_missing_null() {
        // Data segment with no null terminator.
        let mut file_data = vec![0xFFu8; 200];
        file_data[100..105].copy_from_slice(b"abcde");

        let heap = LocalHeap {
            data_segment_size: 5,
            free_list_offset: 0,
            data_segment_address: 100,
        };
        assert!(matches!(
            heap.get_string(0, &file_data),
            Err(Error::InvalidData(_))
        ));
    }

    #[test]
    fn test_get_string_ignores_null_past_segment_end() {
        // A terminator sits immediately after the 5-byte segment; the search
        // must stop at the segment boundary and not pick it up.
        let mut file_data = vec![0xFFu8; 200];
        file_data[100..105].copy_from_slice(b"abcde");
        file_data[105] = 0;

        let heap = LocalHeap {
            data_segment_size: 5,
            free_list_offset: 0,
            data_segment_address: 100,
        };
        assert!(matches!(
            heap.get_string(0, &file_data),
            Err(Error::InvalidData(_))
        ));
    }

    #[test]
    fn test_get_string_invalid_utf8() {
        // 0xFF 0xFE followed by a terminator is not valid UTF-8.
        let mut file_data = vec![0u8; 32];
        file_data[8] = 0xFF;
        file_data[9] = 0xFE;

        let heap = LocalHeap {
            data_segment_size: 8,
            free_list_offset: 0,
            data_segment_address: 8,
        };
        assert!(matches!(
            heap.get_string(0, &file_data),
            Err(Error::InvalidData(_))
        ));
    }
}