Skip to main content

hdf5_reader/
local_heap.rs

//! HDF5 Local Heap (HEAP).
//!
//! A local heap stores small, variable-length data — most commonly the link
//! names referenced by symbol table entries in old-style (v1) groups. Each
//! group has its own local heap pointed to by the Symbol Table message or the
//! root symbol table entry's scratch-pad.
//!
//! The heap header stores metadata while the actual string data lives in a
//! contiguous data segment elsewhere in the file.
10
11use crate::error::{Error, Result};
12use crate::io::Cursor;
13
/// Magic bytes that open every local heap header: the ASCII letters `HEAP`.
const HEAP_SIGNATURE: [u8; 4] = [b'H', b'E', b'A', b'P'];
16
/// Header of an HDF5 local heap, as read from the file.
///
/// Only the header fields are held here; the string data itself stays in the
/// file, in the data segment located at `data_segment_address`.
#[derive(Clone, Debug)]
pub struct LocalHeap {
    /// Length of the data segment, in bytes.
    pub data_segment_size: u64,
    /// Position of the first free-list entry, counted from the start of the
    /// data segment.
    pub free_list_offset: u64,
    /// Absolute address in the file at which the data segment begins.
    pub data_segment_address: u64,
}
31
32impl LocalHeap {
33    /// Parse a local heap header at the current cursor position.
34    ///
35    /// Format:
36    /// - Signature: `HEAP` (4 bytes)
37    /// - Version: 0 (1 byte)
38    /// - Reserved: 3 bytes
39    /// - Data segment size (`length_size` bytes)
40    /// - Offset to head of free list (`length_size` bytes)
41    /// - Data segment address (`offset_size` bytes)
42    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
43        let sig = cursor.read_bytes(4)?;
44        if sig != HEAP_SIGNATURE {
45            return Err(Error::InvalidLocalHeapSignature);
46        }
47
48        let version = cursor.read_u8()?;
49        if version != 0 {
50            return Err(Error::UnsupportedLocalHeapVersion(version));
51        }
52
53        // Reserved 3 bytes
54        cursor.skip(3)?;
55
56        let data_segment_size = cursor.read_length(length_size)?;
57        let free_list_offset = cursor.read_length(length_size)?;
58        let data_segment_address = cursor.read_offset(offset_size)?;
59
60        Ok(LocalHeap {
61            data_segment_size,
62            free_list_offset,
63            data_segment_address,
64        })
65    }
66
67    /// Read a null-terminated string at the given offset within the heap's
68    /// data segment.
69    ///
70    /// `offset` is relative to the start of the data segment (as stored in
71    /// `SymbolTableEntry::link_name_offset`). `file_data` must be the entire
72    /// file (or at least the portion containing the data segment).
73    pub fn get_string(&self, offset: u64, file_data: &[u8]) -> Result<String> {
74        let abs = self
75            .data_segment_address
76            .checked_add(offset)
77            .ok_or(Error::OffsetOutOfBounds(offset))?;
78        let start = abs as usize;
79
80        if start >= file_data.len() {
81            return Err(Error::OffsetOutOfBounds(abs));
82        }
83
84        // Find the null terminator within the data segment bounds.
85        let segment_end = (self.data_segment_address as usize)
86            .saturating_add(self.data_segment_size as usize)
87            .min(file_data.len());
88
89        let search_region = &file_data[start..segment_end];
90        let null_pos = search_region.iter().position(|&b| b == 0).ok_or_else(|| {
91            Error::InvalidData("local heap string missing null terminator".into())
92        })?;
93
94        let s = std::str::from_utf8(&search_region[..null_pos])
95            .map_err(|e| Error::InvalidData(format!("invalid UTF-8 in local heap string: {e}")))?;
96
97        Ok(s.to_string())
98    }
99}
100
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialise a version-0 local heap header whose length and offset fields
    /// are all 8 bytes wide, little-endian.
    fn build_heap_header(
        data_segment_size: u64,
        free_list_offset: u64,
        data_segment_address: u64,
    ) -> Vec<u8> {
        // Signature, version byte (0), then the three reserved bytes.
        let mut header = b"HEAP\0\0\0\0".to_vec();
        let fields = [data_segment_size, free_list_offset, data_segment_address];
        for field in &fields {
            header.extend_from_slice(&field.to_le_bytes());
        }
        header
    }

    #[test]
    fn test_parse_local_heap() {
        let header = build_heap_header(256, 128, 0x2000);
        let mut cursor = Cursor::new(&header);

        let LocalHeap {
            data_segment_size,
            free_list_offset,
            data_segment_address,
        } = LocalHeap::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(data_segment_size, 256);
        assert_eq!(free_list_offset, 128);
        assert_eq!(data_segment_address, 0x2000);
    }

    #[test]
    fn test_parse_local_heap_4byte() {
        // Same layout as the 8-byte helper, but with 4-byte size/offset fields.
        let size_field = 64u32.to_le_bytes();
        let free_list_field = 32u32.to_le_bytes();
        let address_field = 0x400u32.to_le_bytes();
        let parts: [&[u8]; 4] = [
            b"HEAP\0\0\0\0", // signature, version, reserved
            &size_field,
            &free_list_field,
            &address_field,
        ];
        let header: Vec<u8> = parts.concat();

        let mut cursor = Cursor::new(&header);
        let parsed = LocalHeap::parse(&mut cursor, 4, 4).unwrap();

        assert_eq!(parsed.data_segment_size, 64);
        assert_eq!(parsed.free_list_offset, 32);
        assert_eq!(parsed.data_segment_address, 0x400);
    }

    #[test]
    fn test_bad_signature() {
        let mut header = build_heap_header(256, 128, 0x2000);
        // Break the first signature byte.
        header[0] = b'X';

        let mut cursor = Cursor::new(&header);
        let outcome = LocalHeap::parse(&mut cursor, 8, 8);
        assert!(matches!(outcome, Err(Error::InvalidLocalHeapSignature)));
    }

    #[test]
    fn test_bad_version() {
        let mut header = build_heap_header(256, 128, 0x2000);
        // Byte 4 is the version; only version 0 is accepted.
        header[4] = 1;

        let mut cursor = Cursor::new(&header);
        let outcome = LocalHeap::parse(&mut cursor, 8, 8);
        assert!(matches!(outcome, Err(Error::UnsupportedLocalHeapVersion(1))));
    }

    #[test]
    fn test_get_string() {
        // A 200-byte "file" whose data segment begins at byte 100 and holds
        // two back-to-back null-terminated strings.
        const SEGMENT_START: usize = 100;
        let strings = b"hello\0world\0";

        let mut file_data = vec![0u8; 200];
        file_data[SEGMENT_START..SEGMENT_START + strings.len()].copy_from_slice(strings);

        let heap = LocalHeap {
            data_segment_size: 100,
            free_list_offset: 50,
            data_segment_address: SEGMENT_START as u64,
        };

        let first = heap.get_string(0, &file_data).unwrap();
        let second = heap.get_string(6, &file_data).unwrap();
        assert_eq!(first, "hello");
        assert_eq!(second, "world");
    }

    #[test]
    fn test_get_string_out_of_bounds() {
        // The segment address (100) lies past the end of the 50-byte buffer.
        let heap = LocalHeap {
            data_segment_size: 100,
            free_list_offset: 0,
            data_segment_address: 100,
        };
        let file_data = vec![0u8; 50];

        let outcome = heap.get_string(0, &file_data);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_get_string_missing_null() {
        // Five letters fill the whole segment; every other byte is 0xFF, so
        // there is no terminator anywhere.
        let mut file_data = vec![0xFFu8; 200];
        file_data[100..105].copy_from_slice(b"abcde");

        let heap = LocalHeap {
            data_segment_size: 5,
            free_list_offset: 0,
            data_segment_address: 100,
        };

        let outcome = heap.get_string(0, &file_data);
        assert!(outcome.is_err());
    }
}