Skip to main content

hdf5_reader/
symbol_table.rs

//! HDF5 Symbol Table Entry and Symbol Table Node (SNOD).
//!
//! Symbol table entries appear in the superblock (root group) and inside
//! symbol table nodes. Each entry maps a link-name offset (within a local
//! heap) to an object header address, with optional scratch-pad data for
//! group B-tree/heap addresses or symbolic link values.
//!
//! Symbol table nodes (SNOD) are the leaf containers for these entries and
//! are stored at addresses referenced by v1 B-tree group nodes.
10
11use crate::error::{Error, Result};
12use crate::io::Cursor;
13
// ---------------------------------------------------------------------------
// Symbol Table Entry
// ---------------------------------------------------------------------------
17
/// A symbol table entry (used in the v0/v1 superblock and in SNOD nodes).
///
/// Each entry pairs a link name (stored as an offset into a local heap) with
/// the address of the target's object header. The 16-byte scratch-pad carries
/// extra data whose meaning depends on `cache_type`:
///
/// - `1` (group): the addresses of the group's B-tree and local heap. These
///   two addresses are decoded while parsing and exposed through
///   `btree_address()` and `local_heap_address()`.
/// - `2` (symbolic link): a symbolic link offset. It is not stored in a
///   dedicated field; it remains available in the raw `scratch` bytes.
///
/// The type implements `PartialEq`/`Eq` so parsed entries can be compared
/// directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymbolTableEntry {
    /// Offset of the link name within the local heap data segment.
    pub link_name_offset: u64,
    /// Address of the object header for the target object.
    pub object_header_address: u64,
    /// Cache type: 0 = nothing cached, 1 = group info, 2 = symbolic link.
    pub cache_type: u32,
    /// Raw scratch-pad bytes (always 16 bytes in the file), kept verbatim
    /// regardless of `cache_type`.
    pub scratch: [u8; 16],
    /// B-tree address extracted from scratch when `cache_type == 1`,
    /// `None` for every other cache type.
    cached_btree_address: Option<u64>,
    /// Local heap address extracted from scratch when `cache_type == 1`,
    /// `None` for every other cache type.
    cached_heap_address: Option<u64>,
}
39
40impl SymbolTableEntry {
41    /// Parse a single symbol table entry from the cursor.
42    ///
43    /// The format is:
44    /// - Link name offset (`offset_size` bytes)
45    /// - Object header address (`offset_size` bytes)
46    /// - Cache type (u32 LE)
47    /// - Reserved (u32 LE)
48    /// - 16 bytes scratch-pad
49    pub fn parse(cursor: &mut Cursor, offset_size: u8, _length_size: u8) -> Result<Self> {
50        let link_name_offset = cursor.read_offset(offset_size)?;
51        let object_header_address = cursor.read_offset(offset_size)?;
52        let cache_type = cursor.read_u32_le()?;
53        let _reserved = cursor.read_u32_le()?;
54
55        let scratch_bytes = cursor.read_bytes(16)?;
56        let mut scratch = [0u8; 16];
57        scratch.copy_from_slice(scratch_bytes);
58
59        // Extract cached group addresses from scratch when cache_type == 1.
60        let (cached_btree_address, cached_heap_address) = if cache_type == 1 {
61            let mut sc = Cursor::new(&scratch);
62            let btree = sc.read_offset(offset_size)?;
63            let heap = sc.read_offset(offset_size)?;
64            (Some(btree), Some(heap))
65        } else {
66            (None, None)
67        };
68
69        Ok(SymbolTableEntry {
70            link_name_offset,
71            object_header_address,
72            cache_type,
73            scratch,
74            cached_btree_address,
75            cached_heap_address,
76        })
77    }
78
79    /// Address of the B-tree for this group (only valid when `cache_type == 1`).
80    pub fn btree_address(&self) -> Option<u64> {
81        self.cached_btree_address
82    }
83
84    /// Address of the local heap for this group (only valid when `cache_type == 1`).
85    pub fn local_heap_address(&self) -> Option<u64> {
86        self.cached_heap_address
87    }
88}
89
// ---------------------------------------------------------------------------
// Symbol Table Node (SNOD)
// ---------------------------------------------------------------------------
93
/// Four-byte signature expected at the start of a Symbol Table Node: the
/// ASCII characters `S`, `N`, `O`, `D`.
const SNOD_SIGNATURE: [u8; 4] = [b'S', b'N', b'O', b'D'];
96
97/// A symbol table node (SNOD) containing one or more symbol table entries.
98///
99/// These nodes are the leaves of v1 group B-trees and contain the actual
100/// link-name-to-object mappings for groups stored in the old format.
101#[derive(Debug, Clone)]
102pub struct SymbolTableNode {
103    /// The entries contained in this node.
104    pub entries: Vec<SymbolTableEntry>,
105}
106
107impl SymbolTableNode {
108    /// Parse a symbol table node at the current cursor position.
109    ///
110    /// The format is:
111    /// - Signature: `SNOD` (4 bytes)
112    /// - Version: 1 (1 byte)
113    /// - Reserved: 1 byte
114    /// - Number of entries (u16 LE)
115    /// - That many `SymbolTableEntry` values
116    pub fn parse(cursor: &mut Cursor, offset_size: u8, length_size: u8) -> Result<Self> {
117        let sig = cursor.read_bytes(4)?;
118        if sig != SNOD_SIGNATURE {
119            return Err(Error::InvalidSymbolTableNodeSignature);
120        }
121
122        let version = cursor.read_u8()?;
123        if version != 1 {
124            return Err(Error::UnsupportedSymbolTableNodeVersion(version));
125        }
126
127        let _reserved = cursor.read_u8()?;
128        let num_entries = cursor.read_u16_le()?;
129
130        let mut entries = Vec::with_capacity(num_entries as usize);
131        for _ in 0..num_entries {
132            entries.push(SymbolTableEntry::parse(cursor, offset_size, length_size)?);
133        }
134
135        Ok(SymbolTableNode { entries })
136    }
137}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize one symbol table entry: two offset-sized fields, the cache
    /// type, a zeroed reserved word, and the 16-byte scratch-pad.
    ///
    /// Only 4- and 8-byte offsets are supported; anything else panics.
    fn build_entry_bytes(
        link_name_offset: u64,
        obj_header_addr: u64,
        cache_type: u32,
        scratch: &[u8; 16],
        offset_size: u8,
    ) -> Vec<u8> {
        // Little-endian encoding of a single offset-sized field.
        let encode_offset = |value: u64| -> Vec<u8> {
            match offset_size {
                4 => (value as u32).to_le_bytes().to_vec(),
                8 => value.to_le_bytes().to_vec(),
                _ => panic!("test only supports 4/8 byte offsets"),
            }
        };

        let mut bytes = encode_offset(link_name_offset);
        bytes.extend(encode_offset(obj_header_addr));
        bytes.extend_from_slice(&cache_type.to_le_bytes());
        // Reserved word, always zero.
        bytes.extend_from_slice(&[0u8; 4]);
        bytes.extend_from_slice(scratch);
        bytes
    }

    /// An entry with cache type 0 exposes no cached group addresses.
    #[test]
    fn test_parse_entry_no_cache() {
        let data = build_entry_bytes(42, 0x1000, 0, &[0u8; 16], 8);

        let mut cursor = Cursor::new(&data);
        let entry = SymbolTableEntry::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(entry.link_name_offset, 42);
        assert_eq!(entry.object_header_address, 0x1000);
        assert_eq!(entry.cache_type, 0);
        assert_eq!(entry.btree_address(), None);
        assert_eq!(entry.local_heap_address(), None);
    }

    /// Group cache with 8-byte offsets: btree=0x2000, heap=0x3000.
    #[test]
    fn test_parse_entry_group_cache_8byte() {
        let mut scratch = [0u8; 16];
        let (btree_half, heap_half) = scratch.split_at_mut(8);
        btree_half.copy_from_slice(&0x2000u64.to_le_bytes());
        heap_half.copy_from_slice(&0x3000u64.to_le_bytes());

        let data = build_entry_bytes(0, 0x1000, 1, &scratch, 8);

        let mut cursor = Cursor::new(&data);
        let entry = SymbolTableEntry::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(entry.cache_type, 1);
        assert_eq!(entry.btree_address(), Some(0x2000));
        assert_eq!(entry.local_heap_address(), Some(0x3000));
    }

    /// Group cache with 4-byte offsets: btree=0x400, heap=0x800.
    #[test]
    fn test_parse_entry_group_cache_4byte() {
        let mut scratch = [0u8; 16];
        scratch[0..4].copy_from_slice(&0x400u32.to_le_bytes());
        scratch[4..8].copy_from_slice(&0x800u32.to_le_bytes());

        let data = build_entry_bytes(0, 0x100, 1, &scratch, 4);

        let mut cursor = Cursor::new(&data);
        let entry = SymbolTableEntry::parse(&mut cursor, 4, 4).unwrap();

        assert_eq!(entry.cache_type, 1);
        assert_eq!(entry.btree_address(), Some(0x400));
        assert_eq!(entry.local_heap_address(), Some(0x800));
    }

    /// A well-formed node with two entries parses into both, in order.
    #[test]
    fn test_parse_snod_basic() {
        // Header: signature, version 1, reserved byte, entry count 2.
        let mut data = b"SNOD".to_vec();
        data.push(1);
        data.push(0);
        data.extend_from_slice(&2u16.to_le_bytes());

        // First entry: cache_type=0, empty scratch-pad.
        data.extend(build_entry_bytes(0, 0x1000, 0, &[0u8; 16], 8));

        // Second entry: cache_type=1, btree=0xA000, heap=0xB000.
        let mut group_scratch = [0u8; 16];
        group_scratch[..8].copy_from_slice(&0xA000u64.to_le_bytes());
        group_scratch[8..16].copy_from_slice(&0xB000u64.to_le_bytes());
        data.extend(build_entry_bytes(16, 0x2000, 1, &group_scratch, 8));

        let mut cursor = Cursor::new(&data);
        let node = SymbolTableNode::parse(&mut cursor, 8, 8).unwrap();

        assert_eq!(node.entries.len(), 2);

        let first = &node.entries[0];
        assert_eq!(first.link_name_offset, 0);
        assert_eq!(first.object_header_address, 0x1000);
        assert_eq!(first.cache_type, 0);

        let second = &node.entries[1];
        assert_eq!(second.link_name_offset, 16);
        assert_eq!(second.object_header_address, 0x2000);
        assert_eq!(second.btree_address(), Some(0xA000));
        assert_eq!(second.local_heap_address(), Some(0xB000));
    }

    /// A wrong signature is rejected before anything else is read.
    #[test]
    fn test_snod_bad_signature() {
        let data = b"XNOD\x01\x00\x00\x00";
        let mut cursor = Cursor::new(data);
        let outcome = SymbolTableNode::parse(&mut cursor, 8, 8);
        assert!(matches!(
            outcome,
            Err(Error::InvalidSymbolTableNodeSignature)
        ));
    }

    /// A version other than 1 is reported together with the offending value.
    #[test]
    fn test_snod_bad_version() {
        let data = b"SNOD\x02\x00\x00\x00";
        let mut cursor = Cursor::new(data);
        let outcome = SymbolTableNode::parse(&mut cursor, 8, 8);
        assert!(matches!(
            outcome,
            Err(Error::UnsupportedSymbolTableNodeVersion(2))
        ));
    }
}