Skip to main content

rust_hdf5/format/chunk_index/
fixed_array.rs

1//! Fixed Array (FA) chunk index structures for HDF5.
2//!
3//! Implements the on-disk format for the fixed array used to index chunked
4//! datasets where no dimension is unlimited (all dimensions are fixed-size).
5//!
6//! Structures:
7//!   - Header (FAHD): metadata about the fixed array
8//!   - Data Block (FADB): holds chunk addresses (or filtered chunk entries)
9
10use crate::format::checksum::checksum_metadata;
11use crate::format::{FormatContext, FormatError, FormatResult, UNDEF_ADDR};
12
/// Four-byte magic ("FAHD") written at the start of a fixed array header.
pub const FAHD_SIGNATURE: [u8; 4] = [b'F', b'A', b'H', b'D'];
/// Four-byte magic ("FADB") written at the start of a fixed array data block.
pub const FADB_SIGNATURE: [u8; 4] = [b'F', b'A', b'D', b'B'];

/// Version byte of the fixed array structures.
pub const FA_VERSION: u8 = 0;

/// Client ID marking an array whose elements describe unfiltered chunks.
pub const FA_CLIENT_CHUNK: u8 = 0;
/// Client ID marking an array whose elements describe filtered chunks.
pub const FA_CLIENT_FILT_CHUNK: u8 = 1;
25
26/// Fixed array header.
27///
28/// On-disk layout:
29/// ```text
30/// "FAHD"(4) + version=0(1) + client_id(1)
31/// + element_size(1) + max_dblk_page_nelmts_bits(1)
32/// + num_elmts(sizeof_size)
33/// + data_blk_addr(sizeof_addr)
34/// + checksum(4)
35/// ```
36#[derive(Debug, Clone, PartialEq)]
37pub struct FixedArrayHeader {
38    pub client_id: u8,
39    pub element_size: u8,
40    pub max_dblk_page_nelmts_bits: u8,
41    pub num_elmts: u64,
42    pub data_blk_addr: u64,
43}
44
45impl FixedArrayHeader {
46    /// Create a new header for unfiltered chunk indexing.
47    pub fn new_for_chunks(ctx: &FormatContext, num_elmts: u64) -> Self {
48        Self {
49            client_id: FA_CLIENT_CHUNK,
50            element_size: ctx.sizeof_addr,
51            max_dblk_page_nelmts_bits: 0,
52            num_elmts,
53            data_blk_addr: UNDEF_ADDR,
54        }
55    }
56
57    /// Create a new header for filtered chunk indexing.
58    ///
59    /// `chunk_size_len` is the number of bytes needed to encode the chunk size
60    /// (typically computed from the maximum possible compressed chunk size).
61    pub fn new_for_filtered_chunks(
62        ctx: &FormatContext,
63        num_elmts: u64,
64        chunk_size_len: u8,
65    ) -> Self {
66        // element_size = sizeof_addr + chunk_size_len + 4 (filter_mask)
67        let element_size = ctx.sizeof_addr + chunk_size_len + 4;
68        Self {
69            client_id: FA_CLIENT_FILT_CHUNK,
70            element_size,
71            max_dblk_page_nelmts_bits: 0,
72            num_elmts,
73            data_blk_addr: UNDEF_ADDR,
74        }
75    }
76
77    /// Compute the encoded size (for pre-allocation).
78    pub fn encoded_size(&self, ctx: &FormatContext) -> usize {
79        let ss = ctx.sizeof_size as usize;
80        let sa = ctx.sizeof_addr as usize;
81        // signature(4) + version(1) + client_id(1)
82        // + element_size(1) + max_dblk_page_nelmts_bits(1)
83        // + num_elmts(sizeof_size) + data_blk_addr(sizeof_addr)
84        // + checksum(4)
85        4 + 1 + 1 + 1 + 1 + ss + sa + 4
86    }
87
88    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
89        let ss = ctx.sizeof_size as usize;
90        let sa = ctx.sizeof_addr as usize;
91        let size = self.encoded_size(ctx);
92        let mut buf = Vec::with_capacity(size);
93
94        buf.extend_from_slice(&FAHD_SIGNATURE);
95        buf.push(FA_VERSION);
96        buf.push(self.client_id);
97        buf.push(self.element_size);
98        buf.push(self.max_dblk_page_nelmts_bits);
99
100        buf.extend_from_slice(&self.num_elmts.to_le_bytes()[..ss]);
101        buf.extend_from_slice(&self.data_blk_addr.to_le_bytes()[..sa]);
102
103        let cksum = checksum_metadata(&buf);
104        buf.extend_from_slice(&cksum.to_le_bytes());
105
106        debug_assert_eq!(buf.len(), size);
107        buf
108    }
109
110    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<Self> {
111        let ss = ctx.sizeof_size as usize;
112        let sa = ctx.sizeof_addr as usize;
113        let min_size = 4 + 1 + 1 + 1 + 1 + ss + sa + 4;
114
115        if buf.len() < min_size {
116            return Err(FormatError::BufferTooShort {
117                needed: min_size,
118                available: buf.len(),
119            });
120        }
121
122        if buf[0..4] != FAHD_SIGNATURE {
123            return Err(FormatError::InvalidSignature);
124        }
125
126        let version = buf[4];
127        if version != FA_VERSION {
128            return Err(FormatError::InvalidVersion(version));
129        }
130
131        // Verify checksum
132        let data_end = min_size - 4;
133        let stored_cksum = u32::from_le_bytes([
134            buf[data_end],
135            buf[data_end + 1],
136            buf[data_end + 2],
137            buf[data_end + 3],
138        ]);
139        let computed_cksum = checksum_metadata(&buf[..data_end]);
140        if stored_cksum != computed_cksum {
141            return Err(FormatError::ChecksumMismatch {
142                expected: stored_cksum,
143                computed: computed_cksum,
144            });
145        }
146
147        let client_id = buf[5];
148        let element_size = buf[6];
149        let max_dblk_page_nelmts_bits = buf[7];
150
151        let mut pos = 8;
152        let num_elmts = read_size(&buf[pos..], ss);
153        pos += ss;
154        let data_blk_addr = read_addr(&buf[pos..], sa);
155
156        Ok(Self {
157            client_id,
158            element_size,
159            max_dblk_page_nelmts_bits,
160            num_elmts,
161            data_blk_addr,
162        })
163    }
164}
165
/// One entry of a fixed array that indexes unfiltered chunks.
///
/// The entry is just the chunk's address (sizeof_addr bytes when encoded).
#[derive(Clone, Debug, PartialEq)]
pub struct FixedArrayChunkElement {
    /// Address of the chunk.
    pub address: u64,
}

/// One entry of a fixed array that indexes filtered chunks.
#[derive(Clone, Debug, PartialEq)]
pub struct FixedArrayFilteredChunkElement {
    /// Address of the chunk.
    pub address: u64,
    /// Chunk size value stored alongside the address.
    pub chunk_size: u32,
    /// Filter mask value stored alongside the address.
    pub filter_mask: u32,
}
180
181/// Fixed array data block.
182///
183/// On-disk layout:
184/// ```text
185/// "FADB"(4) + version=0(1) + client_id(1)
186/// + header_addr(sizeof_addr)
187/// + [if paged: page_init_bitmap]
188/// + elements(num_elmts * element_size)
189/// + checksum(4)
190/// ```
191#[derive(Debug, Clone, PartialEq)]
192pub struct FixedArrayDataBlock {
193    pub client_id: u8,
194    pub header_addr: u64,
195    /// Chunk addresses (for unfiltered chunks).
196    pub elements: Vec<u64>,
197    /// Filtered chunk entries (for filtered chunks; only used when client_id == 1).
198    pub filtered_elements: Vec<FixedArrayFilteredChunkElement>,
199}
200
201impl FixedArrayDataBlock {
202    /// Create a new empty data block for unfiltered chunks.
203    pub fn new_unfiltered(header_addr: u64, num_elmts: usize) -> Self {
204        Self {
205            client_id: FA_CLIENT_CHUNK,
206            header_addr,
207            elements: vec![UNDEF_ADDR; num_elmts],
208            filtered_elements: Vec::new(),
209        }
210    }
211
212    /// Create a new empty data block for filtered chunks.
213    pub fn new_filtered(header_addr: u64, num_elmts: usize) -> Self {
214        let default_entry = FixedArrayFilteredChunkElement {
215            address: UNDEF_ADDR,
216            chunk_size: 0,
217            filter_mask: 0,
218        };
219        Self {
220            client_id: FA_CLIENT_FILT_CHUNK,
221            header_addr,
222            elements: Vec::new(),
223            filtered_elements: vec![default_entry; num_elmts],
224        }
225    }
226
227    /// Compute the encoded size for unfiltered chunks.
228    pub fn encoded_size_unfiltered(&self, ctx: &FormatContext) -> usize {
229        let sa = ctx.sizeof_addr as usize;
230        // signature(4) + version(1) + client_id(1)
231        // + header_addr(sa)
232        // + elements(n * sa)
233        // + checksum(4)
234        4 + 1 + 1 + sa + self.elements.len() * sa + 4
235    }
236
237    /// Compute the encoded size for filtered chunks.
238    pub fn encoded_size_filtered(&self, ctx: &FormatContext, chunk_size_len: usize) -> usize {
239        let sa = ctx.sizeof_addr as usize;
240        let elem_size = sa + chunk_size_len + 4; // addr + chunk_size + filter_mask
241                                                 // signature(4) + version(1) + client_id(1)
242                                                 // + header_addr(sa)
243                                                 // + elements(n * elem_size)
244                                                 // + checksum(4)
245        4 + 1 + 1 + sa + self.filtered_elements.len() * elem_size + 4
246    }
247
248    /// Encode for unfiltered chunks.
249    pub fn encode_unfiltered(&self, ctx: &FormatContext) -> Vec<u8> {
250        let sa = ctx.sizeof_addr as usize;
251        let size = self.encoded_size_unfiltered(ctx);
252        let mut buf = Vec::with_capacity(size);
253
254        buf.extend_from_slice(&FADB_SIGNATURE);
255        buf.push(FA_VERSION);
256        buf.push(self.client_id);
257        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
258
259        for &addr in &self.elements {
260            buf.extend_from_slice(&addr.to_le_bytes()[..sa]);
261        }
262
263        let cksum = checksum_metadata(&buf);
264        buf.extend_from_slice(&cksum.to_le_bytes());
265
266        debug_assert_eq!(buf.len(), size);
267        buf
268    }
269
270    /// Encode for filtered chunks.
271    pub fn encode_filtered(&self, ctx: &FormatContext, chunk_size_len: usize) -> Vec<u8> {
272        let sa = ctx.sizeof_addr as usize;
273        let size = self.encoded_size_filtered(ctx, chunk_size_len);
274        let mut buf = Vec::with_capacity(size);
275
276        buf.extend_from_slice(&FADB_SIGNATURE);
277        buf.push(FA_VERSION);
278        buf.push(self.client_id);
279        buf.extend_from_slice(&self.header_addr.to_le_bytes()[..sa]);
280
281        for elem in &self.filtered_elements {
282            buf.extend_from_slice(&elem.address.to_le_bytes()[..sa]);
283            buf.extend_from_slice(&elem.chunk_size.to_le_bytes()[..chunk_size_len]);
284            buf.extend_from_slice(&elem.filter_mask.to_le_bytes());
285        }
286
287        let cksum = checksum_metadata(&buf);
288        buf.extend_from_slice(&cksum.to_le_bytes());
289
290        debug_assert_eq!(buf.len(), size);
291        buf
292    }
293
294    /// Decode for unfiltered chunks.
295    pub fn decode_unfiltered(
296        buf: &[u8],
297        ctx: &FormatContext,
298        num_elmts: usize,
299    ) -> FormatResult<Self> {
300        let sa = ctx.sizeof_addr as usize;
301        let min_size = 4 + 1 + 1 + sa + num_elmts * sa + 4;
302
303        if buf.len() < min_size {
304            return Err(FormatError::BufferTooShort {
305                needed: min_size,
306                available: buf.len(),
307            });
308        }
309
310        if buf[0..4] != FADB_SIGNATURE {
311            return Err(FormatError::InvalidSignature);
312        }
313
314        let version = buf[4];
315        if version != FA_VERSION {
316            return Err(FormatError::InvalidVersion(version));
317        }
318
319        // Verify checksum
320        let data_end = min_size - 4;
321        let stored_cksum = u32::from_le_bytes([
322            buf[data_end],
323            buf[data_end + 1],
324            buf[data_end + 2],
325            buf[data_end + 3],
326        ]);
327        let computed_cksum = checksum_metadata(&buf[..data_end]);
328        if stored_cksum != computed_cksum {
329            return Err(FormatError::ChecksumMismatch {
330                expected: stored_cksum,
331                computed: computed_cksum,
332            });
333        }
334
335        let client_id = buf[5];
336        let mut pos = 6;
337        let header_addr = read_addr(&buf[pos..], sa);
338        pos += sa;
339
340        let mut elements = Vec::with_capacity(num_elmts);
341        for _ in 0..num_elmts {
342            elements.push(read_addr(&buf[pos..], sa));
343            pos += sa;
344        }
345
346        Ok(Self {
347            client_id,
348            header_addr,
349            elements,
350            filtered_elements: Vec::new(),
351        })
352    }
353
354    /// Decode for filtered chunks.
355    pub fn decode_filtered(
356        buf: &[u8],
357        ctx: &FormatContext,
358        num_elmts: usize,
359        chunk_size_len: usize,
360    ) -> FormatResult<Self> {
361        let sa = ctx.sizeof_addr as usize;
362        let elem_size = sa + chunk_size_len + 4;
363        let min_size = 4 + 1 + 1 + sa + num_elmts * elem_size + 4;
364
365        if buf.len() < min_size {
366            return Err(FormatError::BufferTooShort {
367                needed: min_size,
368                available: buf.len(),
369            });
370        }
371
372        if buf[0..4] != FADB_SIGNATURE {
373            return Err(FormatError::InvalidSignature);
374        }
375
376        let version = buf[4];
377        if version != FA_VERSION {
378            return Err(FormatError::InvalidVersion(version));
379        }
380
381        // Verify checksum
382        let data_end = min_size - 4;
383        let stored_cksum = u32::from_le_bytes([
384            buf[data_end],
385            buf[data_end + 1],
386            buf[data_end + 2],
387            buf[data_end + 3],
388        ]);
389        let computed_cksum = checksum_metadata(&buf[..data_end]);
390        if stored_cksum != computed_cksum {
391            return Err(FormatError::ChecksumMismatch {
392                expected: stored_cksum,
393                computed: computed_cksum,
394            });
395        }
396
397        let client_id = buf[5];
398        let mut pos = 6;
399        let header_addr = read_addr(&buf[pos..], sa);
400        pos += sa;
401
402        let mut filtered_elements = Vec::with_capacity(num_elmts);
403        for _ in 0..num_elmts {
404            let address = read_addr(&buf[pos..], sa);
405            pos += sa;
406            let chunk_size = read_size(&buf[pos..], chunk_size_len) as u32;
407            pos += chunk_size_len;
408            let filter_mask =
409                u32::from_le_bytes([buf[pos], buf[pos + 1], buf[pos + 2], buf[pos + 3]]);
410            pos += 4;
411            filtered_elements.push(FixedArrayFilteredChunkElement {
412                address,
413                chunk_size,
414                filter_mask,
415            });
416        }
417
418        Ok(Self {
419            client_id,
420            header_addr,
421            elements: Vec::new(),
422            filtered_elements,
423        })
424    }
425}
426
427// ========================================================================= helpers
428
429fn read_addr(buf: &[u8], n: usize) -> u64 {
430    if buf[..n].iter().all(|&b| b == 0xFF) {
431        UNDEF_ADDR
432    } else {
433        let mut tmp = [0u8; 8];
434        tmp[..n].copy_from_slice(&buf[..n]);
435        u64::from_le_bytes(tmp)
436    }
437}
438
/// Reads an `n`-byte little-endian unsigned integer from the front of `buf`,
/// zero-extending it to 64 bits.
///
/// Panics if `n` exceeds 8 or `buf` holds fewer than `n` bytes.
fn read_size(buf: &[u8], n: usize) -> u64 {
    let mut widened = [0u8; 8];
    for (dst, src) in widened[..n].iter_mut().zip(&buf[..n]) {
        *dst = *src;
    }
    u64::from_le_bytes(widened)
}
444
445// ======================================================================= tests
446
#[cfg(test)]
mod tests {
    //! Round-trip and error-path tests for the fixed array header and data
    //! block codecs, run with both 8-byte and 4-byte address/size widths.

    use super::*;

    /// Context with 8-byte addresses and sizes.
    fn ctx8() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Context with 4-byte addresses and sizes.
    fn ctx4() -> FormatContext {
        FormatContext {
            sizeof_addr: 4,
            sizeof_size: 4,
        }
    }

    #[test]
    fn header_roundtrip() {
        let mut hdr = FixedArrayHeader::new_for_chunks(&ctx8(), 10);
        hdr.data_blk_addr = 0x2000;

        let encoded = hdr.encode(&ctx8());
        assert_eq!(encoded.len(), hdr.encoded_size(&ctx8()));
        assert_eq!(&encoded[..4], b"FAHD");

        let decoded = FixedArrayHeader::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(decoded, hdr);
    }

    #[test]
    fn header_roundtrip_ctx4() {
        let mut hdr = FixedArrayHeader::new_for_chunks(&ctx4(), 5);
        hdr.data_blk_addr = 0x800;

        let encoded = hdr.encode(&ctx4());
        let decoded = FixedArrayHeader::decode(&encoded, &ctx4()).unwrap();
        assert_eq!(decoded, hdr);
    }

    #[test]
    fn header_bad_signature() {
        let hdr = FixedArrayHeader::new_for_chunks(&ctx8(), 10);
        let mut encoded = hdr.encode(&ctx8());
        encoded[0] = b'X';
        let err = FixedArrayHeader::decode(&encoded, &ctx8()).unwrap_err();
        assert!(matches!(err, FormatError::InvalidSignature));
    }

    #[test]
    fn header_bad_version() {
        let hdr = FixedArrayHeader::new_for_chunks(&ctx8(), 10);
        let mut encoded = hdr.encode(&ctx8());
        encoded[4] = 1;
        let err = FixedArrayHeader::decode(&encoded, &ctx8()).unwrap_err();
        assert!(matches!(err, FormatError::InvalidVersion(1)));
    }

    #[test]
    fn header_buffer_too_short() {
        let hdr = FixedArrayHeader::new_for_chunks(&ctx8(), 10);
        let encoded = hdr.encode(&ctx8());
        // 8 fixed bytes + 8 (num_elmts) + 8 (address) + 4 (checksum) = 28
        assert_eq!(encoded.len(), 28);
        let err = FixedArrayHeader::decode(&encoded[..27], &ctx8()).unwrap_err();
        assert!(matches!(
            err,
            FormatError::BufferTooShort {
                needed: 28,
                available: 27
            }
        ));
    }

    #[test]
    fn header_checksum_mismatch() {
        let hdr = FixedArrayHeader::new_for_chunks(&ctx8(), 10);
        let mut encoded = hdr.encode(&ctx8());
        // Corrupt the element_size byte, which is covered by the checksum.
        encoded[6] ^= 0xFF;
        let err = FixedArrayHeader::decode(&encoded, &ctx8()).unwrap_err();
        assert!(matches!(err, FormatError::ChecksumMismatch { .. }));
    }

    #[test]
    fn data_block_unfiltered_roundtrip() {
        let mut dblk = FixedArrayDataBlock::new_unfiltered(0x1000, 4);
        dblk.elements[0] = 0x3000;
        dblk.elements[1] = 0x4000;
        dblk.elements[2] = UNDEF_ADDR;
        dblk.elements[3] = 0x5000;

        let encoded = dblk.encode_unfiltered(&ctx8());
        assert_eq!(encoded.len(), dblk.encoded_size_unfiltered(&ctx8()));
        assert_eq!(&encoded[..4], b"FADB");

        let decoded = FixedArrayDataBlock::decode_unfiltered(&encoded, &ctx8(), 4).unwrap();
        assert_eq!(decoded.elements, dblk.elements);
        assert_eq!(decoded.header_addr, 0x1000);
    }

    #[test]
    fn data_block_unfiltered_roundtrip_ctx4() {
        let mut dblk = FixedArrayDataBlock::new_unfiltered(0x500, 3);
        dblk.elements[0] = 0x100;
        dblk.elements[1] = 0x200;
        dblk.elements[2] = 0x300;

        let encoded = dblk.encode_unfiltered(&ctx4());
        let decoded = FixedArrayDataBlock::decode_unfiltered(&encoded, &ctx4(), 3).unwrap();
        assert_eq!(decoded.elements, dblk.elements);
    }

    #[test]
    fn data_block_unfiltered_bad_checksum() {
        let dblk = FixedArrayDataBlock::new_unfiltered(0x1000, 2);
        let mut encoded = dblk.encode_unfiltered(&ctx8());
        // Corrupt a byte of the header address, which is covered by the checksum.
        encoded[8] ^= 0xFF;
        let err = FixedArrayDataBlock::decode_unfiltered(&encoded, &ctx8(), 2).unwrap_err();
        assert!(matches!(err, FormatError::ChecksumMismatch { .. }));
    }

    #[test]
    fn data_block_unfiltered_buffer_too_short() {
        let dblk = FixedArrayDataBlock::new_unfiltered(0x1000, 2);
        let encoded = dblk.encode_unfiltered(&ctx8());
        // 6 prefix bytes + 8 (header address) + 2 * 8 (elements) + 4 (checksum) = 34
        assert_eq!(encoded.len(), 34);
        // Asking for one more element than was encoded needs 8 more bytes.
        let err = FixedArrayDataBlock::decode_unfiltered(&encoded, &ctx8(), 3).unwrap_err();
        assert!(matches!(
            err,
            FormatError::BufferTooShort {
                needed: 42,
                available: 34
            }
        ));
    }

    #[test]
    fn data_block_bad_signature() {
        let dblk = FixedArrayDataBlock::new_unfiltered(0x1000, 2);
        let mut encoded = dblk.encode_unfiltered(&ctx8());
        encoded[0] = b'X';
        let err = FixedArrayDataBlock::decode_unfiltered(&encoded, &ctx8(), 2).unwrap_err();
        assert!(matches!(err, FormatError::InvalidSignature));
    }

    #[test]
    fn data_block_filtered_roundtrip() {
        let mut dblk = FixedArrayDataBlock::new_filtered(0x1000, 2);
        dblk.filtered_elements[0] = FixedArrayFilteredChunkElement {
            address: 0x2000,
            chunk_size: 512,
            filter_mask: 0,
        };
        dblk.filtered_elements[1] = FixedArrayFilteredChunkElement {
            address: 0x3000,
            chunk_size: 400,
            filter_mask: 1,
        };

        let chunk_size_len = 4; // 4 bytes for chunk_size
        let encoded = dblk.encode_filtered(&ctx8(), chunk_size_len);
        assert_eq!(
            encoded.len(),
            dblk.encoded_size_filtered(&ctx8(), chunk_size_len)
        );

        let decoded =
            FixedArrayDataBlock::decode_filtered(&encoded, &ctx8(), 2, chunk_size_len).unwrap();
        assert_eq!(decoded.filtered_elements, dblk.filtered_elements);
    }

    #[test]
    fn data_block_filtered_roundtrip_ctx4() {
        let mut dblk = FixedArrayDataBlock::new_filtered(0x500, 1);
        dblk.filtered_elements[0] = FixedArrayFilteredChunkElement {
            address: 0x100,
            chunk_size: 0x0102,
            filter_mask: 0x8000_0001,
        };

        // Two-byte chunk sizes exercise a width narrower than the field type.
        let chunk_size_len = 2;
        let encoded = dblk.encode_filtered(&ctx4(), chunk_size_len);
        assert_eq!(
            encoded.len(),
            dblk.encoded_size_filtered(&ctx4(), chunk_size_len)
        );

        let decoded =
            FixedArrayDataBlock::decode_filtered(&encoded, &ctx4(), 1, chunk_size_len).unwrap();
        assert_eq!(decoded, dblk);
    }

    #[test]
    fn data_block_filtered_bad_checksum() {
        let dblk = FixedArrayDataBlock::new_filtered(0x1000, 1);
        let mut encoded = dblk.encode_filtered(&ctx8(), 4);
        // Corrupt the stored checksum itself.
        let last = encoded.len() - 1;
        encoded[last] ^= 0xFF;
        let err = FixedArrayDataBlock::decode_filtered(&encoded, &ctx8(), 1, 4).unwrap_err();
        assert!(matches!(err, FormatError::ChecksumMismatch { .. }));
    }

    #[test]
    fn header_filtered_roundtrip() {
        let hdr = FixedArrayHeader::new_for_filtered_chunks(&ctx8(), 6, 4);
        assert_eq!(hdr.element_size, 8 + 4 + 4); // addr + chunk_size_len + filter_mask
        assert_eq!(hdr.client_id, FA_CLIENT_FILT_CHUNK);

        let encoded = hdr.encode(&ctx8());
        let decoded = FixedArrayHeader::decode(&encoded, &ctx8()).unwrap();
        assert_eq!(decoded, hdr);
    }

    #[test]
    fn empty_data_block() {
        let dblk = FixedArrayDataBlock::new_unfiltered(0x500, 0);
        let encoded = dblk.encode_unfiltered(&ctx8());
        let decoded = FixedArrayDataBlock::decode_unfiltered(&encoded, &ctx8(), 0).unwrap();
        assert!(decoded.elements.is_empty());
    }
}