Skip to main content

rust_hdf5/format/messages/
data_layout.rs

//! Data layout message (type 0x08) — describes how raw data is stored.
//!
//! Binary layout (version 3):
//!   Byte 0: version = 3
//!   Byte 1: layout class (0=compact, 1=contiguous, 2=chunked)
//!
//!   Contiguous (class 1):
//!     address: sizeof_addr bytes
//!     size:    sizeof_size bytes
//!
//!   Compact (class 0):
//!     compact_size: u16 LE
//!     data:         compact_size bytes
//!
//! Binary layout (version 4, chunked only):
//!   Byte 0: version = 4
//!   Byte 1: layout class = 2 (chunked)
//!   flags(1) + ndims(1) + enc_bytes_per_dim(1)
//!   + dim_sizes(ndims * enc_bytes_per_dim, each LE)
//!   + index_type(1)
//!   + [for earray: 5 param bytes; for fixed array: 1 param byte]
//!   + index_address(sizeof_addr)
23
24use crate::format::{FormatContext, FormatError, FormatResult, UNDEF_ADDR};
25
/// Layout class byte: raw data stored inline in the message.
const CLASS_COMPACT: u8 = 0;
/// Layout class byte: raw data stored in one contiguous block.
const CLASS_CONTIGUOUS: u8 = 1;
/// Layout class byte: raw data stored in chunks.
const CLASS_CHUNKED: u8 = 2;

/// Message version written for compact and contiguous layouts.
const VERSION_3: u8 = 3;
/// Message version written for chunked layouts.
const VERSION_4: u8 = 4;
31
/// Chunk index type for version-4 chunked layout.
///
/// Each variant's discriminant is the byte stored in the message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum ChunkIndexType {
    /// Single-chunk index (type byte 1).
    SingleChunk = 1,
    /// Implicit index (type byte 2).
    Implicit = 2,
    /// Fixed array index (type byte 3).
    FixedArray = 3,
    /// Extensible array index (type byte 4).
    ExtensibleArray = 4,
    /// Version-2 B-tree index (type byte 5).
    BTreeV2 = 5,
}

impl ChunkIndexType {
    /// Converts a raw index-type byte into a variant.
    ///
    /// Returns `None` when `v` is not one of the five known type bytes.
    pub fn from_u8(v: u8) -> Option<Self> {
        const KNOWN: [ChunkIndexType; 5] = [
            ChunkIndexType::SingleChunk,
            ChunkIndexType::Implicit,
            ChunkIndexType::FixedArray,
            ChunkIndexType::ExtensibleArray,
            ChunkIndexType::BTreeV2,
        ];
        KNOWN.iter().copied().find(|kind| *kind as u8 == v)
    }
}
55
/// Parameters for the extensible array chunk index.
///
/// Each field occupies one byte of the five-byte parameter block that follows
/// the index-type byte in a version-4 chunked layout, in declaration order.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EarrayParams {
    /// First parameter byte.
    pub max_nelmts_bits: u8,
    /// Second parameter byte.
    pub idx_blk_elmts: u8,
    /// Third parameter byte.
    pub sup_blk_min_data_ptrs: u8,
    /// Fourth parameter byte.
    pub data_blk_min_elmts: u8,
    /// Fifth parameter byte.
    pub max_dblk_page_nelmts_bits: u8,
}

impl EarrayParams {
    /// Default extensible array parameters (from H5Dpkg.h).
    pub fn default_params() -> Self {
        Self {
            max_nelmts_bits: 32,
            idx_blk_elmts: 4,
            sup_blk_min_data_ptrs: 4,
            data_blk_min_elmts: 16,
            max_dblk_page_nelmts_bits: 10,
        }
    }
}

/// `Default` yields the same values as [`EarrayParams::default_params`], so the
/// type works with `..Default::default()` and `Option::unwrap_or_default`.
impl Default for EarrayParams {
    fn default() -> Self {
        Self::default_params()
    }
}
78
/// Parameters for the fixed array chunk index (max_dblk_page_nelmts_bits).
///
/// The single field is the one parameter byte that follows the index-type byte
/// in a version-4 chunked layout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FixedArrayParams {
    /// The fixed array parameter byte.
    pub max_dblk_page_nelmts_bits: u8,
}

impl FixedArrayParams {
    /// Default fixed array parameters: a zero parameter byte.
    pub fn default_params() -> Self {
        Self {
            max_dblk_page_nelmts_bits: 0,
        }
    }
}

/// `Default` yields the same value as [`FixedArrayParams::default_params`], so
/// the type works with `..Default::default()` and `Option::unwrap_or_default`.
impl Default for FixedArrayParams {
    fn default() -> Self {
        Self::default_params()
    }
}
92
93/// Data layout message payload.
94#[derive(Debug, Clone, PartialEq)]
95pub enum DataLayoutMessage {
96    /// Contiguous storage — raw data in a single block.
97    Contiguous {
98        /// Address of raw data.  `UNDEF_ADDR` if not yet allocated.
99        address: u64,
100        /// Size of raw data in bytes.
101        size: u64,
102    },
103    /// Compact storage — raw data stored within the object header.
104    Compact {
105        /// The raw data bytes.
106        data: Vec<u8>,
107    },
108    /// Version 4 chunked storage.
109    ChunkedV4 {
110        flags: u8,
111        /// Chunk dimension sizes.
112        chunk_dims: Vec<u64>,
113        /// Type of chunk index structure.
114        index_type: ChunkIndexType,
115        /// Extensible array parameters (present when index_type == ExtensibleArray).
116        earray_params: Option<EarrayParams>,
117        /// Fixed array parameters (present when index_type == FixedArray).
118        farray_params: Option<FixedArrayParams>,
119        /// Address of the chunk index structure.
120        index_address: u64,
121    },
122}
123
124impl DataLayoutMessage {
125    /// Contiguous layout with no data allocated yet.
126    pub fn contiguous_unallocated(size: u64) -> Self {
127        Self::Contiguous {
128            address: UNDEF_ADDR,
129            size,
130        }
131    }
132
133    /// Contiguous layout pointing to allocated data.
134    pub fn contiguous(address: u64, size: u64) -> Self {
135        Self::Contiguous { address, size }
136    }
137
138    /// Compact layout with inline data.
139    pub fn compact(data: Vec<u8>) -> Self {
140        Self::Compact { data }
141    }
142
143    /// Version 4 chunked layout with extensible array index.
144    ///
145    /// `chunk_dims` should include the trailing element-size dimension.
146    /// For example, for a 2D dataset with chunk=(1,4) and element_size=8,
147    /// pass chunk_dims = [1, 4, 8].
148    pub fn chunked_v4_earray(
149        chunk_dims: Vec<u64>,
150        earray_params: EarrayParams,
151        index_address: u64,
152    ) -> Self {
153        Self::ChunkedV4 {
154            flags: 0,
155            chunk_dims,
156            index_type: ChunkIndexType::ExtensibleArray,
157            earray_params: Some(earray_params),
158            farray_params: None,
159            index_address,
160        }
161    }
162
163    /// Version 4 chunked layout with fixed array index.
164    ///
165    /// `chunk_dims` should include the trailing element-size dimension.
166    pub fn chunked_v4_farray(
167        chunk_dims: Vec<u64>,
168        farray_params: FixedArrayParams,
169        index_address: u64,
170    ) -> Self {
171        Self::ChunkedV4 {
172            flags: 0,
173            chunk_dims,
174            index_type: ChunkIndexType::FixedArray,
175            earray_params: None,
176            farray_params: Some(farray_params),
177            index_address,
178        }
179    }
180
181    /// Version 4 chunked layout with B-tree v2 index.
182    ///
183    /// `chunk_dims` should include the trailing element-size dimension.
184    pub fn chunked_v4_btree_v2(chunk_dims: Vec<u64>, index_address: u64) -> Self {
185        Self::ChunkedV4 {
186            flags: 0,
187            chunk_dims,
188            index_type: ChunkIndexType::BTreeV2,
189            earray_params: None,
190            farray_params: None,
191            index_address,
192        }
193    }
194
195    /// Version 4 chunked layout with single-chunk index.
196    ///
197    /// `chunk_dims` should include the trailing element-size dimension.
198    pub fn chunked_v4_single(chunk_dims: Vec<u64>, index_address: u64) -> Self {
199        Self::ChunkedV4 {
200            flags: 0,
201            chunk_dims,
202            index_type: ChunkIndexType::SingleChunk,
203            earray_params: None,
204            farray_params: None,
205            index_address,
206        }
207    }
208
209    // ------------------------------------------------------------------ encode
210
211    pub fn encode(&self, ctx: &FormatContext) -> Vec<u8> {
212        match self {
213            Self::Contiguous { address, size } => {
214                let sa = ctx.sizeof_addr as usize;
215                let ss = ctx.sizeof_size as usize;
216                let mut buf = Vec::with_capacity(2 + sa + ss);
217                buf.push(VERSION_3);
218                buf.push(CLASS_CONTIGUOUS);
219                buf.extend_from_slice(&address.to_le_bytes()[..sa]);
220                buf.extend_from_slice(&size.to_le_bytes()[..ss]);
221                buf
222            }
223            Self::Compact { data } => {
224                let mut buf = Vec::with_capacity(2 + 2 + data.len());
225                buf.push(VERSION_3);
226                buf.push(CLASS_COMPACT);
227                buf.extend_from_slice(&(data.len() as u16).to_le_bytes());
228                buf.extend_from_slice(data);
229                buf
230            }
231            Self::ChunkedV4 {
232                flags,
233                chunk_dims,
234                index_type,
235                earray_params,
236                farray_params,
237                index_address,
238            } => {
239                let sa = ctx.sizeof_addr as usize;
240                let ndims = chunk_dims.len() as u8;
241
242                // Compute enc_bytes_per_dim: minimum bytes to represent the
243                // max chunk dimension value.
244                let max_dim = chunk_dims.iter().copied().max().unwrap_or(1);
245                let enc_bytes = enc_bytes_for_value(max_dim);
246
247                let mut buf = Vec::with_capacity(64);
248                buf.push(VERSION_4);
249                buf.push(CLASS_CHUNKED);
250                buf.push(*flags);
251                buf.push(ndims);
252                buf.push(enc_bytes);
253
254                // Dimension sizes
255                for &d in chunk_dims {
256                    buf.extend_from_slice(&d.to_le_bytes()[..enc_bytes as usize]);
257                }
258
259                // Index type
260                buf.push(*index_type as u8);
261
262                // Index-type-specific parameters
263                match *index_type {
264                    ChunkIndexType::ExtensibleArray => {
265                        if let Some(ref params) = earray_params {
266                            buf.push(params.max_nelmts_bits);
267                            buf.push(params.idx_blk_elmts);
268                            buf.push(params.sup_blk_min_data_ptrs);
269                            buf.push(params.data_blk_min_elmts);
270                            buf.push(params.max_dblk_page_nelmts_bits);
271                        }
272                    }
273                    ChunkIndexType::FixedArray => {
274                        if let Some(ref params) = farray_params {
275                            buf.push(params.max_dblk_page_nelmts_bits);
276                        }
277                    }
278                    // BTreeV2, SingleChunk, Implicit: no extra parameters
279                    _ => {}
280                }
281
282                // Index address
283                buf.extend_from_slice(&index_address.to_le_bytes()[..sa]);
284
285                buf
286            }
287        }
288    }
289
290    // ------------------------------------------------------------------ decode
291
292    pub fn decode(buf: &[u8], ctx: &FormatContext) -> FormatResult<(Self, usize)> {
293        if buf.len() < 2 {
294            return Err(FormatError::BufferTooShort {
295                needed: 2,
296                available: buf.len(),
297            });
298        }
299
300        let version = buf[0];
301        let class = buf[1];
302
303        match (version, class) {
304            (VERSION_3, CLASS_CONTIGUOUS) => {
305                let sa = ctx.sizeof_addr as usize;
306                let ss = ctx.sizeof_size as usize;
307                let mut pos = 2;
308                let needed = pos + sa + ss;
309                if buf.len() < needed {
310                    return Err(FormatError::BufferTooShort {
311                        needed,
312                        available: buf.len(),
313                    });
314                }
315                let address = read_addr(&buf[pos..], sa);
316                pos += sa;
317                let size = read_size(&buf[pos..], ss);
318                pos += ss;
319                Ok((Self::Contiguous { address, size }, pos))
320            }
321            (VERSION_3, CLASS_COMPACT) => {
322                let mut pos = 2;
323                if buf.len() < pos + 2 {
324                    return Err(FormatError::BufferTooShort {
325                        needed: pos + 2,
326                        available: buf.len(),
327                    });
328                }
329                let compact_size = u16::from_le_bytes([buf[pos], buf[pos + 1]]) as usize;
330                pos += 2;
331                if buf.len() < pos + compact_size {
332                    return Err(FormatError::BufferTooShort {
333                        needed: pos + compact_size,
334                        available: buf.len(),
335                    });
336                }
337                let data = buf[pos..pos + compact_size].to_vec();
338                pos += compact_size;
339                Ok((Self::Compact { data }, pos))
340            }
341            (VERSION_4, CLASS_CHUNKED) => {
342                let sa = ctx.sizeof_addr as usize;
343                let mut pos = 2;
344
345                // flags(1) + ndims(1) + enc_bytes_per_dim(1)
346                if buf.len() < pos + 3 {
347                    return Err(FormatError::BufferTooShort {
348                        needed: pos + 3,
349                        available: buf.len(),
350                    });
351                }
352                let flags = buf[pos];
353                pos += 1;
354                let ndims = buf[pos] as usize;
355                pos += 1;
356                let enc_bytes = buf[pos] as usize;
357                pos += 1;
358
359                // dim sizes
360                let dim_data_len = ndims * enc_bytes;
361                if buf.len() < pos + dim_data_len {
362                    return Err(FormatError::BufferTooShort {
363                        needed: pos + dim_data_len,
364                        available: buf.len(),
365                    });
366                }
367                let mut chunk_dims = Vec::with_capacity(ndims);
368                for _ in 0..ndims {
369                    chunk_dims.push(read_size(&buf[pos..], enc_bytes));
370                    pos += enc_bytes;
371                }
372
373                // index type
374                if buf.len() < pos + 1 {
375                    return Err(FormatError::BufferTooShort {
376                        needed: pos + 1,
377                        available: buf.len(),
378                    });
379                }
380                let idx_type_raw = buf[pos];
381                pos += 1;
382                let index_type = ChunkIndexType::from_u8(idx_type_raw).ok_or_else(|| {
383                    FormatError::UnsupportedFeature(format!("chunk index type {}", idx_type_raw))
384                })?;
385
386                // Index-type-specific parameters
387                let mut earray_params = None;
388                let mut farray_params = None;
389
390                match index_type {
391                    ChunkIndexType::ExtensibleArray => {
392                        if buf.len() < pos + 5 {
393                            return Err(FormatError::BufferTooShort {
394                                needed: pos + 5,
395                                available: buf.len(),
396                            });
397                        }
398                        earray_params = Some(EarrayParams {
399                            max_nelmts_bits: buf[pos],
400                            idx_blk_elmts: buf[pos + 1],
401                            sup_blk_min_data_ptrs: buf[pos + 2],
402                            data_blk_min_elmts: buf[pos + 3],
403                            max_dblk_page_nelmts_bits: buf[pos + 4],
404                        });
405                        pos += 5;
406                    }
407                    ChunkIndexType::FixedArray => {
408                        if buf.len() < pos + 1 {
409                            return Err(FormatError::BufferTooShort {
410                                needed: pos + 1,
411                                available: buf.len(),
412                            });
413                        }
414                        farray_params = Some(FixedArrayParams {
415                            max_dblk_page_nelmts_bits: buf[pos],
416                        });
417                        pos += 1;
418                    }
419                    // BTreeV2, SingleChunk, Implicit: no extra parameters
420                    _ => {}
421                }
422
423                // index address
424                if buf.len() < pos + sa {
425                    return Err(FormatError::BufferTooShort {
426                        needed: pos + sa,
427                        available: buf.len(),
428                    });
429                }
430                let index_address = read_addr(&buf[pos..], sa);
431                pos += sa;
432
433                Ok((
434                    Self::ChunkedV4 {
435                        flags,
436                        chunk_dims,
437                        index_type,
438                        earray_params,
439                        farray_params,
440                        index_address,
441                    },
442                    pos,
443                ))
444            }
445            (VERSION_3, other) => Err(FormatError::UnsupportedFeature(format!(
446                "data layout class {}",
447                other
448            ))),
449            (v, _) => Err(FormatError::InvalidVersion(v)),
450        }
451    }
452}
453
454// ========================================================================= helpers
455
456/// Read a little-endian address of `n` bytes, mapping all-ones to `UNDEF_ADDR`.
457fn read_addr(buf: &[u8], n: usize) -> u64 {
458    if buf[..n].iter().all(|&b| b == 0xFF) {
459        UNDEF_ADDR
460    } else {
461        let mut tmp = [0u8; 8];
462        tmp[..n].copy_from_slice(&buf[..n]);
463        u64::from_le_bytes(tmp)
464    }
465}
466
/// Decodes the first `n` bytes of `buf` as a little-endian unsigned integer,
/// zero-extended to 64 bits.
///
/// Panics if `n` exceeds 8 or `buf` is shorter than `n`.
fn read_size(buf: &[u8], n: usize) -> u64 {
    let mut le = [0u8; 8];
    for (dst, src) in le[..n].iter_mut().zip(&buf[..n]) {
        *dst = *src;
    }
    u64::from_le_bytes(le)
}
473
/// Returns how many bytes (1-8) are required to hold `v`; zero still takes one.
fn enc_bytes_for_value(v: u64) -> u8 {
    let mut width = 1u8;
    let mut remaining = v >> 8;
    // Each extra non-zero byte above the lowest one costs one more byte.
    while remaining != 0 {
        width += 1;
        remaining >>= 8;
    }
    width
}
482
483// ======================================================================= tests
484
#[cfg(test)]
mod tests {
    use super::*;

    /// Context with 8-byte addresses and sizes.
    fn ctx8() -> FormatContext {
        FormatContext {
            sizeof_addr: 8,
            sizeof_size: 8,
        }
    }

    /// Context with 4-byte addresses and sizes.
    fn ctx4() -> FormatContext {
        FormatContext {
            sizeof_addr: 4,
            sizeof_size: 4,
        }
    }

    /// Encodes `msg`, decodes the result, checks that the whole buffer was
    /// consumed and that the message survived unchanged, and returns the
    /// encoded bytes for further inspection.
    fn assert_roundtrip(msg: &DataLayoutMessage, ctx: &FormatContext) -> Vec<u8> {
        let encoded = msg.encode(ctx);
        let (decoded, consumed) = DataLayoutMessage::decode(&encoded, ctx).unwrap();
        assert_eq!(consumed, encoded.len());
        assert_eq!(&decoded, msg);
        encoded
    }

    #[test]
    fn roundtrip_contiguous() {
        let msg = DataLayoutMessage::contiguous(0x1000, 4096);
        let encoded = assert_roundtrip(&msg, &ctx8());
        // 2 + 8 + 8 = 18
        assert_eq!(encoded.len(), 18);
    }

    #[test]
    fn roundtrip_contiguous_ctx4() {
        let msg = DataLayoutMessage::contiguous(0x800, 256);
        let encoded = assert_roundtrip(&msg, &ctx4());
        // 2 + 4 + 4 = 10
        assert_eq!(encoded.len(), 10);
    }

    #[test]
    fn roundtrip_contiguous_unallocated() {
        let msg = DataLayoutMessage::contiguous_unallocated(1024);
        let encoded = assert_roundtrip(&msg, &ctx8());
        let (decoded, _) = DataLayoutMessage::decode(&encoded, &ctx8()).unwrap();
        match decoded {
            DataLayoutMessage::Contiguous { address, size } => {
                assert_eq!(address, UNDEF_ADDR);
                assert_eq!(size, 1024);
            }
            _ => panic!("expected Contiguous"),
        }
    }

    #[test]
    fn roundtrip_contiguous_undef_ctx4() {
        // A 4-byte all-ones address must still decode to the 64-bit UNDEF_ADDR.
        let msg = DataLayoutMessage::contiguous_unallocated(512);
        let encoded = assert_roundtrip(&msg, &ctx4());
        let (decoded, _) = DataLayoutMessage::decode(&encoded, &ctx4()).unwrap();
        match decoded {
            DataLayoutMessage::Contiguous { address, .. } => {
                assert_eq!(address, UNDEF_ADDR);
            }
            _ => panic!("expected Contiguous"),
        }
    }

    #[test]
    fn roundtrip_compact() {
        let msg = DataLayoutMessage::compact(vec![1, 2, 3, 4, 5, 6, 7, 8]);
        let encoded = assert_roundtrip(&msg, &ctx8());
        // 2 + 2 + 8 = 12
        assert_eq!(encoded.len(), 12);
    }

    #[test]
    fn roundtrip_compact_empty() {
        let msg = DataLayoutMessage::compact(vec![]);
        let encoded = assert_roundtrip(&msg, &ctx8());
        assert_eq!(encoded.len(), 4); // 2 + 2 + 0
    }

    #[test]
    fn decode_bad_version() {
        let buf = [2u8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
        let err = DataLayoutMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::InvalidVersion(2) => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_unsupported_class() {
        let buf = [3u8, 3]; // class 3 = unknown
        let err = DataLayoutMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::UnsupportedFeature(_) => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_buffer_too_short() {
        let buf = [3u8];
        let err = DataLayoutMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::BufferTooShort { .. } => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_contiguous_truncated() {
        // version=3, class=1, but not enough bytes for address+size
        let buf = [3u8, 1, 0, 0];
        let err = DataLayoutMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::BufferTooShort { .. } => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_chunked_truncated() {
        // version=4, class=2, but the 3-byte chunked header is incomplete
        let buf = [4u8, 2, 0];
        let err = DataLayoutMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::BufferTooShort { .. } => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn decode_unknown_chunk_index_type() {
        // flags=0, ndims=1, enc_bytes=1, dim=16, index type 9 (unknown), address
        let buf = [4u8, 2, 0, 1, 1, 16, 9, 0, 0, 0, 0, 0, 0, 0, 0];
        let err = DataLayoutMessage::decode(&buf, &ctx8()).unwrap_err();
        match err {
            FormatError::UnsupportedFeature(_) => {}
            other => panic!("unexpected error: {:?}", other),
        }
    }

    #[test]
    fn version_and_class_bytes() {
        let encoded = DataLayoutMessage::contiguous(0, 0).encode(&ctx8());
        assert_eq!(encoded[0], 3);
        assert_eq!(encoded[1], 1);

        let encoded = DataLayoutMessage::compact(vec![]).encode(&ctx8());
        assert_eq!(encoded[0], 3);
        assert_eq!(encoded[1], 0);
    }

    #[test]
    fn roundtrip_chunked_v4_earray() {
        let params = EarrayParams::default_params();
        let msg = DataLayoutMessage::chunked_v4_earray(vec![1, 256, 256], params, 0x2000);
        let encoded = assert_roundtrip(&msg, &ctx8());
        assert_eq!(encoded[0], 4); // version 4
        assert_eq!(encoded[1], 2); // class chunked
    }

    #[test]
    fn roundtrip_chunked_v4_earray_ctx4() {
        let params = EarrayParams::default_params();
        let msg = DataLayoutMessage::chunked_v4_earray(vec![1, 128], params, 0x1000);
        assert_roundtrip(&msg, &ctx4());
    }

    #[test]
    fn roundtrip_chunked_v4_farray() {
        let params = FixedArrayParams::default_params();
        let msg = DataLayoutMessage::chunked_v4_farray(vec![10, 20, 4], params, 0x5000);
        let encoded = assert_roundtrip(&msg, &ctx8());
        // header(5) + 3*1 dim bytes + index_type(1) + 1 farray param + 8 addr = 18
        assert_eq!(encoded.len(), 18);
    }

    #[test]
    fn roundtrip_chunked_v4_btree_v2() {
        let msg = DataLayoutMessage::chunked_v4_btree_v2(vec![64, 64, 8], 0x6000);
        let encoded = assert_roundtrip(&msg, &ctx8());
        // header(5) + 3*1 dim bytes + index_type(1) + 8 addr = 17
        assert_eq!(encoded.len(), 17);
    }

    #[test]
    fn roundtrip_chunked_v4_single() {
        let msg = DataLayoutMessage::chunked_v4_single(vec![100, 200], 0x3000);
        assert_roundtrip(&msg, &ctx8());
    }

    #[test]
    fn chunked_v4_enc_bytes() {
        // chunk dims [1, 256, 256]: max=256, needs 2 bytes
        let params = EarrayParams::default_params();
        let msg = DataLayoutMessage::chunked_v4_earray(vec![1, 256, 256], params, 0x2000);
        let encoded = msg.encode(&ctx8());
        // version(1) + class(1) + flags(1) + ndims(1) + enc_bytes(1)
        // + 3*2 dim bytes + index_type(1) + 5 earray params + 8 addr = 25
        assert_eq!(encoded.len(), 25);
        assert_eq!(encoded[4], 2); // enc_bytes_per_dim = 2
    }

    #[test]
    fn chunked_v4_large_dims() {
        // Largest dim is 65536 = 0x10000, which needs 3 bytes per dimension.
        let params = EarrayParams::default_params();
        let msg = DataLayoutMessage::chunked_v4_earray(vec![1, 65536], params, 0x4000);
        let encoded = msg.encode(&ctx8());
        assert_eq!(encoded[4], 3); // enc_bytes_per_dim = 3
    }
}