Skip to main content

async_hdf5/messages/
data_layout.rs

1use bytes::Bytes;
2
3use crate::endian::HDF5Reader;
4use crate::error::{HDF5Error, Result};
5
/// Chunk indexing strategy for a chunked dataset.
///
/// Types 1–5 match the chunk indexing type byte stored in version 4/5 data
/// layout messages. [`ChunkIndexType::BTreeV1`] has no on-disk type byte: the
/// parser assigns it for the older message versions (1–3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChunkIndexType {
    /// Type 1: single chunk — address points directly to it.
    SingleChunk,
    /// Type 2: implicit index — fixed grid, computed addresses.
    Implicit,
    /// Type 3: fixed array — per the HDF5 file format specification, used when
    /// every maximum dimension size is fixed (no unlimited dimension).
    FixedArray,
    /// Type 4: extensible array — per the HDF5 file format specification, used
    /// when exactly one dimension is unlimited.
    ExtensibleArray,
    /// Type 5: B-tree v2 — general case (per the specification, more than one
    /// unlimited dimension).
    BTreeV2,
    /// Legacy: layout message versions 1–3 use B-tree v1.
    BTreeV1,
}
22
/// Data layout message — describes how dataset storage is organized.
///
/// Message type 0x0008. Each variant corresponds to one layout class byte in
/// the message: 0 = compact, 1 = contiguous, 2 = chunked.
#[derive(Debug, Clone)]
pub enum StorageLayout {
    /// Data stored inline in the object header. For very small datasets.
    Compact {
        /// The inline data bytes, sliced out of the layout message itself.
        data: Bytes,
    },

    /// Data stored as a single contiguous block in the file.
    Contiguous {
        /// Byte offset in the file. UNDEF_ADDR = unallocated.
        address: u64,
        /// Total size in bytes.
        size: u64,
    },

    /// Data stored in chunks, with an index structure for lookup.
    Chunked {
        /// Chunk dimensions in array elements. The trailing element-size
        /// entry stored in the message is not included.
        chunk_shape: Vec<u64>,
        /// Address of the chunk index (B-tree or other structure).
        index_address: u64,
        /// How chunks are indexed.
        indexing_type: ChunkIndexType,
        /// Layout flags. Only read from version 4/5 messages; the parser
        /// sets this to 0 for versions 1–3.
        flags: u8,
        /// Additional indexing parameters (type-specific).
        index_params: ChunkIndexParams,
    },
}
56
/// Type-specific chunk index parameters.
///
/// These are the extra fields that follow the chunk indexing type byte in a
/// version 4/5 layout message.
#[derive(Debug, Clone)]
pub enum ChunkIndexParams {
    /// No extra params recorded. Used for B-tree v1, implicit, B-tree v2, and
    /// single-chunk layouts whose filtered-chunk flag is not set.
    None,
    /// Single chunk: filtered chunk size and filter mask.
    SingleChunk {
        /// Size of the filtered (compressed) chunk in bytes.
        filtered_size: u64,
        /// Bit mask indicating which filters were not applied.
        filter_mask: u32,
    },
    /// Fixed array: page bits.
    FixedArray {
        /// Log2 of the number of entries per data block page.
        page_bits: u8,
    },
    /// Extensible array: max bits, index elements, min pointers, min elements, page bits.
    ///
    /// All five values are single bytes, stored in the message in this order.
    ExtensibleArray {
        /// Number of bits for the max number of elements in a data block.
        max_bits: u8,
        /// Number of elements in an index block.
        index_elements: u8,
        /// Minimum number of data block pointers in a secondary block.
        min_pointers: u8,
        /// Minimum number of elements in a data block.
        min_elements: u8,
        /// Number of bits for data block page size.
        page_bits: u8,
    },
}
88
89impl StorageLayout {
90    /// Parse from the raw data layout message bytes.
91    pub fn parse(data: &Bytes, size_of_offsets: u8, size_of_lengths: u8) -> Result<Self> {
92        let mut r = HDF5Reader::with_sizes(data.clone(), size_of_offsets, size_of_lengths);
93
94        let version = r.read_u8()?;
95
96        match version {
97            1 | 2 => Self::parse_v1_v2(&mut r, version),
98            3 => Self::parse_v3(&mut r, size_of_offsets, size_of_lengths),
99            4 | 5 => Self::parse_v4_v5(&mut r, version, size_of_offsets, size_of_lengths),
100            _ => Err(HDF5Error::UnsupportedDataLayoutVersion(version)),
101        }
102    }
103
104    /// Parse layout message version 1 or 2.
105    fn parse_v1_v2(r: &mut HDF5Reader, version: u8) -> Result<Self> {
106        let ndims = r.read_u8()?;
107        let layout_class = r.read_u8()?;
108        r.skip(5); // reserved
109
110        // v1 has data address here for all classes
111        if version == 1 {
112            let _data_address = r.read_u32()?;
113        }
114
115        let address = r.read_offset()?;
116
117        match layout_class {
118            0 => {
119                // Compact
120                let data_size = r.read_u32()? as usize;
121                let data = r.slice_from_position(data_size)?;
122                r.skip(data_size as u64);
123                Ok(StorageLayout::Compact { data })
124            }
125            1 => {
126                // Contiguous
127                let mut total_size = 1u64;
128                for _ in 0..ndims {
129                    total_size *= r.read_u32()? as u64;
130                }
131                let element_size = r.read_u32()? as u64;
132                Ok(StorageLayout::Contiguous {
133                    address,
134                    size: total_size * element_size,
135                })
136            }
137            2 => {
138                // Chunked
139                let mut chunk_shape = Vec::with_capacity(ndims as usize);
140                // v1/v2: ndims dimensions, each 4 bytes, last one is element size
141                for _ in 0..ndims.saturating_sub(1) {
142                    chunk_shape.push(r.read_u32()? as u64);
143                }
144                let _element_size = r.read_u32()?;
145
146                Ok(StorageLayout::Chunked {
147                    chunk_shape,
148                    index_address: address,
149                    indexing_type: ChunkIndexType::BTreeV1,
150                    flags: 0,
151                    index_params: ChunkIndexParams::None,
152                })
153            }
154            _ => Err(HDF5Error::General(format!(
155                "unknown layout class: {layout_class}"
156            ))),
157        }
158    }
159
160    /// Parse layout message version 3.
161    fn parse_v3(r: &mut HDF5Reader, _size_of_offsets: u8, _size_of_lengths: u8) -> Result<Self> {
162        let layout_class = r.read_u8()?;
163
164        match layout_class {
165            0 => {
166                // Compact
167                let data_size = r.read_u16()? as usize;
168                let data = r.slice_from_position(data_size)?;
169                r.skip(data_size as u64);
170                Ok(StorageLayout::Compact { data })
171            }
172            1 => {
173                // Contiguous
174                let address = r.read_offset()?;
175                let size = r.read_length()?;
176                Ok(StorageLayout::Contiguous { address, size })
177            }
178            2 => {
179                // Chunked (v3 = B-tree v1)
180                let ndims = r.read_u8()?;
181                let address = r.read_offset()?;
182
183                // ndims dimension sizes (4 bytes each), last is element size
184                let mut chunk_shape = Vec::with_capacity(ndims as usize);
185                for _ in 0..ndims.saturating_sub(1) {
186                    chunk_shape.push(r.read_u32()? as u64);
187                }
188                let _element_size = r.read_u32()?;
189
190                Ok(StorageLayout::Chunked {
191                    chunk_shape,
192                    index_address: address,
193                    indexing_type: ChunkIndexType::BTreeV1,
194                    flags: 0,
195                    index_params: ChunkIndexParams::None,
196                })
197            }
198            _ => Err(HDF5Error::General(format!(
199                "unknown layout class: {layout_class}"
200            ))),
201        }
202    }
203
204    /// Parse layout message version 4 or 5 (modern, with chunk indexing types).
205    fn parse_v4_v5(
206        r: &mut HDF5Reader,
207        _version: u8,
208        _size_of_offsets: u8,
209        _size_of_lengths: u8,
210    ) -> Result<Self> {
211        let layout_class = r.read_u8()?;
212
213        match layout_class {
214            0 => {
215                // Compact
216                let data_size = r.read_u16()? as usize;
217                let data = r.slice_from_position(data_size)?;
218                r.skip(data_size as u64);
219                Ok(StorageLayout::Compact { data })
220            }
221            1 => {
222                // Contiguous
223                let address = r.read_offset()?;
224                let size = r.read_length()?;
225                Ok(StorageLayout::Contiguous { address, size })
226            }
227            2 => {
228                // Chunked with indexing type
229                let flags = r.read_u8()?;
230                let ndims = r.read_u8()?;
231                let dim_size_enc_len = r.read_u8()?;
232
233                // ndims includes an extra dimension for the element size,
234                // just like v3. We read all ndims values but only keep the
235                // first ndims-1 as the actual chunk shape.
236                let mut chunk_shape = Vec::with_capacity(ndims as usize);
237                for _ in 0..ndims {
238                    let dim = match dim_size_enc_len {
239                        1 => r.read_u8()? as u64,
240                        2 => r.read_u16()? as u64,
241                        4 => r.read_u32()? as u64,
242                        8 => r.read_u64()?,
243                        _ => {
244                            return Err(HDF5Error::General(format!(
245                                "unsupported dimension size encoding length: {dim_size_enc_len}"
246                            )));
247                        }
248                    };
249                    chunk_shape.push(dim);
250                }
251                // Last dimension is element size, not a chunk dimension
252                let _element_size = chunk_shape.pop();
253
254                let chunk_indexing_type = r.read_u8()?;
255                let (indexing_type, index_params) = match chunk_indexing_type {
256                    1 => {
257                        // Single chunk
258                        let params = if flags & 0x02 != 0 {
259                            // Filtered single chunk
260                            let filtered_size = r.read_length()?;
261                            let filter_mask = r.read_u32()?;
262                            ChunkIndexParams::SingleChunk {
263                                filtered_size,
264                                filter_mask,
265                            }
266                        } else {
267                            ChunkIndexParams::None
268                        };
269                        (ChunkIndexType::SingleChunk, params)
270                    }
271                    2 => {
272                        // Implicit
273                        (ChunkIndexType::Implicit, ChunkIndexParams::None)
274                    }
275                    3 => {
276                        // Fixed array
277                        let page_bits = r.read_u8()?;
278                        (
279                            ChunkIndexType::FixedArray,
280                            ChunkIndexParams::FixedArray { page_bits },
281                        )
282                    }
283                    4 => {
284                        // Extensible array
285                        let max_bits = r.read_u8()?;
286                        let index_elements = r.read_u8()?;
287                        let min_pointers = r.read_u8()?;
288                        let min_elements = r.read_u8()?;
289                        let page_bits = r.read_u8()?;
290                        (
291                            ChunkIndexType::ExtensibleArray,
292                            ChunkIndexParams::ExtensibleArray {
293                                max_bits,
294                                index_elements,
295                                min_pointers,
296                                min_elements,
297                                page_bits,
298                            },
299                        )
300                    }
301                    5 => {
302                        // B-tree v2
303                        (ChunkIndexType::BTreeV2, ChunkIndexParams::None)
304                    }
305                    _ => {
306                        return Err(HDF5Error::UnsupportedChunkIndexingType(chunk_indexing_type));
307                    }
308                };
309
310                let index_address = r.read_offset()?;
311
312                Ok(StorageLayout::Chunked {
313                    chunk_shape,
314                    index_address,
315                    indexing_type,
316                    flags,
317                    index_params,
318                })
319            }
320            _ => Err(HDF5Error::General(format!(
321                "unknown layout class: {layout_class}"
322            ))),
323        }
324    }
325
326    /// Returns true if the layout is chunked.
327    pub fn is_chunked(&self) -> bool {
328        matches!(self, StorageLayout::Chunked { .. })
329    }
330
331    /// Returns true if the layout is contiguous.
332    pub fn is_contiguous(&self) -> bool {
333        matches!(self, StorageLayout::Contiguous { .. })
334    }
335}