async_hdf5/messages/data_layout.rs
1use bytes::Bytes;
2
3use crate::endian::HDF5Reader;
4use crate::error::{HDF5Error, Result};
5
/// Chunk indexing strategy of a chunked dataset.
///
/// Types 1–5 are the values of the "chunk indexing type" byte in a version
/// 4/5 data layout message; `BTreeV1` has no such byte and is implied by the
/// older message versions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChunkIndexType {
    /// Type 1: single chunk — the layout address points directly at the chunk.
    SingleChunk,
    /// Type 2: implicit index — fixed grid, chunk addresses are computed.
    Implicit,
    /// Type 3: fixed array — dataset whose maximum dimension sizes are all fixed.
    FixedArray,
    /// Type 4: extensible array — dataset with exactly one unlimited dimension.
    ExtensibleArray,
    /// Type 5: B-tree v2 — dataset with more than one unlimited dimension.
    BTreeV2,
    /// Legacy: layout message versions 1–3 index their chunks with a B-tree v1.
    BTreeV1,
}
22
23/// Data layout message — describes how dataset storage is organized.
24///
25/// Message type 0x0008.
26#[derive(Debug, Clone)]
27pub enum StorageLayout {
28 /// Data stored inline in the object header. For very small datasets.
29 Compact {
30 /// The inline data bytes.
31 data: Bytes,
32 },
33
34 /// Data stored as a single contiguous block in the file.
35 Contiguous {
36 /// Byte offset in the file. UNDEF_ADDR = unallocated.
37 address: u64,
38 /// Total size in bytes.
39 size: u64,
40 },
41
42 /// Data stored in chunks, with an index structure for lookup.
43 Chunked {
44 /// Chunk dimensions in array elements.
45 chunk_shape: Vec<u64>,
46 /// Address of the chunk index (B-tree or other structure).
47 index_address: u64,
48 /// How chunks are indexed.
49 indexing_type: ChunkIndexType,
50 /// Layout flags.
51 flags: u8,
52 /// Additional indexing parameters (type-specific).
53 index_params: ChunkIndexParams,
54 },
55}
56
/// Parameters stored alongside a chunk index; which ones exist depends on
/// the index type.
#[derive(Debug, Clone)]
pub enum ChunkIndexParams {
    /// Nothing is recorded. Used for B-tree v1, implicit and B-tree v2
    /// indexes, and for a single chunk that is not filtered.
    None,
    /// Single chunk that is filtered: its stored size and filter mask.
    SingleChunk {
        /// Size of the filtered (compressed) chunk in bytes.
        filtered_size: u64,
        /// Bit mask indicating which filters were not applied.
        filter_mask: u32,
    },
    /// Fixed array creation parameters.
    FixedArray {
        /// Number of bits needed to store the maximum number of elements in
        /// a data block page.
        page_bits: u8,
    },
    /// Extensible array creation parameters.
    ExtensibleArray {
        /// Number of bits needed to store the maximum number of elements in
        /// the whole index.
        max_bits: u8,
        /// Number of elements stored directly in the index block.
        index_elements: u8,
        /// Minimum number of data block pointers in a secondary block.
        min_pointers: u8,
        /// Minimum number of elements in a data block.
        min_elements: u8,
        /// Number of bits needed to store the maximum number of elements in
        /// a data block page.
        page_bits: u8,
    },
}
88
89impl StorageLayout {
90 /// Parse from the raw data layout message bytes.
91 pub fn parse(data: &Bytes, size_of_offsets: u8, size_of_lengths: u8) -> Result<Self> {
92 let mut r = HDF5Reader::with_sizes(data.clone(), size_of_offsets, size_of_lengths);
93
94 let version = r.read_u8()?;
95
96 match version {
97 1 | 2 => Self::parse_v1_v2(&mut r, version),
98 3 => Self::parse_v3(&mut r, size_of_offsets, size_of_lengths),
99 4 | 5 => Self::parse_v4_v5(&mut r, version, size_of_offsets, size_of_lengths),
100 _ => Err(HDF5Error::UnsupportedDataLayoutVersion(version)),
101 }
102 }
103
104 /// Parse layout message version 1 or 2.
105 fn parse_v1_v2(r: &mut HDF5Reader, version: u8) -> Result<Self> {
106 let ndims = r.read_u8()?;
107 let layout_class = r.read_u8()?;
108 r.skip(5); // reserved
109
110 // v1 has data address here for all classes
111 if version == 1 {
112 let _data_address = r.read_u32()?;
113 }
114
115 let address = r.read_offset()?;
116
117 match layout_class {
118 0 => {
119 // Compact
120 let data_size = r.read_u32()? as usize;
121 let data = r.slice_from_position(data_size)?;
122 r.skip(data_size as u64);
123 Ok(StorageLayout::Compact { data })
124 }
125 1 => {
126 // Contiguous
127 let mut total_size = 1u64;
128 for _ in 0..ndims {
129 total_size *= r.read_u32()? as u64;
130 }
131 let element_size = r.read_u32()? as u64;
132 Ok(StorageLayout::Contiguous {
133 address,
134 size: total_size * element_size,
135 })
136 }
137 2 => {
138 // Chunked
139 let mut chunk_shape = Vec::with_capacity(ndims as usize);
140 // v1/v2: ndims dimensions, each 4 bytes, last one is element size
141 for _ in 0..ndims.saturating_sub(1) {
142 chunk_shape.push(r.read_u32()? as u64);
143 }
144 let _element_size = r.read_u32()?;
145
146 Ok(StorageLayout::Chunked {
147 chunk_shape,
148 index_address: address,
149 indexing_type: ChunkIndexType::BTreeV1,
150 flags: 0,
151 index_params: ChunkIndexParams::None,
152 })
153 }
154 _ => Err(HDF5Error::General(format!(
155 "unknown layout class: {layout_class}"
156 ))),
157 }
158 }
159
160 /// Parse layout message version 3.
161 fn parse_v3(r: &mut HDF5Reader, _size_of_offsets: u8, _size_of_lengths: u8) -> Result<Self> {
162 let layout_class = r.read_u8()?;
163
164 match layout_class {
165 0 => {
166 // Compact
167 let data_size = r.read_u16()? as usize;
168 let data = r.slice_from_position(data_size)?;
169 r.skip(data_size as u64);
170 Ok(StorageLayout::Compact { data })
171 }
172 1 => {
173 // Contiguous
174 let address = r.read_offset()?;
175 let size = r.read_length()?;
176 Ok(StorageLayout::Contiguous { address, size })
177 }
178 2 => {
179 // Chunked (v3 = B-tree v1)
180 let ndims = r.read_u8()?;
181 let address = r.read_offset()?;
182
183 // ndims dimension sizes (4 bytes each), last is element size
184 let mut chunk_shape = Vec::with_capacity(ndims as usize);
185 for _ in 0..ndims.saturating_sub(1) {
186 chunk_shape.push(r.read_u32()? as u64);
187 }
188 let _element_size = r.read_u32()?;
189
190 Ok(StorageLayout::Chunked {
191 chunk_shape,
192 index_address: address,
193 indexing_type: ChunkIndexType::BTreeV1,
194 flags: 0,
195 index_params: ChunkIndexParams::None,
196 })
197 }
198 _ => Err(HDF5Error::General(format!(
199 "unknown layout class: {layout_class}"
200 ))),
201 }
202 }
203
204 /// Parse layout message version 4 or 5 (modern, with chunk indexing types).
205 fn parse_v4_v5(
206 r: &mut HDF5Reader,
207 _version: u8,
208 _size_of_offsets: u8,
209 _size_of_lengths: u8,
210 ) -> Result<Self> {
211 let layout_class = r.read_u8()?;
212
213 match layout_class {
214 0 => {
215 // Compact
216 let data_size = r.read_u16()? as usize;
217 let data = r.slice_from_position(data_size)?;
218 r.skip(data_size as u64);
219 Ok(StorageLayout::Compact { data })
220 }
221 1 => {
222 // Contiguous
223 let address = r.read_offset()?;
224 let size = r.read_length()?;
225 Ok(StorageLayout::Contiguous { address, size })
226 }
227 2 => {
228 // Chunked with indexing type
229 let flags = r.read_u8()?;
230 let ndims = r.read_u8()?;
231 let dim_size_enc_len = r.read_u8()?;
232
233 // ndims includes an extra dimension for the element size,
234 // just like v3. We read all ndims values but only keep the
235 // first ndims-1 as the actual chunk shape.
236 let mut chunk_shape = Vec::with_capacity(ndims as usize);
237 for _ in 0..ndims {
238 let dim = match dim_size_enc_len {
239 1 => r.read_u8()? as u64,
240 2 => r.read_u16()? as u64,
241 4 => r.read_u32()? as u64,
242 8 => r.read_u64()?,
243 _ => {
244 return Err(HDF5Error::General(format!(
245 "unsupported dimension size encoding length: {dim_size_enc_len}"
246 )));
247 }
248 };
249 chunk_shape.push(dim);
250 }
251 // Last dimension is element size, not a chunk dimension
252 let _element_size = chunk_shape.pop();
253
254 let chunk_indexing_type = r.read_u8()?;
255 let (indexing_type, index_params) = match chunk_indexing_type {
256 1 => {
257 // Single chunk
258 let params = if flags & 0x02 != 0 {
259 // Filtered single chunk
260 let filtered_size = r.read_length()?;
261 let filter_mask = r.read_u32()?;
262 ChunkIndexParams::SingleChunk {
263 filtered_size,
264 filter_mask,
265 }
266 } else {
267 ChunkIndexParams::None
268 };
269 (ChunkIndexType::SingleChunk, params)
270 }
271 2 => {
272 // Implicit
273 (ChunkIndexType::Implicit, ChunkIndexParams::None)
274 }
275 3 => {
276 // Fixed array
277 let page_bits = r.read_u8()?;
278 (
279 ChunkIndexType::FixedArray,
280 ChunkIndexParams::FixedArray { page_bits },
281 )
282 }
283 4 => {
284 // Extensible array
285 let max_bits = r.read_u8()?;
286 let index_elements = r.read_u8()?;
287 let min_pointers = r.read_u8()?;
288 let min_elements = r.read_u8()?;
289 let page_bits = r.read_u8()?;
290 (
291 ChunkIndexType::ExtensibleArray,
292 ChunkIndexParams::ExtensibleArray {
293 max_bits,
294 index_elements,
295 min_pointers,
296 min_elements,
297 page_bits,
298 },
299 )
300 }
301 5 => {
302 // B-tree v2
303 (ChunkIndexType::BTreeV2, ChunkIndexParams::None)
304 }
305 _ => {
306 return Err(HDF5Error::UnsupportedChunkIndexingType(chunk_indexing_type));
307 }
308 };
309
310 let index_address = r.read_offset()?;
311
312 Ok(StorageLayout::Chunked {
313 chunk_shape,
314 index_address,
315 indexing_type,
316 flags,
317 index_params,
318 })
319 }
320 _ => Err(HDF5Error::General(format!(
321 "unknown layout class: {layout_class}"
322 ))),
323 }
324 }
325
326 /// Returns true if the layout is chunked.
327 pub fn is_chunked(&self) -> bool {
328 matches!(self, StorageLayout::Chunked { .. })
329 }
330
331 /// Returns true if the layout is contiguous.
332 pub fn is_contiguous(&self) -> bool {
333 matches!(self, StorageLayout::Contiguous { .. })
334 }
335}