1use std::sync::Arc;
14
15use crate::endian::HDF5Reader;
16use crate::error::{HDF5Error, Result};
17use crate::fixed_array::{parse_entries, read_n_byte_uint, FixedArrayChunkEntry};
18use crate::reader::AsyncFileReader;
19
/// Layout of one super block level of an Extensible Array.
///
/// One instance is precomputed per super block index when the array header
/// is parsed.
#[derive(Debug, Clone)]
pub struct SBlockInfo {
    /// Number of data blocks that belong to this super block.
    pub ndblks: usize,
    /// Number of elements held by each of those data blocks.
    pub dblk_nelmts: usize,
    /// Index of the first element covered by this super block, counted from
    /// the first element that is *not* stored inline in the index block.
    pub start_idx: u64,
}
30
31#[derive(Debug)]
33pub struct ExtensibleArrayHeader {
34 pub client_id: u8,
36 pub element_size: u8,
38 pub max_nelmts_bits: u8,
40 pub idx_blk_elmts: u8,
42 pub data_blk_min_elmts: u8,
44 pub sup_blk_min_data_ptrs: u8,
46 pub max_dblk_page_nelmts_bits: u8,
48 pub index_block_address: u64,
50
51 pub nsblks: usize,
54 pub sblk_info: Vec<SBlockInfo>,
56 pub dblk_page_nelmts: usize,
58 pub arr_off_size: u8,
60}
61
62impl ExtensibleArrayHeader {
63 pub async fn read(
65 reader: &Arc<dyn AsyncFileReader>,
66 address: u64,
67 size_of_offsets: u8,
68 size_of_lengths: u8,
69 ) -> Result<Self> {
70 let fetch_size = 12 + 6 * size_of_lengths as u64 + size_of_offsets as u64 + 4;
75 let data = reader.get_bytes(address..address + fetch_size).await?;
76 let mut r = HDF5Reader::with_sizes(data, size_of_offsets, size_of_lengths);
77
78 r.read_signature(b"EAHD")?;
79 let version = r.read_u8()?;
80 if version != 0 {
81 return Err(HDF5Error::General(format!(
82 "unsupported Extensible Array header version: {version}"
83 )));
84 }
85
86 let client_id = r.read_u8()?;
87 let element_size = r.read_u8()?;
88 let max_nelmts_bits = r.read_u8()?;
89 let idx_blk_elmts = r.read_u8()?;
90 let data_blk_min_elmts = r.read_u8()?;
91 let sup_blk_min_data_ptrs = r.read_u8()?;
92 let max_dblk_page_nelmts_bits = r.read_u8()?;
93
94 r.skip(6 * size_of_lengths as u64);
96
97 let index_block_address = r.read_offset()?;
98 let log2_min = log2_of_power_of_2(data_blk_min_elmts as u32);
102 let nsblks = 1 + (max_nelmts_bits as usize - log2_min as usize);
103 let dblk_page_nelmts = 1usize << max_dblk_page_nelmts_bits;
104 let arr_off_size = max_nelmts_bits.div_ceil(8);
105
106 let mut sblk_info = Vec::with_capacity(nsblks);
107 let mut start_idx = 0u64;
108 for s in 0..nsblks {
109 let ndblks = 1usize << (s / 2);
110 let dblk_nelmts = (1usize << s.div_ceil(2)) * data_blk_min_elmts as usize;
111 sblk_info.push(SBlockInfo {
112 ndblks,
113 dblk_nelmts,
114 start_idx,
115 });
116 start_idx += (ndblks as u64) * (dblk_nelmts as u64);
117 }
118
119 Ok(Self {
120 client_id,
121 element_size,
122 max_nelmts_bits,
123 idx_blk_elmts,
124 data_blk_min_elmts,
125 sup_blk_min_data_ptrs,
126 max_dblk_page_nelmts_bits,
127 index_block_address,
128 nsblks,
129 sblk_info,
130 dblk_page_nelmts,
131 arr_off_size,
132 })
133 }
134}
135
136pub(crate) struct IndexedEntry {
138 pub flat_idx: u64,
139 pub entry: FixedArrayChunkEntry,
140}
141
142pub(crate) async fn read_extensible_array_entries(
147 reader: &Arc<dyn AsyncFileReader>,
148 header: &ExtensibleArrayHeader,
149 size_of_offsets: u8,
150 size_of_lengths: u8,
151 uncompressed_chunk_size: u64,
152 layout_version: u8,
153) -> Result<Vec<IndexedEntry>> {
154 if HDF5Reader::is_undef_addr(header.index_block_address, size_of_offsets) {
155 return Ok(vec![]);
156 }
157
158 let is_filtered = header.client_id == 1;
159
160 let nsblks_in_iblock = if header.sup_blk_min_data_ptrs <= 1 {
162 0usize
163 } else {
164 2 * log2_of_power_of_2(header.sup_blk_min_data_ptrs as u32) as usize
165 };
166 let ndblk_addrs = if header.sup_blk_min_data_ptrs <= 1 {
167 0usize
168 } else {
169 2 * (header.sup_blk_min_data_ptrs as usize - 1)
170 };
171 let nsblk_addrs = header.nsblks.saturating_sub(nsblks_in_iblock);
172
173 let iblock_size = 4 + 1 + 1 + size_of_offsets as usize + header.idx_blk_elmts as usize * header.element_size as usize + ndblk_addrs * size_of_offsets as usize + nsblk_addrs * size_of_offsets as usize + 4; let data = reader
184 .get_bytes(header.index_block_address..header.index_block_address + iblock_size as u64)
185 .await?;
186 let mut r = HDF5Reader::with_sizes(data, size_of_offsets, size_of_lengths);
187
188 r.read_signature(b"EAIB")?;
190 let version = r.read_u8()?;
191 if version != 0 {
192 return Err(HDF5Error::General(format!(
193 "unsupported Extensible Array index block version: {version}"
194 )));
195 }
196 let _client_id = r.read_u8()?;
197 let _header_address = r.read_offset()?;
198
199 let mut result = Vec::new();
201 if header.idx_blk_elmts > 0 {
202 let inline = parse_entries(
203 &mut r,
204 header.idx_blk_elmts as usize,
205 is_filtered,
206 size_of_offsets,
207 uncompressed_chunk_size,
208 layout_version,
209 )?;
210 for (i, entry) in inline.into_iter().enumerate() {
211 if !HDF5Reader::is_undef_addr(entry.address, size_of_offsets) {
212 result.push(IndexedEntry {
213 flat_idx: i as u64,
214 entry,
215 });
216 }
217 }
218 }
219
220 let mut dblk_addrs = Vec::with_capacity(ndblk_addrs);
222 for _ in 0..ndblk_addrs {
223 dblk_addrs.push(r.read_offset()?);
224 }
225
226 let mut sblk_addrs = Vec::with_capacity(nsblk_addrs);
228 for _ in 0..nsblk_addrs {
229 sblk_addrs.push(r.read_offset()?);
230 }
231
232 let base_idx = header.idx_blk_elmts as u64;
236 let mut dblk_idx = 0usize;
237 for s in 0..nsblks_in_iblock.min(header.nsblks) {
238 let info = &header.sblk_info[s];
239 for d in 0..info.ndblks {
240 if dblk_idx < dblk_addrs.len() {
241 let addr = dblk_addrs[dblk_idx];
242 dblk_idx += 1;
243 let flat_start = base_idx + info.start_idx + (d as u64) * (info.dblk_nelmts as u64);
244 collect_data_block_entries(
245 reader,
246 addr,
247 info.dblk_nelmts,
248 flat_start,
249 header,
250 size_of_offsets,
251 size_of_lengths,
252 uncompressed_chunk_size,
253 layout_version,
254 None,
255 &mut result,
256 )
257 .await?;
258 }
259 }
260 }
261
262 for (sblk_rel_idx, &sblk_addr) in sblk_addrs.iter().enumerate() {
264 let sblk_idx = nsblks_in_iblock + sblk_rel_idx;
265 if sblk_idx >= header.nsblks {
266 break;
267 }
268 if HDF5Reader::is_undef_addr(sblk_addr, size_of_offsets) {
270 continue;
271 }
272
273 collect_super_block_entries(
274 reader,
275 sblk_addr,
276 sblk_idx,
277 base_idx,
278 header,
279 size_of_offsets,
280 size_of_lengths,
281 uncompressed_chunk_size,
282 layout_version,
283 &mut result,
284 )
285 .await?;
286 }
287
288 Ok(result)
289}
290
291#[allow(clippy::too_many_arguments)]
293async fn collect_super_block_entries(
294 reader: &Arc<dyn AsyncFileReader>,
295 address: u64,
296 sblk_idx: usize,
297 base_idx: u64,
298 header: &ExtensibleArrayHeader,
299 size_of_offsets: u8,
300 size_of_lengths: u8,
301 uncompressed_chunk_size: u64,
302 layout_version: u8,
303 result: &mut Vec<IndexedEntry>,
304) -> Result<()> {
305 let info = &header.sblk_info[sblk_idx];
306
307 let dblk_npages = if info.dblk_nelmts > header.dblk_page_nelmts {
309 info.dblk_nelmts / header.dblk_page_nelmts
310 } else {
311 0
312 };
313 let dblk_page_init_size = if dblk_npages > 0 {
314 dblk_npages.div_ceil(8)
315 } else {
316 0
317 };
318
319 let sblock_size = 4 + 1 + 1 + size_of_offsets as usize + header.arr_off_size as usize + info.ndblks * dblk_page_init_size + info.ndblks * size_of_offsets as usize + 4; let data = reader
330 .get_bytes(address..address + sblock_size as u64)
331 .await?;
332 let mut r = HDF5Reader::with_sizes(data, size_of_offsets, size_of_lengths);
333
334 r.read_signature(b"EASB")?;
335 let version = r.read_u8()?;
336 if version != 0 {
337 return Err(HDF5Error::General(format!(
338 "unsupported Extensible Array super block version: {version}"
339 )));
340 }
341 let _client_id = r.read_u8()?;
342 let _header_address = r.read_offset()?;
343 let _block_offset = read_n_byte_uint(&mut r, header.arr_off_size)?;
344
345 let mut page_init_bitmaps: Vec<Vec<u8>> = Vec::with_capacity(info.ndblks);
347 if dblk_page_init_size > 0 {
348 for _ in 0..info.ndblks {
349 let bitmap = r.read_bytes(dblk_page_init_size)?;
350 page_init_bitmaps.push(bitmap);
351 }
352 }
353
354 let mut dblk_addrs = Vec::with_capacity(info.ndblks);
356 for _ in 0..info.ndblks {
357 dblk_addrs.push(r.read_offset()?);
358 }
359
360 for (d, &dblk_addr) in dblk_addrs.iter().enumerate() {
362 let page_bitmap = if !page_init_bitmaps.is_empty() {
363 Some(&page_init_bitmaps[d])
364 } else {
365 None
366 };
367
368 let flat_start = base_idx + info.start_idx + (d as u64) * (info.dblk_nelmts as u64);
369 collect_data_block_entries(
370 reader,
371 dblk_addr,
372 info.dblk_nelmts,
373 flat_start,
374 header,
375 size_of_offsets,
376 size_of_lengths,
377 uncompressed_chunk_size,
378 layout_version,
379 page_bitmap,
380 result,
381 )
382 .await?;
383 }
384
385 Ok(())
386}
387
388#[allow(clippy::too_many_arguments)]
390async fn collect_data_block_entries(
391 reader: &Arc<dyn AsyncFileReader>,
392 address: u64,
393 nelmts: usize,
394 flat_start: u64,
395 header: &ExtensibleArrayHeader,
396 size_of_offsets: u8,
397 size_of_lengths: u8,
398 uncompressed_chunk_size: u64,
399 layout_version: u8,
400 page_bitmap: Option<&Vec<u8>>,
401 result: &mut Vec<IndexedEntry>,
402) -> Result<()> {
403 if HDF5Reader::is_undef_addr(address, size_of_offsets) {
404 return Ok(()); }
406
407 let is_filtered = header.client_id == 1;
408 let is_paged = nelmts > header.dblk_page_nelmts;
409
410 let prefix_size = 4 + 1 + 1 + size_of_offsets as usize + header.arr_off_size as usize; if !is_paged {
418 let total_size = prefix_size + nelmts * header.element_size as usize + 4; let data = reader
421 .get_bytes(address..address + total_size as u64)
422 .await?;
423 let mut r = HDF5Reader::with_sizes(data, size_of_offsets, size_of_lengths);
424
425 r.read_signature(b"EADB")?;
426 let version = r.read_u8()?;
427 if version != 0 {
428 return Err(HDF5Error::General(format!(
429 "unsupported Extensible Array data block version: {version}"
430 )));
431 }
432 let _client_id = r.read_u8()?;
433 let _header_address = r.read_offset()?;
434 let _block_offset = read_n_byte_uint(&mut r, header.arr_off_size)?;
435
436 let entries = parse_entries(
437 &mut r,
438 nelmts,
439 is_filtered,
440 size_of_offsets,
441 uncompressed_chunk_size,
442 layout_version,
443 )?;
444 for (i, entry) in entries.into_iter().enumerate() {
445 if !HDF5Reader::is_undef_addr(entry.address, size_of_offsets) {
446 result.push(IndexedEntry {
447 flat_idx: flat_start + i as u64,
448 entry,
449 });
450 }
451 }
452 } else {
453 let prefix_total = prefix_size + 4; let data = reader
456 .get_bytes(address..address + prefix_total as u64)
457 .await?;
458 let mut r = HDF5Reader::with_sizes(data, size_of_offsets, size_of_lengths);
459
460 r.read_signature(b"EADB")?;
461 let version = r.read_u8()?;
462 if version != 0 {
463 return Err(HDF5Error::General(format!(
464 "unsupported Extensible Array data block version: {version}"
465 )));
466 }
467 let _client_id = r.read_u8()?;
468 let _header_address = r.read_offset()?;
469 let _block_offset = read_n_byte_uint(&mut r, header.arr_off_size)?;
470
471 let npages = nelmts / header.dblk_page_nelmts;
473 let page_size = header.dblk_page_nelmts * header.element_size as usize + 4; let pages_start = address + prefix_total as u64;
475
476 for page_idx in 0..npages {
477 let page_initialized = match page_bitmap {
479 Some(bitmap) => {
480 let byte_idx = page_idx / 8;
481 let bit_idx = page_idx % 8;
482 byte_idx < bitmap.len() && (bitmap[byte_idx] >> bit_idx) & 1 != 0
483 }
484 None => true,
485 };
486
487 if !page_initialized {
488 continue; }
490
491 let page_addr = pages_start + (page_idx as u64) * (page_size as u64);
492 let page_data = reader
493 .get_bytes(page_addr..page_addr + page_size as u64)
494 .await?;
495 let mut pr = HDF5Reader::with_sizes(page_data, size_of_offsets, size_of_lengths);
496
497 let page_entries = parse_entries(
498 &mut pr,
499 header.dblk_page_nelmts,
500 is_filtered,
501 size_of_offsets,
502 uncompressed_chunk_size,
503 layout_version,
504 )?;
505 let page_flat_start = flat_start + (page_idx * header.dblk_page_nelmts) as u64;
506 for (i, entry) in page_entries.into_iter().enumerate() {
507 if !HDF5Reader::is_undef_addr(entry.address, size_of_offsets) {
508 result.push(IndexedEntry {
509 flat_idx: page_flat_start + i as u64,
510 entry,
511 });
512 }
513 }
514 }
515 }
516
517 Ok(())
518}
519
/// Returns the base-2 logarithm of `n`, which must be a non-zero power of 2.
///
/// The precondition is only checked in debug builds. In release builds the
/// result for any other input is simply the number of trailing zero bits
/// (32 for `n == 0`).
fn log2_of_power_of_2(n: u32) -> u32 {
    // `is_power_of_two` is already false for 0, so it covers the `n > 0` case.
    debug_assert!(n.is_power_of_two(), "{n} is not a power of 2");
    u32::trailing_zeros(n)
}