1use std::mem::MaybeUninit;
2use std::num::NonZeroUsize;
3use std::sync::{Arc, OnceLock};
4
5use lru::LruCache;
6use ndarray::{ArrayD, IxDyn};
7use parking_lot::Mutex;
8#[cfg(feature = "rayon")]
9use rayon::prelude::*;
10use smallvec::SmallVec;
11
12use crate::attribute_api::{
13 collect_attribute_messages_storage, decode_string, decode_varlen_byte_string,
14 read_one_vlen_string_storage, resolve_vlen_bytes_storage, Attribute,
15};
16use crate::cache::{ChunkCache, ChunkKey};
17use crate::chunk_index;
18use crate::datatype_api::{dtype_element_size, H5Type};
19use crate::error::{Error, Result};
20use crate::filters::{self, FilterRegistry};
21use crate::io::Cursor;
22use crate::local_heap::LocalHeap;
23use crate::messages::attribute::AttributeMessage;
24use crate::messages::dataspace::{DataspaceMessage, DataspaceType};
25use crate::messages::datatype::{Datatype, StringSize};
26use crate::messages::external_files::ExternalFilesMessage;
27use crate::messages::fill_value::{FillTime, FillValueMessage};
28use crate::messages::filter_pipeline::FilterPipelineMessage;
29use crate::messages::layout::{ChunkIndexing, DataLayout};
30use crate::messages::HdfMessage;
31use crate::object_header::ObjectHeader;
32use crate::storage::DynStorage;
33use crate::FileContext;
34
/// Byte threshold (32 MiB) associated with the hot full-dataset cache.
///
/// NOTE(review): the code that consults this limit lies outside the visible
/// part of the file; presumably fully decoded datasets larger than this are
/// not retained in `Dataset::full_dataset_bytes` — confirm at the use site.
const HOT_FULL_DATASET_CACHE_MAX_BYTES: usize = 32 << 20;
36
/// Raw, copyable handle to a flat byte buffer: a base pointer plus a length.
///
/// The handle carries no lifetime, so nothing in the type system ties it to
/// the allocation it points into; whoever creates one must keep that
/// allocation alive for as long as the handle (or any copy of it) is used.
#[derive(Copy, Clone)]
struct FlatBufferPtr {
    /// Base address of the buffer.
    ptr: *mut u8,
    /// Extent of the buffer — NOTE(review): presumably a byte count; confirm.
    len: usize,
}
42
/// Borrowed bundle of the geometry handed to the chunk-to-flat-buffer copy
/// routine (see `FlatBufferPtr::copy_chunk`).
///
/// NOTE(review): the copy routine itself is outside this view, so the field
/// descriptions below follow the field names and types only — confirm.
#[derive(Copy, Clone)]
struct ChunkCopyLayout<'a> {
    /// Per-dimension offsets of the chunk (presumably its origin in dataset
    /// element coordinates).
    chunk_offsets: &'a [u64],
    /// Per-dimension extent of one chunk.
    chunk_shape: &'a [u64],
    /// Per-dimension extent of the whole dataset.
    dataset_shape: &'a [u64],
    /// Per-dimension strides of the dataset buffer (presumably in elements).
    dataset_strides: &'a [usize],
    /// Per-dimension strides within the chunk buffer (presumably in elements).
    chunk_strides: &'a [usize],
    /// Size of one element (presumably in bytes).
    elem_size: usize,
}
52
53#[derive(Clone, Copy)]
54struct UnitStrideCopyLayout<'a> {
55 chunk_offsets: &'a [u64],
56 chunk_shape: &'a [u64],
57 dataset_shape: &'a [u64],
58 resolved: &'a ResolvedSelection,
59 chunk_strides: &'a [usize],
60 result_strides: &'a [usize],
61 elem_size: usize,
62}
63
/// Borrowed geometry for reading a slice of a contiguous dataset directly.
///
/// NOTE(review): the consumer of this struct is outside this view, so the
/// field descriptions follow the field names and types only — confirm.
#[derive(Copy, Clone)]
struct ContiguousSliceDirectLayout<'a> {
    /// Per-dimension strides of the stored dataset.
    dataset_strides: &'a [usize],
    /// Per-dimension strides of the result buffer.
    result_strides: &'a [usize],
    /// Size of one element (presumably in bytes).
    elem_size: usize,
    /// Total size of the result buffer in bytes.
    result_total_bytes: usize,
}
71
72#[derive(Clone)]
73struct ResolvedExternalRawSlot {
74 logical_offset: u64,
75 storage: DynStorage,
76 file_offset: u64,
77 size: u64,
78}
79
80pub(crate) struct DatasetParseContext {
81 pub(crate) context: Arc<FileContext>,
82}
83
/// Borrowed parameters describing which chunk entries to gather.
///
/// NOTE(review): the consumer is outside this view, so the field descriptions
/// follow the field names and types only — confirm.
#[derive(Copy, Clone)]
struct ChunkEntrySelection<'a> {
    /// Per-dimension extent of the dataset.
    shape: &'a [u64],
    /// Number of dataset dimensions.
    ndim: usize,
    /// Size of one element (presumably in bytes).
    elem_size: usize,
    /// Optional pair of per-dimension chunk-grid bounds (presumably
    /// `(first_chunk, last_chunk)`, inclusive); `None` presumably means
    /// "no restriction".
    chunk_bounds: Option<(&'a [u64], &'a [u64])>,
}
91
92unsafe impl Send for FlatBufferPtr {}
93
94unsafe impl Sync for FlatBufferPtr {}
95
96impl FlatBufferPtr {
97 #[cfg(feature = "rayon")]
98 #[inline(always)]
99 unsafe fn copy_chunk(self, chunk_data: &[u8], layout: ChunkCopyLayout<'_>) -> Result<()> {
100 copy_chunk_to_flat_with_strides_ptr(chunk_data, self, layout)
101 }
102
103 #[cfg(feature = "rayon")]
104 #[inline(always)]
105 unsafe fn copy_selected(
106 self,
107 chunk_data: &[u8],
108 dim_indices: &[Vec<(usize, usize)>],
109 chunk_strides: &[usize],
110 result_strides: &[usize],
111 elem_size: usize,
112 ndim: usize,
113 ) -> Result<()> {
114 copy_selected_elements_ptr(
115 chunk_data,
116 self.ptr,
117 self.len,
118 dim_indices,
119 chunk_strides,
120 result_strides,
121 elem_size,
122 ndim,
123 )
124 }
125
126 #[cfg(feature = "rayon")]
127 #[inline(always)]
128 unsafe fn copy_unit_stride_chunk_overlap(
129 self,
130 chunk_data: &[u8],
131 layout: UnitStrideCopyLayout<'_>,
132 ) -> Result<()> {
133 copy_unit_stride_chunk_overlap_ptr(chunk_data, self, layout)
134 }
135}
136
137#[derive(Debug, Clone)]
139pub struct SliceInfo {
140 pub selections: Vec<SliceInfoElem>,
141}
142
/// Selection along a single dimension.
#[derive(Clone, Debug)]
pub enum SliceInfoElem {
    /// Pick exactly one position. The dimension does not appear in the result
    /// shape. The index must be smaller than the dimension's extent.
    Index(u64),
    /// Pick positions `start, start + step, ...` strictly below `end`.
    ///
    /// `end` is clamped to the dimension's extent (so `u64::MAX` means "to the
    /// end"), `step` must be non-zero, and `start` may not exceed the extent.
    Slice { start: u64, end: u64, step: u64 },
}
151
/// One dimension of a selection after validation against the dataset shape.
#[derive(Debug, Clone)]
struct ResolvedSelectionDim {
    /// First selected position.
    start: u64,
    /// Exclusive upper bound, already clamped to the dimension's extent.
    end: u64,
    /// Distance between consecutive selected positions (never 0).
    step: u64,
    /// Number of selected positions; 1 for an index selection.
    count: usize,
}
159
160#[derive(Clone, Debug, PartialEq, Eq, Hash)]
161struct ChunkEntryCacheKey {
162 index_address: u64,
163 first_chunk: SmallVec<[u64; 4]>,
164 last_chunk: SmallVec<[u64; 4]>,
165}
166
167impl ResolvedSelectionDim {
168 fn chunk_index_range(&self, chunk_extent: u64) -> Option<(u64, u64)> {
169 if self.count == 0 {
170 return None;
171 }
172
173 Some((self.start / chunk_extent, (self.end - 1) / chunk_extent))
174 }
175}
176
177#[derive(Clone, Debug)]
178struct ResolvedSelection {
179 dims: Vec<ResolvedSelectionDim>,
180 result_shape: Vec<usize>,
181 result_elements: usize,
182}
183
184impl ResolvedSelection {
185 fn result_dims_with_collapsed(&self) -> Vec<usize> {
186 self.dims.iter().map(|dim| dim.count).collect()
187 }
188
189 fn is_unit_stride(&self) -> bool {
190 self.dims.iter().all(|dim| dim.step == 1)
191 }
192}
193
194impl SliceInfo {
195 pub fn all(ndim: usize) -> Self {
197 SliceInfo {
198 selections: vec![
199 SliceInfoElem::Slice {
200 start: 0,
201 end: u64::MAX,
202 step: 1,
203 };
204 ndim
205 ],
206 }
207 }
208}
209
210fn checked_usize(value: u64, context: &str) -> Result<usize> {
211 usize::try_from(value).map_err(|_| {
212 Error::InvalidData(format!(
213 "{context} value {value} exceeds platform usize capacity"
214 ))
215 })
216}
217
218fn checked_mul_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
219 lhs.checked_mul(rhs)
220 .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
221}
222
223fn checked_add_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
224 lhs.checked_add(rhs)
225 .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
226}
227
228fn checked_mul_u64(lhs: u64, rhs: u64, context: &str) -> Result<u64> {
229 lhs.checked_mul(rhs)
230 .ok_or_else(|| Error::InvalidData(format!("{context} exceeds u64 capacity")))
231}
232
233fn checked_add_u64(lhs: u64, rhs: u64, context: &str) -> Result<u64> {
234 lhs.checked_add(rhs)
235 .ok_or_else(|| Error::InvalidData(format!("{context} exceeds u64 capacity")))
236}
237
238fn checked_shape_elements_usize(shape: &[u64], context: &str) -> Result<usize> {
239 let mut total = 1usize;
240 for &dim in shape {
241 total = checked_mul_usize(total, checked_usize(dim, context)?, context)?;
242 }
243 Ok(total)
244}
245
246fn full_dataset_chunk_bounds(
247 shape: &[u64],
248 chunk_shape: &[u64],
249) -> Result<Option<(Vec<u64>, Vec<u64>)>> {
250 validate_chunk_shape(shape, chunk_shape)?;
251 if shape.contains(&0) {
252 return Ok(None);
253 }
254
255 let first_chunk = vec![0u64; shape.len()];
256 let last_chunk = shape
257 .iter()
258 .zip(chunk_shape.iter())
259 .map(|(&dim, &chunk)| dim.div_ceil(chunk) - 1)
260 .collect();
261 Ok(Some((first_chunk, last_chunk)))
262}
263
264fn validate_chunk_shape(shape: &[u64], chunk_shape: &[u64]) -> Result<()> {
265 if chunk_shape.len() != shape.len() {
266 return Err(Error::InvalidData(format!(
267 "chunk rank {} does not match dataset rank {}",
268 chunk_shape.len(),
269 shape.len()
270 )));
271 }
272 if let Some((dim, _)) = chunk_shape
273 .iter()
274 .enumerate()
275 .find(|(_, chunk)| **chunk == 0)
276 {
277 return Err(Error::InvalidData(format!(
278 "chunk dimension {dim} has zero extent"
279 )));
280 }
281 Ok(())
282}
283
284fn validate_decoded_chunk_len(
285 entry: &chunk_index::ChunkEntry,
286 chunk_shape: &[u64],
287 elem_size: usize,
288 actual_len: usize,
289) -> Result<()> {
290 let chunk_elements = checked_shape_elements_usize(chunk_shape, "decoded chunk element count")?;
291 let expected_len = checked_mul_usize(chunk_elements, elem_size, "decoded chunk byte length")?;
292 if actual_len != expected_len {
293 return Err(Error::InvalidData(format!(
294 "chunk at offsets {:?} decoded to {} bytes, expected {} bytes",
295 entry.offsets, actual_len, expected_len
296 )));
297 }
298 Ok(())
299}
300
301fn validate_chunk_grid_coverage(
302 entries: &mut [chunk_index::ChunkEntry],
303 shape: &[u64],
304 chunk_shape: &[u64],
305 first_chunk: &[u64],
306 last_chunk: &[u64],
307) -> Result<bool> {
308 validate_chunk_shape(shape, chunk_shape)?;
309 if first_chunk.len() != shape.len() || last_chunk.len() != shape.len() {
310 return Err(Error::InvalidData(format!(
311 "chunk grid bounds rank does not match dataset rank {}",
312 shape.len()
313 )));
314 }
315
316 if shape.contains(&0) {
317 if entries.is_empty() {
318 return Ok(true);
319 }
320 return Err(Error::InvalidData(
321 "chunk index contains entries for an empty dataset".into(),
322 ));
323 }
324
325 for dim in 0..shape.len() {
326 if first_chunk[dim] > last_chunk[dim] {
327 return Err(Error::InvalidData(format!(
328 "invalid chunk grid bounds for dimension {dim}: {} > {}",
329 first_chunk[dim], last_chunk[dim]
330 )));
331 }
332 }
333
334 entries.sort_by(|a, b| a.offsets.cmp(&b.offsets));
335
336 for i in 0..entries.len() {
337 validate_chunk_entry_offsets(&entries[i], shape, chunk_shape, first_chunk, last_chunk)?;
338 if i > 0 && entries[i].offsets == entries[i - 1].offsets {
339 return Err(Error::InvalidData(format!(
340 "duplicate chunk output offsets {:?} (addresses {:#x} and {:#x})",
341 entries[i].offsets,
342 entries[i - 1].address,
343 entries[i].address
344 )));
345 }
346 }
347
348 let mut entry_idx = 0usize;
349 let mut expected = first_chunk.to_vec();
350 loop {
351 let expected_offsets: Vec<u64> = expected
352 .iter()
353 .enumerate()
354 .map(|(dim, chunk_index)| chunk_index * chunk_shape[dim])
355 .collect();
356
357 if entry_idx >= entries.len() || entries[entry_idx].offsets != expected_offsets {
358 return Ok(false);
359 }
360 entry_idx += 1;
361
362 if !advance_chunk_index(&mut expected, first_chunk, last_chunk) {
363 break;
364 }
365 }
366
367 Ok(entry_idx == entries.len())
368}
369
370fn validate_chunk_entry_offsets(
371 entry: &chunk_index::ChunkEntry,
372 shape: &[u64],
373 chunk_shape: &[u64],
374 first_chunk: &[u64],
375 last_chunk: &[u64],
376) -> Result<()> {
377 if entry.offsets.len() != shape.len() {
378 return Err(Error::InvalidData(format!(
379 "chunk at address {:#x} has rank {}, expected {}",
380 entry.address,
381 entry.offsets.len(),
382 shape.len()
383 )));
384 }
385
386 for dim in 0..shape.len() {
387 let offset = entry.offsets[dim];
388 if offset >= shape[dim] {
389 return Err(Error::InvalidData(format!(
390 "chunk at address {:#x} has out-of-bounds offset {} for dimension {} of size {}",
391 entry.address, offset, dim, shape[dim]
392 )));
393 }
394 if offset % chunk_shape[dim] != 0 {
395 return Err(Error::InvalidData(format!(
396 "chunk at address {:#x} has non-grid offset {} for dimension {} with chunk extent {}",
397 entry.address, offset, dim, chunk_shape[dim]
398 )));
399 }
400
401 let chunk_index = offset / chunk_shape[dim];
402 if chunk_index < first_chunk[dim] || chunk_index > last_chunk[dim] {
403 return Err(Error::InvalidData(format!(
404 "chunk at address {:#x} has offset {:?} outside requested chunk grid",
405 entry.address, entry.offsets
406 )));
407 }
408 }
409
410 Ok(())
411}
412
/// Advances `index` to the next cell of the inclusive grid range
/// `first_chunk..=last_chunk` in row-major order (last dimension fastest).
///
/// The rightmost dimension that is still below its upper bound is
/// incremented and every dimension after it is reset to its lower bound.
/// Returns `false`, leaving `index` untouched, when `index` is empty or no
/// dimension can be incremented any further.
fn advance_chunk_index(index: &mut [u64], first_chunk: &[u64], last_chunk: &[u64]) -> bool {
    let rank = index.len();
    let Some(dim) = (0..rank).rev().find(|&d| index[d] < last_chunk[d]) else {
        return false;
    };

    index[dim] += 1;
    let tail = dim + 1;
    if tail < rank {
        index[tail..].copy_from_slice(&first_chunk[tail..]);
    }
    true
}
430
431fn row_major_strides(shape: &[u64], context: &str) -> Result<Vec<usize>> {
432 let ndim = shape.len();
433 if ndim == 0 {
434 return Ok(Vec::new());
435 }
436
437 let mut strides = vec![1usize; ndim];
438 for i in (0..ndim - 1).rev() {
439 let next_extent = checked_usize(shape[i + 1], context)?;
440 strides[i] = checked_mul_usize(strides[i + 1], next_extent, context)?;
441 }
442 Ok(strides)
443}
444
/// Reinterprets a `Vec<MaybeUninit<u8>>` as a `Vec<u8>` without copying,
/// preserving the allocation, length and capacity.
///
/// The caller must have initialized all `buffer.len()` bytes before calling
/// this function; reading an uninitialized byte through the returned vector
/// is undefined behavior. (The function is not marked `unsafe` to keep its
/// existing signature, so this contract is by convention only.)
fn assume_init_u8_vec(buffer: Vec<MaybeUninit<u8>>) -> Vec<u8> {
    // Give up ownership *before* extracting the raw parts. Moving the vector
    // into `mem::forget` after its pointer has been taken is the pattern the
    // standard library documentation warns against.
    let mut buffer = std::mem::ManuallyDrop::new(buffer);
    let ptr = buffer.as_mut_ptr().cast::<u8>();
    let len = buffer.len();
    let capacity = buffer.capacity();
    // SAFETY: `ptr`, `len` and `capacity` come from a live `Vec` whose
    // destructor will never run (it is wrapped in `ManuallyDrop`), so the
    // allocation is handed over exactly once. `MaybeUninit<u8>` has the same
    // size and alignment as `u8`. Initialization of the first `len` bytes is
    // the caller's obligation, as documented above.
    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
}
452
/// Reinterprets a `Vec<MaybeUninit<T>>` as a `Vec<T>` without copying,
/// preserving the allocation, length and capacity.
///
/// The caller must have initialized all `buffer.len()` elements before
/// calling this function; using or dropping an uninitialized element through
/// the returned vector is undefined behavior. (The function is not marked
/// `unsafe` to keep its existing signature, so this contract is by
/// convention only.)
fn assume_init_vec<T>(buffer: Vec<MaybeUninit<T>>) -> Vec<T> {
    // Give up ownership *before* extracting the raw parts. Moving the vector
    // into `mem::forget` after its pointer has been taken is the pattern the
    // standard library documentation warns against.
    let mut buffer = std::mem::ManuallyDrop::new(buffer);
    let ptr = buffer.as_mut_ptr().cast::<T>();
    let len = buffer.len();
    let capacity = buffer.capacity();
    // SAFETY: `ptr`, `len` and `capacity` come from a live `Vec` whose
    // destructor will never run (it is wrapped in `ManuallyDrop`), so the
    // allocation is handed over exactly once. `MaybeUninit<T>` is guaranteed
    // to have the same size and alignment as `T`. Initialization of the first
    // `len` elements is the caller's obligation, as documented above.
    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
}
460
461fn normalize_selection(selection: &SliceInfo, shape: &[u64]) -> Result<ResolvedSelection> {
462 if selection.selections.len() != shape.len() {
463 return Err(Error::InvalidData(format!(
464 "slice has {} dimensions but dataset has {}",
465 selection.selections.len(),
466 shape.len()
467 )));
468 }
469
470 let mut dims = Vec::with_capacity(shape.len());
471 let mut result_shape = Vec::new();
472 let mut result_elements = 1usize;
473
474 for (i, sel) in selection.selections.iter().enumerate() {
475 let dim_size = shape[i];
476 match sel {
477 SliceInfoElem::Index(idx) => {
478 if *idx >= dim_size {
479 return Err(Error::SliceOutOfBounds {
480 dim: i,
481 index: *idx,
482 size: dim_size,
483 });
484 }
485 dims.push(ResolvedSelectionDim {
486 start: *idx,
487 end: *idx + 1,
488 step: 1,
489 count: 1,
490 });
491 }
492 SliceInfoElem::Slice { start, end, step } => {
493 if *step == 0 {
494 return Err(Error::InvalidData("slice step cannot be 0".into()));
495 }
496 if *start > dim_size {
497 return Err(Error::SliceOutOfBounds {
498 dim: i,
499 index: *start,
500 size: dim_size,
501 });
502 }
503
504 let actual_end = if *end == u64::MAX {
505 dim_size
506 } else {
507 (*end).min(dim_size)
508 };
509 let count_u64 = if *start >= actual_end {
510 0
511 } else {
512 (actual_end - *start).div_ceil(*step)
513 };
514 let count = checked_usize(count_u64, "slice element count")?;
515
516 dims.push(ResolvedSelectionDim {
517 start: *start,
518 end: actual_end,
519 step: *step,
520 count,
521 });
522 result_shape.push(count);
523 result_elements =
524 checked_mul_usize(result_elements, count, "slice result element count")?;
525 }
526 }
527 }
528
529 Ok(ResolvedSelection {
530 dims,
531 result_shape,
532 result_elements,
533 })
534}
535
536fn selection_dim_is_full_unit(dim: &ResolvedSelectionDim, dim_size: u64) -> bool {
537 dim.step == 1
538 && dim.start == 0
539 && dim.end == dim_size
540 && u64::try_from(dim.count).ok() == Some(dim_size)
541}
542
543fn selection_covers_full_dataset(resolved: &ResolvedSelection, shape: &[u64]) -> bool {
544 resolved.result_shape.len() == shape.len()
545 && resolved
546 .dims
547 .iter()
548 .zip(shape.iter())
549 .all(|(dim, &dim_size)| selection_dim_is_full_unit(dim, dim_size))
550}
551
552fn contiguous_slice_tail_start(shape: &[u64], resolved: &ResolvedSelection) -> usize {
553 let ndim = shape.len();
554 if ndim == 0 {
555 return 0;
556 }
557
558 let mut tail_start = if resolved.dims[ndim - 1].step == 1 {
559 ndim - 1
560 } else {
561 ndim
562 };
563
564 while tail_start > 0 {
565 let prev = tail_start - 1;
566 let later_dims_are_full =
567 (tail_start..ndim).all(|d| selection_dim_is_full_unit(&resolved.dims[d], shape[d]));
568 if resolved.dims[prev].step == 1 && later_dims_are_full {
569 tail_start = prev;
570 } else {
571 break;
572 }
573 }
574
575 tail_start
576}
577
578fn contiguous_slice_block_elements(
579 resolved: &ResolvedSelection,
580 tail_start: usize,
581) -> Result<usize> {
582 let mut elements = 1usize;
583 for dim in &resolved.dims[tail_start..] {
584 elements = checked_mul_usize(elements, dim.count, "contiguous slice block elements")?;
585 }
586 Ok(elements)
587}
588
589fn result_strides_for_dims(result_dims: &[usize]) -> Result<Vec<usize>> {
590 let ndim = result_dims.len();
591 let mut result_strides = vec![1usize; ndim];
592 for d in (0..ndim.saturating_sub(1)).rev() {
593 result_strides[d] =
594 checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
595 }
596 Ok(result_strides)
597}
598
599pub struct Dataset {
601 pub(crate) context: Arc<FileContext>,
602 pub(crate) name: String,
603 pub(crate) data_address: u64,
604 pub(crate) dataspace: DataspaceMessage,
605 pub(crate) datatype: Datatype,
606 pub(crate) layout: DataLayout,
607 pub(crate) fill_value: Option<FillValueMessage>,
608 pub(crate) filters: Option<FilterPipelineMessage>,
609 pub(crate) external_files: Option<ExternalFilesMessage>,
610 pub(crate) attributes: Vec<AttributeMessage>,
611 pub(crate) chunk_cache: Arc<ChunkCache>,
612 chunk_entry_cache: Arc<Mutex<LruCache<ChunkEntryCacheKey, Arc<Vec<chunk_index::ChunkEntry>>>>>,
613 full_chunk_entries: Arc<OnceLock<Arc<Vec<chunk_index::ChunkEntry>>>>,
614 full_dataset_bytes: Arc<OnceLock<Arc<Vec<u8>>>>,
615 external_slots: Arc<OnceLock<Arc<Vec<ResolvedExternalRawSlot>>>>,
616 pub(crate) filter_registry: Arc<FilterRegistry>,
617}
618
619pub(crate) struct DatasetTemplate {
620 name: String,
621 data_address: u64,
622 dataspace: DataspaceMessage,
623 datatype: Datatype,
624 layout: DataLayout,
625 fill_value: Option<FillValueMessage>,
626 filters: Option<FilterPipelineMessage>,
627 external_files: Option<ExternalFilesMessage>,
628 attributes: Vec<AttributeMessage>,
629 chunk_entry_cache: Arc<Mutex<LruCache<ChunkEntryCacheKey, Arc<Vec<chunk_index::ChunkEntry>>>>>,
630 full_chunk_entries: Arc<OnceLock<Arc<Vec<chunk_index::ChunkEntry>>>>,
631 full_dataset_bytes: Arc<OnceLock<Arc<Vec<u8>>>>,
632 external_slots: Arc<OnceLock<Arc<Vec<ResolvedExternalRawSlot>>>>,
633}
634
635impl Dataset {
636 pub(crate) fn from_template(context: Arc<FileContext>, template: Arc<DatasetTemplate>) -> Self {
637 Dataset {
638 chunk_cache: context.chunk_cache.clone(),
639 filter_registry: context.filter_registry.clone(),
640 context,
641 name: template.name.clone(),
642 data_address: template.data_address,
643 dataspace: template.dataspace.clone(),
644 datatype: template.datatype.clone(),
645 layout: template.layout.clone(),
646 fill_value: template.fill_value.clone(),
647 filters: template.filters.clone(),
648 external_files: template.external_files.clone(),
649 attributes: template.attributes.clone(),
650 chunk_entry_cache: template.chunk_entry_cache.clone(),
651 full_chunk_entries: template.full_chunk_entries.clone(),
652 full_dataset_bytes: template.full_dataset_bytes.clone(),
653 external_slots: template.external_slots.clone(),
654 }
655 }
656
657 pub(crate) fn template(&self) -> Arc<DatasetTemplate> {
658 Arc::new(DatasetTemplate {
659 name: self.name.clone(),
660 data_address: self.data_address,
661 dataspace: self.dataspace.clone(),
662 datatype: self.datatype.clone(),
663 layout: self.layout.clone(),
664 fill_value: self.fill_value.clone(),
665 filters: self.filters.clone(),
666 external_files: self.external_files.clone(),
667 attributes: self.attributes.clone(),
668 chunk_entry_cache: self.chunk_entry_cache.clone(),
669 full_chunk_entries: self.full_chunk_entries.clone(),
670 full_dataset_bytes: self.full_dataset_bytes.clone(),
671 external_slots: self.external_slots.clone(),
672 })
673 }
674
675 pub(crate) fn from_parsed_header(
676 context: DatasetParseContext,
677 address: u64,
678 name: String,
679 header: &ObjectHeader,
680 ) -> Result<Self> {
681 let mut dataspace: Option<DataspaceMessage> = None;
682 let mut datatype: Option<Datatype> = None;
683 let mut layout: Option<DataLayout> = None;
684 let mut fill_value: Option<FillValueMessage> = None;
685 let mut filter_pipeline: Option<FilterPipelineMessage> = None;
686 let mut external_files: Option<ExternalFilesMessage> = None;
687 let attributes = collect_attribute_messages_storage(
688 header,
689 context.context.storage.as_ref(),
690 context.context.superblock.offset_size,
691 context.context.superblock.length_size,
692 )?;
693
694 for msg in &header.messages {
695 match msg {
696 HdfMessage::Dataspace(ds) => dataspace = Some(ds.clone()),
697 HdfMessage::Datatype(dt) => datatype = Some(dt.datatype.clone()),
698 HdfMessage::DataLayout(dl) => layout = Some(dl.layout.clone()),
699 HdfMessage::FillValue(fv) => fill_value = Some(fv.clone()),
700 HdfMessage::FilterPipeline(fp) => filter_pipeline = Some(fp.clone()),
701 HdfMessage::ExternalFiles(ef) => external_files = Some(ef.clone()),
702 _ => {}
703 }
704 }
705
706 let dataspace =
707 dataspace.ok_or_else(|| Error::InvalidData("dataset missing dataspace".into()))?;
708 let dt = datatype.ok_or_else(|| Error::InvalidData("dataset missing datatype".into()))?;
709 let layout =
710 layout.ok_or_else(|| Error::InvalidData("dataset missing data layout".into()))?;
711 let layout = normalize_layout(layout, &dataspace);
712 let attr_fill_value = attributes
713 .iter()
714 .find(|attr| attr.name == "_FillValue" && attr.dataspace.num_elements() == 1)
715 .map(|attr| FillValueMessage {
716 defined: !attr.raw_data.is_empty(),
717 fill_time: FillTime::IfSet,
718 value: Some(attr.raw_data.clone()),
719 });
720 let fill_value = match fill_value {
721 Some(existing) if existing.value.is_some() => Some(existing),
722 _ => attr_fill_value,
723 };
724
725 Ok(Dataset {
726 context: context.context.clone(),
727 name,
728 data_address: address,
729 dataspace,
730 datatype: dt,
731 layout,
732 fill_value,
733 filters: filter_pipeline,
734 external_files,
735 attributes,
736 chunk_cache: context.context.chunk_cache.clone(),
737 chunk_entry_cache: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(32).unwrap()))),
738 full_chunk_entries: Arc::new(OnceLock::new()),
739 full_dataset_bytes: Arc::new(OnceLock::new()),
740 external_slots: Arc::new(OnceLock::new()),
741 filter_registry: context.context.filter_registry.clone(),
742 })
743 }
744
745 pub fn name(&self) -> &str {
747 &self.name
748 }
749
750 pub fn address(&self) -> u64 {
753 self.data_address
754 }
755
756 pub fn shape(&self) -> &[u64] {
758 &self.dataspace.dims
759 }
760
761 pub fn dtype(&self) -> &Datatype {
763 &self.datatype
764 }
765
766 pub fn ndim(&self) -> usize {
768 self.dataspace.dims.len()
769 }
770
771 fn offset_size(&self) -> u8 {
772 self.context.superblock.offset_size
773 }
774
775 fn length_size(&self) -> u8 {
776 self.context.superblock.length_size
777 }
778
779 pub fn max_dims(&self) -> Option<&[u64]> {
781 self.dataspace.max_dims.as_deref()
782 }
783
784 pub fn chunks(&self) -> Option<Vec<u32>> {
786 match &self.layout {
787 DataLayout::Chunked { dims, .. } => Some(dims.clone()),
788 _ => None,
789 }
790 }
791
792 pub fn fill_value(&self) -> Option<&FillValueMessage> {
794 self.fill_value.as_ref()
795 }
796
797 pub fn attributes(&self) -> Vec<Attribute> {
799 self.attributes
800 .iter()
801 .map(|a| attribute_from_message_storage(a, self.context.as_ref()))
802 .collect()
803 }
804
805 pub fn attribute(&self, name: &str) -> Result<Attribute> {
807 self.attributes
808 .iter()
809 .find(|a| a.name == name)
810 .map(|a| attribute_from_message_storage(a, self.context.as_ref()))
811 .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
812 }
813
814 pub fn read_string(&self) -> Result<String> {
818 let mut strings = self.read_strings()?;
819 match strings.len() {
820 1 => Ok(strings.swap_remove(0)),
821 0 => Err(Error::InvalidData(format!(
822 "dataset '{}' contains no string elements",
823 self.name
824 ))),
825 count => Err(Error::InvalidData(format!(
826 "dataset '{}' contains {count} string elements; use read_strings()",
827 self.name
828 ))),
829 }
830 }
831
832 pub fn read_strings(&self) -> Result<Vec<String>> {
834 match &self.datatype {
835 Datatype::String {
836 size: StringSize::Fixed(len),
837 encoding,
838 padding,
839 } => {
840 let raw = self.read_raw_bytes()?;
841 let elem_size = *len as usize;
842 let count = checked_usize(self.num_elements(), "dataset string element count")?;
843 let expected_bytes =
844 checked_mul_usize(count, elem_size, "dataset string byte size")?;
845 if raw.len() < expected_bytes {
846 return Err(Error::InvalidData(format!(
847 "dataset '{}' string data too short: need {} bytes, have {}",
848 self.name,
849 expected_bytes,
850 raw.len()
851 )));
852 }
853
854 let mut strings = Vec::with_capacity(count);
855 for i in 0..count {
856 let start = i * elem_size;
857 let end = start + elem_size;
858 strings.push(decode_string(&raw[start..end], *padding, *encoding)?);
859 }
860 Ok(strings)
861 }
862 Datatype::String {
863 size: StringSize::Variable,
864 encoding,
865 padding,
866 } => {
867 let raw = self.read_raw_bytes()?;
868 let count = checked_usize(self.num_elements(), "dataset string element count")?;
869 let ref_size = 4 + self.offset_size() as usize + 4;
870 let expected_bytes =
871 checked_mul_usize(count, ref_size, "dataset string reference byte size")?;
872 if raw.len() < expected_bytes {
873 return Err(Error::InvalidData(format!(
874 "dataset '{}' vlen string data too short: need {} bytes, have {}",
875 self.name,
876 expected_bytes,
877 raw.len()
878 )));
879 }
880
881 let mut strings = Vec::with_capacity(count);
882 for i in 0..count {
883 let offset = i * ref_size;
884 strings.push(read_one_vlen_string_storage(
885 &raw,
886 offset,
887 self.context.storage.as_ref(),
888 self.offset_size(),
889 self.length_size(),
890 *padding,
891 *encoding,
892 )?);
893 }
894 Ok(strings)
895 }
896 Datatype::VarLen { base } => {
897 if !matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. }) {
898 return Err(Error::TypeMismatch {
899 expected: "String dataset".into(),
900 actual: format!("{:?}", self.datatype),
901 });
902 }
903
904 let raw = self.read_raw_bytes()?;
905 let count = checked_usize(self.num_elements(), "dataset string element count")?;
906 let ref_size = 4 + self.offset_size() as usize + 4;
907 let expected_bytes =
908 checked_mul_usize(count, ref_size, "dataset string reference byte size")?;
909 if raw.len() < expected_bytes {
910 return Err(Error::InvalidData(format!(
911 "dataset '{}' vlen byte string data too short: need {} bytes, have {}",
912 self.name,
913 expected_bytes,
914 raw.len()
915 )));
916 }
917
918 let mut strings = Vec::with_capacity(count);
919 for i in 0..count {
920 let offset = i * ref_size;
921 let ref_bytes = &raw[offset..offset + ref_size];
922 let value = resolve_vlen_bytes_storage(
923 ref_bytes,
924 self.context.storage.as_ref(),
925 self.offset_size(),
926 self.length_size(),
927 )
928 .unwrap_or_default();
929 strings.push(decode_varlen_byte_string(&value)?);
930 }
931 Ok(strings)
932 }
933 _ => Err(Error::TypeMismatch {
934 expected: "String dataset".into(),
935 actual: format!("{:?}", self.datatype),
936 }),
937 }
938 }
939
940 pub fn num_elements(&self) -> u64 {
942 if self.dataspace.dims.is_empty() {
943 match self.dataspace.dataspace_type {
944 DataspaceType::Scalar => 1,
945 DataspaceType::Null => 0,
946 DataspaceType::Simple => 0,
947 }
948 } else {
949 self.dataspace.dims.iter().product()
950 }
951 }
952
953 pub fn read_array<T: H5Type>(&self) -> Result<ArrayD<T>> {
955 let result = match &self.layout {
956 DataLayout::Compact { data } => self.read_compact::<T>(data),
957 DataLayout::Contiguous { address, size } => self.read_contiguous::<T>(*address, *size),
958 DataLayout::Chunked {
959 address,
960 dims,
961 element_size,
962 chunk_indexing,
963 } => self.read_chunked::<T>(*address, dims, *element_size, chunk_indexing.as_ref()),
964 };
965 result.map_err(|e| e.with_context(&self.name))
966 }
967
/// Reads the whole dataset, using `read_chunked_parallel` when the layout is
/// chunked and falling back to [`Self::read_array`] for every other layout.
///
/// Only available with the `rayon` feature.
#[cfg(feature = "rayon")]
pub fn read_array_parallel<T: H5Type>(&self) -> Result<ArrayD<T>> {
    if let DataLayout::Chunked {
        address,
        dims,
        element_size,
        chunk_indexing,
    } = &self.layout
    {
        return self.read_chunked_parallel::<T>(
            *address,
            dims,
            *element_size,
            chunk_indexing.as_ref(),
        );
    }

    self.read_array::<T>()
}
988
/// Reads the whole dataset like `read_array_parallel`, but runs the chunked
/// read inside `pool` (via `ThreadPool::install`). Non-chunked layouts fall
/// back to [`Self::read_array`] on the calling thread.
///
/// Only available with the `rayon` feature.
#[cfg(feature = "rayon")]
pub fn read_array_in_pool<T: H5Type>(&self, pool: &rayon::ThreadPool) -> Result<ArrayD<T>> {
    if let DataLayout::Chunked {
        address,
        dims,
        element_size,
        chunk_indexing,
    } = &self.layout
    {
        return pool.install(|| {
            self.read_chunked_parallel::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
            )
        });
    }

    self.read_array::<T>()
}
1011
/// Reads the selected part of the dataset, using
/// `read_chunked_slice_parallel` when the layout is chunked and falling back
/// to [`Self::read_slice`] otherwise.
///
/// The selection is validated against the dataset shape before the layout is
/// inspected, so an invalid selection fails for every layout.
///
/// Only available with the `rayon` feature.
#[cfg(feature = "rayon")]
pub fn read_slice_parallel<T: H5Type>(&self, selection: &SliceInfo) -> Result<ArrayD<T>> {
    let resolved = normalize_selection(selection, &self.dataspace.dims)?;

    if let DataLayout::Chunked {
        address,
        dims,
        element_size,
        chunk_indexing,
    } = &self.layout
    {
        return self.read_chunked_slice_parallel::<T>(
            *address,
            dims,
            *element_size,
            chunk_indexing.as_ref(),
            selection,
            &resolved,
        );
    }

    self.read_slice::<T>(selection)
}
1037
1038 pub fn read_slice<T: H5Type>(&self, selection: &SliceInfo) -> Result<ArrayD<T>> {
1040 let resolved = normalize_selection(selection, &self.dataspace.dims)?;
1041
1042 match &self.layout {
1043 DataLayout::Contiguous { address, size } => {
1044 self.read_contiguous_slice::<T>(*address, *size, &resolved)
1045 }
1046 DataLayout::Compact { data } => self.read_compact_slice::<T>(data, selection),
1047 DataLayout::Chunked {
1048 address,
1049 dims,
1050 element_size,
1051 chunk_indexing,
1052 } => self.read_chunked_slice::<T>(
1053 *address,
1054 dims,
1055 *element_size,
1056 chunk_indexing.as_ref(),
1057 selection,
1058 &resolved,
1059 ),
1060 }
1061 }
1062
1063 fn read_compact<T: H5Type>(&self, data: &[u8]) -> Result<ArrayD<T>> {
1064 self.decode_raw_data::<T>(data)
1065 }
1066
1067 fn read_raw_bytes(&self) -> Result<Vec<u8>> {
1068 let elem_size = dtype_element_size(&self.datatype);
1069 let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
1070 let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;
1071
1072 let result = match &self.layout {
1073 DataLayout::Compact { data } => Ok(self.normalize_raw_bytes(data, total_bytes)),
1074 DataLayout::Contiguous { address, size } => {
1075 self.read_contiguous_bytes(*address, *size, total_bytes)
1076 }
1077 DataLayout::Chunked {
1078 address,
1079 dims,
1080 element_size: _,
1081 chunk_indexing,
1082 } => self.read_chunked_bytes(*address, dims, chunk_indexing.as_ref(), total_bytes),
1083 };
1084
1085 result.map_err(|e| e.with_context(&self.name))
1086 }
1087
1088 fn read_contiguous<T: H5Type>(&self, address: u64, size: u64) -> Result<ArrayD<T>> {
1089 if self.external_files.is_some() {
1090 let elem_size = dtype_element_size(&self.datatype);
1091 let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
1092 let total_bytes =
1093 checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;
1094 let raw = self.read_external_range(0, total_bytes)?;
1095 return self.decode_raw_data::<T>(&raw);
1096 }
1097
1098 if Cursor::is_undefined_offset(address, self.offset_size()) || size == 0 {
1099 return self.make_fill_array::<T>();
1101 }
1102
1103 let sz = checked_usize(size, "contiguous dataset size")?;
1104 let raw = self.context.read_range(address, sz)?;
1105 self.decode_raw_data::<T>(raw.as_ref())
1106 }
1107
1108 fn read_contiguous_bytes(
1109 &self,
1110 address: u64,
1111 size: u64,
1112 total_bytes: usize,
1113 ) -> Result<Vec<u8>> {
1114 if self.external_files.is_some() {
1115 return self.read_external_range(0, total_bytes);
1116 }
1117
1118 if Cursor::is_undefined_offset(address, self.offset_size()) || size == 0 {
1119 return Ok(self.make_output_buffer(total_bytes));
1120 }
1121
1122 let sz = checked_usize(size, "contiguous dataset size")?;
1123 let raw = self.context.read_range(address, sz)?;
1124 Ok(self.normalize_raw_bytes(raw.as_ref(), total_bytes))
1125 }
1126
1127 fn read_contiguous_logical_range(
1128 &self,
1129 address: u64,
1130 logical_offset: usize,
1131 len: usize,
1132 ) -> Result<Vec<u8>> {
1133 if self.external_files.is_some() {
1134 return self.read_external_range(logical_offset, len);
1135 }
1136
1137 let file_offset = checked_add_u64(
1138 address,
1139 u64::try_from(logical_offset).map_err(|_| {
1140 Error::InvalidData("contiguous logical offset exceeds u64 capacity".to_string())
1141 })?,
1142 "contiguous read file offset",
1143 )?;
1144 Ok(self.context.read_range(file_offset, len)?.to_vec())
1145 }
1146
1147 fn read_external_range(&self, logical_offset: usize, len: usize) -> Result<Vec<u8>> {
1148 let mut output = self.make_output_buffer(len);
1149 if len == 0 {
1150 return Ok(output);
1151 }
1152
1153 let request_start = u64::try_from(logical_offset).map_err(|_| {
1154 Error::InvalidData("external dataset offset exceeds u64 capacity".to_string())
1155 })?;
1156 let request_len = u64::try_from(len).map_err(|_| {
1157 Error::InvalidData("external dataset length exceeds u64 capacity".to_string())
1158 })?;
1159 let request_end = request_start
1160 .checked_add(request_len)
1161 .ok_or_else(|| Error::InvalidData("external dataset range overflows".into()))?;
1162
1163 for slot in self.external_raw_slots()?.iter() {
1164 let slot_end = slot.logical_offset.saturating_add(slot.size);
1165 let overlap_start = request_start.max(slot.logical_offset);
1166 let overlap_end = request_end.min(slot_end);
1167 if overlap_start >= overlap_end {
1168 continue;
1169 }
1170
1171 let read_offset = slot
1172 .file_offset
1173 .checked_add(overlap_start - slot.logical_offset)
1174 .ok_or_else(|| Error::InvalidData("external file read offset overflows".into()))?;
1175 let read_len = checked_usize(overlap_end - overlap_start, "external read length")?;
1176 let dst_start = checked_usize(overlap_start - request_start, "external read dst")?;
1177 let dst_end = checked_add_usize(dst_start, read_len, "external read dst end")?;
1178 let bytes = slot.storage.read_range(read_offset, read_len)?;
1179 output[dst_start..dst_end].copy_from_slice(bytes.as_ref());
1180 }
1181
1182 Ok(output)
1183 }
1184
1185 fn external_raw_slots(&self) -> Result<Arc<Vec<ResolvedExternalRawSlot>>> {
1186 if let Some(slots) = self.external_slots.get() {
1187 return Ok(slots.clone());
1188 }
1189
1190 let slots = Arc::new(self.load_external_raw_slots()?);
1191 let _ = self.external_slots.set(slots.clone());
1192 Ok(self
1193 .external_slots
1194 .get()
1195 .expect("external slot cache must exist after initialization")
1196 .clone())
1197 }
1198
1199 fn load_external_raw_slots(&self) -> Result<Vec<ResolvedExternalRawSlot>> {
1200 let Some(external_files) = self.external_files.as_ref() else {
1201 return Ok(Vec::new());
1202 };
1203
1204 let heap = LocalHeap::parse_at_storage(
1205 self.context.storage.as_ref(),
1206 external_files.heap_address,
1207 self.offset_size(),
1208 self.length_size(),
1209 )?;
1210
1211 let mut logical_offset = 0u64;
1212 let mut slots = Vec::with_capacity(external_files.slots.len());
1213 for slot in &external_files.slots {
1214 let filename =
1215 heap.get_string_storage(slot.name_offset, self.context.storage.as_ref())?;
1216 let storage = self
1217 .context
1218 .resolve_external_file(&filename)?
1219 .ok_or_else(|| {
1220 Error::Other(format!(
1221 "external raw data file '{filename}' could not be resolved"
1222 ))
1223 })?;
1224 let size = if Cursor::is_undefined_offset(slot.size, self.length_size()) {
1225 u64::MAX.saturating_sub(logical_offset)
1226 } else {
1227 slot.size
1228 };
1229
1230 slots.push(ResolvedExternalRawSlot {
1231 logical_offset,
1232 storage,
1233 file_offset: slot.offset,
1234 size,
1235 });
1236
1237 if Cursor::is_undefined_offset(slot.size, self.length_size()) {
1238 break;
1239 }
1240 logical_offset = logical_offset.checked_add(slot.size).ok_or_else(|| {
1241 Error::InvalidData("external raw data logical offset overflows".into())
1242 })?;
1243 }
1244
1245 Ok(slots)
1246 }
1247
/// Reads every element of a chunked dataset and decodes the result as `T`.
///
/// * `index_address` - address of the chunk index; also used as the dataset
///   key when loading chunks.
/// * `chunk_dims` - chunk extent along each dimension.
/// * `_element_size` - unused by this function.
/// * `chunk_indexing` - chunk index kind, forwarded to `collect_chunk_entries`.
///
/// The function picks one of several paths:
///
/// 1. `index_address` is undefined: return `make_fill_array`.
/// 2. The dataset is at most `HOT_FULL_DATASET_CACHE_MAX_BYTES` and its bytes
///    are already cached: decode the cached bytes.
/// 3. The chunk grid is reported as fully covered and `T` is native-copy
///    compatible: copy chunks straight into an uninitialised `Vec<T>`.
/// 4. The chunk grid is fully covered but `T` needs decoding: assemble an
///    uninitialised byte buffer, then decode it.
/// 5. Otherwise: start from `make_output_buffer` and overlay the chunks that
///    exist.
///
/// Paths 3 and 4 also store the assembled bytes in `full_dataset_bytes` when
/// the dataset is small enough.
///
/// # Errors
///
/// Returns `Error::InvalidData` when the chunk index has entries for an empty
/// dataset or the final array shape is rejected, and propagates validation,
/// chunk loading, copy and decode failures.
fn read_chunked<T: H5Type>(
    &self,
    index_address: u64,
    chunk_dims: &[u32],
    _element_size: u32,
    chunk_indexing: Option<&ChunkIndexing>,
) -> Result<ArrayD<T>> {
    if Cursor::is_undefined_offset(index_address, self.offset_size()) {
        return self.make_fill_array::<T>();
    }

    let ndim = self.ndim();
    let shape = &self.dataspace.dims;
    let elem_size = dtype_element_size(&self.datatype);
    let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
    let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;

    // Serve small datasets from the whole-dataset byte cache when populated.
    if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
        if let Some(cached_bytes) = self.full_dataset_bytes.get() {
            return self.decode_raw_data::<T>(cached_bytes);
        }
    }

    let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
    validate_chunk_shape(shape, &chunk_shape)?;
    let dataset_strides = row_major_strides(shape, "dataset stride")?;
    let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;

    // `chunk_bounds: None` requests the entries for the whole dataset.
    let mut entries = self.collect_chunk_entries(
        index_address,
        chunk_dims,
        chunk_indexing,
        ChunkEntrySelection {
            shape,
            ndim,
            elem_size,
            chunk_bounds: None,
        },
    )?;

    // `full_dataset_chunk_bounds` yields `None` for a dataset with no chunk
    // grid; in that case the index must not contain any entries.
    let full_chunk_coverage = match full_dataset_chunk_bounds(shape, &chunk_shape)? {
        Some((first_chunk, last_chunk)) => validate_chunk_grid_coverage(
            &mut entries,
            shape,
            &chunk_shape,
            &first_chunk,
            &last_chunk,
        )?,
        None if entries.is_empty() => true,
        None => {
            return Err(Error::InvalidData(
                "chunk index contains entries for an empty dataset".into(),
            ))
        }
    };
    if full_chunk_coverage {
        // Typed fast path: the stored bytes can be used as `T` directly.
        if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
            let mut result_values: Vec<MaybeUninit<T>> =
                std::iter::repeat_with(MaybeUninit::<T>::uninit)
                    .take(total_elements)
                    .collect();
            let result_ptr = result_values.as_mut_ptr() as *mut u8;
            let result_len = checked_mul_usize(
                result_values.len(),
                std::mem::size_of::<T>(),
                "typed dataset size in bytes",
            )?;

            for entry in &entries {
                let chunk_data =
                    self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
                // SAFETY (review): `result_ptr`/`result_len` describe the live
                // `result_values` allocation. This presumably relies on the
                // helper never writing past `len` -- confirm against
                // `copy_chunk_to_flat_with_strides_ptr`.
                unsafe {
                    copy_chunk_to_flat_with_strides_ptr(
                        &chunk_data,
                        FlatBufferPtr {
                            ptr: result_ptr,
                            len: result_len,
                        },
                        ChunkCopyLayout {
                            chunk_offsets: &entry.offsets,
                            chunk_shape: &chunk_shape,
                            dataset_shape: shape,
                            dataset_strides: &dataset_strides,
                            chunk_strides: &chunk_strides,
                            elem_size,
                        },
                    )?;
                }
            }

            // Snapshot the assembled bytes for the cache before the buffer is
            // handed to the array.
            if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                let mut cached_bytes = vec![0u8; total_bytes];
                // SAFETY (review): source and destination are distinct
                // allocations of `total_bytes` bytes (the branch condition
                // gives `size_of::<T>() == elem_size`). Assumes every source
                // byte was written by the loop above -- confirm that full
                // chunk coverage guarantees this.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        result_ptr,
                        cached_bytes.as_mut_ptr(),
                        total_bytes,
                    );
                }
                // A failed `set` (cache already filled) is deliberately ignored.
                let _ = self.full_dataset_bytes.set(Arc::new(cached_bytes));
            }

            let mut result_shape = Vec::with_capacity(shape.len());
            for &dim in shape {
                result_shape.push(checked_usize(dim, "dataset dimension")?);
            }
            // NOTE(review): assumes all elements are initialised at this point;
            // verify against `validate_chunk_grid_coverage`.
            let result_values = assume_init_vec(result_values);
            return ArrayD::from_shape_vec(IxDyn(&result_shape), result_values)
                .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
        }

        // Full coverage but `T` needs decoding: assemble raw bytes first.
        let mut flat_data = vec![MaybeUninit::<u8>::uninit(); total_bytes];
        let flat_ptr = flat_data.as_mut_ptr() as *mut u8;
        let flat_len = flat_data.len();

        for entry in &entries {
            let chunk_data =
                self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
            // SAFETY (review): `flat_ptr`/`flat_len` describe the live
            // `flat_data` allocation; same helper contract as above -- confirm.
            unsafe {
                copy_chunk_to_flat_with_strides_ptr(
                    &chunk_data,
                    FlatBufferPtr {
                        ptr: flat_ptr,
                        len: flat_len,
                    },
                    ChunkCopyLayout {
                        chunk_offsets: &entry.offsets,
                        chunk_shape: &chunk_shape,
                        dataset_shape: shape,
                        dataset_strides: &dataset_strides,
                        chunk_strides: &chunk_strides,
                        elem_size,
                    },
                )?;
            }
        }

        let flat_data = assume_init_u8_vec(flat_data);
        if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
            let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
        }
        return self.decode_raw_data::<T>(&flat_data);
    }

    // Partial coverage: start from the prepared output buffer and overlay the
    // chunks that exist. This path does not populate the byte cache.
    let mut flat_data = self.make_output_buffer(total_bytes);
    for entry in &entries {
        let chunk_data =
            self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
        copy_chunk_to_flat_with_strides(
            &chunk_data,
            &mut flat_data,
            ChunkCopyLayout {
                chunk_offsets: &entry.offsets,
                chunk_shape: &chunk_shape,
                dataset_shape: shape,
                dataset_strides: &dataset_strides,
                chunk_strides: &chunk_strides,
                elem_size,
            },
        )?;
    }

    self.decode_raw_data::<T>(&flat_data)
}
1412
1413 fn read_chunked_bytes(
1414 &self,
1415 index_address: u64,
1416 chunk_dims: &[u32],
1417 chunk_indexing: Option<&ChunkIndexing>,
1418 total_bytes: usize,
1419 ) -> Result<Vec<u8>> {
1420 if Cursor::is_undefined_offset(index_address, self.offset_size()) {
1421 return Ok(self.make_output_buffer(total_bytes));
1422 }
1423
1424 let ndim = self.ndim();
1425 let shape = &self.dataspace.dims;
1426 let elem_size = dtype_element_size(&self.datatype);
1427
1428 if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1429 if let Some(cached_bytes) = self.full_dataset_bytes.get() {
1430 return Ok(cached_bytes.as_ref().clone());
1431 }
1432 }
1433
1434 let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
1435 validate_chunk_shape(shape, &chunk_shape)?;
1436 let dataset_strides = row_major_strides(shape, "dataset stride")?;
1437 let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;
1438
1439 let mut entries = self.collect_chunk_entries(
1440 index_address,
1441 chunk_dims,
1442 chunk_indexing,
1443 ChunkEntrySelection {
1444 shape,
1445 ndim,
1446 elem_size,
1447 chunk_bounds: None,
1448 },
1449 )?;
1450
1451 let full_chunk_coverage = match full_dataset_chunk_bounds(shape, &chunk_shape)? {
1452 Some((first_chunk, last_chunk)) => validate_chunk_grid_coverage(
1453 &mut entries,
1454 shape,
1455 &chunk_shape,
1456 &first_chunk,
1457 &last_chunk,
1458 )?,
1459 None if entries.is_empty() => true,
1460 None => {
1461 return Err(Error::InvalidData(
1462 "chunk index contains entries for an empty dataset".into(),
1463 ))
1464 }
1465 };
1466
1467 let mut flat_data = self.make_output_buffer(total_bytes);
1468 for entry in &entries {
1469 let chunk_data =
1470 self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
1471 copy_chunk_to_flat_with_strides(
1472 &chunk_data,
1473 &mut flat_data,
1474 ChunkCopyLayout {
1475 chunk_offsets: &entry.offsets,
1476 chunk_shape: &chunk_shape,
1477 dataset_shape: shape,
1478 dataset_strides: &dataset_strides,
1479 chunk_strides: &chunk_strides,
1480 elem_size,
1481 },
1482 )?;
1483 }
1484
1485 if full_chunk_coverage && total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1486 let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
1487 }
1488
1489 Ok(flat_data)
1490 }
1491
/// Rayon-based variant of `read_chunked`: reads every element of a chunked
/// dataset, loading and copying chunks from a parallel iterator.
///
/// * `index_address` - address of the chunk index; also used as the dataset
///   key when loading chunks.
/// * `chunk_dims` - chunk extent along each dimension.
/// * `_element_size` - unused by this function.
/// * `chunk_indexing` - chunk index kind, forwarded to `collect_chunk_entries`.
///
/// The control flow is: undefined index -> fill array; cached bytes -> decode;
/// full coverage with native-copy `T` -> typed buffer; full coverage otherwise
/// -> raw byte buffer then decode; partial coverage -> prepared output buffer
/// overlaid with the stored chunks.
///
/// # Errors
///
/// Returns `Error::InvalidData` when the chunk index has entries for an empty
/// dataset or the final array shape is rejected, and propagates validation,
/// chunk loading, copy and decode failures.
#[cfg(feature = "rayon")]
fn read_chunked_parallel<T: H5Type>(
    &self,
    index_address: u64,
    chunk_dims: &[u32],
    _element_size: u32,
    chunk_indexing: Option<&ChunkIndexing>,
) -> Result<ArrayD<T>> {
    if Cursor::is_undefined_offset(index_address, self.offset_size()) {
        return self.make_fill_array::<T>();
    }

    let ndim = self.ndim();
    let shape = &self.dataspace.dims;
    let elem_size = dtype_element_size(&self.datatype);
    let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
    let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;

    // Serve small datasets from the whole-dataset byte cache when populated.
    if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
        if let Some(cached_bytes) = self.full_dataset_bytes.get() {
            return self.decode_raw_data::<T>(cached_bytes);
        }
    }

    let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
    validate_chunk_shape(shape, &chunk_shape)?;
    let dataset_strides = row_major_strides(shape, "dataset stride")?;
    let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;

    // `chunk_bounds: None` requests the entries for the whole dataset.
    let mut entries = self.collect_chunk_entries(
        index_address,
        chunk_dims,
        chunk_indexing,
        ChunkEntrySelection {
            shape,
            ndim,
            elem_size,
            chunk_bounds: None,
        },
    )?;

    // `full_dataset_chunk_bounds` yields `None` for a dataset with no chunk
    // grid; in that case the index must not contain any entries.
    let full_chunk_coverage = match full_dataset_chunk_bounds(shape, &chunk_shape)? {
        Some((first_chunk, last_chunk)) => validate_chunk_grid_coverage(
            &mut entries,
            shape,
            &chunk_shape,
            &first_chunk,
            &last_chunk,
        )?,
        None if entries.is_empty() => true,
        None => {
            return Err(Error::InvalidData(
                "chunk index contains entries for an empty dataset".into(),
            ))
        }
    };
    if full_chunk_coverage {
        // Typed fast path: the stored bytes can be used as `T` directly.
        if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
            let mut result_values: Vec<MaybeUninit<T>> =
                std::iter::repeat_with(MaybeUninit::<T>::uninit)
                    .take(total_elements)
                    .collect();
            let flat = FlatBufferPtr {
                ptr: result_values.as_mut_ptr() as *mut u8,
                len: checked_mul_usize(
                    result_values.len(),
                    std::mem::size_of::<T>(),
                    "typed dataset size in bytes",
                )?,
            };

            // SAFETY (review): `flat` points into the live `result_values`
            // allocation. Concurrent writes presumably target disjoint byte
            // ranges because distinct chunks map to distinct dataset regions
            // -- confirm against `validate_chunk_grid_coverage` and
            // `FlatBufferPtr::copy_chunk`.
            entries
                .par_iter()
                .map(|entry| {
                    self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)
                        .and_then(|data| unsafe {
                            flat.copy_chunk(
                                &data,
                                ChunkCopyLayout {
                                    chunk_offsets: &entry.offsets,
                                    chunk_shape: &chunk_shape,
                                    dataset_shape: shape,
                                    dataset_strides: &dataset_strides,
                                    chunk_strides: &chunk_strides,
                                    elem_size,
                                },
                            )
                        })
                })
                .collect::<std::result::Result<Vec<_>, Error>>()?;

            let mut result_shape = Vec::with_capacity(shape.len());
            for &dim in shape {
                result_shape.push(checked_usize(dim, "dataset dimension")?);
            }
            // Snapshot the assembled bytes for the cache before the buffer is
            // handed to the array.
            if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                let mut cached_bytes = vec![0u8; total_bytes];
                // SAFETY (review): source and destination are distinct
                // allocations of `total_bytes` bytes. Assumes every source byte
                // was written by the parallel copy above -- confirm.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        flat.ptr,
                        cached_bytes.as_mut_ptr(),
                        total_bytes,
                    );
                }
                // A failed `set` (cache already filled) is deliberately ignored.
                let _ = self.full_dataset_bytes.set(Arc::new(cached_bytes));
            }
            // NOTE(review): assumes all elements are initialised at this point.
            let result_values = assume_init_vec(result_values);
            return ArrayD::from_shape_vec(IxDyn(&result_shape), result_values)
                .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
        }

        // Full coverage but `T` needs decoding: assemble raw bytes first.
        let mut flat_data = vec![MaybeUninit::<u8>::uninit(); total_bytes];
        let flat = FlatBufferPtr {
            ptr: flat_data.as_mut_ptr() as *mut u8,
            len: flat_data.len(),
        };

        // SAFETY (review): same disjoint-write assumption as the typed path,
        // with `flat` pointing into `flat_data` -- confirm.
        entries
            .par_iter()
            .map(|entry| {
                self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)
                    .and_then(|data| unsafe {
                        flat.copy_chunk(
                            &data,
                            ChunkCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                dataset_strides: &dataset_strides,
                                chunk_strides: &chunk_strides,
                                elem_size,
                            },
                        )
                    })
            })
            .collect::<std::result::Result<Vec<_>, Error>>()?;

        let flat_data = assume_init_u8_vec(flat_data);
        if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
            let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
        }
        return self.decode_raw_data::<T>(&flat_data);
    }

    // Partial coverage: start from the prepared output buffer and overlay the
    // chunks that exist. This path does not populate the byte cache.
    let mut flat_data = self.make_output_buffer(total_bytes);
    let flat = FlatBufferPtr {
        ptr: flat_data.as_mut_ptr(),
        len: flat_data.len(),
    };

    // SAFETY (review): `flat` aliases `flat_data`, which is not otherwise
    // touched until the parallel iterator finishes; writes are presumably
    // disjoint per chunk -- confirm.
    entries
        .par_iter()
        .map(|entry| {
            self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)
                .and_then(|data| unsafe {
                    flat.copy_chunk(
                        &data,
                        ChunkCopyLayout {
                            chunk_offsets: &entry.offsets,
                            chunk_shape: &chunk_shape,
                            dataset_shape: shape,
                            dataset_strides: &dataset_strides,
                            chunk_strides: &chunk_strides,
                            elem_size,
                        },
                    )
                })
        })
        .collect::<std::result::Result<Vec<_>, Error>>()?;

    self.decode_raw_data::<T>(&flat_data)
}
1664
1665 fn collect_chunk_entries(
1669 &self,
1670 index_address: u64,
1671 chunk_dims: &[u32],
1672 chunk_indexing: Option<&ChunkIndexing>,
1673 selection: ChunkEntrySelection<'_>,
1674 ) -> Result<Vec<chunk_index::ChunkEntry>> {
1675 if selection.chunk_bounds.is_none() {
1676 if let Some(cached) = self.full_chunk_entries.get() {
1677 return Ok((**cached).clone());
1678 }
1679 }
1680
1681 let cache_key =
1682 selection
1683 .chunk_bounds
1684 .map(|(first_chunk, last_chunk)| ChunkEntryCacheKey {
1685 index_address,
1686 first_chunk: SmallVec::from_slice(first_chunk),
1687 last_chunk: SmallVec::from_slice(last_chunk),
1688 });
1689
1690 if let Some(ref key) = cache_key {
1691 let mut cache = self.chunk_entry_cache.lock();
1692 if let Some(cached) = cache.get(key) {
1693 return Ok((**cached).clone());
1694 }
1695 }
1696
1697 let entries = match chunk_indexing {
1698 None => {
1699 self.collect_btree_v1_entries(
1701 index_address,
1702 selection.ndim,
1703 chunk_dims,
1704 selection.chunk_bounds,
1705 )
1706 }
1707 Some(ChunkIndexing::SingleChunk {
1708 filtered_size,
1709 filters,
1710 }) => Ok(vec![chunk_index::single_chunk_entry(
1711 index_address,
1712 *filtered_size,
1713 *filters,
1714 selection.ndim,
1715 )]),
1716 Some(ChunkIndexing::BTreeV2) => chunk_index::collect_v2_chunk_entries_storage(
1717 self.context.storage.as_ref(),
1718 index_address,
1719 self.offset_size(),
1720 self.length_size(),
1721 selection.ndim as u32,
1722 chunk_dims,
1723 selection.chunk_bounds,
1724 ),
1725 Some(ChunkIndexing::Implicit) => Ok(chunk_index::collect_implicit_chunk_entries(
1726 index_address,
1727 selection.shape,
1728 chunk_dims,
1729 selection.elem_size,
1730 selection.chunk_bounds,
1731 )),
1732 Some(ChunkIndexing::FixedArray { .. }) => {
1733 crate::fixed_array::collect_fixed_array_chunk_entries_storage(
1734 self.context.storage.as_ref(),
1735 index_address,
1736 self.offset_size(),
1737 self.length_size(),
1738 selection.shape,
1739 chunk_dims,
1740 selection.chunk_bounds,
1741 )
1742 }
1743 Some(ChunkIndexing::ExtensibleArray { .. }) => {
1744 crate::extensible_array::collect_extensible_array_chunk_entries_storage(
1745 self.context.storage.as_ref(),
1746 index_address,
1747 self.offset_size(),
1748 self.length_size(),
1749 selection.shape,
1750 chunk_dims,
1751 selection.chunk_bounds,
1752 )
1753 }
1754 }?;
1755
1756 if let Some(key) = cache_key {
1757 let mut cache = self.chunk_entry_cache.lock();
1758 cache.put(key, Arc::new(entries.clone()));
1759 } else {
1760 let _ = self.full_chunk_entries.set(Arc::new(entries.clone()));
1761 }
1762
1763 Ok(entries)
1764 }
1765
1766 fn collect_btree_v1_entries(
1768 &self,
1769 btree_address: u64,
1770 ndim: usize,
1771 chunk_dims: &[u32],
1772 chunk_bounds: Option<(&[u64], &[u64])>,
1773 ) -> Result<Vec<chunk_index::ChunkEntry>> {
1774 let leaves = crate::btree_v1::collect_btree_v1_leaves_storage(
1775 self.context.storage.as_ref(),
1776 btree_address,
1777 self.offset_size(),
1778 self.length_size(),
1779 Some(ndim as u32),
1780 chunk_dims,
1781 chunk_bounds,
1782 )?;
1783
1784 let mut entries = Vec::with_capacity(leaves.len());
1785 for (key, chunk_addr) in &leaves {
1786 match key {
1787 crate::btree_v1::BTreeV1Key::RawData {
1788 chunk_size,
1789 filter_mask,
1790 offsets,
1791 } => {
1792 entries.push(chunk_index::ChunkEntry {
1793 address: *chunk_addr,
1794 size: *chunk_size as u64,
1795 filter_mask: *filter_mask,
1796 offsets: offsets[..ndim].to_vec(),
1797 });
1798 }
1799 _ => {
1800 return Err(Error::InvalidData(
1801 "expected raw data key in chunk B-tree".into(),
1802 ))
1803 }
1804 }
1805 }
1806 Ok(entries)
1807 }
1808
1809 fn load_chunk_data(
1810 &self,
1811 entry: &chunk_index::ChunkEntry,
1812 dataset_addr: u64,
1813 chunk_shape: &[u64],
1814 elem_size: usize,
1815 ) -> Result<Arc<Vec<u8>>> {
1816 let cache_key = ChunkKey {
1817 dataset_addr,
1818 chunk_offsets: smallvec::SmallVec::from_slice(&entry.offsets),
1819 };
1820
1821 self.chunk_cache.get_or_insert_with(cache_key, || {
1822 let size = if entry.size > 0 {
1823 checked_usize(entry.size, "encoded chunk size")?
1824 } else {
1825 let chunk_elements =
1826 checked_shape_elements_usize(chunk_shape, "chunk element count")?;
1827 checked_mul_usize(chunk_elements, elem_size, "chunk byte size")?
1828 };
1829 let raw = self.context.read_range(entry.address, size)?;
1830
1831 if let Some(ref pipeline) = self.filters {
1832 filters::apply_pipeline(
1833 raw.as_ref(),
1834 &pipeline.filters,
1835 entry.filter_mask,
1836 elem_size,
1837 Some(&self.filter_registry),
1838 )
1839 } else {
1840 Ok(raw.to_vec())
1841 }
1842 })
1843 }
1844
1845 fn load_exact_chunk_data(
1846 &self,
1847 entry: &chunk_index::ChunkEntry,
1848 dataset_addr: u64,
1849 chunk_shape: &[u64],
1850 elem_size: usize,
1851 ) -> Result<Arc<Vec<u8>>> {
1852 let data = self.load_chunk_data(entry, dataset_addr, chunk_shape, elem_size)?;
1853 validate_decoded_chunk_len(entry, chunk_shape, elem_size, data.len())?;
1854 Ok(data)
1855 }
1856
/// Reads a hyperslab selection from a chunked dataset and decodes it as `T`.
///
/// * `index_address` - address of the chunk index; also used as the dataset
///   key when loading chunks.
/// * `chunk_dims` - chunk extent along each dimension.
/// * `_element_size` - unused by this function.
/// * `chunk_indexing` - chunk index kind, forwarded to `collect_chunk_entries`.
/// * `_selection` - unused by this function; `resolved` carries the selection.
/// * `resolved` - per-dimension start/end/step plus the result shape and
///   element count.
///
/// Only chunks whose grid coordinates fall inside the selection's chunk range
/// are collected. Three copy strategies are used:
///
/// 1. Unit-stride selection over a fully covered chunk range with native-copy
///    `T`: copy straight into an uninitialised `Vec<T>`.
/// 2. Unit-stride selection over a fully covered chunk range otherwise: copy
///    into an uninitialised byte buffer, then decode.
/// 3. Everything else: start from `make_output_buffer` and copy either
///    unit-stride overlaps or individually selected elements.
///
/// # Panics
///
/// Panics if `chunk_index_range` returns `None` for a dimension even though
/// the result is non-empty (the `expect` below).
///
/// # Errors
///
/// Propagates validation, chunk loading, copy, overflow and decode failures;
/// returns `Error::InvalidData` when the final array shape is rejected.
fn read_chunked_slice<T: H5Type>(
    &self,
    index_address: u64,
    chunk_dims: &[u32],
    _element_size: u32,
    chunk_indexing: Option<&ChunkIndexing>,
    _selection: &SliceInfo,
    resolved: &ResolvedSelection,
) -> Result<ArrayD<T>> {
    // Empty selection: nothing to read.
    if resolved.result_elements == 0 {
        return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
    }

    if Cursor::is_undefined_offset(index_address, self.offset_size()) {
        return self
            .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
    }

    let ndim = self.ndim();
    let shape = &self.dataspace.dims;
    let elem_size = dtype_element_size(&self.datatype);
    let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
    validate_chunk_shape(shape, &chunk_shape)?;
    // Per-dimension range of chunk grid coordinates touched by the selection.
    let mut first_chunk = vec![0u64; ndim];
    let mut last_chunk = vec![0u64; ndim];
    for d in 0..ndim {
        let (first, last) = resolved.dims[d]
            .chunk_index_range(chunk_shape[d])
            .expect("zero-sized result handled above");
        first_chunk[d] = first;
        last_chunk[d] = last;
    }

    let mut overlapping = self.collect_chunk_entries(
        index_address,
        chunk_dims,
        chunk_indexing,
        ChunkEntrySelection {
            shape,
            ndim,
            elem_size,
            chunk_bounds: Some((&first_chunk, &last_chunk)),
        },
    )?;
    let fully_covered_grid = validate_chunk_grid_coverage(
        &mut overlapping,
        shape,
        &chunk_shape,
        &first_chunk,
        &last_chunk,
    )?;

    let result_total_bytes = checked_mul_usize(
        resolved.result_elements,
        elem_size,
        "slice result size in bytes",
    )?;
    let result_dims = resolved.result_dims_with_collapsed();
    // Row-major element strides: the last dimension has stride 1 and each
    // earlier stride is the product of the later extents.
    let mut result_strides = vec![1usize; ndim];
    for d in (0..ndim.saturating_sub(1)).rev() {
        result_strides[d] =
            checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
    }
    let mut chunk_strides = vec![1usize; ndim];
    for d in (0..ndim.saturating_sub(1)).rev() {
        chunk_strides[d] = checked_mul_usize(
            chunk_strides[d + 1],
            chunk_shape[d + 1] as usize,
            "chunk stride",
        )?;
    }
    let use_unit_stride_fast_path = resolved.is_unit_stride();
    let fully_covered_unit_stride = use_unit_stride_fast_path && fully_covered_grid;

    if fully_covered_unit_stride {
        // Typed fast path: the stored bytes can be used as `T` directly.
        if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
            let mut result_values: Vec<MaybeUninit<T>> =
                std::iter::repeat_with(MaybeUninit::<T>::uninit)
                    .take(resolved.result_elements)
                    .collect();
            let result_ptr = result_values.as_mut_ptr() as *mut u8;
            let result_len = checked_mul_usize(
                result_values.len(),
                std::mem::size_of::<T>(),
                "typed slice result size in bytes",
            )?;

            for entry in &overlapping {
                let chunk_data =
                    self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                // SAFETY (review): `result_ptr`/`result_len` describe the live
                // `result_values` allocation. Presumably relies on the helper
                // never writing past `len` -- confirm against
                // `copy_unit_stride_chunk_overlap_ptr`.
                unsafe {
                    copy_unit_stride_chunk_overlap_ptr(
                        &chunk_data,
                        FlatBufferPtr {
                            ptr: result_ptr,
                            len: result_len,
                        },
                        UnitStrideCopyLayout {
                            chunk_offsets: &entry.offsets,
                            chunk_shape: &chunk_shape,
                            dataset_shape: shape,
                            resolved,
                            chunk_strides: &chunk_strides,
                            result_strides: &result_strides,
                            elem_size,
                        },
                    )?;
                }
            }

            // NOTE(review): assumes every element was written by the loop
            // above; verify that full grid coverage guarantees this.
            let result_values = assume_init_vec(result_values);
            return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
                .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
        }

        // Fully covered unit stride but `T` needs decoding: raw bytes first.
        let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
        let result_ptr = result_buf.as_mut_ptr() as *mut u8;
        let result_len = result_buf.len();

        for entry in &overlapping {
            let chunk_data =
                self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

            // SAFETY (review): `result_ptr`/`result_len` describe the live
            // `result_buf` allocation; same helper contract as above -- confirm.
            unsafe {
                copy_unit_stride_chunk_overlap_ptr(
                    &chunk_data,
                    FlatBufferPtr {
                        ptr: result_ptr,
                        len: result_len,
                    },
                    UnitStrideCopyLayout {
                        chunk_offsets: &entry.offsets,
                        chunk_shape: &chunk_shape,
                        dataset_shape: shape,
                        resolved,
                        chunk_strides: &chunk_strides,
                        result_strides: &result_strides,
                        elem_size,
                    },
                )?;
            }
        }

        let result_buf = assume_init_u8_vec(result_buf);
        return self.decode_buffer_with_shape::<T>(
            &result_buf,
            resolved.result_elements,
            &resolved.result_shape,
        );
    }

    // General path: start from the prepared output buffer and overlay the
    // chunks that exist.
    let mut result_buf = self.make_output_buffer(result_total_bytes);

    for entry in &overlapping {
        let chunk_data =
            self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

        if use_unit_stride_fast_path {
            copy_unit_stride_chunk_overlap(
                &chunk_data,
                &mut result_buf,
                UnitStrideCopyLayout {
                    chunk_offsets: &entry.offsets,
                    chunk_shape: &chunk_shape,
                    dataset_shape: shape,
                    resolved,
                    chunk_strides: &chunk_strides,
                    result_strides: &result_strides,
                    elem_size,
                },
            )?;
            continue;
        }

        // Strided selection: for each dimension, list the selected
        // coordinates inside this chunk as (chunk-local index, result index).
        let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
        for d in 0..ndim {
            let chunk_start = entry.offsets[d];
            // The last chunk along a dimension is clipped to the dataset extent.
            let chunk_end = (chunk_start + chunk_shape[d]).min(shape[d]);
            let dim = &resolved.dims[d];
            let sel_start = dim.start;
            let sel_end = dim.end;
            let sel_step = dim.step;
            let mut indices = Vec::new();

            // First selected coordinate at or after the chunk start.
            let first_sel = if sel_start >= chunk_start {
                sel_start
            } else {
                let steps_to_skip = (chunk_start - sel_start).div_ceil(sel_step);
                sel_start + steps_to_skip * sel_step
            };

            let mut sel_idx = first_sel;
            while sel_idx < sel_end && sel_idx < chunk_end {
                let chunk_local = checked_usize(sel_idx - chunk_start, "chunk-local index")?;
                let result_dim_idx =
                    checked_usize((sel_idx - dim.start) / sel_step, "result index")?;
                indices.push((chunk_local, result_dim_idx));
                sel_idx += sel_step;
            }

            dim_indices.push(indices);
        }

        copy_selected_elements(
            &chunk_data,
            &mut result_buf,
            &dim_indices,
            &chunk_strides,
            &result_strides,
            elem_size,
            ndim,
        )?;
    }

    self.decode_buffer_with_shape::<T>(
        &result_buf,
        resolved.result_elements,
        &resolved.result_shape,
    )
}
2090
2091 #[cfg(feature = "rayon")]
2097 fn read_chunked_slice_parallel<T: H5Type>(
2098 &self,
2099 index_address: u64,
2100 chunk_dims: &[u32],
2101 _element_size: u32,
2102 chunk_indexing: Option<&ChunkIndexing>,
2103 _selection: &SliceInfo,
2104 resolved: &ResolvedSelection,
2105 ) -> Result<ArrayD<T>> {
2106 if resolved.result_elements == 0 {
2107 return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
2108 }
2109
2110 if Cursor::is_undefined_offset(index_address, self.offset_size()) {
2111 return self
2112 .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
2113 }
2114
2115 let ndim = self.ndim();
2116 let shape = &self.dataspace.dims;
2117 let elem_size = dtype_element_size(&self.datatype);
2118 let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
2119 validate_chunk_shape(shape, &chunk_shape)?;
2120 let mut first_chunk = vec![0u64; ndim];
2121 let mut last_chunk = vec![0u64; ndim];
2122 for d in 0..ndim {
2123 let (first, last) = resolved.dims[d]
2124 .chunk_index_range(chunk_shape[d])
2125 .expect("zero-sized result handled above");
2126 first_chunk[d] = first;
2127 last_chunk[d] = last;
2128 }
2129
2130 let mut overlapping = self.collect_chunk_entries(
2132 index_address,
2133 chunk_dims,
2134 chunk_indexing,
2135 ChunkEntrySelection {
2136 shape,
2137 ndim,
2138 elem_size,
2139 chunk_bounds: Some((&first_chunk, &last_chunk)),
2140 },
2141 )?;
2142 let fully_covered_grid = validate_chunk_grid_coverage(
2143 &mut overlapping,
2144 shape,
2145 &chunk_shape,
2146 &first_chunk,
2147 &last_chunk,
2148 )?;
2149
2150 let result_total_bytes = checked_mul_usize(
2152 resolved.result_elements,
2153 elem_size,
2154 "slice result size in bytes",
2155 )?;
2156 let result_dims = resolved.result_dims_with_collapsed();
2158 let mut result_strides = vec![1usize; ndim];
2159 for d in (0..ndim.saturating_sub(1)).rev() {
2160 result_strides[d] =
2161 checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
2162 }
2163 let mut chunk_strides = vec![1usize; ndim];
2164 for d in (0..ndim.saturating_sub(1)).rev() {
2165 chunk_strides[d] = checked_mul_usize(
2166 chunk_strides[d + 1],
2167 chunk_shape[d + 1] as usize,
2168 "chunk stride",
2169 )?;
2170 }
2171 let use_unit_stride_fast_path = resolved.is_unit_stride();
2172 let fully_covered_unit_stride = use_unit_stride_fast_path && fully_covered_grid;
2173
2174 if fully_covered_unit_stride {
2175 if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
2176 let mut result_values: Vec<MaybeUninit<T>> =
2177 std::iter::repeat_with(MaybeUninit::<T>::uninit)
2178 .take(resolved.result_elements)
2179 .collect();
2180 let flat = FlatBufferPtr {
2181 ptr: result_values.as_mut_ptr() as *mut u8,
2182 len: checked_mul_usize(
2183 result_values.len(),
2184 std::mem::size_of::<T>(),
2185 "typed slice result size in bytes",
2186 )?,
2187 };
2188
2189 overlapping
2190 .par_iter()
2191 .map(|entry| {
2192 let chunk_data = self.load_exact_chunk_data(
2193 entry,
2194 index_address,
2195 &chunk_shape,
2196 elem_size,
2197 )?;
2198
2199 unsafe {
2200 flat.copy_unit_stride_chunk_overlap(
2201 &chunk_data,
2202 UnitStrideCopyLayout {
2203 chunk_offsets: &entry.offsets,
2204 chunk_shape: &chunk_shape,
2205 dataset_shape: shape,
2206 resolved,
2207 chunk_strides: &chunk_strides,
2208 result_strides: &result_strides,
2209 elem_size,
2210 },
2211 )?;
2212 }
2213
2214 Ok(())
2215 })
2216 .collect::<std::result::Result<Vec<_>, Error>>()?;
2217
2218 let result_values = assume_init_vec(result_values);
2219 return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
2220 .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
2221 }
2222
2223 let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
2224 let flat = FlatBufferPtr {
2225 ptr: result_buf.as_mut_ptr() as *mut u8,
2226 len: result_buf.len(),
2227 };
2228
2229 overlapping
2230 .par_iter()
2231 .map(|entry| {
2232 let chunk_data =
2233 self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
2234
2235 unsafe {
2236 flat.copy_unit_stride_chunk_overlap(
2237 &chunk_data,
2238 UnitStrideCopyLayout {
2239 chunk_offsets: &entry.offsets,
2240 chunk_shape: &chunk_shape,
2241 dataset_shape: shape,
2242 resolved,
2243 chunk_strides: &chunk_strides,
2244 result_strides: &result_strides,
2245 elem_size,
2246 },
2247 )?;
2248 }
2249
2250 Ok(())
2251 })
2252 .collect::<std::result::Result<Vec<_>, Error>>()?;
2253
2254 let result_buf = assume_init_u8_vec(result_buf);
2255 return self.decode_buffer_with_shape::<T>(
2256 &result_buf,
2257 resolved.result_elements,
2258 &resolved.result_shape,
2259 );
2260 }
2261
2262 let mut result_buf = self.make_output_buffer(result_total_bytes);
2263
2264 let flat = FlatBufferPtr {
2265 ptr: result_buf.as_mut_ptr(),
2266 len: result_buf.len(),
2267 };
2268
2269 overlapping
2270 .par_iter()
2271 .map(|entry| {
2272 let chunk_data =
2273 self.load_exact_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
2274
2275 if use_unit_stride_fast_path {
2276 unsafe {
2277 flat.copy_unit_stride_chunk_overlap(
2278 &chunk_data,
2279 UnitStrideCopyLayout {
2280 chunk_offsets: &entry.offsets,
2281 chunk_shape: &chunk_shape,
2282 dataset_shape: shape,
2283 resolved,
2284 chunk_strides: &chunk_strides,
2285 result_strides: &result_strides,
2286 elem_size,
2287 },
2288 )?;
2289 }
2290 return Ok(());
2291 }
2292
2293 let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
2296 for d in 0..ndim {
2297 let chunk_start = entry.offsets[d];
2298 let chunk_end = (chunk_start + chunk_shape[d]).min(shape[d]);
2299 let dim = &resolved.dims[d];
2300 let sel_start = dim.start;
2301 let sel_end = dim.end;
2302 let sel_step = dim.step;
2303 let mut indices = Vec::new();
2304
2305 let first_sel = if sel_start >= chunk_start {
2306 sel_start
2307 } else {
2308 let steps_to_skip = (chunk_start - sel_start).div_ceil(sel_step);
2309 sel_start + steps_to_skip * sel_step
2310 };
2311
2312 let mut sel_idx = first_sel;
2313 while sel_idx < sel_end && sel_idx < chunk_end {
2314 let chunk_local =
2315 checked_usize(sel_idx - chunk_start, "chunk-local index")?;
2316 let result_dim_idx =
2317 checked_usize((sel_idx - dim.start) / sel_step, "result index")?;
2318 indices.push((chunk_local, result_dim_idx));
2319 sel_idx += sel_step;
2320 }
2321
2322 dim_indices.push(indices);
2323 }
2324
2325 unsafe {
2329 flat.copy_selected(
2330 &chunk_data,
2331 &dim_indices,
2332 &chunk_strides,
2333 &result_strides,
2334 elem_size,
2335 ndim,
2336 )?;
2337 }
2338
2339 Ok(())
2340 })
2341 .collect::<std::result::Result<Vec<_>, Error>>()?;
2342
2343 self.decode_buffer_with_shape::<T>(
2344 &result_buf,
2345 resolved.result_elements,
2346 &resolved.result_shape,
2347 )
2348 }
2349
2350 fn read_contiguous_slice<T: H5Type>(
2351 &self,
2352 address: u64,
2353 size: u64,
2354 resolved: &ResolvedSelection,
2355 ) -> Result<ArrayD<T>> {
2356 if resolved.result_elements == 0 {
2357 return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
2358 }
2359
2360 if self.external_files.is_none()
2361 && (Cursor::is_undefined_offset(address, self.offset_size()) || size == 0)
2362 {
2363 return self
2364 .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
2365 }
2366
2367 let shape = &self.dataspace.dims;
2368 if selection_covers_full_dataset(resolved, shape) {
2369 return self.read_contiguous::<T>(address, size);
2370 }
2371
2372 let elem_size = dtype_element_size(&self.datatype);
2373 let result_total_bytes = checked_mul_usize(
2374 resolved.result_elements,
2375 elem_size,
2376 "contiguous slice result size in bytes",
2377 )?;
2378 let dataset_strides = row_major_strides(shape, "contiguous dataset stride")?;
2379 let result_dims = resolved.result_dims_with_collapsed();
2380 let result_strides = result_strides_for_dims(&result_dims)?;
2381 let result_buf = self.read_contiguous_slice_bytes_direct(
2382 address,
2383 size,
2384 resolved,
2385 ContiguousSliceDirectLayout {
2386 dataset_strides: &dataset_strides,
2387 result_strides: &result_strides,
2388 elem_size,
2389 result_total_bytes,
2390 },
2391 )?;
2392
2393 self.decode_buffer_with_shape::<T>(
2394 &result_buf,
2395 resolved.result_elements,
2396 &resolved.result_shape,
2397 )
2398 }
2399
2400 fn read_contiguous_slice_bytes_direct(
2401 &self,
2402 address: u64,
2403 size: u64,
2404 resolved: &ResolvedSelection,
2405 layout: ContiguousSliceDirectLayout<'_>,
2406 ) -> Result<Vec<u8>> {
2407 let shape = &self.dataspace.dims;
2408 let ndim = shape.len();
2409 if resolved.dims.len() != ndim
2410 || layout.dataset_strides.len() != ndim
2411 || layout.result_strides.len() != ndim
2412 {
2413 return Err(Error::InvalidData(format!(
2414 "contiguous slice layout rank does not match dataset rank {ndim}"
2415 )));
2416 }
2417
2418 let storage_len = if self.external_files.is_some() {
2419 checked_mul_usize(
2420 checked_usize(self.num_elements(), "dataset element count")?,
2421 layout.elem_size,
2422 "external dataset size",
2423 )?
2424 } else {
2425 checked_usize(size, "contiguous dataset size")?
2426 };
2427 let tail_start = contiguous_slice_tail_start(shape, resolved);
2428 let block_elements = contiguous_slice_block_elements(resolved, tail_start)?;
2429 let block_bytes = checked_mul_usize(
2430 block_elements,
2431 layout.elem_size,
2432 "contiguous slice block size in bytes",
2433 )?;
2434 let mut result_buf = self.make_output_buffer(layout.result_total_bytes);
2435
2436 let prefix_blocks =
2437 resolved.dims[..tail_start]
2438 .iter()
2439 .try_fold(1usize, |acc, dim| -> Result<usize> {
2440 checked_mul_usize(acc, dim.count, "contiguous slice block count")
2441 })?;
2442 let mut counters = vec![0usize; tail_start];
2443
2444 for _ in 0..prefix_blocks {
2445 let mut source_elem = 0usize;
2446 let mut result_elem = 0usize;
2447
2448 for (d, &counter) in counters.iter().enumerate().take(tail_start) {
2449 let ordinal = u64::try_from(counter).map_err(|_| {
2450 Error::InvalidData("contiguous slice ordinal exceeds u64".to_string())
2451 })?;
2452 let coord = checked_add_u64(
2453 resolved.dims[d].start,
2454 checked_mul_u64(
2455 ordinal,
2456 resolved.dims[d].step,
2457 "contiguous slice coordinate",
2458 )?,
2459 "contiguous slice coordinate",
2460 )?;
2461 let coord = checked_usize(coord, "contiguous slice source index")?;
2462 let source_term =
2463 checked_mul_usize(coord, layout.dataset_strides[d], "contiguous slice source")?;
2464 let result_term = checked_mul_usize(
2465 counter,
2466 layout.result_strides[d],
2467 "contiguous slice result",
2468 )?;
2469 source_elem =
2470 checked_add_usize(source_elem, source_term, "contiguous slice source")?;
2471 result_elem =
2472 checked_add_usize(result_elem, result_term, "contiguous slice result")?;
2473 }
2474
2475 for (d, &dataset_stride) in layout
2476 .dataset_strides
2477 .iter()
2478 .enumerate()
2479 .take(ndim)
2480 .skip(tail_start)
2481 {
2482 let coord = checked_usize(resolved.dims[d].start, "contiguous slice source index")?;
2483 let source_term =
2484 checked_mul_usize(coord, dataset_stride, "contiguous slice source")?;
2485 source_elem =
2486 checked_add_usize(source_elem, source_term, "contiguous slice source")?;
2487 }
2488
2489 let source_start = checked_mul_usize(
2490 source_elem,
2491 layout.elem_size,
2492 "contiguous slice source byte offset",
2493 )?;
2494 let source_end = checked_add_usize(
2495 source_start,
2496 block_bytes,
2497 "contiguous slice source byte end",
2498 )?;
2499 if source_end > storage_len {
2500 return Err(Error::InvalidData(format!(
2501 "contiguous slice range {}..{} exceeds dataset storage size {}",
2502 source_start, source_end, storage_len
2503 )));
2504 }
2505
2506 let dst_start = checked_mul_usize(
2507 result_elem,
2508 layout.elem_size,
2509 "contiguous slice destination byte offset",
2510 )?;
2511 let dst_end = checked_add_usize(
2512 dst_start,
2513 block_bytes,
2514 "contiguous slice destination byte end",
2515 )?;
2516 if dst_end > result_buf.len() {
2517 return Err(Error::InvalidData(format!(
2518 "contiguous slice destination range {}..{} exceeds result size {}",
2519 dst_start,
2520 dst_end,
2521 result_buf.len()
2522 )));
2523 }
2524
2525 let block = self.read_contiguous_logical_range(address, source_start, block_bytes)?;
2526 if block.len() != block_bytes {
2527 return Err(Error::InvalidData(format!(
2528 "contiguous slice read returned {} bytes, expected {}",
2529 block.len(),
2530 block_bytes
2531 )));
2532 }
2533 result_buf[dst_start..dst_end].copy_from_slice(&block);
2534
2535 let mut carry = true;
2536 for d in (0..tail_start).rev() {
2537 if carry {
2538 counters[d] += 1;
2539 if counters[d] < resolved.dims[d].count {
2540 carry = false;
2541 } else {
2542 counters[d] = 0;
2543 }
2544 }
2545 }
2546 }
2547
2548 Ok(result_buf)
2549 }
2550
2551 fn read_compact_slice<T: H5Type>(
2552 &self,
2553 data: &[u8],
2554 selection: &SliceInfo,
2555 ) -> Result<ArrayD<T>> {
2556 let full = self.read_compact::<T>(data)?;
2557 slice_array(&full, selection, &self.dataspace.dims)
2558 }
2559
2560 fn decode_buffer_with_shape<T: H5Type>(
2561 &self,
2562 raw: &[u8],
2563 n: usize,
2564 shape: &[usize],
2565 ) -> Result<ArrayD<T>> {
2566 let elem_size = dtype_element_size(&self.datatype);
2567
2568 if let Some(elements) = T::decode_vec(raw, &self.datatype, n) {
2569 let elements = elements?;
2570 return ArrayD::from_shape_vec(IxDyn(shape), elements)
2571 .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
2572 }
2573
2574 let mut elements = Vec::with_capacity(n);
2575 for i in 0..n {
2576 let start = checked_mul_usize(i, elem_size, "decoded element byte offset")?;
2577 let end = checked_mul_usize(i + 1, elem_size, "decoded element end offset")?;
2578 if end > raw.len() {
2579 let padded = if end <= raw.len().saturating_add(elem_size) {
2581 let mut buf = vec![0u8; elem_size];
2582 let available = raw.len().saturating_sub(start);
2583 if available > 0 {
2584 buf[..available].copy_from_slice(&raw[start..start + available]);
2585 }
2586 T::from_bytes(&buf, &self.datatype)?
2587 } else {
2588 T::from_bytes(&vec![0u8; elem_size], &self.datatype)?
2589 };
2590 elements.push(padded);
2591 } else {
2592 elements.push(T::from_bytes(&raw[start..end], &self.datatype)?);
2593 }
2594 }
2595
2596 ArrayD::from_shape_vec(IxDyn(shape), elements)
2597 .map_err(|e| Error::InvalidData(format!("array shape error: {e}")))
2598 }
2599
2600 fn decode_raw_data<T: H5Type>(&self, raw: &[u8]) -> Result<ArrayD<T>> {
2601 let n = checked_usize(self.num_elements(), "dataset element count")?;
2602 let mut shape = Vec::with_capacity(self.dataspace.dims.len());
2603 for &dim in &self.dataspace.dims {
2604 shape.push(checked_usize(dim, "dataset dimension")?);
2605 }
2606 self.decode_buffer_with_shape::<T>(raw, n, &shape)
2607 }
2608
2609 fn make_fill_array<T: H5Type>(&self) -> Result<ArrayD<T>> {
2610 let n = checked_usize(self.num_elements(), "dataset element count")?;
2611 let mut shape = Vec::with_capacity(self.dataspace.dims.len());
2612 for &dim in &self.dataspace.dims {
2613 shape.push(checked_usize(dim, "dataset dimension")?);
2614 }
2615 self.make_fill_array_from_shape::<T>(n, &shape)
2616 }
2617
2618 fn make_fill_array_from_shape<T: H5Type>(
2619 &self,
2620 element_count: usize,
2621 shape: &[usize],
2622 ) -> Result<ArrayD<T>> {
2623 let elem_size = dtype_element_size(&self.datatype);
2624 let total_bytes = checked_mul_usize(element_count, elem_size, "fill result size in bytes")?;
2625 let fill = self.make_output_buffer(total_bytes);
2626 self.decode_buffer_with_shape::<T>(&fill, element_count, shape)
2627 }
2628
2629 fn make_output_buffer(&self, total_bytes: usize) -> Vec<u8> {
2630 if let Some(ref fv) = self.fill_value {
2631 if let Some(ref fill_bytes) = fv.value {
2632 let mut buf = vec![0u8; total_bytes];
2633 if !fill_bytes.is_empty() {
2634 for chunk in buf.chunks_exact_mut(fill_bytes.len()) {
2635 chunk.copy_from_slice(fill_bytes);
2636 }
2637 }
2638 buf
2639 } else {
2640 vec![0u8; total_bytes]
2641 }
2642 } else {
2643 vec![0u8; total_bytes]
2644 }
2645 }
2646
2647 fn normalize_raw_bytes(&self, raw: &[u8], total_bytes: usize) -> Vec<u8> {
2648 if raw.len() >= total_bytes {
2649 raw[..total_bytes].to_vec()
2650 } else {
2651 let mut normalized = self.make_output_buffer(total_bytes);
2652 normalized[..raw.len()].copy_from_slice(raw);
2653 normalized
2654 }
2655 }
2656}
2657
2658fn attribute_from_message_storage(message: &AttributeMessage, context: &FileContext) -> Attribute {
2659 let raw_data = match &message.datatype {
2660 Datatype::VarLen { base }
2661 if matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. })
2662 && message.dataspace.num_elements() == 1 =>
2663 {
2664 resolve_vlen_bytes_storage(
2665 &message.raw_data,
2666 context.storage.as_ref(),
2667 context.superblock.offset_size,
2668 context.superblock.length_size,
2669 )
2670 .unwrap_or_else(|| message.raw_data.clone())
2671 }
2672 _ => message.raw_data.clone(),
2673 };
2674
2675 Attribute {
2676 name: message.name.clone(),
2677 datatype: message.datatype.clone(),
2678 shape: match message.dataspace.dataspace_type {
2679 DataspaceType::Scalar => vec![],
2680 DataspaceType::Null => vec![0],
2681 DataspaceType::Simple => message.dataspace.dims.clone(),
2682 },
2683 raw_data,
2684 }
2685}
2686
2687fn normalize_layout(layout: DataLayout, dataspace: &DataspaceMessage) -> DataLayout {
2688 match layout {
2689 DataLayout::Chunked {
2690 address,
2691 mut dims,
2692 mut element_size,
2693 chunk_indexing,
2694 } if dims.len() == dataspace.dims.len() + 1 => {
2695 if let Some(legacy_element_size) = dims.pop() {
2696 if element_size == 0 {
2697 element_size = legacy_element_size;
2698 }
2699 }
2700 DataLayout::Chunked {
2701 address,
2702 dims,
2703 element_size,
2704 chunk_indexing,
2705 }
2706 }
2707 other => other,
2708 }
2709}
2710
/// Test helper: derives row-major strides for the dataset and the chunk, then
/// copies the chunk into `flat` via `copy_chunk_to_flat_with_strides`.
///
/// # Panics
///
/// Panics if either stride computation fails.
#[cfg(test)]
fn copy_chunk_to_flat(
    chunk_data: &[u8],
    flat: &mut [u8],
    chunk_offsets: &[u64],
    chunk_shape: &[u64],
    dataset_shape: &[u64],
    elem_size: usize,
) -> Result<()> {
    let strides_of_dataset = row_major_strides(dataset_shape, "dataset stride")
        .expect("dataset strides should fit in usize");
    let strides_of_chunk =
        row_major_strides(chunk_shape, "chunk stride").expect("chunk strides should fit in usize");

    let layout = ChunkCopyLayout {
        chunk_offsets,
        chunk_shape,
        dataset_shape,
        dataset_strides: &strides_of_dataset,
        chunk_strides: &strides_of_chunk,
        elem_size,
    };
    copy_chunk_to_flat_with_strides(chunk_data, flat, layout)
}
2738
2739fn copy_chunk_to_flat_with_strides(
2740 chunk_data: &[u8],
2741 flat: &mut [u8],
2742 layout: ChunkCopyLayout<'_>,
2743) -> Result<()> {
2744 unsafe {
2745 copy_chunk_to_flat_with_strides_ptr(
2746 chunk_data,
2747 FlatBufferPtr {
2748 ptr: flat.as_mut_ptr(),
2749 len: flat.len(),
2750 },
2751 layout,
2752 )
2753 }
2754}
2755
2756#[inline(always)]
2757unsafe fn copy_chunk_to_flat_with_strides_ptr(
2758 chunk_data: &[u8],
2759 flat: FlatBufferPtr,
2760 layout: ChunkCopyLayout<'_>,
2761) -> Result<()> {
2762 let ndim = layout.dataset_shape.len();
2763 if layout.chunk_offsets.len() != ndim
2764 || layout.chunk_shape.len() != ndim
2765 || layout.dataset_strides.len() != ndim
2766 || layout.chunk_strides.len() != ndim
2767 {
2768 return Err(Error::InvalidData(format!(
2769 "chunk copy layout rank does not match dataset rank {ndim}"
2770 )));
2771 }
2772
2773 if ndim == 0 {
2774 if chunk_data.len() < layout.elem_size || flat.len < layout.elem_size {
2775 return Err(Error::InvalidData(format!(
2776 "scalar chunk copy requires {} bytes, got source {} and destination {}",
2777 layout.elem_size,
2778 chunk_data.len(),
2779 flat.len
2780 )));
2781 }
2782 std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), flat.ptr, layout.elem_size);
2783 return Ok(());
2784 }
2785
2786 let mut actual_chunk_shape = Vec::with_capacity(ndim);
2788 for i in 0..ndim {
2789 if layout.chunk_offsets[i] >= layout.dataset_shape[i] {
2790 return Err(Error::InvalidData(format!(
2791 "chunk offset {} is outside dimension {} of size {}",
2792 layout.chunk_offsets[i], i, layout.dataset_shape[i]
2793 )));
2794 }
2795 let remaining = layout.dataset_shape[i] - layout.chunk_offsets[i];
2796 actual_chunk_shape.push(checked_usize(
2797 remaining.min(layout.chunk_shape[i]),
2798 "actual chunk extent",
2799 )?);
2800 }
2801
2802 let row_elems = *actual_chunk_shape.last().unwrap_or(&1);
2803 let row_bytes = checked_mul_usize(row_elems, layout.elem_size, "chunk row bytes")?;
2804 let mut dataset_origin = 0usize;
2805 for (d, offset) in layout.chunk_offsets.iter().enumerate() {
2806 let offset = checked_usize(*offset, "chunk offset")?;
2807 let term = checked_mul_usize(offset, layout.dataset_strides[d], "chunk origin")?;
2808 dataset_origin = checked_add_usize(dataset_origin, term, "chunk origin")?;
2809 }
2810
2811 if ndim == 1 {
2812 let dst_start = checked_mul_usize(dataset_origin, layout.elem_size, "chunk dst offset")?;
2813 let dst_end = checked_add_usize(dst_start, row_bytes, "chunk dst end")?;
2814 if row_bytes > chunk_data.len() || dst_end > flat.len {
2815 return Err(Error::InvalidData(format!(
2816 "chunk copy out of bounds: source row needs {} bytes from {} bytes, destination range {}..{} exceeds {} bytes",
2817 row_bytes,
2818 chunk_data.len(),
2819 dst_start,
2820 dst_end,
2821 flat.len
2822 )));
2823 }
2824 std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), flat.ptr.add(dst_start), row_bytes);
2825 return Ok(());
2826 }
2827
2828 let outer_dims = &actual_chunk_shape[..ndim - 1];
2829 let total_rows = checked_product_usize(outer_dims, "chunk row count")?;
2830 let mut outer_idx = vec![0usize; ndim - 1];
2831
2832 for _ in 0..total_rows {
2833 let mut chunk_row = 0usize;
2834 let mut dataset_row = dataset_origin;
2835 for (d, outer) in outer_idx.iter().copied().enumerate() {
2836 let chunk_term = checked_mul_usize(outer, layout.chunk_strides[d], "chunk row")?;
2837 let dataset_term = checked_mul_usize(outer, layout.dataset_strides[d], "dataset row")?;
2838 chunk_row = checked_add_usize(chunk_row, chunk_term, "chunk row")?;
2839 dataset_row = checked_add_usize(dataset_row, dataset_term, "dataset row")?;
2840 }
2841
2842 let src_start = checked_mul_usize(chunk_row, layout.elem_size, "chunk src offset")?;
2843 let dst_start = checked_mul_usize(dataset_row, layout.elem_size, "chunk dst offset")?;
2844 let src_end = checked_add_usize(src_start, row_bytes, "chunk src end")?;
2845 let dst_end = checked_add_usize(dst_start, row_bytes, "chunk dst end")?;
2846 if src_end > chunk_data.len() || dst_end > flat.len {
2847 return Err(Error::InvalidData(format!(
2848 "chunk copy out of bounds: source range {}..{} of {} bytes, destination range {}..{} of {} bytes",
2849 src_start,
2850 src_end,
2851 chunk_data.len(),
2852 dst_start,
2853 dst_end,
2854 flat.len
2855 )));
2856 }
2857 std::ptr::copy_nonoverlapping(
2858 chunk_data.as_ptr().add(src_start),
2859 flat.ptr.add(dst_start),
2860 row_bytes,
2861 );
2862
2863 let mut carry = true;
2864 for d in (0..outer_idx.len()).rev() {
2865 if carry {
2866 outer_idx[d] += 1;
2867 if outer_idx[d] < outer_dims[d] {
2868 carry = false;
2869 } else {
2870 outer_idx[d] = 0;
2871 }
2872 }
2873 }
2874 }
2875
2876 Ok(())
2877}
2878
2879fn checked_product_usize(values: &[usize], context: &str) -> Result<usize> {
2880 let mut product = 1usize;
2881 for &value in values {
2882 product = checked_mul_usize(product, value, context)?;
2883 }
2884 Ok(product)
2885}
2886
2887fn unit_stride_chunk_overlap_plan(
2888 chunk_offsets: &[u64],
2889 chunk_shape: &[u64],
2890 dataset_shape: &[u64],
2891 resolved: &ResolvedSelection,
2892) -> Result<(Vec<usize>, Vec<usize>, Vec<usize>)> {
2893 let ndim = dataset_shape.len();
2894 let mut overlap_counts = Vec::with_capacity(ndim);
2895 let mut chunk_local_start = Vec::with_capacity(ndim);
2896 let mut result_start = Vec::with_capacity(ndim);
2897
2898 for d in 0..ndim {
2899 let chunk_start = chunk_offsets[d];
2900 let chunk_end = (chunk_start + chunk_shape[d]).min(dataset_shape[d]);
2901 let dim = &resolved.dims[d];
2902 let overlap_start = chunk_start.max(dim.start);
2903 let overlap_end = chunk_end.min(dim.end);
2904 if overlap_start >= overlap_end {
2905 return Ok((Vec::new(), Vec::new(), Vec::new()));
2906 }
2907
2908 overlap_counts.push(checked_usize(
2909 overlap_end - overlap_start,
2910 "chunk overlap size",
2911 )?);
2912 chunk_local_start.push(checked_usize(
2913 overlap_start - chunk_start,
2914 "chunk overlap start",
2915 )?);
2916 result_start.push(checked_usize(
2917 overlap_start - dim.start,
2918 "slice result overlap start",
2919 )?);
2920 }
2921
2922 Ok((overlap_counts, chunk_local_start, result_start))
2923}
2924
2925#[inline(always)]
2926fn copy_unit_stride_chunk_overlap(
2927 chunk_data: &[u8],
2928 result_buf: &mut [u8],
2929 layout: UnitStrideCopyLayout<'_>,
2930) -> Result<()> {
2931 unsafe {
2932 copy_unit_stride_chunk_overlap_ptr(
2933 chunk_data,
2934 FlatBufferPtr {
2935 ptr: result_buf.as_mut_ptr(),
2936 len: result_buf.len(),
2937 },
2938 layout,
2939 )
2940 }
2941}
2942
2943#[inline(always)]
2954unsafe fn copy_unit_stride_chunk_overlap_ptr(
2955 chunk_data: &[u8],
2956 result: FlatBufferPtr,
2957 layout: UnitStrideCopyLayout<'_>,
2958) -> Result<()> {
2959 let ndim = layout.dataset_shape.len();
2960 if layout.chunk_offsets.len() != ndim
2961 || layout.chunk_shape.len() != ndim
2962 || layout.resolved.dims.len() != ndim
2963 || layout.chunk_strides.len() != ndim
2964 || layout.result_strides.len() != ndim
2965 {
2966 return Err(Error::InvalidData(format!(
2967 "unit-stride copy layout rank does not match dataset rank {ndim}"
2968 )));
2969 }
2970
2971 if ndim == 0 {
2972 if chunk_data.len() < layout.elem_size || result.len < layout.elem_size {
2973 return Err(Error::InvalidData(format!(
2974 "scalar slice copy requires {} bytes, got source {} and destination {}",
2975 layout.elem_size,
2976 chunk_data.len(),
2977 result.len
2978 )));
2979 }
2980 std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), result.ptr, layout.elem_size);
2981 return Ok(());
2982 }
2983
2984 let (overlap_counts, chunk_local_start, result_start) = unit_stride_chunk_overlap_plan(
2985 layout.chunk_offsets,
2986 layout.chunk_shape,
2987 layout.dataset_shape,
2988 layout.resolved,
2989 )?;
2990 if overlap_counts.is_empty() {
2991 return Ok(());
2992 }
2993
2994 let row_elems = *overlap_counts.last().unwrap_or(&1);
2995 let row_bytes = checked_mul_usize(row_elems, layout.elem_size, "unit-stride slice row bytes")?;
2996
2997 let mut chunk_origin = 0usize;
2998 let mut result_origin = 0usize;
2999 for d in 0..ndim {
3000 let chunk_term = checked_mul_usize(
3001 chunk_local_start[d],
3002 layout.chunk_strides[d],
3003 "chunk overlap origin",
3004 )?;
3005 let result_term = checked_mul_usize(
3006 result_start[d],
3007 layout.result_strides[d],
3008 "slice result origin",
3009 )?;
3010 chunk_origin = checked_add_usize(chunk_origin, chunk_term, "chunk overlap origin")?;
3011 result_origin = checked_add_usize(result_origin, result_term, "slice result origin")?;
3012 }
3013
3014 if ndim == 1 {
3015 let src_start = checked_mul_usize(chunk_origin, layout.elem_size, "slice src offset")?;
3016 let dst_start = checked_mul_usize(result_origin, layout.elem_size, "slice dst offset")?;
3017 let src_end = checked_add_usize(src_start, row_bytes, "slice src end")?;
3018 let dst_end = checked_add_usize(dst_start, row_bytes, "slice dst end")?;
3019 if src_end > chunk_data.len() || dst_end > result.len {
3020 return Err(Error::InvalidData(format!(
3021 "unit-stride slice copy out of bounds: source range {}..{} of {} bytes, destination range {}..{} of {} bytes",
3022 src_start,
3023 src_end,
3024 chunk_data.len(),
3025 dst_start,
3026 dst_end,
3027 result.len
3028 )));
3029 }
3030 std::ptr::copy_nonoverlapping(
3031 chunk_data.as_ptr().add(src_start),
3032 result.ptr.add(dst_start),
3033 row_bytes,
3034 );
3035 return Ok(());
3036 }
3037
3038 let outer_counts = &overlap_counts[..ndim - 1];
3039 let total_rows = checked_product_usize(outer_counts, "unit-stride slice row count")?;
3040 let mut outer_idx = vec![0usize; ndim - 1];
3041
3042 for _ in 0..total_rows {
3043 let mut chunk_row = chunk_origin;
3044 let mut result_row = result_origin;
3045 for (d, outer) in outer_idx.iter().copied().enumerate() {
3046 let chunk_term = checked_mul_usize(outer, layout.chunk_strides[d], "slice chunk row")?;
3047 let result_term =
3048 checked_mul_usize(outer, layout.result_strides[d], "slice result row")?;
3049 chunk_row = checked_add_usize(chunk_row, chunk_term, "slice chunk row")?;
3050 result_row = checked_add_usize(result_row, result_term, "slice result row")?;
3051 }
3052
3053 let src_start = checked_mul_usize(chunk_row, layout.elem_size, "slice src offset")?;
3054 let dst_start = checked_mul_usize(result_row, layout.elem_size, "slice dst offset")?;
3055 let src_end = checked_add_usize(src_start, row_bytes, "slice src end")?;
3056 let dst_end = checked_add_usize(dst_start, row_bytes, "slice dst end")?;
3057 if src_end > chunk_data.len() || dst_end > result.len {
3058 return Err(Error::InvalidData(format!(
3059 "unit-stride slice copy out of bounds: source range {}..{} of {} bytes, destination range {}..{} of {} bytes",
3060 src_start,
3061 src_end,
3062 chunk_data.len(),
3063 dst_start,
3064 dst_end,
3065 result.len
3066 )));
3067 }
3068 std::ptr::copy_nonoverlapping(
3069 chunk_data.as_ptr().add(src_start),
3070 result.ptr.add(dst_start),
3071 row_bytes,
3072 );
3073
3074 let mut carry = true;
3075 for d in (0..outer_idx.len()).rev() {
3076 if carry {
3077 outer_idx[d] += 1;
3078 if outer_idx[d] < outer_counts[d] {
3079 carry = false;
3080 } else {
3081 outer_idx[d] = 0;
3082 }
3083 }
3084 }
3085 }
3086
3087 Ok(())
3088}
3089
3090#[allow(clippy::too_many_arguments)]
3091#[inline(always)]
3095fn copy_selected_elements(
3096 chunk_data: &[u8],
3097 result_buf: &mut [u8],
3098 dim_indices: &[Vec<(usize, usize)>],
3099 chunk_strides: &[usize],
3100 result_strides: &[usize],
3101 elem_size: usize,
3102 ndim: usize,
3103) -> Result<()> {
3104 if dim_indices.len() != ndim || chunk_strides.len() != ndim || result_strides.len() != ndim {
3105 return Err(Error::InvalidData(format!(
3106 "selected-element copy layout rank does not match rank {ndim}"
3107 )));
3108 }
3109
3110 if dim_indices.iter().any(|v| v.is_empty()) {
3112 return Ok(());
3113 }
3114
3115 let counts: Vec<usize> = dim_indices.iter().map(|v| v.len()).collect();
3117 let total = checked_product_usize(&counts, "selected-element copy count")?;
3118 let mut counters = vec![0usize; ndim];
3119
3120 for _ in 0..total {
3121 let mut chunk_flat = 0;
3122 let mut result_flat = 0;
3123 for d in 0..ndim {
3124 let (cl, ri) = dim_indices[d][counters[d]];
3125 let chunk_term = checked_mul_usize(cl, chunk_strides[d], "selected chunk offset")?;
3126 let result_term = checked_mul_usize(ri, result_strides[d], "selected result offset")?;
3127 chunk_flat = checked_add_usize(chunk_flat, chunk_term, "selected chunk offset")?;
3128 result_flat = checked_add_usize(result_flat, result_term, "selected result offset")?;
3129 }
3130
3131 let src_start = checked_mul_usize(chunk_flat, elem_size, "selected source byte offset")?;
3132 let dst_start =
3133 checked_mul_usize(result_flat, elem_size, "selected destination byte offset")?;
3134 let src_end = checked_add_usize(src_start, elem_size, "selected source byte end")?;
3135 let dst_end = checked_add_usize(dst_start, elem_size, "selected destination byte end")?;
3136
3137 if src_end > chunk_data.len() || dst_end > result_buf.len() {
3138 return Err(Error::InvalidData(format!(
3139 "selected-element copy out of bounds: source range {}..{} of {} bytes, destination range {}..{} of {} bytes",
3140 src_start,
3141 src_end,
3142 chunk_data.len(),
3143 dst_start,
3144 dst_end,
3145 result_buf.len()
3146 )));
3147 }
3148 result_buf[dst_start..dst_end].copy_from_slice(&chunk_data[src_start..src_end]);
3149
3150 let mut carry = true;
3152 for d in (0..ndim).rev() {
3153 if carry {
3154 counters[d] += 1;
3155 if counters[d] < dim_indices[d].len() {
3156 carry = false;
3157 } else {
3158 counters[d] = 0;
3159 }
3160 }
3161 }
3162 }
3163
3164 Ok(())
3165}
3166
/// Raw-pointer variant of the selected-element copy, compiled only with the
/// `rayon` feature.
///
/// `dim_indices[d]` lists `(chunk_local_index, result_index)` pairs for
/// dimension `d`; every combination across dimensions is copied, one element
/// of `elem_size` bytes at a time. Each destination range is checked against
/// `result_len` before it is written. Nothing is copied if any dimension has
/// no pairs.
///
/// # Errors
///
/// Returns `Error::InvalidData` on a rank mismatch, an overflowing offset
/// computation, or a byte range outside the source or destination.
///
/// # Safety
///
/// `result_ptr` must be valid for writes of `result_len` bytes, and that
/// region must not overlap `chunk_data`.
#[cfg(feature = "rayon")]
#[allow(clippy::too_many_arguments)]
#[inline(always)]
unsafe fn copy_selected_elements_ptr(
    chunk_data: &[u8],
    result_ptr: *mut u8,
    result_len: usize,
    dim_indices: &[Vec<(usize, usize)>],
    chunk_strides: &[usize],
    result_strides: &[usize],
    elem_size: usize,
    ndim: usize,
) -> Result<()> {
    let ranks_agree =
        dim_indices.len() == ndim && chunk_strides.len() == ndim && result_strides.len() == ndim;
    if !ranks_agree {
        return Err(Error::InvalidData(format!(
            "selected-element copy layout rank does not match rank {ndim}"
        )));
    }

    if dim_indices.iter().any(|pairs| pairs.is_empty()) {
        return Ok(());
    }

    let per_dim: Vec<usize> = dim_indices.iter().map(|pairs| pairs.len()).collect();
    let combinations = checked_product_usize(&per_dim, "selected-element copy count")?;
    let mut position = vec![0usize; ndim];

    for _ in 0..combinations {
        let mut chunk_flat = 0usize;
        let mut result_flat = 0usize;
        for (d, &pick) in position.iter().enumerate() {
            let (chunk_local, result_local) = dim_indices[d][pick];
            let chunk_term =
                checked_mul_usize(chunk_local, chunk_strides[d], "selected chunk offset")?;
            let result_term =
                checked_mul_usize(result_local, result_strides[d], "selected result offset")?;
            chunk_flat = checked_add_usize(chunk_flat, chunk_term, "selected chunk offset")?;
            result_flat = checked_add_usize(result_flat, result_term, "selected result offset")?;
        }

        let src_start = checked_mul_usize(chunk_flat, elem_size, "selected source byte offset")?;
        let dst_start =
            checked_mul_usize(result_flat, elem_size, "selected destination byte offset")?;
        let src_end = checked_add_usize(src_start, elem_size, "selected source byte end")?;
        let dst_end = checked_add_usize(dst_start, elem_size, "selected destination byte end")?;

        if src_end > chunk_data.len() || dst_end > result_len {
            return Err(Error::InvalidData(format!(
                "selected-element copy out of bounds: source range {}..{} of {} bytes, destination range {}..{} of {} bytes",
                src_start,
                src_end,
                chunk_data.len(),
                dst_start,
                dst_end,
                result_len
            )));
        }
        std::ptr::copy_nonoverlapping(
            chunk_data.as_ptr().add(src_start),
            result_ptr.add(dst_start),
            elem_size,
        );

        // Advance the odometer over index pairs, carrying outwards on wrap-around.
        for d in (0..ndim).rev() {
            position[d] += 1;
            if position[d] < dim_indices[d].len() {
                break;
            }
            position[d] = 0;
        }
    }

    Ok(())
}
3252
3253fn slice_array<T: H5Type + Clone>(
3255 array: &ArrayD<T>,
3256 selection: &SliceInfo,
3257 shape: &[u64],
3258) -> Result<ArrayD<T>> {
3259 let mut result_shape = Vec::new();
3261
3262 for (i, sel) in selection.selections.iter().enumerate() {
3263 let dim_size = shape[i];
3264 match sel {
3265 SliceInfoElem::Index(idx) => {
3266 if *idx >= dim_size {
3267 return Err(Error::SliceOutOfBounds {
3268 dim: i,
3269 index: *idx,
3270 size: dim_size,
3271 });
3272 }
3273 }
3275 SliceInfoElem::Slice { start, end, step } => {
3276 let dim_size = checked_usize(dim_size, "slice dimension size")?;
3277 let actual_end = if *end == u64::MAX {
3278 dim_size
3279 } else {
3280 checked_usize(*end, "slice end")?.min(dim_size)
3281 };
3282 let actual_start = checked_usize(*start, "slice start")?;
3283 let actual_step = checked_usize(*step, "slice step")?;
3284 if actual_step == 0 {
3285 return Err(Error::InvalidData("slice step cannot be 0".into()));
3286 }
3287 if actual_start > dim_size {
3288 return Err(Error::SliceOutOfBounds {
3289 dim: i,
3290 index: *start,
3291 size: shape[i],
3292 });
3293 }
3294 let n = (actual_end - actual_start).div_ceil(actual_step);
3295 result_shape.push(n);
3296 }
3297 }
3298 }
3299
3300 let ndim = shape.len();
3302 let total = checked_product_usize(&result_shape, "slice result element count")?;
3303 let mut elements = Vec::with_capacity(total);
3304
3305 let mut result_idx = vec![0usize; result_shape.len()];
3307
3308 for _ in 0..total {
3309 let mut src_idx = Vec::with_capacity(ndim);
3311 let mut ri = 0;
3312 for sel in selection.selections.iter() {
3313 match sel {
3314 SliceInfoElem::Index(idx) => {
3315 src_idx.push(checked_usize(*idx, "slice source index")?);
3316 }
3317 SliceInfoElem::Slice { start, step, .. } => {
3318 let start = checked_usize(*start, "slice start")?;
3319 let step = checked_usize(*step, "slice step")?;
3320 let offset =
3321 checked_mul_usize(result_idx[ri], step, "slice source index offset")?;
3322 src_idx.push(checked_add_usize(start, offset, "slice source index")?);
3323 ri += 1;
3324 }
3325 }
3326 }
3327
3328 elements.push(array[IxDyn(&src_idx)].clone());
3329
3330 if !result_shape.is_empty() {
3332 let mut carry = true;
3333 for d in (0..result_shape.len()).rev() {
3334 if carry {
3335 result_idx[d] += 1;
3336 if result_idx[d] < result_shape[d] {
3337 carry = false;
3338 } else {
3339 result_idx[d] = 0;
3340 }
3341 }
3342 }
3343 }
3344 }
3345
3346 ArrayD::from_shape_vec(IxDyn(&result_shape), elements)
3347 .map_err(|e| Error::InvalidData(format!("slice shape error: {e}")))
3348}
3349
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a chunk index entry at `address` positioned at `offsets`, with
    /// `size` and `filter_mask` both set to zero.
    fn chunk_entry(offsets: &[u64], address: u64) -> chunk_index::ChunkEntry {
        chunk_index::ChunkEntry {
            offsets: offsets.to_vec(),
            address,
            filter_mask: 0,
            size: 0,
        }
    }

    /// Unit-step selection covering `1..3` on the first axis and `1..4` on the
    /// second, i.e. a 2x3 window.
    fn window_selection() -> ResolvedSelection {
        let rows = ResolvedSelectionDim {
            start: 1,
            end: 3,
            step: 1,
            count: 2,
        };
        let cols = ResolvedSelectionDim {
            start: 1,
            end: 4,
            step: 1,
            count: 3,
        };
        ResolvedSelection {
            dims: vec![rows, cols],
            result_shape: vec![2, 3],
            result_elements: 6,
        }
    }

    /// Layout describing one 4x4 single-byte chunk at the origin of a 4x4
    /// dataset, copied into a result buffer with strides `[3, 1]`.
    fn window_layout(resolved: &ResolvedSelection) -> UnitStrideCopyLayout<'_> {
        UnitStrideCopyLayout {
            chunk_offsets: &[0, 0],
            chunk_shape: &[4, 4],
            dataset_shape: &[4, 4],
            resolved,
            chunk_strides: &[4, 1],
            result_strides: &[3, 1],
            elem_size: 1,
        }
    }

    /// `SliceInfo::all(n)` carries one selection per dimension.
    #[test]
    fn test_slice_info_all() {
        let everything = SliceInfo::all(3);
        assert_eq!(everything.selections.len(), 3);
    }

    /// A four-byte chunk at offset 2 lands in the middle of a zeroed
    /// eight-byte buffer.
    #[test]
    fn test_copy_chunk_1d() {
        let src = vec![1u8, 2, 3, 4];
        let mut dest = vec![0u8; 8];
        let origin = vec![2u64];
        let extent = vec![4u64];
        let bounds = vec![8u64];

        copy_chunk_to_flat(&src, &mut dest, &origin, &extent, &bounds, 1).unwrap();

        assert_eq!(dest, [0u8, 0, 1, 2, 3, 4, 0, 0]);
    }

    /// A 2x3 chunk at offset (1, 1) is written row by row into a 4x4 buffer.
    #[test]
    fn test_copy_chunk_2d_rowwise() {
        let src = vec![1u8, 2, 3, 4, 5, 6];
        let mut dest = vec![0u8; 16];
        let origin = vec![1u64, 1u64];
        let extent = vec![2u64, 3u64];
        let bounds = vec![4u64, 4u64];

        copy_chunk_to_flat(&src, &mut dest, &origin, &extent, &bounds, 1).unwrap();

        #[rustfmt::skip]
        let expected = [
            0u8, 0, 0, 0,
            0,   1, 2, 3,
            0,   4, 5, 6,
            0,   0, 0, 0,
        ];
        assert_eq!(dest, expected);
    }

    /// The 2x3 window of a 4x4 chunk holding 1..=16 is extracted into the
    /// result buffer.
    #[test]
    fn test_copy_unit_stride_chunk_overlap_2d_partial() {
        let src: Vec<u8> = (1..=16).collect();
        let mut window = vec![0u8; 6];
        let selection = window_selection();

        copy_unit_stride_chunk_overlap(&src, &mut window, window_layout(&selection)).unwrap();

        assert_eq!(window, [6u8, 7, 8, 10, 11, 12]);
    }

    /// Three of the four chunks of a 2x2 chunk grid are present, so coverage
    /// is reported as incomplete.
    #[test]
    fn test_chunk_grid_coverage_detects_missing_chunk() {
        let mut entries: Vec<_> = [
            ([0u64, 0], 0x1000u64),
            ([0, 2], 0x2000),
            ([2, 0], 0x3000),
        ]
        .iter()
        .map(|(offsets, address)| chunk_entry(offsets, *address))
        .collect();

        let covered =
            validate_chunk_grid_coverage(&mut entries, &[4, 4], &[2, 2], &[0, 0], &[1, 1]).unwrap();

        assert!(!covered);
    }

    /// Two entries sharing the offsets (0, 0) are rejected as invalid data.
    #[test]
    fn test_chunk_grid_coverage_rejects_duplicate_offsets() {
        let mut entries: Vec<_> = [
            ([0u64, 0], 0x1000u64),
            ([0, 0], 0x2000),
            ([0, 2], 0x3000),
            ([2, 0], 0x4000),
        ]
        .iter()
        .map(|(offsets, address)| chunk_entry(offsets, *address))
        .collect();

        let outcome =
            validate_chunk_grid_coverage(&mut entries, &[4, 4], &[2, 2], &[0, 0], &[1, 1]);

        assert!(matches!(outcome, Err(Error::InvalidData(_))));
    }

    /// A 2x3 chunk of four-byte elements must decode to exactly 24 bytes.
    #[test]
    fn test_decoded_chunk_len_requires_exact_size() {
        let entry = chunk_entry(&[0, 0], 0x1000);

        validate_decoded_chunk_len(&entry, &[2, 3], 4, 24).unwrap();
        let one_short = validate_decoded_chunk_len(&entry, &[2, 3], 4, 23);

        assert!(matches!(one_short, Err(Error::InvalidData(_))));
    }

    /// Five bytes cannot fill a 2x3 chunk, so the copy fails with invalid data.
    #[test]
    fn test_copy_chunk_errors_on_short_row() {
        let src = vec![1u8, 2, 3, 4, 5];
        let mut dest = vec![0u8; 16];
        let origin = vec![1u64, 1u64];
        let extent = vec![2u64, 3u64];
        let bounds = vec![4u64, 4u64];

        let outcome = copy_chunk_to_flat(&src, &mut dest, &origin, &extent, &bounds, 1);

        assert!(matches!(outcome, Err(Error::InvalidData(_))));
    }

    /// Seven bytes are too few for the 2x3 window of a 4x4 chunk, so the copy
    /// fails with invalid data.
    #[test]
    fn test_copy_unit_stride_chunk_overlap_errors_on_short_row() {
        let src: Vec<u8> = (1..=7).collect();
        let mut window = vec![0u8; 6];
        let selection = window_selection();

        let outcome =
            copy_unit_stride_chunk_overlap(&src, &mut window, window_layout(&selection));

        assert!(matches!(outcome, Err(Error::InvalidData(_))));
    }
}