1use std::mem::MaybeUninit;
2use std::num::NonZeroUsize;
3use std::sync::{Arc, OnceLock};
4
5use lru::LruCache;
6use ndarray::{ArrayD, IxDyn};
7use parking_lot::Mutex;
8#[cfg(feature = "rayon")]
9use rayon::prelude::*;
10use smallvec::SmallVec;
11
12use crate::attribute_api::{
13 collect_attribute_messages_storage, decode_string, decode_varlen_byte_string,
14 read_one_vlen_string_storage, resolve_vlen_bytes_storage, Attribute,
15};
16use crate::cache::{ChunkCache, ChunkKey};
17use crate::chunk_index;
18use crate::datatype_api::{dtype_element_size, H5Type};
19use crate::error::{Error, Result};
20use crate::filters::{self, FilterRegistry};
21use crate::io::Cursor;
22use crate::messages::attribute::AttributeMessage;
23use crate::messages::dataspace::{DataspaceMessage, DataspaceType};
24use crate::messages::datatype::{Datatype, StringSize};
25use crate::messages::fill_value::{FillTime, FillValueMessage};
26use crate::messages::filter_pipeline::FilterPipelineMessage;
27use crate::messages::layout::{ChunkIndexing, DataLayout};
28use crate::messages::HdfMessage;
29use crate::object_header::ObjectHeader;
30use crate::FileContext;
31
/// Upper bound (32 MiB) on the decoded size of a dataset that is kept in the
/// hot `full_dataset_bytes` cache after a full read.
const HOT_FULL_DATASET_CACHE_MAX_BYTES: usize = 32 * 1024 * 1024;

/// Raw pointer + length of a destination buffer that chunk copies write into.
/// Exists so the destination can be captured by rayon closures; safety relies
/// on each chunk writing a disjoint region of the buffer.
#[derive(Clone, Copy)]
struct FlatBufferPtr {
    ptr: *mut u8,
    len: usize,
}

/// Geometry needed to scatter one chunk into a flat row-major dataset buffer.
#[derive(Clone, Copy)]
struct ChunkCopyLayout<'a> {
    chunk_offsets: &'a [u64],
    chunk_shape: &'a [u64],
    dataset_shape: &'a [u64],
    dataset_strides: &'a [usize],
    chunk_strides: &'a [usize],
    elem_size: usize,
}

/// Geometry for copying the overlap between one chunk and a step-1 slice
/// selection into the (smaller) result buffer.
#[derive(Clone, Copy)]
struct UnitStrideCopyLayout<'a> {
    chunk_offsets: &'a [u64],
    chunk_shape: &'a [u64],
    dataset_shape: &'a [u64],
    resolved: &'a ResolvedSelection,
    chunk_strides: &'a [usize],
    result_strides: &'a [usize],
    elem_size: usize,
}

/// File-level state handed to `Dataset::from_parsed_header`.
pub(crate) struct DatasetParseContext {
    pub(crate) context: Arc<FileContext>,
}

/// Parameters describing which chunk-index entries to collect for a read.
#[derive(Clone, Copy)]
struct ChunkEntrySelection<'a> {
    shape: &'a [u64],
    ndim: usize,
    elem_size: usize,
    // Presumably inclusive (first_chunk, last_chunk) coordinate bounds that
    // restrict collection to chunks overlapping a slice selection; `None`
    // collects every chunk — TODO confirm against collect_chunk_entries.
    chunk_bounds: Option<(&'a [u64], &'a [u64])>,
}

// SAFETY: FlatBufferPtr is just a (pointer, len) pair. Sending it across
// threads is sound only because the parallel chunk copies write disjoint
// regions of the target buffer — NOTE(review): the parallel reader's
// duplicate-chunk-offset check is what backs this; confirm all call sites.
unsafe impl Send for FlatBufferPtr {}

// SAFETY: see the `Send` impl above — shared use is limited to disjoint
// per-chunk writes through the raw pointer.
unsafe impl Sync for FlatBufferPtr {}
76
impl FlatBufferPtr {
    /// Scatter one decoded chunk into the destination buffer.
    ///
    /// # Safety
    /// `self` must describe a live buffer of `len` bytes, and the region this
    /// chunk maps to must not be written concurrently by another thread.
    #[cfg(feature = "rayon")]
    #[inline(always)]
    unsafe fn copy_chunk(self, chunk_data: &[u8], layout: ChunkCopyLayout<'_>) {
        copy_chunk_to_flat_with_strides_ptr(chunk_data, self, layout);
    }

    /// Copy individually selected elements (strided selection) from a chunk
    /// into the destination buffer.
    ///
    /// # Safety
    /// Same contract as `copy_chunk`: live buffer, disjoint writes across
    /// threads, and indices/strides consistent with `self.len`.
    #[cfg(feature = "rayon")]
    #[inline(always)]
    unsafe fn copy_selected(
        self,
        chunk_data: &[u8],
        dim_indices: &[Vec<(usize, usize)>],
        chunk_strides: &[usize],
        result_strides: &[usize],
        elem_size: usize,
        ndim: usize,
    ) {
        copy_selected_elements_ptr(
            chunk_data,
            self.ptr,
            self.len,
            dim_indices,
            chunk_strides,
            result_strides,
            elem_size,
            ndim,
        );
    }

    /// Copy the overlap between a chunk and a unit-stride (step-1) selection.
    ///
    /// # Safety
    /// Same contract as `copy_chunk`; the layout must be consistent with the
    /// buffer described by `self`.
    #[cfg(feature = "rayon")]
    #[inline(always)]
    unsafe fn copy_unit_stride_chunk_overlap(
        self,
        chunk_data: &[u8],
        layout: UnitStrideCopyLayout<'_>,
    ) -> Result<()> {
        copy_unit_stride_chunk_overlap_ptr(chunk_data, self, layout)
    }
}
117
/// A hyperslab-style selection: one element per dataset dimension.
#[derive(Debug, Clone)]
pub struct SliceInfo {
    pub selections: Vec<SliceInfoElem>,
}

/// Selection for a single dimension.
#[derive(Debug, Clone)]
pub enum SliceInfoElem {
    /// Select exactly one coordinate; the dimension is collapsed in the result.
    Index(u64),
    /// Half-open range `[start, end)` with a stride; `end == u64::MAX` means
    /// "to the end of the dimension".
    Slice { start: u64, end: u64, step: u64 },
}

/// A `SliceInfoElem` validated and clamped against a concrete dimension:
/// resolved `[start, end)` bounds, step, and resulting element count.
#[derive(Clone, Debug)]
struct ResolvedSelectionDim {
    start: u64,
    end: u64,
    step: u64,
    count: usize,
}

/// Key for the per-dataset LRU of chunk-index entries: the index address plus
/// the inclusive chunk-coordinate bounds the entries were collected for.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct ChunkEntryCacheKey {
    index_address: u64,
    first_chunk: SmallVec<[u64; 4]>,
    last_chunk: SmallVec<[u64; 4]>,
}
147
148impl ResolvedSelectionDim {
149 fn chunk_index_range(&self, chunk_extent: u64) -> Option<(u64, u64)> {
150 if self.count == 0 {
151 return None;
152 }
153
154 Some((self.start / chunk_extent, (self.end - 1) / chunk_extent))
155 }
156}
157
/// A full selection after validation: per-dimension ranges plus the shape and
/// element count of the resulting array (collapsed `Index` dims omitted from
/// `result_shape`).
#[derive(Clone, Debug)]
struct ResolvedSelection {
    dims: Vec<ResolvedSelectionDim>,
    result_shape: Vec<usize>,
    result_elements: usize,
}
164
165impl ResolvedSelection {
166 fn result_dims_with_collapsed(&self) -> Vec<usize> {
167 self.dims.iter().map(|dim| dim.count).collect()
168 }
169
170 fn is_unit_stride(&self) -> bool {
171 self.dims.iter().all(|dim| dim.step == 1)
172 }
173}
174
175impl SliceInfo {
176 pub fn all(ndim: usize) -> Self {
178 SliceInfo {
179 selections: vec![
180 SliceInfoElem::Slice {
181 start: 0,
182 end: u64::MAX,
183 step: 1,
184 };
185 ndim
186 ],
187 }
188 }
189}
190
191fn checked_usize(value: u64, context: &str) -> Result<usize> {
192 usize::try_from(value).map_err(|_| {
193 Error::InvalidData(format!(
194 "{context} value {value} exceeds platform usize capacity"
195 ))
196 })
197}
198
199fn checked_mul_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
200 lhs.checked_mul(rhs)
201 .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
202}
203
204fn checked_add_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
205 lhs.checked_add(rhs)
206 .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
207}
208
209fn checked_shape_elements_usize(shape: &[u64], context: &str) -> Result<usize> {
210 let mut total = 1usize;
211 for &dim in shape {
212 total = checked_mul_usize(total, checked_usize(dim, context)?, context)?;
213 }
214 Ok(total)
215}
216
217fn expected_chunk_count(first_chunk: &[u64], last_chunk: &[u64]) -> Result<usize> {
218 let mut total = 1usize;
219 for (&first, &last) in first_chunk.iter().zip(last_chunk.iter()) {
220 let dim_count = checked_usize(last - first + 1, "selected chunk count")?;
221 total = checked_mul_usize(total, dim_count, "selected chunk count")?;
222 }
223 Ok(total)
224}
225
226fn full_dataset_chunk_count(shape: &[u64], chunk_shape: &[u64]) -> Result<usize> {
227 let mut total = 1usize;
228 for (&dim, &chunk) in shape.iter().zip(chunk_shape.iter()) {
229 let chunk_count = checked_usize(dim.div_ceil(chunk), "full dataset chunk count")?;
230 total = checked_mul_usize(total, chunk_count, "full dataset chunk count")?;
231 }
232 Ok(total)
233}
234
235fn row_major_strides(shape: &[u64], context: &str) -> Result<Vec<usize>> {
236 let ndim = shape.len();
237 if ndim == 0 {
238 return Ok(Vec::new());
239 }
240
241 let mut strides = vec![1usize; ndim];
242 for i in (0..ndim - 1).rev() {
243 let next_extent = checked_usize(shape[i + 1], context)?;
244 strides[i] = checked_mul_usize(strides[i + 1], next_extent, context)?;
245 }
246 Ok(strides)
247}
248
/// Reinterpret a fully-initialized `Vec<MaybeUninit<u8>>` as `Vec<u8>`.
///
/// The caller must have written every element before calling; reading bytes
/// that were never initialized is undefined behavior.
fn assume_init_u8_vec(buffer: Vec<MaybeUninit<u8>>) -> Vec<u8> {
    let mut buffer = std::mem::ManuallyDrop::new(buffer);
    let ptr = buffer.as_mut_ptr().cast::<u8>();
    let (len, capacity) = (buffer.len(), buffer.capacity());
    // SAFETY: `MaybeUninit<u8>` has the same size and alignment as `u8`, the
    // allocation came from a `Vec` with exactly this length/capacity, and
    // `ManuallyDrop` prevents the original vector from freeing it.
    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
}
256
/// Reinterpret a fully-initialized `Vec<MaybeUninit<T>>` as `Vec<T>`.
///
/// The caller must have written every element before calling; reading an
/// element that was never initialized is undefined behavior.
fn assume_init_vec<T>(buffer: Vec<MaybeUninit<T>>) -> Vec<T> {
    let mut buffer = std::mem::ManuallyDrop::new(buffer);
    let ptr = buffer.as_mut_ptr().cast::<T>();
    let (len, capacity) = (buffer.len(), buffer.capacity());
    // SAFETY: `MaybeUninit<T>` is layout-compatible with `T`, the allocation
    // came from a `Vec` with exactly this length/capacity, and `ManuallyDrop`
    // prevents a double free of the original buffer.
    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
}
264
/// Validate `selection` against the dataset `shape` and convert it into
/// per-dimension start/end/step/count form.
///
/// `Index` selections collapse their dimension (omitted from `result_shape`);
/// `Slice` selections are clamped to the dimension extent, with
/// `end == u64::MAX` meaning "to the end". Errors on dimension-count
/// mismatch, out-of-bounds index/start, or a zero step.
fn normalize_selection(selection: &SliceInfo, shape: &[u64]) -> Result<ResolvedSelection> {
    if selection.selections.len() != shape.len() {
        return Err(Error::InvalidData(format!(
            "slice has {} dimensions but dataset has {}",
            selection.selections.len(),
            shape.len()
        )));
    }

    let mut dims = Vec::with_capacity(shape.len());
    let mut result_shape = Vec::new();
    let mut result_elements = 1usize;

    for (i, sel) in selection.selections.iter().enumerate() {
        let dim_size = shape[i];
        match sel {
            SliceInfoElem::Index(idx) => {
                if *idx >= dim_size {
                    return Err(Error::SliceOutOfBounds {
                        dim: i,
                        index: *idx,
                        size: dim_size,
                    });
                }
                // A single index selects one element and collapses the
                // dimension, so it contributes nothing to `result_shape`.
                dims.push(ResolvedSelectionDim {
                    start: *idx,
                    end: *idx + 1,
                    step: 1,
                    count: 1,
                });
            }
            SliceInfoElem::Slice { start, end, step } => {
                if *step == 0 {
                    return Err(Error::InvalidData("slice step cannot be 0".into()));
                }
                if *start > dim_size {
                    return Err(Error::SliceOutOfBounds {
                        dim: i,
                        index: *start,
                        size: dim_size,
                    });
                }

                // `u64::MAX` is the "open end" sentinel; any other end is
                // clamped to the dimension extent.
                let actual_end = if *end == u64::MAX {
                    dim_size
                } else {
                    (*end).min(dim_size)
                };
                // Element count rounds up for partial steps; an empty or
                // inverted range yields 0 elements.
                let count_u64 = if *start >= actual_end {
                    0
                } else {
                    (actual_end - *start).div_ceil(*step)
                };
                let count = checked_usize(count_u64, "slice element count")?;

                dims.push(ResolvedSelectionDim {
                    start: *start,
                    end: actual_end,
                    step: *step,
                    count,
                });
                result_shape.push(count);
                result_elements =
                    checked_mul_usize(result_elements, count, "slice result element count")?;
            }
        }
    }

    Ok(ResolvedSelection {
        dims,
        result_shape,
        result_elements,
    })
}
339
/// Handle to a single HDF5 dataset: the parsed header messages plus caches
/// shared across all handles built from the same template.
pub struct Dataset {
    context: Arc<FileContext>,
    pub(crate) name: String,
    // Address supplied at parse time; exposed via `address()`.
    pub(crate) data_address: u64,
    pub(crate) dataspace: DataspaceMessage,
    pub(crate) datatype: Datatype,
    pub(crate) layout: DataLayout,
    pub(crate) fill_value: Option<FillValueMessage>,
    pub(crate) filters: Option<FilterPipelineMessage>,
    pub(crate) attributes: Vec<AttributeMessage>,
    // File-wide decoded-chunk cache.
    pub(crate) chunk_cache: Arc<ChunkCache>,
    // LRU of chunk-index entries keyed by index address + chunk-coordinate bounds.
    chunk_entry_cache: Arc<Mutex<LruCache<ChunkEntryCacheKey, Arc<Vec<chunk_index::ChunkEntry>>>>>,
    // Lazily computed list of every chunk entry in the dataset.
    full_chunk_entries: Arc<OnceLock<Arc<Vec<chunk_index::ChunkEntry>>>>,
    // Fully assembled raw dataset bytes, cached for datasets no larger than
    // HOT_FULL_DATASET_CACHE_MAX_BYTES.
    full_dataset_bytes: Arc<OnceLock<Arc<Vec<u8>>>>,
    pub(crate) filter_registry: Arc<FilterRegistry>,
}

/// Context-free snapshot of a `Dataset`, used by `Dataset::from_template` to
/// rebuild a handle against a `FileContext` while sharing the caches.
pub(crate) struct DatasetTemplate {
    name: String,
    data_address: u64,
    dataspace: DataspaceMessage,
    datatype: Datatype,
    layout: DataLayout,
    fill_value: Option<FillValueMessage>,
    filters: Option<FilterPipelineMessage>,
    attributes: Vec<AttributeMessage>,
    chunk_entry_cache: Arc<Mutex<LruCache<ChunkEntryCacheKey, Arc<Vec<chunk_index::ChunkEntry>>>>>,
    full_chunk_entries: Arc<OnceLock<Arc<Vec<chunk_index::ChunkEntry>>>>,
    full_dataset_bytes: Arc<OnceLock<Arc<Vec<u8>>>>,
}
371
372impl Dataset {
373 pub(crate) fn from_template(context: Arc<FileContext>, template: Arc<DatasetTemplate>) -> Self {
374 Dataset {
375 chunk_cache: context.chunk_cache.clone(),
376 filter_registry: context.filter_registry.clone(),
377 context,
378 name: template.name.clone(),
379 data_address: template.data_address,
380 dataspace: template.dataspace.clone(),
381 datatype: template.datatype.clone(),
382 layout: template.layout.clone(),
383 fill_value: template.fill_value.clone(),
384 filters: template.filters.clone(),
385 attributes: template.attributes.clone(),
386 chunk_entry_cache: template.chunk_entry_cache.clone(),
387 full_chunk_entries: template.full_chunk_entries.clone(),
388 full_dataset_bytes: template.full_dataset_bytes.clone(),
389 }
390 }
391
392 pub(crate) fn template(&self) -> Arc<DatasetTemplate> {
393 Arc::new(DatasetTemplate {
394 name: self.name.clone(),
395 data_address: self.data_address,
396 dataspace: self.dataspace.clone(),
397 datatype: self.datatype.clone(),
398 layout: self.layout.clone(),
399 fill_value: self.fill_value.clone(),
400 filters: self.filters.clone(),
401 attributes: self.attributes.clone(),
402 chunk_entry_cache: self.chunk_entry_cache.clone(),
403 full_chunk_entries: self.full_chunk_entries.clone(),
404 full_dataset_bytes: self.full_dataset_bytes.clone(),
405 })
406 }
407
    /// Build a `Dataset` from an already-parsed object header.
    ///
    /// Extracts the dataspace, datatype, data-layout, fill-value, and
    /// filter-pipeline messages; the first three are mandatory and their
    /// absence is an `InvalidData` error. Attributes are loaded eagerly.
    pub(crate) fn from_parsed_header(
        context: DatasetParseContext,
        address: u64,
        name: String,
        header: &ObjectHeader,
    ) -> Result<Self> {
        let mut dataspace: Option<DataspaceMessage> = None;
        let mut datatype: Option<Datatype> = None;
        let mut layout: Option<DataLayout> = None;
        let mut fill_value: Option<FillValueMessage> = None;
        let mut filter_pipeline: Option<FilterPipelineMessage> = None;
        let attributes = collect_attribute_messages_storage(
            header,
            context.context.storage.as_ref(),
            context.context.superblock.offset_size,
            context.context.superblock.length_size,
        )?;

        // Later messages of the same kind overwrite earlier ones.
        for msg in &header.messages {
            match msg {
                HdfMessage::Dataspace(ds) => dataspace = Some(ds.clone()),
                HdfMessage::Datatype(dt) => datatype = Some(dt.datatype.clone()),
                HdfMessage::DataLayout(dl) => layout = Some(dl.layout.clone()),
                HdfMessage::FillValue(fv) => fill_value = Some(fv.clone()),
                HdfMessage::FilterPipeline(fp) => filter_pipeline = Some(fp.clone()),
                _ => {}
            }
        }

        let dataspace =
            dataspace.ok_or_else(|| Error::InvalidData("dataset missing dataspace".into()))?;
        let dt = datatype.ok_or_else(|| Error::InvalidData("dataset missing datatype".into()))?;
        let layout =
            layout.ok_or_else(|| Error::InvalidData("dataset missing data layout".into()))?;
        let layout = normalize_layout(layout, &dataspace);
        // A single-element "_FillValue" attribute (netCDF-style convention —
        // TODO confirm) can supply the fill value when the header's
        // fill-value message carries no value of its own.
        let attr_fill_value = attributes
            .iter()
            .find(|attr| attr.name == "_FillValue" && attr.dataspace.num_elements() == 1)
            .map(|attr| FillValueMessage {
                defined: !attr.raw_data.is_empty(),
                fill_time: FillTime::IfSet,
                value: Some(attr.raw_data.clone()),
            });
        let fill_value = match fill_value {
            Some(existing) if existing.value.is_some() => Some(existing),
            _ => attr_fill_value,
        };

        Ok(Dataset {
            context: context.context.clone(),
            name,
            data_address: address,
            dataspace,
            datatype: dt,
            layout,
            fill_value,
            filters: filter_pipeline,
            attributes,
            chunk_cache: context.context.chunk_cache.clone(),
            chunk_entry_cache: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(32).unwrap()))),
            full_chunk_entries: Arc::new(OnceLock::new()),
            full_dataset_bytes: Arc::new(OnceLock::new()),
            filter_registry: context.context.filter_registry.clone(),
        })
    }
473
    /// Dataset name.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Address recorded when this dataset was parsed.
    pub fn address(&self) -> u64 {
        self.data_address
    }

    /// Dimension extents of the dataspace.
    pub fn shape(&self) -> &[u64] {
        &self.dataspace.dims
    }

    /// On-disk datatype.
    pub fn dtype(&self) -> &Datatype {
        &self.datatype
    }

    /// Number of dimensions.
    pub fn ndim(&self) -> usize {
        self.dataspace.dims.len()
    }

    // Byte width of file offsets, from the superblock.
    fn offset_size(&self) -> u8 {
        self.context.superblock.offset_size
    }

    // Byte width of file lengths, from the superblock.
    fn length_size(&self) -> u8 {
        self.context.superblock.length_size
    }

    /// Maximum dimension extents, when the dataspace declares them.
    pub fn max_dims(&self) -> Option<&[u64]> {
        self.dataspace.max_dims.as_deref()
    }

    /// Chunk dimension extents for chunked layouts, `None` otherwise.
    pub fn chunks(&self) -> Option<Vec<u32>> {
        match &self.layout {
            DataLayout::Chunked { dims, .. } => Some(dims.clone()),
            _ => None,
        }
    }

    /// Fill-value message, when one was parsed or synthesized from a
    /// `_FillValue` attribute.
    pub fn fill_value(&self) -> Option<&FillValueMessage> {
        self.fill_value.as_ref()
    }

    /// All attributes attached to this dataset.
    pub fn attributes(&self) -> Vec<Attribute> {
        self.attributes
            .iter()
            .map(|a| attribute_from_message_storage(a, self.context.as_ref()))
            .collect()
    }

    /// Look up a single attribute by name; `AttributeNotFound` if absent.
    pub fn attribute(&self, name: &str) -> Result<Attribute> {
        self.attributes
            .iter()
            .find(|a| a.name == name)
            .map(|a| attribute_from_message_storage(a, self.context.as_ref()))
            .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
    }
542
543 pub fn read_string(&self) -> Result<String> {
547 let mut strings = self.read_strings()?;
548 match strings.len() {
549 1 => Ok(strings.swap_remove(0)),
550 0 => Err(Error::InvalidData(format!(
551 "dataset '{}' contains no string elements",
552 self.name
553 ))),
554 count => Err(Error::InvalidData(format!(
555 "dataset '{}' contains {count} string elements; use read_strings()",
556 self.name
557 ))),
558 }
559 }
560
    /// Decode every element of a string-typed dataset.
    ///
    /// Supports fixed-length strings, variable-length strings, and
    /// variable-length sequences of 1-byte integers (decoded as byte
    /// strings). Any other datatype yields `TypeMismatch`.
    pub fn read_strings(&self) -> Result<Vec<String>> {
        match &self.datatype {
            Datatype::String {
                size: StringSize::Fixed(len),
                encoding,
                padding,
            } => {
                let raw = self.read_raw_bytes()?;
                let elem_size = *len as usize;
                let count = checked_usize(self.num_elements(), "dataset string element count")?;
                let expected_bytes =
                    checked_mul_usize(count, elem_size, "dataset string byte size")?;
                if raw.len() < expected_bytes {
                    return Err(Error::InvalidData(format!(
                        "dataset '{}' string data too short: need {} bytes, have {}",
                        self.name,
                        expected_bytes,
                        raw.len()
                    )));
                }

                // Fixed-size strings are stored inline, one element every
                // `elem_size` bytes.
                let mut strings = Vec::with_capacity(count);
                for i in 0..count {
                    let start = i * elem_size;
                    let end = start + elem_size;
                    strings.push(decode_string(&raw[start..end], *padding, *encoding)?);
                }
                Ok(strings)
            }
            Datatype::String {
                size: StringSize::Variable,
                encoding,
                padding,
            } => {
                let raw = self.read_raw_bytes()?;
                let count = checked_usize(self.num_elements(), "dataset string element count")?;
                // Each stored element is a vlen reference of
                // 4 + offset_size + 4 bytes; the helper resolves it against
                // the file's global heap storage.
                let ref_size = 4 + self.offset_size() as usize + 4;
                let expected_bytes =
                    checked_mul_usize(count, ref_size, "dataset string reference byte size")?;
                if raw.len() < expected_bytes {
                    return Err(Error::InvalidData(format!(
                        "dataset '{}' vlen string data too short: need {} bytes, have {}",
                        self.name,
                        expected_bytes,
                        raw.len()
                    )));
                }

                let mut strings = Vec::with_capacity(count);
                for i in 0..count {
                    let offset = i * ref_size;
                    strings.push(read_one_vlen_string_storage(
                        &raw,
                        offset,
                        self.context.storage.as_ref(),
                        self.offset_size(),
                        self.length_size(),
                        *padding,
                        *encoding,
                    )?);
                }
                Ok(strings)
            }
            Datatype::VarLen { base } => {
                // Only vlen sequences of 1-byte fixed-point elements are
                // treated as byte strings.
                if !matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. }) {
                    return Err(Error::TypeMismatch {
                        expected: "String dataset".into(),
                        actual: format!("{:?}", self.datatype),
                    });
                }

                let raw = self.read_raw_bytes()?;
                let count = checked_usize(self.num_elements(), "dataset string element count")?;
                let ref_size = 4 + self.offset_size() as usize + 4;
                let expected_bytes =
                    checked_mul_usize(count, ref_size, "dataset string reference byte size")?;
                if raw.len() < expected_bytes {
                    return Err(Error::InvalidData(format!(
                        "dataset '{}' vlen byte string data too short: need {} bytes, have {}",
                        self.name,
                        expected_bytes,
                        raw.len()
                    )));
                }

                let mut strings = Vec::with_capacity(count);
                for i in 0..count {
                    let offset = i * ref_size;
                    let ref_bytes = &raw[offset..offset + ref_size];
                    // A reference that cannot be resolved decodes as an empty
                    // string rather than failing the whole read.
                    let value = resolve_vlen_bytes_storage(
                        ref_bytes,
                        self.context.storage.as_ref(),
                        self.offset_size(),
                        self.length_size(),
                    )
                    .unwrap_or_default();
                    strings.push(decode_varlen_byte_string(&value)?);
                }
                Ok(strings)
            }
            _ => Err(Error::TypeMismatch {
                expected: "String dataset".into(),
                actual: format!("{:?}", self.datatype),
            }),
        }
    }
668
669 pub fn num_elements(&self) -> u64 {
671 if self.dataspace.dims.is_empty() {
672 match self.dataspace.dataspace_type {
673 DataspaceType::Scalar => 1,
674 DataspaceType::Null => 0,
675 DataspaceType::Simple => 0,
676 }
677 } else {
678 self.dataspace.dims.iter().product()
679 }
680 }
681
    /// Read the entire dataset as a typed n-dimensional array, dispatching
    /// on the storage layout. Errors carry the dataset name as context.
    pub fn read_array<T: H5Type>(&self) -> Result<ArrayD<T>> {
        let result = match &self.layout {
            DataLayout::Compact { data } => self.read_compact::<T>(data),
            DataLayout::Contiguous { address, size } => self.read_contiguous::<T>(*address, *size),
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked::<T>(*address, dims, *element_size, chunk_indexing.as_ref()),
        };
        result.map_err(|e| e.with_context(&self.name))
    }

    /// Read the entire dataset, processing chunks on the global rayon pool.
    /// Non-chunked layouts fall back to the serial `read_array`.
    #[cfg(feature = "rayon")]
    pub fn read_array_parallel<T: H5Type>(&self) -> Result<ArrayD<T>> {
        match &self.layout {
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked_parallel::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
            ),
            _ => self.read_array::<T>(),
        }
    }

    /// Like `read_array_parallel`, but runs the chunk work inside the given
    /// thread pool instead of the global rayon pool.
    #[cfg(feature = "rayon")]
    pub fn read_array_in_pool<T: H5Type>(&self, pool: &rayon::ThreadPool) -> Result<ArrayD<T>> {
        match &self.layout {
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => pool.install(|| {
                self.read_chunked_parallel::<T>(
                    *address,
                    dims,
                    *element_size,
                    chunk_indexing.as_ref(),
                )
            }),
            _ => self.read_array::<T>(),
        }
    }

    /// Read a sub-selection in parallel. Non-chunked layouts fall back to
    /// the serial `read_slice`.
    #[cfg(feature = "rayon")]
    pub fn read_slice_parallel<T: H5Type>(&self, selection: &SliceInfo) -> Result<ArrayD<T>> {
        let resolved = normalize_selection(selection, &self.dataspace.dims)?;

        match &self.layout {
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked_slice_parallel::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
                selection,
                &resolved,
            ),
            _ => self.read_slice::<T>(selection),
        }
    }

    /// Read the sub-selection of the dataset described by `selection`,
    /// dispatching on the storage layout.
    pub fn read_slice<T: H5Type>(&self, selection: &SliceInfo) -> Result<ArrayD<T>> {
        let resolved = normalize_selection(selection, &self.dataspace.dims)?;

        match &self.layout {
            DataLayout::Contiguous { address, size } => {
                self.read_contiguous_slice::<T>(*address, *size, selection, &resolved)
            }
            DataLayout::Compact { data } => self.read_compact_slice::<T>(data, selection),
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked_slice::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
                selection,
                &resolved,
            ),
        }
    }
791
    // Decode a compact-layout dataset whose bytes live inline in the header.
    fn read_compact<T: H5Type>(&self, data: &[u8]) -> Result<ArrayD<T>> {
        self.decode_raw_data::<T>(data)
    }

    /// Assemble the dataset's raw (undecoded) bytes regardless of layout.
    ///
    /// The result is always `num_elements * elem_size` bytes; unallocated
    /// regions come back fill-initialized. Errors carry the dataset name.
    fn read_raw_bytes(&self) -> Result<Vec<u8>> {
        let elem_size = dtype_element_size(&self.datatype);
        let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
        let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;

        let result = match &self.layout {
            DataLayout::Compact { data } => Ok(self.normalize_raw_bytes(data, total_bytes)),
            DataLayout::Contiguous { address, size } => {
                self.read_contiguous_bytes(*address, *size, total_bytes)
            }
            DataLayout::Chunked {
                address,
                dims,
                element_size: _,
                chunk_indexing,
            } => self.read_chunked_bytes(*address, dims, chunk_indexing.as_ref(), total_bytes),
        };

        result.map_err(|e| e.with_context(&self.name))
    }

    // Read a contiguous-layout dataset; an undefined address or zero size
    // means no storage was allocated, so the fill value is returned.
    fn read_contiguous<T: H5Type>(&self, address: u64, size: u64) -> Result<ArrayD<T>> {
        if Cursor::is_undefined_offset(address, self.offset_size()) || size == 0 {
            return self.make_fill_array::<T>();
        }

        let sz = checked_usize(size, "contiguous dataset size")?;
        let raw = self.context.read_range(address, sz)?;
        self.decode_raw_data::<T>(raw.as_ref())
    }

    // Raw-bytes variant of `read_contiguous`: unallocated storage yields a
    // fill-initialized buffer of `total_bytes`.
    fn read_contiguous_bytes(
        &self,
        address: u64,
        size: u64,
        total_bytes: usize,
    ) -> Result<Vec<u8>> {
        if Cursor::is_undefined_offset(address, self.offset_size()) || size == 0 {
            return Ok(self.make_output_buffer(total_bytes));
        }

        let sz = checked_usize(size, "contiguous dataset size")?;
        let raw = self.context.read_range(address, sz)?;
        Ok(self.normalize_raw_bytes(raw.as_ref(), total_bytes))
    }
842
    /// Read a chunked dataset into a typed array.
    ///
    /// Collects the chunk-index entries, then assembles the dataset through
    /// one of three paths: a typed fast path when `T` matches the on-disk
    /// element layout, a raw-bytes path without fill initialization when all
    /// chunks are present, or a fill-initialized buffer otherwise.
    fn read_chunked<T: H5Type>(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        _element_size: u32,
        chunk_indexing: Option<&ChunkIndexing>,
    ) -> Result<ArrayD<T>> {
        // No chunk index allocated: the dataset is all fill values.
        if Cursor::is_undefined_offset(index_address, self.offset_size()) {
            return self.make_fill_array::<T>();
        }

        let ndim = self.ndim();
        let shape = &self.dataspace.dims;
        let elem_size = dtype_element_size(&self.datatype);
        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
        let dataset_strides = row_major_strides(shape, "dataset stride")?;
        let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;

        let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
        let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;

        let entries = self.collect_chunk_entries(
            index_address,
            chunk_dims,
            chunk_indexing,
            ChunkEntrySelection {
                shape,
                ndim,
                elem_size,
                chunk_bounds: None,
            },
        )?;

        // With every chunk present we can skip fill-value initialization:
        // the chunks tile the grid, so each output byte is written once.
        let full_chunk_coverage = entries.len() == full_dataset_chunk_count(shape, &chunk_shape)?;
        if full_chunk_coverage {
            // Serve small datasets straight from the hot full-dataset cache.
            let hot_full_dataset_bytes = if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                self.full_dataset_bytes.get().cloned()
            } else {
                None
            };
            if let Some(cached_bytes) = hot_full_dataset_bytes {
                return self.decode_raw_data::<T>(&cached_bytes);
            }
            // Typed fast path: T's in-memory layout matches the stored bytes,
            // so chunks are copied directly into the typed output buffer.
            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
                let mut result_values: Vec<MaybeUninit<T>> =
                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
                        .take(total_elements)
                        .collect();
                let result_ptr = result_values.as_mut_ptr() as *mut u8;
                let result_len = checked_mul_usize(
                    result_values.len(),
                    std::mem::size_of::<T>(),
                    "typed dataset size in bytes",
                )?;

                for entry in &entries {
                    let chunk_data =
                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
                    // SAFETY: pointer/len describe the live result buffer;
                    // full chunk coverage means every element gets written
                    // before the buffer is assumed initialized below.
                    unsafe {
                        copy_chunk_to_flat_with_strides_ptr(
                            &chunk_data,
                            FlatBufferPtr {
                                ptr: result_ptr,
                                len: result_len,
                            },
                            ChunkCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                dataset_strides: &dataset_strides,
                                chunk_strides: &chunk_strides,
                                elem_size,
                            },
                        );
                    }
                }

                // Populate the hot cache with a byte copy of the result.
                if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                    let mut cached_bytes = vec![0u8; total_bytes];
                    // SAFETY: size_of::<T>() == elem_size, so the result
                    // buffer holds exactly total_bytes initialized bytes.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            result_ptr,
                            cached_bytes.as_mut_ptr(),
                            total_bytes,
                        );
                    }
                    let _ = self.full_dataset_bytes.set(Arc::new(cached_bytes));
                }

                let mut result_shape = Vec::with_capacity(shape.len());
                for &dim in shape {
                    result_shape.push(checked_usize(dim, "dataset dimension")?);
                }
                let result_values = assume_init_vec(result_values);
                return ArrayD::from_shape_vec(IxDyn(&result_shape), result_values)
                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
            }

            // Full coverage but T needs decoding: assemble the raw bytes
            // without pre-filling, then decode once at the end.
            let mut flat_data = vec![MaybeUninit::<u8>::uninit(); total_bytes];
            let flat_ptr = flat_data.as_mut_ptr() as *mut u8;
            let flat_len = flat_data.len();

            for entry in &entries {
                let chunk_data =
                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
                // SAFETY: as above — full coverage initializes every byte.
                unsafe {
                    copy_chunk_to_flat_with_strides_ptr(
                        &chunk_data,
                        FlatBufferPtr {
                            ptr: flat_ptr,
                            len: flat_len,
                        },
                        ChunkCopyLayout {
                            chunk_offsets: &entry.offsets,
                            chunk_shape: &chunk_shape,
                            dataset_shape: shape,
                            dataset_strides: &dataset_strides,
                            chunk_strides: &chunk_strides,
                            elem_size,
                        },
                    );
                }
            }

            let flat_data = assume_init_u8_vec(flat_data);
            if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
            }
            return self.decode_raw_data::<T>(&flat_data);
        }

        // Partial coverage: start from a fill-initialized buffer and
        // overwrite only the regions that have stored chunks.
        let mut flat_data = self.make_output_buffer(total_bytes);
        for entry in &entries {
            let chunk_data = self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
            copy_chunk_to_flat_with_strides(
                &chunk_data,
                &mut flat_data,
                ChunkCopyLayout {
                    chunk_offsets: &entry.offsets,
                    chunk_shape: &chunk_shape,
                    dataset_shape: shape,
                    dataset_strides: &dataset_strides,
                    chunk_strides: &chunk_strides,
                    elem_size,
                },
            );
        }

        self.decode_raw_data::<T>(&flat_data)
    }
994
    /// Assemble the raw bytes of a chunked dataset into a single flat,
    /// row-major buffer of `total_bytes`, checking the hot full-dataset
    /// cache first and populating it when every chunk is present.
    fn read_chunked_bytes(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        chunk_indexing: Option<&ChunkIndexing>,
        total_bytes: usize,
    ) -> Result<Vec<u8>> {
        // No chunk index allocated: return a fill-initialized buffer.
        if Cursor::is_undefined_offset(index_address, self.offset_size()) {
            return Ok(self.make_output_buffer(total_bytes));
        }

        let ndim = self.ndim();
        let shape = &self.dataspace.dims;
        let elem_size = dtype_element_size(&self.datatype);
        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
        let dataset_strides = row_major_strides(shape, "dataset stride")?;
        let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;

        let entries = self.collect_chunk_entries(
            index_address,
            chunk_dims,
            chunk_indexing,
            ChunkEntrySelection {
                shape,
                ndim,
                elem_size,
                chunk_bounds: None,
            },
        )?;

        // Fully-covered small datasets may already be cached from an
        // earlier read.
        let full_chunk_coverage = entries.len() == full_dataset_chunk_count(shape, &chunk_shape)?;
        if full_chunk_coverage && total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
            if let Some(cached_bytes) = self.full_dataset_bytes.get() {
                return Ok(cached_bytes.as_ref().clone());
            }
        }

        let mut flat_data = self.make_output_buffer(total_bytes);
        for entry in &entries {
            let chunk_data = self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
            copy_chunk_to_flat_with_strides(
                &chunk_data,
                &mut flat_data,
                ChunkCopyLayout {
                    chunk_offsets: &entry.offsets,
                    chunk_shape: &chunk_shape,
                    dataset_shape: shape,
                    dataset_strides: &dataset_strides,
                    chunk_strides: &chunk_strides,
                    elem_size,
                },
            );
        }

        if full_chunk_coverage && total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
            let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
        }

        Ok(flat_data)
    }
1055
1056 #[cfg(feature = "rayon")]
1057 fn read_chunked_parallel<T: H5Type>(
1058 &self,
1059 index_address: u64,
1060 chunk_dims: &[u32],
1061 _element_size: u32,
1062 chunk_indexing: Option<&ChunkIndexing>,
1063 ) -> Result<ArrayD<T>> {
1064 if Cursor::is_undefined_offset(index_address, self.offset_size()) {
1065 return self.make_fill_array::<T>();
1066 }
1067
1068 let ndim = self.ndim();
1069 let shape = &self.dataspace.dims;
1070 let elem_size = dtype_element_size(&self.datatype);
1071 let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
1072 let dataset_strides = row_major_strides(shape, "dataset stride")?;
1073 let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;
1074 let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
1075 let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;
1076
1077 let mut entries = self.collect_chunk_entries(
1078 index_address,
1079 chunk_dims,
1080 chunk_indexing,
1081 ChunkEntrySelection {
1082 shape,
1083 ndim,
1084 elem_size,
1085 chunk_bounds: None,
1086 },
1087 )?;
1088
1089 entries.sort_by(|a, b| a.offsets.cmp(&b.offsets));
1093 for i in 1..entries.len() {
1094 if entries[i].offsets == entries[i - 1].offsets {
1095 return Err(Error::InvalidData(format!(
1096 "duplicate chunk output offsets {:?} (addresses {:#x} and {:#x})",
1097 entries[i].offsets,
1098 entries[i - 1].address,
1099 entries[i].address
1100 )));
1101 }
1102 }
1103
1104 let full_chunk_coverage = entries.len() == full_dataset_chunk_count(shape, &chunk_shape)?;
1105 if full_chunk_coverage {
1106 if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1107 if let Some(cached_bytes) = self.full_dataset_bytes.get() {
1108 return self.decode_raw_data::<T>(cached_bytes);
1109 }
1110 }
1111 if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
1112 let mut result_values: Vec<MaybeUninit<T>> =
1113 std::iter::repeat_with(MaybeUninit::<T>::uninit)
1114 .take(total_elements)
1115 .collect();
1116 let flat = FlatBufferPtr {
1117 ptr: result_values.as_mut_ptr() as *mut u8,
1118 len: checked_mul_usize(
1119 result_values.len(),
1120 std::mem::size_of::<T>(),
1121 "typed dataset size in bytes",
1122 )?,
1123 };
1124
1125 entries
1126 .par_iter()
1127 .map(|entry| {
1128 self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)
1129 .map(|data| unsafe {
1130 flat.copy_chunk(
1131 &data,
1132 ChunkCopyLayout {
1133 chunk_offsets: &entry.offsets,
1134 chunk_shape: &chunk_shape,
1135 dataset_shape: shape,
1136 dataset_strides: &dataset_strides,
1137 chunk_strides: &chunk_strides,
1138 elem_size,
1139 },
1140 );
1141 })
1142 })
1143 .collect::<std::result::Result<Vec<_>, Error>>()?;
1144
1145 let mut result_shape = Vec::with_capacity(shape.len());
1146 for &dim in shape {
1147 result_shape.push(checked_usize(dim, "dataset dimension")?);
1148 }
1149 if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1150 let mut cached_bytes = vec![0u8; total_bytes];
1151 unsafe {
1152 std::ptr::copy_nonoverlapping(
1153 flat.ptr,
1154 cached_bytes.as_mut_ptr(),
1155 total_bytes,
1156 );
1157 }
1158 let _ = self.full_dataset_bytes.set(Arc::new(cached_bytes));
1159 }
1160 let result_values = assume_init_vec(result_values);
1161 return ArrayD::from_shape_vec(IxDyn(&result_shape), result_values)
1162 .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
1163 }
1164
1165 let mut flat_data = vec![MaybeUninit::<u8>::uninit(); total_bytes];
1166 let flat = FlatBufferPtr {
1167 ptr: flat_data.as_mut_ptr() as *mut u8,
1168 len: flat_data.len(),
1169 };
1170
1171 entries
1172 .par_iter()
1173 .map(|entry| {
1174 self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)
1175 .map(|data| unsafe {
1176 flat.copy_chunk(
1177 &data,
1178 ChunkCopyLayout {
1179 chunk_offsets: &entry.offsets,
1180 chunk_shape: &chunk_shape,
1181 dataset_shape: shape,
1182 dataset_strides: &dataset_strides,
1183 chunk_strides: &chunk_strides,
1184 elem_size,
1185 },
1186 );
1187 })
1188 })
1189 .collect::<std::result::Result<Vec<_>, Error>>()?;
1190
1191 let flat_data = assume_init_u8_vec(flat_data);
1192 if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1193 let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
1194 }
1195 return self.decode_raw_data::<T>(&flat_data);
1196 }
1197
1198 let mut flat_data = self.make_output_buffer(total_bytes);
1199 let flat = FlatBufferPtr {
1200 ptr: flat_data.as_mut_ptr(),
1201 len: flat_data.len(),
1202 };
1203
1204 entries
1205 .par_iter()
1206 .map(|entry| {
1207 self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)
1208 .map(|data| unsafe {
1209 flat.copy_chunk(
1210 &data,
1211 ChunkCopyLayout {
1212 chunk_offsets: &entry.offsets,
1213 chunk_shape: &chunk_shape,
1214 dataset_shape: shape,
1215 dataset_strides: &dataset_strides,
1216 chunk_strides: &chunk_strides,
1217 elem_size,
1218 },
1219 );
1220 })
1221 })
1222 .collect::<std::result::Result<Vec<_>, Error>>()?;
1223
1224 self.decode_raw_data::<T>(&flat_data)
1225 }
1226
    /// Gather the chunk entries for this dataset, consulting two caches
    /// before touching storage.
    ///
    /// A full-dataset request (`selection.chunk_bounds == None`) is memoized
    /// once in `self.full_chunk_entries`; a bounded request is cached in the
    /// LRU `self.chunk_entry_cache`, keyed by index address plus the first/
    /// last chunk coordinates. On a miss the entries are produced by the
    /// reader matching the dataset's chunk-indexing scheme and then stored
    /// back into the appropriate cache.
    ///
    /// Returns the (cloned) list of chunk entries, or any error raised by
    /// the underlying index reader.
    fn collect_chunk_entries(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        chunk_indexing: Option<&ChunkIndexing>,
        selection: ChunkEntrySelection<'_>,
    ) -> Result<Vec<chunk_index::ChunkEntry>> {
        // Fast path: the complete entry list was already computed once.
        if selection.chunk_bounds.is_none() {
            if let Some(cached) = self.full_chunk_entries.get() {
                return Ok((**cached).clone());
            }
        }

        // Bounded selections get an LRU cache key; `None` bounds fall through
        // with no key and use the once-set full list instead.
        let cache_key =
            selection
                .chunk_bounds
                .map(|(first_chunk, last_chunk)| ChunkEntryCacheKey {
                    index_address,
                    first_chunk: SmallVec::from_slice(first_chunk),
                    last_chunk: SmallVec::from_slice(last_chunk),
                });

        if let Some(ref key) = cache_key {
            let mut cache = self.chunk_entry_cache.lock();
            if let Some(cached) = cache.get(key) {
                return Ok((**cached).clone());
            }
        }

        // Cache miss: dispatch on the chunk-indexing scheme.
        let entries = match chunk_indexing {
            // No explicit indexing message: legacy version-1 chunk B-tree.
            None => {
                self.collect_btree_v1_entries(
                    index_address,
                    selection.ndim,
                    chunk_dims,
                    selection.chunk_bounds,
                )
            }
            Some(ChunkIndexing::SingleChunk {
                filtered_size,
                filters,
            }) => Ok(vec![chunk_index::single_chunk_entry(
                index_address,
                *filtered_size,
                *filters,
                selection.ndim,
            )]),
            Some(ChunkIndexing::BTreeV2) => chunk_index::collect_v2_chunk_entries_storage(
                self.context.storage.as_ref(),
                index_address,
                self.offset_size(),
                self.length_size(),
                selection.ndim as u32,
                chunk_dims,
                selection.chunk_bounds,
            ),
            Some(ChunkIndexing::Implicit) => Ok(chunk_index::collect_implicit_chunk_entries(
                index_address,
                selection.shape,
                chunk_dims,
                selection.elem_size,
                selection.chunk_bounds,
            )),
            Some(ChunkIndexing::FixedArray { .. }) => {
                crate::fixed_array::collect_fixed_array_chunk_entries_storage(
                    self.context.storage.as_ref(),
                    index_address,
                    self.offset_size(),
                    self.length_size(),
                    selection.shape,
                    chunk_dims,
                    selection.chunk_bounds,
                )
            }
            Some(ChunkIndexing::ExtensibleArray { .. }) => {
                crate::extensible_array::collect_extensible_array_chunk_entries_storage(
                    self.context.storage.as_ref(),
                    index_address,
                    self.offset_size(),
                    self.length_size(),
                    selection.shape,
                    chunk_dims,
                    selection.chunk_bounds,
                )
            }
        }?;

        // Populate whichever cache applies; a lost race on the OnceLock-style
        // `set` is harmless (first writer wins), hence the discarded result.
        if let Some(key) = cache_key {
            let mut cache = self.chunk_entry_cache.lock();
            cache.put(key, Arc::new(entries.clone()));
        } else {
            let _ = self.full_chunk_entries.set(Arc::new(entries.clone()));
        }

        Ok(entries)
    }
1327
1328 fn collect_btree_v1_entries(
1330 &self,
1331 btree_address: u64,
1332 ndim: usize,
1333 chunk_dims: &[u32],
1334 chunk_bounds: Option<(&[u64], &[u64])>,
1335 ) -> Result<Vec<chunk_index::ChunkEntry>> {
1336 let leaves = crate::btree_v1::collect_btree_v1_leaves_storage(
1337 self.context.storage.as_ref(),
1338 btree_address,
1339 self.offset_size(),
1340 self.length_size(),
1341 Some(ndim as u32),
1342 chunk_dims,
1343 chunk_bounds,
1344 )?;
1345
1346 let mut entries = Vec::with_capacity(leaves.len());
1347 for (key, chunk_addr) in &leaves {
1348 match key {
1349 crate::btree_v1::BTreeV1Key::RawData {
1350 chunk_size,
1351 filter_mask,
1352 offsets,
1353 } => {
1354 entries.push(chunk_index::ChunkEntry {
1355 address: *chunk_addr,
1356 size: *chunk_size as u64,
1357 filter_mask: *filter_mask,
1358 offsets: offsets[..ndim].to_vec(),
1359 });
1360 }
1361 _ => {
1362 return Err(Error::InvalidData(
1363 "expected raw data key in chunk B-tree".into(),
1364 ))
1365 }
1366 }
1367 }
1368 Ok(entries)
1369 }
1370
1371 fn load_chunk_data(
1372 &self,
1373 entry: &chunk_index::ChunkEntry,
1374 dataset_addr: u64,
1375 chunk_shape: &[u64],
1376 elem_size: usize,
1377 ) -> Result<Arc<Vec<u8>>> {
1378 let cache_key = ChunkKey {
1379 dataset_addr,
1380 chunk_offsets: smallvec::SmallVec::from_slice(&entry.offsets),
1381 };
1382
1383 self.chunk_cache.get_or_insert_with(cache_key, || {
1384 let size = if entry.size > 0 {
1385 checked_usize(entry.size, "encoded chunk size")?
1386 } else {
1387 let chunk_elements =
1388 checked_shape_elements_usize(chunk_shape, "chunk element count")?;
1389 checked_mul_usize(chunk_elements, elem_size, "chunk byte size")?
1390 };
1391 let raw = self.context.read_range(entry.address, size)?;
1392
1393 if let Some(ref pipeline) = self.filters {
1394 filters::apply_pipeline(
1395 raw.as_ref(),
1396 &pipeline.filters,
1397 entry.filter_mask,
1398 elem_size,
1399 Some(&self.filter_registry),
1400 )
1401 } else {
1402 Ok(raw.to_vec())
1403 }
1404 })
1405 }
1406
    /// Read a hyperslab selection from a chunked-layout dataset (serial path).
    ///
    /// Only chunks whose coordinates overlap the selection are loaded. Three
    /// copy strategies are tried, fastest first:
    /// 1. unit-stride selection, every chunk in the bounding box present, and
    ///    a native-copy-compatible `T`: copy straight into an uninitialized
    ///    `Vec<T>`;
    /// 2. same coverage but without native `T` compatibility: copy into an
    ///    uninitialized byte buffer, then decode;
    /// 3. otherwise: start from a fill-value buffer (so missing chunks show
    ///    the fill value) and copy selected elements chunk by chunk.
    fn read_chunked_slice<T: H5Type>(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        _element_size: u32,
        chunk_indexing: Option<&ChunkIndexing>,
        _selection: &SliceInfo,
        resolved: &ResolvedSelection,
    ) -> Result<ArrayD<T>> {
        // Empty selection: produce an empty (fill-shaped) array.
        if resolved.result_elements == 0 {
            return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
        }

        // Undefined index address: no chunk index exists, return fill values.
        if Cursor::is_undefined_offset(index_address, self.offset_size()) {
            return self
                .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
        }

        let ndim = self.ndim();
        let shape = &self.dataspace.dims;
        let elem_size = dtype_element_size(&self.datatype);
        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
        // Per-dimension chunk-coordinate bounding box of the selection.
        let mut first_chunk = vec![0u64; ndim];
        let mut last_chunk = vec![0u64; ndim];
        for d in 0..ndim {
            let (first, last) = resolved.dims[d]
                .chunk_index_range(chunk_shape[d])
                .expect("zero-sized result handled above");
            first_chunk[d] = first;
            last_chunk[d] = last;
        }

        let overlapping = self.collect_chunk_entries(
            index_address,
            chunk_dims,
            chunk_indexing,
            ChunkEntrySelection {
                shape,
                ndim,
                elem_size,
                chunk_bounds: Some((&first_chunk, &last_chunk)),
            },
        )?;

        let result_total_bytes = checked_mul_usize(
            resolved.result_elements,
            elem_size,
            "slice result size in bytes",
        )?;
        // Row-major strides over the result (collapsed dims included) and
        // over a single chunk, both in elements.
        let result_dims = resolved.result_dims_with_collapsed();
        let mut result_strides = vec![1usize; ndim];
        for d in (0..ndim - 1).rev() {
            result_strides[d] =
                checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
        }
        let mut chunk_strides = vec![1usize; ndim];
        for d in (0..ndim - 1).rev() {
            chunk_strides[d] = checked_mul_usize(
                chunk_strides[d + 1],
                chunk_shape[d + 1] as usize,
                "chunk stride",
            )?;
        }
        let use_unit_stride_fast_path = resolved.is_unit_stride();
        // Fast paths require step-1 selections AND that every chunk in the
        // bounding box actually has an entry (no fill-value gaps to handle).
        let fully_covered_unit_stride = use_unit_stride_fast_path
            && overlapping.len() == expected_chunk_count(&first_chunk, &last_chunk)?;

        if fully_covered_unit_stride {
            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
                // Strategy 1: copy bytes directly into an uninitialized
                // Vec<T>; every element is written because coverage is full.
                let mut result_values: Vec<MaybeUninit<T>> =
                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
                        .take(resolved.result_elements)
                        .collect();
                let result_ptr = result_values.as_mut_ptr() as *mut u8;
                let result_len = checked_mul_usize(
                    result_values.len(),
                    std::mem::size_of::<T>(),
                    "typed slice result size in bytes",
                )?;

                for entry in &overlapping {
                    let chunk_data =
                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                    unsafe {
                        copy_unit_stride_chunk_overlap_ptr(
                            &chunk_data,
                            FlatBufferPtr {
                                ptr: result_ptr,
                                len: result_len,
                            },
                            UnitStrideCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                resolved,
                                chunk_strides: &chunk_strides,
                                result_strides: &result_strides,
                                elem_size,
                            },
                        )?;
                    }
                }

                let result_values = assume_init_vec(result_values);
                return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
            }

            // Strategy 2: uninitialized byte buffer, then element decode.
            let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
            let result_ptr = result_buf.as_mut_ptr() as *mut u8;
            let result_len = result_buf.len();

            for entry in &overlapping {
                let chunk_data =
                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                unsafe {
                    copy_unit_stride_chunk_overlap_ptr(
                        &chunk_data,
                        FlatBufferPtr {
                            ptr: result_ptr,
                            len: result_len,
                        },
                        UnitStrideCopyLayout {
                            chunk_offsets: &entry.offsets,
                            chunk_shape: &chunk_shape,
                            dataset_shape: shape,
                            resolved,
                            chunk_strides: &chunk_strides,
                            result_strides: &result_strides,
                            elem_size,
                        },
                    )?;
                }
            }

            let result_buf = assume_init_u8_vec(result_buf);
            return self.decode_buffer_with_shape::<T>(
                &result_buf,
                resolved.result_elements,
                &resolved.result_shape,
            );
        }

        // Strategy 3: general path, pre-filled with the fill value so that
        // positions not covered by any chunk keep it.
        let mut result_buf = self.make_output_buffer(result_total_bytes);

        for entry in &overlapping {
            let chunk_data = self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

            if use_unit_stride_fast_path {
                // Unit stride but partial coverage: safe slice-based copy.
                copy_unit_stride_chunk_overlap(
                    &chunk_data,
                    &mut result_buf,
                    UnitStrideCopyLayout {
                        chunk_offsets: &entry.offsets,
                        chunk_shape: &chunk_shape,
                        dataset_shape: shape,
                        resolved,
                        chunk_strides: &chunk_strides,
                        result_strides: &result_strides,
                        elem_size,
                    },
                )?;
                continue;
            }

            // Strided selection: per dimension, enumerate the selected
            // coordinates falling inside this chunk as
            // (chunk-local index, result index) pairs.
            let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
            for d in 0..ndim {
                let chunk_start = entry.offsets[d];
                let chunk_end = (chunk_start + chunk_shape[d]).min(shape[d]);
                let dim = &resolved.dims[d];
                let sel_start = dim.start;
                let sel_end = dim.end;
                let sel_step = dim.step;
                let mut indices = Vec::new();

                // First selected coordinate at or after the chunk start,
                // aligned to the selection step.
                let first_sel = if sel_start >= chunk_start {
                    sel_start
                } else {
                    let steps_to_skip = (chunk_start - sel_start).div_ceil(sel_step);
                    sel_start + steps_to_skip * sel_step
                };

                let mut sel_idx = first_sel;
                while sel_idx < sel_end && sel_idx < chunk_end {
                    let chunk_local = checked_usize(sel_idx - chunk_start, "chunk-local index")?;
                    let result_dim_idx =
                        checked_usize((sel_idx - dim.start) / sel_step, "result index")?;
                    indices.push((chunk_local, result_dim_idx));
                    sel_idx += sel_step;
                }

                dim_indices.push(indices);
            }

            copy_selected_elements(
                &chunk_data,
                &mut result_buf,
                &dim_indices,
                &chunk_strides,
                &result_strides,
                elem_size,
                ndim,
            );
        }

        self.decode_buffer_with_shape::<T>(
            &result_buf,
            resolved.result_elements,
            &resolved.result_shape,
        )
    }
1632
    /// Read a hyperslab selection from a chunked-layout dataset, decoding
    /// chunks in parallel with rayon.
    ///
    /// Mirrors `read_chunked_slice` (same three strategies) but iterates the
    /// overlapping chunks with `par_iter`, writing through a shared
    /// `FlatBufferPtr`.
    ///
    /// NOTE(review): parallel workers write through a shared raw pointer;
    /// soundness relies on distinct chunk offsets producing disjoint output
    /// regions — confirm the chunk index cannot yield duplicate offsets here.
    #[cfg(feature = "rayon")]
    fn read_chunked_slice_parallel<T: H5Type>(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        _element_size: u32,
        chunk_indexing: Option<&ChunkIndexing>,
        _selection: &SliceInfo,
        resolved: &ResolvedSelection,
    ) -> Result<ArrayD<T>> {
        // Empty selection: produce an empty (fill-shaped) array.
        if resolved.result_elements == 0 {
            return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
        }

        // Undefined index address: no chunk index exists, return fill values.
        if Cursor::is_undefined_offset(index_address, self.offset_size()) {
            return self
                .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
        }

        let ndim = self.ndim();
        let shape = &self.dataspace.dims;
        let elem_size = dtype_element_size(&self.datatype);
        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
        // Per-dimension chunk-coordinate bounding box of the selection.
        let mut first_chunk = vec![0u64; ndim];
        let mut last_chunk = vec![0u64; ndim];
        for d in 0..ndim {
            let (first, last) = resolved.dims[d]
                .chunk_index_range(chunk_shape[d])
                .expect("zero-sized result handled above");
            first_chunk[d] = first;
            last_chunk[d] = last;
        }

        let overlapping = self.collect_chunk_entries(
            index_address,
            chunk_dims,
            chunk_indexing,
            ChunkEntrySelection {
                shape,
                ndim,
                elem_size,
                chunk_bounds: Some((&first_chunk, &last_chunk)),
            },
        )?;

        let result_total_bytes = checked_mul_usize(
            resolved.result_elements,
            elem_size,
            "slice result size in bytes",
        )?;
        // Row-major strides over the result (collapsed dims included) and
        // over a single chunk, both in elements.
        let result_dims = resolved.result_dims_with_collapsed();
        let mut result_strides = vec![1usize; ndim];
        for d in (0..ndim - 1).rev() {
            result_strides[d] =
                checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
        }
        let mut chunk_strides = vec![1usize; ndim];
        for d in (0..ndim - 1).rev() {
            chunk_strides[d] = checked_mul_usize(
                chunk_strides[d + 1],
                chunk_shape[d + 1] as usize,
                "chunk stride",
            )?;
        }
        let use_unit_stride_fast_path = resolved.is_unit_stride();
        // Fast paths require step-1 selections AND that every chunk in the
        // bounding box actually has an entry (no fill-value gaps to handle).
        let fully_covered_unit_stride = use_unit_stride_fast_path
            && overlapping.len() == expected_chunk_count(&first_chunk, &last_chunk)?;

        if fully_covered_unit_stride {
            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
                // Strategy 1: parallel copy straight into an uninitialized
                // Vec<T>; every element is written because coverage is full.
                let mut result_values: Vec<MaybeUninit<T>> =
                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
                        .take(resolved.result_elements)
                        .collect();
                let flat = FlatBufferPtr {
                    ptr: result_values.as_mut_ptr() as *mut u8,
                    len: checked_mul_usize(
                        result_values.len(),
                        std::mem::size_of::<T>(),
                        "typed slice result size in bytes",
                    )?,
                };

                overlapping
                    .par_iter()
                    .map(|entry| {
                        let chunk_data =
                            self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                        unsafe {
                            flat.copy_unit_stride_chunk_overlap(
                                &chunk_data,
                                UnitStrideCopyLayout {
                                    chunk_offsets: &entry.offsets,
                                    chunk_shape: &chunk_shape,
                                    dataset_shape: shape,
                                    resolved,
                                    chunk_strides: &chunk_strides,
                                    result_strides: &result_strides,
                                    elem_size,
                                },
                            )?;
                        }

                        Ok(())
                    })
                    .collect::<std::result::Result<Vec<_>, Error>>()?;

                let result_values = assume_init_vec(result_values);
                return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
            }

            // Strategy 2: parallel copy into an uninitialized byte buffer,
            // then element decode.
            let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
            let flat = FlatBufferPtr {
                ptr: result_buf.as_mut_ptr() as *mut u8,
                len: result_buf.len(),
            };

            overlapping
                .par_iter()
                .map(|entry| {
                    let chunk_data =
                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                    unsafe {
                        flat.copy_unit_stride_chunk_overlap(
                            &chunk_data,
                            UnitStrideCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                resolved,
                                chunk_strides: &chunk_strides,
                                result_strides: &result_strides,
                                elem_size,
                            },
                        )?;
                    }

                    Ok(())
                })
                .collect::<std::result::Result<Vec<_>, Error>>()?;

            let result_buf = assume_init_u8_vec(result_buf);
            return self.decode_buffer_with_shape::<T>(
                &result_buf,
                resolved.result_elements,
                &resolved.result_shape,
            );
        }

        // Strategy 3: general path, pre-filled with the fill value so that
        // positions not covered by any chunk keep it.
        let mut result_buf = self.make_output_buffer(result_total_bytes);

        let flat = FlatBufferPtr {
            ptr: result_buf.as_mut_ptr(),
            len: result_buf.len(),
        };

        overlapping
            .par_iter()
            .map(|entry| {
                let chunk_data =
                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                if use_unit_stride_fast_path {
                    // Unit stride but partial coverage.
                    unsafe {
                        flat.copy_unit_stride_chunk_overlap(
                            &chunk_data,
                            UnitStrideCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                resolved,
                                chunk_strides: &chunk_strides,
                                result_strides: &result_strides,
                                elem_size,
                            },
                        )?;
                    }
                    return Ok(());
                }

                // Strided selection: per dimension, enumerate the selected
                // coordinates falling inside this chunk as
                // (chunk-local index, result index) pairs.
                let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
                for d in 0..ndim {
                    let chunk_start = entry.offsets[d];
                    let chunk_end = (chunk_start + chunk_shape[d]).min(shape[d]);
                    let dim = &resolved.dims[d];
                    let sel_start = dim.start;
                    let sel_end = dim.end;
                    let sel_step = dim.step;
                    let mut indices = Vec::new();

                    // First selected coordinate at or after the chunk start,
                    // aligned to the selection step.
                    let first_sel = if sel_start >= chunk_start {
                        sel_start
                    } else {
                        let steps_to_skip = (chunk_start - sel_start).div_ceil(sel_step);
                        sel_start + steps_to_skip * sel_step
                    };

                    let mut sel_idx = first_sel;
                    while sel_idx < sel_end && sel_idx < chunk_end {
                        let chunk_local =
                            checked_usize(sel_idx - chunk_start, "chunk-local index")?;
                        let result_dim_idx =
                            checked_usize((sel_idx - dim.start) / sel_step, "result index")?;
                        indices.push((chunk_local, result_dim_idx));
                        sel_idx += sel_step;
                    }

                    dim_indices.push(indices);
                }

                unsafe {
                    flat.copy_selected(
                        &chunk_data,
                        &dim_indices,
                        &chunk_strides,
                        &result_strides,
                        elem_size,
                        ndim,
                    );
                }

                Ok(())
            })
            .collect::<std::result::Result<Vec<_>, Error>>()?;

        self.decode_buffer_with_shape::<T>(
            &result_buf,
            resolved.result_elements,
            &resolved.result_shape,
        )
    }
1880
    /// Read a hyperslab selection from a contiguous-layout dataset.
    ///
    /// The entire stored byte range is read once; the selection is then
    /// applied in memory. For unit-stride selections the whole dataset is
    /// treated as a single "chunk" at offset zero and copied with the
    /// unit-stride overlap helper (typed fast path when `T` is native-copy
    /// compatible); strided selections go through per-dimension index lists.
    fn read_contiguous_slice<T: H5Type>(
        &self,
        address: u64,
        size: u64,
        _selection: &SliceInfo,
        resolved: &ResolvedSelection,
    ) -> Result<ArrayD<T>> {
        // Empty selection: produce an empty (fill-shaped) array.
        if resolved.result_elements == 0 {
            return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
        }

        // No data allocated at all: return fill values.
        if Cursor::is_undefined_offset(address, self.offset_size()) || size == 0 {
            return self
                .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
        }

        let shape = &self.dataspace.dims;
        let ndim = shape.len();
        let elem_size = dtype_element_size(&self.datatype);
        let result_total_bytes = checked_mul_usize(
            resolved.result_elements,
            elem_size,
            "contiguous slice result size in bytes",
        )?;
        let storage_len = checked_usize(size, "contiguous dataset size")?;
        let raw = self.context.read_range(address, storage_len)?;
        let dataset_strides = row_major_strides(shape, "contiguous dataset stride")?;
        // Row-major strides over the result (collapsed dims included);
        // `zero_offsets` lets the whole dataset pose as one chunk at (0,…,0).
        let result_dims = resolved.result_dims_with_collapsed();
        let zero_offsets = vec![0u64; ndim];
        let mut result_strides = vec![1usize; ndim];
        for d in (0..ndim.saturating_sub(1)).rev() {
            result_strides[d] =
                checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
        }

        if resolved.is_unit_stride() {
            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
                // Typed fast path: copy bytes directly into an uninitialized
                // Vec<T>.
                let mut result_values: Vec<MaybeUninit<T>> =
                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
                        .take(resolved.result_elements)
                        .collect();
                let flat = FlatBufferPtr {
                    ptr: result_values.as_mut_ptr() as *mut u8,
                    len: checked_mul_usize(
                        result_values.len(),
                        std::mem::size_of::<T>(),
                        "typed contiguous slice size in bytes",
                    )?,
                };

                unsafe {
                    copy_unit_stride_chunk_overlap_ptr(
                        raw.as_ref(),
                        flat,
                        UnitStrideCopyLayout {
                            chunk_offsets: &zero_offsets,
                            chunk_shape: shape,
                            dataset_shape: shape,
                            resolved,
                            chunk_strides: &dataset_strides,
                            result_strides: &result_strides,
                            elem_size,
                        },
                    )?;
                }

                let result_values = assume_init_vec(result_values);
                return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
                    .map_err(|e| Error::InvalidData(format!("contiguous slice shape error: {e}")));
            }

            // Byte fast path: uninitialized buffer, then element decode.
            let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
            unsafe {
                copy_unit_stride_chunk_overlap_ptr(
                    raw.as_ref(),
                    FlatBufferPtr {
                        ptr: result_buf.as_mut_ptr() as *mut u8,
                        len: result_buf.len(),
                    },
                    UnitStrideCopyLayout {
                        chunk_offsets: &zero_offsets,
                        chunk_shape: shape,
                        dataset_shape: shape,
                        resolved,
                        chunk_strides: &dataset_strides,
                        result_strides: &result_strides,
                        elem_size,
                    },
                )?;
            }

            let result_buf = assume_init_u8_vec(result_buf);
            return self.decode_buffer_with_shape::<T>(
                &result_buf,
                resolved.result_elements,
                &resolved.result_shape,
            );
        }

        // Strided selection: build (source index, result index) pairs per
        // dimension, starting from a fill-value buffer.
        let mut result_buf = self.make_output_buffer(result_total_bytes);
        let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
        for dim in &resolved.dims {
            let mut indices = Vec::with_capacity(dim.count);
            let mut sel_idx = dim.start;
            while sel_idx < dim.end {
                let src_idx = checked_usize(sel_idx, "contiguous selection source index")?;
                let result_idx = checked_usize(
                    (sel_idx - dim.start) / dim.step,
                    "contiguous selection result index",
                )?;
                indices.push((src_idx, result_idx));
                // saturating_add guards against overflow near u64::MAX.
                sel_idx = sel_idx.saturating_add(dim.step);
            }
            dim_indices.push(indices);
        }

        copy_selected_elements(
            raw.as_ref(),
            &mut result_buf,
            &dim_indices,
            &dataset_strides,
            &result_strides,
            elem_size,
            ndim,
        );

        self.decode_buffer_with_shape::<T>(
            &result_buf,
            resolved.result_elements,
            &resolved.result_shape,
        )
    }
2013
2014 fn read_compact_slice<T: H5Type>(
2015 &self,
2016 data: &[u8],
2017 selection: &SliceInfo,
2018 ) -> Result<ArrayD<T>> {
2019 let full = self.read_compact::<T>(data)?;
2020 slice_array(&full, selection, &self.dataspace.dims)
2021 }
2022
2023 fn decode_buffer_with_shape<T: H5Type>(
2024 &self,
2025 raw: &[u8],
2026 n: usize,
2027 shape: &[usize],
2028 ) -> Result<ArrayD<T>> {
2029 let elem_size = dtype_element_size(&self.datatype);
2030
2031 if let Some(elements) = T::decode_vec(raw, &self.datatype, n) {
2032 let elements = elements?;
2033 return ArrayD::from_shape_vec(IxDyn(shape), elements)
2034 .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
2035 }
2036
2037 let mut elements = Vec::with_capacity(n);
2038 for i in 0..n {
2039 let start = checked_mul_usize(i, elem_size, "decoded element byte offset")?;
2040 let end = checked_mul_usize(i + 1, elem_size, "decoded element end offset")?;
2041 if end > raw.len() {
2042 let padded = if end <= raw.len().saturating_add(elem_size) {
2044 let mut buf = vec![0u8; elem_size];
2045 let available = raw.len().saturating_sub(start);
2046 if available > 0 {
2047 buf[..available].copy_from_slice(&raw[start..start + available]);
2048 }
2049 T::from_bytes(&buf, &self.datatype)?
2050 } else {
2051 T::from_bytes(&vec![0u8; elem_size], &self.datatype)?
2052 };
2053 elements.push(padded);
2054 } else {
2055 elements.push(T::from_bytes(&raw[start..end], &self.datatype)?);
2056 }
2057 }
2058
2059 ArrayD::from_shape_vec(IxDyn(shape), elements)
2060 .map_err(|e| Error::InvalidData(format!("array shape error: {e}")))
2061 }
2062
2063 fn decode_raw_data<T: H5Type>(&self, raw: &[u8]) -> Result<ArrayD<T>> {
2064 let n = checked_usize(self.num_elements(), "dataset element count")?;
2065 let mut shape = Vec::with_capacity(self.dataspace.dims.len());
2066 for &dim in &self.dataspace.dims {
2067 shape.push(checked_usize(dim, "dataset dimension")?);
2068 }
2069 self.decode_buffer_with_shape::<T>(raw, n, &shape)
2070 }
2071
2072 fn make_fill_array<T: H5Type>(&self) -> Result<ArrayD<T>> {
2073 let n = checked_usize(self.num_elements(), "dataset element count")?;
2074 let mut shape = Vec::with_capacity(self.dataspace.dims.len());
2075 for &dim in &self.dataspace.dims {
2076 shape.push(checked_usize(dim, "dataset dimension")?);
2077 }
2078 self.make_fill_array_from_shape::<T>(n, &shape)
2079 }
2080
2081 fn make_fill_array_from_shape<T: H5Type>(
2082 &self,
2083 element_count: usize,
2084 shape: &[usize],
2085 ) -> Result<ArrayD<T>> {
2086 let elem_size = dtype_element_size(&self.datatype);
2087 let total_bytes = checked_mul_usize(element_count, elem_size, "fill result size in bytes")?;
2088 let fill = self.make_output_buffer(total_bytes);
2089 self.decode_buffer_with_shape::<T>(&fill, element_count, shape)
2090 }
2091
2092 fn make_output_buffer(&self, total_bytes: usize) -> Vec<u8> {
2093 if let Some(ref fv) = self.fill_value {
2094 if let Some(ref fill_bytes) = fv.value {
2095 let mut buf = vec![0u8; total_bytes];
2096 if !fill_bytes.is_empty() {
2097 for chunk in buf.chunks_exact_mut(fill_bytes.len()) {
2098 chunk.copy_from_slice(fill_bytes);
2099 }
2100 }
2101 buf
2102 } else {
2103 vec![0u8; total_bytes]
2104 }
2105 } else {
2106 vec![0u8; total_bytes]
2107 }
2108 }
2109
2110 fn normalize_raw_bytes(&self, raw: &[u8], total_bytes: usize) -> Vec<u8> {
2111 if raw.len() >= total_bytes {
2112 raw[..total_bytes].to_vec()
2113 } else {
2114 let mut normalized = self.make_output_buffer(total_bytes);
2115 normalized[..raw.len()].copy_from_slice(raw);
2116 normalized
2117 }
2118 }
2119}
2120
2121fn attribute_from_message_storage(message: &AttributeMessage, context: &FileContext) -> Attribute {
2122 let raw_data = match &message.datatype {
2123 Datatype::VarLen { base }
2124 if matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. })
2125 && message.dataspace.num_elements() == 1 =>
2126 {
2127 resolve_vlen_bytes_storage(
2128 &message.raw_data,
2129 context.storage.as_ref(),
2130 context.superblock.offset_size,
2131 context.superblock.length_size,
2132 )
2133 .unwrap_or_else(|| message.raw_data.clone())
2134 }
2135 _ => message.raw_data.clone(),
2136 };
2137
2138 Attribute {
2139 name: message.name.clone(),
2140 datatype: message.datatype.clone(),
2141 shape: match message.dataspace.dataspace_type {
2142 DataspaceType::Scalar => vec![],
2143 DataspaceType::Null => vec![0],
2144 DataspaceType::Simple => message.dataspace.dims.clone(),
2145 },
2146 raw_data,
2147 }
2148}
2149
2150fn normalize_layout(layout: DataLayout, dataspace: &DataspaceMessage) -> DataLayout {
2151 match layout {
2152 DataLayout::Chunked {
2153 address,
2154 mut dims,
2155 mut element_size,
2156 chunk_indexing,
2157 } if dims.len() == dataspace.dims.len() + 1 => {
2158 if let Some(legacy_element_size) = dims.pop() {
2159 if element_size == 0 {
2160 element_size = legacy_element_size;
2161 }
2162 }
2163 DataLayout::Chunked {
2164 address,
2165 dims,
2166 element_size,
2167 chunk_indexing,
2168 }
2169 }
2170 other => other,
2171 }
2172}
2173
/// Test helper: copy a chunk into a flat dataset buffer, computing the
/// row-major strides for both sides before delegating to the stride-aware
/// copy routine.
#[cfg(test)]
fn copy_chunk_to_flat(
    chunk_data: &[u8],
    flat: &mut [u8],
    chunk_offsets: &[u64],
    chunk_shape: &[u64],
    dataset_shape: &[u64],
    elem_size: usize,
) {
    let dataset_strides = row_major_strides(dataset_shape, "dataset stride")
        .expect("dataset strides should fit in usize");
    let chunk_strides = row_major_strides(chunk_shape, "chunk stride")
        .expect("chunk strides should fit in usize");
    let layout = ChunkCopyLayout {
        chunk_offsets,
        chunk_shape,
        dataset_shape,
        dataset_strides: &dataset_strides,
        chunk_strides: &chunk_strides,
        elem_size,
    };
    copy_chunk_to_flat_with_strides(chunk_data, flat, layout);
}
2201
2202fn copy_chunk_to_flat_with_strides(
2203 chunk_data: &[u8],
2204 flat: &mut [u8],
2205 layout: ChunkCopyLayout<'_>,
2206) {
2207 unsafe {
2208 copy_chunk_to_flat_with_strides_ptr(
2209 chunk_data,
2210 FlatBufferPtr {
2211 ptr: flat.as_mut_ptr(),
2212 len: flat.len(),
2213 },
2214 layout,
2215 );
2216 }
2217}
2218
/// Copy one chunk's bytes into their position in a flat row-major dataset
/// buffer, one innermost row at a time.
///
/// Edge chunks are clipped to the dataset bounds. Rows whose source or
/// destination range would exceed either buffer are skipped entirely rather
/// than partially copied.
///
/// # Safety
/// `flat.ptr` must be valid for writes of `flat.len` bytes for the duration
/// of the call, with no other access to that memory; every write is
/// bounds-checked against `flat.len` before copying.
#[inline(always)]
unsafe fn copy_chunk_to_flat_with_strides_ptr(
    chunk_data: &[u8],
    flat: FlatBufferPtr,
    layout: ChunkCopyLayout<'_>,
) {
    let ndim = layout.dataset_shape.len();

    // Scalar dataset: copy at most one element, clamped to both buffers.
    if ndim == 0 {
        let bytes = layout.elem_size.min(chunk_data.len()).min(flat.len);
        std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), flat.ptr, bytes);
        return;
    }

    // Clip the chunk to the dataset bounds (edge chunks may be partial).
    let mut actual_chunk_shape = Vec::with_capacity(ndim);
    for i in 0..ndim {
        let remaining = layout.dataset_shape[i] - layout.chunk_offsets[i];
        actual_chunk_shape.push(remaining.min(layout.chunk_shape[i]) as usize);
    }

    // One "row" is a contiguous run along the last (fastest-varying) axis.
    let row_elems = *actual_chunk_shape.last().unwrap_or(&1);
    let row_bytes = row_elems * layout.elem_size;
    // Flat element index of the chunk's origin inside the dataset.
    let dataset_origin: usize = layout
        .chunk_offsets
        .iter()
        .enumerate()
        .map(|(d, offset)| *offset as usize * layout.dataset_strides[d])
        .sum();

    // 1-D: a single contiguous row.
    if ndim == 1 {
        let bytes = row_bytes.min(chunk_data.len());
        let dst_start = dataset_origin * layout.elem_size;
        let dst_end = dst_start + bytes;
        if dst_end <= flat.len {
            std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), flat.ptr.add(dst_start), bytes);
        }
        return;
    }

    // N-D: iterate all combinations of the outer dimensions, copying one
    // innermost row per iteration.
    let outer_dims = &actual_chunk_shape[..ndim - 1];
    let total_rows: usize = outer_dims.iter().product();
    let mut outer_idx = vec![0usize; ndim - 1];

    for _ in 0..total_rows {
        // Flat element offsets of this row in the chunk and the dataset.
        let mut chunk_row = 0usize;
        let mut dataset_row = dataset_origin;
        for (d, outer) in outer_idx.iter().copied().enumerate() {
            chunk_row += outer * layout.chunk_strides[d];
            dataset_row += outer * layout.dataset_strides[d];
        }

        let src_start = chunk_row * layout.elem_size;
        let dst_start = dataset_row * layout.elem_size;
        let src_end = src_start + row_bytes;
        let dst_end = dst_start + row_bytes;
        // Skip (rather than truncate) rows that would overrun either buffer.
        if src_end <= chunk_data.len() && dst_end <= flat.len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                flat.ptr.add(dst_start),
                row_bytes,
            );
        }

        // Odometer-style increment over the outer dimensions.
        let mut carry = true;
        for d in (0..outer_idx.len()).rev() {
            if carry {
                outer_idx[d] += 1;
                if outer_idx[d] < outer_dims[d] {
                    carry = false;
                } else {
                    outer_idx[d] = 0;
                }
            }
        }
    }
}
2296
2297fn checked_product_usize(values: &[usize], context: &str) -> Result<usize> {
2298 let mut product = 1usize;
2299 for &value in values {
2300 product = checked_mul_usize(product, value, context)?;
2301 }
2302 Ok(product)
2303}
2304
2305fn unit_stride_chunk_overlap_plan(
2306 chunk_offsets: &[u64],
2307 chunk_shape: &[u64],
2308 dataset_shape: &[u64],
2309 resolved: &ResolvedSelection,
2310) -> Result<(Vec<usize>, Vec<usize>, Vec<usize>)> {
2311 let ndim = dataset_shape.len();
2312 let mut overlap_counts = Vec::with_capacity(ndim);
2313 let mut chunk_local_start = Vec::with_capacity(ndim);
2314 let mut result_start = Vec::with_capacity(ndim);
2315
2316 for d in 0..ndim {
2317 let chunk_start = chunk_offsets[d];
2318 let chunk_end = (chunk_start + chunk_shape[d]).min(dataset_shape[d]);
2319 let dim = &resolved.dims[d];
2320 let overlap_start = chunk_start.max(dim.start);
2321 let overlap_end = chunk_end.min(dim.end);
2322 if overlap_start >= overlap_end {
2323 return Ok((Vec::new(), Vec::new(), Vec::new()));
2324 }
2325
2326 overlap_counts.push(checked_usize(
2327 overlap_end - overlap_start,
2328 "chunk overlap size",
2329 )?);
2330 chunk_local_start.push(checked_usize(
2331 overlap_start - chunk_start,
2332 "chunk overlap start",
2333 )?);
2334 result_start.push(checked_usize(
2335 overlap_start - dim.start,
2336 "slice result overlap start",
2337 )?);
2338 }
2339
2340 Ok((overlap_counts, chunk_local_start, result_start))
2341}
2342
2343#[inline(always)]
2344fn copy_unit_stride_chunk_overlap(
2345 chunk_data: &[u8],
2346 result_buf: &mut [u8],
2347 layout: UnitStrideCopyLayout<'_>,
2348) -> Result<()> {
2349 unsafe {
2350 copy_unit_stride_chunk_overlap_ptr(
2351 chunk_data,
2352 FlatBufferPtr {
2353 ptr: result_buf.as_mut_ptr(),
2354 len: result_buf.len(),
2355 },
2356 layout,
2357 )
2358 }
2359}
2360
/// Copy the overlap between one chunk and a unit-stride (step == 1)
/// selection into the result buffer, one contiguous row at a time.
///
/// The per-dimension overlap is computed by `unit_stride_chunk_overlap_plan`;
/// an empty plan means the chunk does not intersect the selection and nothing
/// is copied. Unlike the full-dataset fast path, all offset arithmetic here
/// goes through the `checked_*` helpers and surfaces overflow as an error.
///
/// # Safety
///
/// `result.ptr` must be valid for writes of `result.len` bytes and must not
/// alias `chunk_data`.
#[inline(always)]
unsafe fn copy_unit_stride_chunk_overlap_ptr(
    chunk_data: &[u8],
    result: FlatBufferPtr,
    layout: UnitStrideCopyLayout<'_>,
) -> Result<()> {
    let ndim = layout.dataset_shape.len();

    // Scalar dataset: at most one element to move.
    if ndim == 0 {
        let bytes = layout.elem_size.min(chunk_data.len()).min(result.len);
        std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), result.ptr, bytes);
        return Ok(());
    }

    let (overlap_counts, chunk_local_start, result_start) = unit_stride_chunk_overlap_plan(
        layout.chunk_offsets,
        layout.chunk_shape,
        layout.dataset_shape,
        layout.resolved,
    )?;
    // Empty plan: the chunk lies entirely outside the selection.
    if overlap_counts.is_empty() {
        return Ok(());
    }

    // One "row" is the contiguous overlap run along the last dimension.
    let row_elems = *overlap_counts.last().unwrap_or(&1);
    let row_bytes = checked_mul_usize(row_elems, layout.elem_size, "unit-stride slice row bytes")?;

    // Flat element offsets of the overlap origin in chunk and result space.
    let mut chunk_origin = 0usize;
    let mut result_origin = 0usize;
    for d in 0..ndim {
        let chunk_term = checked_mul_usize(
            chunk_local_start[d],
            layout.chunk_strides[d],
            "chunk overlap origin",
        )?;
        let result_term = checked_mul_usize(
            result_start[d],
            layout.result_strides[d],
            "slice result origin",
        )?;
        chunk_origin = checked_add_usize(chunk_origin, chunk_term, "chunk overlap origin")?;
        result_origin = checked_add_usize(result_origin, result_term, "slice result origin")?;
    }

    // 1-D fast path: the overlap is a single contiguous copy.
    if ndim == 1 {
        let src_start = chunk_origin * layout.elem_size;
        let dst_start = result_origin * layout.elem_size;
        let src_end = src_start + row_bytes;
        let dst_end = dst_start + row_bytes;
        if src_end <= chunk_data.len() && dst_end <= result.len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                result.ptr.add(dst_start),
                row_bytes,
            );
        }
        return Ok(());
    }

    // N-D: odometer over the outer dimensions, one memcpy per row.
    let outer_counts = &overlap_counts[..ndim - 1];
    let total_rows = checked_product_usize(outer_counts, "unit-stride slice row count")?;
    let mut outer_idx = vec![0usize; ndim - 1];

    for _ in 0..total_rows {
        // Flat element offsets of this row in chunk space and result space.
        let mut chunk_row = chunk_origin;
        let mut result_row = result_origin;
        for (d, outer) in outer_idx.iter().copied().enumerate() {
            chunk_row += outer * layout.chunk_strides[d];
            result_row += outer * layout.result_strides[d];
        }

        let src_start = chunk_row * layout.elem_size;
        let dst_start = result_row * layout.elem_size;
        let src_end = src_start + row_bytes;
        let dst_end = dst_start + row_bytes;
        // Rows that would read or write out of range are skipped whole.
        if src_end <= chunk_data.len() && dst_end <= result.len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                result.ptr.add(dst_start),
                row_bytes,
            );
        }

        // Advance the outer-dimension odometer (last dimension varies fastest).
        let mut carry = true;
        for d in (0..outer_idx.len()).rev() {
            if carry {
                outer_idx[d] += 1;
                if outer_idx[d] < outer_counts[d] {
                    carry = false;
                } else {
                    outer_idx[d] = 0;
                }
            }
        }
    }

    Ok(())
}
2469
/// Gather individually selected elements from a chunk into the result
/// buffer.
///
/// `dim_indices[d]` lists `(chunk_local, result_index)` pairs for dimension
/// `d`; every combination across dimensions is visited and copied one
/// element at a time. Elements whose source or destination range would fall
/// outside the respective buffer are skipped.
#[allow(clippy::too_many_arguments)]
#[inline(always)]
fn copy_selected_elements(
    chunk_data: &[u8],
    result_buf: &mut [u8],
    dim_indices: &[Vec<(usize, usize)>],
    chunk_strides: &[usize],
    result_strides: &[usize],
    elem_size: usize,
    ndim: usize,
) {
    // A dimension with no selected indices means nothing to copy at all.
    if dim_indices.iter().any(|v| v.is_empty()) {
        return;
    }

    let element_count: usize = dim_indices.iter().map(|v| v.len()).product();
    let mut odometer = vec![0usize; ndim];

    for _ in 0..element_count {
        // Resolve the flat source/destination element offsets for the
        // current index tuple.
        let mut src_elem = 0usize;
        let mut dst_elem = 0usize;
        for (d, &pos) in odometer.iter().enumerate() {
            let (chunk_local, result_index) = dim_indices[d][pos];
            src_elem += chunk_local * chunk_strides[d];
            dst_elem += result_index * result_strides[d];
        }

        let src = src_elem * elem_size;
        let dst = dst_elem * elem_size;
        // Out-of-range elements are silently skipped.
        if src + elem_size <= chunk_data.len() && dst + elem_size <= result_buf.len() {
            result_buf[dst..dst + elem_size].copy_from_slice(&chunk_data[src..src + elem_size]);
        }

        // Advance the odometer; the last dimension varies fastest.
        for d in (0..ndim).rev() {
            odometer[d] += 1;
            if odometer[d] < dim_indices[d].len() {
                break;
            }
            odometer[d] = 0;
        }
    }
}
2525
/// Raw-pointer variant of `copy_selected_elements`, compiled only for the
/// rayon feature.
///
/// Copies one element at a time: for every combination of per-dimension
/// `(chunk_local, result_index)` pairs in `dim_indices`, source and
/// destination flat offsets are built from the respective strides.
/// Elements whose source or destination range would fall outside the
/// buffers are skipped.
///
/// # Safety
///
/// `result_ptr` must be valid for writes of `result_len` bytes, and no
/// other thread may concurrently access the byte ranges this call writes
/// (callers partitioning work across rayon workers must ensure the writes
/// are disjoint).
#[cfg(feature = "rayon")]
#[allow(clippy::too_many_arguments)]
#[inline(always)]
unsafe fn copy_selected_elements_ptr(
    chunk_data: &[u8],
    result_ptr: *mut u8,
    result_len: usize,
    dim_indices: &[Vec<(usize, usize)>],
    chunk_strides: &[usize],
    result_strides: &[usize],
    elem_size: usize,
    ndim: usize,
) {
    // A dimension with no selected indices means nothing to copy at all.
    if dim_indices.iter().any(|v| v.is_empty()) {
        return;
    }

    let total: usize = dim_indices.iter().map(|v| v.len()).product();
    let mut counters = vec![0usize; ndim];

    for _ in 0..total {
        // Resolve flat source/destination element offsets for this tuple.
        let mut chunk_flat = 0;
        let mut result_flat = 0;
        for d in 0..ndim {
            let (cl, ri) = dim_indices[d][counters[d]];
            chunk_flat += cl * chunk_strides[d];
            result_flat += ri * result_strides[d];
        }

        let src_start = chunk_flat * elem_size;
        let dst_start = result_flat * elem_size;
        let src_end = src_start + elem_size;
        let dst_end = dst_start + elem_size;

        // Out-of-range elements are silently skipped.
        if src_end <= chunk_data.len() && dst_end <= result_len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                result_ptr.add(dst_start),
                elem_size,
            );
        }

        // Advance the index odometer; the last dimension varies fastest.
        let mut carry = true;
        for d in (0..ndim).rev() {
            if carry {
                counters[d] += 1;
                if counters[d] < dim_indices[d].len() {
                    carry = false;
                } else {
                    counters[d] = 0;
                }
            }
        }
    }
}
2590
2591fn slice_array<T: H5Type + Clone>(
2593 array: &ArrayD<T>,
2594 selection: &SliceInfo,
2595 shape: &[u64],
2596) -> Result<ArrayD<T>> {
2597 let mut result_shape = Vec::new();
2599
2600 for (i, sel) in selection.selections.iter().enumerate() {
2601 let dim_size = shape[i];
2602 match sel {
2603 SliceInfoElem::Index(idx) => {
2604 if *idx >= dim_size {
2605 return Err(Error::SliceOutOfBounds {
2606 dim: i,
2607 index: *idx,
2608 size: dim_size,
2609 });
2610 }
2611 }
2613 SliceInfoElem::Slice { start, end, step } => {
2614 let dim_size = checked_usize(dim_size, "slice dimension size")?;
2615 let actual_end = if *end == u64::MAX {
2616 dim_size
2617 } else {
2618 checked_usize(*end, "slice end")?.min(dim_size)
2619 };
2620 let actual_start = checked_usize(*start, "slice start")?;
2621 let actual_step = checked_usize(*step, "slice step")?;
2622 if actual_step == 0 {
2623 return Err(Error::InvalidData("slice step cannot be 0".into()));
2624 }
2625 if actual_start > dim_size {
2626 return Err(Error::SliceOutOfBounds {
2627 dim: i,
2628 index: *start,
2629 size: shape[i],
2630 });
2631 }
2632 let n = (actual_end - actual_start).div_ceil(actual_step);
2633 result_shape.push(n);
2634 }
2635 }
2636 }
2637
2638 let ndim = shape.len();
2640 let total = checked_product_usize(&result_shape, "slice result element count")?;
2641 let mut elements = Vec::with_capacity(total);
2642
2643 let mut result_idx = vec![0usize; result_shape.len()];
2645
2646 for _ in 0..total {
2647 let mut src_idx = Vec::with_capacity(ndim);
2649 let mut ri = 0;
2650 for sel in selection.selections.iter() {
2651 match sel {
2652 SliceInfoElem::Index(idx) => {
2653 src_idx.push(checked_usize(*idx, "slice source index")?);
2654 }
2655 SliceInfoElem::Slice { start, step, .. } => {
2656 let start = checked_usize(*start, "slice start")?;
2657 let step = checked_usize(*step, "slice step")?;
2658 let offset =
2659 checked_mul_usize(result_idx[ri], step, "slice source index offset")?;
2660 src_idx.push(checked_add_usize(start, offset, "slice source index")?);
2661 ri += 1;
2662 }
2663 }
2664 }
2665
2666 elements.push(array[IxDyn(&src_idx)].clone());
2667
2668 if !result_shape.is_empty() {
2670 let mut carry = true;
2671 for d in (0..result_shape.len()).rev() {
2672 if carry {
2673 result_idx[d] += 1;
2674 if result_idx[d] < result_shape[d] {
2675 carry = false;
2676 } else {
2677 result_idx[d] = 0;
2678 }
2679 }
2680 }
2681 }
2682 }
2683
2684 ArrayD::from_shape_vec(IxDyn(&result_shape), elements)
2685 .map_err(|e| Error::InvalidData(format!("slice shape error: {e}")))
2686}
2687
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_slice_info_all() {
        let info = SliceInfo::all(3);
        assert_eq!(info.selections.len(), 3);
    }

    #[test]
    fn test_copy_chunk_1d() {
        // A 4-byte chunk written at offset 2 of an 8-element 1-D dataset.
        let chunk = vec![1u8, 2, 3, 4];
        let mut dest = vec![0u8; 8];
        let offsets = vec![2u64];
        let chunk_dims = vec![4u64];
        let dataset_dims = vec![8u64];

        copy_chunk_to_flat(&chunk, &mut dest, &offsets, &chunk_dims, &dataset_dims, 1);

        assert_eq!(dest, vec![0, 0, 1, 2, 3, 4, 0, 0]);
    }

    #[test]
    fn test_copy_chunk_2d_rowwise() {
        // A 2x3 chunk placed at (1, 1) inside a 4x4 dataset.
        let chunk = vec![1u8, 2, 3, 4, 5, 6];
        let mut dest = vec![0u8; 16];
        let offsets = vec![1u64, 1u64];
        let chunk_dims = vec![2u64, 3u64];
        let dataset_dims = vec![4u64, 4u64];

        copy_chunk_to_flat(&chunk, &mut dest, &offsets, &chunk_dims, &dataset_dims, 1);

        assert_eq!(dest, vec![0, 0, 0, 0, 0, 1, 2, 3, 0, 4, 5, 6, 0, 0, 0, 0]);
    }

    #[test]
    fn test_copy_unit_stride_chunk_overlap_2d_partial() {
        // Select rows 1..3 and columns 1..4 of a 4x4 chunk that covers the
        // whole dataset; expect the 2x3 interior block.
        let chunk: Vec<u8> = (1..=16).collect();
        let mut result = vec![0u8; 6];
        let offsets = vec![0u64, 0u64];
        let chunk_dims = vec![4u64, 4u64];
        let dataset_dims = vec![4u64, 4u64];
        let resolved = ResolvedSelection {
            dims: vec![
                ResolvedSelectionDim {
                    start: 1,
                    end: 3,
                    step: 1,
                    count: 2,
                },
                ResolvedSelectionDim {
                    start: 1,
                    end: 4,
                    step: 1,
                    count: 3,
                },
            ],
            result_shape: vec![2, 3],
            result_elements: 6,
        };
        let chunk_strides = vec![4usize, 1usize];
        let result_strides = vec![3usize, 1usize];

        copy_unit_stride_chunk_overlap(
            &chunk,
            &mut result,
            UnitStrideCopyLayout {
                chunk_offsets: &offsets,
                chunk_shape: &chunk_dims,
                dataset_shape: &dataset_dims,
                resolved: &resolved,
                chunk_strides: &chunk_strides,
                result_strides: &result_strides,
                elem_size: 1,
            },
        )
        .unwrap();

        assert_eq!(result, vec![6, 7, 8, 10, 11, 12]);
    }
}