// hdf5_reader/dataset.rs
1use std::mem::MaybeUninit;
2use std::num::NonZeroUsize;
3use std::sync::{Arc, OnceLock};
4
5use lru::LruCache;
6use ndarray::{ArrayD, IxDyn};
7use parking_lot::Mutex;
8#[cfg(feature = "rayon")]
9use rayon::prelude::*;
10use smallvec::SmallVec;
11
12use crate::attribute_api::{
13    collect_attribute_messages, decode_string, decode_varlen_byte_string, read_one_vlen_string,
14    resolve_vlen_bytes, Attribute,
15};
16use crate::cache::{ChunkCache, ChunkKey};
17use crate::chunk_index;
18use crate::datatype_api::{dtype_element_size, H5Type};
19use crate::error::{Error, Result};
20use crate::filters::{self, FilterRegistry};
21use crate::io::Cursor;
22use crate::messages::attribute::AttributeMessage;
23use crate::messages::dataspace::{DataspaceMessage, DataspaceType};
24use crate::messages::datatype::{Datatype, StringSize};
25use crate::messages::fill_value::{FillTime, FillValueMessage};
26use crate::messages::filter_pipeline::FilterPipelineMessage;
27use crate::messages::layout::{ChunkIndexing, DataLayout};
28use crate::messages::HdfMessage;
29use crate::object_header::ObjectHeader;
30
/// Datasets whose fully-decoded byte size is at or below this limit are
/// eligible for the per-dataset hot cache (`full_dataset_bytes`); larger
/// datasets are re-read from chunks on every full read.
const HOT_FULL_DATASET_CACHE_MAX_BYTES: usize = 32 * 1024 * 1024;
32
/// Raw pointer/length view of a flat output buffer, used so parallel chunk
/// copies can write into a single allocation from multiple Rayon workers
/// (see the `Send`/`Sync` impls below for the sharing rationale).
#[derive(Clone, Copy)]
struct FlatBufferPtr {
    // Base address of the destination buffer.
    ptr: *mut u8,
    // Length of the destination buffer in bytes.
    len: usize,
}
38
/// Geometry needed to copy one whole chunk into the full-dataset buffer.
/// Strides are row-major and expressed in elements, not bytes — TODO(review):
/// confirm against the `_ptr` copy helpers, which are defined elsewhere.
#[derive(Clone, Copy)]
struct ChunkCopyLayout<'a> {
    // Per-dimension element offset of this chunk's origin within the dataset.
    chunk_offsets: &'a [u64],
    chunk_shape: &'a [u64],
    dataset_shape: &'a [u64],
    dataset_strides: &'a [usize],
    chunk_strides: &'a [usize],
    // Size of one element in bytes.
    elem_size: usize,
}
48
/// Geometry needed to copy the overlap between one chunk and a unit-stride
/// (step == 1 in every dimension) hyperslab selection into the result buffer.
#[derive(Clone, Copy)]
struct UnitStrideCopyLayout<'a> {
    // Per-dimension element offset of this chunk's origin within the dataset.
    chunk_offsets: &'a [u64],
    chunk_shape: &'a [u64],
    dataset_shape: &'a [u64],
    // The normalized selection; all steps are 1 for this layout.
    resolved: &'a ResolvedSelection,
    chunk_strides: &'a [usize],
    result_strides: &'a [usize],
    // Size of one element in bytes.
    elem_size: usize,
}
59
/// Borrowed file state and shared caches required to parse a dataset's
/// object header into a [`Dataset`].
pub(crate) struct DatasetParseContext<'f> {
    // The entire HDF5 file image.
    pub(crate) file_data: &'f [u8],
    // Width in bytes of encoded file offsets.
    pub(crate) offset_size: u8,
    // Width in bytes of encoded lengths.
    pub(crate) length_size: u8,
    // Shared cache of decoded chunk payloads.
    pub(crate) chunk_cache: Arc<ChunkCache>,
    // Registry used to resolve filter-pipeline (compression) stages.
    pub(crate) filter_registry: Arc<FilterRegistry>,
}
67
/// Describes which chunk-index entries `collect_chunk_entries` should gather.
#[derive(Clone, Copy)]
struct ChunkEntrySelection<'a> {
    // Full dataset shape in elements.
    shape: &'a [u64],
    ndim: usize,
    // Size of one element in bytes.
    elem_size: usize,
    // Inclusive (first, last) chunk-coordinate bounds to restrict the scan,
    // or `None` to collect every chunk in the dataset.
    chunk_bounds: Option<(&'a [u64], &'a [u64])>,
}
75
// SAFETY: FlatBufferPtr is only a (pointer, length) view into an output
// allocation that outlives the parallel copy. Sharing it across Rayon
// workers is sound only while each worker writes a disjoint byte range —
// TODO(review): confirm every parallel caller partitions the buffer
// without overlap before relying on these impls.
unsafe impl Send for FlatBufferPtr {}

// SAFETY: same reasoning as the `Send` impl above.
unsafe impl Sync for FlatBufferPtr {}
79
impl FlatBufferPtr {
    /// Copy one whole chunk into the output buffer at the position described
    /// by `layout`. Thin wrapper over `copy_chunk_to_flat_with_strides_ptr`.
    ///
    /// # Safety
    /// `self.ptr` must point to a live, writable buffer of `self.len` bytes,
    /// and concurrent callers must write disjoint regions of it.
    #[cfg(feature = "rayon")]
    #[inline(always)]
    unsafe fn copy_chunk(self, chunk_data: &[u8], layout: ChunkCopyLayout<'_>) {
        copy_chunk_to_flat_with_strides_ptr(chunk_data, self, layout);
    }

    /// Copy individually selected (strided) elements from a chunk into the
    /// result buffer. Thin wrapper over `copy_selected_elements_ptr`.
    ///
    /// # Safety
    /// Same buffer requirements as [`Self::copy_chunk`]. The index and
    /// stride slices must be mutually consistent with `ndim` — TODO(review):
    /// the precise contract lives in `copy_selected_elements_ptr`.
    #[cfg(feature = "rayon")]
    #[inline(always)]
    unsafe fn copy_selected(
        self,
        chunk_data: &[u8],
        dim_indices: &[Vec<(usize, usize)>],
        chunk_strides: &[usize],
        result_strides: &[usize],
        elem_size: usize,
        ndim: usize,
    ) {
        copy_selected_elements_ptr(
            chunk_data,
            self.ptr,
            self.len,
            dim_indices,
            chunk_strides,
            result_strides,
            elem_size,
            ndim,
        );
    }

    /// Copy the overlap between a chunk and a unit-stride selection.
    /// Thin wrapper over `copy_unit_stride_chunk_overlap_ptr`.
    ///
    /// # Safety
    /// Same buffer requirements as [`Self::copy_chunk`].
    #[cfg(feature = "rayon")]
    #[inline(always)]
    unsafe fn copy_unit_stride_chunk_overlap(
        self,
        chunk_data: &[u8],
        layout: UnitStrideCopyLayout<'_>,
    ) -> Result<()> {
        copy_unit_stride_chunk_overlap_ptr(chunk_data, self, layout)
    }
}
120
/// Hyperslab selection for reading slices of datasets.
///
/// Holds exactly one [`SliceInfoElem`] per dataset dimension; selections
/// with a different arity are rejected when the read is resolved.
#[derive(Debug, Clone)]
pub struct SliceInfo {
    pub selections: Vec<SliceInfoElem>,
}
126
/// A single dimension's selection.
#[derive(Debug, Clone)]
pub enum SliceInfoElem {
    /// Select a single index (reduces dimensionality).
    Index(u64),
    /// Select a range with optional step. `end` is exclusive and is clamped
    /// to the dimension size; `u64::MAX` means "to the end of the dimension".
    /// `step` must be non-zero.
    Slice { start: u64, end: u64, step: u64 },
}
135
/// One dimension of a normalized selection: the half-open range
/// `[start, end)` walked with `step`, yielding `count` elements.
#[derive(Clone, Debug)]
struct ResolvedSelectionDim {
    start: u64,
    // Exclusive end, already clamped to the dimension size.
    end: u64,
    step: u64,
    // Number of selected elements along this dimension (0 for empty ranges).
    count: usize,
}
143
/// Key for the per-dataset LRU of chunk-index scans: the index address plus
/// the inclusive chunk-coordinate bounds that were scanned.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct ChunkEntryCacheKey {
    index_address: u64,
    // Inline storage for up to 4 dims avoids heap allocation for the
    // common low-dimensional case.
    first_chunk: SmallVec<[u64; 4]>,
    last_chunk: SmallVec<[u64; 4]>,
}
150
151impl ResolvedSelectionDim {
152    fn chunk_index_range(&self, chunk_extent: u64) -> Option<(u64, u64)> {
153        if self.count == 0 {
154            return None;
155        }
156
157        Some((self.start / chunk_extent, (self.end - 1) / chunk_extent))
158    }
159}
160
/// A [`SliceInfo`] validated and normalized against a concrete dataset shape.
#[derive(Clone, Debug)]
struct ResolvedSelection {
    // One entry per dataset dimension, including collapsed `Index` dims.
    dims: Vec<ResolvedSelectionDim>,
    // Output array shape; `Index` selections contribute no axis here.
    result_shape: Vec<usize>,
    // Product of `result_shape` (1 for a fully collapsed selection).
    result_elements: usize,
}
167
168impl ResolvedSelection {
169    fn result_dims_with_collapsed(&self) -> Vec<usize> {
170        self.dims.iter().map(|dim| dim.count).collect()
171    }
172
173    fn is_unit_stride(&self) -> bool {
174        self.dims.iter().all(|dim| dim.step == 1)
175    }
176}
177
178impl SliceInfo {
179    /// Create a selection that reads everything.
180    pub fn all(ndim: usize) -> Self {
181        SliceInfo {
182            selections: vec![
183                SliceInfoElem::Slice {
184                    start: 0,
185                    end: u64::MAX,
186                    step: 1,
187                };
188                ndim
189            ],
190        }
191    }
192}
193
194fn checked_usize(value: u64, context: &str) -> Result<usize> {
195    usize::try_from(value).map_err(|_| {
196        Error::InvalidData(format!(
197            "{context} value {value} exceeds platform usize capacity"
198        ))
199    })
200}
201
202fn checked_mul_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
203    lhs.checked_mul(rhs)
204        .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
205}
206
207fn checked_add_usize(lhs: usize, rhs: usize, context: &str) -> Result<usize> {
208    lhs.checked_add(rhs)
209        .ok_or_else(|| Error::InvalidData(format!("{context} exceeds platform usize capacity")))
210}
211
212fn expected_chunk_count(first_chunk: &[u64], last_chunk: &[u64]) -> Result<usize> {
213    let mut total = 1usize;
214    for (&first, &last) in first_chunk.iter().zip(last_chunk.iter()) {
215        let dim_count = checked_usize(last - first + 1, "selected chunk count")?;
216        total = checked_mul_usize(total, dim_count, "selected chunk count")?;
217    }
218    Ok(total)
219}
220
221fn full_dataset_chunk_count(shape: &[u64], chunk_shape: &[u64]) -> Result<usize> {
222    let mut total = 1usize;
223    for (&dim, &chunk) in shape.iter().zip(chunk_shape.iter()) {
224        let chunk_count = checked_usize(dim.div_ceil(chunk), "full dataset chunk count")?;
225        total = checked_mul_usize(total, chunk_count, "full dataset chunk count")?;
226    }
227    Ok(total)
228}
229
230fn row_major_strides(shape: &[u64], context: &str) -> Result<Vec<usize>> {
231    let ndim = shape.len();
232    if ndim == 0 {
233        return Ok(Vec::new());
234    }
235
236    let mut strides = vec![1usize; ndim];
237    for i in (0..ndim - 1).rev() {
238        let next_extent = checked_usize(shape[i + 1], context)?;
239        strides[i] = checked_mul_usize(strides[i + 1], next_extent, context)?;
240    }
241    Ok(strides)
242}
243
/// Reinterpret a fully-initialized `Vec<MaybeUninit<u8>>` as `Vec<u8>`
/// without copying.
///
/// NOTE(review): this is a safe fn with an unsafe precondition (every
/// element must actually be initialized); callers uphold it today, but
/// consider marking it `unsafe`.
fn assume_init_u8_vec(buffer: Vec<MaybeUninit<u8>>) -> Vec<u8> {
    let mut buffer = std::mem::ManuallyDrop::new(buffer);
    let ptr = buffer.as_mut_ptr().cast::<u8>();
    let (len, capacity) = (buffer.len(), buffer.capacity());
    // SAFETY: `MaybeUninit<u8>` has the same layout as `u8`, the caller
    // guarantees all `len` elements are initialized, and `ManuallyDrop`
    // prevents the original Vec from freeing the allocation twice.
    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
}
251
/// Reinterpret a fully-initialized `Vec<MaybeUninit<T>>` as `Vec<T>`
/// without copying.
///
/// NOTE(review): safe fn with an unsafe precondition (all elements must be
/// initialized) — consider marking it `unsafe`.
fn assume_init_vec<T>(buffer: Vec<MaybeUninit<T>>) -> Vec<T> {
    let mut buffer = std::mem::ManuallyDrop::new(buffer);
    let ptr = buffer.as_mut_ptr().cast::<T>();
    let (len, capacity) = (buffer.len(), buffer.capacity());
    // SAFETY: `MaybeUninit<T>` is layout-compatible with `T`, the caller
    // guarantees all `len` elements are initialized, and `ManuallyDrop`
    // prevents a double free of the allocation.
    unsafe { Vec::from_raw_parts(ptr, len, capacity) }
}
259
/// Validate `selection` against the dataset `shape` and normalize it into a
/// [`ResolvedSelection`].
///
/// Rules applied per dimension:
/// - `Index(i)` must satisfy `i < dim_size`; it collapses the axis (no
///   entry in `result_shape`).
/// - `Slice` requires a non-zero step and `start <= dim_size`; `end` of
///   `u64::MAX` means "to the end", and any `end` is clamped to `dim_size`.
///   Empty ranges (`start >= end`) are allowed and yield a 0-count dim.
///
/// # Errors
/// `InvalidData` for arity mismatch or zero step; `SliceOutOfBounds` for an
/// out-of-range index or start.
fn normalize_selection(selection: &SliceInfo, shape: &[u64]) -> Result<ResolvedSelection> {
    if selection.selections.len() != shape.len() {
        return Err(Error::InvalidData(format!(
            "slice has {} dimensions but dataset has {}",
            selection.selections.len(),
            shape.len()
        )));
    }

    let mut dims = Vec::with_capacity(shape.len());
    let mut result_shape = Vec::new();
    let mut result_elements = 1usize;

    for (i, sel) in selection.selections.iter().enumerate() {
        let dim_size = shape[i];
        match sel {
            SliceInfoElem::Index(idx) => {
                if *idx >= dim_size {
                    return Err(Error::SliceOutOfBounds {
                        dim: i,
                        index: *idx,
                        size: dim_size,
                    });
                }
                // An index selection is recorded as a 1-element range but
                // contributes no axis to the result shape.
                dims.push(ResolvedSelectionDim {
                    start: *idx,
                    end: *idx + 1,
                    step: 1,
                    count: 1,
                });
            }
            SliceInfoElem::Slice { start, end, step } => {
                if *step == 0 {
                    return Err(Error::InvalidData("slice step cannot be 0".into()));
                }
                // `start == dim_size` is allowed: it produces an empty range.
                if *start > dim_size {
                    return Err(Error::SliceOutOfBounds {
                        dim: i,
                        index: *start,
                        size: dim_size,
                    });
                }

                // u64::MAX is the "to the end" sentinel; everything else is
                // clamped to the dimension extent.
                let actual_end = if *end == u64::MAX {
                    dim_size
                } else {
                    (*end).min(dim_size)
                };
                // ceil((end - start) / step) elements are picked up by a
                // strided walk of [start, end).
                let count_u64 = if *start >= actual_end {
                    0
                } else {
                    (actual_end - *start).div_ceil(*step)
                };
                let count = checked_usize(count_u64, "slice element count")?;

                dims.push(ResolvedSelectionDim {
                    start: *start,
                    end: actual_end,
                    step: *step,
                    count,
                });
                result_shape.push(count);
                result_elements =
                    checked_mul_usize(result_elements, count, "slice result element count")?;
            }
        }
    }

    Ok(ResolvedSelection {
        dims,
        result_shape,
        result_elements,
    })
}
334
/// A dataset within an HDF5 file.
///
/// Borrows the whole file image for zero-copy reads; caches are shared via
/// `Arc` so handles rebuilt from a [`DatasetTemplate`] reuse prior work.
pub struct Dataset<'f> {
    // The entire HDF5 file image.
    file_data: &'f [u8],
    // Width in bytes of encoded file offsets.
    offset_size: u8,
    // Width in bytes of encoded lengths.
    length_size: u8,
    pub(crate) name: String,
    // Object header address; exposed via `address()`.
    pub(crate) data_address: u64,
    pub(crate) dataspace: DataspaceMessage,
    pub(crate) datatype: Datatype,
    pub(crate) layout: DataLayout,
    pub(crate) fill_value: Option<FillValueMessage>,
    pub(crate) filters: Option<FilterPipelineMessage>,
    pub(crate) attributes: Vec<AttributeMessage>,
    // Shared cache of decoded chunk payloads.
    pub(crate) chunk_cache: Arc<ChunkCache>,
    // LRU of chunk-index scans, keyed by index address + chunk bounds.
    chunk_entry_cache: Arc<Mutex<LruCache<ChunkEntryCacheKey, Arc<Vec<chunk_index::ChunkEntry>>>>>,
    // Lazily-populated list of every chunk entry in the dataset.
    full_chunk_entries: Arc<OnceLock<Arc<Vec<chunk_index::ChunkEntry>>>>,
    // Hot cache of the fully-decoded dataset bytes (small datasets only;
    // see HOT_FULL_DATASET_CACHE_MAX_BYTES).
    full_dataset_bytes: Arc<OnceLock<Arc<Vec<u8>>>>,
    pub(crate) filter_registry: Arc<FilterRegistry>,
}
354
/// Owned snapshot of a parsed dataset header.
///
/// Shared via `Arc` so a `Dataset` handle can be rebuilt against new file
/// borrows (`Dataset::from_template`) without reparsing, while the `Arc`'d
/// caches keep accumulating across rebuilds.
pub(crate) struct DatasetTemplate {
    name: String,
    data_address: u64,
    dataspace: DataspaceMessage,
    datatype: Datatype,
    layout: DataLayout,
    fill_value: Option<FillValueMessage>,
    filters: Option<FilterPipelineMessage>,
    attributes: Vec<AttributeMessage>,
    // Caches below are shared, not snapshotted: rebuilt handles reuse them.
    chunk_entry_cache: Arc<Mutex<LruCache<ChunkEntryCacheKey, Arc<Vec<chunk_index::ChunkEntry>>>>>,
    full_chunk_entries: Arc<OnceLock<Arc<Vec<chunk_index::ChunkEntry>>>>,
    full_dataset_bytes: Arc<OnceLock<Arc<Vec<u8>>>>,
}
368
369impl<'f> Dataset<'f> {
370    pub(crate) fn from_template(
371        file_data: &'f [u8],
372        offset_size: u8,
373        length_size: u8,
374        template: Arc<DatasetTemplate>,
375        chunk_cache: Arc<ChunkCache>,
376        filter_registry: Arc<FilterRegistry>,
377    ) -> Self {
378        Dataset {
379            file_data,
380            offset_size,
381            length_size,
382            name: template.name.clone(),
383            data_address: template.data_address,
384            dataspace: template.dataspace.clone(),
385            datatype: template.datatype.clone(),
386            layout: template.layout.clone(),
387            fill_value: template.fill_value.clone(),
388            filters: template.filters.clone(),
389            attributes: template.attributes.clone(),
390            chunk_cache,
391            chunk_entry_cache: template.chunk_entry_cache.clone(),
392            full_chunk_entries: template.full_chunk_entries.clone(),
393            full_dataset_bytes: template.full_dataset_bytes.clone(),
394            filter_registry,
395        }
396    }
397
398    pub(crate) fn template(&self) -> Arc<DatasetTemplate> {
399        Arc::new(DatasetTemplate {
400            name: self.name.clone(),
401            data_address: self.data_address,
402            dataspace: self.dataspace.clone(),
403            datatype: self.datatype.clone(),
404            layout: self.layout.clone(),
405            fill_value: self.fill_value.clone(),
406            filters: self.filters.clone(),
407            attributes: self.attributes.clone(),
408            chunk_entry_cache: self.chunk_entry_cache.clone(),
409            full_chunk_entries: self.full_chunk_entries.clone(),
410            full_dataset_bytes: self.full_dataset_bytes.clone(),
411        })
412    }
413
    /// Build a `Dataset` from an already-parsed object header.
    ///
    /// Dataspace, datatype, and data layout messages are required; fill
    /// value and filter pipeline are optional. If a message kind appears
    /// more than once in the header, the last occurrence wins.
    ///
    /// # Errors
    /// `InvalidData` when any required message is missing, or whatever
    /// attribute collection reports.
    pub(crate) fn from_parsed_header(
        context: DatasetParseContext<'f>,
        address: u64,
        name: String,
        header: &ObjectHeader,
    ) -> Result<Self> {
        let mut dataspace: Option<DataspaceMessage> = None;
        let mut datatype: Option<Datatype> = None;
        let mut layout: Option<DataLayout> = None;
        let mut fill_value: Option<FillValueMessage> = None;
        let mut filter_pipeline: Option<FilterPipelineMessage> = None;
        let attributes = collect_attribute_messages(
            header,
            context.file_data,
            context.offset_size,
            context.length_size,
        )?;

        for msg in &header.messages {
            match msg {
                HdfMessage::Dataspace(ds) => dataspace = Some(ds.clone()),
                HdfMessage::Datatype(dt) => datatype = Some(dt.datatype.clone()),
                HdfMessage::DataLayout(dl) => layout = Some(dl.layout.clone()),
                HdfMessage::FillValue(fv) => fill_value = Some(fv.clone()),
                HdfMessage::FilterPipeline(fp) => filter_pipeline = Some(fp.clone()),
                _ => {}
            }
        }

        let dataspace =
            dataspace.ok_or_else(|| Error::InvalidData("dataset missing dataspace".into()))?;
        let dt = datatype.ok_or_else(|| Error::InvalidData("dataset missing datatype".into()))?;
        let layout =
            layout.ok_or_else(|| Error::InvalidData("dataset missing data layout".into()))?;
        let layout = normalize_layout(layout, &dataspace);
        // netCDF-style files express the fill value as a scalar `_FillValue`
        // attribute; synthesize a FillValueMessage from it as a fallback.
        let attr_fill_value = attributes
            .iter()
            .find(|attr| attr.name == "_FillValue" && attr.dataspace.num_elements() == 1)
            .map(|attr| FillValueMessage {
                defined: !attr.raw_data.is_empty(),
                fill_time: FillTime::IfSet,
                value: Some(attr.raw_data.clone()),
            });
        // A header fill-value message with an actual value takes precedence;
        // otherwise fall back to the attribute-derived one (if any).
        let fill_value = match fill_value {
            Some(existing) if existing.value.is_some() => Some(existing),
            _ => attr_fill_value,
        };

        Ok(Dataset {
            file_data: context.file_data,
            offset_size: context.offset_size,
            length_size: context.length_size,
            name,
            data_address: address,
            dataspace,
            datatype: dt,
            layout,
            fill_value,
            filters: filter_pipeline,
            attributes,
            chunk_cache: context.chunk_cache,
            chunk_entry_cache: Arc::new(Mutex::new(LruCache::new(NonZeroUsize::new(32).unwrap()))),
            full_chunk_entries: Arc::new(OnceLock::new()),
            full_dataset_bytes: Arc::new(OnceLock::new()),
            filter_registry: context.filter_registry,
        })
    }
481
482    /// Dataset name.
483    pub fn name(&self) -> &str {
484        &self.name
485    }
486
    /// The object header address used to parse this dataset.
    /// Useful as an opaque identifier or for NC4 data_offset.
    pub fn address(&self) -> u64 {
        self.data_address
    }
492
493    /// Shape of the dataset (dimensions).
494    pub fn shape(&self) -> &[u64] {
495        &self.dataspace.dims
496    }
497
    /// Datatype of the dataset.
    pub fn dtype(&self) -> &Datatype {
        &self.datatype
    }
502
503    /// Number of dimensions.
504    pub fn ndim(&self) -> usize {
505        self.dataspace.dims.len()
506    }
507
508    /// Maximum dimension sizes, if defined. `u64::MAX` indicates unlimited.
509    pub fn max_dims(&self) -> Option<&[u64]> {
510        self.dataspace.max_dims.as_deref()
511    }
512
513    /// Chunk dimensions, if the dataset is chunked.
514    pub fn chunks(&self) -> Option<Vec<u32>> {
515        match &self.layout {
516            DataLayout::Chunked { dims, .. } => Some(dims.clone()),
517            _ => None,
518        }
519    }
520
    /// Fill value, if defined.
    ///
    /// Sourced either from the header's fill-value message or, as a
    /// fallback, a scalar `_FillValue` attribute (see `from_parsed_header`).
    pub fn fill_value(&self) -> Option<&FillValueMessage> {
        self.fill_value.as_ref()
    }
525
526    /// Dataset attributes.
527    pub fn attributes(&self) -> Vec<Attribute> {
528        self.attributes
529            .iter()
530            .map(|a| {
531                Attribute::from_message_with_context(
532                    a.clone(),
533                    Some(self.file_data),
534                    self.offset_size,
535                )
536            })
537            .collect()
538    }
539
540    /// Find an attribute by name.
541    pub fn attribute(&self, name: &str) -> Result<Attribute> {
542        self.attributes
543            .iter()
544            .find(|a| a.name == name)
545            .map(|a| {
546                Attribute::from_message_with_context(
547                    a.clone(),
548                    Some(self.file_data),
549                    self.offset_size,
550                )
551            })
552            .ok_or_else(|| Error::AttributeNotFound(name.to_string()))
553    }
554
555    /// Read a scalar string dataset or single string element.
556    ///
557    /// Use [`Dataset::read_strings`] when the dataset contains multiple strings.
558    pub fn read_string(&self) -> Result<String> {
559        let mut strings = self.read_strings()?;
560        match strings.len() {
561            1 => Ok(strings.swap_remove(0)),
562            0 => Err(Error::InvalidData(format!(
563                "dataset '{}' contains no string elements",
564                self.name
565            ))),
566            count => Err(Error::InvalidData(format!(
567                "dataset '{}' contains {count} string elements; use read_strings()",
568                self.name
569            ))),
570        }
571    }
572
    /// Read all string elements from a string-typed dataset.
    ///
    /// Supports three on-disk forms: fixed-size strings (one `len`-byte
    /// record per element), variable-length strings (per-element global-heap
    /// references), and variable-length sequences of 1-byte integers, which
    /// are treated as byte strings.
    ///
    /// # Errors
    /// `TypeMismatch` for non-string datatypes; `InvalidData` when the raw
    /// buffer is shorter than the element count requires.
    pub fn read_strings(&self) -> Result<Vec<String>> {
        match &self.datatype {
            Datatype::String {
                size: StringSize::Fixed(len),
                encoding,
                padding,
            } => {
                let raw = self.read_raw_bytes()?;
                let elem_size = *len as usize;
                let count = checked_usize(self.num_elements(), "dataset string element count")?;
                let expected_bytes =
                    checked_mul_usize(count, elem_size, "dataset string byte size")?;
                if raw.len() < expected_bytes {
                    return Err(Error::InvalidData(format!(
                        "dataset '{}' string data too short: need {} bytes, have {}",
                        self.name,
                        expected_bytes,
                        raw.len()
                    )));
                }

                // Fixed-size records: decode each elem_size-byte window.
                let mut strings = Vec::with_capacity(count);
                for i in 0..count {
                    let start = i * elem_size;
                    let end = start + elem_size;
                    strings.push(decode_string(&raw[start..end], *padding, *encoding)?);
                }
                Ok(strings)
            }
            Datatype::String {
                size: StringSize::Variable,
                encoding,
                padding,
            } => {
                let raw = self.read_raw_bytes()?;
                let count = checked_usize(self.num_elements(), "dataset string element count")?;
                // Per-element vlen reference: 4-byte length, a file offset
                // (offset_size bytes), and a 4-byte heap object index —
                // TODO(review): confirm against the vlen reference decoder.
                let ref_size = 4 + self.offset_size as usize + 4;
                let expected_bytes =
                    checked_mul_usize(count, ref_size, "dataset string reference byte size")?;
                if raw.len() < expected_bytes {
                    return Err(Error::InvalidData(format!(
                        "dataset '{}' vlen string data too short: need {} bytes, have {}",
                        self.name,
                        expected_bytes,
                        raw.len()
                    )));
                }

                // Each reference is resolved against the file's global heap.
                let mut strings = Vec::with_capacity(count);
                for i in 0..count {
                    let offset = i * ref_size;
                    strings.push(read_one_vlen_string(
                        &raw,
                        offset,
                        self.file_data,
                        self.offset_size,
                        *padding,
                        *encoding,
                    )?);
                }
                Ok(strings)
            }
            Datatype::VarLen { base } => {
                // Only vlen sequences of 1-byte fixed-point elements are
                // accepted here: they decode as byte strings.
                if !matches!(base.as_ref(), Datatype::FixedPoint { size: 1, .. }) {
                    return Err(Error::TypeMismatch {
                        expected: "String dataset".into(),
                        actual: format!("{:?}", self.datatype),
                    });
                }

                let raw = self.read_raw_bytes()?;
                let count = checked_usize(self.num_elements(), "dataset string element count")?;
                let ref_size = 4 + self.offset_size as usize + 4;
                let expected_bytes =
                    checked_mul_usize(count, ref_size, "dataset string reference byte size")?;
                if raw.len() < expected_bytes {
                    return Err(Error::InvalidData(format!(
                        "dataset '{}' vlen byte string data too short: need {} bytes, have {}",
                        self.name,
                        expected_bytes,
                        raw.len()
                    )));
                }

                let mut strings = Vec::with_capacity(count);
                for i in 0..count {
                    let offset = i * ref_size;
                    let ref_bytes = &raw[offset..offset + ref_size];
                    // Unresolvable references decode as empty strings
                    // (best-effort) rather than failing the whole read.
                    let value = resolve_vlen_bytes(ref_bytes, self.file_data, self.offset_size)
                        .unwrap_or_default();
                    strings.push(decode_varlen_byte_string(&value)?);
                }
                Ok(strings)
            }
            _ => Err(Error::TypeMismatch {
                expected: "String dataset".into(),
                actual: format!("{:?}", self.datatype),
            }),
        }
    }
674
675    /// Total number of elements in the dataset.
676    pub fn num_elements(&self) -> u64 {
677        if self.dataspace.dims.is_empty() {
678            match self.dataspace.dataspace_type {
679                DataspaceType::Scalar => 1,
680                DataspaceType::Null => 0,
681                DataspaceType::Simple => 0,
682            }
683        } else {
684            self.dataspace.dims.iter().product()
685        }
686    }
687
    /// Read the entire dataset into an n-dimensional array.
    pub fn read_array<T: H5Type>(&self) -> Result<ArrayD<T>> {
        // Dispatch on storage layout; any error is tagged with the dataset
        // name for easier diagnosis.
        let result = match &self.layout {
            DataLayout::Compact { data } => self.read_compact::<T>(data),
            DataLayout::Contiguous { address, size } => self.read_contiguous::<T>(*address, *size),
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked::<T>(*address, dims, *element_size, chunk_indexing.as_ref()),
        };
        result.map_err(|e| e.with_context(&self.name))
    }
702
    /// Read the entire dataset using internal chunk-level parallelism when possible.
    ///
    /// Non-chunked datasets fall back to `read_array`.
    #[cfg(feature = "rayon")]
    pub fn read_array_parallel<T: H5Type>(&self) -> Result<ArrayD<T>> {
        match &self.layout {
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked_parallel::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
            ),
            _ => self.read_array::<T>(),
        }
    }
723
    /// Read the entire dataset using the provided Rayon thread pool.
    ///
    /// Non-chunked datasets fall back to `read_array` (which does not use
    /// the pool).
    #[cfg(feature = "rayon")]
    pub fn read_array_in_pool<T: H5Type>(&self, pool: &rayon::ThreadPool) -> Result<ArrayD<T>> {
        match &self.layout {
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => pool.install(|| {
                // `install` runs the chunked read inside the caller's pool so
                // its Rayon work-stealing happens on those threads.
                self.read_chunked_parallel::<T>(
                    *address,
                    dims,
                    *element_size,
                    chunk_indexing.as_ref(),
                )
            }),
            _ => self.read_array::<T>(),
        }
    }
746
    /// Read a hyperslab of the dataset using chunk-level parallelism when possible.
    ///
    /// Chunked datasets decompress overlapping chunks in parallel via Rayon.
    /// Non-chunked layouts fall back to `read_slice`.
    ///
    /// # Errors
    /// Selection validation errors from `normalize_selection` (dimension
    /// mismatch, zero step, out-of-bounds start/index), plus any read error.
    #[cfg(feature = "rayon")]
    pub fn read_slice_parallel<T: H5Type>(&self, selection: &SliceInfo) -> Result<ArrayD<T>> {
        // Validate the selection up front so both paths share one resolution.
        let resolved = normalize_selection(selection, &self.dataspace.dims)?;

        match &self.layout {
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked_slice_parallel::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
                selection,
                &resolved,
            ),
            _ => self.read_slice::<T>(selection),
        }
    }
772
    /// Read a hyperslab of the dataset.
    ///
    /// # Errors
    /// Selection validation errors from `normalize_selection` (dimension
    /// mismatch, zero step, out-of-bounds start/index), plus any read error.
    pub fn read_slice<T: H5Type>(&self, selection: &SliceInfo) -> Result<ArrayD<T>> {
        let resolved = normalize_selection(selection, &self.dataspace.dims)?;

        // Dispatch on storage layout.
        match &self.layout {
            DataLayout::Contiguous { address, size } => {
                self.read_contiguous_slice::<T>(*address, *size, selection, &resolved)
            }
            DataLayout::Compact { data } => self.read_compact_slice::<T>(data, selection),
            DataLayout::Chunked {
                address,
                dims,
                element_size,
                chunk_indexing,
            } => self.read_chunked_slice::<T>(
                *address,
                dims,
                *element_size,
                chunk_indexing.as_ref(),
                selection,
                &resolved,
            ),
        }
    }
797
    /// Decode a compact-layout dataset, whose raw bytes are stored inline
    /// in the object header message.
    fn read_compact<T: H5Type>(&self, data: &[u8]) -> Result<ArrayD<T>> {
        self.decode_raw_data::<T>(data)
    }
801
    /// Read the dataset's raw bytes regardless of storage layout, normalized
    /// to exactly `num_elements * element_size` bytes (see
    /// `normalize_raw_bytes` for the padding/truncation behavior).
    fn read_raw_bytes(&self) -> Result<Vec<u8>> {
        let elem_size = dtype_element_size(&self.datatype);
        let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
        let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;

        let result = match &self.layout {
            DataLayout::Compact { data } => Ok(self.normalize_raw_bytes(data, total_bytes)),
            DataLayout::Contiguous { address, size } => {
                self.read_contiguous_bytes(*address, *size, total_bytes)
            }
            DataLayout::Chunked {
                address,
                dims,
                element_size: _,
                chunk_indexing,
            } => self.read_chunked_bytes(*address, dims, chunk_indexing.as_ref(), total_bytes),
        };

        // Tag any error with the dataset name for easier diagnosis.
        result.map_err(|e| e.with_context(&self.name))
    }
822
823    fn read_contiguous<T: H5Type>(&self, address: u64, size: u64) -> Result<ArrayD<T>> {
824        if Cursor::is_undefined_offset(address, self.offset_size) || size == 0 {
825            // Dataset with no data written — return fill values
826            return self.make_fill_array::<T>();
827        }
828
829        let addr = address as usize;
830        let sz = size as usize;
831        if addr + sz > self.file_data.len() {
832            return Err(Error::OffsetOutOfBounds(address));
833        }
834
835        let raw = &self.file_data[addr..addr + sz];
836        self.decode_raw_data::<T>(raw)
837    }
838
839    fn read_contiguous_bytes(
840        &self,
841        address: u64,
842        size: u64,
843        total_bytes: usize,
844    ) -> Result<Vec<u8>> {
845        if Cursor::is_undefined_offset(address, self.offset_size) || size == 0 {
846            return Ok(self.make_output_buffer(total_bytes));
847        }
848
849        let addr = address as usize;
850        let sz = size as usize;
851        if addr + sz > self.file_data.len() {
852            return Err(Error::OffsetOutOfBounds(address));
853        }
854
855        Ok(self.normalize_raw_bytes(&self.file_data[addr..addr + sz], total_bytes))
856    }
857
858    fn read_chunked<T: H5Type>(
859        &self,
860        index_address: u64,
861        chunk_dims: &[u32],
862        _element_size: u32,
863        chunk_indexing: Option<&ChunkIndexing>,
864    ) -> Result<ArrayD<T>> {
865        if Cursor::is_undefined_offset(index_address, self.offset_size) {
866            return self.make_fill_array::<T>();
867        }
868
869        let ndim = self.ndim();
870        let shape = &self.dataspace.dims;
871        let elem_size = dtype_element_size(&self.datatype);
872        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
873        let dataset_strides = row_major_strides(shape, "dataset stride")?;
874        let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;
875
876        // Allocate output initialized from the dataset's fill value.
877        let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
878        let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;
879
880        let entries = self.collect_chunk_entries(
881            index_address,
882            chunk_dims,
883            chunk_indexing,
884            ChunkEntrySelection {
885                shape,
886                ndim,
887                elem_size,
888                chunk_bounds: None,
889            },
890        )?;
891
892        let full_chunk_coverage = entries.len() == full_dataset_chunk_count(shape, &chunk_shape)?;
893        if full_chunk_coverage {
894            let hot_full_dataset_bytes = if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
895                self.full_dataset_bytes.get().cloned()
896            } else {
897                None
898            };
899            if let Some(cached_bytes) = hot_full_dataset_bytes {
900                return self.decode_raw_data::<T>(&cached_bytes);
901            }
902            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
903                let mut result_values: Vec<MaybeUninit<T>> =
904                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
905                        .take(total_elements)
906                        .collect();
907                let result_ptr = result_values.as_mut_ptr() as *mut u8;
908                let result_len = checked_mul_usize(
909                    result_values.len(),
910                    std::mem::size_of::<T>(),
911                    "typed dataset size in bytes",
912                )?;
913
914                for entry in &entries {
915                    let chunk_data =
916                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
917                    unsafe {
918                        copy_chunk_to_flat_with_strides_ptr(
919                            &chunk_data,
920                            FlatBufferPtr {
921                                ptr: result_ptr,
922                                len: result_len,
923                            },
924                            ChunkCopyLayout {
925                                chunk_offsets: &entry.offsets,
926                                chunk_shape: &chunk_shape,
927                                dataset_shape: shape,
928                                dataset_strides: &dataset_strides,
929                                chunk_strides: &chunk_strides,
930                                elem_size,
931                            },
932                        );
933                    }
934                }
935
936                if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
937                    let mut cached_bytes = vec![0u8; total_bytes];
938                    unsafe {
939                        std::ptr::copy_nonoverlapping(
940                            result_ptr,
941                            cached_bytes.as_mut_ptr(),
942                            total_bytes,
943                        );
944                    }
945                    let _ = self.full_dataset_bytes.set(Arc::new(cached_bytes));
946                }
947
948                let mut result_shape = Vec::with_capacity(shape.len());
949                for &dim in shape {
950                    result_shape.push(checked_usize(dim, "dataset dimension")?);
951                }
952                let result_values = assume_init_vec(result_values);
953                return ArrayD::from_shape_vec(IxDyn(&result_shape), result_values)
954                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
955            }
956
957            let mut flat_data = vec![MaybeUninit::<u8>::uninit(); total_bytes];
958            let flat_ptr = flat_data.as_mut_ptr() as *mut u8;
959            let flat_len = flat_data.len();
960
961            for entry in &entries {
962                let chunk_data =
963                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
964                unsafe {
965                    copy_chunk_to_flat_with_strides_ptr(
966                        &chunk_data,
967                        FlatBufferPtr {
968                            ptr: flat_ptr,
969                            len: flat_len,
970                        },
971                        ChunkCopyLayout {
972                            chunk_offsets: &entry.offsets,
973                            chunk_shape: &chunk_shape,
974                            dataset_shape: shape,
975                            dataset_strides: &dataset_strides,
976                            chunk_strides: &chunk_strides,
977                            elem_size,
978                        },
979                    );
980                }
981            }
982
983            let flat_data = assume_init_u8_vec(flat_data);
984            if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
985                let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
986            }
987            return self.decode_raw_data::<T>(&flat_data);
988        }
989
990        let mut flat_data = self.make_output_buffer(total_bytes);
991        for entry in &entries {
992            let chunk_data = self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
993            copy_chunk_to_flat_with_strides(
994                &chunk_data,
995                &mut flat_data,
996                ChunkCopyLayout {
997                    chunk_offsets: &entry.offsets,
998                    chunk_shape: &chunk_shape,
999                    dataset_shape: shape,
1000                    dataset_strides: &dataset_strides,
1001                    chunk_strides: &chunk_strides,
1002                    elem_size,
1003                },
1004            );
1005        }
1006
1007        self.decode_raw_data::<T>(&flat_data)
1008    }
1009
1010    fn read_chunked_bytes(
1011        &self,
1012        index_address: u64,
1013        chunk_dims: &[u32],
1014        chunk_indexing: Option<&ChunkIndexing>,
1015        total_bytes: usize,
1016    ) -> Result<Vec<u8>> {
1017        if Cursor::is_undefined_offset(index_address, self.offset_size) {
1018            return Ok(self.make_output_buffer(total_bytes));
1019        }
1020
1021        let ndim = self.ndim();
1022        let shape = &self.dataspace.dims;
1023        let elem_size = dtype_element_size(&self.datatype);
1024        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
1025        let dataset_strides = row_major_strides(shape, "dataset stride")?;
1026        let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;
1027
1028        let entries = self.collect_chunk_entries(
1029            index_address,
1030            chunk_dims,
1031            chunk_indexing,
1032            ChunkEntrySelection {
1033                shape,
1034                ndim,
1035                elem_size,
1036                chunk_bounds: None,
1037            },
1038        )?;
1039
1040        let full_chunk_coverage = entries.len() == full_dataset_chunk_count(shape, &chunk_shape)?;
1041        if full_chunk_coverage && total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1042            if let Some(cached_bytes) = self.full_dataset_bytes.get() {
1043                return Ok(cached_bytes.as_ref().clone());
1044            }
1045        }
1046
1047        let mut flat_data = self.make_output_buffer(total_bytes);
1048        for entry in &entries {
1049            let chunk_data = self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
1050            copy_chunk_to_flat_with_strides(
1051                &chunk_data,
1052                &mut flat_data,
1053                ChunkCopyLayout {
1054                    chunk_offsets: &entry.offsets,
1055                    chunk_shape: &chunk_shape,
1056                    dataset_shape: shape,
1057                    dataset_strides: &dataset_strides,
1058                    chunk_strides: &chunk_strides,
1059                    elem_size,
1060                },
1061            );
1062        }
1063
1064        if full_chunk_coverage && total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
1065            let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
1066        }
1067
1068        Ok(flat_data)
1069    }
1070
    /// Parallel (rayon) counterpart of `read_chunked`: chunks are decoded and
    /// copied into the shared output buffer concurrently via `par_iter`.
    ///
    /// Entries are first sorted and rejected on duplicate output offsets:
    /// with distinct offsets, each chunk writes a disjoint region of the
    /// output, so the parallel raw-pointer writes below do not alias. The
    /// same fast paths as the serial version apply (hot full-dataset cache,
    /// direct typed copy, untyped byte assembly, fill-initialized fallback).
    #[cfg(feature = "rayon")]
    fn read_chunked_parallel<T: H5Type>(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        _element_size: u32,
        chunk_indexing: Option<&ChunkIndexing>,
    ) -> Result<ArrayD<T>> {
        // No chunk index allocated — dataset was never written.
        if Cursor::is_undefined_offset(index_address, self.offset_size) {
            return self.make_fill_array::<T>();
        }

        let ndim = self.ndim();
        let shape = &self.dataspace.dims;
        let elem_size = dtype_element_size(&self.datatype);
        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
        let dataset_strides = row_major_strides(shape, "dataset stride")?;
        let chunk_strides = row_major_strides(&chunk_shape, "chunk stride")?;
        let total_elements = checked_usize(self.num_elements(), "dataset element count")?;
        let total_bytes = checked_mul_usize(total_elements, elem_size, "dataset size in bytes")?;

        let mut entries = self.collect_chunk_entries(
            index_address,
            chunk_dims,
            chunk_indexing,
            ChunkEntrySelection {
                shape,
                ndim,
                elem_size,
                chunk_bounds: None,
            },
        )?;

        // Dedup check: sort by output offsets and reject duplicates.
        // Two chunks claiming the same output offsets would cause data races
        // when writing into the flat buffer in parallel.
        entries.sort_by(|a, b| a.offsets.cmp(&b.offsets));
        for i in 1..entries.len() {
            if entries[i].offsets == entries[i - 1].offsets {
                return Err(Error::InvalidData(format!(
                    "duplicate chunk output offsets {:?} (addresses {:#x} and {:#x})",
                    entries[i].offsets,
                    entries[i - 1].address,
                    entries[i].address
                )));
            }
        }

        let full_chunk_coverage = entries.len() == full_dataset_chunk_count(shape, &chunk_shape)?;
        if full_chunk_coverage {
            // Small datasets may already be assembled in the hot cache.
            if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                if let Some(cached_bytes) = self.full_dataset_bytes.get() {
                    return self.decode_raw_data::<T>(cached_bytes);
                }
            }
            // Typed fast path: copy raw chunk bytes directly into `Vec<T>`.
            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
                let mut result_values: Vec<MaybeUninit<T>> =
                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
                        .take(total_elements)
                        .collect();
                let flat = FlatBufferPtr {
                    ptr: result_values.as_mut_ptr() as *mut u8,
                    len: checked_mul_usize(
                        result_values.len(),
                        std::mem::size_of::<T>(),
                        "typed dataset size in bytes",
                    )?,
                };

                // SAFETY (inside the closures): distinct chunk offsets write
                // disjoint regions within `flat.len`, and full duplicate-free
                // coverage initializes every byte before `assume_init_vec`.
                entries
                    .par_iter()
                    .map(|entry| {
                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)
                            .map(|data| unsafe {
                                flat.copy_chunk(
                                    &data,
                                    ChunkCopyLayout {
                                        chunk_offsets: &entry.offsets,
                                        chunk_shape: &chunk_shape,
                                        dataset_shape: shape,
                                        dataset_strides: &dataset_strides,
                                        chunk_strides: &chunk_strides,
                                        elem_size,
                                    },
                                );
                            })
                    })
                    .collect::<std::result::Result<Vec<_>, Error>>()?;

                let mut result_shape = Vec::with_capacity(shape.len());
                for &dim in shape {
                    result_shape.push(checked_usize(dim, "dataset dimension")?);
                }
                // Warm the hot cache so later reads skip the per-chunk work.
                if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                    let mut cached_bytes = vec![0u8; total_bytes];
                    // SAFETY: the parallel copies above initialized all
                    // `total_bytes` bytes at `flat.ptr`.
                    unsafe {
                        std::ptr::copy_nonoverlapping(
                            flat.ptr,
                            cached_bytes.as_mut_ptr(),
                            total_bytes,
                        );
                    }
                    let _ = self.full_dataset_bytes.set(Arc::new(cached_bytes));
                }
                let result_values = assume_init_vec(result_values);
                return ArrayD::from_shape_vec(IxDyn(&result_shape), result_values)
                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
            }

            // Untyped fast path: assemble raw bytes in parallel, decode once.
            let mut flat_data = vec![MaybeUninit::<u8>::uninit(); total_bytes];
            let flat = FlatBufferPtr {
                ptr: flat_data.as_mut_ptr() as *mut u8,
                len: flat_data.len(),
            };

            // SAFETY (inside the closures): same disjoint-write argument as
            // the typed path above.
            entries
                .par_iter()
                .map(|entry| {
                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)
                        .map(|data| unsafe {
                            flat.copy_chunk(
                                &data,
                                ChunkCopyLayout {
                                    chunk_offsets: &entry.offsets,
                                    chunk_shape: &chunk_shape,
                                    dataset_shape: shape,
                                    dataset_strides: &dataset_strides,
                                    chunk_strides: &chunk_strides,
                                    elem_size,
                                },
                            );
                        })
                })
                .collect::<std::result::Result<Vec<_>, Error>>()?;

            let flat_data = assume_init_u8_vec(flat_data);
            if total_bytes <= HOT_FULL_DATASET_CACHE_MAX_BYTES {
                let _ = self.full_dataset_bytes.set(Arc::new(flat_data.clone()));
            }
            return self.decode_raw_data::<T>(&flat_data);
        }

        // Sparse fallback: fill-initialized buffer; missing chunks keep the
        // fill value. Disjoint offsets still make the parallel writes safe.
        let mut flat_data = self.make_output_buffer(total_bytes);
        let flat = FlatBufferPtr {
            ptr: flat_data.as_mut_ptr(),
            len: flat_data.len(),
        };

        entries
            .par_iter()
            .map(|entry| {
                self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)
                    .map(|data| unsafe {
                        flat.copy_chunk(
                            &data,
                            ChunkCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                dataset_strides: &dataset_strides,
                                chunk_strides: &chunk_strides,
                                elem_size,
                            },
                        );
                    })
            })
            .collect::<std::result::Result<Vec<_>, Error>>()?;

        self.decode_raw_data::<T>(&flat_data)
    }
1241
    /// Collect all chunk entries by dispatching on the chunk indexing type.
    ///
    /// Shared by `read_chunked` and `read_chunked_slice`.
    ///
    /// Caching: a full-dataset request (`chunk_bounds == None`) is memoized
    /// once in the `full_chunk_entries` `OnceLock`; bounded requests go
    /// through the `chunk_entry_cache` LRU, keyed by index address plus the
    /// first/last chunk coordinates. `chunk_indexing == None` selects the
    /// legacy B-tree v1 index used by older-layout files.
    fn collect_chunk_entries(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        chunk_indexing: Option<&ChunkIndexing>,
        selection: ChunkEntrySelection<'_>,
    ) -> Result<Vec<chunk_index::ChunkEntry>> {
        // Full-dataset request: serve from the one-shot memo if warm.
        if selection.chunk_bounds.is_none() {
            if let Some(cached) = self.full_chunk_entries.get() {
                return Ok((**cached).clone());
            }
        }

        // Bounded request: derive the LRU key from the chunk-bound range.
        let cache_key =
            selection
                .chunk_bounds
                .map(|(first_chunk, last_chunk)| ChunkEntryCacheKey {
                    index_address,
                    first_chunk: SmallVec::from_slice(first_chunk),
                    last_chunk: SmallVec::from_slice(last_chunk),
                });

        if let Some(ref key) = cache_key {
            let mut cache = self.chunk_entry_cache.lock();
            if let Some(cached) = cache.get(key) {
                return Ok((**cached).clone());
            }
        }

        let entries = match chunk_indexing {
            None => {
                // V1-V3: B-tree v1 chunk indexing
                self.collect_btree_v1_entries(
                    index_address,
                    selection.ndim,
                    chunk_dims,
                    selection.chunk_bounds,
                )
            }
            // Single-chunk index: one entry built directly from the index
            // address and the recorded filtered size / filter flags.
            Some(ChunkIndexing::SingleChunk {
                filtered_size,
                filters,
            }) => Ok(vec![chunk_index::single_chunk_entry(
                index_address,
                *filtered_size,
                *filters,
                selection.ndim,
            )]),
            Some(ChunkIndexing::BTreeV2) => chunk_index::collect_v2_chunk_entries(
                self.file_data,
                index_address,
                self.offset_size,
                self.length_size,
                selection.ndim as u32,
                chunk_dims,
                selection.chunk_bounds,
            ),
            // Implicit index: chunk locations derived from the base address,
            // shape, chunk dims, and element size — nothing read from disk.
            Some(ChunkIndexing::Implicit) => Ok(chunk_index::collect_implicit_chunk_entries(
                index_address,
                selection.shape,
                chunk_dims,
                selection.elem_size,
                selection.chunk_bounds,
            )),
            Some(ChunkIndexing::FixedArray { .. }) => {
                crate::fixed_array::collect_fixed_array_chunk_entries(
                    self.file_data,
                    index_address,
                    self.offset_size,
                    self.length_size,
                    selection.shape,
                    chunk_dims,
                    selection.chunk_bounds,
                )
            }
            Some(ChunkIndexing::ExtensibleArray { .. }) => {
                crate::extensible_array::collect_extensible_array_chunk_entries(
                    self.file_data,
                    index_address,
                    self.offset_size,
                    self.length_size,
                    selection.shape,
                    chunk_dims,
                    selection.chunk_bounds,
                )
            }
        }?;

        // Populate the matching cache. A failed `set` only means another
        // caller raced us to it — safe to ignore.
        if let Some(key) = cache_key {
            let mut cache = self.chunk_entry_cache.lock();
            cache.put(key, Arc::new(entries.clone()));
        } else {
            let _ = self.full_chunk_entries.set(Arc::new(entries.clone()));
        }

        Ok(entries)
    }
1342
1343    /// Collect chunk entries from a B-tree v1 index.
1344    fn collect_btree_v1_entries(
1345        &self,
1346        btree_address: u64,
1347        ndim: usize,
1348        chunk_dims: &[u32],
1349        chunk_bounds: Option<(&[u64], &[u64])>,
1350    ) -> Result<Vec<chunk_index::ChunkEntry>> {
1351        let leaves = crate::btree_v1::collect_btree_v1_leaves(
1352            self.file_data,
1353            btree_address,
1354            self.offset_size,
1355            self.length_size,
1356            Some(ndim as u32),
1357            chunk_dims,
1358            chunk_bounds,
1359        )?;
1360
1361        let mut entries = Vec::with_capacity(leaves.len());
1362        for (key, chunk_addr) in &leaves {
1363            match key {
1364                crate::btree_v1::BTreeV1Key::RawData {
1365                    chunk_size,
1366                    filter_mask,
1367                    offsets,
1368                } => {
1369                    entries.push(chunk_index::ChunkEntry {
1370                        address: *chunk_addr,
1371                        size: *chunk_size as u64,
1372                        filter_mask: *filter_mask,
1373                        offsets: offsets[..ndim].to_vec(),
1374                    });
1375                }
1376                _ => {
1377                    return Err(Error::InvalidData(
1378                        "expected raw data key in chunk B-tree".into(),
1379                    ))
1380                }
1381            }
1382        }
1383        Ok(entries)
1384    }
1385
1386    fn load_chunk_data(
1387        &self,
1388        entry: &chunk_index::ChunkEntry,
1389        dataset_addr: u64,
1390        chunk_shape: &[u64],
1391        elem_size: usize,
1392    ) -> Result<Arc<Vec<u8>>> {
1393        let cache_key = ChunkKey {
1394            dataset_addr,
1395            chunk_offsets: smallvec::SmallVec::from_slice(&entry.offsets),
1396        };
1397
1398        if let Some(cached) = self.chunk_cache.get(&cache_key) {
1399            return Ok(cached);
1400        }
1401
1402        let addr = entry.address as usize;
1403        let size = if entry.size > 0 {
1404            entry.size as usize
1405        } else {
1406            chunk_shape.iter().product::<u64>() as usize * elem_size
1407        };
1408        if addr + size > self.file_data.len() {
1409            return Err(Error::OffsetOutOfBounds(entry.address));
1410        }
1411        let raw = &self.file_data[addr..addr + size];
1412
1413        let decoded = if let Some(ref pipeline) = self.filters {
1414            filters::apply_pipeline(
1415                raw,
1416                &pipeline.filters,
1417                entry.filter_mask,
1418                elem_size,
1419                Some(&self.filter_registry),
1420            )?
1421        } else {
1422            raw.to_vec()
1423        };
1424
1425        Ok(self.chunk_cache.insert(cache_key, decoded))
1426    }
1427
1428    /// Chunked slice: only read chunks that overlap the selection.
1429    ///
1430    /// Resolves each `SliceInfoElem` to concrete ranges, computes the chunk
1431    /// grid range per dimension, and only decompresses overlapping chunks.
1432    fn read_chunked_slice<T: H5Type>(
1433        &self,
1434        index_address: u64,
1435        chunk_dims: &[u32],
1436        _element_size: u32,
1437        chunk_indexing: Option<&ChunkIndexing>,
1438        _selection: &SliceInfo,
1439        resolved: &ResolvedSelection,
1440    ) -> Result<ArrayD<T>> {
1441        if resolved.result_elements == 0 {
1442            return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
1443        }
1444
1445        if Cursor::is_undefined_offset(index_address, self.offset_size) {
1446            return self
1447                .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
1448        }
1449
1450        let ndim = self.ndim();
1451        let shape = &self.dataspace.dims;
1452        let elem_size = dtype_element_size(&self.datatype);
1453        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
1454        let mut first_chunk = vec![0u64; ndim];
1455        let mut last_chunk = vec![0u64; ndim];
1456        for d in 0..ndim {
1457            let (first, last) = resolved.dims[d]
1458                .chunk_index_range(chunk_shape[d])
1459                .expect("zero-sized result handled above");
1460            first_chunk[d] = first;
1461            last_chunk[d] = last;
1462        }
1463
1464        // Collect all chunk entries.
1465        let overlapping = self.collect_chunk_entries(
1466            index_address,
1467            chunk_dims,
1468            chunk_indexing,
1469            ChunkEntrySelection {
1470                shape,
1471                ndim,
1472                elem_size,
1473                chunk_bounds: Some((&first_chunk, &last_chunk)),
1474            },
1475        )?;
1476
1477        let result_total_bytes = checked_mul_usize(
1478            resolved.result_elements,
1479            elem_size,
1480            "slice result size in bytes",
1481        )?;
1482        // Compute result strides (including collapsed dims — they have count=1).
1483        let result_dims = resolved.result_dims_with_collapsed();
1484        let mut result_strides = vec![1usize; ndim];
1485        for d in (0..ndim - 1).rev() {
1486            result_strides[d] =
1487                checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
1488        }
1489        let mut chunk_strides = vec![1usize; ndim];
1490        for d in (0..ndim - 1).rev() {
1491            chunk_strides[d] = checked_mul_usize(
1492                chunk_strides[d + 1],
1493                chunk_shape[d + 1] as usize,
1494                "chunk stride",
1495            )?;
1496        }
1497        let use_unit_stride_fast_path = resolved.is_unit_stride();
1498        let fully_covered_unit_stride = use_unit_stride_fast_path
1499            && overlapping.len() == expected_chunk_count(&first_chunk, &last_chunk)?;
1500
1501        if fully_covered_unit_stride {
1502            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
1503                let mut result_values: Vec<MaybeUninit<T>> =
1504                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
1505                        .take(resolved.result_elements)
1506                        .collect();
1507                let result_ptr = result_values.as_mut_ptr() as *mut u8;
1508                let result_len = checked_mul_usize(
1509                    result_values.len(),
1510                    std::mem::size_of::<T>(),
1511                    "typed slice result size in bytes",
1512                )?;
1513
1514                for entry in &overlapping {
1515                    let chunk_data =
1516                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
1517
1518                    unsafe {
1519                        copy_unit_stride_chunk_overlap_ptr(
1520                            &chunk_data,
1521                            FlatBufferPtr {
1522                                ptr: result_ptr,
1523                                len: result_len,
1524                            },
1525                            UnitStrideCopyLayout {
1526                                chunk_offsets: &entry.offsets,
1527                                chunk_shape: &chunk_shape,
1528                                dataset_shape: shape,
1529                                resolved,
1530                                chunk_strides: &chunk_strides,
1531                                result_strides: &result_strides,
1532                                elem_size,
1533                            },
1534                        )?;
1535                    }
1536                }
1537
1538                let result_values = assume_init_vec(result_values);
1539                return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
1540                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
1541            }
1542
1543            let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
1544            let result_ptr = result_buf.as_mut_ptr() as *mut u8;
1545            let result_len = result_buf.len();
1546
1547            for entry in &overlapping {
1548                let chunk_data =
1549                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;
1550
1551                unsafe {
1552                    copy_unit_stride_chunk_overlap_ptr(
1553                        &chunk_data,
1554                        FlatBufferPtr {
1555                            ptr: result_ptr,
1556                            len: result_len,
1557                        },
1558                        UnitStrideCopyLayout {
1559                            chunk_offsets: &entry.offsets,
1560                            chunk_shape: &chunk_shape,
1561                            dataset_shape: shape,
1562                            resolved,
1563                            chunk_strides: &chunk_strides,
1564                            result_strides: &result_strides,
1565                            elem_size,
1566                        },
1567                    )?;
1568                }
1569            }
1570
1571            let result_buf = assume_init_u8_vec(result_buf);
1572            return self.decode_buffer_with_shape::<T>(
1573                &result_buf,
1574                resolved.result_elements,
1575                &resolved.result_shape,
1576            );
1577        }
1578
1579        let mut result_buf = self.make_output_buffer(result_total_bytes);
1580
1581        // For each overlapping chunk: decompress and copy matching elements.
1582        for entry in &overlapping {
1583            let cache_key = crate::cache::ChunkKey {
1584                dataset_addr: index_address,
1585                chunk_offsets: smallvec::SmallVec::from_slice(&entry.offsets),
1586            };
1587
1588            let chunk_data = if let Some(cached) = self.chunk_cache.get(&cache_key) {
1589                cached
1590            } else {
1591                let addr = entry.address as usize;
1592                let size = if entry.size > 0 {
1593                    entry.size as usize
1594                } else {
1595                    chunk_shape.iter().product::<u64>() as usize * elem_size
1596                };
1597                if addr + size > self.file_data.len() {
1598                    return Err(Error::OffsetOutOfBounds(entry.address));
1599                }
1600                let raw = &self.file_data[addr..addr + size];
1601                let decoded = if let Some(ref pipeline) = self.filters {
1602                    filters::apply_pipeline(
1603                        raw,
1604                        &pipeline.filters,
1605                        entry.filter_mask,
1606                        elem_size,
1607                        Some(&self.filter_registry),
1608                    )?
1609                } else {
1610                    raw.to_vec()
1611                };
1612                self.chunk_cache.insert(cache_key, decoded)
1613            };
1614
1615            if use_unit_stride_fast_path {
1616                copy_unit_stride_chunk_overlap(
1617                    &chunk_data,
1618                    &mut result_buf,
1619                    UnitStrideCopyLayout {
1620                        chunk_offsets: &entry.offsets,
1621                        chunk_shape: &chunk_shape,
1622                        dataset_shape: shape,
1623                        resolved,
1624                        chunk_strides: &chunk_strides,
1625                        result_strides: &result_strides,
1626                        elem_size,
1627                    },
1628                )?;
1629                continue;
1630            }
1631
1632            // For each dimension, compute which elements within this chunk fall
1633            // within the selection.
1634            let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
1635            for d in 0..ndim {
1636                let chunk_start = entry.offsets[d];
1637                let chunk_end = (chunk_start + chunk_shape[d]).min(shape[d]);
1638                let dim = &resolved.dims[d];
1639                let sel_start = dim.start;
1640                let sel_end = dim.end;
1641                let sel_step = dim.step;
1642                let mut indices = Vec::new();
1643
1644                // Find first selected index >= chunk_start
1645                let first_sel = if sel_start >= chunk_start {
1646                    sel_start
1647                } else {
1648                    let steps_to_skip = (chunk_start - sel_start).div_ceil(sel_step);
1649                    sel_start + steps_to_skip * sel_step
1650                };
1651
1652                let mut sel_idx = first_sel;
1653                while sel_idx < sel_end && sel_idx < chunk_end {
1654                    let chunk_local = checked_usize(sel_idx - chunk_start, "chunk-local index")?;
1655                    // Compute result-space index for this dimension.
1656                    let result_dim_idx =
1657                        checked_usize((sel_idx - dim.start) / sel_step, "result index")?;
1658                    indices.push((chunk_local, result_dim_idx));
1659                    sel_idx += sel_step;
1660                }
1661
1662                dim_indices.push(indices);
1663            }
1664
1665            // Iterate over the cartesian product of matching indices.
1666            copy_selected_elements(
1667                &chunk_data,
1668                &mut result_buf,
1669                &dim_indices,
1670                &chunk_strides,
1671                &result_strides,
1672                elem_size,
1673                ndim,
1674            );
1675        }
1676
1677        self.decode_buffer_with_shape::<T>(
1678            &result_buf,
1679            resolved.result_elements,
1680            &resolved.result_shape,
1681        )
1682    }
1683
    /// Parallel variant of `read_chunked_slice`: decompresses overlapping chunks
    /// in parallel using Rayon, then copies selected elements into the result buffer.
    ///
    /// Each chunk writes to a disjoint region of the result buffer (chunks don't
    /// overlap in output space), so this is safe to parallelize.
    ///
    /// Three strategies, fastest first:
    /// 1. unit-stride selection, every overlapped chunk present, and `T` is
    ///    byte-compatible with the on-disk type → copy straight into an
    ///    uninitialized `Vec<T>`;
    /// 2. unit-stride selection with full chunk coverage but `T` needs
    ///    per-element decoding → copy into an uninitialized byte buffer, then
    ///    decode;
    /// 3. otherwise → fill-initialized byte buffer with per-element
    ///    (strided/partial) copies, then decode.
    #[cfg(feature = "rayon")]
    fn read_chunked_slice_parallel<T: H5Type>(
        &self,
        index_address: u64,
        chunk_dims: &[u32],
        _element_size: u32,
        chunk_indexing: Option<&ChunkIndexing>,
        _selection: &SliceInfo,
        resolved: &ResolvedSelection,
    ) -> Result<ArrayD<T>> {
        // Empty selection: nothing to read, just shape an empty array.
        if resolved.result_elements == 0 {
            return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
        }

        // No chunk index written yet: the whole selection reads as fill value.
        if Cursor::is_undefined_offset(index_address, self.offset_size) {
            return self
                .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
        }

        let ndim = self.ndim();
        let shape = &self.dataspace.dims;
        let elem_size = dtype_element_size(&self.datatype);
        let chunk_shape: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
        // Per-dimension range of chunk grid indices the selection touches;
        // used to restrict the index walk to overlapping chunks only.
        let mut first_chunk = vec![0u64; ndim];
        let mut last_chunk = vec![0u64; ndim];
        for d in 0..ndim {
            let (first, last) = resolved.dims[d]
                .chunk_index_range(chunk_shape[d])
                .expect("zero-sized result handled above");
            first_chunk[d] = first;
            last_chunk[d] = last;
        }

        // Collect all chunk entries.
        let overlapping = self.collect_chunk_entries(
            index_address,
            chunk_dims,
            chunk_indexing,
            ChunkEntrySelection {
                shape,
                ndim,
                elem_size,
                chunk_bounds: Some((&first_chunk, &last_chunk)),
            },
        )?;

        // Allocate result buffer (raw bytes) initialized from fill value.
        let result_total_bytes = checked_mul_usize(
            resolved.result_elements,
            elem_size,
            "slice result size in bytes",
        )?;
        // Compute result strides (including collapsed dims — they have count=1).
        let result_dims = resolved.result_dims_with_collapsed();
        let mut result_strides = vec![1usize; ndim];
        for d in (0..ndim - 1).rev() {
            result_strides[d] =
                checked_mul_usize(result_strides[d + 1], result_dims[d + 1], "result stride")?;
        }
        // Row-major strides within a single chunk, in elements.
        let mut chunk_strides = vec![1usize; ndim];
        for d in (0..ndim - 1).rev() {
            chunk_strides[d] = checked_mul_usize(
                chunk_strides[d + 1],
                chunk_shape[d + 1] as usize,
                "chunk stride",
            )?;
        }
        let use_unit_stride_fast_path = resolved.is_unit_stride();
        // Every output byte gets written only if every overlapped chunk has an
        // index entry; only then may we use an uninitialized output buffer.
        let fully_covered_unit_stride = use_unit_stride_fast_path
            && overlapping.len() == expected_chunk_count(&first_chunk, &last_chunk)?;

        if fully_covered_unit_stride {
            // Strategy 1: write decoded chunk bytes directly into a Vec<T>.
            if T::native_copy_compatible(&self.datatype) && std::mem::size_of::<T>() == elem_size {
                let mut result_values: Vec<MaybeUninit<T>> =
                    std::iter::repeat_with(MaybeUninit::<T>::uninit)
                        .take(resolved.result_elements)
                        .collect();
                // Raw view of the uninitialized output so worker threads can
                // write disjoint byte ranges without aliasing a &mut.
                let flat = FlatBufferPtr {
                    ptr: result_values.as_mut_ptr() as *mut u8,
                    len: checked_mul_usize(
                        result_values.len(),
                        std::mem::size_of::<T>(),
                        "typed slice result size in bytes",
                    )?,
                };

                overlapping
                    .par_iter()
                    .map(|entry| {
                        let chunk_data =
                            self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                        // SAFETY: chunks cover disjoint dataset regions, so
                        // each writes a disjoint slice of the output buffer.
                        unsafe {
                            flat.copy_unit_stride_chunk_overlap(
                                &chunk_data,
                                UnitStrideCopyLayout {
                                    chunk_offsets: &entry.offsets,
                                    chunk_shape: &chunk_shape,
                                    dataset_shape: shape,
                                    resolved,
                                    chunk_strides: &chunk_strides,
                                    result_strides: &result_strides,
                                    elem_size,
                                },
                            )?;
                        }

                        Ok(())
                    })
                    .collect::<std::result::Result<Vec<_>, Error>>()?;

                let result_values = assume_init_vec(result_values);
                return ArrayD::from_shape_vec(IxDyn(&resolved.result_shape), result_values)
                    .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
            }

            // Strategy 2: same parallel copy, but into raw bytes that are
            // decoded into T afterwards.
            let mut result_buf = vec![MaybeUninit::<u8>::uninit(); result_total_bytes];
            let flat = FlatBufferPtr {
                ptr: result_buf.as_mut_ptr() as *mut u8,
                len: result_buf.len(),
            };

            overlapping
                .par_iter()
                .map(|entry| {
                    let chunk_data =
                        self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                    // SAFETY: disjoint output ranges per chunk, as above.
                    unsafe {
                        flat.copy_unit_stride_chunk_overlap(
                            &chunk_data,
                            UnitStrideCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                resolved,
                                chunk_strides: &chunk_strides,
                                result_strides: &result_strides,
                                elem_size,
                            },
                        )?;
                    }

                    Ok(())
                })
                .collect::<std::result::Result<Vec<_>, Error>>()?;

            let result_buf = assume_init_u8_vec(result_buf);
            return self.decode_buffer_with_shape::<T>(
                &result_buf,
                resolved.result_elements,
                &resolved.result_shape,
            );
        }

        // Strategy 3: some chunks may be absent (they read as fill value) or
        // the selection is strided — start from a fill-initialized buffer.
        let mut result_buf = self.make_output_buffer(result_total_bytes);

        let flat = FlatBufferPtr {
            ptr: result_buf.as_mut_ptr(),
            len: result_buf.len(),
        };

        overlapping
            .par_iter()
            .map(|entry| {
                let chunk_data =
                    self.load_chunk_data(entry, index_address, &chunk_shape, elem_size)?;

                if use_unit_stride_fast_path {
                    // SAFETY: disjoint output ranges per chunk, as above.
                    unsafe {
                        flat.copy_unit_stride_chunk_overlap(
                            &chunk_data,
                            UnitStrideCopyLayout {
                                chunk_offsets: &entry.offsets,
                                chunk_shape: &chunk_shape,
                                dataset_shape: shape,
                                resolved,
                                chunk_strides: &chunk_strides,
                                result_strides: &result_strides,
                                elem_size,
                            },
                        )?;
                    }
                    return Ok(());
                }

                // For each dimension, compute which elements within this chunk fall
                // within the selection.
                let mut dim_indices: Vec<Vec<(usize, usize)>> = Vec::with_capacity(ndim);
                for d in 0..ndim {
                    let chunk_start = entry.offsets[d];
                    let chunk_end = (chunk_start + chunk_shape[d]).min(shape[d]);
                    let dim = &resolved.dims[d];
                    let sel_start = dim.start;
                    let sel_end = dim.end;
                    let sel_step = dim.step;
                    let mut indices = Vec::new();

                    // First selected coordinate at or after this chunk's start.
                    let first_sel = if sel_start >= chunk_start {
                        sel_start
                    } else {
                        let steps_to_skip = (chunk_start - sel_start).div_ceil(sel_step);
                        sel_start + steps_to_skip * sel_step
                    };

                    // Record (chunk-local index, result index) for every
                    // selected coordinate that lies inside this chunk.
                    let mut sel_idx = first_sel;
                    while sel_idx < sel_end && sel_idx < chunk_end {
                        let chunk_local =
                            checked_usize(sel_idx - chunk_start, "chunk-local index")?;
                        let result_dim_idx =
                            checked_usize((sel_idx - dim.start) / sel_step, "result index")?;
                        indices.push((chunk_local, result_dim_idx));
                        sel_idx += sel_step;
                    }

                    dim_indices.push(indices);
                }

                // SAFETY: each chunk writes to disjoint output positions because
                // chunks occupy non-overlapping regions of the dataset grid and
                // the selection maps each dataset coordinate to a unique result index.
                unsafe {
                    flat.copy_selected(
                        &chunk_data,
                        &dim_indices,
                        &chunk_strides,
                        &result_strides,
                        elem_size,
                        ndim,
                    );
                }

                Ok(())
            })
            .collect::<std::result::Result<Vec<_>, Error>>()?;

        self.decode_buffer_with_shape::<T>(
            &result_buf,
            resolved.result_elements,
            &resolved.result_shape,
        )
    }
1931
1932    fn read_contiguous_slice<T: H5Type>(
1933        &self,
1934        address: u64,
1935        size: u64,
1936        selection: &SliceInfo,
1937        resolved: &ResolvedSelection,
1938    ) -> Result<ArrayD<T>> {
1939        if resolved.result_elements == 0 {
1940            return self.make_fill_array_from_shape::<T>(0, &resolved.result_shape);
1941        }
1942
1943        if Cursor::is_undefined_offset(address, self.offset_size) || size == 0 {
1944            return self
1945                .make_fill_array_from_shape::<T>(resolved.result_elements, &resolved.result_shape);
1946        }
1947
1948        let shape = &self.dataspace.dims;
1949        let ndim = shape.len();
1950        let elem_size = dtype_element_size(&self.datatype);
1951
1952        // Check if this is a simple contiguous sub-range where we can compute
1953        // byte offsets directly (all Slice selections with step=1 and the
1954        // selection is contiguous in memory — i.e., all dimensions except the
1955        // outermost select the full range).
1956        let can_direct_extract = ndim > 0
1957            && selection.selections.iter().enumerate().all(|(d, sel)| {
1958                match sel {
1959                    SliceInfoElem::Slice { step, start, end } => {
1960                        if *step != 1 {
1961                            return false;
1962                        }
1963                        // Inner dimensions (d > 0) must select the full range
1964                        // for the data to be contiguous in memory.
1965                        if d > 0 {
1966                            *start == 0 && (*end == u64::MAX || *end >= shape[d])
1967                        } else {
1968                            true
1969                        }
1970                    }
1971                    SliceInfoElem::Index(_) => {
1972                        // Index on the outermost dim is fine (single row),
1973                        // but on inner dims it breaks contiguity.
1974                        d == 0
1975                    }
1976                }
1977            });
1978
1979        if can_direct_extract {
1980            // Compute the byte range to read from the mmap.
1981            let row_stride: u64 = shape[1..].iter().product::<u64>().max(1);
1982            let row_bytes = row_stride as usize * elem_size;
1983
1984            let (first_row, num_rows, result_shape) = match &selection.selections[0] {
1985                SliceInfoElem::Index(idx) => {
1986                    let mut rs: Vec<usize> = shape[1..].iter().map(|&d| d as usize).collect();
1987                    if rs.is_empty() {
1988                        rs = vec![];
1989                    }
1990                    (*idx, 1u64, rs)
1991                }
1992                SliceInfoElem::Slice { start, end, .. } => {
1993                    let actual_end = if *end == u64::MAX {
1994                        shape[0]
1995                    } else {
1996                        (*end).min(shape[0])
1997                    };
1998                    let count = actual_end.saturating_sub(*start);
1999                    let mut rs = vec![checked_usize(count, "contiguous slice row count")?];
2000                    for &dim in &shape[1..] {
2001                        rs.push(checked_usize(dim, "dataset dimension")?);
2002                    }
2003                    (*start, count, rs)
2004                }
2005            };
2006
2007            let byte_offset = checked_usize(address, "contiguous data address")?
2008                + checked_mul_usize(
2009                    checked_usize(first_row, "slice row offset")?,
2010                    row_bytes,
2011                    "contiguous byte offset",
2012                )?;
2013            let total_bytes = checked_mul_usize(
2014                checked_usize(num_rows, "contiguous slice row count")?,
2015                row_bytes,
2016                "contiguous slice size in bytes",
2017            )?;
2018
2019            if byte_offset + total_bytes > self.file_data.len() {
2020                return Err(Error::OffsetOutOfBounds(address));
2021            }
2022
2023            let raw = &self.file_data[byte_offset..byte_offset + total_bytes];
2024            let n = (total_bytes) / elem_size;
2025
2026            let elements = if let Some(decoded) = T::decode_vec(raw, &self.datatype, n) {
2027                decoded?
2028            } else {
2029                let mut elements = Vec::with_capacity(n);
2030                for i in 0..n {
2031                    let start = i * elem_size;
2032                    elements.push(T::from_bytes(
2033                        &raw[start..start + elem_size],
2034                        &self.datatype,
2035                    )?);
2036                }
2037                elements
2038            };
2039
2040            return ArrayD::from_shape_vec(IxDyn(&result_shape), elements)
2041                .map_err(|e| Error::InvalidData(format!("contiguous slice shape error: {e}")));
2042        }
2043
2044        // Fallback: read full data then slice.
2045        let full = self.read_contiguous::<T>(address, size)?;
2046        slice_array(&full, selection, &self.dataspace.dims)
2047    }
2048
2049    fn read_compact_slice<T: H5Type>(
2050        &self,
2051        data: &[u8],
2052        selection: &SliceInfo,
2053    ) -> Result<ArrayD<T>> {
2054        let full = self.read_compact::<T>(data)?;
2055        slice_array(&full, selection, &self.dataspace.dims)
2056    }
2057
    /// Decode `n` elements of type `T` from `raw` and reshape into `shape`.
    ///
    /// Tries the bulk `T::decode_vec` fast path first; otherwise decodes one
    /// element at a time with `T::from_bytes`. A `raw` buffer shorter than
    /// `n * elem_size` is tolerated: missing trailing bytes decode as zeros.
    fn decode_buffer_with_shape<T: H5Type>(
        &self,
        raw: &[u8],
        n: usize,
        shape: &[usize],
    ) -> Result<ArrayD<T>> {
        let elem_size = dtype_element_size(&self.datatype);

        // Fast path: the datatype supports bulk decoding into a Vec.
        if let Some(elements) = T::decode_vec(raw, &self.datatype, n) {
            let elements = elements?;
            return ArrayD::from_shape_vec(IxDyn(shape), elements)
                .map_err(|e| Error::InvalidData(format!("array shape error: {e}")));
        }

        // Slow path: decode element by element.
        let mut elements = Vec::with_capacity(n);
        for i in 0..n {
            let start = checked_mul_usize(i, elem_size, "decoded element byte offset")?;
            let end = checked_mul_usize(i + 1, elem_size, "decoded element end offset")?;
            if end > raw.len() {
                // Pad with fill value or zeros if data is short.
                // If the element straddles the end of `raw`, keep the bytes we
                // have and zero-fill the remainder; if it lies entirely past
                // the end, decode an all-zero element.
                let padded = if end <= raw.len().saturating_add(elem_size) {
                    let mut buf = vec![0u8; elem_size];
                    let available = raw.len().saturating_sub(start);
                    if available > 0 {
                        buf[..available].copy_from_slice(&raw[start..start + available]);
                    }
                    T::from_bytes(&buf, &self.datatype)?
                } else {
                    T::from_bytes(&vec![0u8; elem_size], &self.datatype)?
                };
                elements.push(padded);
            } else {
                elements.push(T::from_bytes(&raw[start..end], &self.datatype)?);
            }
        }

        ArrayD::from_shape_vec(IxDyn(shape), elements)
            .map_err(|e| Error::InvalidData(format!("array shape error: {e}")))
    }
2097
2098    fn decode_raw_data<T: H5Type>(&self, raw: &[u8]) -> Result<ArrayD<T>> {
2099        let n = checked_usize(self.num_elements(), "dataset element count")?;
2100        let mut shape = Vec::with_capacity(self.dataspace.dims.len());
2101        for &dim in &self.dataspace.dims {
2102            shape.push(checked_usize(dim, "dataset dimension")?);
2103        }
2104        self.decode_buffer_with_shape::<T>(raw, n, &shape)
2105    }
2106
2107    fn make_fill_array<T: H5Type>(&self) -> Result<ArrayD<T>> {
2108        let n = checked_usize(self.num_elements(), "dataset element count")?;
2109        let mut shape = Vec::with_capacity(self.dataspace.dims.len());
2110        for &dim in &self.dataspace.dims {
2111            shape.push(checked_usize(dim, "dataset dimension")?);
2112        }
2113        self.make_fill_array_from_shape::<T>(n, &shape)
2114    }
2115
2116    fn make_fill_array_from_shape<T: H5Type>(
2117        &self,
2118        element_count: usize,
2119        shape: &[usize],
2120    ) -> Result<ArrayD<T>> {
2121        let elem_size = dtype_element_size(&self.datatype);
2122        let total_bytes = checked_mul_usize(element_count, elem_size, "fill result size in bytes")?;
2123        let fill = self.make_output_buffer(total_bytes);
2124        self.decode_buffer_with_shape::<T>(&fill, element_count, shape)
2125    }
2126
2127    fn make_output_buffer(&self, total_bytes: usize) -> Vec<u8> {
2128        if let Some(ref fv) = self.fill_value {
2129            if let Some(ref fill_bytes) = fv.value {
2130                let mut buf = vec![0u8; total_bytes];
2131                if !fill_bytes.is_empty() {
2132                    for chunk in buf.chunks_exact_mut(fill_bytes.len()) {
2133                        chunk.copy_from_slice(fill_bytes);
2134                    }
2135                }
2136                buf
2137            } else {
2138                vec![0u8; total_bytes]
2139            }
2140        } else {
2141            vec![0u8; total_bytes]
2142        }
2143    }
2144
2145    fn normalize_raw_bytes(&self, raw: &[u8], total_bytes: usize) -> Vec<u8> {
2146        if raw.len() >= total_bytes {
2147            raw[..total_bytes].to_vec()
2148        } else {
2149            let mut normalized = self.make_output_buffer(total_bytes);
2150            normalized[..raw.len()].copy_from_slice(raw);
2151            normalized
2152        }
2153    }
2154}
2155
2156fn normalize_layout(layout: DataLayout, dataspace: &DataspaceMessage) -> DataLayout {
2157    match layout {
2158        DataLayout::Chunked {
2159            address,
2160            mut dims,
2161            mut element_size,
2162            chunk_indexing,
2163        } if dims.len() == dataspace.dims.len() + 1 => {
2164            if let Some(legacy_element_size) = dims.pop() {
2165                if element_size == 0 {
2166                    element_size = legacy_element_size;
2167                }
2168            }
2169            DataLayout::Chunked {
2170                address,
2171                dims,
2172                element_size,
2173                chunk_indexing,
2174            }
2175        }
2176        other => other,
2177    }
2178}
2179
/// Copy a chunk's data into the flat output buffer at the correct position.
/// Test-only convenience wrapper that derives both stride tables before
/// delegating to `copy_chunk_to_flat_with_strides`.
#[cfg(test)]
fn copy_chunk_to_flat(
    chunk_data: &[u8],
    flat: &mut [u8],
    chunk_offsets: &[u64],
    chunk_shape: &[u64],
    dataset_shape: &[u64],
    elem_size: usize,
) {
    let dataset_strides = row_major_strides(dataset_shape, "dataset stride")
        .expect("dataset strides should fit in usize");
    let chunk_strides = row_major_strides(chunk_shape, "chunk stride")
        .expect("chunk strides should fit in usize");
    let layout = ChunkCopyLayout {
        chunk_offsets,
        chunk_shape,
        dataset_shape,
        dataset_strides: &dataset_strides,
        chunk_strides: &chunk_strides,
        elem_size,
    };
    copy_chunk_to_flat_with_strides(chunk_data, flat, layout);
}
2207
2208fn copy_chunk_to_flat_with_strides(
2209    chunk_data: &[u8],
2210    flat: &mut [u8],
2211    layout: ChunkCopyLayout<'_>,
2212) {
2213    unsafe {
2214        copy_chunk_to_flat_with_strides_ptr(
2215            chunk_data,
2216            FlatBufferPtr {
2217                ptr: flat.as_mut_ptr(),
2218                len: flat.len(),
2219            },
2220            layout,
2221        );
2222    }
2223}
2224
/// Copy one chunk's bytes into a flat row-major output buffer, one innermost
/// row (`memcpy`) at a time. Edge chunks that extend past the dataset
/// boundary are clamped so only their valid portion is copied; rows whose
/// source or destination range would be out of bounds are silently skipped.
///
/// # Safety
///
/// `flat.ptr` must be valid for writes of `flat.len` bytes for the duration
/// of the call, and no other thread may concurrently access the byte ranges
/// this call writes.
#[inline(always)]
unsafe fn copy_chunk_to_flat_with_strides_ptr(
    chunk_data: &[u8],
    flat: FlatBufferPtr,
    layout: ChunkCopyLayout<'_>,
) {
    let ndim = layout.dataset_shape.len();

    if ndim == 0 {
        // Scalar dataset: a single element, clamped to the available bytes.
        let bytes = layout.elem_size.min(chunk_data.len()).min(flat.len);
        std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), flat.ptr, bytes);
        return;
    }

    // Total elements in this chunk (clamped to dataset boundaries)
    let mut actual_chunk_shape = Vec::with_capacity(ndim);
    for i in 0..ndim {
        let remaining = layout.dataset_shape[i] - layout.chunk_offsets[i];
        actual_chunk_shape.push(remaining.min(layout.chunk_shape[i]) as usize);
    }

    let row_elems = *actual_chunk_shape.last().unwrap_or(&1);
    let row_bytes = row_elems * layout.elem_size;
    // Flat element index (dataset space) of the chunk's first element.
    let dataset_origin: usize = layout
        .chunk_offsets
        .iter()
        .enumerate()
        .map(|(d, offset)| *offset as usize * layout.dataset_strides[d])
        .sum();

    if ndim == 1 {
        // One-dimensional: the whole clamped chunk is a single contiguous run.
        let bytes = row_bytes.min(chunk_data.len());
        let dst_start = dataset_origin * layout.elem_size;
        let dst_end = dst_start + bytes;
        if dst_end <= flat.len {
            std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), flat.ptr.add(dst_start), bytes);
        }
        return;
    }

    // Walk the cartesian product of the outer dimensions; each combination
    // addresses one innermost-dimension row in both buffers.
    let outer_dims = &actual_chunk_shape[..ndim - 1];
    let total_rows: usize = outer_dims.iter().product();
    let mut outer_idx = vec![0usize; ndim - 1];

    for _ in 0..total_rows {
        // Translate the outer index vector into flat element offsets.
        let mut chunk_row = 0usize;
        let mut dataset_row = dataset_origin;
        for (d, outer) in outer_idx.iter().copied().enumerate() {
            chunk_row += outer * layout.chunk_strides[d];
            dataset_row += outer * layout.dataset_strides[d];
        }

        let src_start = chunk_row * layout.elem_size;
        let dst_start = dataset_row * layout.elem_size;
        let src_end = src_start + row_bytes;
        let dst_end = dst_start + row_bytes;
        // Out-of-range rows are skipped, not treated as errors.
        if src_end <= chunk_data.len() && dst_end <= flat.len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                flat.ptr.add(dst_start),
                row_bytes,
            );
        }

        // Odometer-style increment of the outer index vector (row-major).
        let mut carry = true;
        for d in (0..outer_idx.len()).rev() {
            if carry {
                outer_idx[d] += 1;
                if outer_idx[d] < outer_dims[d] {
                    carry = false;
                } else {
                    outer_idx[d] = 0;
                }
            }
        }
    }
}
2302
2303fn checked_product_usize(values: &[usize], context: &str) -> Result<usize> {
2304    let mut product = 1usize;
2305    for &value in values {
2306        product = checked_mul_usize(product, value, context)?;
2307    }
2308    Ok(product)
2309}
2310
2311fn unit_stride_chunk_overlap_plan(
2312    chunk_offsets: &[u64],
2313    chunk_shape: &[u64],
2314    dataset_shape: &[u64],
2315    resolved: &ResolvedSelection,
2316) -> Result<(Vec<usize>, Vec<usize>, Vec<usize>)> {
2317    let ndim = dataset_shape.len();
2318    let mut overlap_counts = Vec::with_capacity(ndim);
2319    let mut chunk_local_start = Vec::with_capacity(ndim);
2320    let mut result_start = Vec::with_capacity(ndim);
2321
2322    for d in 0..ndim {
2323        let chunk_start = chunk_offsets[d];
2324        let chunk_end = (chunk_start + chunk_shape[d]).min(dataset_shape[d]);
2325        let dim = &resolved.dims[d];
2326        let overlap_start = chunk_start.max(dim.start);
2327        let overlap_end = chunk_end.min(dim.end);
2328        if overlap_start >= overlap_end {
2329            return Ok((Vec::new(), Vec::new(), Vec::new()));
2330        }
2331
2332        overlap_counts.push(checked_usize(
2333            overlap_end - overlap_start,
2334            "chunk overlap size",
2335        )?);
2336        chunk_local_start.push(checked_usize(
2337            overlap_start - chunk_start,
2338            "chunk overlap start",
2339        )?);
2340        result_start.push(checked_usize(
2341            overlap_start - dim.start,
2342            "slice result overlap start",
2343        )?);
2344    }
2345
2346    Ok((overlap_counts, chunk_local_start, result_start))
2347}
2348
2349#[inline(always)]
2350fn copy_unit_stride_chunk_overlap(
2351    chunk_data: &[u8],
2352    result_buf: &mut [u8],
2353    layout: UnitStrideCopyLayout<'_>,
2354) -> Result<()> {
2355    unsafe {
2356        copy_unit_stride_chunk_overlap_ptr(
2357            chunk_data,
2358            FlatBufferPtr {
2359                ptr: result_buf.as_mut_ptr(),
2360                len: result_buf.len(),
2361            },
2362            layout,
2363        )
2364    }
2365}
2366
/// Copy a unit-step rectangular overlap from a chunk into the result buffer.
///
/// This is the hot path for contiguous hyperslab reads over chunked datasets:
/// rather than copying one element at a time, it copies contiguous runs along
/// the innermost dimension with a single memcpy per output row.
///
/// # Safety
///
/// The caller must guarantee that `[result_ptr .. result_ptr + result_len)` is
/// valid for writes. Concurrent callers must write to disjoint byte ranges.
#[inline(always)]
unsafe fn copy_unit_stride_chunk_overlap_ptr(
    chunk_data: &[u8],
    result: FlatBufferPtr,
    layout: UnitStrideCopyLayout<'_>,
) -> Result<()> {
    let ndim = layout.dataset_shape.len();

    // 0-dimensional (scalar) dataset: copy the single element directly,
    // clamping the byte count so a short chunk or destination cannot be
    // over-read or over-written.
    if ndim == 0 {
        let bytes = layout.elem_size.min(chunk_data.len()).min(result.len);
        std::ptr::copy_nonoverlapping(chunk_data.as_ptr(), result.ptr, bytes);
        return Ok(());
    }

    // Per-dimension overlap plan: element count of the overlap, its start
    // index inside the chunk, and its start index in the result buffer.
    let (overlap_counts, chunk_local_start, result_start) = unit_stride_chunk_overlap_plan(
        layout.chunk_offsets,
        layout.chunk_shape,
        layout.dataset_shape,
        layout.resolved,
    )?;
    if overlap_counts.is_empty() {
        // This chunk does not intersect the selection — nothing to copy.
        return Ok(());
    }

    // A "row" is the contiguous run along the innermost dimension; each row is
    // transferred with a single memcpy below.
    let row_elems = *overlap_counts.last().unwrap_or(&1);
    let row_bytes = checked_mul_usize(row_elems, layout.elem_size, "unit-stride slice row bytes")?;

    // Flat *element* offsets (not bytes) of the overlap's first element in the
    // chunk and in the result buffer, accumulated with overflow checks.
    let mut chunk_origin = 0usize;
    let mut result_origin = 0usize;
    for d in 0..ndim {
        let chunk_term = checked_mul_usize(
            chunk_local_start[d],
            layout.chunk_strides[d],
            "chunk overlap origin",
        )?;
        let result_term = checked_mul_usize(
            result_start[d],
            layout.result_strides[d],
            "slice result origin",
        )?;
        chunk_origin = checked_add_usize(chunk_origin, chunk_term, "chunk overlap origin")?;
        result_origin = checked_add_usize(result_origin, result_term, "slice result origin")?;
    }

    // 1-D fast path: the whole overlap is one contiguous run.
    if ndim == 1 {
        let src_start = chunk_origin * layout.elem_size;
        let dst_start = result_origin * layout.elem_size;
        let src_end = src_start + row_bytes;
        let dst_end = dst_start + row_bytes;
        // Out-of-range copies are silently skipped rather than panicking;
        // consistent with the other chunk-copy helpers in this file.
        if src_end <= chunk_data.len() && dst_end <= result.len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                result.ptr.add(dst_start),
                row_bytes,
            );
        }
        return Ok(());
    }

    // N-D case: iterate the cartesian product of the outer dimensions with an
    // odometer, copying one innermost-dimension row per iteration.
    let outer_counts = &overlap_counts[..ndim - 1];
    let total_rows = checked_product_usize(outer_counts, "unit-stride slice row count")?;
    let mut outer_idx = vec![0usize; ndim - 1];

    for _ in 0..total_rows {
        // Flat element offset of this row's first element on both sides.
        let mut chunk_row = chunk_origin;
        let mut result_row = result_origin;
        for (d, outer) in outer_idx.iter().copied().enumerate() {
            chunk_row += outer * layout.chunk_strides[d];
            result_row += outer * layout.result_strides[d];
        }

        let src_start = chunk_row * layout.elem_size;
        let dst_start = result_row * layout.elem_size;
        let src_end = src_start + row_bytes;
        let dst_end = dst_start + row_bytes;
        // As above: skip rows that would land out of bounds.
        if src_end <= chunk_data.len() && dst_end <= result.len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                result.ptr.add(dst_start),
                row_bytes,
            );
        }

        // Advance the odometer in row-major order (last outer dim fastest).
        let mut carry = true;
        for d in (0..outer_idx.len()).rev() {
            if carry {
                outer_idx[d] += 1;
                if outer_idx[d] < outer_counts[d] {
                    carry = false;
                } else {
                    outer_idx[d] = 0;
                }
            }
        }
    }

    Ok(())
}
2475
#[allow(clippy::too_many_arguments)]
/// Copy selected elements from a chunk into the result buffer.
///
/// `dim_indices[d]` is a list of `(chunk_local_idx, result_dim_idx)` pairs for dimension `d`.
#[inline(always)]
fn copy_selected_elements(
    chunk_data: &[u8],
    result_buf: &mut [u8],
    dim_indices: &[Vec<(usize, usize)>],
    chunk_strides: &[usize],
    result_strides: &[usize],
    elem_size: usize,
    ndim: usize,
) {
    // An empty index list in any dimension makes the whole selection empty.
    if dim_indices.iter().any(Vec::is_empty) {
        return;
    }

    // Walk the cartesian product of the per-dimension pairs with an odometer.
    let total_elements: usize = dim_indices.iter().map(Vec::len).product();
    let mut odometer = vec![0usize; ndim];

    for _ in 0..total_elements {
        // Resolve the flat element index on both sides of the copy.
        let mut src_elem = 0usize;
        let mut dst_elem = 0usize;
        for (d, &pos) in odometer.iter().enumerate() {
            let (chunk_idx, result_idx) = dim_indices[d][pos];
            src_elem += chunk_idx * chunk_strides[d];
            dst_elem += result_idx * result_strides[d];
        }

        let src = src_elem * elem_size;
        let dst = dst_elem * elem_size;
        // Elements that would land out of bounds are skipped, not panicked on.
        if src + elem_size <= chunk_data.len() && dst + elem_size <= result_buf.len() {
            result_buf[dst..dst + elem_size].copy_from_slice(&chunk_data[src..src + elem_size]);
        }

        // Advance the odometer in row-major order (last dimension fastest).
        for d in (0..ndim).rev() {
            odometer[d] += 1;
            if odometer[d] < dim_indices[d].len() {
                break;
            }
            odometer[d] = 0;
        }
    }
}
2531
/// Copy selected elements from a chunk into a raw output pointer.
///
/// This is the pointer-based variant of `copy_selected_elements`, suitable for
/// parallel use where multiple threads write to disjoint regions of the same buffer.
///
/// `dim_indices[d]` is a list of `(chunk_local_idx, result_dim_idx)` pairs for
/// dimension `d`; the copy walks their cartesian product.
///
/// # Safety
///
/// The caller must guarantee that no two concurrent calls write to the same
/// byte range within `[result_ptr .. result_ptr + result_len)`.
#[cfg(feature = "rayon")]
#[allow(clippy::too_many_arguments)]
#[inline(always)]
unsafe fn copy_selected_elements_ptr(
    chunk_data: &[u8],
    result_ptr: *mut u8,
    result_len: usize,
    dim_indices: &[Vec<(usize, usize)>],
    chunk_strides: &[usize],
    result_strides: &[usize],
    elem_size: usize,
    ndim: usize,
) {
    // An empty index list in any dimension makes the whole selection empty.
    if dim_indices.iter().any(|v| v.is_empty()) {
        return;
    }

    // Odometer walk over the cartesian product of the per-dimension pairs,
    // mirroring the safe variant element for element.
    let total: usize = dim_indices.iter().map(|v| v.len()).product();
    let mut counters = vec![0usize; ndim];

    for _ in 0..total {
        // Flat element offsets on the chunk (source) and result (destination).
        let mut chunk_flat = 0;
        let mut result_flat = 0;
        for d in 0..ndim {
            let (cl, ri) = dim_indices[d][counters[d]];
            chunk_flat += cl * chunk_strides[d];
            result_flat += ri * result_strides[d];
        }

        let src_start = chunk_flat * elem_size;
        let dst_start = result_flat * elem_size;
        let src_end = src_start + elem_size;
        let dst_end = dst_start + elem_size;

        // Out-of-bounds elements are skipped silently, matching the safe
        // variant's behavior.
        if src_end <= chunk_data.len() && dst_end <= result_len {
            std::ptr::copy_nonoverlapping(
                chunk_data.as_ptr().add(src_start),
                result_ptr.add(dst_start),
                elem_size,
            );
        }

        // Advance the odometer in row-major order (last dimension fastest).
        let mut carry = true;
        for d in (0..ndim).rev() {
            if carry {
                counters[d] += 1;
                if counters[d] < dim_indices[d].len() {
                    carry = false;
                } else {
                    counters[d] = 0;
                }
            }
        }
    }
}
2596
2597/// Slice an ndarray according to a SliceInfo selection.
2598fn slice_array<T: H5Type + Clone>(
2599    array: &ArrayD<T>,
2600    selection: &SliceInfo,
2601    shape: &[u64],
2602) -> Result<ArrayD<T>> {
2603    // Build result shape
2604    let mut result_shape = Vec::new();
2605
2606    for (i, sel) in selection.selections.iter().enumerate() {
2607        let dim_size = shape[i];
2608        match sel {
2609            SliceInfoElem::Index(idx) => {
2610                if *idx >= dim_size {
2611                    return Err(Error::SliceOutOfBounds {
2612                        dim: i,
2613                        index: *idx,
2614                        size: dim_size,
2615                    });
2616                }
2617                // Don't add to result_shape — this dimension is collapsed
2618            }
2619            SliceInfoElem::Slice { start, end, step } => {
2620                let actual_end = if *end == u64::MAX {
2621                    dim_size as usize
2622                } else {
2623                    (*end as usize).min(dim_size as usize)
2624                };
2625                let actual_start = *start as usize;
2626                let actual_step = *step as usize;
2627                if actual_step == 0 {
2628                    return Err(Error::InvalidData("slice step cannot be 0".into()));
2629                }
2630                let n = (actual_end - actual_start).div_ceil(actual_step);
2631                result_shape.push(n);
2632            }
2633        }
2634    }
2635
2636    // Extract elements manually (ndarray's slicing API is complex with dynamic dims)
2637    let ndim = shape.len();
2638    let total: usize = result_shape.iter().product();
2639    let mut elements = Vec::with_capacity(total);
2640
2641    // Generate all indices in the result
2642    let mut result_idx = vec![0usize; result_shape.len()];
2643
2644    for _ in 0..total {
2645        // Map result index to source index
2646        let mut src_idx = Vec::with_capacity(ndim);
2647        let mut ri = 0;
2648        for sel in selection.selections.iter() {
2649            match sel {
2650                SliceInfoElem::Index(idx) => {
2651                    src_idx.push(*idx as usize);
2652                }
2653                SliceInfoElem::Slice { start, step, .. } => {
2654                    src_idx.push(*start as usize + result_idx[ri] * *step as usize);
2655                    ri += 1;
2656                }
2657            }
2658        }
2659
2660        elements.push(array[IxDyn(&src_idx)].clone());
2661
2662        // Increment result index
2663        if !result_shape.is_empty() {
2664            let mut carry = true;
2665            for d in (0..result_shape.len()).rev() {
2666                if carry {
2667                    result_idx[d] += 1;
2668                    if result_idx[d] < result_shape[d] {
2669                        carry = false;
2670                    } else {
2671                        result_idx[d] = 0;
2672                    }
2673                }
2674            }
2675        }
2676    }
2677
2678    ArrayD::from_shape_vec(IxDyn(&result_shape), elements)
2679        .map_err(|e| Error::InvalidData(format!("slice shape error: {e}")))
2680}
2681
#[cfg(test)]
mod tests {
    use super::*;

    /// `SliceInfo::all` yields one full-range selection per dimension.
    #[test]
    fn test_slice_info_all() {
        let s = SliceInfo::all(3);
        assert_eq!(s.selections.len(), 3);
    }

    /// A 1-D chunk lands at its offset inside the flat output buffer.
    #[test]
    fn test_copy_chunk_1d() {
        let src = [1u8, 2, 3, 4]; // 4 one-byte elements
        let mut dst = vec![0u8; 8];

        // Chunk of shape [4] placed at offset [2] in a dataset of shape [8].
        copy_chunk_to_flat(&src, &mut dst, &[2u64], &[4u64], &[8u64], 1);

        assert_eq!(dst, [0, 0, 1, 2, 3, 4, 0, 0]);
    }

    /// A 2-D chunk is copied row by row at its (row, col) offset.
    #[test]
    fn test_copy_chunk_2d_rowwise() {
        let src = [1u8, 2, 3, 4, 5, 6]; // 2x3 chunk, row-major
        let mut dst = vec![0u8; 16];

        // Chunk of shape [2, 3] placed at offset [1, 1] in a 4x4 dataset.
        copy_chunk_to_flat(&src, &mut dst, &[1u64, 1u64], &[2u64, 3u64], &[4u64, 4u64], 1);

        assert_eq!(dst, [0, 0, 0, 0, 0, 1, 2, 3, 0, 4, 5, 6, 0, 0, 0, 0]);
    }

    /// Interior 2x3 window of a 4x4 chunk, unit step in both dimensions.
    #[test]
    fn test_copy_unit_stride_chunk_overlap_2d_partial() {
        let src: Vec<u8> = (1..=16).collect();
        let mut dst = vec![0u8; 6];

        // Rows 1..3, cols 1..4 of the 4x4 dataset, all with step 1.
        let sel = ResolvedSelection {
            dims: vec![
                ResolvedSelectionDim {
                    start: 1,
                    end: 3,
                    step: 1,
                    count: 2,
                },
                ResolvedSelectionDim {
                    start: 1,
                    end: 4,
                    step: 1,
                    count: 3,
                },
            ],
            result_shape: vec![2, 3],
            result_elements: 6,
        };

        copy_unit_stride_chunk_overlap(
            &src,
            &mut dst,
            UnitStrideCopyLayout {
                chunk_offsets: &[0, 0],
                chunk_shape: &[4, 4],
                dataset_shape: &[4, 4],
                resolved: &sel,
                chunk_strides: &[4, 1],
                result_strides: &[3, 1],
                elem_size: 1,
            },
        )
        .unwrap();

        assert_eq!(dst, [6, 7, 8, 10, 11, 12]);
    }
}