Skip to main content

rust_hdf5/
dataset.rs

1//! Dataset creation and I/O.
2//!
3//! Datasets are created via the fluent [`DatasetBuilder`] API obtained from
4//! [`H5File::new_dataset`](crate::file::H5File::new_dataset). Once created,
5//! the [`H5Dataset`] handle can read or write raw typed data.
6
7use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
12// ---------------------------------------------------------------------------
13// DatasetBuilder
14// ---------------------------------------------------------------------------
15
/// A fluent builder for creating datasets.
///
/// Obtained from [`H5File::new_dataset::<T>()`](crate::file::H5File::new_dataset).
/// The setter methods only record options on the builder; the dataset itself
/// is created by [`create`](Self::create).
///
/// ```no_run
/// # use rust_hdf5::H5File;
/// let file = H5File::create("builder.h5").unwrap();
/// let ds = file.new_dataset::<f32>()
///     .shape(&[10, 20])
///     .create("temperatures")
///     .unwrap();
/// ```
pub struct DatasetBuilder<T: H5Type> {
    /// Shared handle to the file the dataset will be created in.
    file_inner: SharedInner,
    /// Requested dimensions; `None` until `shape()` or `scalar()` is called.
    shape: Option<Vec<usize>>,
    /// Chunk dimensions; `Some` selects the chunked path in `create()`.
    chunk_dims: Option<Vec<usize>>,
    /// Per-dimension maximum extent; an inner `None` means unlimited.
    max_shape: Option<Vec<Option<usize>>>,
    /// Level for plain deflate compression, if requested.
    deflate_level: Option<u32>,
    /// Level for shuffle + deflate compression, if requested.
    shuffle_deflate_level: Option<u32>,
    /// Explicit filter pipeline; `create()` checks it before the deflate levels.
    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
    /// Group the dataset is created in; `None` when built via `new`.
    group_path: Option<String>,
    /// Raw bytes of the user-defined fill value, if one was set.
    fill_value: Option<Vec<u8>>,
    /// Ties the builder to the element type `T` without storing a value.
    _marker: std::marker::PhantomData<T>,
}
40
41impl<T: H5Type> DatasetBuilder<T> {
42    pub(crate) fn new(file_inner: SharedInner) -> Self {
43        Self {
44            file_inner,
45            shape: None,
46            chunk_dims: None,
47            max_shape: None,
48            deflate_level: None,
49            shuffle_deflate_level: None,
50            custom_pipeline: None,
51            group_path: None,
52            fill_value: None,
53            _marker: std::marker::PhantomData,
54        }
55    }
56
57    pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
58        Self {
59            file_inner,
60            shape: None,
61            chunk_dims: None,
62            max_shape: None,
63            deflate_level: None,
64            shuffle_deflate_level: None,
65            custom_pipeline: None,
66            group_path: Some(group_path),
67            fill_value: None,
68            _marker: std::marker::PhantomData,
69        }
70    }
71
72    /// Set the dataset dimensions.
73    ///
74    /// This is required before calling [`create`](Self::create).
75    /// Use an empty slice `&[]` for a scalar (0-dimensional) dataset.
76    #[must_use]
77    pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
78        self.shape = Some(dims.as_ref().to_vec());
79        self
80    }
81
82    /// Create a scalar (0-dimensional) dataset holding a single value.
83    #[must_use]
84    pub fn scalar(mut self) -> Self {
85        self.shape = Some(vec![]);
86        self
87    }
88
89    /// Set chunk dimensions for chunked storage.
90    ///
91    /// When set, the dataset uses chunked storage with the extensible array
92    /// index. You should also call [`max_shape`](Self::max_shape) or
93    /// [`resizable`](Self::resizable) to allow extending.
94    #[must_use]
95    pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
96        self.chunk_dims = Some(chunk_dims.to_vec());
97        self
98    }
99
100    /// Make all dimensions unlimited (resizable).
101    ///
102    /// This sets max_dims to u64::MAX for all dimensions.
103    #[must_use]
104    pub fn resizable(mut self) -> Self {
105        self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
106        self
107    }
108
109    /// Set maximum dimensions. `None` means unlimited for that dimension.
110    #[must_use]
111    pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
112        self.max_shape = Some(max.to_vec());
113        self
114    }
115
116    /// Enable deflate (gzip) compression with the given level (0-9).
117    ///
118    /// Requires chunked storage (call `.chunk()` before `.create()`).
119    /// Level 0 = no compression, 9 = maximum compression. Default is 6.
120    #[must_use]
121    pub fn deflate(mut self, level: u32) -> Self {
122        self.deflate_level = Some(level);
123        self
124    }
125
126    /// Enable shuffle + deflate compression.
127    ///
128    /// Shuffle reorders bytes by position within elements before compression,
129    /// which typically improves compression ratios for numeric data.
130    /// Requires chunked storage.
131    #[must_use]
132    pub fn shuffle_deflate(mut self, level: u32) -> Self {
133        self.shuffle_deflate_level = Some(level);
134        self
135    }
136
137    /// Enable Zstandard compression with the given level (1-22, default 3).
138    ///
139    /// Requires chunked storage (call `.chunk()` before `.create()`).
140    #[must_use]
141    pub fn zstd(mut self, level: u32) -> Self {
142        self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
143        self
144    }
145
146    /// Set a custom filter pipeline for compression.
147    ///
148    /// This takes precedence over [`deflate`](Self::deflate) and
149    /// [`shuffle_deflate`](Self::shuffle_deflate). Requires chunked storage.
150    #[must_use]
151    pub fn filter_pipeline(
152        mut self,
153        pipeline: crate::format::messages::filter::FilterPipeline,
154    ) -> Self {
155        self.custom_pipeline = Some(pipeline);
156        self
157    }
158
159    /// Set a user-defined fill value for unwritten elements.
160    ///
161    /// Without this, datasets use the HDF5 default zero-fill. When set,
162    /// the value is written into the dataset's fill-value message
163    /// (`fill_defined = 2`), so HDF5 readers treat unallocated chunks and
164    /// unwritten regions as this value rather than zero.
165    ///
166    /// ```no_run
167    /// # use rust_hdf5::H5File;
168    /// let file = H5File::create("fv.h5").unwrap();
169    /// let ds = file.new_dataset::<f32>()
170    ///     .shape(&[100])
171    ///     .fill_value(f32::NAN)
172    ///     .create("data")
173    ///     .unwrap();
174    /// ```
175    #[must_use]
176    pub fn fill_value(mut self, value: T) -> Self {
177        let es = T::element_size();
178        // Safety: `T: H5Type` is a `Copy` numeric primitive with a
179        // well-defined byte representation; `element_size()` matches
180        // `size_of::<T>()`. The slice borrows `value` only for this call.
181        let raw = unsafe { std::slice::from_raw_parts(&value as *const T as *const u8, es) };
182        self.fill_value = Some(raw.to_vec());
183        self
184    }
185
186    /// Finalize and create the dataset with the given `name`.
187    ///
188    /// The name is the link name within the root group (e.g. `"data"` or
189    /// `"group1/data"` once nested groups are supported).
190    pub fn create(self, name: &str) -> Result<H5Dataset> {
191        let shape = self.shape.ok_or_else(|| {
192            Hdf5Error::InvalidState("shape must be set before calling create()".into())
193        })?;
194
195        // Build the full name: if created within a group, prefix with group path
196        let full_name = if let Some(ref gp) = self.group_path {
197            if gp == "/" {
198                name.to_string()
199            } else {
200                let trimmed = gp.trim_start_matches('/');
201                format!("{}/{}", trimmed, name)
202            }
203        } else {
204            name.to_string()
205        };
206        let group_path = self.group_path.clone();
207        let fill_value = self.fill_value.clone();
208
209        let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
210        let datatype = T::hdf5_type();
211        let element_size = T::element_size();
212
213        if let Some(ref chunk_dims) = self.chunk_dims {
214            // Chunked dataset
215            let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
216            let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
217                max.iter()
218                    .map(|m| m.map_or(u64::MAX, |v| v as u64))
219                    .collect()
220            } else {
221                // Default: max = current
222                dims_u64.clone()
223            };
224
225            // libhdf5 selects the chunk index from the dataspace: a v2
226            // B-tree for two or more unlimited dimensions, an extensible
227            // array for exactly one, and a fixed array when there are none.
228            let n_unlimited = max_u64.iter().filter(|&&m| m == u64::MAX).count();
229            let is_btree2 = n_unlimited >= 2;
230            let is_fixed_array = n_unlimited == 0;
231            let wants_filter = self.custom_pipeline.is_some()
232                || self.shuffle_deflate_level.is_some()
233                || self.deflate_level.is_some();
234
235            let index = {
236                let mut inner = borrow_inner_mut(&self.file_inner);
237                match &mut *inner {
238                    H5FileInner::Writer(writer) => {
239                        let idx = if is_btree2 {
240                            if wants_filter {
241                                return Err(Hdf5Error::InvalidState(
242                                    "compression of v2 B-tree (multi-unlimited-dimension) \
243                                     datasets is not yet supported"
244                                        .into(),
245                                ));
246                            }
247                            writer.create_btree_v2_dataset(
248                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
249                            )?
250                        } else if is_fixed_array {
251                            // A chunked dataset with no unlimited dimension
252                            // must use the fixed-array index — libhdf5
253                            // rejects an extensible-array index here. A
254                            // compressed fixed-shape dataset uses a *filtered*
255                            // fixed array (FA client id 1).
256                            if wants_filter {
257                                let pipeline = if let Some(p) = self.custom_pipeline {
258                                    p
259                                } else if let Some(level) = self.shuffle_deflate_level {
260                                    crate::format::messages::filter::FilterPipeline::shuffle_deflate(
261                                        T::element_size() as u32,
262                                        level,
263                                    )
264                                } else {
265                                    // deflate_level (checked by wants_filter).
266                                    crate::format::messages::filter::FilterPipeline::deflate(
267                                        self.deflate_level.unwrap(),
268                                    )
269                                };
270                                writer.create_fixed_array_dataset_with_pipeline(
271                                    &full_name, datatype, &dims_u64, &chunk_u64, pipeline,
272                                )?
273                            } else {
274                                writer.create_fixed_array_dataset(
275                                    &full_name, datatype, &dims_u64, &chunk_u64,
276                                )?
277                            }
278                        } else if let Some(pipeline) = self.custom_pipeline {
279                            writer.create_chunked_dataset_with_pipeline(
280                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
281                            )?
282                        } else if let Some(level) = self.shuffle_deflate_level {
283                            let pipeline =
284                                crate::format::messages::filter::FilterPipeline::shuffle_deflate(
285                                    T::element_size() as u32,
286                                    level,
287                                );
288                            writer.create_chunked_dataset_with_pipeline(
289                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
290                            )?
291                        } else if let Some(level) = self.deflate_level {
292                            writer.create_chunked_dataset_compressed(
293                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
294                            )?
295                        } else {
296                            writer.create_chunked_dataset(
297                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
298                            )?
299                        };
300                        if let Some(ref gp) = group_path {
301                            if gp != "/" {
302                                writer.assign_dataset_to_group(gp, idx)?;
303                            }
304                        }
305                        if let Some(ref fv) = fill_value {
306                            writer.set_dataset_fill_value(idx, fv.clone())?;
307                        }
308                        idx
309                    }
310                    H5FileInner::Reader(_) => {
311                        return Err(Hdf5Error::InvalidState(
312                            "cannot create a dataset in read mode".into(),
313                        ));
314                    }
315                    H5FileInner::Closed => {
316                        return Err(Hdf5Error::InvalidState("file is closed".into()));
317                    }
318                }
319            };
320
321            Ok(H5Dataset {
322                file_inner: clone_inner(&self.file_inner),
323                info: DatasetInfo::Writer {
324                    index,
325                    shape,
326                    element_size,
327                    chunked: true,
328                    btree2: is_btree2,
329                    fixed_array: is_fixed_array,
330                },
331            })
332        } else {
333            // Contiguous dataset (original path)
334            let index = {
335                let mut inner = borrow_inner_mut(&self.file_inner);
336                match &mut *inner {
337                    H5FileInner::Writer(writer) => {
338                        let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
339                        if let Some(ref gp) = group_path {
340                            if gp != "/" {
341                                writer.assign_dataset_to_group(gp, idx)?;
342                            }
343                        }
344                        if let Some(ref fv) = fill_value {
345                            writer.set_dataset_fill_value(idx, fv.clone())?;
346                        }
347                        idx
348                    }
349                    H5FileInner::Reader(_) => {
350                        return Err(Hdf5Error::InvalidState(
351                            "cannot create a dataset in read mode".into(),
352                        ));
353                    }
354                    H5FileInner::Closed => {
355                        return Err(Hdf5Error::InvalidState("file is closed".into()));
356                    }
357                }
358            };
359
360            Ok(H5Dataset {
361                file_inner: clone_inner(&self.file_inner),
362                info: DatasetInfo::Writer {
363                    index,
364                    shape,
365                    element_size,
366                    chunked: false,
367                    btree2: false,
368                    fixed_array: false,
369                },
370            })
371        }
372    }
373}
374
375// ---------------------------------------------------------------------------
376// DatasetInfo
377// ---------------------------------------------------------------------------
378
/// Internal metadata about a dataset handle.
///
/// The variant records how the handle was obtained, which in turn decides
/// which operations on [`H5Dataset`] are accepted.
enum DatasetInfo {
    /// A dataset created via `new_dataset().create()` in write mode.
    Writer {
        /// Index into the writer's dataset list.
        index: usize,
        /// Shape (current dimensions).
        shape: Vec<usize>,
        /// Size of one element in bytes.
        element_size: usize,
        /// Whether this is a chunked dataset (`write_raw` rejects these).
        chunked: bool,
        /// Whether the chunk index is a v2 B-tree (multiple unlimited dims).
        /// Such datasets are written with `write_chunk_at`, not `write_chunk`.
        btree2: bool,
        /// Whether the chunk index is a Fixed Array (no unlimited dims).
        fixed_array: bool,
    },
    /// A dataset opened by name in read mode.
    Reader {
        /// The link name of the dataset, used to look it up in the reader.
        name: String,
        /// Shape (current dimensions).
        shape: Vec<usize>,
        /// Size of one element in bytes.
        element_size: usize,
    },
}
406
407// ---------------------------------------------------------------------------
408// H5Dataset
409// ---------------------------------------------------------------------------
410
/// A handle to an HDF5 dataset, supporting typed read and write operations.
///
/// The dataset holds a shared reference to the file's I/O backend, so it
/// remains valid even if the originating [`H5File`](crate::file::H5File) is
/// moved or dropped (they share ownership via `Rc`).
pub struct H5Dataset {
    /// Shared handle to the file state this dataset belongs to.
    file_inner: SharedInner,
    /// Writer- or reader-side metadata describing the dataset.
    info: DatasetInfo,
}
420
421impl H5Dataset {
422    /// Create a reader-mode dataset handle (called internally by `H5File::dataset`).
423    pub(crate) fn new_reader(
424        file_inner: SharedInner,
425        name: String,
426        shape: Vec<usize>,
427        element_size: usize,
428    ) -> Self {
429        Self {
430            file_inner,
431            info: DatasetInfo::Reader {
432                name,
433                shape,
434                element_size,
435            },
436        }
437    }
438
439    /// Return the dataset dimensions.
440    pub fn shape(&self) -> Vec<usize> {
441        match &self.info {
442            DatasetInfo::Writer { shape, .. } => shape.clone(),
443            DatasetInfo::Reader { shape, .. } => shape.clone(),
444        }
445    }
446
447    /// Return the number of dimensions (rank) of the dataset.
448    pub fn ndims(&self) -> usize {
449        match &self.info {
450            DatasetInfo::Writer { shape, .. } => shape.len(),
451            DatasetInfo::Reader { shape, .. } => shape.len(),
452        }
453    }
454
455    /// Return the total number of elements in the dataset.
456    pub fn total_elements(&self) -> usize {
457        match &self.info {
458            DatasetInfo::Writer { shape, .. } => shape.iter().product(),
459            DatasetInfo::Reader { shape, .. } => shape.iter().product(),
460        }
461    }
462
463    /// Return the size of one element in bytes.
464    pub fn element_size(&self) -> usize {
465        match &self.info {
466            DatasetInfo::Writer { element_size, .. } => *element_size,
467            DatasetInfo::Reader { element_size, .. } => *element_size,
468        }
469    }
470
471    /// Return the chunk dimensions, if this is a chunked dataset.
472    pub fn chunk_dims(&self) -> Option<Vec<usize>> {
473        match &self.info {
474            DatasetInfo::Reader { name, .. } => {
475                let inner = borrow_inner(&self.file_inner);
476                if let H5FileInner::Reader(reader) = &*inner {
477                    if let Some(info) = reader.dataset_info(name) {
478                        use crate::format::messages::data_layout::DataLayoutMessage;
479                        let chunk_dims = match &info.layout {
480                            DataLayoutMessage::ChunkedV4 { chunk_dims, .. }
481                            | DataLayoutMessage::ChunkedV3 { chunk_dims, .. } => Some(chunk_dims),
482                            _ => None,
483                        };
484                        if let Some(chunk_dims) = chunk_dims {
485                            // Strip trailing element-size dimension
486                            return Some(
487                                chunk_dims[..chunk_dims.len() - 1]
488                                    .iter()
489                                    .map(|&d| d as usize)
490                                    .collect(),
491                            );
492                        }
493                    }
494                }
495                None
496            }
497            DatasetInfo::Writer { .. } => None,
498        }
499    }
500
501    /// Return whether this is a chunked dataset.
502    pub fn is_chunked(&self) -> bool {
503        match &self.info {
504            DatasetInfo::Writer { chunked, .. } => *chunked,
505            DatasetInfo::Reader { name, .. } => {
506                let inner = borrow_inner(&self.file_inner);
507                match &*inner {
508                    H5FileInner::Reader(reader) => {
509                        if let Some(info) = reader.dataset_info(name) {
510                            use crate::format::messages::data_layout::DataLayoutMessage;
511                            matches!(
512                                info.layout,
513                                DataLayoutMessage::ChunkedV4 { .. }
514                                    | DataLayoutMessage::ChunkedV3 { .. }
515                            )
516                        } else {
517                            false
518                        }
519                    }
520                    _ => false,
521                }
522            }
523        }
524    }
525
526    /// Return the names of all attributes on this dataset (read mode only).
527    pub fn attr_names(&self) -> Result<Vec<String>> {
528        match &self.info {
529            DatasetInfo::Reader { name, .. } => {
530                let inner = borrow_inner(&self.file_inner);
531                match &*inner {
532                    H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
533                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
534                }
535            }
536            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
537                "attr_names not available in write mode".into(),
538            )),
539        }
540    }
541
542    /// Open an attribute by name (read mode only).
543    pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
544        match &self.info {
545            DatasetInfo::Reader { name, .. } => {
546                let inner = borrow_inner(&self.file_inner);
547                match &*inner {
548                    H5FileInner::Reader(reader) => {
549                        let attr_msg = reader.dataset_attr(name, attr_name)?.clone();
550                        Ok(crate::attribute::H5Attribute::new_reader(
551                            clone_inner(&self.file_inner),
552                            attr_msg,
553                        ))
554                    }
555                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
556                }
557            }
558            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
559                "attr() not available in write mode".into(),
560            )),
561        }
562    }
563
564    /// Start building a new attribute on this dataset.
565    ///
566    /// Returns a fluent builder. Call `.shape(())` for a scalar attribute
567    /// and `.create("name")` to finalize.
568    ///
569    /// # Example
570    ///
571    /// ```no_run
572    /// # use rust_hdf5::H5File;
573    /// # use rust_hdf5::types::VarLenUnicode;
574    /// let file = H5File::create("attr.h5").unwrap();
575    /// let ds = file.new_dataset::<f32>().shape(&[10]).create("data").unwrap();
576    /// let attr = ds.new_attr::<VarLenUnicode>().shape(()).create("units").unwrap();
577    /// attr.write_scalar(&VarLenUnicode("meters".to_string())).unwrap();
578    /// ```
579    pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
580        let ds_index = match &self.info {
581            DatasetInfo::Writer { index, .. } => *index,
582            DatasetInfo::Reader { .. } => {
583                // Reader mode: we'll return a builder that will error on create.
584                // Using usize::MAX as sentinel.
585                usize::MAX
586            }
587        };
588        AttrBuilder::new(&self.file_inner, ds_index)
589    }
590
591    /// Write a typed slice to the dataset (contiguous datasets only).
592    ///
593    /// The slice length must match the total number of elements declared by
594    /// the dataset shape. The data is reinterpreted as raw bytes and written
595    /// to the file.
596    ///
597    /// # Errors
598    ///
599    /// Returns an error if:
600    /// - The file is in read mode.
601    /// - The data length does not match the declared shape.
602    pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
603        match &self.info {
604            DatasetInfo::Writer {
605                index,
606                shape,
607                element_size,
608                chunked,
609                btree2: _,
610                fixed_array: _,
611            } => {
612                if *chunked {
613                    return Err(Hdf5Error::InvalidState(
614                        "use write_chunk for chunked datasets".into(),
615                    ));
616                }
617
618                let total_elements: usize = shape.iter().product();
619                if data.len() != total_elements {
620                    return Err(Hdf5Error::InvalidState(format!(
621                        "data length {} does not match dataset size {}",
622                        data.len(),
623                        total_elements,
624                    )));
625                }
626
627                // Verify element size matches
628                if T::element_size() != *element_size {
629                    return Err(Hdf5Error::TypeMismatch(format!(
630                        "write type has element size {} but dataset expects {}",
631                        T::element_size(),
632                        element_size,
633                    )));
634                }
635
636                // Safety: T: Copy + 'static (numeric primitive) with well-defined
637                // byte representation. The resulting slice borrows `data` and
638                // lives only as long as this block.
639                let byte_len = data.len() * T::element_size();
640                let raw =
641                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
642
643                let mut inner = borrow_inner_mut(&self.file_inner);
644                match &mut *inner {
645                    H5FileInner::Writer(writer) => {
646                        writer.write_dataset_raw(*index, raw)?;
647                        Ok(())
648                    }
649                    _ => Err(Hdf5Error::InvalidState(
650                        "file is no longer in write mode".into(),
651                    )),
652                }
653            }
654            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
655                "cannot write to a dataset opened in read mode".into(),
656            )),
657        }
658    }
659
660    /// Write a single chunk to a chunked dataset.
661    ///
662    /// `chunk_idx` is the linear chunk index (typically the frame number for
663    /// streaming datasets). `data` is the raw byte data for one chunk.
664    ///
665    /// For datasets with two or more unlimited dimensions (v2 B-tree index),
666    /// use [`write_chunk_at`](Self::write_chunk_at) instead.
667    pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
668        match &self.info {
669            DatasetInfo::Writer {
670                index,
671                chunked,
672                btree2,
673                fixed_array,
674                ..
675            } => {
676                if !*chunked {
677                    return Err(Hdf5Error::InvalidState(
678                        "write_chunk is only for chunked datasets".into(),
679                    ));
680                }
681                if *btree2 {
682                    return Err(Hdf5Error::InvalidState(
683                        "this dataset uses a v2 B-tree chunk index; use write_chunk_at \
684                         with the chunk's grid coordinates"
685                            .into(),
686                    ));
687                }
688
689                let mut inner = borrow_inner_mut(&self.file_inner);
690                match &mut *inner {
691                    H5FileInner::Writer(writer) => {
692                        if *fixed_array {
693                            // Fixed-array dataset: convert the linear chunk
694                            // index into row-major grid coordinates.
695                            let chunk_dims = writer
696                                .dataset_chunk_dims(*index)
697                                .ok_or_else(|| {
698                                    Hdf5Error::InvalidState("dataset has no chunk info".into())
699                                })?
700                                .to_vec();
701                            let dims = writer.dataset_dims(*index).to_vec();
702                            let mut grid = vec![0u64; dims.len()];
703                            for d in 0..dims.len() {
704                                grid[d] = if chunk_dims[d] > 0 {
705                                    dims[d].div_ceil(chunk_dims[d])
706                                } else {
707                                    1
708                                };
709                            }
710                            // A zero-extent dimension yields a grid of 0
711                            // chunks — there is no chunk to write.
712                            if grid.contains(&0) {
713                                return Err(Hdf5Error::InvalidState(
714                                    "dataset has a zero-extent dimension and no chunks".into(),
715                                ));
716                            }
717                            let mut rem = chunk_idx as u64;
718                            let mut coords = vec![0u64; dims.len()];
719                            for d in (0..dims.len()).rev() {
720                                coords[d] = rem % grid[d];
721                                rem /= grid[d];
722                            }
723                            // A leftover means chunk_idx exceeded the grid.
724                            if rem != 0 {
725                                return Err(Hdf5Error::InvalidState(format!(
726                                    "chunk index {chunk_idx} is out of range for this dataset"
727                                )));
728                            }
729                            writer.write_chunk_fixed_array(*index, &coords, data)?;
730                        } else {
731                            writer.write_chunk(*index, chunk_idx as u64, data)?;
732                        }
733                        Ok(())
734                    }
735                    _ => Err(Hdf5Error::InvalidState(
736                        "file is no longer in write mode".into(),
737                    )),
738                }
739            }
740            DatasetInfo::Reader { .. } => {
741                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
742            }
743        }
744    }
745
746    /// Write a single chunk to a v2-B-tree-indexed dataset, addressed by its
747    /// chunk-grid coordinates (one per dimension).
748    ///
749    /// This is the entry point for datasets with two or more unlimited
750    /// dimensions. The dataset's logical dimensions are extended to cover
751    /// the written chunk. `data` is the raw bytes of one full chunk.
752    ///
753    /// ```no_run
754    /// # use rust_hdf5::H5File;
755    /// let file = H5File::create("bt2.h5").unwrap();
756    /// let ds = file.new_dataset::<i32>()
757    ///     .shape(&[0, 0])
758    ///     .chunk(&[2, 2])
759    ///     .max_shape(&[None, None])
760    ///     .create("grid")
761    ///     .unwrap();
762    /// let chunk = [0i32, 1, 2, 3];
763    /// let bytes: Vec<u8> = chunk.iter().flat_map(|v| v.to_le_bytes()).collect();
764    /// ds.write_chunk_at(&[0, 0], &bytes).unwrap();
765    /// ```
766    pub fn write_chunk_at(&self, chunk_coords: &[usize], data: &[u8]) -> Result<()> {
767        match &self.info {
768            DatasetInfo::Writer {
769                index,
770                chunked,
771                btree2,
772                fixed_array,
773                ..
774            } => {
775                if !*chunked {
776                    return Err(Hdf5Error::InvalidState(
777                        "write_chunk_at is only for chunked datasets".into(),
778                    ));
779                }
780                let coords: Vec<u64> = chunk_coords.iter().map(|&c| c as u64).collect();
781                let btree2 = *btree2;
782                let fixed_array = *fixed_array;
783                let mut inner = borrow_inner_mut(&self.file_inner);
784                let writer = match &mut *inner {
785                    H5FileInner::Writer(w) => w,
786                    _ => {
787                        return Err(Hdf5Error::InvalidState(
788                            "file is no longer in write mode".into(),
789                        ))
790                    }
791                };
792                let chunk_dims = writer
793                    .dataset_chunk_dims(*index)
794                    .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
795                    .to_vec();
796                let dims = writer.dataset_dims(*index).to_vec();
797                if coords.len() != dims.len() {
798                    return Err(Hdf5Error::InvalidState(format!(
799                        "chunk_coords has {} entries but the dataset has {} dimensions",
800                        coords.len(),
801                        dims.len()
802                    )));
803                }
804                if chunk_dims.len() != dims.len() {
805                    return Err(Hdf5Error::InvalidState(format!(
806                        "dataset chunk shape has {} dimensions but the dataspace has {}",
807                        chunk_dims.len(),
808                        dims.len()
809                    )));
810                }
811
812                // Validate coordinates and compute the grown dimensions
813                // up-front, before any chunk is written, so an overflowing
814                // coordinate cannot leave an orphaned chunk in the file.
815                let mut new_dims = dims.clone();
816                for d in 0..dims.len() {
817                    let needed = coords[d]
818                        .checked_add(1)
819                        .and_then(|c| c.checked_mul(chunk_dims[d]))
820                        .ok_or_else(|| {
821                            Hdf5Error::InvalidState(format!(
822                                "chunk coordinate {} in dimension {} is too large",
823                                coords[d], d
824                            ))
825                        })?;
826                    if needed > new_dims[d] {
827                        new_dims[d] = needed;
828                    }
829                }
830
831                if fixed_array {
832                    // Fixed-array (fixed-shape) dataset: no dimension growth.
833                    writer.write_chunk_fixed_array(*index, &coords, data)?;
834                    return Ok(());
835                }
836
837                if btree2 {
838                    writer.write_chunk_btree_v2(*index, &coords, data)?;
839                } else {
840                    // Extensible array: linearize the chunk-grid coordinates
841                    // (row-major) into the array's chunk index.
842                    let mut linear = 0u64;
843                    for d in 0..dims.len() {
844                        let grid = if chunk_dims[d] > 0 {
845                            dims[d].div_ceil(chunk_dims[d])
846                        } else {
847                            1
848                        };
849                        linear = linear
850                            .checked_mul(grid)
851                            .and_then(|l| l.checked_add(coords[d]))
852                            .ok_or_else(|| {
853                                Hdf5Error::InvalidState(
854                                    "chunk coordinates overflow the array index".into(),
855                                )
856                            })?;
857                    }
858                    writer.write_chunk(*index, linear, data)?;
859                }
860
861                if new_dims != dims {
862                    writer.extend_dataset(*index, &new_dims)?;
863                }
864                Ok(())
865            }
866            DatasetInfo::Reader { .. } => {
867                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
868            }
869        }
870    }
871
872    /// Write multiple chunks in a batch, optionally compressing in parallel.
873    ///
874    /// `chunks` is a slice of `(chunk_index, raw_data)` pairs. When a filter
875    /// pipeline is configured and the `parallel` feature is enabled, all
876    /// chunks are compressed concurrently via rayon.
877    pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
878        match &self.info {
879            DatasetInfo::Writer { index, chunked, .. } => {
880                if !*chunked {
881                    return Err(Hdf5Error::InvalidState(
882                        "write_chunks_batch is only for chunked datasets".into(),
883                    ));
884                }
885                let pairs: Vec<(u64, &[u8])> = chunks
886                    .iter()
887                    .map(|(idx, data)| (*idx as u64, *data))
888                    .collect();
889                let mut inner = borrow_inner_mut(&self.file_inner);
890                match &mut *inner {
891                    H5FileInner::Writer(writer) => {
892                        writer.write_chunks_batch(*index, &pairs)?;
893                        Ok(())
894                    }
895                    _ => Err(Hdf5Error::InvalidState(
896                        "file is no longer in write mode".into(),
897                    )),
898                }
899            }
900            DatasetInfo::Reader { .. } => {
901                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
902            }
903        }
904    }
905
906    /// Append data along the first dimension of a chunked dataset.
907    ///
908    /// `data` must contain a whole number of "frames" — slices along
909    /// dimension 0. For example, if the dataset has shape `[N, H, W]`
910    /// and `chunk_dims = [1, H, W]`, then `data.len()` must be a
911    /// multiple of `H * W`.
912    ///
913    /// This method writes the necessary chunks and extends the dataset
914    /// shape automatically.
915    ///
916    /// ```no_run
917    /// # use rust_hdf5::H5File;
918    /// let file = H5File::create("append.h5").unwrap();
919    /// let ds = file.new_dataset::<f64>()
920    ///     .shape(&[0, 3])
921    ///     .chunk(&[1, 3])
922    ///     .max_shape(&[None, Some(3)])
923    ///     .create("data")
924    ///     .unwrap();
925    /// ds.append(&[1.0, 2.0, 3.0]).unwrap();       // shape becomes [1, 3]
926    /// ds.append(&[4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap(); // shape becomes [3, 3]
927    /// ```
928    pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
929        match &self.info {
930            DatasetInfo::Writer {
931                index,
932                element_size,
933                chunked,
934                ..
935            } => {
936                if !*chunked {
937                    return Err(Hdf5Error::InvalidState(
938                        "append is only for chunked datasets".into(),
939                    ));
940                }
941                if T::element_size() != *element_size {
942                    return Err(Hdf5Error::TypeMismatch(format!(
943                        "append type has element size {} but dataset expects {}",
944                        T::element_size(),
945                        element_size,
946                    )));
947                }
948
949                let ds_index = *index;
950                let es = *element_size;
951
952                let mut inner = borrow_inner_mut(&self.file_inner);
953                let writer = match &mut *inner {
954                    H5FileInner::Writer(w) => w,
955                    _ => {
956                        return Err(Hdf5Error::InvalidState(
957                            "file is no longer in write mode".into(),
958                        ))
959                    }
960                };
961
962                let chunk_dims = writer
963                    .dataset_chunk_dims(ds_index)
964                    .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
965                    .to_vec();
966                let dims = writer.dataset_dims(ds_index).to_vec();
967
968                // Frame size = product of dims[1..]
969                let frame_elems: usize = if dims.len() > 1 {
970                    dims[1..].iter().map(|&d| d as usize).product()
971                } else {
972                    1
973                };
974
975                if frame_elems == 0 {
976                    return Err(Hdf5Error::InvalidState(
977                        "cannot append to dataset with zero-size trailing dimensions".into(),
978                    ));
979                }
980
981                if !data.len().is_multiple_of(frame_elems) {
982                    return Err(Hdf5Error::InvalidState(format!(
983                        "data length {} is not a multiple of frame size {}",
984                        data.len(),
985                        frame_elems,
986                    )));
987                }
988
989                let n_new_frames = data.len() / frame_elems;
990                let current_dim0 = dims[0] as usize;
991
992                // Chunk size along first dimension
993                let chunk_dim0 = chunk_dims[0] as usize;
994                let frame_bytes = frame_elems * es;
995
996                let raw = unsafe {
997                    std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
998                };
999
1000                // Merge buffered data with new data
1001                let ds = &mut writer.datasets[ds_index];
1002                let buffered_frames = ds.append_buffered_frames as usize;
1003                let mut combined = std::mem::take(&mut ds.append_buffer);
1004                combined.extend_from_slice(raw);
1005                ds.append_buffered_frames = 0;
1006
1007                let total_frames = buffered_frames + n_new_frames;
1008                let total_bytes = combined.len();
1009
1010                // Base chunk index: account for buffered frames
1011                let base_dim0 = current_dim0 - buffered_frames;
1012                let mut byte_pos = 0usize;
1013                let mut frame_pos = 0usize;
1014
1015                while frame_pos < total_frames {
1016                    let abs_frame = base_dim0 + frame_pos;
1017                    let chunk_idx = abs_frame / chunk_dim0;
1018                    let remaining_frames = total_frames - frame_pos;
1019                    let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);
1020
1021                    if remaining_frames >= frames_to_fill {
1022                        // Full chunk — write
1023                        let end = byte_pos + frames_to_fill * frame_bytes;
1024                        if frames_to_fill == chunk_dim0 {
1025                            writer.write_chunk(
1026                                ds_index,
1027                                chunk_idx as u64,
1028                                &combined[byte_pos..end],
1029                            )?;
1030                        } else {
1031                            // Partial-chunk write: this branch only runs with
1032                            // offset_in_chunk > 0, meaning the chunk already
1033                            // holds earlier frames on disk. Read-modify-write
1034                            // so those frames survive — a fresh fill buffer
1035                            // would erase them.
1036                            let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
1037                            let mut chunk_buf =
1038                                match writer.read_chunk_if_present(ds_index, chunk_idx as u64)? {
1039                                    Some(existing) => existing,
1040                                    None => {
1041                                        return Err(Hdf5Error::InvalidState(format!(
1042                                            "cannot append into partially-written chunk {}: \
1043                                         its existing content was not found in the chunk \
1044                                         index (the file may be inconsistent)",
1045                                            chunk_idx
1046                                        )));
1047                                    }
1048                                };
1049                            chunk_buf
1050                                [offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
1051                                .copy_from_slice(&combined[byte_pos..end]);
1052                            writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
1053                        }
1054                        byte_pos = end;
1055                        frame_pos += frames_to_fill;
1056                    } else {
1057                        // Partial chunk — buffer for next append
1058                        let ds = &mut writer.datasets[ds_index];
1059                        ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
1060                        ds.append_buffered_frames = remaining_frames as u64;
1061                        frame_pos = total_frames;
1062                    }
1063                }
1064
1065                // Extend dims to include all frames (buffered + new)
1066                let logical_dim0 = base_dim0 + total_frames;
1067                let mut new_dims: Vec<u64> = dims;
1068                new_dims[0] = logical_dim0 as u64;
1069                writer.extend_dataset(ds_index, &new_dims)?;
1070
1071                Ok(())
1072            }
1073            DatasetInfo::Reader { .. } => {
1074                Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
1075            }
1076        }
1077    }
1078
1079    /// Extend the dimensions of a chunked dataset.
1080    pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
1081        match &self.info {
1082            DatasetInfo::Writer { index, chunked, .. } => {
1083                if !*chunked {
1084                    return Err(Hdf5Error::InvalidState(
1085                        "extend is only for chunked datasets".into(),
1086                    ));
1087                }
1088
1089                let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
1090                let mut inner = borrow_inner_mut(&self.file_inner);
1091                match &mut *inner {
1092                    H5FileInner::Writer(writer) => {
1093                        writer.extend_dataset(*index, &dims_u64)?;
1094                        Ok(())
1095                    }
1096                    _ => Err(Hdf5Error::InvalidState(
1097                        "file is no longer in write mode".into(),
1098                    )),
1099                }
1100            }
1101            DatasetInfo::Reader { .. } => {
1102                Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
1103            }
1104        }
1105    }
1106
1107    /// Set the logical extent of a chunked dataset, growing **or
1108    /// shrinking** any dimension.
1109    ///
1110    /// Unlike [`extend`](Self::extend), which only grows, this can reduce a
1111    /// dimension — for example to correct an over-extended frame count
1112    /// after writing a partial multi-frame chunk. Shrinking changes the
1113    /// logical dataspace only: data in chunks beyond the new extent stays
1114    /// in the file but is no longer visible on read, exactly as libhdf5's
1115    /// `H5Dset_extent` behaves. The new extent must not exceed the
1116    /// dataset's maximum dimensions.
1117    pub fn set_extent(&self, new_dims: &[usize]) -> Result<()> {
1118        match &self.info {
1119            DatasetInfo::Writer { index, .. } => {
1120                let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
1121                let mut inner = borrow_inner_mut(&self.file_inner);
1122                match &mut *inner {
1123                    H5FileInner::Writer(writer) => {
1124                        writer.set_dataset_extent(*index, &dims_u64)?;
1125                        Ok(())
1126                    }
1127                    _ => Err(Hdf5Error::InvalidState(
1128                        "file is no longer in write mode".into(),
1129                    )),
1130                }
1131            }
1132            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
1133                "cannot set extent in read mode".into(),
1134            )),
1135        }
1136    }
1137
1138    /// Flush a chunked dataset's index structures to disk.
1139    pub fn flush(&self) -> Result<()> {
1140        match &self.info {
1141            DatasetInfo::Writer { index, .. } => {
1142                let mut inner = borrow_inner_mut(&self.file_inner);
1143                match &mut *inner {
1144                    H5FileInner::Writer(writer) => {
1145                        writer.flush_dataset(*index)?;
1146                        Ok(())
1147                    }
1148                    _ => Ok(()),
1149                }
1150            }
1151            DatasetInfo::Reader { .. } => Ok(()),
1152        }
1153    }
1154
1155    /// Read a slice (hyperslab) of the dataset as a typed vector.
1156    ///
1157    /// `starts` and `counts` define the N-dimensional selection:
1158    /// `starts[d]` = first index along dim d, `counts[d]` = how many elements.
1159    pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
1160        match &self.info {
1161            DatasetInfo::Reader {
1162                name, element_size, ..
1163            } => {
1164                if T::element_size() != *element_size {
1165                    return Err(Hdf5Error::TypeMismatch(format!(
1166                        "read type has element size {} but dataset has element size {}",
1167                        T::element_size(),
1168                        element_size,
1169                    )));
1170                }
1171                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
1172                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
1173
1174                let raw = {
1175                    let mut inner = borrow_inner_mut(&self.file_inner);
1176                    match &mut *inner {
1177                        H5FileInner::Reader(reader) => {
1178                            reader.read_slice(name, &starts_u64, &counts_u64)?
1179                        }
1180                        _ => {
1181                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
1182                        }
1183                    }
1184                };
1185
1186                if raw.len() % T::element_size() != 0 {
1187                    return Err(Hdf5Error::TypeMismatch(format!(
1188                        "raw data size {} is not a multiple of element size {}",
1189                        raw.len(),
1190                        T::element_size(),
1191                    )));
1192                }
1193
1194                let count = raw.len() / T::element_size();
1195                let mut result = Vec::<T>::with_capacity(count);
1196                unsafe {
1197                    std::ptr::copy_nonoverlapping(
1198                        raw.as_ptr(),
1199                        result.as_mut_ptr() as *mut u8,
1200                        raw.len(),
1201                    );
1202                    result.set_len(count);
1203                }
1204                Ok(result)
1205            }
1206            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1207                "cannot read_slice from a dataset in write mode".into(),
1208            )),
1209        }
1210    }
1211
1212    /// Write a typed slice to a sub-region of a contiguous dataset.
1213    ///
1214    /// `starts` and `counts` define the N-dimensional selection.
1215    pub fn write_slice<T: H5Type>(
1216        &self,
1217        starts: &[usize],
1218        counts: &[usize],
1219        data: &[T],
1220    ) -> Result<()> {
1221        match &self.info {
1222            DatasetInfo::Writer {
1223                index,
1224                element_size,
1225                chunked,
1226                ..
1227            } => {
1228                if *chunked {
1229                    return Err(Hdf5Error::InvalidState(
1230                        "write_slice is only for contiguous datasets".into(),
1231                    ));
1232                }
1233                if T::element_size() != *element_size {
1234                    return Err(Hdf5Error::TypeMismatch(format!(
1235                        "write type has element size {} but dataset expects {}",
1236                        T::element_size(),
1237                        element_size,
1238                    )));
1239                }
1240
1241                let expected: usize = counts.iter().product();
1242                if data.len() != expected {
1243                    return Err(Hdf5Error::InvalidState(format!(
1244                        "data length {} does not match slice size {}",
1245                        data.len(),
1246                        expected,
1247                    )));
1248                }
1249
1250                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
1251                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
1252
1253                let byte_len = data.len() * T::element_size();
1254                let raw =
1255                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
1256
1257                let mut inner = borrow_inner_mut(&self.file_inner);
1258                match &mut *inner {
1259                    H5FileInner::Writer(writer) => {
1260                        writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
1261                        Ok(())
1262                    }
1263                    _ => Err(Hdf5Error::InvalidState(
1264                        "file is no longer in write mode".into(),
1265                    )),
1266                }
1267            }
1268            DatasetInfo::Reader { .. } => {
1269                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
1270            }
1271        }
1272    }
1273
1274    /// Read variable-length strings from a dataset.
1275    ///
1276    /// This handles h5py-style vlen string datasets that store strings
1277    /// as global heap references. Returns one String per element.
1278    pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
1279        match &self.info {
1280            DatasetInfo::Reader { name, .. } => {
1281                let mut inner = borrow_inner_mut(&self.file_inner);
1282                match &mut *inner {
1283                    H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
1284                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
1285                }
1286            }
1287            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1288                "cannot read vlen strings from a dataset in write mode".into(),
1289            )),
1290        }
1291    }
1292
1293    /// Read the entire dataset as a typed vector.
1294    ///
1295    /// The raw bytes are read from the file and reinterpreted as `T`. The
1296    /// caller must ensure that `T` matches the datatype used when the dataset
1297    /// was written.
1298    ///
1299    /// # Errors
1300    ///
1301    /// Returns an error if:
1302    /// - The file is in write mode.
1303    /// - The raw data size is not a multiple of `T::element_size()`.
1304    pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
1305        match &self.info {
1306            DatasetInfo::Reader {
1307                name, element_size, ..
1308            } => {
1309                if T::element_size() != *element_size {
1310                    return Err(Hdf5Error::TypeMismatch(format!(
1311                        "read type has element size {} but dataset has element size {}",
1312                        T::element_size(),
1313                        element_size,
1314                    )));
1315                }
1316
1317                let raw = {
1318                    let mut inner = borrow_inner_mut(&self.file_inner);
1319                    match &mut *inner {
1320                        H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
1321                        _ => {
1322                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
1323                        }
1324                    }
1325                };
1326
1327                if raw.len() % T::element_size() != 0 {
1328                    return Err(Hdf5Error::TypeMismatch(format!(
1329                        "raw data size {} is not a multiple of element size {}",
1330                        raw.len(),
1331                        T::element_size(),
1332                    )));
1333                }
1334
1335                let count = raw.len() / T::element_size();
1336                let mut result = Vec::<T>::with_capacity(count);
1337
1338                // Safety: T is Copy + 'static (required by H5Type). We verified
1339                // the byte count matches count * size_of::<T>() above.
1340                // copy_nonoverlapping fills the memory with valid bit patterns
1341                // for all H5Type implementors (numeric primitives).
1342                // We call set_len AFTER the copy so that if an unexpected panic
1343                // occurs, uninitialized memory is never exposed.
1344                unsafe {
1345                    std::ptr::copy_nonoverlapping(
1346                        raw.as_ptr(),
1347                        result.as_mut_ptr() as *mut u8,
1348                        raw.len(),
1349                    );
1350                    result.set_len(count);
1351                }
1352
1353                Ok(result)
1354            }
1355            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1356                "cannot read from a dataset in write mode".into(),
1357            )),
1358        }
1359    }
1360}
1361
1362#[cfg(test)]
1363mod tests {
1364    use crate::H5File;
1365    use std::path::PathBuf;
1366
1367    fn temp_path(name: &str) -> PathBuf {
1368        // Include PID + a per-call atomic counter so that concurrent
1369        // cargo invocations and any kernel-level "lock not yet
1370        // released" races between sequential opens cannot collide.
1371        use std::sync::atomic::{AtomicU64, Ordering};
1372        static COUNTER: AtomicU64 = AtomicU64::new(0);
1373        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
1374        std::env::temp_dir().join(format!(
1375            "hdf5_dataset_test_{}_{}_{}.h5",
1376            name,
1377            std::process::id(),
1378            n
1379        ))
1380    }
1381
1382    #[test]
1383    fn builder_requires_shape() {
1384        let path = temp_path("no_shape");
1385        let file = H5File::create(&path).unwrap();
1386        let result = file.new_dataset::<u8>().create("data");
1387        assert!(result.is_err());
1388        std::fs::remove_file(&path).ok();
1389    }
1390
1391    #[test]
1392    fn write_raw_size_mismatch() {
1393        let path = temp_path("size_mismatch");
1394        let file = H5File::create(&path).unwrap();
1395        let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1396        // Provide 3 elements instead of 4
1397        let result = ds.write_raw(&[1u8, 2, 3]);
1398        assert!(result.is_err());
1399        std::fs::remove_file(&path).ok();
1400    }
1401
1402    #[test]
1403    fn roundtrip_u8_1d() {
1404        let path = temp_path("rt_u8_1d");
1405        let data: Vec<u8> = (0..10).collect();
1406
1407        {
1408            let file = H5File::create(&path).unwrap();
1409            let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
1410            ds.write_raw(&data).unwrap();
1411            file.close().unwrap();
1412        }
1413
1414        {
1415            let file = H5File::open(&path).unwrap();
1416            let ds = file.dataset("seq").unwrap();
1417            assert_eq!(ds.shape(), vec![10]);
1418            let readback = ds.read_raw::<u8>().unwrap();
1419            assert_eq!(readback, data);
1420        }
1421
1422        std::fs::remove_file(&path).ok();
1423    }
1424
1425    #[test]
1426    fn roundtrip_i32_2d() {
1427        let path = temp_path("rt_i32_2d");
1428        let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];
1429
1430        {
1431            let file = H5File::create(&path).unwrap();
1432            let ds = file
1433                .new_dataset::<i32>()
1434                .shape([2, 3])
1435                .create("matrix")
1436                .unwrap();
1437            ds.write_raw(&data).unwrap();
1438            file.close().unwrap();
1439        }
1440
1441        {
1442            let file = H5File::open(&path).unwrap();
1443            let ds = file.dataset("matrix").unwrap();
1444            assert_eq!(ds.shape(), vec![2, 3]);
1445            let readback = ds.read_raw::<i32>().unwrap();
1446            assert_eq!(readback, data);
1447        }
1448
1449        std::fs::remove_file(&path).ok();
1450    }
1451
#[test]
fn roundtrip_f64_3d() {
    // A 2x3x4 `f64` cube (24 values in steps of 0.5) must read back unchanged.
    let h5_path = temp_path("rt_f64_3d");
    let expected: Vec<f64> = (0..24).map(|step| 0.5 * step as f64).collect();

    // Write phase.
    {
        let writer = H5File::create(&h5_path).unwrap();
        let cube = writer
            .new_dataset::<f64>()
            .shape([2, 3, 4])
            .create("cube")
            .unwrap();
        cube.write_raw(&expected).unwrap();
        writer.close().unwrap();
    }

    // Read phase.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let cube = reader.dataset("cube").unwrap();
        assert_eq!(cube.shape(), vec![2, 3, 4]);
        let got = cube.read_raw::<f64>().unwrap();
        assert_eq!(got, expected);
    }

    // Best-effort cleanup.
    std::fs::remove_file(&h5_path).ok();
}
1478
#[test]
fn cannot_read_in_write_mode() {
    let h5_path = temp_path("no_read_write");
    let writer = H5File::create(&h5_path).unwrap();
    let dset = writer.new_dataset::<u8>().shape([4]).create("x").unwrap();
    dset.write_raw(&[1u8, 2, 3, 4]).unwrap();
    // Reading through a handle from a file opened with `create` must fail.
    assert!(dset.read_raw::<u8>().is_err());
    std::fs::remove_file(&h5_path).ok();
}
1489
#[test]
fn cannot_write_in_read_mode() {
    let h5_path = temp_path("no_write_read");

    // Produce a valid file containing a 4-element dataset "x".
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer.new_dataset::<u8>().shape([4]).create("x").unwrap();
        dset.write_raw(&[1u8, 2, 3, 4]).unwrap();
        writer.close().unwrap();
    }

    // Writing through a handle from a file opened with `open` must fail.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("x").unwrap();
        assert!(dset.write_raw(&[5u8, 6, 7, 8]).is_err());
    }

    std::fs::remove_file(&h5_path).ok();
}
1510
#[test]
fn numeric_attr_roundtrip() {
    // Scalar numeric attributes (one f64, one i32) written on a dataset
    // must read back with the same values after reopening the file.
    let h5_path = temp_path("num_attr");
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<f32>()
            .shape([4])
            .create("data")
            .unwrap();
        dset.write_raw(&[1.0f32; 4]).unwrap();

        let scale_attr = dset.new_attr::<f64>().shape(()).create("scale").unwrap();
        scale_attr.write_numeric(&1.2345f64).unwrap();

        let count_attr = dset.new_attr::<i32>().shape(()).create("count").unwrap();
        count_attr.write_numeric(&42i32).unwrap();

        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("data").unwrap();

        // Floating-point attribute: compare with a tolerance.
        let scale_attr = dset.attr("scale").unwrap();
        let scale: f64 = scale_attr.read_numeric().unwrap();
        assert!((scale - 1.2345).abs() < 1e-10);

        // Integer attribute: compare exactly.
        let count_attr = dset.attr("count").unwrap();
        let count: i32 = count_attr.read_numeric().unwrap();
        assert_eq!(count, 42);
    }
    std::fs::remove_file(&h5_path).ok();
}
1541
#[test]
fn cannot_create_dataset_in_read_mode() {
    let h5_path = temp_path("no_create_read");

    // Create an empty file; the handle is dropped immediately without an
    // explicit `close()`.
    drop(H5File::create(&h5_path).unwrap());

    // Creating a dataset through a file opened with `open` must fail.
    {
        let reader = H5File::open(&h5_path).unwrap();
        assert!(reader.new_dataset::<u8>().shape([4]).create("x").is_err());
    }

    std::fs::remove_file(&h5_path).ok();
}
1558
#[test]
fn shape_accessor() {
    // `shape()` on a freshly created dataset reports the dimensions that
    // were passed to the builder.
    let h5_path = temp_path("shape_acc");

    let writer = H5File::create(&h5_path).unwrap();
    let tensor = writer
        .new_dataset::<f32>()
        .shape([5, 10, 3])
        .create("tensor")
        .unwrap();
    let dims = tensor.shape();
    assert_eq!(dims, vec![5, 10, 3]);

    std::fs::remove_file(&h5_path).ok();
}
1573
#[test]
fn slice_roundtrip_2d() {
    let h5_path = temp_path("slice_2d");

    // Fill a 4x5 dataset with 0..20 in one write, then read back a sub-block.
    let cells = (0..20).collect::<Vec<i32>>();
    {
        let writer = H5File::create(&h5_path).unwrap();
        let mat = writer
            .new_dataset::<i32>()
            .shape([4, 5])
            .create("mat")
            .unwrap();
        mat.write_raw(&cells).unwrap();
        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let mat = reader.dataset("mat").unwrap();
        // Start at (row 1, col 2) and take a 2x2 block: rows 1..3, cols 2..4.
        let block = mat.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
        // Row 1 is [5,6,7,8,9]      -> cols 2..4 give [7,8]
        // Row 2 is [10,11,12,13,14] -> cols 2..4 give [12,13]
        assert_eq!(block, vec![7, 8, 12, 13]);
    }

    std::fs::remove_file(&h5_path).ok();
}
1602
#[test]
fn write_slice_2d() {
    let h5_path = temp_path("write_slice_2d");

    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<f32>()
            .shape([3, 4])
            .create("data")
            .unwrap();
        // Start from an all-zero 3x4 dataset ...
        dset.write_raw(&[0.0f32; 12]).unwrap();
        // ... then overwrite the 2x2 block whose top-left corner is (1, 1).
        let patch = [10.0f32, 20.0, 30.0, 40.0];
        dset.write_slice(&[1, 1], &[2, 2], &patch).unwrap();
        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("data").unwrap();
        let got = dset.read_raw::<f32>().unwrap();
        let expected: Vec<f32> = vec![
            0.0, 0.0, 0.0, 0.0, // row 0: untouched
            0.0, 10.0, 20.0, 0.0, // row 1: patched in cols 1..3
            0.0, 30.0, 40.0, 0.0, // row 2: patched in cols 1..3
        ];
        assert_eq!(got, expected);
    }

    std::fs::remove_file(&h5_path).ok();
}
1635
#[test]
fn write_slice_out_of_bounds_rejected() {
    let h5_path = temp_path("write_slice_oob");
    let writer = H5File::create(&h5_path).unwrap();
    let dset = writer.new_dataset::<i32>().shape([4]).create("d").unwrap();
    dset.write_raw(&[0i32; 4]).unwrap();

    // Offset 2 plus length 6 ends at 8, past the extent of 4: this has to be
    // reported as an error rather than corrupting anything.
    let past_end = dset.write_slice(&[2], &[6], &[9i32; 6]);
    assert!(past_end.is_err());

    // A slice that fits inside the extent is still accepted afterwards.
    let inside = dset.write_slice(&[1], &[2], &[7i32, 8]);
    assert!(inside.is_ok());

    std::fs::remove_file(&h5_path).ok();
}
1648
#[test]
fn duplicate_dataset_name_rejected() {
    let h5_path = temp_path("dup_name");
    let writer = H5File::create(&h5_path).unwrap();
    // First creation of "d" succeeds; its handle is discarded right away.
    drop(writer.new_dataset::<i32>().shape([2]).create("d").unwrap());
    // A second dataset under the same name must be refused.
    let second = writer.new_dataset::<i32>().shape([2]).create("d");
    assert!(second.is_err());
    std::fs::remove_file(&h5_path).ok();
}
1657
#[test]
fn extend_cannot_shrink() {
    let h5_path = temp_path("extend_shrink");
    let writer = H5File::create(&h5_path).unwrap();
    let dset = writer
        .new_dataset::<i32>()
        .shape([0])
        .chunk(&[2])
        .max_shape(&[None])
        .create("d")
        .unwrap();
    // Four elements are appended, so the written extent is 4.
    dset.append(&[1i32, 2, 3, 4]).unwrap();

    // Asking for an extent of 2 would shrink below what was written: rejected.
    let shrink = dset.extend(&[2]);
    assert!(shrink.is_err());

    // Asking for an extent of 6 grows the dataset: accepted.
    let grow = dset.extend(&[6]);
    assert!(grow.is_ok());

    std::fs::remove_file(&h5_path).ok();
}
1676
#[test]
fn attr_read_roundtrip() {
    use crate::types::VarLenUnicode;

    // Two string attributes written on a dataset must be listed by
    // `attr_names` and read back with their original text.
    let h5_path = temp_path("attr_read");

    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer.new_dataset::<u8>().shape([4]).create("data").unwrap();
        dset.write_raw(&[1u8, 2, 3, 4]).unwrap();

        let units_attr = dset
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("units")
            .unwrap();
        units_attr.write_string("meters").unwrap();

        let desc_attr = dset
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("desc")
            .unwrap();
        desc_attr.write_string("test data").unwrap();

        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("data").unwrap();

        // Both attribute names must be present in the listing.
        let listed = dset.attr_names().unwrap();
        for wanted in &["units", "desc"] {
            assert!(listed.contains(&wanted.to_string()));
        }

        // Each attribute's string payload must match what was stored.
        let units_attr = dset.attr("units").unwrap();
        assert_eq!(units_attr.read_string().unwrap(), "meters");

        let desc_attr = dset.attr("desc").unwrap();
        assert_eq!(desc_attr.read_string().unwrap(), "test data");
    }

    std::fs::remove_file(&h5_path).ok();
}
1717
#[test]
fn type_mismatch_element_size() {
    let h5_path = temp_path("type_mismatch");

    // Store four f64 values.
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<f64>()
            .shape([4])
            .create("data")
            .unwrap();
        dset.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
        writer.close().unwrap();
    }

    // Requesting the data as u8 (1-byte elements) from an f64 dataset
    // (8-byte elements) must be refused.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("data").unwrap();
        assert!(dset.read_raw::<u8>().is_err());
    }

    std::fs::remove_file(&h5_path).ok();
}
1739
#[test]
fn dataset_survives_file_move() {
    let h5_path = temp_path("ds_survives");

    // The `H5File` handle goes out of scope at the end of this block; only
    // the dataset handle escapes it.
    let orphan = {
        let writer = H5File::create(&h5_path).unwrap();
        writer.new_dataset::<u8>().shape([4]).create("x").unwrap()
    };
    // The dataset handle still holds an Rc to the shared inner state, so
    // writing through it must keep working after the file handle is gone.
    orphan.write_raw(&[1u8, 2, 3, 4]).unwrap();
    // The writer finalizes when the last Rc is dropped.

    std::fs::remove_file(&h5_path).ok();
}
1754
#[test]
fn new_attr_scalar_string() {
    use crate::types::VarLenUnicode;

    let h5_path = temp_path("attr_scalar_string");
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer.new_dataset::<u8>().shape([4]).create("data").unwrap();
        dset.write_raw(&[1u8, 2, 3, 4]).unwrap();

        // Attach a scalar variable-length string attribute via `write_scalar`.
        let name_attr = dset
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("name")
            .unwrap();
        let value = VarLenUnicode(String::from("test_value"));
        name_attr.write_scalar(&value).unwrap();

        writer.close().unwrap();
    }

    // The file must remain valid: the dataset still opens and reads back.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("data").unwrap();
        assert_eq!(dset.shape(), vec![4]);
        let got = dset.read_raw::<u8>().unwrap();
        assert_eq!(got, vec![1u8, 2, 3, 4]);
    }

    std::fs::remove_file(&h5_path).ok();
}
1787
#[test]
fn all_numeric_types_roundtrip() {
    // Asserts that dataset `$name`, read as element type `$ty`, holds `$want`.
    macro_rules! assert_stored {
        ($file:ident, $ty:ty, $name:literal, $want:expr) => {
            assert_eq!(
                $file.dataset($name).unwrap().read_raw::<$ty>().unwrap(),
                $want
            )
        };
    }

    let h5_path = temp_path("all_types");

    // One two-element dataset per numeric type, each named after its type.
    // Every handle stays alive until the end of the block, i.e. past `close()`.
    {
        let writer = H5File::create(&h5_path).unwrap();

        let ds_u8 = writer.new_dataset::<u8>().shape([2]).create("u8").unwrap();
        ds_u8.write_raw(&[1u8, 2]).unwrap();

        let ds_i8 = writer.new_dataset::<i8>().shape([2]).create("i8").unwrap();
        ds_i8.write_raw(&[-1i8, 1]).unwrap();

        let ds_u16 = writer.new_dataset::<u16>().shape([2]).create("u16").unwrap();
        ds_u16.write_raw(&[100u16, 200]).unwrap();

        let ds_i16 = writer.new_dataset::<i16>().shape([2]).create("i16").unwrap();
        ds_i16.write_raw(&[-100i16, 100]).unwrap();

        let ds_u32 = writer.new_dataset::<u32>().shape([2]).create("u32").unwrap();
        ds_u32.write_raw(&[1000u32, 2000]).unwrap();

        let ds_i32 = writer.new_dataset::<i32>().shape([2]).create("i32").unwrap();
        ds_i32.write_raw(&[-1000i32, 1000]).unwrap();

        let ds_u64 = writer.new_dataset::<u64>().shape([2]).create("u64").unwrap();
        ds_u64.write_raw(&[10000u64, 20000]).unwrap();

        let ds_i64 = writer.new_dataset::<i64>().shape([2]).create("i64").unwrap();
        ds_i64.write_raw(&[-10000i64, 10000]).unwrap();

        let ds_f32 = writer.new_dataset::<f32>().shape([2]).create("f32").unwrap();
        ds_f32.write_raw(&[1.5f32, 2.5]).unwrap();

        let ds_f64 = writer.new_dataset::<f64>().shape([2]).create("f64").unwrap();
        ds_f64.write_raw(&[1.23456f64, 7.89012]).unwrap();

        writer.close().unwrap();
    }

    // Reopen and check every dataset against the values written above.
    {
        let reader = H5File::open(&h5_path).unwrap();

        assert_stored!(reader, u8, "u8", vec![1u8, 2]);
        assert_stored!(reader, i8, "i8", vec![-1i8, 1]);
        assert_stored!(reader, u16, "u16", vec![100u16, 200]);
        assert_stored!(reader, i16, "i16", vec![-100i16, 100]);
        assert_stored!(reader, u32, "u32", vec![1000u32, 2000]);
        assert_stored!(reader, i32, "i32", vec![-1000i32, 1000]);
        assert_stored!(reader, u64, "u64", vec![10000u64, 20000]);
        assert_stored!(reader, i64, "i64", vec![-10000i64, 10000]);
        assert_stored!(reader, f32, "f32", vec![1.5f32, 2.5]);
        assert_stored!(reader, f64, "f64", vec![1.23456f64, 7.89012]);
    }

    std::fs::remove_file(&h5_path).ok();
}
1875
#[test]
fn append_chunked_roundtrip() {
    // Frames of three f64 values are appended to a dataset that is unlimited
    // along its first dimension and chunked one frame per chunk.
    let h5_path = temp_path("append_chunked");

    {
        let writer = H5File::create(&h5_path).unwrap();
        let frames = writer
            .new_dataset::<f64>()
            .shape([0, 3])
            .chunk(&[1, 3])
            .max_shape(&[None, Some(3)])
            .create("data")
            .unwrap();

        // A single frame first ...
        let first = [1.0f64, 2.0, 3.0];
        frames.append(&first).unwrap();
        // ... then two frames in one call.
        let next_two = [4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0];
        frames.append(&next_two).unwrap();

        writer.close().unwrap();
    }

    {
        let reader = H5File::open(&h5_path).unwrap();
        let frames = reader.dataset("data").unwrap();
        // Three frames in total were appended.
        assert_eq!(frames.shape(), vec![3, 3]);
        let got = frames.read_raw::<f64>().unwrap();
        assert_eq!(got, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
    }

    std::fs::remove_file(&h5_path).ok();
}
1908
#[test]
fn append_1d_chunked() {
    // Appends of varying length to a 1-D dataset with chunk size 4: one that
    // leaves a chunk partial, one that completes it, and one whole chunk.
    let h5_path = temp_path("append_1d");

    {
        let writer = H5File::create(&h5_path).unwrap();
        let values = writer
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[4])
            .max_shape(&[None])
            .create("values")
            .unwrap();

        // Three elements: the first chunk is only partially filled.
        values.append(&[10i32, 20, 30]).unwrap();
        // One more element brings the total to the chunk boundary.
        values.append(&[40i32]).unwrap();
        // Four elements: exactly one further full chunk.
        values.append(&[50i32, 60, 70, 80]).unwrap();

        writer.close().unwrap();
    }

    {
        let reader = H5File::open(&h5_path).unwrap();
        let values = reader.dataset("values").unwrap();
        assert_eq!(values.shape(), vec![8]);
        let got = values.read_raw::<i32>().unwrap();
        assert_eq!(got, vec![10, 20, 30, 40, 50, 60, 70, 80]);
    }

    std::fs::remove_file(&h5_path).ok();
}
1940
#[test]
fn append_partial_chunk_flushed_on_close() {
    let h5_path = temp_path("append_partial_close");

    {
        let writer = H5File::create(&h5_path).unwrap();
        let vals = writer
            .new_dataset::<f64>()
            .shape([0])
            .chunk(&[4])
            .max_shape(&[None])
            .create("vals")
            .unwrap();

        // Five elements with chunk size 4: chunk 0 is full [1,2,3,4] and
        // chunk 1 is partial [5,0,0,0] when the file is closed.
        vals.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
        writer.close().unwrap();
    }

    {
        let reader = H5File::open(&h5_path).unwrap();
        let vals = reader.dataset("vals").unwrap();
        assert_eq!(vals.shape(), vec![5]);
        let got = vals.read_raw::<f64>().unwrap();
        // Two chunks hold 2 * 4 = 8 element slots, but the shape is 5, and
        // `read_raw` returns as many elements as the shape describes.
        assert_eq!(got.len(), 5);
        assert_eq!(got, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    }

    std::fs::remove_file(&h5_path).ok();
}
1973
#[cfg(feature = "deflate")]
#[test]
fn vlen_append_after_reopen_filtered() {
    // Appends after a reopen into a partially written, *compressed* vlen
    // chunk addressed from the index block. Covers rebuilding the filtered
    // index block in open_append and the filtered read-modify-write path.
    let h5_path = temp_path("vlen_reopen_filtered");

    // Session 1: three strings into a chunk of capacity 4 (left partial).
    {
        let writer = H5File::create(&h5_path).unwrap();
        let pipeline = crate::format::messages::filter::FilterPipeline::deflate(6);
        writer
            .create_appendable_vlen_dataset("strs", 4, Some(pipeline))
            .unwrap();
        writer
            .append_vlen_strings("strs", &["alpha", "beta", "gamma"])
            .unwrap();
        writer.close().unwrap();
    }

    // Session 2: reopen read-write and append one more string.
    {
        let appender = H5File::open_rw(&h5_path).unwrap();
        appender.append_vlen_strings("strs", &["delta"]).unwrap();
        appender.close().unwrap();
    }

    // Session 3: all four strings must come back in insertion order.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("strs").unwrap().read_vlen_strings().unwrap();
        assert_eq!(
            got.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
            vec!["alpha", "beta", "gamma", "delta"]
        );
    }
    std::fs::remove_file(&h5_path).ok();
}
2008
#[test]
fn vlen_append_after_reopen_data_block() {
    // Appends after a reopen into a partial chunk stored in an extensible-
    // array *data block* (chunk index >= idx_blk_elmts). Covers data-block
    // resolution in read_chunk_if_present and write_chunk.
    let h5_path = temp_path("vlen_reopen_datablk");
    let initial: Vec<String> = (0..9).map(|i| format!("s{i}")).collect();

    // Session 1: nine strings with chunk size 2.
    {
        let writer = H5File::create(&h5_path).unwrap();
        writer
            .create_appendable_vlen_dataset("strs", 2, None)
            .unwrap();
        let borrowed: Vec<&str> = initial.iter().map(String::as_str).collect();
        writer.append_vlen_strings("strs", &borrowed).unwrap();
        writer.close().unwrap();
    }

    // Session 2: reopen read-write and append the tenth string.
    {
        let appender = H5File::open_rw(&h5_path).unwrap();
        appender.append_vlen_strings("strs", &["s9"]).unwrap();
        appender.close().unwrap();
    }

    // Session 3: s0..s9 must come back in order.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("strs").unwrap().read_vlen_strings().unwrap();
        let expected: Vec<String> = (0..10).map(|i| format!("s{i}")).collect();
        assert_eq!(got, expected);
    }
    std::fs::remove_file(&h5_path).ok();
}
2037
#[test]
fn vlen_append_after_reopen_super_block() {
    // Appends after a reopen into a partial chunk whose index lands in an
    // extensible-array *super block*. With the default EA geometry
    // (idx_blk_elmts=4, data_blk_min_elmts=16, sup_blk_min_data_ptrs=4),
    // chunks 0..=243 are reached through the index block or its direct data
    // blocks, so chunk 244 is reached through a super block read from disk.
    // Covers the ViaSblk branch of read_chunk_if_present.
    let h5_path = temp_path("vlen_reopen_super");

    // 489 strings at chunk size 2: chunk 244 ends up holding a single string
    // (partially filled) and is flushed to disk on close.
    let initial: Vec<String> = (0..489).map(|i| format!("v{i}")).collect();

    // Session 1: write the initial strings.
    {
        let writer = H5File::create(&h5_path).unwrap();
        writer
            .create_appendable_vlen_dataset("strs", 2, None)
            .unwrap();
        let borrowed: Vec<&str> = initial.iter().map(String::as_str).collect();
        writer.append_vlen_strings("strs", &borrowed).unwrap();
        writer.close().unwrap();
    }

    // Session 2: reopen read-write and append one more string.
    {
        let appender = H5File::open_rw(&h5_path).unwrap();
        appender.append_vlen_strings("strs", &["v489"]).unwrap();
        appender.close().unwrap();
    }

    // Session 3: v0..v489 must come back in order.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("strs").unwrap().read_vlen_strings().unwrap();
        let expected: Vec<String> = (0..490).map(|i| format!("v{i}")).collect();
        assert_eq!(got, expected);
    }
    std::fs::remove_file(&h5_path).ok();
}
2072
#[cfg(feature = "deflate")]
#[test]
fn vlen_append_after_reopen_filtered_data_block() {
    // The most demanding combination: a compressed dataset, a chunk stored
    // in a data block, and a partial read-modify-write across a reopen.
    let h5_path = temp_path("vlen_reopen_filt_datablk");
    let initial: Vec<String> = (0..9).map(|i| format!("item{i:02}")).collect();

    // Session 1: nine strings with chunk size 2 and deflate level 6.
    {
        let writer = H5File::create(&h5_path).unwrap();
        let pipeline = crate::format::messages::filter::FilterPipeline::deflate(6);
        writer
            .create_appendable_vlen_dataset("strs", 2, Some(pipeline))
            .unwrap();
        let borrowed: Vec<&str> = initial.iter().map(String::as_str).collect();
        writer.append_vlen_strings("strs", &borrowed).unwrap();
        writer.close().unwrap();
    }

    // Session 2: reopen read-write and append the tenth string.
    {
        let appender = H5File::open_rw(&h5_path).unwrap();
        appender.append_vlen_strings("strs", &["item09"]).unwrap();
        appender.close().unwrap();
    }

    // Session 3: item00..item09 must come back in order.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("strs").unwrap().read_vlen_strings().unwrap();
        let expected: Vec<String> = (0..10).map(|i| format!("item{i:02}")).collect();
        assert_eq!(got, expected);
    }
    std::fs::remove_file(&h5_path).ok();
}
2105
#[test]
fn group_nx_class_attribute_roundtrip() {
    // Attributes on non-root groups (such as the NeXus `NX_class` marker)
    // are stored in the group's own object header and are read back by path.
    let h5_path = temp_path("group_nx_class");
    {
        let writer = H5File::create(&h5_path).unwrap();

        // /entry carries a string attribute.
        let entry = writer.create_group("entry").unwrap();
        entry.set_attr_string("NX_class", "NXentry").unwrap();

        // /entry/detector carries a string and a numeric attribute.
        let detector = entry.create_group("detector").unwrap();
        detector.set_attr_string("NX_class", "NXdetector").unwrap();
        detector.set_attr_numeric("frame_count", &7i32).unwrap();

        // A small dataset inside the nested group.
        let ones = [1.0f32; 4];
        detector
            .new_dataset::<f32>()
            .shape([4])
            .create("data")
            .unwrap()
            .write_raw(&ones)
            .unwrap();

        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();

        let entry = reader.root_group().group("entry").unwrap();
        assert_eq!(entry.attr_string("NX_class").unwrap(), "NXentry");

        let detector = entry.group("detector").unwrap();
        assert_eq!(detector.attr_string("NX_class").unwrap(), "NXdetector");

        // Both attribute names must appear in the nested group's listing.
        let listed = detector.attr_names().unwrap();
        for wanted in &["NX_class", "frame_count"] {
            assert!(listed.contains(&wanted.to_string()));
        }
    }
    std::fs::remove_file(&h5_path).ok();
}
2138
#[test]
fn ea_super_block_roundtrip() {
    // With one element per chunk, 2000 chunks spread over several
    // extensible-array super blocks. Prior to super-block support the
    // writer failed at chunk index 228.
    let h5_path = temp_path("ea_super_rt");
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[1])
            .max_shape(&[None])
            .create("v")
            .unwrap();
        let sequence: Vec<i32> = (0..2000).collect();
        dset.append(&sequence).unwrap();
        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("v").unwrap().read_raw::<i32>().unwrap();
        assert_eq!(got.len(), 2000);
        // Every element must equal its own position.
        assert!(got.iter().zip(0i32..).all(|(&value, position)| value == position));
    }
    std::fs::remove_file(&h5_path).ok();
}
2164
#[cfg(feature = "deflate")]
#[test]
fn ea_filtered_super_block_roundtrip() {
    // Same idea as the unfiltered super-block round trip, but every chunk is
    // deflate-compressed.
    let h5_path = temp_path("ea_filt_super");
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[1])
            .max_shape(&[None])
            .deflate(4)
            .create("v")
            .unwrap();
        let sequence: Vec<i32> = (0..600).collect();
        dset.append(&sequence).unwrap();
        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("v").unwrap().read_raw::<i32>().unwrap();
        assert_eq!(got, (0..600).collect::<Vec<i32>>());
    }
    std::fs::remove_file(&h5_path).ok();
}
2190
#[test]
fn ea_super_block_open_append() {
    // A dataset is written, closed, then reopened with the low-level writer
    // to add chunks whose indices fall into super blocks.
    let h5_path = temp_path("ea_super_append");

    // Session 1: 300 single-element chunks through the high-level API.
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[1])
            .max_shape(&[None])
            .create("v")
            .unwrap();
        let sequence: Vec<i32> = (0..300).collect();
        dset.append(&sequence).unwrap();
        writer.close().unwrap();
    }

    // Session 2: chunks 300..900 are written one by one, each holding its own
    // index as a little-endian i32, then the extent is raised to 900.
    {
        let mut raw = crate::io::writer::Hdf5Writer::open_append(&h5_path).unwrap();
        let ds_index = raw.dataset_index("v").unwrap();
        for chunk_no in 300..900u64 {
            let payload = (chunk_no as i32).to_le_bytes();
            raw.write_chunk(ds_index, chunk_no, &payload).unwrap();
        }
        raw.extend_dataset(ds_index, &[900]).unwrap();
        raw.close().unwrap();
    }

    // Session 3: all 900 elements must equal their position.
    {
        let reader = H5File::open(&h5_path).unwrap();
        let got = reader.dataset("v").unwrap().read_raw::<i32>().unwrap();
        assert_eq!(got.len(), 900);
        assert!(got.iter().zip(0i32..).all(|(&value, position)| value == position));
    }
    std::fs::remove_file(&h5_path).ok();
}
2224
#[test]
fn btree_v2_multi_unlimited_roundtrip() {
    // Two unlimited dimensions select the v2 B-tree chunk index; chunks are
    // addressed by their grid coordinates through `write_chunk_at`.
    let h5_path = temp_path("bt2_multi");
    {
        let writer = H5File::create(&h5_path).unwrap();
        let grid = writer
            .new_dataset::<i32>()
            .shape([0, 0])
            .chunk(&[2, 2])
            .max_shape(&[None, None])
            .create("grid")
            .unwrap();
        assert!(grid.is_chunked());

        // Logical 4x4 grid with value[r][c] = r*4 + c, stored as 2x2 chunks.
        for chunk_row in 0..2usize {
            for chunk_col in 0..2usize {
                // Serialize the chunk's four cells row-major as LE i32 bytes.
                let payload: Vec<u8> = (0..2usize)
                    .flat_map(|i| (0..2usize).map(move |j| (i, j)))
                    .flat_map(|(i, j)| {
                        let cell = ((chunk_row * 2 + i) * 4 + (chunk_col * 2 + j)) as i32;
                        cell.to_le_bytes()
                    })
                    .collect();
                grid.write_chunk_at(&[chunk_row, chunk_col], &payload)
                    .unwrap();
            }
        }
        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let grid = reader.dataset("grid").unwrap();
        assert_eq!(grid.shape(), vec![4, 4]);
        let got = grid.read_raw::<i32>().unwrap();
        assert_eq!(got, (0..16).collect::<Vec<i32>>());
    }
    std::fs::remove_file(&h5_path).ok();
}
2263
#[test]
fn subframe_chunking_roundtrip() {
    // Chunks smaller than one frame: with shape [N,8,8] and chunk [1,4,4],
    // each frame is tiled into a 2x2 grid of 4x4 chunks. `write_chunk_at`
    // is given chunk-grid coordinates.
    let h5_path = temp_path("subframe");
    {
        let writer = H5File::create(&h5_path).unwrap();
        let dset = writer
            .new_dataset::<i32>()
            .shape([0, 8, 8])
            .chunk(&[1, 4, 4])
            .max_shape(&[None, Some(8), Some(8)])
            .create("v")
            .unwrap();

        // Three frames; element (f, r, c) holds f*64 + r*8 + c.
        for frame in 0..3usize {
            for tile_row in 0..2usize {
                for tile_col in 0..2usize {
                    // Serialize the tile's 16 cells row-major as LE i32 bytes.
                    let payload: Vec<u8> = (0..4usize)
                        .flat_map(|i| (0..4usize).map(move |j| (i, j)))
                        .flat_map(|(i, j)| {
                            let cell = (frame * 64 + (tile_row * 4 + i) * 8 + (tile_col * 4 + j))
                                as i32;
                            cell.to_le_bytes()
                        })
                        .collect();
                    dset.write_chunk_at(&[frame, tile_row, tile_col], &payload)
                        .unwrap();
                }
            }
        }
        writer.close().unwrap();
    }
    {
        let reader = H5File::open(&h5_path).unwrap();
        let dset = reader.dataset("v").unwrap();
        assert_eq!(dset.shape(), vec![3, 8, 8]);
        let got = dset.read_raw::<i32>().unwrap();
        assert_eq!(got, (0..192).collect::<Vec<i32>>());
    }
    std::fs::remove_file(&h5_path).ok();
}
2306
2307    #[test]
2308    fn fill_value_contiguous_roundtrip() {
2309        let path = temp_path("fill_value_contig");
2310        {
2311            let file = H5File::create(&path).unwrap();
2312            let ds = file
2313                .new_dataset::<f32>()
2314                .shape([4])
2315                .fill_value(2.5f32)
2316                .create("data")
2317                .unwrap();
2318            ds.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();
2319            file.close().unwrap();
2320        }
2321        // open_append decodes the fill-value message back from the header.
2322        {
2323            let writer = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
2324            let idx = writer.dataset_index("data").unwrap();
2325            assert_eq!(
2326                writer.datasets[idx].fill_value,
2327                Some(2.5f32.to_le_bytes().to_vec())
2328            );
2329        }
2330        // Data still reads back correctly.
2331        {
2332            let file = H5File::open(&path).unwrap();
2333            let ds = file.dataset("data").unwrap();
2334            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0, 2.0, 3.0, 4.0]);
2335        }
2336        std::fs::remove_file(&path).ok();
2337    }
2338
2339    #[test]
2340    fn fill_value_chunked_roundtrip() {
2341        let path = temp_path("fill_value_chunked");
2342        {
2343            let file = H5File::create(&path).unwrap();
2344            let ds = file
2345                .new_dataset::<i32>()
2346                .shape([0])
2347                .chunk(&[4])
2348                .max_shape(&[None])
2349                .fill_value(-7i32)
2350                .create("vals")
2351                .unwrap();
2352            ds.append(&[1i32, 2, 3, 4]).unwrap();
2353            file.close().unwrap();
2354        }
2355        {
2356            let writer = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
2357            let idx = writer.dataset_index("vals").unwrap();
2358            assert_eq!(
2359                writer.datasets[idx].fill_value,
2360                Some((-7i32).to_le_bytes().to_vec())
2361            );
2362        }
2363        std::fs::remove_file(&path).ok();
2364    }
2365
2366    #[test]
2367    fn fill_value_read_missing_chunks() {
2368        // A chunked dataset with chunk 1 left unwritten must read that
2369        // gap back as the user-defined fill value, not zero.
2370        fn i32_bytes(vals: &[i32]) -> Vec<u8> {
2371            vals.iter().flat_map(|v| v.to_le_bytes()).collect()
2372        }
2373        let path = temp_path("fill_value_read_missing");
2374        {
2375            let file = H5File::create(&path).unwrap();
2376            let ds = file
2377                .new_dataset::<i32>()
2378                .shape([0])
2379                .chunk(&[2])
2380                .max_shape(&[None])
2381                .fill_value(-1i32)
2382                .create("vals")
2383                .unwrap();
2384            // chunk 0 = [10,20]; chunk 1 unwritten; chunk 2 = [50,60].
2385            ds.write_chunk(0, &i32_bytes(&[10, 20])).unwrap();
2386            ds.write_chunk(2, &i32_bytes(&[50, 60])).unwrap();
2387            ds.extend(&[6]).unwrap();
2388            file.close().unwrap();
2389        }
2390        {
2391            let file = H5File::open(&path).unwrap();
2392            let ds = file.dataset("vals").unwrap();
2393            let all = ds.read_raw::<i32>().unwrap();
2394            assert_eq!(all, vec![10, 20, -1, -1, 50, 60]);
2395        }
2396        std::fs::remove_file(&path).ok();
2397    }
2398
2399    #[test]
2400    fn fill_value_partial_chunk_padded_with_fill() {
2401        // A partial trailing chunk flushed at close must pad its unwritten
2402        // tail with the fill value. That pad sits beyond the logical shape,
2403        // so it is verified by scanning the on-disk chunk bytes directly.
2404        let path = temp_path("fill_value_partial_pad");
2405        {
2406            let file = H5File::create(&path).unwrap();
2407            let ds = file
2408                .new_dataset::<i32>()
2409                .shape([0])
2410                .chunk(&[4])
2411                .max_shape(&[None])
2412                .fill_value(-9i32)
2413                .create("vals")
2414                .unwrap();
2415            // 3 of 4 frames -> flushed as a partial chunk on close.
2416            ds.append(&[1i32, 2, 3]).unwrap();
2417            file.close().unwrap();
2418        }
2419        let bytes = std::fs::read(&path).unwrap();
2420        // Locate the chunk: i32 LE of [1, 2, 3] written contiguously.
2421        let needle: Vec<u8> = [1i32, 2, 3].iter().flat_map(|v| v.to_le_bytes()).collect();
2422        let pos = bytes
2423            .windows(needle.len())
2424            .position(|w| w == needle)
2425            .expect("chunk data [1,2,3] not found in file");
2426        let pad = &bytes[pos + needle.len()..pos + needle.len() + 4];
2427        assert_eq!(
2428            pad,
2429            &(-9i32).to_le_bytes(),
2430            "partial chunk tail must be padded with fill value -9, got {:?}",
2431            pad
2432        );
2433        std::fs::remove_file(&path).ok();
2434    }
2435
2436    #[test]
2437    fn vlen_append_after_reopen_preserves_existing() {
2438        // Reopening and appending into a partially-written vlen chunk must
2439        // read-modify-write: the strings already on disk must survive.
2440        let path = temp_path("vlen_append_reopen");
2441        {
2442            let file = H5File::create(&path).unwrap();
2443            file.create_appendable_vlen_dataset("strs", 4, None)
2444                .unwrap();
2445            // 3 of 4 frames -> flushed as a partial chunk on close.
2446            file.append_vlen_strings("strs", &["a", "b", "c"]).unwrap();
2447            file.close().unwrap();
2448        }
2449        {
2450            // Append a 4th string -> partial-chunk write into chunk 0.
2451            let file = H5File::open_rw(&path).unwrap();
2452            file.append_vlen_strings("strs", &["d"]).unwrap();
2453            file.close().unwrap();
2454        }
2455        {
2456            let file = H5File::open(&path).unwrap();
2457            let ds = file.dataset("strs").unwrap();
2458            let got = ds.read_vlen_strings().unwrap();
2459            assert_eq!(
2460                got.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
2461                vec!["a", "b", "c", "d"]
2462            );
2463        }
2464        std::fs::remove_file(&path).ok();
2465    }
2466
2467    #[test]
2468    fn fill_value_size_mismatch_errors() {
2469        let path = temp_path("fill_value_mismatch");
2470        let mut writer = crate::io::writer::Hdf5Writer::create(&path).unwrap();
2471        let dt = <f64 as crate::types::H5Type>::hdf5_type();
2472        let idx = writer.create_dataset("d", dt, &[4u64]).unwrap();
2473        // f64 element size is 8; a 4-byte fill value must be rejected.
2474        assert!(writer.set_dataset_fill_value(idx, vec![0u8; 4]).is_err());
2475        // The correct width succeeds.
2476        writer.set_dataset_fill_value(idx, vec![0u8; 8]).unwrap();
2477        writer.close().unwrap();
2478        std::fs::remove_file(&path).ok();
2479    }
2480}