Skip to main content

rust_hdf5/
dataset.rs

1//! Dataset creation and I/O.
2//!
3//! Datasets are created via the fluent [`DatasetBuilder`] API obtained from
4//! [`H5File::new_dataset`](crate::file::H5File::new_dataset). Once created,
5//! the [`H5Dataset`] handle can read or write raw typed data.
6
7use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
12// ---------------------------------------------------------------------------
13// DatasetBuilder
14// ---------------------------------------------------------------------------
15
16/// A fluent builder for creating datasets.
17///
18/// Obtained from [`H5File::new_dataset::<T>()`](crate::file::H5File::new_dataset).
19///
20/// ```no_run
21/// # use rust_hdf5::H5File;
22/// let file = H5File::create("builder.h5").unwrap();
23/// let ds = file.new_dataset::<f32>()
24///     .shape(&[10, 20])
25///     .create("temperatures")
26///     .unwrap();
27/// ```
28pub struct DatasetBuilder<T: H5Type> {
29    file_inner: SharedInner,
30    shape: Option<Vec<usize>>,
31    chunk_dims: Option<Vec<usize>>,
32    max_shape: Option<Vec<Option<usize>>>,
33    deflate_level: Option<u32>,
34    shuffle_deflate_level: Option<u32>,
35    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
36    group_path: Option<String>,
37    fill_value: Option<Vec<u8>>,
38    _marker: std::marker::PhantomData<T>,
39}
40
41impl<T: H5Type> DatasetBuilder<T> {
42    pub(crate) fn new(file_inner: SharedInner) -> Self {
43        Self {
44            file_inner,
45            shape: None,
46            chunk_dims: None,
47            max_shape: None,
48            deflate_level: None,
49            shuffle_deflate_level: None,
50            custom_pipeline: None,
51            group_path: None,
52            fill_value: None,
53            _marker: std::marker::PhantomData,
54        }
55    }
56
57    pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
58        Self {
59            file_inner,
60            shape: None,
61            chunk_dims: None,
62            max_shape: None,
63            deflate_level: None,
64            shuffle_deflate_level: None,
65            custom_pipeline: None,
66            group_path: Some(group_path),
67            fill_value: None,
68            _marker: std::marker::PhantomData,
69        }
70    }
71
72    /// Set the dataset dimensions.
73    ///
74    /// This is required before calling [`create`](Self::create).
75    /// Use an empty slice `&[]` for a scalar (0-dimensional) dataset.
76    #[must_use]
77    pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
78        self.shape = Some(dims.as_ref().to_vec());
79        self
80    }
81
82    /// Create a scalar (0-dimensional) dataset holding a single value.
83    #[must_use]
84    pub fn scalar(mut self) -> Self {
85        self.shape = Some(vec![]);
86        self
87    }
88
89    /// Set chunk dimensions for chunked storage.
90    ///
91    /// When set, the dataset uses chunked storage with the extensible array
92    /// index. You should also call [`max_shape`](Self::max_shape) or
93    /// [`resizable`](Self::resizable) to allow extending.
94    #[must_use]
95    pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
96        self.chunk_dims = Some(chunk_dims.to_vec());
97        self
98    }
99
100    /// Make all dimensions unlimited (resizable).
101    ///
102    /// This sets max_dims to u64::MAX for all dimensions.
103    #[must_use]
104    pub fn resizable(mut self) -> Self {
105        self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
106        self
107    }
108
109    /// Set maximum dimensions. `None` means unlimited for that dimension.
110    #[must_use]
111    pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
112        self.max_shape = Some(max.to_vec());
113        self
114    }
115
116    /// Enable deflate (gzip) compression with the given level (0-9).
117    ///
118    /// Requires chunked storage (call `.chunk()` before `.create()`).
119    /// Level 0 = no compression, 9 = maximum compression. Default is 6.
120    #[must_use]
121    pub fn deflate(mut self, level: u32) -> Self {
122        self.deflate_level = Some(level);
123        self
124    }
125
126    /// Enable shuffle + deflate compression.
127    ///
128    /// Shuffle reorders bytes by position within elements before compression,
129    /// which typically improves compression ratios for numeric data.
130    /// Requires chunked storage.
131    #[must_use]
132    pub fn shuffle_deflate(mut self, level: u32) -> Self {
133        self.shuffle_deflate_level = Some(level);
134        self
135    }
136
137    /// Enable Zstandard compression with the given level (1-22, default 3).
138    ///
139    /// Requires chunked storage (call `.chunk()` before `.create()`).
140    #[must_use]
141    pub fn zstd(mut self, level: u32) -> Self {
142        self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
143        self
144    }
145
146    /// Set a custom filter pipeline for compression.
147    ///
148    /// This takes precedence over [`deflate`](Self::deflate) and
149    /// [`shuffle_deflate`](Self::shuffle_deflate). Requires chunked storage.
150    #[must_use]
151    pub fn filter_pipeline(
152        mut self,
153        pipeline: crate::format::messages::filter::FilterPipeline,
154    ) -> Self {
155        self.custom_pipeline = Some(pipeline);
156        self
157    }
158
159    /// Set a user-defined fill value for unwritten elements.
160    ///
161    /// Without this, datasets use the HDF5 default zero-fill. When set,
162    /// the value is written into the dataset's fill-value message
163    /// (`fill_defined = 2`), so HDF5 readers treat unallocated chunks and
164    /// unwritten regions as this value rather than zero.
165    ///
166    /// ```no_run
167    /// # use rust_hdf5::H5File;
168    /// let file = H5File::create("fv.h5").unwrap();
169    /// let ds = file.new_dataset::<f32>()
170    ///     .shape(&[100])
171    ///     .fill_value(f32::NAN)
172    ///     .create("data")
173    ///     .unwrap();
174    /// ```
175    #[must_use]
176    pub fn fill_value(mut self, value: T) -> Self {
177        let es = T::element_size();
178        // Safety: `T: H5Type` is a `Copy` numeric primitive with a
179        // well-defined byte representation; `element_size()` matches
180        // `size_of::<T>()`. The slice borrows `value` only for this call.
181        let raw = unsafe { std::slice::from_raw_parts(&value as *const T as *const u8, es) };
182        self.fill_value = Some(raw.to_vec());
183        self
184    }
185
186    /// Finalize and create the dataset with the given `name`.
187    ///
188    /// The name is the link name within the root group (e.g. `"data"` or
189    /// `"group1/data"` once nested groups are supported).
190    pub fn create(self, name: &str) -> Result<H5Dataset> {
191        let shape = self.shape.ok_or_else(|| {
192            Hdf5Error::InvalidState("shape must be set before calling create()".into())
193        })?;
194
195        // Build the full name: if created within a group, prefix with group path
196        let full_name = if let Some(ref gp) = self.group_path {
197            if gp == "/" {
198                name.to_string()
199            } else {
200                let trimmed = gp.trim_start_matches('/');
201                format!("{}/{}", trimmed, name)
202            }
203        } else {
204            name.to_string()
205        };
206        let group_path = self.group_path.clone();
207        let fill_value = self.fill_value.clone();
208
209        let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
210        let datatype = T::hdf5_type();
211        let element_size = T::element_size();
212
213        if let Some(ref chunk_dims) = self.chunk_dims {
214            // Chunked dataset
215            let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
216            let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
217                max.iter()
218                    .map(|m| m.map_or(u64::MAX, |v| v as u64))
219                    .collect()
220            } else {
221                // Default: max = current
222                dims_u64.clone()
223            };
224
225            // libhdf5 selects the chunk index from the dataspace: a v2
226            // B-tree for two or more unlimited dimensions, an extensible
227            // array for exactly one, and a fixed array when there are none.
228            let n_unlimited = max_u64.iter().filter(|&&m| m == u64::MAX).count();
229            let is_btree2 = n_unlimited >= 2;
230            let is_fixed_array = n_unlimited == 0;
231            let wants_filter = self.custom_pipeline.is_some()
232                || self.shuffle_deflate_level.is_some()
233                || self.deflate_level.is_some();
234
235            let index = {
236                let mut inner = borrow_inner_mut(&self.file_inner);
237                match &mut *inner {
238                    H5FileInner::Writer(writer) => {
239                        let idx = if is_btree2 {
240                            if wants_filter {
241                                return Err(Hdf5Error::InvalidState(
242                                    "compression of v2 B-tree (multi-unlimited-dimension) \
243                                     datasets is not yet supported"
244                                        .into(),
245                                ));
246                            }
247                            writer.create_btree_v2_dataset(
248                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
249                            )?
250                        } else if is_fixed_array {
251                            // A chunked dataset with no unlimited dimension
252                            // must use the fixed-array index — libhdf5
253                            // rejects an extensible-array index here. A
254                            // compressed fixed-shape dataset uses a *filtered*
255                            // fixed array (FA client id 1).
256                            if wants_filter {
257                                let pipeline = if let Some(p) = self.custom_pipeline {
258                                    p
259                                } else if let Some(level) = self.shuffle_deflate_level {
260                                    crate::format::messages::filter::FilterPipeline::shuffle_deflate(
261                                        T::element_size() as u32,
262                                        level,
263                                    )
264                                } else {
265                                    // deflate_level (checked by wants_filter).
266                                    crate::format::messages::filter::FilterPipeline::deflate(
267                                        self.deflate_level.unwrap(),
268                                    )
269                                };
270                                writer.create_fixed_array_dataset_with_pipeline(
271                                    &full_name, datatype, &dims_u64, &chunk_u64, pipeline,
272                                )?
273                            } else {
274                                writer.create_fixed_array_dataset(
275                                    &full_name, datatype, &dims_u64, &chunk_u64,
276                                )?
277                            }
278                        } else if let Some(pipeline) = self.custom_pipeline {
279                            writer.create_chunked_dataset_with_pipeline(
280                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
281                            )?
282                        } else if let Some(level) = self.shuffle_deflate_level {
283                            let pipeline =
284                                crate::format::messages::filter::FilterPipeline::shuffle_deflate(
285                                    T::element_size() as u32,
286                                    level,
287                                );
288                            writer.create_chunked_dataset_with_pipeline(
289                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
290                            )?
291                        } else if let Some(level) = self.deflate_level {
292                            writer.create_chunked_dataset_compressed(
293                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
294                            )?
295                        } else {
296                            writer.create_chunked_dataset(
297                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
298                            )?
299                        };
300                        if let Some(ref gp) = group_path {
301                            if gp != "/" {
302                                writer.assign_dataset_to_group(gp, idx)?;
303                            }
304                        }
305                        if let Some(ref fv) = fill_value {
306                            writer.set_dataset_fill_value(idx, fv.clone())?;
307                        }
308                        idx
309                    }
310                    H5FileInner::Reader(_) => {
311                        return Err(Hdf5Error::InvalidState(
312                            "cannot create a dataset in read mode".into(),
313                        ));
314                    }
315                    H5FileInner::Closed => {
316                        return Err(Hdf5Error::InvalidState("file is closed".into()));
317                    }
318                }
319            };
320
321            Ok(H5Dataset {
322                file_inner: clone_inner(&self.file_inner),
323                info: DatasetInfo::Writer {
324                    index,
325                    shape,
326                    element_size,
327                    chunked: true,
328                    btree2: is_btree2,
329                    fixed_array: is_fixed_array,
330                },
331            })
332        } else {
333            // Contiguous dataset (original path)
334            let index = {
335                let mut inner = borrow_inner_mut(&self.file_inner);
336                match &mut *inner {
337                    H5FileInner::Writer(writer) => {
338                        let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
339                        if let Some(ref gp) = group_path {
340                            if gp != "/" {
341                                writer.assign_dataset_to_group(gp, idx)?;
342                            }
343                        }
344                        if let Some(ref fv) = fill_value {
345                            writer.set_dataset_fill_value(idx, fv.clone())?;
346                        }
347                        idx
348                    }
349                    H5FileInner::Reader(_) => {
350                        return Err(Hdf5Error::InvalidState(
351                            "cannot create a dataset in read mode".into(),
352                        ));
353                    }
354                    H5FileInner::Closed => {
355                        return Err(Hdf5Error::InvalidState("file is closed".into()));
356                    }
357                }
358            };
359
360            Ok(H5Dataset {
361                file_inner: clone_inner(&self.file_inner),
362                info: DatasetInfo::Writer {
363                    index,
364                    shape,
365                    element_size,
366                    chunked: false,
367                    btree2: false,
368                    fixed_array: false,
369                },
370            })
371        }
372    }
373}
374
375// ---------------------------------------------------------------------------
376// DatasetInfo
377// ---------------------------------------------------------------------------
378
/// Per-handle bookkeeping; the variant records how the handle was obtained.
enum DatasetInfo {
    /// Handle returned by `new_dataset().create()` on a file in write mode.
    Writer {
        /// Position of the dataset in the writer's dataset list.
        index: usize,
        /// Dimensions the dataset was created with.
        shape: Vec<usize>,
        /// Byte size of a single element.
        element_size: usize,
        /// `true` when the dataset uses chunked storage.
        chunked: bool,
        /// `true` when chunks are indexed by a v2 B-tree (two or more
        /// unlimited dimensions).
        btree2: bool,
        /// `true` when chunks are indexed by a fixed array (no unlimited
        /// dimension).
        fixed_array: bool,
    },
    /// Handle for a dataset looked up by name on a file in read mode.
    Reader {
        /// Link name used to look the dataset up in the reader.
        name: String,
        /// Dimensions of the dataset.
        shape: Vec<usize>,
        /// Byte size of a single element.
        element_size: usize,
    },
}
406
407// ---------------------------------------------------------------------------
408// H5Dataset
409// ---------------------------------------------------------------------------
410
411/// A handle to an HDF5 dataset, supporting typed read and write operations.
412///
413/// The dataset holds a shared reference to the file's I/O backend, so it
414/// remains valid even if the originating [`H5File`](crate::file::H5File) is
415/// moved or dropped (they share ownership via `Rc`).
416pub struct H5Dataset {
417    file_inner: SharedInner,
418    info: DatasetInfo,
419}
420
421impl H5Dataset {
422    /// Create a reader-mode dataset handle (called internally by `H5File::dataset`).
423    pub(crate) fn new_reader(
424        file_inner: SharedInner,
425        name: String,
426        shape: Vec<usize>,
427        element_size: usize,
428    ) -> Self {
429        Self {
430            file_inner,
431            info: DatasetInfo::Reader {
432                name,
433                shape,
434                element_size,
435            },
436        }
437    }
438
439    /// Return the dataset dimensions.
440    pub fn shape(&self) -> Vec<usize> {
441        match &self.info {
442            DatasetInfo::Writer { shape, .. } => shape.clone(),
443            DatasetInfo::Reader { shape, .. } => shape.clone(),
444        }
445    }
446
447    /// Return the number of dimensions (rank) of the dataset.
448    pub fn ndims(&self) -> usize {
449        match &self.info {
450            DatasetInfo::Writer { shape, .. } => shape.len(),
451            DatasetInfo::Reader { shape, .. } => shape.len(),
452        }
453    }
454
455    /// Return the total number of elements in the dataset.
456    pub fn total_elements(&self) -> usize {
457        match &self.info {
458            DatasetInfo::Writer { shape, .. } => shape.iter().product(),
459            DatasetInfo::Reader { shape, .. } => shape.iter().product(),
460        }
461    }
462
463    /// Return the size of one element in bytes.
464    pub fn element_size(&self) -> usize {
465        match &self.info {
466            DatasetInfo::Writer { element_size, .. } => *element_size,
467            DatasetInfo::Reader { element_size, .. } => *element_size,
468        }
469    }
470
471    /// Return the chunk dimensions, if this is a chunked dataset.
472    pub fn chunk_dims(&self) -> Option<Vec<usize>> {
473        match &self.info {
474            DatasetInfo::Reader { name, .. } => {
475                let inner = borrow_inner(&self.file_inner);
476                if let H5FileInner::Reader(reader) = &*inner {
477                    if let Some(info) = reader.dataset_info(name) {
478                        use crate::format::messages::data_layout::DataLayoutMessage;
479                        let chunk_dims = match &info.layout {
480                            DataLayoutMessage::ChunkedV4 { chunk_dims, .. }
481                            | DataLayoutMessage::ChunkedV3 { chunk_dims, .. } => Some(chunk_dims),
482                            _ => None,
483                        };
484                        if let Some(chunk_dims) = chunk_dims {
485                            // Strip trailing element-size dimension
486                            return Some(
487                                chunk_dims[..chunk_dims.len() - 1]
488                                    .iter()
489                                    .map(|&d| d as usize)
490                                    .collect(),
491                            );
492                        }
493                    }
494                }
495                None
496            }
497            DatasetInfo::Writer { .. } => None,
498        }
499    }
500
501    /// Return whether this is a chunked dataset.
502    pub fn is_chunked(&self) -> bool {
503        match &self.info {
504            DatasetInfo::Writer { chunked, .. } => *chunked,
505            DatasetInfo::Reader { name, .. } => {
506                let inner = borrow_inner(&self.file_inner);
507                match &*inner {
508                    H5FileInner::Reader(reader) => {
509                        if let Some(info) = reader.dataset_info(name) {
510                            use crate::format::messages::data_layout::DataLayoutMessage;
511                            matches!(
512                                info.layout,
513                                DataLayoutMessage::ChunkedV4 { .. }
514                                    | DataLayoutMessage::ChunkedV3 { .. }
515                            )
516                        } else {
517                            false
518                        }
519                    }
520                    _ => false,
521                }
522            }
523        }
524    }
525
526    /// Return the names of all attributes on this dataset (read mode only).
527    pub fn attr_names(&self) -> Result<Vec<String>> {
528        match &self.info {
529            DatasetInfo::Reader { name, .. } => {
530                let inner = borrow_inner(&self.file_inner);
531                match &*inner {
532                    H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
533                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
534                }
535            }
536            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
537                "attr_names not available in write mode".into(),
538            )),
539        }
540    }
541
542    /// Open an attribute by name (read mode only).
543    pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
544        match &self.info {
545            DatasetInfo::Reader { name, .. } => {
546                let inner = borrow_inner(&self.file_inner);
547                match &*inner {
548                    H5FileInner::Reader(reader) => {
549                        let attr_msg = reader.dataset_attr(name, attr_name)?.clone();
550                        Ok(crate::attribute::H5Attribute::new_reader(
551                            clone_inner(&self.file_inner),
552                            attr_msg,
553                        ))
554                    }
555                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
556                }
557            }
558            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
559                "attr() not available in write mode".into(),
560            )),
561        }
562    }
563
564    /// Start building a new attribute on this dataset.
565    ///
566    /// Returns a fluent builder. Call `.shape(())` for a scalar attribute
567    /// and `.create("name")` to finalize.
568    ///
569    /// # Example
570    ///
571    /// ```no_run
572    /// # use rust_hdf5::H5File;
573    /// # use rust_hdf5::types::VarLenUnicode;
574    /// let file = H5File::create("attr.h5").unwrap();
575    /// let ds = file.new_dataset::<f32>().shape(&[10]).create("data").unwrap();
576    /// let attr = ds.new_attr::<VarLenUnicode>().shape(()).create("units").unwrap();
577    /// attr.write_scalar(&VarLenUnicode("meters".to_string())).unwrap();
578    /// ```
579    pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
580        let ds_index = match &self.info {
581            DatasetInfo::Writer { index, .. } => *index,
582            DatasetInfo::Reader { .. } => {
583                // Reader mode: we'll return a builder that will error on create.
584                // Using usize::MAX as sentinel.
585                usize::MAX
586            }
587        };
588        AttrBuilder::new(&self.file_inner, ds_index)
589    }
590
591    /// Write a typed slice to the dataset (contiguous datasets only).
592    ///
593    /// The slice length must match the total number of elements declared by
594    /// the dataset shape. The data is reinterpreted as raw bytes and written
595    /// to the file.
596    ///
597    /// # Errors
598    ///
599    /// Returns an error if:
600    /// - The file is in read mode.
601    /// - The data length does not match the declared shape.
602    pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
603        match &self.info {
604            DatasetInfo::Writer {
605                index,
606                shape,
607                element_size,
608                chunked,
609                btree2: _,
610                fixed_array: _,
611            } => {
612                if *chunked {
613                    return Err(Hdf5Error::InvalidState(
614                        "use write_chunk for chunked datasets".into(),
615                    ));
616                }
617
618                let total_elements: usize = shape.iter().product();
619                if data.len() != total_elements {
620                    return Err(Hdf5Error::InvalidState(format!(
621                        "data length {} does not match dataset size {}",
622                        data.len(),
623                        total_elements,
624                    )));
625                }
626
627                // Verify element size matches
628                if T::element_size() != *element_size {
629                    return Err(Hdf5Error::TypeMismatch(format!(
630                        "write type has element size {} but dataset expects {}",
631                        T::element_size(),
632                        element_size,
633                    )));
634                }
635
636                // Safety: T: Copy + 'static (numeric primitive) with well-defined
637                // byte representation. The resulting slice borrows `data` and
638                // lives only as long as this block.
639                let byte_len = data.len() * T::element_size();
640                let raw =
641                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
642
643                let mut inner = borrow_inner_mut(&self.file_inner);
644                match &mut *inner {
645                    H5FileInner::Writer(writer) => {
646                        writer.write_dataset_raw(*index, raw)?;
647                        Ok(())
648                    }
649                    _ => Err(Hdf5Error::InvalidState(
650                        "file is no longer in write mode".into(),
651                    )),
652                }
653            }
654            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
655                "cannot write to a dataset opened in read mode".into(),
656            )),
657        }
658    }
659
660    /// Write a single chunk to a chunked dataset.
661    ///
662    /// `chunk_idx` is the linear chunk index (typically the frame number for
663    /// streaming datasets). `data` is the raw byte data for one chunk.
664    ///
665    /// For datasets with two or more unlimited dimensions (v2 B-tree index),
666    /// use [`write_chunk_at`](Self::write_chunk_at) instead.
667    pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
668        match &self.info {
669            DatasetInfo::Writer {
670                index,
671                chunked,
672                btree2,
673                fixed_array,
674                ..
675            } => {
676                if !*chunked {
677                    return Err(Hdf5Error::InvalidState(
678                        "write_chunk is only for chunked datasets".into(),
679                    ));
680                }
681                if *btree2 {
682                    return Err(Hdf5Error::InvalidState(
683                        "this dataset uses a v2 B-tree chunk index; use write_chunk_at \
684                         with the chunk's grid coordinates"
685                            .into(),
686                    ));
687                }
688
689                let mut inner = borrow_inner_mut(&self.file_inner);
690                match &mut *inner {
691                    H5FileInner::Writer(writer) => {
692                        if *fixed_array {
693                            // Fixed-array dataset: convert the linear chunk
694                            // index into row-major grid coordinates.
695                            let chunk_dims = writer
696                                .dataset_chunk_dims(*index)
697                                .ok_or_else(|| {
698                                    Hdf5Error::InvalidState("dataset has no chunk info".into())
699                                })?
700                                .to_vec();
701                            let dims = writer.dataset_dims(*index).to_vec();
702                            let mut grid = vec![0u64; dims.len()];
703                            for d in 0..dims.len() {
704                                grid[d] = if chunk_dims[d] > 0 {
705                                    dims[d].div_ceil(chunk_dims[d])
706                                } else {
707                                    1
708                                };
709                            }
710                            // A zero-extent dimension yields a grid of 0
711                            // chunks — there is no chunk to write.
712                            if grid.contains(&0) {
713                                return Err(Hdf5Error::InvalidState(
714                                    "dataset has a zero-extent dimension and no chunks".into(),
715                                ));
716                            }
717                            let mut rem = chunk_idx as u64;
718                            let mut coords = vec![0u64; dims.len()];
719                            for d in (0..dims.len()).rev() {
720                                coords[d] = rem % grid[d];
721                                rem /= grid[d];
722                            }
723                            // A leftover means chunk_idx exceeded the grid.
724                            if rem != 0 {
725                                return Err(Hdf5Error::InvalidState(format!(
726                                    "chunk index {chunk_idx} is out of range for this dataset"
727                                )));
728                            }
729                            writer.write_chunk_fixed_array(*index, &coords, data)?;
730                        } else {
731                            writer.write_chunk(*index, chunk_idx as u64, data)?;
732                        }
733                        Ok(())
734                    }
735                    _ => Err(Hdf5Error::InvalidState(
736                        "file is no longer in write mode".into(),
737                    )),
738                }
739            }
740            DatasetInfo::Reader { .. } => {
741                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
742            }
743        }
744    }
745
746    /// Write a single chunk to a v2-B-tree-indexed dataset, addressed by its
747    /// chunk-grid coordinates (one per dimension).
748    ///
749    /// This is the entry point for datasets with two or more unlimited
750    /// dimensions. The dataset's logical dimensions are extended to cover
751    /// the written chunk. `data` is the raw bytes of one full chunk.
752    ///
753    /// ```no_run
754    /// # use rust_hdf5::H5File;
755    /// let file = H5File::create("bt2.h5").unwrap();
756    /// let ds = file.new_dataset::<i32>()
757    ///     .shape(&[0, 0])
758    ///     .chunk(&[2, 2])
759    ///     .max_shape(&[None, None])
760    ///     .create("grid")
761    ///     .unwrap();
762    /// let chunk = [0i32, 1, 2, 3];
763    /// let bytes: Vec<u8> = chunk.iter().flat_map(|v| v.to_le_bytes()).collect();
764    /// ds.write_chunk_at(&[0, 0], &bytes).unwrap();
765    /// ```
766    pub fn write_chunk_at(&self, chunk_coords: &[usize], data: &[u8]) -> Result<()> {
767        match &self.info {
768            DatasetInfo::Writer {
769                index,
770                chunked,
771                btree2,
772                fixed_array,
773                ..
774            } => {
775                if !*chunked {
776                    return Err(Hdf5Error::InvalidState(
777                        "write_chunk_at is only for chunked datasets".into(),
778                    ));
779                }
780                let coords: Vec<u64> = chunk_coords.iter().map(|&c| c as u64).collect();
781                let btree2 = *btree2;
782                let fixed_array = *fixed_array;
783                let mut inner = borrow_inner_mut(&self.file_inner);
784                let writer = match &mut *inner {
785                    H5FileInner::Writer(w) => w,
786                    _ => {
787                        return Err(Hdf5Error::InvalidState(
788                            "file is no longer in write mode".into(),
789                        ))
790                    }
791                };
792                let chunk_dims = writer
793                    .dataset_chunk_dims(*index)
794                    .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
795                    .to_vec();
796                let dims = writer.dataset_dims(*index).to_vec();
797                if coords.len() != dims.len() {
798                    return Err(Hdf5Error::InvalidState(format!(
799                        "chunk_coords has {} entries but the dataset has {} dimensions",
800                        coords.len(),
801                        dims.len()
802                    )));
803                }
804                if chunk_dims.len() != dims.len() {
805                    return Err(Hdf5Error::InvalidState(format!(
806                        "dataset chunk shape has {} dimensions but the dataspace has {}",
807                        chunk_dims.len(),
808                        dims.len()
809                    )));
810                }
811
812                // Validate coordinates and compute the grown dimensions
813                // up-front, before any chunk is written, so an overflowing
814                // coordinate cannot leave an orphaned chunk in the file.
815                let mut new_dims = dims.clone();
816                for d in 0..dims.len() {
817                    let needed = coords[d]
818                        .checked_add(1)
819                        .and_then(|c| c.checked_mul(chunk_dims[d]))
820                        .ok_or_else(|| {
821                            Hdf5Error::InvalidState(format!(
822                                "chunk coordinate {} in dimension {} is too large",
823                                coords[d], d
824                            ))
825                        })?;
826                    if needed > new_dims[d] {
827                        new_dims[d] = needed;
828                    }
829                }
830
831                if fixed_array {
832                    // Fixed-array (fixed-shape) dataset: no dimension growth.
833                    writer.write_chunk_fixed_array(*index, &coords, data)?;
834                    return Ok(());
835                }
836
837                if btree2 {
838                    writer.write_chunk_btree_v2(*index, &coords, data)?;
839                } else {
840                    // Extensible array: linearize the chunk-grid coordinates
841                    // (row-major) into the array's chunk index.
842                    let mut linear = 0u64;
843                    for d in 0..dims.len() {
844                        let grid = if chunk_dims[d] > 0 {
845                            dims[d].div_ceil(chunk_dims[d])
846                        } else {
847                            1
848                        };
849                        linear = linear
850                            .checked_mul(grid)
851                            .and_then(|l| l.checked_add(coords[d]))
852                            .ok_or_else(|| {
853                                Hdf5Error::InvalidState(
854                                    "chunk coordinates overflow the array index".into(),
855                                )
856                            })?;
857                    }
858                    writer.write_chunk(*index, linear, data)?;
859                }
860
861                if new_dims != dims {
862                    writer.extend_dataset(*index, &new_dims)?;
863                }
864                Ok(())
865            }
866            DatasetInfo::Reader { .. } => {
867                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
868            }
869        }
870    }
871
872    /// Write multiple chunks in a batch, optionally compressing in parallel.
873    ///
874    /// `chunks` is a slice of `(chunk_index, raw_data)` pairs. When a filter
875    /// pipeline is configured and the `parallel` feature is enabled, all
876    /// chunks are compressed concurrently via rayon.
877    pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
878        match &self.info {
879            DatasetInfo::Writer { index, chunked, .. } => {
880                if !*chunked {
881                    return Err(Hdf5Error::InvalidState(
882                        "write_chunks_batch is only for chunked datasets".into(),
883                    ));
884                }
885                let pairs: Vec<(u64, &[u8])> = chunks
886                    .iter()
887                    .map(|(idx, data)| (*idx as u64, *data))
888                    .collect();
889                let mut inner = borrow_inner_mut(&self.file_inner);
890                match &mut *inner {
891                    H5FileInner::Writer(writer) => {
892                        writer.write_chunks_batch(*index, &pairs)?;
893                        Ok(())
894                    }
895                    _ => Err(Hdf5Error::InvalidState(
896                        "file is no longer in write mode".into(),
897                    )),
898                }
899            }
900            DatasetInfo::Reader { .. } => {
901                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
902            }
903        }
904    }
905
906    /// Append data along the first dimension of a chunked dataset.
907    ///
908    /// `data` must contain a whole number of "frames" — slices along
909    /// dimension 0. For example, if the dataset has shape `[N, H, W]`
910    /// and `chunk_dims = [1, H, W]`, then `data.len()` must be a
911    /// multiple of `H * W`.
912    ///
913    /// This method writes the necessary chunks and extends the dataset
914    /// shape automatically.
915    ///
916    /// ```no_run
917    /// # use rust_hdf5::H5File;
918    /// let file = H5File::create("append.h5").unwrap();
919    /// let ds = file.new_dataset::<f64>()
920    ///     .shape(&[0, 3])
921    ///     .chunk(&[1, 3])
922    ///     .max_shape(&[None, Some(3)])
923    ///     .create("data")
924    ///     .unwrap();
925    /// ds.append(&[1.0, 2.0, 3.0]).unwrap();       // shape becomes [1, 3]
926    /// ds.append(&[4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap(); // shape becomes [3, 3]
927    /// ```
928    pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
929        match &self.info {
930            DatasetInfo::Writer {
931                index,
932                element_size,
933                chunked,
934                ..
935            } => {
936                if !*chunked {
937                    return Err(Hdf5Error::InvalidState(
938                        "append is only for chunked datasets".into(),
939                    ));
940                }
941                if T::element_size() != *element_size {
942                    return Err(Hdf5Error::TypeMismatch(format!(
943                        "append type has element size {} but dataset expects {}",
944                        T::element_size(),
945                        element_size,
946                    )));
947                }
948
949                let ds_index = *index;
950                let es = *element_size;
951
952                let mut inner = borrow_inner_mut(&self.file_inner);
953                let writer = match &mut *inner {
954                    H5FileInner::Writer(w) => w,
955                    _ => {
956                        return Err(Hdf5Error::InvalidState(
957                            "file is no longer in write mode".into(),
958                        ))
959                    }
960                };
961
962                let chunk_dims = writer
963                    .dataset_chunk_dims(ds_index)
964                    .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
965                    .to_vec();
966                let dims = writer.dataset_dims(ds_index).to_vec();
967
968                // Frame size = product of dims[1..]
969                let frame_elems: usize = if dims.len() > 1 {
970                    dims[1..].iter().map(|&d| d as usize).product()
971                } else {
972                    1
973                };
974
975                if frame_elems == 0 {
976                    return Err(Hdf5Error::InvalidState(
977                        "cannot append to dataset with zero-size trailing dimensions".into(),
978                    ));
979                }
980
981                if !data.len().is_multiple_of(frame_elems) {
982                    return Err(Hdf5Error::InvalidState(format!(
983                        "data length {} is not a multiple of frame size {}",
984                        data.len(),
985                        frame_elems,
986                    )));
987                }
988
989                let n_new_frames = data.len() / frame_elems;
990                let current_dim0 = dims[0] as usize;
991
992                // Chunk size along first dimension
993                let chunk_dim0 = chunk_dims[0] as usize;
994                let frame_bytes = frame_elems * es;
995
996                let raw = unsafe {
997                    std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
998                };
999
1000                // Merge buffered data with new data
1001                let ds = &mut writer.datasets[ds_index];
1002                let buffered_frames = ds.append_buffered_frames as usize;
1003                let mut combined = std::mem::take(&mut ds.append_buffer);
1004                combined.extend_from_slice(raw);
1005                ds.append_buffered_frames = 0;
1006
1007                let total_frames = buffered_frames + n_new_frames;
1008                let total_bytes = combined.len();
1009
1010                // Base chunk index: account for buffered frames
1011                let base_dim0 = current_dim0 - buffered_frames;
1012                let mut byte_pos = 0usize;
1013                let mut frame_pos = 0usize;
1014
1015                while frame_pos < total_frames {
1016                    let abs_frame = base_dim0 + frame_pos;
1017                    let chunk_idx = abs_frame / chunk_dim0;
1018                    let remaining_frames = total_frames - frame_pos;
1019                    let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);
1020
1021                    if remaining_frames >= frames_to_fill {
1022                        // Full chunk — write
1023                        let end = byte_pos + frames_to_fill * frame_bytes;
1024                        if frames_to_fill == chunk_dim0 {
1025                            writer.write_chunk(
1026                                ds_index,
1027                                chunk_idx as u64,
1028                                &combined[byte_pos..end],
1029                            )?;
1030                        } else {
1031                            // Partial-chunk write: this branch only runs with
1032                            // offset_in_chunk > 0, meaning the chunk already
1033                            // holds earlier frames on disk. Read-modify-write
1034                            // so those frames survive — a fresh fill buffer
1035                            // would erase them.
1036                            let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
1037                            let mut chunk_buf =
1038                                match writer.read_chunk_if_present(ds_index, chunk_idx as u64)? {
1039                                    Some(existing) => existing,
1040                                    None => {
1041                                        return Err(Hdf5Error::InvalidState(format!(
1042                                            "cannot append into partially-written chunk {}: \
1043                                         its existing content was not found in the chunk \
1044                                         index (the file may be inconsistent)",
1045                                            chunk_idx
1046                                        )));
1047                                    }
1048                                };
1049                            chunk_buf
1050                                [offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
1051                                .copy_from_slice(&combined[byte_pos..end]);
1052                            writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
1053                        }
1054                        byte_pos = end;
1055                        frame_pos += frames_to_fill;
1056                    } else {
1057                        // Partial chunk — buffer for next append
1058                        let ds = &mut writer.datasets[ds_index];
1059                        ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
1060                        ds.append_buffered_frames = remaining_frames as u64;
1061                        frame_pos = total_frames;
1062                    }
1063                }
1064
1065                // Extend dims to include all frames (buffered + new)
1066                let logical_dim0 = base_dim0 + total_frames;
1067                let mut new_dims: Vec<u64> = dims;
1068                new_dims[0] = logical_dim0 as u64;
1069                writer.extend_dataset(ds_index, &new_dims)?;
1070
1071                Ok(())
1072            }
1073            DatasetInfo::Reader { .. } => {
1074                Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
1075            }
1076        }
1077    }
1078
1079    /// Extend the dimensions of a chunked dataset.
1080    pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
1081        match &self.info {
1082            DatasetInfo::Writer { index, chunked, .. } => {
1083                if !*chunked {
1084                    return Err(Hdf5Error::InvalidState(
1085                        "extend is only for chunked datasets".into(),
1086                    ));
1087                }
1088
1089                let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
1090                let mut inner = borrow_inner_mut(&self.file_inner);
1091                match &mut *inner {
1092                    H5FileInner::Writer(writer) => {
1093                        writer.extend_dataset(*index, &dims_u64)?;
1094                        Ok(())
1095                    }
1096                    _ => Err(Hdf5Error::InvalidState(
1097                        "file is no longer in write mode".into(),
1098                    )),
1099                }
1100            }
1101            DatasetInfo::Reader { .. } => {
1102                Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
1103            }
1104        }
1105    }
1106
1107    /// Flush a chunked dataset's index structures to disk.
1108    pub fn flush(&self) -> Result<()> {
1109        match &self.info {
1110            DatasetInfo::Writer { index, .. } => {
1111                let mut inner = borrow_inner_mut(&self.file_inner);
1112                match &mut *inner {
1113                    H5FileInner::Writer(writer) => {
1114                        writer.flush_dataset(*index)?;
1115                        Ok(())
1116                    }
1117                    _ => Ok(()),
1118                }
1119            }
1120            DatasetInfo::Reader { .. } => Ok(()),
1121        }
1122    }
1123
1124    /// Read a slice (hyperslab) of the dataset as a typed vector.
1125    ///
1126    /// `starts` and `counts` define the N-dimensional selection:
1127    /// `starts[d]` = first index along dim d, `counts[d]` = how many elements.
1128    pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
1129        match &self.info {
1130            DatasetInfo::Reader {
1131                name, element_size, ..
1132            } => {
1133                if T::element_size() != *element_size {
1134                    return Err(Hdf5Error::TypeMismatch(format!(
1135                        "read type has element size {} but dataset has element size {}",
1136                        T::element_size(),
1137                        element_size,
1138                    )));
1139                }
1140                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
1141                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
1142
1143                let raw = {
1144                    let mut inner = borrow_inner_mut(&self.file_inner);
1145                    match &mut *inner {
1146                        H5FileInner::Reader(reader) => {
1147                            reader.read_slice(name, &starts_u64, &counts_u64)?
1148                        }
1149                        _ => {
1150                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
1151                        }
1152                    }
1153                };
1154
1155                if raw.len() % T::element_size() != 0 {
1156                    return Err(Hdf5Error::TypeMismatch(format!(
1157                        "raw data size {} is not a multiple of element size {}",
1158                        raw.len(),
1159                        T::element_size(),
1160                    )));
1161                }
1162
1163                let count = raw.len() / T::element_size();
1164                let mut result = Vec::<T>::with_capacity(count);
1165                unsafe {
1166                    std::ptr::copy_nonoverlapping(
1167                        raw.as_ptr(),
1168                        result.as_mut_ptr() as *mut u8,
1169                        raw.len(),
1170                    );
1171                    result.set_len(count);
1172                }
1173                Ok(result)
1174            }
1175            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1176                "cannot read_slice from a dataset in write mode".into(),
1177            )),
1178        }
1179    }
1180
1181    /// Write a typed slice to a sub-region of a contiguous dataset.
1182    ///
1183    /// `starts` and `counts` define the N-dimensional selection.
1184    pub fn write_slice<T: H5Type>(
1185        &self,
1186        starts: &[usize],
1187        counts: &[usize],
1188        data: &[T],
1189    ) -> Result<()> {
1190        match &self.info {
1191            DatasetInfo::Writer {
1192                index,
1193                element_size,
1194                chunked,
1195                ..
1196            } => {
1197                if *chunked {
1198                    return Err(Hdf5Error::InvalidState(
1199                        "write_slice is only for contiguous datasets".into(),
1200                    ));
1201                }
1202                if T::element_size() != *element_size {
1203                    return Err(Hdf5Error::TypeMismatch(format!(
1204                        "write type has element size {} but dataset expects {}",
1205                        T::element_size(),
1206                        element_size,
1207                    )));
1208                }
1209
1210                let expected: usize = counts.iter().product();
1211                if data.len() != expected {
1212                    return Err(Hdf5Error::InvalidState(format!(
1213                        "data length {} does not match slice size {}",
1214                        data.len(),
1215                        expected,
1216                    )));
1217                }
1218
1219                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
1220                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
1221
1222                let byte_len = data.len() * T::element_size();
1223                let raw =
1224                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
1225
1226                let mut inner = borrow_inner_mut(&self.file_inner);
1227                match &mut *inner {
1228                    H5FileInner::Writer(writer) => {
1229                        writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
1230                        Ok(())
1231                    }
1232                    _ => Err(Hdf5Error::InvalidState(
1233                        "file is no longer in write mode".into(),
1234                    )),
1235                }
1236            }
1237            DatasetInfo::Reader { .. } => {
1238                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
1239            }
1240        }
1241    }
1242
1243    /// Read variable-length strings from a dataset.
1244    ///
1245    /// This handles h5py-style vlen string datasets that store strings
1246    /// as global heap references. Returns one String per element.
1247    pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
1248        match &self.info {
1249            DatasetInfo::Reader { name, .. } => {
1250                let mut inner = borrow_inner_mut(&self.file_inner);
1251                match &mut *inner {
1252                    H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
1253                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
1254                }
1255            }
1256            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1257                "cannot read vlen strings from a dataset in write mode".into(),
1258            )),
1259        }
1260    }
1261
1262    /// Read the entire dataset as a typed vector.
1263    ///
1264    /// The raw bytes are read from the file and reinterpreted as `T`. The
1265    /// caller must ensure that `T` matches the datatype used when the dataset
1266    /// was written.
1267    ///
1268    /// # Errors
1269    ///
1270    /// Returns an error if:
1271    /// - The file is in write mode.
1272    /// - The raw data size is not a multiple of `T::element_size()`.
1273    pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
1274        match &self.info {
1275            DatasetInfo::Reader {
1276                name, element_size, ..
1277            } => {
1278                if T::element_size() != *element_size {
1279                    return Err(Hdf5Error::TypeMismatch(format!(
1280                        "read type has element size {} but dataset has element size {}",
1281                        T::element_size(),
1282                        element_size,
1283                    )));
1284                }
1285
1286                let raw = {
1287                    let mut inner = borrow_inner_mut(&self.file_inner);
1288                    match &mut *inner {
1289                        H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
1290                        _ => {
1291                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
1292                        }
1293                    }
1294                };
1295
1296                if raw.len() % T::element_size() != 0 {
1297                    return Err(Hdf5Error::TypeMismatch(format!(
1298                        "raw data size {} is not a multiple of element size {}",
1299                        raw.len(),
1300                        T::element_size(),
1301                    )));
1302                }
1303
1304                let count = raw.len() / T::element_size();
1305                let mut result = Vec::<T>::with_capacity(count);
1306
1307                // Safety: T is Copy + 'static (required by H5Type). We verified
1308                // the byte count matches count * size_of::<T>() above.
1309                // copy_nonoverlapping fills the memory with valid bit patterns
1310                // for all H5Type implementors (numeric primitives).
1311                // We call set_len AFTER the copy so that if an unexpected panic
1312                // occurs, uninitialized memory is never exposed.
1313                unsafe {
1314                    std::ptr::copy_nonoverlapping(
1315                        raw.as_ptr(),
1316                        result.as_mut_ptr() as *mut u8,
1317                        raw.len(),
1318                    );
1319                    result.set_len(count);
1320                }
1321
1322                Ok(result)
1323            }
1324            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1325                "cannot read from a dataset in write mode".into(),
1326            )),
1327        }
1328    }
1329}
1330
1331#[cfg(test)]
1332mod tests {
1333    use crate::H5File;
1334    use std::path::PathBuf;
1335
1336    fn temp_path(name: &str) -> PathBuf {
1337        // Include PID + a per-call atomic counter so that concurrent
1338        // cargo invocations and any kernel-level "lock not yet
1339        // released" races between sequential opens cannot collide.
1340        use std::sync::atomic::{AtomicU64, Ordering};
1341        static COUNTER: AtomicU64 = AtomicU64::new(0);
1342        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
1343        std::env::temp_dir().join(format!(
1344            "hdf5_dataset_test_{}_{}_{}.h5",
1345            name,
1346            std::process::id(),
1347            n
1348        ))
1349    }
1350
1351    #[test]
1352    fn builder_requires_shape() {
1353        let path = temp_path("no_shape");
1354        let file = H5File::create(&path).unwrap();
1355        let result = file.new_dataset::<u8>().create("data");
1356        assert!(result.is_err());
1357        std::fs::remove_file(&path).ok();
1358    }
1359
1360    #[test]
1361    fn write_raw_size_mismatch() {
1362        let path = temp_path("size_mismatch");
1363        let file = H5File::create(&path).unwrap();
1364        let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1365        // Provide 3 elements instead of 4
1366        let result = ds.write_raw(&[1u8, 2, 3]);
1367        assert!(result.is_err());
1368        std::fs::remove_file(&path).ok();
1369    }
1370
1371    #[test]
1372    fn roundtrip_u8_1d() {
1373        let path = temp_path("rt_u8_1d");
1374        let data: Vec<u8> = (0..10).collect();
1375
1376        {
1377            let file = H5File::create(&path).unwrap();
1378            let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
1379            ds.write_raw(&data).unwrap();
1380            file.close().unwrap();
1381        }
1382
1383        {
1384            let file = H5File::open(&path).unwrap();
1385            let ds = file.dataset("seq").unwrap();
1386            assert_eq!(ds.shape(), vec![10]);
1387            let readback = ds.read_raw::<u8>().unwrap();
1388            assert_eq!(readback, data);
1389        }
1390
1391        std::fs::remove_file(&path).ok();
1392    }
1393
1394    #[test]
1395    fn roundtrip_i32_2d() {
1396        let path = temp_path("rt_i32_2d");
1397        let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];
1398
1399        {
1400            let file = H5File::create(&path).unwrap();
1401            let ds = file
1402                .new_dataset::<i32>()
1403                .shape([2, 3])
1404                .create("matrix")
1405                .unwrap();
1406            ds.write_raw(&data).unwrap();
1407            file.close().unwrap();
1408        }
1409
1410        {
1411            let file = H5File::open(&path).unwrap();
1412            let ds = file.dataset("matrix").unwrap();
1413            assert_eq!(ds.shape(), vec![2, 3]);
1414            let readback = ds.read_raw::<i32>().unwrap();
1415            assert_eq!(readback, data);
1416        }
1417
1418        std::fs::remove_file(&path).ok();
1419    }
1420
1421    #[test]
1422    fn roundtrip_f64_3d() {
1423        let path = temp_path("rt_f64_3d");
1424        let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();
1425
1426        {
1427            let file = H5File::create(&path).unwrap();
1428            let ds = file
1429                .new_dataset::<f64>()
1430                .shape([2, 3, 4])
1431                .create("cube")
1432                .unwrap();
1433            ds.write_raw(&data).unwrap();
1434            file.close().unwrap();
1435        }
1436
1437        {
1438            let file = H5File::open(&path).unwrap();
1439            let ds = file.dataset("cube").unwrap();
1440            assert_eq!(ds.shape(), vec![2, 3, 4]);
1441            let readback = ds.read_raw::<f64>().unwrap();
1442            assert_eq!(readback, data);
1443        }
1444
1445        std::fs::remove_file(&path).ok();
1446    }
1447
1448    #[test]
1449    fn cannot_read_in_write_mode() {
1450        let path = temp_path("no_read_write");
1451        let file = H5File::create(&path).unwrap();
1452        let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1453        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1454        let result = ds.read_raw::<u8>();
1455        assert!(result.is_err());
1456        std::fs::remove_file(&path).ok();
1457    }
1458
1459    #[test]
1460    fn cannot_write_in_read_mode() {
1461        let path = temp_path("no_write_read");
1462
1463        {
1464            let file = H5File::create(&path).unwrap();
1465            let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1466            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1467            file.close().unwrap();
1468        }
1469
1470        {
1471            let file = H5File::open(&path).unwrap();
1472            let ds = file.dataset("x").unwrap();
1473            let result = ds.write_raw(&[5u8, 6, 7, 8]);
1474            assert!(result.is_err());
1475        }
1476
1477        std::fs::remove_file(&path).ok();
1478    }
1479
1480    #[test]
1481    fn numeric_attr_roundtrip() {
1482        let path = temp_path("num_attr");
1483        {
1484            let file = H5File::create(&path).unwrap();
1485            let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
1486            ds.write_raw(&[1.0f32; 4]).unwrap();
1487
1488            let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
1489            a1.write_numeric(&1.2345f64).unwrap();
1490
1491            let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
1492            a2.write_numeric(&42i32).unwrap();
1493
1494            file.close().unwrap();
1495        }
1496        {
1497            let file = H5File::open(&path).unwrap();
1498            let ds = file.dataset("data").unwrap();
1499
1500            let scale = ds.attr("scale").unwrap();
1501            let val: f64 = scale.read_numeric().unwrap();
1502            assert!((val - 1.2345).abs() < 1e-10);
1503
1504            let count = ds.attr("count").unwrap();
1505            let val: i32 = count.read_numeric().unwrap();
1506            assert_eq!(val, 42);
1507        }
1508        std::fs::remove_file(&path).ok();
1509    }
1510
1511    #[test]
1512    fn cannot_create_dataset_in_read_mode() {
1513        let path = temp_path("no_create_read");
1514
1515        {
1516            let _file = H5File::create(&path).unwrap();
1517        }
1518
1519        {
1520            let file = H5File::open(&path).unwrap();
1521            let result = file.new_dataset::<u8>().shape([4]).create("x");
1522            assert!(result.is_err());
1523        }
1524
1525        std::fs::remove_file(&path).ok();
1526    }
1527
1528    #[test]
1529    fn shape_accessor() {
1530        let path = temp_path("shape_acc");
1531
1532        let file = H5File::create(&path).unwrap();
1533        let ds = file
1534            .new_dataset::<f32>()
1535            .shape([5, 10, 3])
1536            .create("tensor")
1537            .unwrap();
1538        assert_eq!(ds.shape(), vec![5, 10, 3]);
1539
1540        std::fs::remove_file(&path).ok();
1541    }
1542
1543    #[test]
1544    fn slice_roundtrip_2d() {
1545        let path = temp_path("slice_2d");
1546
1547        // Create a 4x5 dataset, write full, then read a slice
1548        let data: Vec<i32> = (0..20).collect();
1549        {
1550            let file = H5File::create(&path).unwrap();
1551            let ds = file
1552                .new_dataset::<i32>()
1553                .shape([4, 5])
1554                .create("mat")
1555                .unwrap();
1556            ds.write_raw(&data).unwrap();
1557            file.close().unwrap();
1558        }
1559        {
1560            let file = H5File::open(&path).unwrap();
1561            let ds = file.dataset("mat").unwrap();
1562            // Read rows 1..3, cols 2..4 (2x2 slice)
1563            let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
1564            // Row 1: [5,6,7,8,9] -> cols 2..4 = [7,8]
1565            // Row 2: [10,11,12,13,14] -> cols 2..4 = [12,13]
1566            assert_eq!(slice, vec![7, 8, 12, 13]);
1567        }
1568
1569        std::fs::remove_file(&path).ok();
1570    }
1571
1572    #[test]
1573    fn write_slice_2d() {
1574        let path = temp_path("write_slice_2d");
1575
1576        {
1577            let file = H5File::create(&path).unwrap();
1578            let ds = file
1579                .new_dataset::<f32>()
1580                .shape([3, 4])
1581                .create("data")
1582                .unwrap();
1583            ds.write_raw(&[0.0f32; 12]).unwrap();
1584            // Overwrite a 2x2 sub-region
1585            ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
1586                .unwrap();
1587            file.close().unwrap();
1588        }
1589        {
1590            let file = H5File::open(&path).unwrap();
1591            let ds = file.dataset("data").unwrap();
1592            let full = ds.read_raw::<f32>().unwrap();
1593            // Row 0: [0,0,0,0]
1594            // Row 1: [0,10,20,0]
1595            // Row 2: [0,30,40,0]
1596            assert_eq!(
1597                full,
1598                vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
1599            );
1600        }
1601
1602        std::fs::remove_file(&path).ok();
1603    }
1604
1605    #[test]
1606    fn write_slice_out_of_bounds_rejected() {
1607        let path = temp_path("write_slice_oob");
1608        let file = H5File::create(&path).unwrap();
1609        let ds = file.new_dataset::<i32>().shape([4]).create("d").unwrap();
1610        ds.write_raw(&[0i32; 4]).unwrap();
1611        // start 2 + count 6 = 8 > extent 4 -> must error, not corrupt.
1612        assert!(ds.write_slice(&[2], &[6], &[9i32; 6]).is_err());
1613        // An in-bounds slice still works.
1614        assert!(ds.write_slice(&[1], &[2], &[7i32, 8]).is_ok());
1615        std::fs::remove_file(&path).ok();
1616    }
1617
1618    #[test]
1619    fn duplicate_dataset_name_rejected() {
1620        let path = temp_path("dup_name");
1621        let file = H5File::create(&path).unwrap();
1622        let _ = file.new_dataset::<i32>().shape([2]).create("d").unwrap();
1623        assert!(file.new_dataset::<i32>().shape([2]).create("d").is_err());
1624        std::fs::remove_file(&path).ok();
1625    }
1626
1627    #[test]
1628    fn extend_cannot_shrink() {
1629        let path = temp_path("extend_shrink");
1630        let file = H5File::create(&path).unwrap();
1631        let ds = file
1632            .new_dataset::<i32>()
1633            .shape([0])
1634            .chunk(&[2])
1635            .max_shape(&[None])
1636            .create("d")
1637            .unwrap();
1638        ds.append(&[1i32, 2, 3, 4]).unwrap();
1639        // Shrinking below the written extent must be rejected.
1640        assert!(ds.extend(&[2]).is_err());
1641        // Growing is fine.
1642        assert!(ds.extend(&[6]).is_ok());
1643        std::fs::remove_file(&path).ok();
1644    }
1645
1646    #[test]
1647    fn attr_read_roundtrip() {
1648        use crate::types::VarLenUnicode;
1649        let path = temp_path("attr_read");
1650
1651        {
1652            let file = H5File::create(&path).unwrap();
1653            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1654            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1655            let a1 = ds
1656                .new_attr::<VarLenUnicode>()
1657                .shape(())
1658                .create("units")
1659                .unwrap();
1660            a1.write_string("meters").unwrap();
1661            let a2 = ds
1662                .new_attr::<VarLenUnicode>()
1663                .shape(())
1664                .create("desc")
1665                .unwrap();
1666            a2.write_string("test data").unwrap();
1667            file.close().unwrap();
1668        }
1669        {
1670            let file = H5File::open(&path).unwrap();
1671            let ds = file.dataset("data").unwrap();
1672
1673            let names = ds.attr_names().unwrap();
1674            assert!(names.contains(&"units".to_string()));
1675            assert!(names.contains(&"desc".to_string()));
1676
1677            let units = ds.attr("units").unwrap();
1678            assert_eq!(units.read_string().unwrap(), "meters");
1679
1680            let desc = ds.attr("desc").unwrap();
1681            assert_eq!(desc.read_string().unwrap(), "test data");
1682        }
1683
1684        std::fs::remove_file(&path).ok();
1685    }
1686
1687    #[test]
1688    fn type_mismatch_element_size() {
1689        let path = temp_path("type_mismatch");
1690
1691        {
1692            let file = H5File::create(&path).unwrap();
1693            let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
1694            ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
1695            file.close().unwrap();
1696        }
1697
1698        {
1699            let file = H5File::open(&path).unwrap();
1700            let ds = file.dataset("data").unwrap();
1701            // Try to read as u8 (element_size = 1) from a f64 dataset (element_size = 8)
1702            let result = ds.read_raw::<u8>();
1703            assert!(result.is_err());
1704        }
1705
1706        std::fs::remove_file(&path).ok();
1707    }
1708
1709    #[test]
1710    fn dataset_survives_file_move() {
1711        let path = temp_path("ds_survives");
1712
1713        let ds = {
1714            let file = H5File::create(&path).unwrap();
1715            file.new_dataset::<u8>().shape([4]).create("x").unwrap()
1716        };
1717        // file is dropped here, but ds still holds Rc to the inner state
1718        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1719        // The writer will finalize on drop of the last Rc
1720
1721        std::fs::remove_file(&path).ok();
1722    }
1723
1724    #[test]
1725    fn new_attr_scalar_string() {
1726        use crate::types::VarLenUnicode;
1727
1728        let path = temp_path("attr_scalar_string");
1729        {
1730            let file = H5File::create(&path).unwrap();
1731            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1732            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1733
1734            let attr = ds
1735                .new_attr::<VarLenUnicode>()
1736                .shape(())
1737                .create("name")
1738                .unwrap();
1739            attr.write_scalar(&VarLenUnicode("test_value".to_string()))
1740                .unwrap();
1741
1742            file.close().unwrap();
1743        }
1744
1745        // Verify the file is still valid and readable
1746        {
1747            let file = H5File::open(&path).unwrap();
1748            let ds = file.dataset("data").unwrap();
1749            assert_eq!(ds.shape(), vec![4]);
1750            let readback = ds.read_raw::<u8>().unwrap();
1751            assert_eq!(readback, vec![1u8, 2, 3, 4]);
1752        }
1753
1754        std::fs::remove_file(&path).ok();
1755    }
1756
1757    #[test]
1758    fn all_numeric_types_roundtrip() {
1759        let path = temp_path("all_types");
1760
1761        {
1762            let file = H5File::create(&path).unwrap();
1763
1764            let ds = file.new_dataset::<u8>().shape([2]).create("u8").unwrap();
1765            ds.write_raw(&[1u8, 2]).unwrap();
1766
1767            let ds = file.new_dataset::<i8>().shape([2]).create("i8").unwrap();
1768            ds.write_raw(&[-1i8, 1]).unwrap();
1769
1770            let ds = file.new_dataset::<u16>().shape([2]).create("u16").unwrap();
1771            ds.write_raw(&[100u16, 200]).unwrap();
1772
1773            let ds = file.new_dataset::<i16>().shape([2]).create("i16").unwrap();
1774            ds.write_raw(&[-100i16, 100]).unwrap();
1775
1776            let ds = file.new_dataset::<u32>().shape([2]).create("u32").unwrap();
1777            ds.write_raw(&[1000u32, 2000]).unwrap();
1778
1779            let ds = file.new_dataset::<i32>().shape([2]).create("i32").unwrap();
1780            ds.write_raw(&[-1000i32, 1000]).unwrap();
1781
1782            let ds = file.new_dataset::<u64>().shape([2]).create("u64").unwrap();
1783            ds.write_raw(&[10000u64, 20000]).unwrap();
1784
1785            let ds = file.new_dataset::<i64>().shape([2]).create("i64").unwrap();
1786            ds.write_raw(&[-10000i64, 10000]).unwrap();
1787
1788            let ds = file.new_dataset::<f32>().shape([2]).create("f32").unwrap();
1789            ds.write_raw(&[1.5f32, 2.5]).unwrap();
1790
1791            let ds = file.new_dataset::<f64>().shape([2]).create("f64").unwrap();
1792            ds.write_raw(&[1.23456f64, 7.89012]).unwrap();
1793
1794            file.close().unwrap();
1795        }
1796
1797        {
1798            let file = H5File::open(&path).unwrap();
1799
1800            assert_eq!(
1801                file.dataset("u8").unwrap().read_raw::<u8>().unwrap(),
1802                vec![1u8, 2]
1803            );
1804            assert_eq!(
1805                file.dataset("i8").unwrap().read_raw::<i8>().unwrap(),
1806                vec![-1i8, 1]
1807            );
1808            assert_eq!(
1809                file.dataset("u16").unwrap().read_raw::<u16>().unwrap(),
1810                vec![100u16, 200]
1811            );
1812            assert_eq!(
1813                file.dataset("i16").unwrap().read_raw::<i16>().unwrap(),
1814                vec![-100i16, 100]
1815            );
1816            assert_eq!(
1817                file.dataset("u32").unwrap().read_raw::<u32>().unwrap(),
1818                vec![1000u32, 2000]
1819            );
1820            assert_eq!(
1821                file.dataset("i32").unwrap().read_raw::<i32>().unwrap(),
1822                vec![-1000i32, 1000]
1823            );
1824            assert_eq!(
1825                file.dataset("u64").unwrap().read_raw::<u64>().unwrap(),
1826                vec![10000u64, 20000]
1827            );
1828            assert_eq!(
1829                file.dataset("i64").unwrap().read_raw::<i64>().unwrap(),
1830                vec![-10000i64, 10000]
1831            );
1832            assert_eq!(
1833                file.dataset("f32").unwrap().read_raw::<f32>().unwrap(),
1834                vec![1.5f32, 2.5]
1835            );
1836            assert_eq!(
1837                file.dataset("f64").unwrap().read_raw::<f64>().unwrap(),
1838                vec![1.23456f64, 7.89012]
1839            );
1840        }
1841
1842        std::fs::remove_file(&path).ok();
1843    }
1844
1845    #[test]
1846    fn append_chunked_roundtrip() {
1847        let path = temp_path("append_chunked");
1848
1849        {
1850            let file = H5File::create(&path).unwrap();
1851            let ds = file
1852                .new_dataset::<f64>()
1853                .shape([0, 3])
1854                .chunk(&[1, 3])
1855                .max_shape(&[None, Some(3)])
1856                .create("data")
1857                .unwrap();
1858
1859            // Append one frame
1860            ds.append(&[1.0f64, 2.0, 3.0]).unwrap();
1861            // Append two frames at once
1862            ds.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();
1863
1864            file.close().unwrap();
1865        }
1866
1867        {
1868            let file = H5File::open(&path).unwrap();
1869            let ds = file.dataset("data").unwrap();
1870            assert_eq!(ds.shape(), vec![3, 3]);
1871            let all = ds.read_raw::<f64>().unwrap();
1872            assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
1873        }
1874
1875        std::fs::remove_file(&path).ok();
1876    }
1877
1878    #[test]
1879    fn append_1d_chunked() {
1880        let path = temp_path("append_1d");
1881
1882        {
1883            let file = H5File::create(&path).unwrap();
1884            let ds = file
1885                .new_dataset::<i32>()
1886                .shape([0])
1887                .chunk(&[4])
1888                .max_shape(&[None])
1889                .create("values")
1890                .unwrap();
1891
1892            ds.append(&[10i32, 20, 30]).unwrap(); // partial chunk
1893            ds.append(&[40i32]).unwrap(); // fills chunk boundary
1894            ds.append(&[50i32, 60, 70, 80]).unwrap(); // full chunk
1895
1896            file.close().unwrap();
1897        }
1898
1899        {
1900            let file = H5File::open(&path).unwrap();
1901            let ds = file.dataset("values").unwrap();
1902            assert_eq!(ds.shape(), vec![8]);
1903            let all = ds.read_raw::<i32>().unwrap();
1904            assert_eq!(all, vec![10, 20, 30, 40, 50, 60, 70, 80]);
1905        }
1906
1907        std::fs::remove_file(&path).ok();
1908    }
1909
1910    #[test]
1911    fn append_partial_chunk_flushed_on_close() {
1912        let path = temp_path("append_partial_close");
1913
1914        {
1915            let file = H5File::create(&path).unwrap();
1916            let ds = file
1917                .new_dataset::<f64>()
1918                .shape([0])
1919                .chunk(&[4])
1920                .max_shape(&[None])
1921                .create("vals")
1922                .unwrap();
1923
1924            // Append 5 elements: chunk 0 = full [1,2,3,4], chunk 1 = partial [5,0,0,0]
1925            ds.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
1926            file.close().unwrap();
1927        }
1928
1929        {
1930            let file = H5File::open(&path).unwrap();
1931            let ds = file.dataset("vals").unwrap();
1932            assert_eq!(ds.shape(), vec![5]);
1933            let all = ds.read_raw::<f64>().unwrap();
1934            // The full dataset is 2 chunks * 4 = 8 elements; shape says 5
1935            // read_raw reads total shape elements
1936            assert_eq!(all.len(), 5);
1937            assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
1938        }
1939
1940        std::fs::remove_file(&path).ok();
1941    }
1942
1943    #[test]
1944    fn vlen_append_after_reopen_filtered() {
1945        // Reopen + append into a partially-written *compressed* vlen chunk
1946        // (index-block chunk). Exercises filtered-index-block reconstruction
1947        // in open_append plus filtered read-modify-write.
1948        let path = temp_path("vlen_reopen_filtered");
1949        {
1950            let file = H5File::create(&path).unwrap();
1951            file.create_appendable_vlen_dataset(
1952                "strs",
1953                4,
1954                Some(crate::format::messages::filter::FilterPipeline::deflate(6)),
1955            )
1956            .unwrap();
1957            file.append_vlen_strings("strs", &["alpha", "beta", "gamma"])
1958                .unwrap();
1959            file.close().unwrap();
1960        }
1961        {
1962            let file = H5File::open_rw(&path).unwrap();
1963            file.append_vlen_strings("strs", &["delta"]).unwrap();
1964            file.close().unwrap();
1965        }
1966        {
1967            let file = H5File::open(&path).unwrap();
1968            let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
1969            assert_eq!(
1970                got.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
1971                vec!["alpha", "beta", "gamma", "delta"]
1972            );
1973        }
1974        std::fs::remove_file(&path).ok();
1975    }
1976
1977    #[test]
1978    fn vlen_append_after_reopen_data_block() {
1979        // Reopen + append into a partial chunk that lives in an extensible-
1980        // array *data block* (chunk index >= idx_blk_elmts). Exercises
1981        // data-block resolution in read_chunk_if_present and write_chunk.
1982        let path = temp_path("vlen_reopen_datablk");
1983        let labels: Vec<String> = (0..9).map(|i| format!("s{i}")).collect();
1984        {
1985            let file = H5File::create(&path).unwrap();
1986            file.create_appendable_vlen_dataset("strs", 2, None)
1987                .unwrap();
1988            let refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
1989            file.append_vlen_strings("strs", &refs).unwrap();
1990            file.close().unwrap();
1991        }
1992        {
1993            let file = H5File::open_rw(&path).unwrap();
1994            file.append_vlen_strings("strs", &["s9"]).unwrap();
1995            file.close().unwrap();
1996        }
1997        {
1998            let file = H5File::open(&path).unwrap();
1999            let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
2000            let want: Vec<String> = (0..10).map(|i| format!("s{i}")).collect();
2001            assert_eq!(got, want);
2002        }
2003        std::fs::remove_file(&path).ok();
2004    }
2005
2006    #[test]
2007    fn vlen_append_after_reopen_super_block() {
2008        // Reopen + append into a partial chunk whose index falls in an
2009        // extensible-array *super block* (chunk index 244 with the default
2010        // EA geometry: idx_blk_elmts=4, data_blk_min_elmts=16,
2011        // sup_blk_min_data_ptrs=4 -> chunks 0..=243 are reached via the
2012        // index block or its direct data blocks, so chunk 244 is reached
2013        // via a super block read from disk). Exercises the ViaSblk branch
2014        // of read_chunk_if_present.
2015        let path = temp_path("vlen_reopen_super");
2016        // 489 strings, chunk size 2 -> chunk 244 holds one string only
2017        // (partially filled) and is flushed to disk on close.
2018        let labels: Vec<String> = (0..489).map(|i| format!("v{i}")).collect();
2019        {
2020            let file = H5File::create(&path).unwrap();
2021            file.create_appendable_vlen_dataset("strs", 2, None)
2022                .unwrap();
2023            let refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
2024            file.append_vlen_strings("strs", &refs).unwrap();
2025            file.close().unwrap();
2026        }
2027        {
2028            let file = H5File::open_rw(&path).unwrap();
2029            file.append_vlen_strings("strs", &["v489"]).unwrap();
2030            file.close().unwrap();
2031        }
2032        {
2033            let file = H5File::open(&path).unwrap();
2034            let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
2035            let want: Vec<String> = (0..490).map(|i| format!("v{i}")).collect();
2036            assert_eq!(got, want);
2037        }
2038        std::fs::remove_file(&path).ok();
2039    }
2040
2041    #[test]
2042    fn vlen_append_after_reopen_filtered_data_block() {
2043        // The hardest path: compressed + chunk in a data block + partial
2044        // read-modify-write across a reopen.
2045        let path = temp_path("vlen_reopen_filt_datablk");
2046        let labels: Vec<String> = (0..9).map(|i| format!("item{i:02}")).collect();
2047        {
2048            let file = H5File::create(&path).unwrap();
2049            file.create_appendable_vlen_dataset(
2050                "strs",
2051                2,
2052                Some(crate::format::messages::filter::FilterPipeline::deflate(6)),
2053            )
2054            .unwrap();
2055            let refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
2056            file.append_vlen_strings("strs", &refs).unwrap();
2057            file.close().unwrap();
2058        }
2059        {
2060            let file = H5File::open_rw(&path).unwrap();
2061            file.append_vlen_strings("strs", &["item09"]).unwrap();
2062            file.close().unwrap();
2063        }
2064        {
2065            let file = H5File::open(&path).unwrap();
2066            let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
2067            let want: Vec<String> = (0..10).map(|i| format!("item{i:02}")).collect();
2068            assert_eq!(got, want);
2069        }
2070        std::fs::remove_file(&path).ok();
2071    }
2072
2073    #[test]
2074    fn group_nx_class_attribute_roundtrip() {
2075        // Non-root groups carry attributes (NeXus `NX_class`) in their
2076        // own object header, and the reader reads them back by path.
2077        let path = temp_path("group_nx_class");
2078        {
2079            let file = H5File::create(&path).unwrap();
2080            let entry = file.create_group("entry").unwrap();
2081            entry.set_attr_string("NX_class", "NXentry").unwrap();
2082            let det = entry.create_group("detector").unwrap();
2083            det.set_attr_string("NX_class", "NXdetector").unwrap();
2084            det.set_attr_numeric("frame_count", &7i32).unwrap();
2085            det.new_dataset::<f32>()
2086                .shape([4])
2087                .create("data")
2088                .unwrap()
2089                .write_raw(&[1.0f32; 4])
2090                .unwrap();
2091            file.close().unwrap();
2092        }
2093        {
2094            let file = H5File::open(&path).unwrap();
2095            let entry = file.root_group().group("entry").unwrap();
2096            assert_eq!(entry.attr_string("NX_class").unwrap(), "NXentry");
2097            let det = entry.group("detector").unwrap();
2098            assert_eq!(det.attr_string("NX_class").unwrap(), "NXdetector");
2099            let names = det.attr_names().unwrap();
2100            assert!(names.contains(&"NX_class".to_string()));
2101            assert!(names.contains(&"frame_count".to_string()));
2102        }
2103        std::fs::remove_file(&path).ok();
2104    }
2105
2106    #[test]
2107    fn ea_super_block_roundtrip() {
2108        // 2000 chunks span several extensible-array super blocks. Before
2109        // super-block support the writer errored at chunk index 228.
2110        let path = temp_path("ea_super_rt");
2111        {
2112            let file = H5File::create(&path).unwrap();
2113            let ds = file
2114                .new_dataset::<i32>()
2115                .shape([0])
2116                .chunk(&[1])
2117                .max_shape(&[None])
2118                .create("v")
2119                .unwrap();
2120            ds.append(&(0..2000).collect::<Vec<i32>>()).unwrap();
2121            file.close().unwrap();
2122        }
2123        {
2124            let file = H5File::open(&path).unwrap();
2125            let v = file.dataset("v").unwrap().read_raw::<i32>().unwrap();
2126            assert_eq!(v.len(), 2000);
2127            assert!(v.iter().enumerate().all(|(i, &x)| x == i as i32));
2128        }
2129        std::fs::remove_file(&path).ok();
2130    }
2131
2132    #[test]
2133    fn ea_filtered_super_block_roundtrip() {
2134        // Compressed chunks across super blocks.
2135        let path = temp_path("ea_filt_super");
2136        {
2137            let file = H5File::create(&path).unwrap();
2138            let ds = file
2139                .new_dataset::<i32>()
2140                .shape([0])
2141                .chunk(&[1])
2142                .max_shape(&[None])
2143                .deflate(4)
2144                .create("v")
2145                .unwrap();
2146            ds.append(&(0..600).collect::<Vec<i32>>()).unwrap();
2147            file.close().unwrap();
2148        }
2149        {
2150            let file = H5File::open(&path).unwrap();
2151            let v = file.dataset("v").unwrap().read_raw::<i32>().unwrap();
2152            assert_eq!(v, (0..600).collect::<Vec<i32>>());
2153        }
2154        std::fs::remove_file(&path).ok();
2155    }
2156
2157    #[test]
2158    fn ea_super_block_open_append() {
2159        // Reopen a dataset and append chunks that fall in super blocks.
2160        let path = temp_path("ea_super_append");
2161        {
2162            let file = H5File::create(&path).unwrap();
2163            let ds = file
2164                .new_dataset::<i32>()
2165                .shape([0])
2166                .chunk(&[1])
2167                .max_shape(&[None])
2168                .create("v")
2169                .unwrap();
2170            ds.append(&(0..300).collect::<Vec<i32>>()).unwrap();
2171            file.close().unwrap();
2172        }
2173        {
2174            let mut w = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
2175            let idx = w.dataset_index("v").unwrap();
2176            for c in 300..900u64 {
2177                w.write_chunk(idx, c, &(c as i32).to_le_bytes()).unwrap();
2178            }
2179            w.extend_dataset(idx, &[900]).unwrap();
2180            w.close().unwrap();
2181        }
2182        {
2183            let file = H5File::open(&path).unwrap();
2184            let v = file.dataset("v").unwrap().read_raw::<i32>().unwrap();
2185            assert_eq!(v.len(), 900);
2186            assert!(v.iter().enumerate().all(|(i, &x)| x == i as i32));
2187        }
2188        std::fs::remove_file(&path).ok();
2189    }
2190
2191    #[test]
2192    fn btree_v2_multi_unlimited_roundtrip() {
2193        // A dataset with two unlimited dimensions uses the v2 B-tree chunk
2194        // index; chunks are written by grid coordinates with write_chunk_at.
2195        let path = temp_path("bt2_multi");
2196        {
2197            let file = H5File::create(&path).unwrap();
2198            let ds = file
2199                .new_dataset::<i32>()
2200                .shape([0, 0])
2201                .chunk(&[2, 2])
2202                .max_shape(&[None, None])
2203                .create("grid")
2204                .unwrap();
2205            assert!(ds.is_chunked());
2206            // 4x4 logical grid, value[r][c] = r*4 + c, in 2x2 chunks.
2207            for cr in 0..2usize {
2208                for cc in 0..2usize {
2209                    let mut bytes = Vec::new();
2210                    for i in 0..2usize {
2211                        for j in 0..2usize {
2212                            let v = ((cr * 2 + i) * 4 + (cc * 2 + j)) as i32;
2213                            bytes.extend_from_slice(&v.to_le_bytes());
2214                        }
2215                    }
2216                    ds.write_chunk_at(&[cr, cc], &bytes).unwrap();
2217                }
2218            }
2219            file.close().unwrap();
2220        }
2221        {
2222            let file = H5File::open(&path).unwrap();
2223            let ds = file.dataset("grid").unwrap();
2224            assert_eq!(ds.shape(), vec![4, 4]);
2225            assert_eq!(ds.read_raw::<i32>().unwrap(), (0..16).collect::<Vec<i32>>());
2226        }
2227        std::fs::remove_file(&path).ok();
2228    }
2229
2230    #[test]
2231    fn subframe_chunking_roundtrip() {
2232        // A chunk smaller than a frame: shape [N,8,8], chunk [1,4,4], so each
2233        // frame is tiled into a 2x2 grid of 4x4 chunks. write_chunk_at takes
2234        // the chunk-grid coordinates.
2235        let path = temp_path("subframe");
2236        {
2237            let file = H5File::create(&path).unwrap();
2238            let ds = file
2239                .new_dataset::<i32>()
2240                .shape([0, 8, 8])
2241                .chunk(&[1, 4, 4])
2242                .max_shape(&[None, Some(8), Some(8)])
2243                .create("v")
2244                .unwrap();
2245            for f in 0..3usize {
2246                for cr in 0..2usize {
2247                    for cc in 0..2usize {
2248                        let mut bytes = Vec::new();
2249                        for i in 0..4usize {
2250                            for j in 0..4usize {
2251                                let v = (f * 64 + (cr * 4 + i) * 8 + (cc * 4 + j)) as i32;
2252                                bytes.extend_from_slice(&v.to_le_bytes());
2253                            }
2254                        }
2255                        ds.write_chunk_at(&[f, cr, cc], &bytes).unwrap();
2256                    }
2257                }
2258            }
2259            file.close().unwrap();
2260        }
2261        {
2262            let file = H5File::open(&path).unwrap();
2263            let ds = file.dataset("v").unwrap();
2264            assert_eq!(ds.shape(), vec![3, 8, 8]);
2265            assert_eq!(
2266                ds.read_raw::<i32>().unwrap(),
2267                (0..192).collect::<Vec<i32>>()
2268            );
2269        }
2270        std::fs::remove_file(&path).ok();
2271    }
2272
#[test]
fn fill_value_contiguous_roundtrip() {
    // A fill value configured on a plain (non-chunked) dataset must be
    // recoverable from the file afterwards and must leave the written
    // data untouched.
    let path = temp_path("fill_value_contig");

    // Step 1: create the dataset with fill value 2.5 and write all 4 elements.
    {
        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<f32>()
            .shape([4])
            .fill_value(2.5f32)
            .create("data")
            .unwrap();
        let samples = [1.0f32, 2.0, 3.0, 4.0];
        ds.write_raw(&samples).unwrap();
        file.close().unwrap();
    }

    // Step 2: reopening through open_append must decode the fill-value
    // message from the header as the little-endian bytes of 2.5f32.
    {
        let writer = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
        let slot = writer.dataset_index("data").unwrap();
        let expected_fill = Some(2.5f32.to_le_bytes().to_vec());
        assert_eq!(writer.datasets[slot].fill_value, expected_fill);
    }

    // Step 3: the element data itself is unaffected by the fill value.
    {
        let file = H5File::open(&path).unwrap();
        let ds = file.dataset("data").unwrap();
        let readback = ds.read_raw::<f32>().unwrap();
        assert_eq!(readback, vec![1.0, 2.0, 3.0, 4.0]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
2304
#[test]
fn fill_value_chunked_roundtrip() {
    // A fill value configured on a chunked, unlimited-length dataset must
    // be recoverable from the file after it has been closed.
    let path = temp_path("fill_value_chunked");

    // Create the dataset with fill value -7 and append four values
    // (the same count as the chunk length passed to `chunk`).
    {
        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[4])
            .max_shape(&[None])
            .fill_value(-7i32)
            .create("vals")
            .unwrap();
        let values = [1i32, 2, 3, 4];
        ds.append(&values).unwrap();
        file.close().unwrap();
    }

    // Reopen via open_append and compare the stored fill value with the
    // little-endian encoding of -7i32.
    {
        let writer = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
        let slot = writer.dataset_index("vals").unwrap();
        let expected_fill = Some((-7i32).to_le_bytes().to_vec());
        assert_eq!(writer.datasets[slot].fill_value, expected_fill);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
2331
#[test]
fn fill_value_read_missing_chunks() {
    // When a chunk in the middle of a chunked dataset was never written,
    // reading the dataset must report the user-defined fill value for
    // that hole instead of zeros.

    // Encodes each i32 as 4 little-endian bytes, back to back.
    fn encode_le(vals: &[i32]) -> Vec<u8> {
        let mut out = Vec::new();
        for v in vals {
            out.extend_from_slice(&v.to_le_bytes());
        }
        out
    }

    let path = temp_path("fill_value_read_missing");
    {
        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[2])
            .max_shape(&[None])
            .fill_value(-1i32)
            .create("vals")
            .unwrap();
        // Chunk 0 holds [10, 20], chunk 1 is skipped on purpose, and
        // chunk 2 holds [50, 60].
        let head = encode_le(&[10, 20]);
        let tail = encode_le(&[50, 60]);
        ds.write_chunk(0, &head).unwrap();
        ds.write_chunk(2, &tail).unwrap();
        // Grow the logical shape to 6 so it spans all three chunks.
        ds.extend(&[6]).unwrap();
        file.close().unwrap();
    }
    {
        let file = H5File::open(&path).unwrap();
        let ds = file.dataset("vals").unwrap();
        // The two elements of the unwritten chunk come back as -1.
        let expected: Vec<i32> = vec![10, 20, -1, -1, 50, 60];
        assert_eq!(ds.read_raw::<i32>().unwrap(), expected);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
2364
#[test]
fn fill_value_partial_chunk_padded_with_fill() {
    // A partial trailing chunk flushed at close must pad its unwritten
    // tail with the fill value. That pad sits beyond the logical shape,
    // so it is verified by scanning the on-disk chunk bytes directly.
    let path = temp_path("fill_value_partial_pad");
    {
        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[4])
            .max_shape(&[None])
            .fill_value(-9i32)
            .create("vals")
            .unwrap();
        // 3 of 4 frames -> flushed as a partial chunk on close.
        ds.append(&[1i32, 2, 3]).unwrap();
        file.close().unwrap();
    }
    let bytes = std::fs::read(&path).unwrap();
    // Locate the chunk: i32 LE of [1, 2, 3] written contiguously.
    let needle: Vec<u8> = [1i32, 2, 3].iter().flat_map(|v| v.to_le_bytes()).collect();
    let pos = bytes
        .windows(needle.len())
        .position(|w| w == needle)
        .expect("chunk data [1,2,3] not found in file");
    // The pad slot is the 4 bytes (one i32) right after the payload. Use a
    // checked lookup so a file that ends before the pad fails with a
    // descriptive message rather than a bare out-of-range index panic.
    let pad_start = pos + needle.len();
    let pad = bytes
        .get(pad_start..pad_start + 4)
        .expect("file ends before the 4-byte pad slot following chunk data [1,2,3]");
    assert_eq!(
        pad,
        &(-9i32).to_le_bytes(),
        "partial chunk tail must be padded with fill value -9, got {:?}",
        pad
    );
    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
2401
#[test]
fn vlen_append_after_reopen_preserves_existing() {
    // An append after reopening lands in a vlen chunk that is only partly
    // filled. The write has to be a read-modify-write so that the strings
    // already stored in that chunk are not lost.
    let path = temp_path("vlen_append_reopen");

    // Session 1: write 3 strings, one fewer than the 4 passed at creation,
    // so the chunk is flushed partially filled on close.
    {
        let file = H5File::create(&path).unwrap();
        file.create_appendable_vlen_dataset("strs", 4, None).unwrap();
        file.append_vlen_strings("strs", &["a", "b", "c"]).unwrap();
        file.close().unwrap();
    }

    // Session 2: reopen read-write and add a fourth string, which goes
    // into the partially filled chunk 0.
    {
        let file = H5File::open_rw(&path).unwrap();
        file.append_vlen_strings("strs", &["d"]).unwrap();
        file.close().unwrap();
    }

    // Session 3: all four strings must be present, in append order.
    {
        let file = H5File::open(&path).unwrap();
        let ds = file.dataset("strs").unwrap();
        let got = ds.read_vlen_strings().unwrap();
        let as_strs = got.iter().map(|s| s.as_str()).collect::<Vec<_>>();
        assert_eq!(as_strs, vec!["a", "b", "c", "d"]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
2432
#[test]
fn fill_value_size_mismatch_errors() {
    // Setting a fill value must fail when the byte buffer's length does
    // not match the dataset's element size, and succeed when it does.
    let path = temp_path("fill_value_mismatch");
    let mut writer = crate::io::writer::Hdf5Writer::create(&path).unwrap();
    let idx = writer
        .create_dataset("d", <f64 as crate::types::H5Type>::hdf5_type(), &[4u64])
        .unwrap();

    // An f64 element is 8 bytes wide, so a 4-byte buffer is the wrong size.
    assert!(writer.set_dataset_fill_value(idx, vec![0u8; 4]).is_err());

    // An 8-byte buffer matches the element width and is accepted.
    let exact_width = vec![0u8; 8];
    writer.set_dataset_fill_value(idx, exact_width).unwrap();

    writer.close().unwrap();
    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
2446}