// rust_hdf5/dataset.rs
1//! Dataset creation and I/O.
2//!
3//! Datasets are created via the fluent [`DatasetBuilder`] API obtained from
4//! [`H5File::new_dataset`](crate::file::H5File::new_dataset). Once created,
5//! the [`H5Dataset`] handle can read or write raw typed data.
6
7use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
12// ---------------------------------------------------------------------------
13// DatasetBuilder
14// ---------------------------------------------------------------------------
15
/// A fluent builder for creating datasets.
///
/// Obtained from [`H5File::new_dataset::<T>()`](crate::file::H5File::new_dataset).
///
/// ```no_run
/// # use rust_hdf5::H5File;
/// let file = H5File::create("builder.h5").unwrap();
/// let ds = file.new_dataset::<f32>()
///     .shape(&[10, 20])
///     .create("temperatures")
///     .unwrap();
/// ```
pub struct DatasetBuilder<T: H5Type> {
    // Shared handle to the owning file's I/O backend.
    file_inner: SharedInner,
    // Dataset dimensions; must be set (possibly to an empty vec, for
    // scalars) before `create()` will succeed.
    shape: Option<Vec<usize>>,
    // Chunk dimensions; `Some` switches the dataset to chunked storage.
    chunk_dims: Option<Vec<usize>>,
    // Maximum dimensions; a `None` entry means unlimited along that axis.
    max_shape: Option<Vec<Option<usize>>>,
    // Deflate (gzip) level, when enabled via `deflate()`.
    deflate_level: Option<u32>,
    // Shuffle + deflate level, when enabled via `shuffle_deflate()`.
    shuffle_deflate_level: Option<u32>,
    // Explicit filter pipeline; takes precedence over the level fields above.
    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
    // Group the dataset is created in (`None` or `"/"` means the root group).
    group_path: Option<String>,
    // Zero-sized marker tying the builder to the element type `T`.
    _marker: std::marker::PhantomData<T>,
}
39
40impl<T: H5Type> DatasetBuilder<T> {
41    pub(crate) fn new(file_inner: SharedInner) -> Self {
42        Self {
43            file_inner,
44            shape: None,
45            chunk_dims: None,
46            max_shape: None,
47            deflate_level: None,
48            shuffle_deflate_level: None,
49            custom_pipeline: None,
50            group_path: None,
51            _marker: std::marker::PhantomData,
52        }
53    }
54
55    pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
56        Self {
57            file_inner,
58            shape: None,
59            chunk_dims: None,
60            max_shape: None,
61            deflate_level: None,
62            shuffle_deflate_level: None,
63            custom_pipeline: None,
64            group_path: Some(group_path),
65            _marker: std::marker::PhantomData,
66        }
67    }
68
69    /// Set the dataset dimensions.
70    ///
71    /// This is required before calling [`create`](Self::create).
72    /// Use an empty slice `&[]` for a scalar (0-dimensional) dataset.
73    #[must_use]
74    pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
75        self.shape = Some(dims.as_ref().to_vec());
76        self
77    }
78
79    /// Create a scalar (0-dimensional) dataset holding a single value.
80    #[must_use]
81    pub fn scalar(mut self) -> Self {
82        self.shape = Some(vec![]);
83        self
84    }
85
86    /// Set chunk dimensions for chunked storage.
87    ///
88    /// When set, the dataset uses chunked storage with the extensible array
89    /// index. You should also call [`max_shape`](Self::max_shape) or
90    /// [`resizable`](Self::resizable) to allow extending.
91    #[must_use]
92    pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
93        self.chunk_dims = Some(chunk_dims.to_vec());
94        self
95    }
96
97    /// Make all dimensions unlimited (resizable).
98    ///
99    /// This sets max_dims to u64::MAX for all dimensions.
100    #[must_use]
101    pub fn resizable(mut self) -> Self {
102        self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
103        self
104    }
105
106    /// Set maximum dimensions. `None` means unlimited for that dimension.
107    #[must_use]
108    pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
109        self.max_shape = Some(max.to_vec());
110        self
111    }
112
113    /// Enable deflate (gzip) compression with the given level (0-9).
114    ///
115    /// Requires chunked storage (call `.chunk()` before `.create()`).
116    /// Level 0 = no compression, 9 = maximum compression. Default is 6.
117    #[must_use]
118    pub fn deflate(mut self, level: u32) -> Self {
119        self.deflate_level = Some(level);
120        self
121    }
122
123    /// Enable shuffle + deflate compression.
124    ///
125    /// Shuffle reorders bytes by position within elements before compression,
126    /// which typically improves compression ratios for numeric data.
127    /// Requires chunked storage.
128    #[must_use]
129    pub fn shuffle_deflate(mut self, level: u32) -> Self {
130        self.shuffle_deflate_level = Some(level);
131        self
132    }
133
134    /// Enable Zstandard compression with the given level (1-22, default 3).
135    ///
136    /// Requires chunked storage (call `.chunk()` before `.create()`).
137    #[must_use]
138    pub fn zstd(mut self, level: u32) -> Self {
139        self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
140        self
141    }
142
143    /// Set a custom filter pipeline for compression.
144    ///
145    /// This takes precedence over [`deflate`](Self::deflate) and
146    /// [`shuffle_deflate`](Self::shuffle_deflate). Requires chunked storage.
147    #[must_use]
148    pub fn filter_pipeline(
149        mut self,
150        pipeline: crate::format::messages::filter::FilterPipeline,
151    ) -> Self {
152        self.custom_pipeline = Some(pipeline);
153        self
154    }
155
156    /// Finalize and create the dataset with the given `name`.
157    ///
158    /// The name is the link name within the root group (e.g. `"data"` or
159    /// `"group1/data"` once nested groups are supported).
160    pub fn create(self, name: &str) -> Result<H5Dataset> {
161        let shape = self.shape.ok_or_else(|| {
162            Hdf5Error::InvalidState("shape must be set before calling create()".into())
163        })?;
164
165        // Build the full name: if created within a group, prefix with group path
166        let full_name = if let Some(ref gp) = self.group_path {
167            if gp == "/" {
168                name.to_string()
169            } else {
170                let trimmed = gp.trim_start_matches('/');
171                format!("{}/{}", trimmed, name)
172            }
173        } else {
174            name.to_string()
175        };
176        let group_path = self.group_path.clone();
177
178        let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
179        let datatype = T::hdf5_type();
180        let element_size = T::element_size();
181
182        if let Some(ref chunk_dims) = self.chunk_dims {
183            // Chunked dataset
184            let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
185            let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
186                max.iter()
187                    .map(|m| m.map_or(u64::MAX, |v| v as u64))
188                    .collect()
189            } else {
190                // Default: max = current
191                dims_u64.clone()
192            };
193
194            let index = {
195                let mut inner = borrow_inner_mut(&self.file_inner);
196                match &mut *inner {
197                    H5FileInner::Writer(writer) => {
198                        let idx = if let Some(pipeline) = self.custom_pipeline {
199                            writer.create_chunked_dataset_with_pipeline(
200                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
201                            )?
202                        } else if let Some(level) = self.shuffle_deflate_level {
203                            let pipeline =
204                                crate::format::messages::filter::FilterPipeline::shuffle_deflate(
205                                    T::element_size() as u32,
206                                    level,
207                                );
208                            writer.create_chunked_dataset_with_pipeline(
209                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
210                            )?
211                        } else if let Some(level) = self.deflate_level {
212                            writer.create_chunked_dataset_compressed(
213                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
214                            )?
215                        } else {
216                            writer.create_chunked_dataset(
217                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
218                            )?
219                        };
220                        if let Some(ref gp) = group_path {
221                            if gp != "/" {
222                                writer.assign_dataset_to_group(gp, idx)?;
223                            }
224                        }
225                        idx
226                    }
227                    H5FileInner::Reader(_) => {
228                        return Err(Hdf5Error::InvalidState(
229                            "cannot create a dataset in read mode".into(),
230                        ));
231                    }
232                    H5FileInner::Closed => {
233                        return Err(Hdf5Error::InvalidState("file is closed".into()));
234                    }
235                }
236            };
237
238            Ok(H5Dataset {
239                file_inner: clone_inner(&self.file_inner),
240                info: DatasetInfo::Writer {
241                    index,
242                    shape,
243                    element_size,
244                    chunked: true,
245                },
246            })
247        } else {
248            // Contiguous dataset (original path)
249            let index = {
250                let mut inner = borrow_inner_mut(&self.file_inner);
251                match &mut *inner {
252                    H5FileInner::Writer(writer) => {
253                        let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
254                        if let Some(ref gp) = group_path {
255                            if gp != "/" {
256                                writer.assign_dataset_to_group(gp, idx)?;
257                            }
258                        }
259                        idx
260                    }
261                    H5FileInner::Reader(_) => {
262                        return Err(Hdf5Error::InvalidState(
263                            "cannot create a dataset in read mode".into(),
264                        ));
265                    }
266                    H5FileInner::Closed => {
267                        return Err(Hdf5Error::InvalidState("file is closed".into()));
268                    }
269                }
270            };
271
272            Ok(H5Dataset {
273                file_inner: clone_inner(&self.file_inner),
274                info: DatasetInfo::Writer {
275                    index,
276                    shape,
277                    element_size,
278                    chunked: false,
279                },
280            })
281        }
282    }
283}
284
285// ---------------------------------------------------------------------------
286// DatasetInfo
287// ---------------------------------------------------------------------------
288
/// Internal metadata about a dataset handle.
///
/// The two variants mirror the file's open mode: write-mode handles address
/// the dataset by its index in the writer's dataset list, while read-mode
/// handles address it by link name.
enum DatasetInfo {
    /// A dataset created via `new_dataset().create()` in write mode.
    Writer {
        /// Index into the writer's dataset list.
        index: usize,
        /// Shape (current dimensions).
        shape: Vec<usize>,
        /// Size of one element in bytes.
        element_size: usize,
        /// Whether this is a chunked dataset.
        chunked: bool,
    },
    /// A dataset opened by name in read mode.
    Reader {
        /// The link name of the dataset.
        name: String,
        /// Shape (current dimensions).
        shape: Vec<usize>,
        /// Size of one element in bytes.
        element_size: usize,
    },
}
312
313// ---------------------------------------------------------------------------
314// H5Dataset
315// ---------------------------------------------------------------------------
316
/// A handle to an HDF5 dataset, supporting typed read and write operations.
///
/// The dataset holds a shared reference to the file's I/O backend, so it
/// remains valid even if the originating [`H5File`](crate::file::H5File) is
/// moved or dropped (they share ownership via `Rc`).
pub struct H5Dataset {
    // Shared handle to the owning file's I/O backend.
    file_inner: SharedInner,
    // Mode-specific metadata (writer index vs. reader link name).
    info: DatasetInfo,
}
326
327impl H5Dataset {
328    /// Create a reader-mode dataset handle (called internally by `H5File::dataset`).
329    pub(crate) fn new_reader(
330        file_inner: SharedInner,
331        name: String,
332        shape: Vec<usize>,
333        element_size: usize,
334    ) -> Self {
335        Self {
336            file_inner,
337            info: DatasetInfo::Reader {
338                name,
339                shape,
340                element_size,
341            },
342        }
343    }
344
345    /// Return the dataset dimensions.
346    pub fn shape(&self) -> Vec<usize> {
347        match &self.info {
348            DatasetInfo::Writer { shape, .. } => shape.clone(),
349            DatasetInfo::Reader { shape, .. } => shape.clone(),
350        }
351    }
352
353    /// Return the number of dimensions (rank) of the dataset.
354    pub fn ndims(&self) -> usize {
355        match &self.info {
356            DatasetInfo::Writer { shape, .. } => shape.len(),
357            DatasetInfo::Reader { shape, .. } => shape.len(),
358        }
359    }
360
361    /// Return the total number of elements in the dataset.
362    pub fn total_elements(&self) -> usize {
363        match &self.info {
364            DatasetInfo::Writer { shape, .. } => shape.iter().product(),
365            DatasetInfo::Reader { shape, .. } => shape.iter().product(),
366        }
367    }
368
369    /// Return the size of one element in bytes.
370    pub fn element_size(&self) -> usize {
371        match &self.info {
372            DatasetInfo::Writer { element_size, .. } => *element_size,
373            DatasetInfo::Reader { element_size, .. } => *element_size,
374        }
375    }
376
377    /// Return the chunk dimensions, if this is a chunked dataset.
378    pub fn chunk_dims(&self) -> Option<Vec<usize>> {
379        match &self.info {
380            DatasetInfo::Reader { name, .. } => {
381                let inner = borrow_inner(&self.file_inner);
382                if let H5FileInner::Reader(reader) = &*inner {
383                    if let Some(info) = reader.dataset_info(name) {
384                        if let crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 {
385                            chunk_dims,
386                            ..
387                        } = &info.layout
388                        {
389                            // Strip trailing element-size dimension
390                            return Some(
391                                chunk_dims[..chunk_dims.len() - 1]
392                                    .iter()
393                                    .map(|&d| d as usize)
394                                    .collect(),
395                            );
396                        }
397                    }
398                }
399                None
400            }
401            DatasetInfo::Writer { .. } => None,
402        }
403    }
404
405    /// Return whether this is a chunked dataset.
406    pub fn is_chunked(&self) -> bool {
407        match &self.info {
408            DatasetInfo::Writer { chunked, .. } => *chunked,
409            DatasetInfo::Reader { name, .. } => {
410                let inner = borrow_inner(&self.file_inner);
411                match &*inner {
412                    H5FileInner::Reader(reader) => {
413                        if let Some(info) = reader.dataset_info(name) {
414                            matches!(
415                                info.layout,
416                                crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 { .. }
417                            )
418                        } else {
419                            false
420                        }
421                    }
422                    _ => false,
423                }
424            }
425        }
426    }
427
428    /// Return the names of all attributes on this dataset (read mode only).
429    pub fn attr_names(&self) -> Result<Vec<String>> {
430        match &self.info {
431            DatasetInfo::Reader { name, .. } => {
432                let inner = borrow_inner(&self.file_inner);
433                match &*inner {
434                    H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
435                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
436                }
437            }
438            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
439                "attr_names not available in write mode".into(),
440            )),
441        }
442    }
443
444    /// Open an attribute by name (read mode only).
445    pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
446        match &self.info {
447            DatasetInfo::Reader { name, .. } => {
448                let inner = borrow_inner(&self.file_inner);
449                match &*inner {
450                    H5FileInner::Reader(reader) => {
451                        let attr_msg = reader.dataset_attr(name, attr_name)?;
452                        Ok(crate::attribute::H5Attribute::new_reader(
453                            clone_inner(&self.file_inner),
454                            attr_msg.name.clone(),
455                            attr_msg.data.clone(),
456                        ))
457                    }
458                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
459                }
460            }
461            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
462                "attr() not available in write mode".into(),
463            )),
464        }
465    }
466
467    /// Start building a new attribute on this dataset.
468    ///
469    /// Returns a fluent builder. Call `.shape(())` for a scalar attribute
470    /// and `.create("name")` to finalize.
471    ///
472    /// # Example
473    ///
474    /// ```no_run
475    /// # use rust_hdf5::H5File;
476    /// # use rust_hdf5::types::VarLenUnicode;
477    /// let file = H5File::create("attr.h5").unwrap();
478    /// let ds = file.new_dataset::<f32>().shape(&[10]).create("data").unwrap();
479    /// let attr = ds.new_attr::<VarLenUnicode>().shape(()).create("units").unwrap();
480    /// attr.write_scalar(&VarLenUnicode("meters".to_string())).unwrap();
481    /// ```
482    pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
483        let ds_index = match &self.info {
484            DatasetInfo::Writer { index, .. } => *index,
485            DatasetInfo::Reader { .. } => {
486                // Reader mode: we'll return a builder that will error on create.
487                // Using usize::MAX as sentinel.
488                usize::MAX
489            }
490        };
491        AttrBuilder::new(&self.file_inner, ds_index)
492    }
493
494    /// Write a typed slice to the dataset (contiguous datasets only).
495    ///
496    /// The slice length must match the total number of elements declared by
497    /// the dataset shape. The data is reinterpreted as raw bytes and written
498    /// to the file.
499    ///
500    /// # Errors
501    ///
502    /// Returns an error if:
503    /// - The file is in read mode.
504    /// - The data length does not match the declared shape.
505    pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
506        match &self.info {
507            DatasetInfo::Writer {
508                index,
509                shape,
510                element_size,
511                chunked,
512            } => {
513                if *chunked {
514                    return Err(Hdf5Error::InvalidState(
515                        "use write_chunk for chunked datasets".into(),
516                    ));
517                }
518
519                let total_elements: usize = shape.iter().product();
520                if data.len() != total_elements {
521                    return Err(Hdf5Error::InvalidState(format!(
522                        "data length {} does not match dataset size {}",
523                        data.len(),
524                        total_elements,
525                    )));
526                }
527
528                // Verify element size matches
529                if T::element_size() != *element_size {
530                    return Err(Hdf5Error::TypeMismatch(format!(
531                        "write type has element size {} but dataset expects {}",
532                        T::element_size(),
533                        element_size,
534                    )));
535                }
536
537                // Safety: T: Copy + 'static (numeric primitive) with well-defined
538                // byte representation. The resulting slice borrows `data` and
539                // lives only as long as this block.
540                let byte_len = data.len() * T::element_size();
541                let raw =
542                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
543
544                let mut inner = borrow_inner_mut(&self.file_inner);
545                match &mut *inner {
546                    H5FileInner::Writer(writer) => {
547                        writer.write_dataset_raw(*index, raw)?;
548                        Ok(())
549                    }
550                    _ => Err(Hdf5Error::InvalidState(
551                        "file is no longer in write mode".into(),
552                    )),
553                }
554            }
555            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
556                "cannot write to a dataset opened in read mode".into(),
557            )),
558        }
559    }
560
561    /// Write a single chunk to a chunked dataset.
562    ///
563    /// `chunk_idx` is the linear chunk index (typically the frame number for
564    /// streaming datasets). `data` is the raw byte data for one chunk.
565    pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
566        match &self.info {
567            DatasetInfo::Writer { index, chunked, .. } => {
568                if !*chunked {
569                    return Err(Hdf5Error::InvalidState(
570                        "write_chunk is only for chunked datasets".into(),
571                    ));
572                }
573
574                let mut inner = borrow_inner_mut(&self.file_inner);
575                match &mut *inner {
576                    H5FileInner::Writer(writer) => {
577                        writer.write_chunk(*index, chunk_idx as u64, data)?;
578                        Ok(())
579                    }
580                    _ => Err(Hdf5Error::InvalidState(
581                        "file is no longer in write mode".into(),
582                    )),
583                }
584            }
585            DatasetInfo::Reader { .. } => {
586                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
587            }
588        }
589    }
590
591    /// Write multiple chunks in a batch, optionally compressing in parallel.
592    ///
593    /// `chunks` is a slice of `(chunk_index, raw_data)` pairs. When a filter
594    /// pipeline is configured and the `parallel` feature is enabled, all
595    /// chunks are compressed concurrently via rayon.
596    pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
597        match &self.info {
598            DatasetInfo::Writer { index, chunked, .. } => {
599                if !*chunked {
600                    return Err(Hdf5Error::InvalidState(
601                        "write_chunks_batch is only for chunked datasets".into(),
602                    ));
603                }
604                let pairs: Vec<(u64, &[u8])> = chunks
605                    .iter()
606                    .map(|(idx, data)| (*idx as u64, *data))
607                    .collect();
608                let mut inner = borrow_inner_mut(&self.file_inner);
609                match &mut *inner {
610                    H5FileInner::Writer(writer) => {
611                        writer.write_chunks_batch(*index, &pairs)?;
612                        Ok(())
613                    }
614                    _ => Err(Hdf5Error::InvalidState(
615                        "file is no longer in write mode".into(),
616                    )),
617                }
618            }
619            DatasetInfo::Reader { .. } => {
620                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
621            }
622        }
623    }
624
    /// Append data along the first dimension of a chunked dataset.
    ///
    /// `data` must contain a whole number of "frames" — slices along
    /// dimension 0. For example, if the dataset has shape `[N, H, W]`
    /// and `chunk_dims = [1, H, W]`, then `data.len()` must be a
    /// multiple of `H * W`.
    ///
    /// This method writes the necessary chunks and extends the dataset
    /// shape automatically. Frames that do not fill a whole chunk are
    /// buffered on the dataset and written out by a later `append` once
    /// the chunk boundary is reached; the logical shape is still extended
    /// to cover buffered frames.
    ///
    /// ```no_run
    /// # use rust_hdf5::H5File;
    /// let file = H5File::create("append.h5").unwrap();
    /// let ds = file.new_dataset::<f64>()
    ///     .shape(&[0, 3])
    ///     .chunk(&[1, 3])
    ///     .max_shape(&[None, Some(3)])
    ///     .create("data")
    ///     .unwrap();
    /// ds.append(&[1.0, 2.0, 3.0]).unwrap();       // shape becomes [1, 3]
    /// ds.append(&[4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap(); // shape becomes [3, 3]
    /// ```
    pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                element_size,
                chunked,
                ..
            } => {
                // append only makes sense for chunked storage.
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "append is only for chunked datasets".into(),
                    ));
                }
                // The caller's element type must match the stored datatype size.
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "append type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let ds_index = *index;
                let es = *element_size;

                let mut inner = borrow_inner_mut(&self.file_inner);
                let writer = match &mut *inner {
                    H5FileInner::Writer(w) => w,
                    _ => {
                        return Err(Hdf5Error::InvalidState(
                            "file is no longer in write mode".into(),
                        ))
                    }
                };

                let chunk_dims = writer
                    .dataset_chunk_dims(ds_index)
                    .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
                    .to_vec();
                let dims = writer.dataset_dims(ds_index).to_vec();

                // Frame size = product of dims[1..]
                let frame_elems: usize = if dims.len() > 1 {
                    dims[1..].iter().map(|&d| d as usize).product()
                } else {
                    1
                };

                // A zero-size trailing dimension would make every frame empty,
                // and the multiple-of check below meaningless.
                if frame_elems == 0 {
                    return Err(Hdf5Error::InvalidState(
                        "cannot append to dataset with zero-size trailing dimensions".into(),
                    ));
                }

                if !data.len().is_multiple_of(frame_elems) {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} is not a multiple of frame size {}",
                        data.len(),
                        frame_elems,
                    )));
                }

                let n_new_frames = data.len() / frame_elems;
                let current_dim0 = dims[0] as usize;

                // Chunk size along first dimension
                let chunk_dim0 = chunk_dims[0] as usize;
                // Bytes per chunk = product of all chunk_dims * element_size
                let chunk_bytes = chunk_dims.iter().map(|&d| d as usize).product::<usize>() * es;
                let frame_bytes = frame_elems * es;

                // SAFETY: T is an H5Type with a well-defined byte layout; the
                // raw view borrows `data` and is used only within this scope.
                let raw = unsafe {
                    std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
                };

                // Merge buffered data with new data
                let ds = &mut writer.datasets[ds_index];
                let buffered_frames = ds.append_buffered_frames as usize;
                let mut combined = std::mem::take(&mut ds.append_buffer);
                combined.extend_from_slice(raw);
                ds.append_buffered_frames = 0;

                let total_frames = buffered_frames + n_new_frames;
                let total_bytes = combined.len();

                // Base chunk index: account for buffered frames
                // (current_dim0 already counted the buffered-but-unwritten frames).
                let base_dim0 = current_dim0 - buffered_frames;
                let mut byte_pos = 0usize;
                let mut frame_pos = 0usize;

                // Walk the combined frames, emitting one chunk per iteration
                // until only a partial chunk remains (which gets re-buffered).
                while frame_pos < total_frames {
                    let abs_frame = base_dim0 + frame_pos;
                    let chunk_idx = abs_frame / chunk_dim0;
                    let remaining_frames = total_frames - frame_pos;
                    let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);

                    if remaining_frames >= frames_to_fill {
                        // Full chunk — write
                        let end = byte_pos + frames_to_fill * frame_bytes;
                        if frames_to_fill == chunk_dim0 {
                            writer.write_chunk(
                                ds_index,
                                chunk_idx as u64,
                                &combined[byte_pos..end],
                            )?;
                        } else {
                            // Partial start but fills to chunk boundary
                            // NOTE(review): bytes before offset_in_chunk are
                            // zero-filled here; this assumes that region of the
                            // chunk was never written. Confirm for datasets whose
                            // initial dim0 is not chunk-aligned.
                            let mut chunk_buf = vec![0u8; chunk_bytes];
                            let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
                            chunk_buf
                                [offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
                                .copy_from_slice(&combined[byte_pos..end]);
                            writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
                        }
                        byte_pos = end;
                        frame_pos += frames_to_fill;
                    } else {
                        // Partial chunk — buffer for next append
                        let ds = &mut writer.datasets[ds_index];
                        ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
                        ds.append_buffered_frames = remaining_frames as u64;
                        frame_pos = total_frames;
                    }
                }

                // Extend dims to include all frames (buffered + new)
                let logical_dim0 = base_dim0 + total_frames;
                let mut new_dims: Vec<u64> = dims;
                new_dims[0] = logical_dim0 as u64;
                writer.extend_dataset(ds_index, &new_dims)?;

                Ok(())
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
            }
        }
    }
784
785    /// Extend the dimensions of a chunked dataset.
786    pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
787        match &self.info {
788            DatasetInfo::Writer { index, chunked, .. } => {
789                if !*chunked {
790                    return Err(Hdf5Error::InvalidState(
791                        "extend is only for chunked datasets".into(),
792                    ));
793                }
794
795                let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
796                let mut inner = borrow_inner_mut(&self.file_inner);
797                match &mut *inner {
798                    H5FileInner::Writer(writer) => {
799                        writer.extend_dataset(*index, &dims_u64)?;
800                        Ok(())
801                    }
802                    _ => Err(Hdf5Error::InvalidState(
803                        "file is no longer in write mode".into(),
804                    )),
805                }
806            }
807            DatasetInfo::Reader { .. } => {
808                Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
809            }
810        }
811    }
812
813    /// Flush a chunked dataset's index structures to disk.
814    pub fn flush(&self) -> Result<()> {
815        match &self.info {
816            DatasetInfo::Writer { index, .. } => {
817                let mut inner = borrow_inner_mut(&self.file_inner);
818                match &mut *inner {
819                    H5FileInner::Writer(writer) => {
820                        writer.flush_dataset(*index)?;
821                        Ok(())
822                    }
823                    _ => Ok(()),
824                }
825            }
826            DatasetInfo::Reader { .. } => Ok(()),
827        }
828    }
829
    /// Read a slice (hyperslab) of the dataset as a typed vector.
    ///
    /// `starts` and `counts` define the N-dimensional selection:
    /// `starts[d]` = first index along dim d, `counts[d]` = how many elements.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The dataset is in write mode, or the file is no longer in read mode.
    /// - `T::element_size()` does not match the dataset's element size.
    /// - The raw byte count returned by the reader is not a multiple of
    ///   `T::element_size()`.
    pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
        match &self.info {
            DatasetInfo::Reader {
                name, element_size, ..
            } => {
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "read type has element size {} but dataset has element size {}",
                        T::element_size(),
                        element_size,
                    )));
                }
                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();

                // Scope the RefCell-style borrow so it is released before the
                // (potentially large) byte-to-T conversion below.
                let raw = {
                    let mut inner = borrow_inner_mut(&self.file_inner);
                    match &mut *inner {
                        H5FileInner::Reader(reader) => {
                            reader.read_slice(name, &starts_u64, &counts_u64)?
                        }
                        _ => {
                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
                        }
                    }
                };

                if raw.len() % T::element_size() != 0 {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "raw data size {} is not a multiple of element size {}",
                        raw.len(),
                        T::element_size(),
                    )));
                }

                let count = raw.len() / T::element_size();
                let mut result = Vec::<T>::with_capacity(count);
                // Safety: same reasoning as in `read_raw` — T is Copy + 'static
                // (required by H5Type) and we verified above that the byte count
                // is an exact multiple of the element size, so the copy fills
                // exactly `count` elements with valid bit patterns for the
                // numeric-primitive H5Type implementors. set_len is called only
                // after the copy completes.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        raw.as_ptr(),
                        result.as_mut_ptr() as *mut u8,
                        raw.len(),
                    );
                    result.set_len(count);
                }
                Ok(result)
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read_slice from a dataset in write mode".into(),
            )),
        }
    }
886
887    /// Write a typed slice to a sub-region of a contiguous dataset.
888    ///
889    /// `starts` and `counts` define the N-dimensional selection.
890    pub fn write_slice<T: H5Type>(
891        &self,
892        starts: &[usize],
893        counts: &[usize],
894        data: &[T],
895    ) -> Result<()> {
896        match &self.info {
897            DatasetInfo::Writer {
898                index,
899                element_size,
900                chunked,
901                ..
902            } => {
903                if *chunked {
904                    return Err(Hdf5Error::InvalidState(
905                        "write_slice is only for contiguous datasets".into(),
906                    ));
907                }
908                if T::element_size() != *element_size {
909                    return Err(Hdf5Error::TypeMismatch(format!(
910                        "write type has element size {} but dataset expects {}",
911                        T::element_size(),
912                        element_size,
913                    )));
914                }
915
916                let expected: usize = counts.iter().product();
917                if data.len() != expected {
918                    return Err(Hdf5Error::InvalidState(format!(
919                        "data length {} does not match slice size {}",
920                        data.len(),
921                        expected,
922                    )));
923                }
924
925                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
926                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
927
928                let byte_len = data.len() * T::element_size();
929                let raw =
930                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
931
932                let mut inner = borrow_inner_mut(&self.file_inner);
933                match &mut *inner {
934                    H5FileInner::Writer(writer) => {
935                        writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
936                        Ok(())
937                    }
938                    _ => Err(Hdf5Error::InvalidState(
939                        "file is no longer in write mode".into(),
940                    )),
941                }
942            }
943            DatasetInfo::Reader { .. } => {
944                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
945            }
946        }
947    }
948
949    /// Read variable-length strings from a dataset.
950    ///
951    /// This handles h5py-style vlen string datasets that store strings
952    /// as global heap references. Returns one String per element.
953    pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
954        match &self.info {
955            DatasetInfo::Reader { name, .. } => {
956                let mut inner = borrow_inner_mut(&self.file_inner);
957                match &mut *inner {
958                    H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
959                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
960                }
961            }
962            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
963                "cannot read vlen strings from a dataset in write mode".into(),
964            )),
965        }
966    }
967
968    /// Read the entire dataset as a typed vector.
969    ///
970    /// The raw bytes are read from the file and reinterpreted as `T`. The
971    /// caller must ensure that `T` matches the datatype used when the dataset
972    /// was written.
973    ///
974    /// # Errors
975    ///
976    /// Returns an error if:
977    /// - The file is in write mode.
978    /// - The raw data size is not a multiple of `T::element_size()`.
979    pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
980        match &self.info {
981            DatasetInfo::Reader {
982                name, element_size, ..
983            } => {
984                if T::element_size() != *element_size {
985                    return Err(Hdf5Error::TypeMismatch(format!(
986                        "read type has element size {} but dataset has element size {}",
987                        T::element_size(),
988                        element_size,
989                    )));
990                }
991
992                let raw = {
993                    let mut inner = borrow_inner_mut(&self.file_inner);
994                    match &mut *inner {
995                        H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
996                        _ => {
997                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
998                        }
999                    }
1000                };
1001
1002                if raw.len() % T::element_size() != 0 {
1003                    return Err(Hdf5Error::TypeMismatch(format!(
1004                        "raw data size {} is not a multiple of element size {}",
1005                        raw.len(),
1006                        T::element_size(),
1007                    )));
1008                }
1009
1010                let count = raw.len() / T::element_size();
1011                let mut result = Vec::<T>::with_capacity(count);
1012
1013                // Safety: T is Copy + 'static (required by H5Type). We verified
1014                // the byte count matches count * size_of::<T>() above.
1015                // copy_nonoverlapping fills the memory with valid bit patterns
1016                // for all H5Type implementors (numeric primitives).
1017                // We call set_len AFTER the copy so that if an unexpected panic
1018                // occurs, uninitialized memory is never exposed.
1019                unsafe {
1020                    std::ptr::copy_nonoverlapping(
1021                        raw.as_ptr(),
1022                        result.as_mut_ptr() as *mut u8,
1023                        raw.len(),
1024                    );
1025                    result.set_len(count);
1026                }
1027
1028                Ok(result)
1029            }
1030            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1031                "cannot read from a dataset in write mode".into(),
1032            )),
1033        }
1034    }
1035}
1036
#[cfg(test)]
mod tests {
    //! Round-trip and error-path tests for dataset creation, raw I/O,
    //! slicing, attributes, and chunked appends. Every test writes to a
    //! unique file in the system temp dir and removes it afterwards.
    use crate::H5File;
    use std::path::PathBuf;

    // Unique temp-file path per test so tests can run in parallel.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("hdf5_dataset_test_{}.h5", name))
    }

    // --- builder validation -------------------------------------------------

    #[test]
    fn builder_requires_shape() {
        let path = temp_path("no_shape");
        let file = H5File::create(&path).unwrap();
        let result = file.new_dataset::<u8>().create("data");
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn write_raw_size_mismatch() {
        let path = temp_path("size_mismatch");
        let file = H5File::create(&path).unwrap();
        let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
        // Provide 3 elements instead of 4
        let result = ds.write_raw(&[1u8, 2, 3]);
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    // --- write/read round-trips at various ranks ----------------------------

    #[test]
    fn roundtrip_u8_1d() {
        let path = temp_path("rt_u8_1d");
        let data: Vec<u8> = (0..10).collect();

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("seq").unwrap();
            assert_eq!(ds.shape(), vec![10]);
            let readback = ds.read_raw::<u8>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn roundtrip_i32_2d() {
        let path = temp_path("rt_i32_2d");
        let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<i32>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn roundtrip_f64_3d() {
        let path = temp_path("rt_f64_3d");
        let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3, 4])
                .create("cube")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("cube").unwrap();
            assert_eq!(ds.shape(), vec![2, 3, 4]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }

    // --- mode-mismatch error paths ------------------------------------------

    #[test]
    fn cannot_read_in_write_mode() {
        let path = temp_path("no_read_write");
        let file = H5File::create(&path).unwrap();
        let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
        let result = ds.read_raw::<u8>();
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn cannot_write_in_read_mode() {
        let path = temp_path("no_write_read");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("x").unwrap();
            let result = ds.write_raw(&[5u8, 6, 7, 8]);
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }

    // --- attributes ---------------------------------------------------------

    #[test]
    fn numeric_attr_roundtrip() {
        let path = temp_path("num_attr");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1.0f32; 4]).unwrap();

            let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
            a1.write_numeric(&1.2345f64).unwrap();

            let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
            a2.write_numeric(&42i32).unwrap();

            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();

            let scale = ds.attr("scale").unwrap();
            let val: f64 = scale.read_numeric().unwrap();
            assert!((val - 1.2345).abs() < 1e-10);

            let count = ds.attr("count").unwrap();
            let val: i32 = count.read_numeric().unwrap();
            assert_eq!(val, 42);
        }
        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn cannot_create_dataset_in_read_mode() {
        let path = temp_path("no_create_read");

        {
            let _file = H5File::create(&path).unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let result = file.new_dataset::<u8>().shape([4]).create("x");
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn shape_accessor() {
        let path = temp_path("shape_acc");

        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<f32>()
            .shape([5, 10, 3])
            .create("tensor")
            .unwrap();
        assert_eq!(ds.shape(), vec![5, 10, 3]);

        std::fs::remove_file(&path).ok();
    }

    // --- hyperslab slicing --------------------------------------------------

    #[test]
    fn slice_roundtrip_2d() {
        let path = temp_path("slice_2d");

        // Create a 4x5 dataset, write full, then read a slice
        let data: Vec<i32> = (0..20).collect();
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([4, 5])
                .create("mat")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("mat").unwrap();
            // Read rows 1..3, cols 2..4 (2x2 slice)
            let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
            // Row 1: [5,6,7,8,9] -> cols 2..4 = [7,8]
            // Row 2: [10,11,12,13,14] -> cols 2..4 = [12,13]
            assert_eq!(slice, vec![7, 8, 12, 13]);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn write_slice_2d() {
        let path = temp_path("write_slice_2d");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f32>()
                .shape([3, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0.0f32; 12]).unwrap();
            // Overwrite a 2x2 sub-region
            ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            let full = ds.read_raw::<f32>().unwrap();
            // Row 0: [0,0,0,0]
            // Row 1: [0,10,20,0]
            // Row 2: [0,30,40,0]
            assert_eq!(
                full,
                vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
            );
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn attr_read_roundtrip() {
        use crate::types::VarLenUnicode;
        let path = temp_path("attr_read");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
            let a1 = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            a1.write_string("meters").unwrap();
            let a2 = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("desc")
                .unwrap();
            a2.write_string("test data").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();

            let names = ds.attr_names().unwrap();
            assert!(names.contains(&"units".to_string()));
            assert!(names.contains(&"desc".to_string()));

            let units = ds.attr("units").unwrap();
            assert_eq!(units.read_string().unwrap(), "meters");

            let desc = ds.attr("desc").unwrap();
            assert_eq!(desc.read_string().unwrap(), "test data");
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn type_mismatch_element_size() {
        let path = temp_path("type_mismatch");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            // Try to read as u8 (element_size = 1) from a f64 dataset (element_size = 8)
            let result = ds.read_raw::<u8>();
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn dataset_survives_file_move() {
        let path = temp_path("ds_survives");

        let ds = {
            let file = H5File::create(&path).unwrap();
            file.new_dataset::<u8>().shape([4]).create("x").unwrap()
        };
        // file is dropped here, but ds still holds Rc to the inner state
        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
        // The writer will finalize on drop of the last Rc

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn new_attr_scalar_string() {
        use crate::types::VarLenUnicode;

        let path = temp_path("attr_scalar_string");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();

            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("name")
                .unwrap();
            attr.write_scalar(&VarLenUnicode("test_value".to_string()))
                .unwrap();

            file.close().unwrap();
        }

        // Verify the file is still valid and readable
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4]);
            let readback = ds.read_raw::<u8>().unwrap();
            assert_eq!(readback, vec![1u8, 2, 3, 4]);
        }

        std::fs::remove_file(&path).ok();
    }

    // --- every supported numeric datatype ------------------------------------

    #[test]
    fn all_numeric_types_roundtrip() {
        let path = temp_path("all_types");

        {
            let file = H5File::create(&path).unwrap();

            let ds = file.new_dataset::<u8>().shape([2]).create("u8").unwrap();
            ds.write_raw(&[1u8, 2]).unwrap();

            let ds = file.new_dataset::<i8>().shape([2]).create("i8").unwrap();
            ds.write_raw(&[-1i8, 1]).unwrap();

            let ds = file.new_dataset::<u16>().shape([2]).create("u16").unwrap();
            ds.write_raw(&[100u16, 200]).unwrap();

            let ds = file.new_dataset::<i16>().shape([2]).create("i16").unwrap();
            ds.write_raw(&[-100i16, 100]).unwrap();

            let ds = file.new_dataset::<u32>().shape([2]).create("u32").unwrap();
            ds.write_raw(&[1000u32, 2000]).unwrap();

            let ds = file.new_dataset::<i32>().shape([2]).create("i32").unwrap();
            ds.write_raw(&[-1000i32, 1000]).unwrap();

            let ds = file.new_dataset::<u64>().shape([2]).create("u64").unwrap();
            ds.write_raw(&[10000u64, 20000]).unwrap();

            let ds = file.new_dataset::<i64>().shape([2]).create("i64").unwrap();
            ds.write_raw(&[-10000i64, 10000]).unwrap();

            let ds = file.new_dataset::<f32>().shape([2]).create("f32").unwrap();
            ds.write_raw(&[1.5f32, 2.5]).unwrap();

            let ds = file.new_dataset::<f64>().shape([2]).create("f64").unwrap();
            ds.write_raw(&[1.23456f64, 7.89012]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            assert_eq!(
                file.dataset("u8").unwrap().read_raw::<u8>().unwrap(),
                vec![1u8, 2]
            );
            assert_eq!(
                file.dataset("i8").unwrap().read_raw::<i8>().unwrap(),
                vec![-1i8, 1]
            );
            assert_eq!(
                file.dataset("u16").unwrap().read_raw::<u16>().unwrap(),
                vec![100u16, 200]
            );
            assert_eq!(
                file.dataset("i16").unwrap().read_raw::<i16>().unwrap(),
                vec![-100i16, 100]
            );
            assert_eq!(
                file.dataset("u32").unwrap().read_raw::<u32>().unwrap(),
                vec![1000u32, 2000]
            );
            assert_eq!(
                file.dataset("i32").unwrap().read_raw::<i32>().unwrap(),
                vec![-1000i32, 1000]
            );
            assert_eq!(
                file.dataset("u64").unwrap().read_raw::<u64>().unwrap(),
                vec![10000u64, 20000]
            );
            assert_eq!(
                file.dataset("i64").unwrap().read_raw::<i64>().unwrap(),
                vec![-10000i64, 10000]
            );
            assert_eq!(
                file.dataset("f32").unwrap().read_raw::<f32>().unwrap(),
                vec![1.5f32, 2.5]
            );
            assert_eq!(
                file.dataset("f64").unwrap().read_raw::<f64>().unwrap(),
                vec![1.23456f64, 7.89012]
            );
        }

        std::fs::remove_file(&path).ok();
    }

    // --- chunked append -----------------------------------------------------

    #[test]
    fn append_chunked_roundtrip() {
        let path = temp_path("append_chunked");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("data")
                .unwrap();

            // Append one frame
            ds.append(&[1.0f64, 2.0, 3.0]).unwrap();
            // Append two frames at once
            ds.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![3, 3]);
            let all = ds.read_raw::<f64>().unwrap();
            assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn append_1d_chunked() {
        let path = temp_path("append_1d");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0])
                .chunk(&[4])
                .max_shape(&[None])
                .create("values")
                .unwrap();

            ds.append(&[10i32, 20, 30]).unwrap(); // partial chunk
            ds.append(&[40i32]).unwrap(); // fills chunk boundary
            ds.append(&[50i32, 60, 70, 80]).unwrap(); // full chunk

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("values").unwrap();
            assert_eq!(ds.shape(), vec![8]);
            let all = ds.read_raw::<i32>().unwrap();
            assert_eq!(all, vec![10, 20, 30, 40, 50, 60, 70, 80]);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn append_partial_chunk_flushed_on_close() {
        let path = temp_path("append_partial_close");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0])
                .chunk(&[4])
                .max_shape(&[None])
                .create("vals")
                .unwrap();

            // Append 5 elements: chunk 0 = full [1,2,3,4], chunk 1 = partial [5,0,0,0]
            ds.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("vals").unwrap();
            assert_eq!(ds.shape(), vec![5]);
            let all = ds.read_raw::<f64>().unwrap();
            // The full dataset is 2 chunks * 4 = 8 elements; shape says 5
            // read_raw reads total shape elements
            assert_eq!(all.len(), 5);
            assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        }

        std::fs::remove_file(&path).ok();
    }
}