// rust_hdf5/dataset.rs
1//! Dataset creation and I/O.
2//!
3//! Datasets are created via the fluent [`DatasetBuilder`] API obtained from
4//! [`H5File::new_dataset`](crate::file::H5File::new_dataset). Once created,
5//! the [`H5Dataset`] handle can read or write raw typed data.
6
7use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
12// ---------------------------------------------------------------------------
13// DatasetBuilder
14// ---------------------------------------------------------------------------
15
16/// A fluent builder for creating datasets.
17///
18/// Obtained from [`H5File::new_dataset::<T>()`](crate::file::H5File::new_dataset).
19///
20/// ```no_run
21/// # use rust_hdf5::H5File;
22/// let file = H5File::create("builder.h5").unwrap();
23/// let ds = file.new_dataset::<f32>()
24///     .shape(&[10, 20])
25///     .create("temperatures")
26///     .unwrap();
27/// ```
pub struct DatasetBuilder<T: H5Type> {
    /// Shared handle to the owning file's backend (writer or reader).
    file_inner: SharedInner,
    /// Dataset dimensions; must be set before `create()`.
    /// An empty vector denotes a scalar (0-dimensional) dataset.
    shape: Option<Vec<usize>>,
    /// Chunk dimensions; `Some` selects chunked storage in `create()`.
    chunk_dims: Option<Vec<usize>>,
    /// Maximum dimensions; a `None` entry means unlimited in that dimension.
    max_shape: Option<Vec<Option<usize>>>,
    /// Deflate (gzip) compression level, if requested via `deflate()`.
    deflate_level: Option<u32>,
    /// Shuffle + deflate compression level, if requested via `shuffle_deflate()`.
    shuffle_deflate_level: Option<u32>,
    /// Explicit filter pipeline; takes precedence over the deflate options.
    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
    /// Parent group path when obtained from a group handle (`None` = root).
    group_path: Option<String>,
    /// Ties the element type `T` to the builder without storing a value.
    _marker: std::marker::PhantomData<T>,
}
39
40impl<T: H5Type> DatasetBuilder<T> {
41    pub(crate) fn new(file_inner: SharedInner) -> Self {
42        Self {
43            file_inner,
44            shape: None,
45            chunk_dims: None,
46            max_shape: None,
47            deflate_level: None,
48            shuffle_deflate_level: None,
49            custom_pipeline: None,
50            group_path: None,
51            _marker: std::marker::PhantomData,
52        }
53    }
54
55    pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
56        Self {
57            file_inner,
58            shape: None,
59            chunk_dims: None,
60            max_shape: None,
61            deflate_level: None,
62            shuffle_deflate_level: None,
63            custom_pipeline: None,
64            group_path: Some(group_path),
65            _marker: std::marker::PhantomData,
66        }
67    }
68
69    /// Set the dataset dimensions.
70    ///
71    /// This is required before calling [`create`](Self::create).
72    /// Use an empty slice `&[]` for a scalar (0-dimensional) dataset.
73    #[must_use]
74    pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
75        self.shape = Some(dims.as_ref().to_vec());
76        self
77    }
78
79    /// Create a scalar (0-dimensional) dataset holding a single value.
80    #[must_use]
81    pub fn scalar(mut self) -> Self {
82        self.shape = Some(vec![]);
83        self
84    }
85
86    /// Set chunk dimensions for chunked storage.
87    ///
88    /// When set, the dataset uses chunked storage with the extensible array
89    /// index. You should also call [`max_shape`](Self::max_shape) or
90    /// [`resizable`](Self::resizable) to allow extending.
91    #[must_use]
92    pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
93        self.chunk_dims = Some(chunk_dims.to_vec());
94        self
95    }
96
97    /// Make all dimensions unlimited (resizable).
98    ///
99    /// This sets max_dims to u64::MAX for all dimensions.
100    #[must_use]
101    pub fn resizable(mut self) -> Self {
102        self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
103        self
104    }
105
106    /// Set maximum dimensions. `None` means unlimited for that dimension.
107    #[must_use]
108    pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
109        self.max_shape = Some(max.to_vec());
110        self
111    }
112
113    /// Enable deflate (gzip) compression with the given level (0-9).
114    ///
115    /// Requires chunked storage (call `.chunk()` before `.create()`).
116    /// Level 0 = no compression, 9 = maximum compression. Default is 6.
117    #[must_use]
118    pub fn deflate(mut self, level: u32) -> Self {
119        self.deflate_level = Some(level);
120        self
121    }
122
123    /// Enable shuffle + deflate compression.
124    ///
125    /// Shuffle reorders bytes by position within elements before compression,
126    /// which typically improves compression ratios for numeric data.
127    /// Requires chunked storage.
128    #[must_use]
129    pub fn shuffle_deflate(mut self, level: u32) -> Self {
130        self.shuffle_deflate_level = Some(level);
131        self
132    }
133
134    /// Enable Zstandard compression with the given level (1-22, default 3).
135    ///
136    /// Requires chunked storage (call `.chunk()` before `.create()`).
137    #[must_use]
138    pub fn zstd(mut self, level: u32) -> Self {
139        self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
140        self
141    }
142
143    /// Set a custom filter pipeline for compression.
144    ///
145    /// This takes precedence over [`deflate`](Self::deflate) and
146    /// [`shuffle_deflate`](Self::shuffle_deflate). Requires chunked storage.
147    #[must_use]
148    pub fn filter_pipeline(
149        mut self,
150        pipeline: crate::format::messages::filter::FilterPipeline,
151    ) -> Self {
152        self.custom_pipeline = Some(pipeline);
153        self
154    }
155
156    /// Finalize and create the dataset with the given `name`.
157    ///
158    /// The name is the link name within the root group (e.g. `"data"` or
159    /// `"group1/data"` once nested groups are supported).
160    pub fn create(self, name: &str) -> Result<H5Dataset> {
161        let shape = self.shape.ok_or_else(|| {
162            Hdf5Error::InvalidState("shape must be set before calling create()".into())
163        })?;
164
165        // Build the full name: if created within a group, prefix with group path
166        let full_name = if let Some(ref gp) = self.group_path {
167            if gp == "/" {
168                name.to_string()
169            } else {
170                let trimmed = gp.trim_start_matches('/');
171                format!("{}/{}", trimmed, name)
172            }
173        } else {
174            name.to_string()
175        };
176        let group_path = self.group_path.clone();
177
178        let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
179        let datatype = T::hdf5_type();
180        let element_size = T::element_size();
181
182        if let Some(ref chunk_dims) = self.chunk_dims {
183            // Chunked dataset
184            let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
185            let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
186                max.iter()
187                    .map(|m| m.map_or(u64::MAX, |v| v as u64))
188                    .collect()
189            } else {
190                // Default: max = current
191                dims_u64.clone()
192            };
193
194            let index = {
195                let mut inner = borrow_inner_mut(&self.file_inner);
196                match &mut *inner {
197                    H5FileInner::Writer(writer) => {
198                        let idx = if let Some(pipeline) = self.custom_pipeline {
199                            writer.create_chunked_dataset_with_pipeline(
200                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
201                            )?
202                        } else if let Some(level) = self.shuffle_deflate_level {
203                            let pipeline =
204                                crate::format::messages::filter::FilterPipeline::shuffle_deflate(
205                                    T::element_size() as u32,
206                                    level,
207                                );
208                            writer.create_chunked_dataset_with_pipeline(
209                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
210                            )?
211                        } else if let Some(level) = self.deflate_level {
212                            writer.create_chunked_dataset_compressed(
213                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
214                            )?
215                        } else {
216                            writer.create_chunked_dataset(
217                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
218                            )?
219                        };
220                        if let Some(ref gp) = group_path {
221                            if gp != "/" {
222                                writer.assign_dataset_to_group(gp, idx)?;
223                            }
224                        }
225                        idx
226                    }
227                    H5FileInner::Reader(_) => {
228                        return Err(Hdf5Error::InvalidState(
229                            "cannot create a dataset in read mode".into(),
230                        ));
231                    }
232                    H5FileInner::Closed => {
233                        return Err(Hdf5Error::InvalidState("file is closed".into()));
234                    }
235                }
236            };
237
238            Ok(H5Dataset {
239                file_inner: clone_inner(&self.file_inner),
240                info: DatasetInfo::Writer {
241                    index,
242                    shape,
243                    element_size,
244                    chunked: true,
245                },
246            })
247        } else {
248            // Contiguous dataset (original path)
249            let index = {
250                let mut inner = borrow_inner_mut(&self.file_inner);
251                match &mut *inner {
252                    H5FileInner::Writer(writer) => {
253                        let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
254                        if let Some(ref gp) = group_path {
255                            if gp != "/" {
256                                writer.assign_dataset_to_group(gp, idx)?;
257                            }
258                        }
259                        idx
260                    }
261                    H5FileInner::Reader(_) => {
262                        return Err(Hdf5Error::InvalidState(
263                            "cannot create a dataset in read mode".into(),
264                        ));
265                    }
266                    H5FileInner::Closed => {
267                        return Err(Hdf5Error::InvalidState("file is closed".into()));
268                    }
269                }
270            };
271
272            Ok(H5Dataset {
273                file_inner: clone_inner(&self.file_inner),
274                info: DatasetInfo::Writer {
275                    index,
276                    shape,
277                    element_size,
278                    chunked: false,
279                },
280            })
281        }
282    }
283}
284
285// ---------------------------------------------------------------------------
286// DatasetInfo
287// ---------------------------------------------------------------------------
288
/// Internal metadata about a dataset handle.
///
/// Write-mode handles address the dataset by its index in the writer's
/// dataset list; read-mode handles address it by link name.
enum DatasetInfo {
    /// A dataset created via `new_dataset().create()` in write mode.
    Writer {
        /// Index into the writer's dataset list.
        index: usize,
        /// Shape (current dimensions).
        shape: Vec<usize>,
        /// Size of one element in bytes.
        element_size: usize,
        /// Whether this is a chunked dataset.
        chunked: bool,
    },
    /// A dataset opened by name in read mode.
    Reader {
        /// The link name of the dataset.
        name: String,
        /// Shape (current dimensions).
        shape: Vec<usize>,
        /// Size of one element in bytes.
        element_size: usize,
    },
}
312
313// ---------------------------------------------------------------------------
314// H5Dataset
315// ---------------------------------------------------------------------------
316
/// A handle to an HDF5 dataset, supporting typed read and write operations.
///
/// The dataset holds a shared reference to the file's I/O backend, so it
/// remains valid even if the originating [`H5File`](crate::file::H5File) is
/// moved or dropped (they share ownership via `Rc`).
pub struct H5Dataset {
    /// Shared handle to the file's I/O backend.
    file_inner: SharedInner,
    /// Write-mode (index-addressed) or read-mode (name-addressed) metadata.
    info: DatasetInfo,
}
326
327impl H5Dataset {
328    /// Create a reader-mode dataset handle (called internally by `H5File::dataset`).
329    pub(crate) fn new_reader(
330        file_inner: SharedInner,
331        name: String,
332        shape: Vec<usize>,
333        element_size: usize,
334    ) -> Self {
335        Self {
336            file_inner,
337            info: DatasetInfo::Reader {
338                name,
339                shape,
340                element_size,
341            },
342        }
343    }
344
345    /// Return the dataset dimensions.
346    pub fn shape(&self) -> Vec<usize> {
347        match &self.info {
348            DatasetInfo::Writer { shape, .. } => shape.clone(),
349            DatasetInfo::Reader { shape, .. } => shape.clone(),
350        }
351    }
352
353    /// Return the number of dimensions (rank) of the dataset.
354    pub fn ndims(&self) -> usize {
355        match &self.info {
356            DatasetInfo::Writer { shape, .. } => shape.len(),
357            DatasetInfo::Reader { shape, .. } => shape.len(),
358        }
359    }
360
361    /// Return the total number of elements in the dataset.
362    pub fn total_elements(&self) -> usize {
363        match &self.info {
364            DatasetInfo::Writer { shape, .. } => shape.iter().product(),
365            DatasetInfo::Reader { shape, .. } => shape.iter().product(),
366        }
367    }
368
369    /// Return the size of one element in bytes.
370    pub fn element_size(&self) -> usize {
371        match &self.info {
372            DatasetInfo::Writer { element_size, .. } => *element_size,
373            DatasetInfo::Reader { element_size, .. } => *element_size,
374        }
375    }
376
377    /// Return the chunk dimensions, if this is a chunked dataset.
378    pub fn chunk_dims(&self) -> Option<Vec<usize>> {
379        match &self.info {
380            DatasetInfo::Reader { name, .. } => {
381                let inner = borrow_inner(&self.file_inner);
382                if let H5FileInner::Reader(reader) = &*inner {
383                    if let Some(info) = reader.dataset_info(name) {
384                        if let crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 {
385                            chunk_dims,
386                            ..
387                        } = &info.layout
388                        {
389                            // Strip trailing element-size dimension
390                            return Some(
391                                chunk_dims[..chunk_dims.len() - 1]
392                                    .iter()
393                                    .map(|&d| d as usize)
394                                    .collect(),
395                            );
396                        }
397                    }
398                }
399                None
400            }
401            DatasetInfo::Writer { .. } => None,
402        }
403    }
404
405    /// Return whether this is a chunked dataset.
406    pub fn is_chunked(&self) -> bool {
407        match &self.info {
408            DatasetInfo::Writer { chunked, .. } => *chunked,
409            DatasetInfo::Reader { name, .. } => {
410                let inner = borrow_inner(&self.file_inner);
411                match &*inner {
412                    H5FileInner::Reader(reader) => {
413                        if let Some(info) = reader.dataset_info(name) {
414                            matches!(
415                                info.layout,
416                                crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 { .. }
417                            )
418                        } else {
419                            false
420                        }
421                    }
422                    _ => false,
423                }
424            }
425        }
426    }
427
428    /// Return the names of all attributes on this dataset (read mode only).
429    pub fn attr_names(&self) -> Result<Vec<String>> {
430        match &self.info {
431            DatasetInfo::Reader { name, .. } => {
432                let inner = borrow_inner(&self.file_inner);
433                match &*inner {
434                    H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
435                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
436                }
437            }
438            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
439                "attr_names not available in write mode".into(),
440            )),
441        }
442    }
443
444    /// Open an attribute by name (read mode only).
445    pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
446        match &self.info {
447            DatasetInfo::Reader { name, .. } => {
448                let inner = borrow_inner(&self.file_inner);
449                match &*inner {
450                    H5FileInner::Reader(reader) => {
451                        let attr_msg = reader.dataset_attr(name, attr_name)?;
452                        Ok(crate::attribute::H5Attribute::new_reader(
453                            clone_inner(&self.file_inner),
454                            attr_msg.name.clone(),
455                            attr_msg.data.clone(),
456                        ))
457                    }
458                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
459                }
460            }
461            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
462                "attr() not available in write mode".into(),
463            )),
464        }
465    }
466
467    /// Start building a new attribute on this dataset.
468    ///
469    /// Returns a fluent builder. Call `.shape(())` for a scalar attribute
470    /// and `.create("name")` to finalize.
471    ///
472    /// # Example
473    ///
474    /// ```no_run
475    /// # use rust_hdf5::H5File;
476    /// # use rust_hdf5::types::VarLenUnicode;
477    /// let file = H5File::create("attr.h5").unwrap();
478    /// let ds = file.new_dataset::<f32>().shape(&[10]).create("data").unwrap();
479    /// let attr = ds.new_attr::<VarLenUnicode>().shape(()).create("units").unwrap();
480    /// attr.write_scalar(&VarLenUnicode("meters".to_string())).unwrap();
481    /// ```
482    pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
483        let ds_index = match &self.info {
484            DatasetInfo::Writer { index, .. } => *index,
485            DatasetInfo::Reader { .. } => {
486                // Reader mode: we'll return a builder that will error on create.
487                // Using usize::MAX as sentinel.
488                usize::MAX
489            }
490        };
491        AttrBuilder::new(&self.file_inner, ds_index)
492    }
493
    /// Write a typed slice to the dataset (contiguous datasets only).
    ///
    /// The slice length must match the total number of elements declared by
    /// the dataset shape. The data is reinterpreted as raw bytes and written
    /// to the file.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The file is in read mode.
    /// - The dataset is chunked (use `write_chunk` instead).
    /// - The data length does not match the declared shape.
    /// - `T`'s element size differs from the dataset's element size.
    pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                shape,
                element_size,
                chunked,
            } => {
                // Chunked datasets have per-chunk addressing; raw contiguous
                // writes would bypass the chunk index.
                if *chunked {
                    return Err(Hdf5Error::InvalidState(
                        "use write_chunk for chunked datasets".into(),
                    ));
                }

                // Element count must exactly match the declared shape.
                let total_elements: usize = shape.iter().product();
                if data.len() != total_elements {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} does not match dataset size {}",
                        data.len(),
                        total_elements,
                    )));
                }

                // Verify element size matches
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "write type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                // Safety: T: Copy + 'static (numeric primitive) with well-defined
                // byte representation. The resulting slice borrows `data` and
                // lives only as long as this block.
                let byte_len = data.len() * T::element_size();
                let raw =
                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };

                // Re-check the backend is still a writer: the shared inner may
                // have transitioned (e.g. the file was closed) since creation.
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_dataset_raw(*index, raw)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
                "cannot write to a dataset opened in read mode".into(),
            )),
        }
    }
560
561    /// Write a single chunk to a chunked dataset.
562    ///
563    /// `chunk_idx` is the linear chunk index (typically the frame number for
564    /// streaming datasets). `data` is the raw byte data for one chunk.
565    pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
566        match &self.info {
567            DatasetInfo::Writer { index, chunked, .. } => {
568                if !*chunked {
569                    return Err(Hdf5Error::InvalidState(
570                        "write_chunk is only for chunked datasets".into(),
571                    ));
572                }
573
574                let mut inner = borrow_inner_mut(&self.file_inner);
575                match &mut *inner {
576                    H5FileInner::Writer(writer) => {
577                        writer.write_chunk(*index, chunk_idx as u64, data)?;
578                        Ok(())
579                    }
580                    _ => Err(Hdf5Error::InvalidState(
581                        "file is no longer in write mode".into(),
582                    )),
583                }
584            }
585            DatasetInfo::Reader { .. } => {
586                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
587            }
588        }
589    }
590
591    /// Write multiple chunks in a batch, optionally compressing in parallel.
592    ///
593    /// `chunks` is a slice of `(chunk_index, raw_data)` pairs. When a filter
594    /// pipeline is configured and the `parallel` feature is enabled, all
595    /// chunks are compressed concurrently via rayon.
596    pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
597        match &self.info {
598            DatasetInfo::Writer { index, chunked, .. } => {
599                if !*chunked {
600                    return Err(Hdf5Error::InvalidState(
601                        "write_chunks_batch is only for chunked datasets".into(),
602                    ));
603                }
604                let pairs: Vec<(u64, &[u8])> = chunks
605                    .iter()
606                    .map(|(idx, data)| (*idx as u64, *data))
607                    .collect();
608                let mut inner = borrow_inner_mut(&self.file_inner);
609                match &mut *inner {
610                    H5FileInner::Writer(writer) => {
611                        writer.write_chunks_batch(*index, &pairs)?;
612                        Ok(())
613                    }
614                    _ => Err(Hdf5Error::InvalidState(
615                        "file is no longer in write mode".into(),
616                    )),
617                }
618            }
619            DatasetInfo::Reader { .. } => {
620                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
621            }
622        }
623    }
624
    /// Append data along the first dimension of a chunked dataset.
    ///
    /// `data` must contain a whole number of "frames" — slices along
    /// dimension 0. For example, if the dataset has shape `[N, H, W]`
    /// and `chunk_dims = [1, H, W]`, then `data.len()` must be a
    /// multiple of `H * W`.
    ///
    /// This method writes the necessary chunks and extends the dataset
    /// shape automatically. Frames that do not fill a whole chunk are
    /// buffered on the dataset and flushed by a later append (the logical
    /// shape still advances immediately).
    ///
    /// ```no_run
    /// # use rust_hdf5::H5File;
    /// let file = H5File::create("append.h5").unwrap();
    /// let ds = file.new_dataset::<f64>()
    ///     .shape(&[0, 3])
    ///     .chunk(&[1, 3])
    ///     .max_shape(&[None, Some(3)])
    ///     .create("data")
    ///     .unwrap();
    /// ds.append(&[1.0, 2.0, 3.0]).unwrap();       // shape becomes [1, 3]
    /// ds.append(&[4.0, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap(); // shape becomes [3, 3]
    /// ```
    pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                element_size,
                chunked,
                ..
            } => {
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "append is only for chunked datasets".into(),
                    ));
                }
                // The element byte size must agree with the dataset's,
                // since the slice is reinterpreted as raw bytes below.
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "append type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let ds_index = *index;
                let es = *element_size;

                let mut inner = borrow_inner_mut(&self.file_inner);
                let writer = match &mut *inner {
                    H5FileInner::Writer(w) => w,
                    _ => {
                        return Err(Hdf5Error::InvalidState(
                            "file is no longer in write mode".into(),
                        ))
                    }
                };

                let chunk_dims = writer
                    .dataset_chunk_dims(ds_index)
                    .ok_or_else(|| {
                        Hdf5Error::InvalidState("dataset has no chunk info".into())
                    })?
                    .to_vec();
                let dims = writer.dataset_dims(ds_index).to_vec();

                // Frame size = product of dims[1..]
                // (a 1-D dataset has one element per frame).
                let frame_elems: usize = if dims.len() > 1 {
                    dims[1..].iter().map(|&d| d as usize).product()
                } else {
                    1
                };

                // A zero-size trailing dimension would make every frame empty
                // and the modulo check below divide by zero.
                if frame_elems == 0 {
                    return Err(Hdf5Error::InvalidState(
                        "cannot append to dataset with zero-size trailing dimensions".into(),
                    ));
                }

                if data.len() % frame_elems != 0 {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} is not a multiple of frame size {}",
                        data.len(),
                        frame_elems,
                    )));
                }

                let n_new_frames = data.len() / frame_elems;
                let current_dim0 = dims[0] as usize;

                // Chunk size along first dimension
                let chunk_dim0 = chunk_dims[0] as usize;
                // Bytes per chunk = product of all chunk_dims * element_size
                let chunk_bytes = chunk_dims.iter().map(|&d| d as usize).product::<usize>() * es;
                let frame_bytes = frame_elems * es;

                // SAFETY (pre-existing): reinterprets the typed slice as raw
                // bytes; `es` was verified to equal T::element_size() above,
                // and the borrow lives only for this call.
                let raw = unsafe {
                    std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
                };

                // Merge buffered data with new data
                // (frames from a previous append that did not fill a chunk).
                let ds = &mut writer.datasets[ds_index];
                let buffered_frames = ds.append_buffered_frames as usize;
                let mut combined = std::mem::take(&mut ds.append_buffer);
                combined.extend_from_slice(raw);
                ds.append_buffered_frames = 0;

                let total_frames = buffered_frames + n_new_frames;
                let total_bytes = combined.len();

                // Base chunk index: account for buffered frames.
                // NOTE(review): this assumes `base_dim0` lands on a chunk
                // boundary whenever frames were buffered (appends alone
                // maintain that invariant); interleaving `extend()` with
                // `append()` may hit the zero-padding branch below — confirm.
                let base_dim0 = current_dim0 - buffered_frames;
                let mut byte_pos = 0usize;
                let mut frame_pos = 0usize;

                while frame_pos < total_frames {
                    let abs_frame = base_dim0 + frame_pos;
                    let chunk_idx = abs_frame / chunk_dim0;
                    let remaining_frames = total_frames - frame_pos;
                    // Frames needed to reach the next chunk boundary.
                    let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);

                    if remaining_frames >= frames_to_fill {
                        // Full chunk — write
                        let end = byte_pos + frames_to_fill * frame_bytes;
                        if frames_to_fill == chunk_dim0 {
                            writer.write_chunk(ds_index, chunk_idx as u64, &combined[byte_pos..end])?;
                        } else {
                            // Partial start but fills to chunk boundary:
                            // zero-pad the leading portion of the chunk.
                            let mut chunk_buf = vec![0u8; chunk_bytes];
                            let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
                            chunk_buf[offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
                                .copy_from_slice(&combined[byte_pos..end]);
                            writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
                        }
                        byte_pos = end;
                        frame_pos += frames_to_fill;
                    } else {
                        // Partial chunk — buffer for next append
                        // (also terminates the loop).
                        let ds = &mut writer.datasets[ds_index];
                        ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
                        ds.append_buffered_frames = remaining_frames as u64;
                        frame_pos = total_frames;
                    }
                }

                // Extend dims to include all frames (buffered + new)
                let logical_dim0 = base_dim0 + total_frames;
                let mut new_dims: Vec<u64> = dims;
                new_dims[0] = logical_dim0 as u64;
                writer.extend_dataset(ds_index, &new_dims)?;

                Ok(())
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
            }
        }
    }
781
782    /// Extend the dimensions of a chunked dataset.
783    pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
784        match &self.info {
785            DatasetInfo::Writer { index, chunked, .. } => {
786                if !*chunked {
787                    return Err(Hdf5Error::InvalidState(
788                        "extend is only for chunked datasets".into(),
789                    ));
790                }
791
792                let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
793                let mut inner = borrow_inner_mut(&self.file_inner);
794                match &mut *inner {
795                    H5FileInner::Writer(writer) => {
796                        writer.extend_dataset(*index, &dims_u64)?;
797                        Ok(())
798                    }
799                    _ => Err(Hdf5Error::InvalidState(
800                        "file is no longer in write mode".into(),
801                    )),
802                }
803            }
804            DatasetInfo::Reader { .. } => {
805                Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
806            }
807        }
808    }
809
810    /// Flush a chunked dataset's index structures to disk.
811    pub fn flush(&self) -> Result<()> {
812        match &self.info {
813            DatasetInfo::Writer { index, .. } => {
814                let mut inner = borrow_inner_mut(&self.file_inner);
815                match &mut *inner {
816                    H5FileInner::Writer(writer) => {
817                        writer.flush_dataset(*index)?;
818                        Ok(())
819                    }
820                    _ => Ok(()),
821                }
822            }
823            DatasetInfo::Reader { .. } => Ok(()),
824        }
825    }
826
    /// Read a slice (hyperslab) of the dataset as a typed vector.
    ///
    /// `starts` and `counts` define the N-dimensional selection:
    /// `starts[d]` = first index along dim d, `counts[d]` = how many elements.
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The dataset handle is in write mode, or the file is not in read mode.
    /// - `T::element_size()` does not match the dataset's element size.
    /// - The raw byte count returned by the reader is not a multiple of
    ///   `T::element_size()`.
    pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
        match &self.info {
            DatasetInfo::Reader {
                name, element_size, ..
            } => {
                // Refuse up front to reinterpret bytes as a type of the
                // wrong width.
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "read type has element size {} but dataset has element size {}",
                        T::element_size(),
                        element_size,
                    )));
                }
                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();

                // Scope the mutable borrow of the shared inner state so it
                // is released before the conversion below.
                let raw = {
                    let mut inner = borrow_inner_mut(&self.file_inner);
                    match &mut *inner {
                        H5FileInner::Reader(reader) => {
                            reader.read_slice(name, &starts_u64, &counts_u64)?
                        }
                        _ => {
                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
                        }
                    }
                };

                if raw.len() % T::element_size() != 0 {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "raw data size {} is not a multiple of element size {}",
                        raw.len(),
                        T::element_size(),
                    )));
                }

                let count = raw.len() / T::element_size();
                let mut result = Vec::<T>::with_capacity(count);
                // SAFETY: T is Copy + 'static (required by H5Type). The
                // modulo check above guarantees raw.len() ==
                // count * T::element_size(), so the copy stays within the
                // capacity just reserved, and it fills that memory with
                // valid bit patterns for the numeric primitives that
                // implement H5Type. set_len runs only AFTER the copy, so
                // uninitialized memory is never exposed.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        raw.as_ptr(),
                        result.as_mut_ptr() as *mut u8,
                        raw.len(),
                    );
                    result.set_len(count);
                }
                Ok(result)
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read_slice from a dataset in write mode".into(),
            )),
        }
    }
883
884    /// Write a typed slice to a sub-region of a contiguous dataset.
885    ///
886    /// `starts` and `counts` define the N-dimensional selection.
887    pub fn write_slice<T: H5Type>(
888        &self,
889        starts: &[usize],
890        counts: &[usize],
891        data: &[T],
892    ) -> Result<()> {
893        match &self.info {
894            DatasetInfo::Writer {
895                index,
896                element_size,
897                chunked,
898                ..
899            } => {
900                if *chunked {
901                    return Err(Hdf5Error::InvalidState(
902                        "write_slice is only for contiguous datasets".into(),
903                    ));
904                }
905                if T::element_size() != *element_size {
906                    return Err(Hdf5Error::TypeMismatch(format!(
907                        "write type has element size {} but dataset expects {}",
908                        T::element_size(),
909                        element_size,
910                    )));
911                }
912
913                let expected: usize = counts.iter().product();
914                if data.len() != expected {
915                    return Err(Hdf5Error::InvalidState(format!(
916                        "data length {} does not match slice size {}",
917                        data.len(),
918                        expected,
919                    )));
920                }
921
922                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
923                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
924
925                let byte_len = data.len() * T::element_size();
926                let raw =
927                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
928
929                let mut inner = borrow_inner_mut(&self.file_inner);
930                match &mut *inner {
931                    H5FileInner::Writer(writer) => {
932                        writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
933                        Ok(())
934                    }
935                    _ => Err(Hdf5Error::InvalidState(
936                        "file is no longer in write mode".into(),
937                    )),
938                }
939            }
940            DatasetInfo::Reader { .. } => {
941                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
942            }
943        }
944    }
945
946    /// Read variable-length strings from a dataset.
947    ///
948    /// This handles h5py-style vlen string datasets that store strings
949    /// as global heap references. Returns one String per element.
950    pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
951        match &self.info {
952            DatasetInfo::Reader { name, .. } => {
953                let mut inner = borrow_inner_mut(&self.file_inner);
954                match &mut *inner {
955                    H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
956                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
957                }
958            }
959            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
960                "cannot read vlen strings from a dataset in write mode".into(),
961            )),
962        }
963    }
964
965    /// Read the entire dataset as a typed vector.
966    ///
967    /// The raw bytes are read from the file and reinterpreted as `T`. The
968    /// caller must ensure that `T` matches the datatype used when the dataset
969    /// was written.
970    ///
971    /// # Errors
972    ///
973    /// Returns an error if:
974    /// - The file is in write mode.
975    /// - The raw data size is not a multiple of `T::element_size()`.
976    pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
977        match &self.info {
978            DatasetInfo::Reader {
979                name, element_size, ..
980            } => {
981                if T::element_size() != *element_size {
982                    return Err(Hdf5Error::TypeMismatch(format!(
983                        "read type has element size {} but dataset has element size {}",
984                        T::element_size(),
985                        element_size,
986                    )));
987                }
988
989                let raw = {
990                    let mut inner = borrow_inner_mut(&self.file_inner);
991                    match &mut *inner {
992                        H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
993                        _ => {
994                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
995                        }
996                    }
997                };
998
999                if raw.len() % T::element_size() != 0 {
1000                    return Err(Hdf5Error::TypeMismatch(format!(
1001                        "raw data size {} is not a multiple of element size {}",
1002                        raw.len(),
1003                        T::element_size(),
1004                    )));
1005                }
1006
1007                let count = raw.len() / T::element_size();
1008                let mut result = Vec::<T>::with_capacity(count);
1009
1010                // Safety: T is Copy + 'static (required by H5Type). We verified
1011                // the byte count matches count * size_of::<T>() above.
1012                // copy_nonoverlapping fills the memory with valid bit patterns
1013                // for all H5Type implementors (numeric primitives).
1014                // We call set_len AFTER the copy so that if an unexpected panic
1015                // occurs, uninitialized memory is never exposed.
1016                unsafe {
1017                    std::ptr::copy_nonoverlapping(
1018                        raw.as_ptr(),
1019                        result.as_mut_ptr() as *mut u8,
1020                        raw.len(),
1021                    );
1022                    result.set_len(count);
1023                }
1024
1025                Ok(result)
1026            }
1027            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1028                "cannot read from a dataset in write mode".into(),
1029            )),
1030        }
1031    }
1032}
1033
#[cfg(test)]
mod tests {
    //! Integration-style tests that exercise the dataset API end-to-end
    //! through real files in the OS temp directory. Each test cleans up
    //! its file with a best-effort `remove_file(..).ok()`.

    use crate::H5File;
    use std::path::PathBuf;

    /// Build a unique temp-file path for a test, keyed by `name`.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("hdf5_dataset_test_{}.h5", name))
    }

    // Creating a dataset without calling `.shape()` must fail.
    #[test]
    fn builder_requires_shape() {
        let path = temp_path("no_shape");
        let file = H5File::create(&path).unwrap();
        let result = file.new_dataset::<u8>().create("data");
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    // write_raw must reject data whose length disagrees with the shape.
    #[test]
    fn write_raw_size_mismatch() {
        let path = temp_path("size_mismatch");
        let file = H5File::create(&path).unwrap();
        let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
        // Provide 3 elements instead of 4
        let result = ds.write_raw(&[1u8, 2, 3]);
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    // Write a 1-D u8 dataset, close, reopen, and read it back verbatim.
    #[test]
    fn roundtrip_u8_1d() {
        let path = temp_path("rt_u8_1d");
        let data: Vec<u8> = (0..10).collect();

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("seq").unwrap();
            assert_eq!(ds.shape(), vec![10]);
            let readback = ds.read_raw::<u8>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }

    // Round-trip a 2-D i32 dataset including negative values.
    #[test]
    fn roundtrip_i32_2d() {
        let path = temp_path("rt_i32_2d");
        let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<i32>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }

    // Round-trip a 3-D f64 dataset (24 elements in a 2x3x4 cube).
    #[test]
    fn roundtrip_f64_3d() {
        let path = temp_path("rt_f64_3d");
        let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3, 4])
                .create("cube")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("cube").unwrap();
            assert_eq!(ds.shape(), vec![2, 3, 4]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }

    // read_raw on a write-mode handle must be rejected.
    #[test]
    fn cannot_read_in_write_mode() {
        let path = temp_path("no_read_write");
        let file = H5File::create(&path).unwrap();
        let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
        let result = ds.read_raw::<u8>();
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }

    // write_raw on a read-mode handle must be rejected.
    #[test]
    fn cannot_write_in_read_mode() {
        let path = temp_path("no_write_read");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("x").unwrap();
            let result = ds.write_raw(&[5u8, 6, 7, 8]);
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }

    // Scalar numeric attributes (f64, i32) survive a write/read round-trip.
    #[test]
    fn numeric_attr_roundtrip() {
        let path = temp_path("num_attr");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1.0f32; 4]).unwrap();

            let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
            a1.write_numeric(&1.2345f64).unwrap();

            let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
            a2.write_numeric(&42i32).unwrap();

            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();

            let scale = ds.attr("scale").unwrap();
            let val: f64 = scale.read_numeric().unwrap();
            assert!((val - 1.2345).abs() < 1e-10);

            let count = ds.attr("count").unwrap();
            let val: i32 = count.read_numeric().unwrap();
            assert_eq!(val, 42);
        }
        std::fs::remove_file(&path).ok();
    }

    // Dataset creation must fail on a file opened read-only.
    #[test]
    fn cannot_create_dataset_in_read_mode() {
        let path = temp_path("no_create_read");

        {
            let _file = H5File::create(&path).unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let result = file.new_dataset::<u8>().shape([4]).create("x");
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }

    // shape() reflects the dimensions passed to the builder.
    #[test]
    fn shape_accessor() {
        let path = temp_path("shape_acc");

        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<f32>()
            .shape([5, 10, 3])
            .create("tensor")
            .unwrap();
        assert_eq!(ds.shape(), vec![5, 10, 3]);

        std::fs::remove_file(&path).ok();
    }

    // read_slice returns the expected 2x2 hyperslab from a 4x5 matrix.
    #[test]
    fn slice_roundtrip_2d() {
        let path = temp_path("slice_2d");

        // Create a 4x5 dataset, write full, then read a slice
        let data: Vec<i32> = (0..20).collect();
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([4, 5])
                .create("mat")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("mat").unwrap();
            // Read rows 1..3, cols 2..4 (2x2 slice)
            let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
            // Row 1: [5,6,7,8,9] -> cols 2..4 = [7,8]
            // Row 2: [10,11,12,13,14] -> cols 2..4 = [12,13]
            assert_eq!(slice, vec![7, 8, 12, 13]);
        }

        std::fs::remove_file(&path).ok();
    }

    // write_slice overwrites only the selected 2x2 sub-region.
    #[test]
    fn write_slice_2d() {
        let path = temp_path("write_slice_2d");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f32>()
                .shape([3, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0.0f32; 12]).unwrap();
            // Overwrite a 2x2 sub-region
            ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            let full = ds.read_raw::<f32>().unwrap();
            // Row 0: [0,0,0,0]
            // Row 1: [0,10,20,0]
            // Row 2: [0,30,40,0]
            assert_eq!(
                full,
                vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
            );
        }

        std::fs::remove_file(&path).ok();
    }

    // Variable-length string attributes round-trip and are listed by name.
    #[test]
    fn attr_read_roundtrip() {
        use crate::types::VarLenUnicode;
        let path = temp_path("attr_read");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
            let a1 = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            a1.write_string("meters").unwrap();
            let a2 = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("desc")
                .unwrap();
            a2.write_string("test data").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();

            let names = ds.attr_names().unwrap();
            assert!(names.contains(&"units".to_string()));
            assert!(names.contains(&"desc".to_string()));

            let units = ds.attr("units").unwrap();
            assert_eq!(units.read_string().unwrap(), "meters");

            let desc = ds.attr("desc").unwrap();
            assert_eq!(desc.read_string().unwrap(), "test data");
        }

        std::fs::remove_file(&path).ok();
    }

    // Reading with a type of a different element size must fail.
    #[test]
    fn type_mismatch_element_size() {
        let path = temp_path("type_mismatch");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            // Try to read as u8 (element_size = 1) from a f64 dataset (element_size = 8)
            let result = ds.read_raw::<u8>();
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }

    // A dataset handle keeps the shared inner state alive after the
    // H5File handle is dropped.
    #[test]
    fn dataset_survives_file_move() {
        let path = temp_path("ds_survives");

        let ds = {
            let file = H5File::create(&path).unwrap();
            file.new_dataset::<u8>().shape([4]).create("x").unwrap()
        };
        // file is dropped here, but ds still holds Rc to the inner state
        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
        // The writer will finalize on drop of the last Rc

        std::fs::remove_file(&path).ok();
    }

    // Writing a scalar vlen-string attribute leaves the file readable.
    #[test]
    fn new_attr_scalar_string() {
        use crate::types::VarLenUnicode;

        let path = temp_path("attr_scalar_string");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();

            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("name")
                .unwrap();
            attr.write_scalar(&VarLenUnicode("test_value".to_string()))
                .unwrap();

            file.close().unwrap();
        }

        // Verify the file is still valid and readable
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4]);
            let readback = ds.read_raw::<u8>().unwrap();
            assert_eq!(readback, vec![1u8, 2, 3, 4]);
        }

        std::fs::remove_file(&path).ok();
    }

    // Every supported numeric primitive round-trips through its own dataset.
    #[test]
    fn all_numeric_types_roundtrip() {
        let path = temp_path("all_types");

        {
            let file = H5File::create(&path).unwrap();

            let ds = file.new_dataset::<u8>().shape([2]).create("u8").unwrap();
            ds.write_raw(&[1u8, 2]).unwrap();

            let ds = file.new_dataset::<i8>().shape([2]).create("i8").unwrap();
            ds.write_raw(&[-1i8, 1]).unwrap();

            let ds = file.new_dataset::<u16>().shape([2]).create("u16").unwrap();
            ds.write_raw(&[100u16, 200]).unwrap();

            let ds = file.new_dataset::<i16>().shape([2]).create("i16").unwrap();
            ds.write_raw(&[-100i16, 100]).unwrap();

            let ds = file.new_dataset::<u32>().shape([2]).create("u32").unwrap();
            ds.write_raw(&[1000u32, 2000]).unwrap();

            let ds = file.new_dataset::<i32>().shape([2]).create("i32").unwrap();
            ds.write_raw(&[-1000i32, 1000]).unwrap();

            let ds = file.new_dataset::<u64>().shape([2]).create("u64").unwrap();
            ds.write_raw(&[10000u64, 20000]).unwrap();

            let ds = file.new_dataset::<i64>().shape([2]).create("i64").unwrap();
            ds.write_raw(&[-10000i64, 10000]).unwrap();

            let ds = file.new_dataset::<f32>().shape([2]).create("f32").unwrap();
            ds.write_raw(&[1.5f32, 2.5]).unwrap();

            let ds = file.new_dataset::<f64>().shape([2]).create("f64").unwrap();
            ds.write_raw(&[1.23456f64, 7.89012]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            assert_eq!(
                file.dataset("u8").unwrap().read_raw::<u8>().unwrap(),
                vec![1u8, 2]
            );
            assert_eq!(
                file.dataset("i8").unwrap().read_raw::<i8>().unwrap(),
                vec![-1i8, 1]
            );
            assert_eq!(
                file.dataset("u16").unwrap().read_raw::<u16>().unwrap(),
                vec![100u16, 200]
            );
            assert_eq!(
                file.dataset("i16").unwrap().read_raw::<i16>().unwrap(),
                vec![-100i16, 100]
            );
            assert_eq!(
                file.dataset("u32").unwrap().read_raw::<u32>().unwrap(),
                vec![1000u32, 2000]
            );
            assert_eq!(
                file.dataset("i32").unwrap().read_raw::<i32>().unwrap(),
                vec![-1000i32, 1000]
            );
            assert_eq!(
                file.dataset("u64").unwrap().read_raw::<u64>().unwrap(),
                vec![10000u64, 20000]
            );
            assert_eq!(
                file.dataset("i64").unwrap().read_raw::<i64>().unwrap(),
                vec![-10000i64, 10000]
            );
            assert_eq!(
                file.dataset("f32").unwrap().read_raw::<f32>().unwrap(),
                vec![1.5f32, 2.5]
            );
            assert_eq!(
                file.dataset("f64").unwrap().read_raw::<f64>().unwrap(),
                vec![1.23456f64, 7.89012]
            );
        }

        std::fs::remove_file(&path).ok();
    }

    // Appending to a 2-D chunked dataset grows dim 0 and preserves data.
    #[test]
    fn append_chunked_roundtrip() {
        let path = temp_path("append_chunked");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape(&[0, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("data")
                .unwrap();

            // Append one frame
            ds.append(&[1.0f64, 2.0, 3.0]).unwrap();
            // Append two frames at once
            ds.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![3, 3]);
            let all = ds.read_raw::<f64>().unwrap();
            assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    // 1-D appends that straddle chunk boundaries land in the right order.
    #[test]
    fn append_1d_chunked() {
        let path = temp_path("append_1d");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape(&[0])
                .chunk(&[4])
                .max_shape(&[None])
                .create("values")
                .unwrap();

            ds.append(&[10i32, 20, 30]).unwrap(); // partial chunk
            ds.append(&[40i32]).unwrap(); // fills chunk boundary
            ds.append(&[50i32, 60, 70, 80]).unwrap(); // full chunk

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("values").unwrap();
            assert_eq!(ds.shape(), vec![8]);
            let all = ds.read_raw::<i32>().unwrap();
            assert_eq!(all, vec![10, 20, 30, 40, 50, 60, 70, 80]);
        }

        std::fs::remove_file(&path).ok();
    }

    // A trailing partial chunk buffered by append() must reach disk when
    // the file is closed.
    #[test]
    fn append_partial_chunk_flushed_on_close() {
        let path = temp_path("append_partial_close");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape(&[0])
                .chunk(&[4])
                .max_shape(&[None])
                .create("vals")
                .unwrap();

            // Append 5 elements: chunk 0 = full [1,2,3,4], chunk 1 = partial [5,0,0,0]
            ds.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("vals").unwrap();
            assert_eq!(ds.shape(), vec![5]);
            let all = ds.read_raw::<f64>().unwrap();
            // The full dataset is 2 chunks * 4 = 8 elements; shape says 5
            // read_raw reads total shape elements
            assert_eq!(all.len(), 5);
            assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        }

        std::fs::remove_file(&path).ok();
    }
}