// rust_hdf5/file.rs
1//! HDF5 file handle — the main entry point for the public API.
2//!
3//! ```no_run
4//! use rust_hdf5::H5File;
5//!
6//! // Write
7//! let file = H5File::create("example.h5").unwrap();
8//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
9//! ds.write_raw(&vec![0u8; 200]).unwrap();
10//! drop(file);
11//!
12//! // Read
13//! let file = H5File::open("example.h5").unwrap();
14//! let ds = file.dataset("data").unwrap();
15//! let data = ds.read_raw::<u8>().unwrap();
16//! assert_eq!(data.len(), 200);
17//! ```
18
19use std::path::Path;
20
21use crate::io::{Hdf5Reader, Hdf5Writer};
22
23use crate::dataset::{DatasetBuilder, H5Dataset};
24use crate::error::{Hdf5Error, Result};
25use crate::format::messages::filter::FilterPipeline;
26use crate::group::H5Group;
27use crate::types::H5Type;
28
29// ---------------------------------------------------------------------------
30// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
31// the `threadsafe` feature flag.
32// ---------------------------------------------------------------------------
33
/// Shared-ownership wrapper for the file's inner state.
///
/// Default (single-threaded) build: `Rc<RefCell<_>>` — no locking overhead,
/// but the handle is not `Send`.
#[cfg(not(feature = "threadsafe"))]
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;

/// Shared-ownership wrapper for the file's inner state.
///
/// `threadsafe` build: `Arc<Mutex<_>>`, making the handle `Send + Sync`.
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
39
/// Helper to borrow/lock the inner state immutably.
///
/// Panics if the state is currently mutably borrowed (`RefCell` rules).
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
    inner.borrow()
}

/// Helper to borrow/lock the inner state mutably.
///
/// Panics if the state is borrowed (mutably or immutably) elsewhere.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
    inner.borrow_mut()
}

/// Helper to clone a SharedInner.
///
/// Cheap: bumps the `Rc` reference count; the state itself is not copied.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    std::rc::Rc::clone(inner)
}

/// Helper to wrap an H5FileInner in SharedInner.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::rc::Rc::new(std::cell::RefCell::new(inner))
}
63
/// Helper to borrow/lock the inner state immutably (`threadsafe` build).
///
/// NOTE(review): `unwrap()` panics if the mutex is poisoned, i.e. another
/// thread panicked while holding the lock.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner.lock().unwrap()
}

/// Helper to borrow/lock the inner state mutably (`threadsafe` build).
///
/// A `Mutex` makes no shared/exclusive distinction, so this is identical to
/// `borrow_inner`; the separate name mirrors the `RefCell` API above.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner.lock().unwrap()
}

/// Helper to clone a SharedInner (`threadsafe` build): bumps the `Arc` count.
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    std::sync::Arc::clone(inner)
}

/// Helper to wrap an H5FileInner in SharedInner (`threadsafe` build).
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::sync::Arc::new(std::sync::Mutex::new(inner))
}
83
/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// By default, this uses `Rc<RefCell<>>` for zero-overhead single-threaded use.
/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
/// `H5File` `Send + Sync`.
pub(crate) enum H5FileInner {
    /// File opened via `create`/`open_rw`; accepts new datasets and writes.
    Writer(Hdf5Writer),
    /// File opened via `open`; read-only access to existing datasets.
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    Closed,
}
95
/// An HDF5 file opened for reading or writing.
///
/// Datasets created from this file hold a shared reference to the underlying
/// I/O handle, so the file does not need to outlive its datasets (they share
/// ownership via reference counting).
pub struct H5File {
    // Shared with every H5Dataset/H5Group handed out by this file.
    pub(crate) inner: SharedInner,
}
104
105impl H5File {
106    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
107    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
108        let writer = Hdf5Writer::create(path.as_ref())?;
109        Ok(Self {
110            inner: new_shared(H5FileInner::Writer(writer)),
111        })
112    }
113
114    /// Open an existing HDF5 file for reading.
115    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
116        let reader = Hdf5Reader::open(path.as_ref())?;
117        Ok(Self {
118            inner: new_shared(H5FileInner::Reader(reader)),
119        })
120    }
121
122    /// Open an existing HDF5 file for appending new datasets.
123    ///
124    /// Existing datasets are preserved. New datasets can be added and will
125    /// be written after the current end of file. Existing chunked datasets
126    /// can be extended with `write_chunk` and `extend_dataset`.
127    ///
128    /// ```no_run
129    /// use rust_hdf5::H5File;
130    /// let file = H5File::open_rw("existing.h5").unwrap();
131    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
132    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
133    /// file.close().unwrap();
134    /// ```
135    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
136        let writer = Hdf5Writer::open_append(path.as_ref())?;
137        Ok(Self {
138            inner: new_shared(H5FileInner::Writer(writer)),
139        })
140    }
141
142    /// Return a handle to the root group.
143    ///
144    /// The root group can be used to create datasets and sub-groups.
145    pub fn root_group(&self) -> H5Group {
146        H5Group::new(clone_inner(&self.inner), "/".to_string())
147    }
148
149    /// Create a group in the root of the file.
150    ///
151    /// ```no_run
152    /// use rust_hdf5::H5File;
153    /// let file = H5File::create("groups.h5").unwrap();
154    /// let grp = file.create_group("detector").unwrap();
155    /// ```
156    pub fn create_group(&self, name: &str) -> Result<H5Group> {
157        self.root_group().create_group(name)
158    }
159
160    /// Start building a new dataset with the given element type.
161    ///
162    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
163    /// `.create("name")` to finalize.
164    ///
165    /// ```no_run
166    /// # use rust_hdf5::H5File;
167    /// let file = H5File::create("build.h5").unwrap();
168    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
169    /// ```
170    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
171        DatasetBuilder::new(clone_inner(&self.inner))
172    }
173
174    /// Add a string attribute to the file (root group).
175    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
176        use crate::format::messages::attribute::AttributeMessage;
177        let attr = AttributeMessage::scalar_string(name, value);
178        let mut inner = borrow_inner_mut(&self.inner);
179        match &mut *inner {
180            H5FileInner::Writer(writer) => {
181                writer.add_root_attribute(attr);
182                Ok(())
183            }
184            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
185        }
186    }
187
188    /// Add a numeric attribute to the file (root group).
189    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
190        use crate::format::messages::attribute::AttributeMessage;
191        let es = T::element_size();
192        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
193        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
194        let mut inner = borrow_inner_mut(&self.inner);
195        match &mut *inner {
196            H5FileInner::Writer(writer) => {
197                writer.add_root_attribute(attr);
198                Ok(())
199            }
200            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
201        }
202    }
203
204    /// Return the names of file-level (root group) attributes.
205    pub fn attr_names(&self) -> Result<Vec<String>> {
206        let inner = borrow_inner(&self.inner);
207        match &*inner {
208            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
209            _ => Ok(vec![]),
210        }
211    }
212
213    /// Read a file-level string attribute.
214    pub fn attr_string(&self, name: &str) -> Result<String> {
215        let inner = borrow_inner(&self.inner);
216        match &*inner {
217            H5FileInner::Reader(reader) => {
218                let attr = reader
219                    .root_attr(name)
220                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
221                let end = attr
222                    .data
223                    .iter()
224                    .position(|&b| b == 0)
225                    .unwrap_or(attr.data.len());
226                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
227            }
228            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
229        }
230    }
231
232    /// Check if the file is in write/append mode.
233    pub fn is_writable(&self) -> bool {
234        let inner = borrow_inner(&self.inner);
235        matches!(&*inner, H5FileInner::Writer(_))
236    }
237
238    /// Create a variable-length string dataset and write data.
239    ///
240    /// This is a convenience method for writing h5py-compatible vlen string
241    /// datasets using global heap storage.
242    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
243        let mut inner = borrow_inner_mut(&self.inner);
244        match &mut *inner {
245            H5FileInner::Writer(writer) => {
246                let idx = writer.create_vlen_string_dataset(name, strings)?;
247                // If the name contains '/', assign the dataset to its parent group
248                if let Some(slash_pos) = name.rfind('/') {
249                    let group_path = &name[..slash_pos];
250                    let abs_group_path = if group_path.starts_with('/') {
251                        group_path.to_string()
252                    } else {
253                        format!("/{}", group_path)
254                    };
255                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
256                }
257                Ok(())
258            }
259            H5FileInner::Reader(_) => {
260                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
261            }
262            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
263        }
264    }
265
266    /// Create a chunked, compressed variable-length string dataset.
267    ///
268    /// Like `write_vlen_strings`, but stores the vlen references in chunked
269    /// layout with the given filter pipeline (e.g., `FilterPipeline::deflate(6)`
270    /// or `FilterPipeline::zstd(3)`). `chunk_size` is the number of strings
271    /// per chunk.
272    pub fn write_vlen_strings_compressed(
273        &self,
274        name: &str,
275        strings: &[&str],
276        chunk_size: usize,
277        pipeline: FilterPipeline,
278    ) -> Result<()> {
279        let mut inner = borrow_inner_mut(&self.inner);
280        match &mut *inner {
281            H5FileInner::Writer(writer) => {
282                let idx = writer
283                    .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
284                if let Some(slash_pos) = name.rfind('/') {
285                    let group_path = &name[..slash_pos];
286                    let abs_group_path = if group_path.starts_with('/') {
287                        group_path.to_string()
288                    } else {
289                        format!("/{}", group_path)
290                    };
291                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
292                }
293                Ok(())
294            }
295            H5FileInner::Reader(_) => {
296                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
297            }
298            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
299        }
300    }
301
302    /// Create an empty chunked vlen string dataset ready for incremental appends.
303    ///
304    /// Use `append_vlen_strings` to add data. If `pipeline` is `Some`, chunks
305    /// are compressed (e.g., `Some(FilterPipeline::lz4())`).
306    pub fn create_appendable_vlen_dataset(
307        &self,
308        name: &str,
309        chunk_size: usize,
310        pipeline: Option<FilterPipeline>,
311    ) -> Result<()> {
312        let mut inner = borrow_inner_mut(&self.inner);
313        match &mut *inner {
314            H5FileInner::Writer(writer) => {
315                let idx =
316                    writer.create_appendable_vlen_string_dataset(name, chunk_size, pipeline)?;
317                if let Some(slash_pos) = name.rfind('/') {
318                    let group_path = &name[..slash_pos];
319                    let abs_group_path = if group_path.starts_with('/') {
320                        group_path.to_string()
321                    } else {
322                        format!("/{}", group_path)
323                    };
324                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
325                }
326                Ok(())
327            }
328            H5FileInner::Reader(_) => {
329                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
330            }
331            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
332        }
333    }
334
335    /// Append variable-length strings to an existing chunked vlen string dataset.
336    pub fn append_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
337        let mut inner = borrow_inner_mut(&self.inner);
338        match &mut *inner {
339            H5FileInner::Writer(writer) => {
340                let ds_index = writer
341                    .dataset_index(name)
342                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
343                writer.append_vlen_strings(ds_index, strings)?;
344                Ok(())
345            }
346            H5FileInner::Reader(_) => {
347                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
348            }
349            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
350        }
351    }
352
353    /// Delete a dataset by name. The dataset is unlinked on close;
354    /// file space is not reclaimed.
355    pub fn delete_dataset(&self, name: &str) -> Result<()> {
356        let mut inner = borrow_inner_mut(&self.inner);
357        match &mut *inner {
358            H5FileInner::Writer(writer) => {
359                writer.delete_dataset(name)?;
360                Ok(())
361            }
362            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
363        }
364    }
365
366    /// Delete a group and all its child datasets/sub-groups.
367    /// File space is not reclaimed.
368    pub fn delete_group(&self, name: &str) -> Result<()> {
369        let mut inner = borrow_inner_mut(&self.inner);
370        match &mut *inner {
371            H5FileInner::Writer(writer) => {
372                writer.delete_group(name)?;
373                Ok(())
374            }
375            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
376        }
377    }
378
379    /// Open an existing dataset by name (read mode).
380    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
381        let inner = borrow_inner(&self.inner);
382        match &*inner {
383            H5FileInner::Reader(reader) => {
384                let info = reader
385                    .dataset_info(name)
386                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
387                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
388                let element_size = info.datatype.element_size() as usize;
389                Ok(H5Dataset::new_reader(
390                    clone_inner(&self.inner),
391                    name.to_string(),
392                    shape,
393                    element_size,
394                ))
395            }
396            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
397                "cannot open a dataset by name in write mode; use new_dataset() instead"
398                    .to_string(),
399            )),
400            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
401        }
402    }
403
404    /// Return the names of all datasets in the root group.
405    ///
406    /// Works in both read and write mode: in write mode, returns the names of
407    /// datasets created so far; in read mode, returns the names discovered
408    /// during file open.
409    pub fn dataset_names(&self) -> Vec<String> {
410        let inner = borrow_inner(&self.inner);
411        match &*inner {
412            H5FileInner::Reader(reader) => reader
413                .dataset_names()
414                .iter()
415                .map(|s| s.to_string())
416                .collect(),
417            H5FileInner::Writer(writer) => writer
418                .dataset_names()
419                .iter()
420                .map(|s| s.to_string())
421                .collect(),
422            H5FileInner::Closed => Vec::new(),
423        }
424    }
425
426    /// Explicitly close the file. For a writer, this finalizes the file
427    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
428    ///
429    /// The file is also auto-finalized on drop, but calling `close()` lets
430    /// you handle errors.
431    pub fn close(self) -> Result<()> {
432        let old = {
433            let mut inner = borrow_inner_mut(&self.inner);
434            std::mem::replace(&mut *inner, H5FileInner::Closed)
435        };
436        match old {
437            H5FileInner::Writer(writer) => {
438                writer.close()?;
439                Ok(())
440            }
441            H5FileInner::Reader(_) => Ok(()),
442            H5FileInner::Closed => Ok(()),
443        }
444    }
445
    /// Flush the file to disk. Only meaningful in write mode.
    ///
    /// NOTE(review): currently a no-op kept for API compatibility — see the
    /// body comment. Callers should not rely on it adding durability beyond
    /// what the writer already provides.
    pub fn flush(&self) -> Result<()> {
        // The underlying writer does not expose a standalone flush; data is
        // written to disk immediately via pwrite. This is a compatibility
        // method that does nothing for now.
        Ok(())
    }
453}
454
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // Per-test temp-file path so concurrent tests don't collide on disk.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("hdf5_file_test_{}.h5", name))
    }

    // An empty file written via explicit close() must be re-openable.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();

        // Should be readable
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    // An empty file finalized by Drop (no explicit close) must also be valid.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        {
            let _file = H5File::create(&path).unwrap();
            // drop auto-finalizes
        }
        // Verify the file is valid by opening it
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    // Looking up a missing dataset must error rather than panic.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        let result = file.dataset("nonexistent");
        assert!(result.is_err());

        std::fs::remove_file(&path).ok();
    }

    // Write a 4x4 u8 dataset, reopen, and verify shape and contents.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([4, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let data = ds.read_raw::<u8>().unwrap();
            assert_eq!(data.len(), 16);
            assert!(data.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    // Round-trip a 2x3 f64 matrix and compare element-for-element.
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&values).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    // Two datasets of different types in one file must round-trip independently.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let file = H5File::create(&path).unwrap();
            let ds1 = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            ds1.write_raw(&[10i32, 20, 30]).unwrap();

            let ds2 = file
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            ds2.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            let ds_ints = file.dataset("ints").unwrap();
            assert_eq!(ds_ints.shape(), vec![3]);
            let ints = ds_ints.read_raw::<i32>().unwrap();
            assert_eq!(ints, vec![10, 20, 30]);

            let ds_floats = file.dataset("floats").unwrap();
            assert_eq!(ds_floats.shape(), vec![2, 2]);
            let floats = ds_floats.read_raw::<f32>().unwrap();
            assert_eq!(floats, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    // close(self) consumes the handle, so double-close cannot compile.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();
        // File is consumed by close(), so no double-close possible at the type level.
        std::fs::remove_file(&path).ok();
    }
}
609
610#[cfg(test)]
611mod integration_tests {
612    use super::*;
613
    /// Writes a small mixed-type file (u8, f64, i32) to the temp dir so it
    /// can be inspected externally with `h5dump`.
    #[test]
    fn write_file_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<u8>()
            .shape([4usize, 4])
            .create("data_u8")
            .unwrap();
        let data: Vec<u8> = (0..16).collect();
        ds.write_raw(&data).unwrap();

        let ds2 = file
            .new_dataset::<f64>()
            .shape([3usize, 2])
            .create("data_f64")
            .unwrap();
        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        ds2.write_raw(&fdata).unwrap();

        let ds3 = file
            .new_dataset::<i32>()
            .shape([5usize])
            .create("values")
            .unwrap();
        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
        ds3.write_raw(&idata).unwrap();

        file.close().unwrap();

        // File exists
        assert!(path.exists());
    }
648
    /// Writes a chunked dataset (unlimited first dim) frame by frame for
    /// external inspection with `h5dump`.
    #[test]
    fn write_chunked_file_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
        let file = H5File::create(&path).unwrap();

        // Create a chunked dataset with unlimited first dimension
        let ds = file
            .new_dataset::<f64>()
            .shape([0usize, 4])
            .chunk(&[1, 4])
            .max_shape(&[None, Some(4)])
            .create("streaming_data")
            .unwrap();

        // Write 5 frames of data
        for frame in 0..5u64 {
            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
            ds.write_chunk(frame as usize, &raw).unwrap();
        }

        // Extend dimensions to reflect the 5 written frames
        ds.extend(&[5, 4]).unwrap();
        ds.flush().unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }
678
    /// Same as above but with enough frames to exercise the chunk index's
    /// data-block path rather than the inline index block.
    #[test]
    fn write_chunked_many_frames_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<i32>()
            .shape([0usize, 3])
            .chunk(&[1, 3])
            .max_shape(&[None, Some(3)])
            .create("data")
            .unwrap();

        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
        for frame in 0..10u64 {
            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
            ds.write_chunk(frame as usize, &raw).unwrap();
        }
        ds.extend(&[10, 3]).unwrap();
        file.close().unwrap();

        assert!(path.exists());
    }
703
    /// Exercises every attribute-writing path (write_scalar, write_string,
    /// and parse -> write_scalar) on a single dataset.
    #[test]
    fn write_dataset_with_attributes() {
        use crate::types::VarLenUnicode;

        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<f32>()
            .shape([10usize])
            .create("temperature")
            .unwrap();
        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
        ds.write_raw(&data).unwrap();

        // Add string attributes
        let attr = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("units")
            .unwrap();
        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
            .unwrap();

        let attr2 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("description")
            .unwrap();
        attr2
            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
            .unwrap();

        // Use write_string convenience method
        let attr3 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("source")
            .unwrap();
        attr3.write_string("sensor_01").unwrap();

        // Also test parse -> write_scalar pattern
        let attr4 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("label")
            .unwrap();
        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
        attr4.write_scalar(&s).unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }
758
    /// Full round-trip for chunked data: write 8 frames, extend, reopen and
    /// verify every element in order.
    #[test]
    fn chunked_write_read_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("table")
                .unwrap();

            for frame in 0..8u64 {
                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[8, 3]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("table").unwrap();
            assert_eq!(ds.shape(), vec![8, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 24);
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32);
            }
        }

        std::fs::remove_file(&path).ok();
    }
797
    /// Round-trip for deflate-compressed chunks; only built when the
    /// `deflate` feature is enabled.
    #[test]
    #[cfg(feature = "deflate")]
    fn compressed_chunked_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");

        // Write compressed
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0usize, 4])
                .chunk(&[1, 4])
                .max_shape(&[None, Some(4)])
                .deflate(6)
                .create("compressed")
                .unwrap();

            for frame in 0..10u64 {
                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[10, 4]).unwrap();
            file.close().unwrap();
        }

        // Read back and verify
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("compressed").unwrap();
            assert_eq!(ds.shape(), vec![10, 4]);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 40);
            for (i, val) in data.iter().enumerate() {
                assert!(
                    (val - i as f64).abs() < 1e-10,
                    "mismatch at {}: {} != {}",
                    i,
                    val,
                    i
                );
            }
        }

        std::fs::remove_file(&path).ok();
    }
844
    /// 100 compressed frames: stresses the chunk index beyond the inline
    /// block, with deflate on every chunk.
    #[test]
    #[cfg(feature = "deflate")]
    fn compressed_chunked_many_frames() {
        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .deflate(6)
                .create("stream")
                .unwrap();

            for frame in 0..100u64 {
                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[100, 3]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("stream").unwrap();
            assert_eq!(ds.shape(), vec![100, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 300);
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32, "mismatch at {}", i);
            }
        }

        std::fs::remove_file(&path).ok();
    }
883    #[test]
884    fn append_mode() {
885        let path = std::env::temp_dir().join("hdf5_append.h5");
886
887        // Create initial file
888        {
889            let file = H5File::create(&path).unwrap();
890            let ds = file
891                .new_dataset::<i32>()
892                .shape([3usize])
893                .create("first")
894                .unwrap();
895            ds.write_raw(&[1i32, 2, 3]).unwrap();
896            file.close().unwrap();
897        }
898
899        // Append new dataset
900        {
901            let file = H5File::open_rw(&path).unwrap();
902            let ds = file
903                .new_dataset::<f64>()
904                .shape([2usize])
905                .create("second")
906                .unwrap();
907            ds.write_raw(&[4.0f64, 5.0]).unwrap();
908            file.close().unwrap();
909        }
910
911        // Read back both
912        {
913            let file = H5File::open(&path).unwrap();
914            let names = file.dataset_names();
915            assert!(names.contains(&"first".to_string()));
916            assert!(names.contains(&"second".to_string()));
917
918            let ds1 = file.dataset("first").unwrap();
919            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
920
921            let ds2 = file.dataset("second").unwrap();
922            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
923        }
924
925        std::fs::remove_file(&path).ok();
926    }
927
928    #[test]
929    fn open_rw_set_attr_preserves_file() {
930        let path = std::env::temp_dir().join("hdf5_open_rw_attr.h5");
931        // Create file with a dataset and an attribute
932        {
933            let file = H5File::create(&path).unwrap();
934            let ds = file
935                .new_dataset::<i32>()
936                .shape([3usize])
937                .create("data")
938                .unwrap();
939            ds.write_raw(&[10i32, 20, 30]).unwrap();
940            file.set_attr_string("version", "1.0").unwrap();
941            file.close().unwrap();
942        }
943        // Open rw and modify the attribute
944        {
945            let file = H5File::open_rw(&path).unwrap();
946            file.set_attr_string("version", "2.0").unwrap();
947            file.close().unwrap();
948        }
949        // Verify: dataset intact, attribute updated
950        {
951            let file = H5File::open(&path).unwrap();
952            let ds = file.dataset("data").unwrap();
953            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
954            let ver = file.attr_string("version").unwrap();
955            assert_eq!(ver, "2.0");
956        }
957        std::fs::remove_file(&path).ok();
958    }
959
960    #[test]
961    #[cfg(feature = "deflate")]
962    fn open_rw_attr_with_compressed_dataset() {
963        use crate::format::messages::filter::FilterPipeline;
964        let path = std::env::temp_dir().join("hdf5_open_rw_compressed.h5");
965        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
966        // Create file with compressed vlen strings
967        {
968            let file = H5File::create(&path).unwrap();
969            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
970                .unwrap();
971            file.set_attr_string("version", "1.0").unwrap();
972            file.close().unwrap();
973        }
974        // Open rw and modify attribute only
975        {
976            let file = H5File::open_rw(&path).unwrap();
977            file.set_attr_string("version", "2.0").unwrap();
978            file.close().unwrap();
979        }
980        // Verify: compressed dataset still readable, attribute updated
981        {
982            let file = H5File::open(&path).unwrap();
983            let ds = file.dataset("texts").unwrap();
984            let strings = ds.read_vlen_strings().unwrap();
985            assert_eq!(strings.len(), 50);
986            assert_eq!(strings[0], "test string data");
987            let ver = file.attr_string("version").unwrap();
988            assert_eq!(ver, "2.0");
989        }
990        std::fs::remove_file(&path).ok();
991    }
992
993    #[test]
994    #[cfg(feature = "lz4")]
995    fn append_vlen_strings_basic() {
996        use crate::format::messages::filter::FilterPipeline;
997        let path = std::env::temp_dir().join("hdf5_append_vlen.h5");
998        {
999            let file = H5File::create(&path).unwrap();
1000            file.create_appendable_vlen_dataset("names", 4, Some(FilterPipeline::lz4()))
1001                .unwrap();
1002            file.append_vlen_strings("names", &["alice", "bob", "charlie"])
1003                .unwrap();
1004            file.append_vlen_strings("names", &["dave", "eve"]).unwrap();
1005            file.close().unwrap();
1006        }
1007        {
1008            let file = H5File::open(&path).unwrap();
1009            let ds = file.dataset("names").unwrap();
1010            let strings = ds.read_vlen_strings().unwrap();
1011            assert_eq!(strings, vec!["alice", "bob", "charlie", "dave", "eve"]);
1012        }
1013        std::fs::remove_file(&path).ok();
1014    }
1015
1016    #[test]
1017    #[cfg(feature = "lz4")]
1018    fn append_vlen_strings_large() {
1019        use crate::format::messages::filter::FilterPipeline;
1020        let path = std::env::temp_dir().join("hdf5_append_vlen_large.h5");
1021        let batch1: Vec<String> = (0..5000).map(|i| format!("node-{:06}", i)).collect();
1022        let batch2: Vec<String> = (5000..7189).map(|i| format!("node-{:06}", i)).collect();
1023        {
1024            let file = H5File::create(&path).unwrap();
1025            file.create_appendable_vlen_dataset("data", 512, Some(FilterPipeline::lz4()))
1026                .unwrap();
1027            let r1: Vec<&str> = batch1.iter().map(|s| s.as_str()).collect();
1028            file.append_vlen_strings("data", &r1).unwrap();
1029            let r2: Vec<&str> = batch2.iter().map(|s| s.as_str()).collect();
1030            file.append_vlen_strings("data", &r2).unwrap();
1031            file.close().unwrap();
1032        }
1033        {
1034            let file = H5File::open(&path).unwrap();
1035            let ds = file.dataset("data").unwrap();
1036            let strings = ds.read_vlen_strings().unwrap();
1037            assert_eq!(strings.len(), 7189);
1038            assert_eq!(strings[0], "node-000000");
1039            assert_eq!(strings[7188], "node-007188");
1040        }
1041        std::fs::remove_file(&path).ok();
1042    }
1043
1044    #[test]
1045    fn append_vlen_strings_uncompressed() {
1046        let path = std::env::temp_dir().join("hdf5_append_vlen_unc.h5");
1047        {
1048            let file = H5File::create(&path).unwrap();
1049            file.create_appendable_vlen_dataset("texts", 8, None)
1050                .unwrap();
1051            file.append_vlen_strings("texts", &["hello", "world"])
1052                .unwrap();
1053            file.append_vlen_strings("texts", &["foo", "bar", "baz"])
1054                .unwrap();
1055            file.close().unwrap();
1056        }
1057        {
1058            let file = H5File::open(&path).unwrap();
1059            let ds = file.dataset("texts").unwrap();
1060            let strings = ds.read_vlen_strings().unwrap();
1061            assert_eq!(strings, vec!["hello", "world", "foo", "bar", "baz"]);
1062        }
1063        std::fs::remove_file(&path).ok();
1064    }
1065
1066    #[test]
1067    fn delete_dataset_roundtrip() {
1068        let path = std::env::temp_dir().join("hdf5_delete_ds.h5");
1069        {
1070            let file = H5File::create(&path).unwrap();
1071            file.write_vlen_strings("keep", &["a", "b"]).unwrap();
1072            file.write_vlen_strings("remove", &["x", "y"]).unwrap();
1073            file.delete_dataset("remove").unwrap();
1074            file.close().unwrap();
1075        }
1076        {
1077            let file = H5File::open(&path).unwrap();
1078            let names = file.dataset_names();
1079            assert!(names.contains(&"keep".to_string()));
1080            assert!(!names.contains(&"remove".to_string()));
1081            let ds = file.dataset("keep").unwrap();
1082            assert_eq!(ds.read_vlen_strings().unwrap(), vec!["a", "b"]);
1083        }
1084        std::fs::remove_file(&path).ok();
1085    }
1086
1087    #[test]
1088    fn delete_group_roundtrip() {
1089        let path = std::env::temp_dir().join("hdf5_delete_grp.h5");
1090        {
1091            let file = H5File::create(&path).unwrap();
1092            let g1 = file.create_group("keep").unwrap();
1093            g1.write_vlen_strings("data", &["a"]).unwrap();
1094            let g2 = file.create_group("remove").unwrap();
1095            g2.write_vlen_strings("data", &["x"]).unwrap();
1096            file.delete_group("remove").unwrap();
1097            file.close().unwrap();
1098        }
1099        {
1100            let file = H5File::open(&path).unwrap();
1101            let names = file.dataset_names();
1102            assert!(names.contains(&"keep/data".to_string()));
1103            assert!(!names.contains(&"remove/data".to_string()));
1104        }
1105        std::fs::remove_file(&path).ok();
1106    }
1107
1108    #[test]
1109    fn open_rw_delete_recreate_group() {
1110        let path = std::env::temp_dir().join("hdf5_rw_delete_recreate.h5");
1111        // Step 1: create file with groups
1112        {
1113            let file = H5File::create(&path).unwrap();
1114            let n = file.create_group("nodes").unwrap();
1115            n.write_vlen_strings("id", &["a", "b", "c"]).unwrap();
1116            let e = file.create_group("edges").unwrap();
1117            e.write_vlen_strings("src", &["x", "y"]).unwrap();
1118            file.close().unwrap();
1119        }
1120        // Step 2: open_rw, delete one group, recreate with new data
1121        {
1122            let file = H5File::open_rw(&path).unwrap();
1123            file.delete_group("nodes").unwrap();
1124            let n = file.create_group("nodes").unwrap();
1125            n.write_vlen_strings("id", &["new1", "new2"]).unwrap();
1126            file.close().unwrap();
1127        }
1128        // Step 3: verify
1129        {
1130            let file = H5File::open(&path).unwrap();
1131            let ds = file.dataset("nodes/id").unwrap();
1132            let s = ds.read_vlen_strings().unwrap();
1133            assert_eq!(s, vec!["new1", "new2"]);
1134            // edges should still be intact
1135            let ds = file.dataset("edges/src").unwrap();
1136            let s = ds.read_vlen_strings().unwrap();
1137            assert_eq!(s, vec!["x", "y"]);
1138        }
1139        std::fs::remove_file(&path).ok();
1140    }
1141
1142    #[test]
1143    fn delete_and_recreate_group() {
1144        let path = std::env::temp_dir().join("hdf5_delete_recreate.h5");
1145        {
1146            let file = H5File::create(&path).unwrap();
1147            let g = file.create_group("nodes").unwrap();
1148            g.write_vlen_strings("id", &["old1", "old2"]).unwrap();
1149            file.delete_group("nodes").unwrap();
1150            let g = file.create_group("nodes").unwrap();
1151            g.write_vlen_strings("id", &["new1", "new2", "new3"])
1152                .unwrap();
1153            file.close().unwrap();
1154        }
1155        {
1156            let file = H5File::open(&path).unwrap();
1157            let ds = file.dataset("nodes/id").unwrap();
1158            let strings = ds.read_vlen_strings().unwrap();
1159            assert_eq!(strings, vec!["new1", "new2", "new3"]);
1160        }
1161        std::fs::remove_file(&path).ok();
1162    }
1163
1164    #[test]
1165    #[cfg(feature = "deflate")]
1166    fn vlen_string_compressed_large_roundtrip() {
1167        use crate::format::messages::filter::FilterPipeline;
1168        let path = std::env::temp_dir().join("hdf5_vlen_large.h5");
1169        // Simulate kodex scenario: 7189 strings, chunk_size 512
1170        let input: Vec<String> = (0..7189)
1171            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
1172            .collect();
1173        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
1174        {
1175            let file = H5File::create(&path).unwrap();
1176            file.create_group("nodes").unwrap();
1177            file.write_vlen_strings_compressed(
1178                "nodes/id",
1179                &input_refs,
1180                512,
1181                FilterPipeline::deflate(6),
1182            )
1183            .unwrap();
1184            file.close().unwrap();
1185        }
1186        // Read back
1187        {
1188            let file = H5File::open(&path).unwrap();
1189            let ds = file.dataset("nodes/id").unwrap();
1190            let strings = ds.read_vlen_strings().unwrap();
1191            assert_eq!(strings.len(), 7189);
1192            assert_eq!(strings[0], input[0]);
1193            assert_eq!(strings[7188], input[7188]);
1194        }
1195        // Also test open_rw then re-read
1196        {
1197            let file = H5File::open_rw(&path).unwrap();
1198            file.set_attr_string("version", "1.0").unwrap();
1199            file.close().unwrap();
1200        }
1201        {
1202            let file = H5File::open(&path).unwrap();
1203            let ds = file.dataset("nodes/id").unwrap();
1204            let strings = ds.read_vlen_strings().unwrap();
1205            assert_eq!(strings.len(), 7189);
1206            assert_eq!(strings[0], input[0]);
1207        }
1208        std::fs::remove_file(&path).ok();
1209    }
1210
1211    #[test]
1212    fn vlen_string_write_read() {
1213        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
1214        {
1215            let file = H5File::create(&path).unwrap();
1216            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
1217                .unwrap();
1218            file.close().unwrap();
1219        }
1220        {
1221            let file = H5File::open(&path).unwrap();
1222            let ds = file.dataset("names").unwrap();
1223            let strings = ds.read_vlen_strings().unwrap();
1224            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
1225        }
1226        std::fs::remove_file(&path).ok();
1227    }
1228
1229    #[test]
1230    #[cfg(feature = "deflate")]
1231    fn vlen_string_deflate_roundtrip() {
1232        use crate::format::messages::filter::FilterPipeline;
1233        let path = std::env::temp_dir().join("hdf5_vlen_deflate.h5");
1234        let input: Vec<&str> = (0..100)
1235            .map(|i| match i % 3 {
1236                0 => "hello world",
1237                1 => "compressed vlen string test",
1238                _ => "rust-hdf5",
1239            })
1240            .collect();
1241        {
1242            let file = H5File::create(&path).unwrap();
1243            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1244                .unwrap();
1245            file.close().unwrap();
1246        }
1247        {
1248            let file = H5File::open(&path).unwrap();
1249            let ds = file.dataset("texts").unwrap();
1250            let strings = ds.read_vlen_strings().unwrap();
1251            assert_eq!(strings.len(), 100);
1252            for (i, s) in strings.iter().enumerate() {
1253                assert_eq!(s, input[i]);
1254            }
1255        }
1256        std::fs::remove_file(&path).ok();
1257    }
1258
1259    #[test]
1260    #[cfg(feature = "zstd")]
1261    fn vlen_string_zstd_roundtrip() {
1262        use crate::format::messages::filter::FilterPipeline;
1263        let path = std::env::temp_dir().join("hdf5_vlen_zstd.h5");
1264        let input: Vec<&str> = (0..200)
1265            .map(|i| match i % 4 {
1266                0 => "zstandard compression test",
1267                1 => "variable length string",
1268                2 => "rust-hdf5 chunked storage",
1269                _ => "hello zstd world",
1270            })
1271            .collect();
1272        {
1273            let file = H5File::create(&path).unwrap();
1274            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
1275                .unwrap();
1276            file.close().unwrap();
1277        }
1278        {
1279            let file = H5File::open(&path).unwrap();
1280            let ds = file.dataset("data").unwrap();
1281            let strings = ds.read_vlen_strings().unwrap();
1282            assert_eq!(strings.len(), 200);
1283            for (i, s) in strings.iter().enumerate() {
1284                assert_eq!(s, input[i]);
1285            }
1286        }
1287        std::fs::remove_file(&path).ok();
1288    }
1289
1290    #[test]
1291    #[cfg(feature = "deflate")]
1292    fn shuffle_deflate_roundtrip() {
1293        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
1294        {
1295            let file = H5File::create(&path).unwrap();
1296            let ds = file
1297                .new_dataset::<f64>()
1298                .shape([0usize, 4])
1299                .chunk(&[1, 4])
1300                .max_shape(&[None, Some(4)])
1301                .shuffle_deflate(6)
1302                .create("data")
1303                .unwrap();
1304            for frame in 0..20u64 {
1305                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
1306                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1307                ds.write_chunk(frame as usize, &raw).unwrap();
1308            }
1309            ds.extend(&[20, 4]).unwrap();
1310            file.close().unwrap();
1311        }
1312        {
1313            let file = H5File::open(&path).unwrap();
1314            let ds = file.dataset("data").unwrap();
1315            assert_eq!(ds.shape(), vec![20, 4]);
1316            let data = ds.read_raw::<f64>().unwrap();
1317            assert_eq!(data.len(), 80);
1318            for (i, val) in data.iter().enumerate() {
1319                assert!((val - i as f64).abs() < 1e-10);
1320            }
1321        }
1322        std::fs::remove_file(&path).ok();
1323    }
1324
1325    #[test]
1326    fn file_level_attributes() {
1327        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
1328        {
1329            let file = H5File::create(&path).unwrap();
1330            file.set_attr_string("title", "Test File").unwrap();
1331            file.set_attr_numeric("version", &42i32).unwrap();
1332            let ds = file
1333                .new_dataset::<u8>()
1334                .shape([1usize])
1335                .create("dummy")
1336                .unwrap();
1337            ds.write_raw(&[0u8]).unwrap();
1338            file.close().unwrap();
1339        }
1340        {
1341            let file = H5File::open(&path).unwrap();
1342            assert!(file.dataset_names().contains(&"dummy".to_string()));
1343
1344            // Read file-level attributes
1345            let names = file.attr_names().unwrap();
1346            assert!(names.contains(&"title".to_string()));
1347
1348            let title = file.attr_string("title").unwrap();
1349            assert_eq!(title, "Test File");
1350        }
1351        std::fs::remove_file(&path).ok();
1352    }
1353
1354    #[test]
1355    fn scalar_dataset_roundtrip() {
1356        let path = std::env::temp_dir().join("hdf5_scalar.h5");
1357        {
1358            let file = H5File::create(&path).unwrap();
1359            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
1360            ds.write_raw(&[std::f64::consts::PI]).unwrap();
1361            file.close().unwrap();
1362        }
1363        {
1364            let file = H5File::open(&path).unwrap();
1365            let ds = file.dataset("pi").unwrap();
1366            assert_eq!(ds.shape(), Vec::<usize>::new());
1367            assert_eq!(ds.total_elements(), 1);
1368            let data = ds.read_raw::<f64>().unwrap();
1369            assert_eq!(data.len(), 1);
1370            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
1371        }
1372        std::fs::remove_file(&path).ok();
1373    }
1374
    /// Chunked dataset written in one session gains five more frames in a
    /// later `open_rw` session, then reads back as a single 10x3 dataset.
    ///
    /// The second session bypasses the public dataset handle and drives the
    /// internal writer directly (`borrow_inner_mut` + `H5FileInner::Writer`),
    /// exercising the low-level append path.
    #[test]
    fn append_mode_extend_chunked() {
        let path = std::env::temp_dir().join("hdf5_append_extend.h5");

        // Create with 5 frames
        {
            let file = H5File::create(&path).unwrap();
            // Extendable along axis 0 (max None), fixed width 3, one row per chunk.
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                // Frame i holds [3i, 3i+1, 3i+2] as little-endian i32 bytes.
                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(i as usize, &raw).unwrap();
            }
            // Record the logical shape after the chunk writes.
            ds.extend(&[5, 3]).unwrap();
            file.close().unwrap();
        }

        // Reopen and add 5 more frames
        {
            let file = H5File::open_rw(&path).unwrap();
            // Find the stream dataset index (it's the first one)
            let names = file.dataset_names();
            assert!(names.contains(&"stream".to_string()));

            // Write more chunks via the writer directly
            // NOTE(review): this reaches into crate internals; if the inner
            // state is not the Writer variant the whole block is silently
            // skipped — the read-back assertions below would then fail.
            let mut inner = crate::file::borrow_inner_mut(&file.inner);
            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
                let ds_idx = writer.dataset_index("stream").unwrap();
                for i in 5..10u64 {
                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                    writer.write_chunk(ds_idx, i, &raw).unwrap();
                }
                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
            }
            // Release the RefCell/Mutex guard before close(), which
            // presumably re-borrows the inner state.
            drop(inner);
            file.close().unwrap();
        }

        // Read back all 10 frames
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("stream").unwrap();
            assert_eq!(ds.shape(), vec![10, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 30);
            // Values form the row-major sequence 0..30.
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32, "mismatch at {}", i);
            }
        }

        std::fs::remove_file(&path).ok();
    }
1434
1435    #[test]
1436    fn group_hierarchy_roundtrip() {
1437        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");
1438
1439        {
1440            let file = H5File::create(&path).unwrap();
1441            let root = file.root_group();
1442
1443            // Create groups
1444            let det = root.create_group("detector").unwrap();
1445            let raw = det.create_group("raw").unwrap();
1446
1447            // Create datasets in groups
1448            let ds1 = det
1449                .new_dataset::<f32>()
1450                .shape([10usize])
1451                .create("temperature")
1452                .unwrap();
1453            ds1.write_raw(&[1.0f32; 10]).unwrap();
1454
1455            let ds2 = raw
1456                .new_dataset::<u16>()
1457                .shape([4usize, 4])
1458                .create("image")
1459                .unwrap();
1460            ds2.write_raw(&[42u16; 16]).unwrap();
1461
1462            // Root-level dataset
1463            let ds3 = file
1464                .new_dataset::<i32>()
1465                .shape([3usize])
1466                .create("version")
1467                .unwrap();
1468            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1469
1470            file.close().unwrap();
1471        }
1472
1473        {
1474            let file = H5File::open(&path).unwrap();
1475            let names = file.dataset_names();
1476            assert!(names.contains(&"version".to_string()));
1477            assert!(names.contains(&"detector/temperature".to_string()));
1478            assert!(names.contains(&"detector/raw/image".to_string()));
1479
1480            // Read datasets
1481            let ds = file.dataset("version").unwrap();
1482            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1483
1484            let ds = file.dataset("detector/temperature").unwrap();
1485            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1486
1487            let ds = file.dataset("detector/raw/image").unwrap();
1488            assert_eq!(ds.shape(), vec![4, 4]);
1489            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1490
1491            // Group traversal
1492            let root = file.root_group();
1493            let group_names = root.group_names().unwrap();
1494            assert!(group_names.contains(&"detector".to_string()));
1495        }
1496
1497        std::fs::remove_file(&path).ok();
1498    }
1499
1500    #[test]
1501    fn nested_groups_via_file_create_group() {
1502        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");
1503
1504        {
1505            let file = H5File::create(&path).unwrap();
1506
1507            // Use the H5File::create_group convenience method
1508            let grp = file.create_group("sensors").unwrap();
1509            let sub = grp.create_group("accel").unwrap();
1510
1511            let ds = sub
1512                .new_dataset::<f64>()
1513                .shape([3usize])
1514                .create("xyz")
1515                .unwrap();
1516            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1517
1518            file.close().unwrap();
1519        }
1520
1521        {
1522            let file = H5File::open(&path).unwrap();
1523            let names = file.dataset_names();
1524            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1525
1526            let ds = file.dataset("sensors/accel/xyz").unwrap();
1527            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1528
1529            // Open group in read mode
1530            let root = file.root_group();
1531            let sensors = root.group("sensors").unwrap();
1532            assert_eq!(sensors.name(), "/sensors");
1533
1534            let accel = sensors.group("accel").unwrap();
1535            assert_eq!(accel.name(), "/sensors/accel");
1536
1537            // list_groups from root
1538            let top_groups = root.group_names().unwrap();
1539            assert!(top_groups.contains(&"sensors".to_string()));
1540
1541            // list_groups from sensors
1542            let sub_groups = sensors.group_names().unwrap();
1543            assert!(sub_groups.contains(&"accel".to_string()));
1544        }
1545
1546        std::fs::remove_file(&path).ok();
1547    }
1548}
1549
#[cfg(test)]
mod h5py_compat_tests {
    use super::*;

    /// Verify our files can be read by h5dump (if available).
    ///
    /// Builds one file exercising contiguous storage, chunked+deflate
    /// storage, a sub-group, and a variable-length string attribute, then
    /// requires both `h5dump -H` and a full `h5dump` to exit successfully.
    /// Skipped (with a message) when `h5dump` is not on PATH.
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // Check if h5dump is available
        let h5dump = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if h5dump.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = std::env::temp_dir().join("hdf5_h5dump_validate.h5");

        // Write a comprehensive test file
        {
            let file = H5File::create(&path).unwrap();

            // Contiguous
            let ds = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
            ds.write_raw(&data).unwrap();

            // Chunked + compressed
            let ds2 = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                // Frame i carries [2i, 2i+1] as little-endian bytes.
                let vals: Vec<i32> = vec![i as i32 * 2, i as i32 * 2 + 1];
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds2.write_chunk(i as usize, &raw).unwrap();
            }
            ds2.extend(&[5, 2]).unwrap();

            // Group
            let grp = file.create_group("meta").unwrap();
            let ds3 = grp
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute
            use crate::types::VarLenUnicode;
            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            attr.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Run h5dump and verify exit code
        let output = std::process::Command::new("h5dump")
            .arg("-H") // header only (faster)
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );

        // Full dump (with data) should also work
        let output2 = std::process::Command::new("h5dump")
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output2.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&output2.stderr),
        );

        std::fs::remove_file(&path).ok();
    }

    /// Read a file produced by h5py with default settings, if present.
    ///
    /// The fixture at `/tmp/test_h5py_default.h5` is generated by an
    /// external h5py script (not part of this crate — TODO confirm where it
    /// lives); the test skips silently when the file is absent. The
    /// assertions expect a 4x5 f64 "data" dataset whose first and last
    /// elements are 0.0 and 19.0, and a 3x64x64 u16 "images" dataset.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        assert!((data[0]).abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}