// rust_hdf5/file.rs
1//! HDF5 file handle — the main entry point for the public API.
2//!
3//! ```no_run
4//! use rust_hdf5::H5File;
5//!
6//! // Write
7//! let file = H5File::create("example.h5").unwrap();
8//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
9//! ds.write_raw(&vec![0u8; 200]).unwrap();
10//! drop(file);
11//!
12//! // Read
13//! let file = H5File::open("example.h5").unwrap();
14//! let ds = file.dataset("data").unwrap();
15//! let data = ds.read_raw::<u8>().unwrap();
16//! assert_eq!(data.len(), 200);
17//! ```
18
19use std::path::Path;
20
21use crate::io::{Hdf5Reader, Hdf5Writer};
22
23use crate::dataset::{DatasetBuilder, H5Dataset};
24use crate::error::{Hdf5Error, Result};
25use crate::group::H5Group;
26use crate::types::H5Type;
27
// ---------------------------------------------------------------------------
// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
// the `threadsafe` feature flag.
// ---------------------------------------------------------------------------

/// Shared handle to the file's inner state (single-threaded flavor).
///
/// `Rc<RefCell<_>>` has no atomic or locking overhead, but makes `H5File`
/// (and anything holding a `SharedInner`) `!Send`/`!Sync`.
#[cfg(not(feature = "threadsafe"))]
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;

/// Shared handle to the file's inner state (thread-safe flavor).
///
/// `Arc<Mutex<_>>` makes the handle `Send + Sync` at the cost of locking.
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
38
/// Helper to borrow/lock the inner state immutably.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
    // Panics if the state is already mutably borrowed; in the single-threaded
    // flavor that is a re-entrancy bug, not a recoverable runtime condition.
    inner.borrow()
}

/// Helper to borrow/lock the inner state mutably.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
    inner.borrow_mut()
}

/// Helper to clone a SharedInner.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    // Rc::clone is a cheap refcount bump; no deep copy of the file state.
    std::rc::Rc::clone(inner)
}

/// Helper to wrap an H5FileInner in SharedInner.
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::rc::Rc::new(std::cell::RefCell::new(inner))
}
62
/// Borrow/lock the inner state immutably (thread-safe flavor).
///
/// Recovers from a poisoned mutex instead of panicking: a panic on one
/// thread while it held the lock would otherwise permanently disable the
/// file handle for every other thread. The inner state is plain data whose
/// invariants are maintained per-operation, so continuing after a poison
/// is safe here.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}

/// Borrow/lock the inner state mutably (thread-safe flavor).
///
/// A `MutexGuard` grants exclusive access either way; this exists to mirror
/// the `RefCell`-based API of the single-threaded flavor. Same poison
/// recovery as `borrow_inner`.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
}

/// Clone the shared handle (cheap `Arc` refcount bump, no deep copy).
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    std::sync::Arc::clone(inner)
}

/// Wrap an `H5FileInner` in a new shared, mutex-protected handle.
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::sync::Arc::new(std::sync::Mutex::new(inner))
}
82
/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// By default, this uses `Rc<RefCell<>>` for zero-overhead single-threaded use.
/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
/// `H5File` `Send + Sync`. (The sharing wrapper itself is `SharedInner`.)
pub(crate) enum H5FileInner {
    /// File opened for writing/appending; owns the write-side I/O handle.
    Writer(Hdf5Writer),
    /// File opened for reading; owns the read-side I/O handle.
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    Closed,
}
94
/// An HDF5 file opened for reading or writing.
///
/// Datasets created from this file hold a shared reference to the underlying
/// I/O handle, so the file does not need to outlive its datasets (they share
/// ownership via reference counting).
pub struct H5File {
    // Shared with H5Dataset / H5Group handles; see SharedInner for the
    // Rc-vs-Arc choice controlled by the `threadsafe` feature.
    pub(crate) inner: SharedInner,
}
103
104impl H5File {
105    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
106    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
107        let writer = Hdf5Writer::create(path.as_ref())?;
108        Ok(Self {
109            inner: new_shared(H5FileInner::Writer(writer)),
110        })
111    }
112
113    /// Open an existing HDF5 file for reading.
114    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
115        let reader = Hdf5Reader::open(path.as_ref())?;
116        Ok(Self {
117            inner: new_shared(H5FileInner::Reader(reader)),
118        })
119    }
120
121    /// Open an existing HDF5 file for appending new datasets.
122    ///
123    /// Existing datasets are preserved. New datasets can be added and will
124    /// be written after the current end of file. Existing chunked datasets
125    /// can be extended with `write_chunk` and `extend_dataset`.
126    ///
127    /// ```no_run
128    /// use rust_hdf5::H5File;
129    /// let file = H5File::open_rw("existing.h5").unwrap();
130    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
131    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
132    /// file.close().unwrap();
133    /// ```
134    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
135        let writer = Hdf5Writer::open_append(path.as_ref())?;
136        Ok(Self {
137            inner: new_shared(H5FileInner::Writer(writer)),
138        })
139    }
140
141    /// Return a handle to the root group.
142    ///
143    /// The root group can be used to create datasets and sub-groups.
144    pub fn root_group(&self) -> H5Group {
145        H5Group::new(clone_inner(&self.inner), "/".to_string())
146    }
147
148    /// Create a group in the root of the file.
149    ///
150    /// ```no_run
151    /// use rust_hdf5::H5File;
152    /// let file = H5File::create("groups.h5").unwrap();
153    /// let grp = file.create_group("detector").unwrap();
154    /// ```
155    pub fn create_group(&self, name: &str) -> Result<H5Group> {
156        self.root_group().create_group(name)
157    }
158
159    /// Start building a new dataset with the given element type.
160    ///
161    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
162    /// `.create("name")` to finalize.
163    ///
164    /// ```no_run
165    /// # use rust_hdf5::H5File;
166    /// let file = H5File::create("build.h5").unwrap();
167    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
168    /// ```
169    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
170        DatasetBuilder::new(clone_inner(&self.inner))
171    }
172
173    /// Add a string attribute to the file (root group).
174    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
175        use crate::format::messages::attribute::AttributeMessage;
176        let attr = AttributeMessage::scalar_string(name, value);
177        let mut inner = borrow_inner_mut(&self.inner);
178        match &mut *inner {
179            H5FileInner::Writer(writer) => {
180                writer.add_root_attribute(attr);
181                Ok(())
182            }
183            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
184        }
185    }
186
187    /// Add a numeric attribute to the file (root group).
188    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
189        use crate::format::messages::attribute::AttributeMessage;
190        let es = T::element_size();
191        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
192        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
193        let mut inner = borrow_inner_mut(&self.inner);
194        match &mut *inner {
195            H5FileInner::Writer(writer) => {
196                writer.add_root_attribute(attr);
197                Ok(())
198            }
199            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
200        }
201    }
202
203    /// Return the names of file-level (root group) attributes.
204    pub fn attr_names(&self) -> Result<Vec<String>> {
205        let inner = borrow_inner(&self.inner);
206        match &*inner {
207            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
208            _ => Ok(vec![]),
209        }
210    }
211
212    /// Read a file-level string attribute.
213    pub fn attr_string(&self, name: &str) -> Result<String> {
214        let inner = borrow_inner(&self.inner);
215        match &*inner {
216            H5FileInner::Reader(reader) => {
217                let attr = reader
218                    .root_attr(name)
219                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
220                let end = attr
221                    .data
222                    .iter()
223                    .position(|&b| b == 0)
224                    .unwrap_or(attr.data.len());
225                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
226            }
227            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
228        }
229    }
230
231    /// Check if the file is in write/append mode.
232    pub fn is_writable(&self) -> bool {
233        let inner = borrow_inner(&self.inner);
234        matches!(&*inner, H5FileInner::Writer(_))
235    }
236
237    /// Create a variable-length string dataset and write data.
238    ///
239    /// This is a convenience method for writing h5py-compatible vlen string
240    /// datasets using global heap storage.
241    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
242        let mut inner = borrow_inner_mut(&self.inner);
243        match &mut *inner {
244            H5FileInner::Writer(writer) => {
245                writer.create_vlen_string_dataset(name, strings)?;
246                Ok(())
247            }
248            H5FileInner::Reader(_) => {
249                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
250            }
251            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
252        }
253    }
254
255    /// Open an existing dataset by name (read mode).
256    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
257        let inner = borrow_inner(&self.inner);
258        match &*inner {
259            H5FileInner::Reader(reader) => {
260                let info = reader
261                    .dataset_info(name)
262                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
263                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
264                let element_size = info.datatype.element_size() as usize;
265                Ok(H5Dataset::new_reader(
266                    clone_inner(&self.inner),
267                    name.to_string(),
268                    shape,
269                    element_size,
270                ))
271            }
272            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
273                "cannot open a dataset by name in write mode; use new_dataset() instead"
274                    .to_string(),
275            )),
276            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
277        }
278    }
279
280    /// Return the names of all datasets in the root group.
281    ///
282    /// Works in both read and write mode: in write mode, returns the names of
283    /// datasets created so far; in read mode, returns the names discovered
284    /// during file open.
285    pub fn dataset_names(&self) -> Vec<String> {
286        let inner = borrow_inner(&self.inner);
287        match &*inner {
288            H5FileInner::Reader(reader) => reader
289                .dataset_names()
290                .iter()
291                .map(|s| s.to_string())
292                .collect(),
293            H5FileInner::Writer(writer) => writer
294                .dataset_names()
295                .iter()
296                .map(|s| s.to_string())
297                .collect(),
298            H5FileInner::Closed => Vec::new(),
299        }
300    }
301
302    /// Explicitly close the file. For a writer, this finalizes the file
303    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
304    ///
305    /// The file is also auto-finalized on drop, but calling `close()` lets
306    /// you handle errors.
307    pub fn close(self) -> Result<()> {
308        let old = {
309            let mut inner = borrow_inner_mut(&self.inner);
310            std::mem::replace(&mut *inner, H5FileInner::Closed)
311        };
312        match old {
313            H5FileInner::Writer(writer) => {
314                writer.close()?;
315                Ok(())
316            }
317            H5FileInner::Reader(_) => Ok(()),
318            H5FileInner::Closed => Ok(()),
319        }
320    }
321
322    /// Flush the file to disk. Only meaningful in write mode.
323    pub fn flush(&self) -> Result<()> {
324        // The underlying writer does not expose a standalone flush; data is
325        // written to disk immediately via pwrite. This is a compatibility
326        // method that does nothing for now.
327        Ok(())
328    }
329}
330
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Unique temp-file path for a test, namespaced to this module so
    /// parallel test binaries don't collide.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("hdf5_file_test_{}.h5", name))
    }

    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();

        // Should be readable
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        {
            let _file = H5File::create(&path).unwrap();
            // drop auto-finalizes
        }
        // Verify the file is valid by opening it
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        let result = file.dataset("nonexistent");
        assert!(result.is_err());

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([4, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let data = ds.read_raw::<u8>().unwrap();
            assert_eq!(data.len(), 16);
            assert!(data.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&values).unwrap();
            file.close().unwrap();
        }

        // Read — exact equality is safe: the values round-trip bit-for-bit.
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let file = H5File::create(&path).unwrap();
            let ds1 = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            ds1.write_raw(&[10i32, 20, 30]).unwrap();

            let ds2 = file
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            ds2.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            let ds_ints = file.dataset("ints").unwrap();
            assert_eq!(ds_ints.shape(), vec![3]);
            let ints = ds_ints.read_raw::<i32>().unwrap();
            assert_eq!(ints, vec![10, 20, 30]);

            let ds_floats = file.dataset("floats").unwrap();
            assert_eq!(ds_floats.shape(), vec![2, 2]);
            let floats = ds_floats.read_raw::<f32>().unwrap();
            assert_eq!(floats, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();
        // File is consumed by close(), so no double-close possible at the type level.
        std::fs::remove_file(&path).ok();
    }
}
485
486#[cfg(test)]
487mod integration_tests {
488    use super::*;
489
490    #[test]
491    fn write_file_for_h5dump() {
492        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
493        let file = H5File::create(&path).unwrap();
494
495        let ds = file
496            .new_dataset::<u8>()
497            .shape([4usize, 4])
498            .create("data_u8")
499            .unwrap();
500        let data: Vec<u8> = (0..16).collect();
501        ds.write_raw(&data).unwrap();
502
503        let ds2 = file
504            .new_dataset::<f64>()
505            .shape([3usize, 2])
506            .create("data_f64")
507            .unwrap();
508        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
509        ds2.write_raw(&fdata).unwrap();
510
511        let ds3 = file
512            .new_dataset::<i32>()
513            .shape([5usize])
514            .create("values")
515            .unwrap();
516        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
517        ds3.write_raw(&idata).unwrap();
518
519        file.close().unwrap();
520
521        // File exists
522        assert!(path.exists());
523    }
524
525    #[test]
526    fn write_chunked_file_for_h5dump() {
527        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
528        let file = H5File::create(&path).unwrap();
529
530        // Create a chunked dataset with unlimited first dimension
531        let ds = file
532            .new_dataset::<f64>()
533            .shape([0usize, 4])
534            .chunk(&[1, 4])
535            .max_shape(&[None, Some(4)])
536            .create("streaming_data")
537            .unwrap();
538
539        // Write 5 frames of data
540        for frame in 0..5u64 {
541            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
542            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
543            ds.write_chunk(frame as usize, &raw).unwrap();
544        }
545
546        // Extend dimensions to reflect the 5 written frames
547        ds.extend(&[5, 4]).unwrap();
548        ds.flush().unwrap();
549
550        file.close().unwrap();
551
552        assert!(path.exists());
553    }
554
555    #[test]
556    fn write_chunked_many_frames_for_h5dump() {
557        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
558        let file = H5File::create(&path).unwrap();
559
560        let ds = file
561            .new_dataset::<i32>()
562            .shape([0usize, 3])
563            .chunk(&[1, 3])
564            .max_shape(&[None, Some(3)])
565            .create("data")
566            .unwrap();
567
568        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
569        for frame in 0..10u64 {
570            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
571            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
572            ds.write_chunk(frame as usize, &raw).unwrap();
573        }
574        ds.extend(&[10, 3]).unwrap();
575        file.close().unwrap();
576
577        assert!(path.exists());
578    }
579
580    #[test]
581    fn write_dataset_with_attributes() {
582        use crate::types::VarLenUnicode;
583
584        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
585        let file = H5File::create(&path).unwrap();
586
587        let ds = file
588            .new_dataset::<f32>()
589            .shape([10usize])
590            .create("temperature")
591            .unwrap();
592        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
593        ds.write_raw(&data).unwrap();
594
595        // Add string attributes
596        let attr = ds
597            .new_attr::<VarLenUnicode>()
598            .shape(())
599            .create("units")
600            .unwrap();
601        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
602            .unwrap();
603
604        let attr2 = ds
605            .new_attr::<VarLenUnicode>()
606            .shape(())
607            .create("description")
608            .unwrap();
609        attr2
610            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
611            .unwrap();
612
613        // Use write_string convenience method
614        let attr3 = ds
615            .new_attr::<VarLenUnicode>()
616            .shape(())
617            .create("source")
618            .unwrap();
619        attr3.write_string("sensor_01").unwrap();
620
621        // Also test parse -> write_scalar pattern
622        let attr4 = ds
623            .new_attr::<VarLenUnicode>()
624            .shape(())
625            .create("label")
626            .unwrap();
627        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
628        attr4.write_scalar(&s).unwrap();
629
630        file.close().unwrap();
631
632        assert!(path.exists());
633    }
634
635    #[test]
636    fn chunked_write_read_roundtrip() {
637        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");
638
639        // Write
640        {
641            let file = H5File::create(&path).unwrap();
642            let ds = file
643                .new_dataset::<i32>()
644                .shape([0usize, 3])
645                .chunk(&[1, 3])
646                .max_shape(&[None, Some(3)])
647                .create("table")
648                .unwrap();
649
650            for frame in 0..8u64 {
651                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
652                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
653                ds.write_chunk(frame as usize, &raw).unwrap();
654            }
655            ds.extend(&[8, 3]).unwrap();
656            file.close().unwrap();
657        }
658
659        // Read
660        {
661            let file = H5File::open(&path).unwrap();
662            let ds = file.dataset("table").unwrap();
663            assert_eq!(ds.shape(), vec![8, 3]);
664            let data = ds.read_raw::<i32>().unwrap();
665            assert_eq!(data.len(), 24);
666            for (i, val) in data.iter().enumerate() {
667                assert_eq!(*val, i as i32);
668            }
669        }
670
671        std::fs::remove_file(&path).ok();
672    }
673
674    #[test]
675    fn compressed_chunked_roundtrip() {
676        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");
677
678        // Write compressed
679        {
680            let file = H5File::create(&path).unwrap();
681            let ds = file
682                .new_dataset::<f64>()
683                .shape([0usize, 4])
684                .chunk(&[1, 4])
685                .max_shape(&[None, Some(4)])
686                .deflate(6)
687                .create("compressed")
688                .unwrap();
689
690            for frame in 0..10u64 {
691                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
692                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
693                ds.write_chunk(frame as usize, &raw).unwrap();
694            }
695            ds.extend(&[10, 4]).unwrap();
696            file.close().unwrap();
697        }
698
699        // Read back and verify
700        {
701            let file = H5File::open(&path).unwrap();
702            let ds = file.dataset("compressed").unwrap();
703            assert_eq!(ds.shape(), vec![10, 4]);
704            let data = ds.read_raw::<f64>().unwrap();
705            assert_eq!(data.len(), 40);
706            for (i, val) in data.iter().enumerate() {
707                assert!(
708                    (val - i as f64).abs() < 1e-10,
709                    "mismatch at {}: {} != {}",
710                    i,
711                    val,
712                    i
713                );
714            }
715        }
716
717        std::fs::remove_file(&path).ok();
718    }
719
720    #[test]
721    fn compressed_chunked_many_frames() {
722        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");
723
724        {
725            let file = H5File::create(&path).unwrap();
726            let ds = file
727                .new_dataset::<i32>()
728                .shape([0usize, 3])
729                .chunk(&[1, 3])
730                .max_shape(&[None, Some(3)])
731                .deflate(6)
732                .create("stream")
733                .unwrap();
734
735            for frame in 0..100u64 {
736                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
737                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
738                ds.write_chunk(frame as usize, &raw).unwrap();
739            }
740            ds.extend(&[100, 3]).unwrap();
741            file.close().unwrap();
742        }
743
744        {
745            let file = H5File::open(&path).unwrap();
746            let ds = file.dataset("stream").unwrap();
747            assert_eq!(ds.shape(), vec![100, 3]);
748            let data = ds.read_raw::<i32>().unwrap();
749            assert_eq!(data.len(), 300);
750            for (i, val) in data.iter().enumerate() {
751                assert_eq!(*val, i as i32, "mismatch at {}", i);
752            }
753        }
754
755        std::fs::remove_file(&path).ok();
756    }
757    #[test]
758    fn append_mode() {
759        let path = std::env::temp_dir().join("hdf5_append.h5");
760
761        // Create initial file
762        {
763            let file = H5File::create(&path).unwrap();
764            let ds = file
765                .new_dataset::<i32>()
766                .shape([3usize])
767                .create("first")
768                .unwrap();
769            ds.write_raw(&[1i32, 2, 3]).unwrap();
770            file.close().unwrap();
771        }
772
773        // Append new dataset
774        {
775            let file = H5File::open_rw(&path).unwrap();
776            let ds = file
777                .new_dataset::<f64>()
778                .shape([2usize])
779                .create("second")
780                .unwrap();
781            ds.write_raw(&[4.0f64, 5.0]).unwrap();
782            file.close().unwrap();
783        }
784
785        // Read back both
786        {
787            let file = H5File::open(&path).unwrap();
788            let names = file.dataset_names();
789            assert!(names.contains(&"first".to_string()));
790            assert!(names.contains(&"second".to_string()));
791
792            let ds1 = file.dataset("first").unwrap();
793            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
794
795            let ds2 = file.dataset("second").unwrap();
796            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
797        }
798
799        std::fs::remove_file(&path).ok();
800    }
801
802    #[test]
803    fn vlen_string_write_read() {
804        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
805        {
806            let file = H5File::create(&path).unwrap();
807            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
808                .unwrap();
809            file.close().unwrap();
810        }
811        {
812            let file = H5File::open(&path).unwrap();
813            let ds = file.dataset("names").unwrap();
814            let strings = ds.read_vlen_strings().unwrap();
815            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
816        }
817        std::fs::remove_file(&path).ok();
818    }
819
820    #[test]
821    fn shuffle_deflate_roundtrip() {
822        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
823        {
824            let file = H5File::create(&path).unwrap();
825            let ds = file
826                .new_dataset::<f64>()
827                .shape([0usize, 4])
828                .chunk(&[1, 4])
829                .max_shape(&[None, Some(4)])
830                .shuffle_deflate(6)
831                .create("data")
832                .unwrap();
833            for frame in 0..20u64 {
834                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
835                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
836                ds.write_chunk(frame as usize, &raw).unwrap();
837            }
838            ds.extend(&[20, 4]).unwrap();
839            file.close().unwrap();
840        }
841        {
842            let file = H5File::open(&path).unwrap();
843            let ds = file.dataset("data").unwrap();
844            assert_eq!(ds.shape(), vec![20, 4]);
845            let data = ds.read_raw::<f64>().unwrap();
846            assert_eq!(data.len(), 80);
847            for (i, val) in data.iter().enumerate() {
848                assert!((val - i as f64).abs() < 1e-10);
849            }
850        }
851        std::fs::remove_file(&path).ok();
852    }
853
854    #[test]
855    fn file_level_attributes() {
856        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
857        {
858            let file = H5File::create(&path).unwrap();
859            file.set_attr_string("title", "Test File").unwrap();
860            file.set_attr_numeric("version", &42i32).unwrap();
861            let ds = file
862                .new_dataset::<u8>()
863                .shape([1usize])
864                .create("dummy")
865                .unwrap();
866            ds.write_raw(&[0u8]).unwrap();
867            file.close().unwrap();
868        }
869        {
870            let file = H5File::open(&path).unwrap();
871            assert!(file.dataset_names().contains(&"dummy".to_string()));
872
873            // Read file-level attributes
874            let names = file.attr_names().unwrap();
875            assert!(names.contains(&"title".to_string()));
876
877            let title = file.attr_string("title").unwrap();
878            assert_eq!(title, "Test File");
879        }
880        std::fs::remove_file(&path).ok();
881    }
882
883    #[test]
884    fn scalar_dataset_roundtrip() {
885        let path = std::env::temp_dir().join("hdf5_scalar.h5");
886        {
887            let file = H5File::create(&path).unwrap();
888            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
889            ds.write_raw(&[std::f64::consts::PI]).unwrap();
890            file.close().unwrap();
891        }
892        {
893            let file = H5File::open(&path).unwrap();
894            let ds = file.dataset("pi").unwrap();
895            assert_eq!(ds.shape(), Vec::<usize>::new());
896            assert_eq!(ds.total_elements(), 1);
897            let data = ds.read_raw::<f64>().unwrap();
898            assert_eq!(data.len(), 1);
899            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
900        }
901        std::fs::remove_file(&path).ok();
902    }
903
904    #[test]
905    fn append_mode_extend_chunked() {
906        let path = std::env::temp_dir().join("hdf5_append_extend.h5");
907
908        // Create with 5 frames
909        {
910            let file = H5File::create(&path).unwrap();
911            let ds = file
912                .new_dataset::<i32>()
913                .shape([0usize, 3])
914                .chunk(&[1, 3])
915                .max_shape(&[None, Some(3)])
916                .create("stream")
917                .unwrap();
918            for i in 0..5u64 {
919                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
920                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
921                ds.write_chunk(i as usize, &raw).unwrap();
922            }
923            ds.extend(&[5, 3]).unwrap();
924            file.close().unwrap();
925        }
926
927        // Reopen and add 5 more frames
928        {
929            let file = H5File::open_rw(&path).unwrap();
930            // Find the stream dataset index (it's the first one)
931            let names = file.dataset_names();
932            assert!(names.contains(&"stream".to_string()));
933
934            // Write more chunks via the writer directly
935            let mut inner = crate::file::borrow_inner_mut(&file.inner);
936            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
937                let ds_idx = writer.dataset_index("stream").unwrap();
938                for i in 5..10u64 {
939                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
940                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
941                    writer.write_chunk(ds_idx, i, &raw).unwrap();
942                }
943                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
944            }
945            drop(inner);
946            file.close().unwrap();
947        }
948
949        // Read back all 10 frames
950        {
951            let file = H5File::open(&path).unwrap();
952            let ds = file.dataset("stream").unwrap();
953            assert_eq!(ds.shape(), vec![10, 3]);
954            let data = ds.read_raw::<i32>().unwrap();
955            assert_eq!(data.len(), 30);
956            for (i, val) in data.iter().enumerate() {
957                assert_eq!(*val, i as i32, "mismatch at {}", i);
958            }
959        }
960
961        std::fs::remove_file(&path).ok();
962    }
963
964    #[test]
965    fn group_hierarchy_roundtrip() {
966        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");
967
968        {
969            let file = H5File::create(&path).unwrap();
970            let root = file.root_group();
971
972            // Create groups
973            let det = root.create_group("detector").unwrap();
974            let raw = det.create_group("raw").unwrap();
975
976            // Create datasets in groups
977            let ds1 = det
978                .new_dataset::<f32>()
979                .shape([10usize])
980                .create("temperature")
981                .unwrap();
982            ds1.write_raw(&[1.0f32; 10]).unwrap();
983
984            let ds2 = raw
985                .new_dataset::<u16>()
986                .shape([4usize, 4])
987                .create("image")
988                .unwrap();
989            ds2.write_raw(&[42u16; 16]).unwrap();
990
991            // Root-level dataset
992            let ds3 = file
993                .new_dataset::<i32>()
994                .shape([3usize])
995                .create("version")
996                .unwrap();
997            ds3.write_raw(&[1i32, 0, 0]).unwrap();
998
999            file.close().unwrap();
1000        }
1001
1002        {
1003            let file = H5File::open(&path).unwrap();
1004            let names = file.dataset_names();
1005            assert!(names.contains(&"version".to_string()));
1006            assert!(names.contains(&"detector/temperature".to_string()));
1007            assert!(names.contains(&"detector/raw/image".to_string()));
1008
1009            // Read datasets
1010            let ds = file.dataset("version").unwrap();
1011            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1012
1013            let ds = file.dataset("detector/temperature").unwrap();
1014            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1015
1016            let ds = file.dataset("detector/raw/image").unwrap();
1017            assert_eq!(ds.shape(), vec![4, 4]);
1018            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1019
1020            // Group traversal
1021            let root = file.root_group();
1022            let group_names = root.group_names().unwrap();
1023            assert!(group_names.contains(&"detector".to_string()));
1024        }
1025
1026        std::fs::remove_file(&path).ok();
1027    }
1028
1029    #[test]
1030    fn nested_groups_via_file_create_group() {
1031        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");
1032
1033        {
1034            let file = H5File::create(&path).unwrap();
1035
1036            // Use the H5File::create_group convenience method
1037            let grp = file.create_group("sensors").unwrap();
1038            let sub = grp.create_group("accel").unwrap();
1039
1040            let ds = sub
1041                .new_dataset::<f64>()
1042                .shape([3usize])
1043                .create("xyz")
1044                .unwrap();
1045            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1046
1047            file.close().unwrap();
1048        }
1049
1050        {
1051            let file = H5File::open(&path).unwrap();
1052            let names = file.dataset_names();
1053            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1054
1055            let ds = file.dataset("sensors/accel/xyz").unwrap();
1056            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1057
1058            // Open group in read mode
1059            let root = file.root_group();
1060            let sensors = root.group("sensors").unwrap();
1061            assert_eq!(sensors.name(), "/sensors");
1062
1063            let accel = sensors.group("accel").unwrap();
1064            assert_eq!(accel.name(), "/sensors/accel");
1065
1066            // list_groups from root
1067            let top_groups = root.group_names().unwrap();
1068            assert!(top_groups.contains(&"sensors".to_string()));
1069
1070            // list_groups from sensors
1071            let sub_groups = sensors.group_names().unwrap();
1072            assert!(sub_groups.contains(&"accel".to_string()));
1073        }
1074
1075        std::fs::remove_file(&path).ok();
1076    }
1077}
1078
#[cfg(test)]
mod h5py_compat_tests {
    use super::*;

    /// Returns true when a working `h5dump` binary is on PATH.
    ///
    /// Checks the exit status of `h5dump --version`, not just that the
    /// process could be spawned — a binary that launches but fails its
    /// version probe is treated as unavailable.
    fn h5dump_available() -> bool {
        std::process::Command::new("h5dump")
            .arg("--version")
            .output()
            .map(|out| out.status.success())
            .unwrap_or(false)
    }

    /// Verify our files can be read by h5dump (if available).
    #[test]
    fn h5dump_validates_our_files() {
        if !h5dump_available() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = std::env::temp_dir().join("hdf5_h5dump_validate.h5");

        // Write a file exercising the main layout variants: contiguous,
        // chunked+deflate, a group, and a string attribute.
        {
            let file = H5File::create(&path).unwrap();

            // Contiguous
            let ds = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
            ds.write_raw(&data).unwrap();

            // Chunked + compressed
            let ds2 = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                let vals: Vec<i32> = vec![i as i32 * 2, i as i32 * 2 + 1];
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds2.write_chunk(i as usize, &raw).unwrap();
            }
            ds2.extend(&[5, 2]).unwrap();

            // Group
            let grp = file.create_group("meta").unwrap();
            let ds3 = grp
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute
            use crate::types::VarLenUnicode;
            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            attr.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Header-only dump (faster) must succeed.
        let output = std::process::Command::new("h5dump")
            .arg("-H")
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );

        // Full dump (with data) should also work.
        let output2 = std::process::Command::new("h5dump")
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output2.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&output2.stderr),
        );

        std::fs::remove_file(&path).ok();
    }

    /// Read a file produced by h5py's default settings; skipped when the
    /// fixture is absent (it is generated by an out-of-tree Python script).
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        assert!((data[0]).abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}