// rust_hdf5/file.rs
//! HDF5 file handle — the main entry point for the public API.
//!
//! ```no_run
//! use rust_hdf5::H5File;
//!
//! // Write
//! let file = H5File::create("example.h5").unwrap();
//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
//! ds.write_raw(&vec![0u8; 200]).unwrap();
//! drop(file);
//!
//! // Read
//! let file = H5File::open("example.h5").unwrap();
//! let ds = file.dataset("data").unwrap();
//! let data = ds.read_raw::<u8>().unwrap();
//! assert_eq!(data.len(), 200);
//! ```

use std::path::Path;

use crate::dataset::{DatasetBuilder, H5Dataset};
use crate::error::{Hdf5Error, Result};
use crate::group::H5Group;
use crate::io::{Hdf5Reader, Hdf5Writer};
use crate::types::H5Type;
27
// ---------------------------------------------------------------------------
// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
// the `threadsafe` feature flag.
// ---------------------------------------------------------------------------

/// Shared handle to [`H5FileInner`] — single-threaded flavor (no `Send`/`Sync`).
#[cfg(not(feature = "threadsafe"))]
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;

/// Shared handle to [`H5FileInner`] — thread-safe flavor (`Send + Sync`).
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
38
39/// Helper to borrow/lock the inner state immutably.
40#[cfg(not(feature = "threadsafe"))]
41pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
42    inner.borrow()
43}
44
45/// Helper to borrow/lock the inner state mutably.
46#[cfg(not(feature = "threadsafe"))]
47pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
48    inner.borrow_mut()
49}
50
51/// Helper to clone a SharedInner.
52#[cfg(not(feature = "threadsafe"))]
53pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
54    std::rc::Rc::clone(inner)
55}
56
57/// Helper to wrap an H5FileInner in SharedInner.
58#[cfg(not(feature = "threadsafe"))]
59pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
60    std::rc::Rc::new(std::cell::RefCell::new(inner))
61}
62
/// Lock the shared inner state for reading (thread-safe build).
///
/// # Panics
/// Panics if the mutex is poisoned, i.e. another thread panicked while
/// holding the lock.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    // `expect` instead of bare `unwrap` so a poisoned-lock panic names the
    // invariant that was violated.
    inner.lock().expect("H5FileInner mutex poisoned")
}

/// Lock the shared inner state for writing (thread-safe build).
///
/// # Panics
/// Panics if the mutex is poisoned.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner.lock().expect("H5FileInner mutex poisoned")
}

/// Produce another owner of the same shared inner state (refcount bump).
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    std::sync::Arc::clone(inner)
}

/// Wrap a fresh [`H5FileInner`] in the shared handle type.
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::sync::Arc::new(std::sync::Mutex::new(inner))
}
82
/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// By default, this is held behind `Rc<RefCell<>>` for zero-overhead
/// single-threaded use. Enable the `threadsafe` feature to use `Arc<Mutex<>>`
/// instead, making `H5File` `Send + Sync`.
pub(crate) enum H5FileInner {
    /// File opened via `create`/`open_rw`; accepts new datasets and attributes.
    Writer(Hdf5Writer),
    /// File opened via `open`; read-only access to existing objects.
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    Closed,
}
94
/// An HDF5 file opened for reading or writing.
///
/// Datasets created from this file hold a shared reference to the underlying
/// I/O handle, so the file does not need to outlive its datasets (they share
/// ownership via reference counting).
pub struct H5File {
    // Shared with every H5Dataset/H5Group spawned from this file.
    pub(crate) inner: SharedInner,
}
103
104impl H5File {
105    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
106    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
107        let writer = Hdf5Writer::create(path.as_ref())?;
108        Ok(Self {
109            inner: new_shared(H5FileInner::Writer(writer)),
110        })
111    }
112
113    /// Open an existing HDF5 file for reading.
114    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
115        let reader = Hdf5Reader::open(path.as_ref())?;
116        Ok(Self {
117            inner: new_shared(H5FileInner::Reader(reader)),
118        })
119    }
120
121    /// Open an existing HDF5 file for appending new datasets.
122    ///
123    /// Existing datasets are preserved. New datasets can be added and will
124    /// be written after the current end of file. Existing chunked datasets
125    /// can be extended with `write_chunk` and `extend_dataset`.
126    ///
127    /// ```no_run
128    /// use rust_hdf5::H5File;
129    /// let file = H5File::open_rw("existing.h5").unwrap();
130    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
131    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
132    /// file.close().unwrap();
133    /// ```
134    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
135        let writer = Hdf5Writer::open_append(path.as_ref())?;
136        Ok(Self {
137            inner: new_shared(H5FileInner::Writer(writer)),
138        })
139    }
140
141    /// Return a handle to the root group.
142    ///
143    /// The root group can be used to create datasets and sub-groups.
144    pub fn root_group(&self) -> H5Group {
145        H5Group::new(clone_inner(&self.inner), "/".to_string())
146    }
147
148    /// Create a group in the root of the file.
149    ///
150    /// ```no_run
151    /// use rust_hdf5::H5File;
152    /// let file = H5File::create("groups.h5").unwrap();
153    /// let grp = file.create_group("detector").unwrap();
154    /// ```
155    pub fn create_group(&self, name: &str) -> Result<H5Group> {
156        self.root_group().create_group(name)
157    }
158
159    /// Start building a new dataset with the given element type.
160    ///
161    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
162    /// `.create("name")` to finalize.
163    ///
164    /// ```no_run
165    /// # use rust_hdf5::H5File;
166    /// let file = H5File::create("build.h5").unwrap();
167    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
168    /// ```
169    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
170        DatasetBuilder::new(clone_inner(&self.inner))
171    }
172
173    /// Add a string attribute to the file (root group).
174    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
175        use crate::format::messages::attribute::AttributeMessage;
176        let attr = AttributeMessage::scalar_string(name, value);
177        let mut inner = borrow_inner_mut(&self.inner);
178        match &mut *inner {
179            H5FileInner::Writer(writer) => {
180                writer.add_root_attribute(attr);
181                Ok(())
182            }
183            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
184        }
185    }
186
187    /// Add a numeric attribute to the file (root group).
188    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
189        use crate::format::messages::attribute::AttributeMessage;
190        let es = T::element_size();
191        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
192        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
193        let mut inner = borrow_inner_mut(&self.inner);
194        match &mut *inner {
195            H5FileInner::Writer(writer) => {
196                writer.add_root_attribute(attr);
197                Ok(())
198            }
199            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
200        }
201    }
202
203    /// Return the names of file-level (root group) attributes.
204    pub fn attr_names(&self) -> Result<Vec<String>> {
205        let inner = borrow_inner(&self.inner);
206        match &*inner {
207            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
208            _ => Ok(vec![]),
209        }
210    }
211
212    /// Read a file-level string attribute.
213    pub fn attr_string(&self, name: &str) -> Result<String> {
214        let inner = borrow_inner(&self.inner);
215        match &*inner {
216            H5FileInner::Reader(reader) => {
217                let attr = reader
218                    .root_attr(name)
219                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
220                let end = attr
221                    .data
222                    .iter()
223                    .position(|&b| b == 0)
224                    .unwrap_or(attr.data.len());
225                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
226            }
227            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
228        }
229    }
230
231    /// Check if the file is in write/append mode.
232    pub fn is_writable(&self) -> bool {
233        let inner = borrow_inner(&self.inner);
234        matches!(&*inner, H5FileInner::Writer(_))
235    }
236
237    /// Create a variable-length string dataset and write data.
238    ///
239    /// This is a convenience method for writing h5py-compatible vlen string
240    /// datasets using global heap storage.
241    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
242        let mut inner = borrow_inner_mut(&self.inner);
243        match &mut *inner {
244            H5FileInner::Writer(writer) => {
245                let idx = writer.create_vlen_string_dataset(name, strings)?;
246                // If the name contains '/', assign the dataset to its parent group
247                if let Some(slash_pos) = name.rfind('/') {
248                    let group_path = &name[..slash_pos];
249                    let abs_group_path = if group_path.starts_with('/') {
250                        group_path.to_string()
251                    } else {
252                        format!("/{}", group_path)
253                    };
254                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
255                }
256                Ok(())
257            }
258            H5FileInner::Reader(_) => {
259                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
260            }
261            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
262        }
263    }
264
265    /// Open an existing dataset by name (read mode).
266    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
267        let inner = borrow_inner(&self.inner);
268        match &*inner {
269            H5FileInner::Reader(reader) => {
270                let info = reader
271                    .dataset_info(name)
272                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
273                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
274                let element_size = info.datatype.element_size() as usize;
275                Ok(H5Dataset::new_reader(
276                    clone_inner(&self.inner),
277                    name.to_string(),
278                    shape,
279                    element_size,
280                ))
281            }
282            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
283                "cannot open a dataset by name in write mode; use new_dataset() instead"
284                    .to_string(),
285            )),
286            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
287        }
288    }
289
290    /// Return the names of all datasets in the root group.
291    ///
292    /// Works in both read and write mode: in write mode, returns the names of
293    /// datasets created so far; in read mode, returns the names discovered
294    /// during file open.
295    pub fn dataset_names(&self) -> Vec<String> {
296        let inner = borrow_inner(&self.inner);
297        match &*inner {
298            H5FileInner::Reader(reader) => reader
299                .dataset_names()
300                .iter()
301                .map(|s| s.to_string())
302                .collect(),
303            H5FileInner::Writer(writer) => writer
304                .dataset_names()
305                .iter()
306                .map(|s| s.to_string())
307                .collect(),
308            H5FileInner::Closed => Vec::new(),
309        }
310    }
311
312    /// Explicitly close the file. For a writer, this finalizes the file
313    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
314    ///
315    /// The file is also auto-finalized on drop, but calling `close()` lets
316    /// you handle errors.
317    pub fn close(self) -> Result<()> {
318        let old = {
319            let mut inner = borrow_inner_mut(&self.inner);
320            std::mem::replace(&mut *inner, H5FileInner::Closed)
321        };
322        match old {
323            H5FileInner::Writer(writer) => {
324                writer.close()?;
325                Ok(())
326            }
327            H5FileInner::Reader(_) => Ok(()),
328            H5FileInner::Closed => Ok(()),
329        }
330    }
331
332    /// Flush the file to disk. Only meaningful in write mode.
333    pub fn flush(&self) -> Result<()> {
334        // The underlying writer does not expose a standalone flush; data is
335        // written to disk immediately via pwrite. This is a compatibility
336        // method that does nothing for now.
337        Ok(())
338    }
339}
340
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // Build a unique temp-file path for a test, namespaced to this module
    // so parallel test binaries don't collide.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("hdf5_file_test_{}.h5", name))
    }

    // An empty file created then explicitly closed must be re-openable.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();

        // Should be readable
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    // Dropping without close() must still produce a valid file.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        {
            let _file = H5File::create(&path).unwrap();
            // drop auto-finalizes
        }
        // Verify the file is valid by opening it
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    // Looking up a missing dataset returns Err, not a panic.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        let result = file.dataset("nonexistent");
        assert!(result.is_err());

        std::fs::remove_file(&path).ok();
    }

    // Bytes written through a contiguous u8 dataset come back intact.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([4, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let data = ds.read_raw::<u8>().unwrap();
            assert_eq!(data.len(), 16);
            assert!(data.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    // Same round trip with an 8-byte element type (f64 2x3 matrix).
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&values).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    // Two datasets of different types in one file stay independent.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let file = H5File::create(&path).unwrap();
            let ds1 = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            ds1.write_raw(&[10i32, 20, 30]).unwrap();

            let ds2 = file
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            ds2.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            let ds_ints = file.dataset("ints").unwrap();
            assert_eq!(ds_ints.shape(), vec![3]);
            let ints = ds_ints.read_raw::<i32>().unwrap();
            assert_eq!(ints, vec![10, 20, 30]);

            let ds_floats = file.dataset("floats").unwrap();
            assert_eq!(ds_floats.shape(), vec![2, 2]);
            let floats = ds_floats.read_raw::<f32>().unwrap();
            assert_eq!(floats, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    // close() takes self by value, so double-close cannot compile.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();
        // File is consumed by close(), so no double-close possible at the type level.
        std::fs::remove_file(&path).ok();
    }
}
495
496#[cfg(test)]
497mod integration_tests {
498    use super::*;
499
    // Produce a file with three plain datasets; intended to be inspected
    // manually with h5dump, so the file is deliberately left on disk.
    #[test]
    fn write_file_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<u8>()
            .shape([4usize, 4])
            .create("data_u8")
            .unwrap();
        let data: Vec<u8> = (0..16).collect();
        ds.write_raw(&data).unwrap();

        let ds2 = file
            .new_dataset::<f64>()
            .shape([3usize, 2])
            .create("data_f64")
            .unwrap();
        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        ds2.write_raw(&fdata).unwrap();

        let ds3 = file
            .new_dataset::<i32>()
            .shape([5usize])
            .create("values")
            .unwrap();
        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
        ds3.write_raw(&idata).unwrap();

        file.close().unwrap();

        // File exists
        assert!(path.exists());
    }

    // Chunked dataset with an unlimited first dimension, grown frame by frame.
    #[test]
    fn write_chunked_file_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
        let file = H5File::create(&path).unwrap();

        // Create a chunked dataset with unlimited first dimension
        let ds = file
            .new_dataset::<f64>()
            .shape([0usize, 4])
            .chunk(&[1, 4])
            .max_shape(&[None, Some(4)])
            .create("streaming_data")
            .unwrap();

        // Write 5 frames of data
        for frame in 0..5u64 {
            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
            ds.write_chunk(frame as usize, &raw).unwrap();
        }

        // Extend dimensions to reflect the 5 written frames
        ds.extend(&[5, 4]).unwrap();
        ds.flush().unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }

    // Enough frames to spill past the index block into data blocks.
    #[test]
    fn write_chunked_many_frames_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<i32>()
            .shape([0usize, 3])
            .chunk(&[1, 3])
            .max_shape(&[None, Some(3)])
            .create("data")
            .unwrap();

        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
        for frame in 0..10u64 {
            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
            ds.write_chunk(frame as usize, &raw).unwrap();
        }
        ds.extend(&[10, 3]).unwrap();
        file.close().unwrap();

        assert!(path.exists());
    }

    // Attach several vlen-string attributes via all supported write paths.
    #[test]
    fn write_dataset_with_attributes() {
        use crate::types::VarLenUnicode;

        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<f32>()
            .shape([10usize])
            .create("temperature")
            .unwrap();
        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
        ds.write_raw(&data).unwrap();

        // Add string attributes
        let attr = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("units")
            .unwrap();
        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
            .unwrap();

        let attr2 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("description")
            .unwrap();
        attr2
            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
            .unwrap();

        // Use write_string convenience method
        let attr3 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("source")
            .unwrap();
        attr3.write_string("sensor_01").unwrap();

        // Also test parse -> write_scalar pattern
        let attr4 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("label")
            .unwrap();
        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
        attr4.write_scalar(&s).unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }
644
    // Chunked write followed by read-back; values must survive the round trip.
    #[test]
    fn chunked_write_read_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("table")
                .unwrap();

            for frame in 0..8u64 {
                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[8, 3]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("table").unwrap();
            assert_eq!(ds.shape(), vec![8, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 24);
            // Dataset was filled with consecutive integers 0..24.
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32);
            }
        }

        std::fs::remove_file(&path).ok();
    }

    // Deflate-compressed chunks must decompress to the original values.
    #[test]
    #[cfg(feature = "deflate")]
    fn compressed_chunked_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");

        // Write compressed
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0usize, 4])
                .chunk(&[1, 4])
                .max_shape(&[None, Some(4)])
                .deflate(6)
                .create("compressed")
                .unwrap();

            for frame in 0..10u64 {
                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[10, 4]).unwrap();
            file.close().unwrap();
        }

        // Read back and verify
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("compressed").unwrap();
            assert_eq!(ds.shape(), vec![10, 4]);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 40);
            for (i, val) in data.iter().enumerate() {
                assert!(
                    (val - i as f64).abs() < 1e-10,
                    "mismatch at {}: {} != {}",
                    i,
                    val,
                    i
                );
            }
        }

        std::fs::remove_file(&path).ok();
    }

    // 100 compressed frames exercises multi-block chunk indexing.
    #[test]
    #[cfg(feature = "deflate")]
    fn compressed_chunked_many_frames() {
        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .deflate(6)
                .create("stream")
                .unwrap();

            for frame in 0..100u64 {
                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[100, 3]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("stream").unwrap();
            assert_eq!(ds.shape(), vec![100, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 300);
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32, "mismatch at {}", i);
            }
        }

        std::fs::remove_file(&path).ok();
    }
    // open_rw must preserve existing datasets while accepting new ones.
    #[test]
    fn append_mode() {
        let path = std::env::temp_dir().join("hdf5_append.h5");

        // Create initial file
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([3usize])
                .create("first")
                .unwrap();
            ds.write_raw(&[1i32, 2, 3]).unwrap();
            file.close().unwrap();
        }

        // Append new dataset
        {
            let file = H5File::open_rw(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2usize])
                .create("second")
                .unwrap();
            ds.write_raw(&[4.0f64, 5.0]).unwrap();
            file.close().unwrap();
        }

        // Read back both
        {
            let file = H5File::open(&path).unwrap();
            let names = file.dataset_names();
            assert!(names.contains(&"first".to_string()));
            assert!(names.contains(&"second".to_string()));

            let ds1 = file.dataset("first").unwrap();
            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);

            let ds2 = file.dataset("second").unwrap();
            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    // Variable-length strings written through the convenience API round-trip.
    #[test]
    fn vlen_string_write_read() {
        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
        {
            let file = H5File::create(&path).unwrap();
            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("names").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
        }
        std::fs::remove_file(&path).ok();
    }
831
    // Shuffle filter stacked with deflate must round-trip byte-exactly.
    #[test]
    #[cfg(feature = "deflate")]
    fn shuffle_deflate_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0usize, 4])
                .chunk(&[1, 4])
                .max_shape(&[None, Some(4)])
                .shuffle_deflate(6)
                .create("data")
                .unwrap();
            for frame in 0..20u64 {
                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[20, 4]).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![20, 4]);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 80);
            for (i, val) in data.iter().enumerate() {
                assert!((val - i as f64).abs() < 1e-10);
            }
        }
        std::fs::remove_file(&path).ok();
    }

    // Root-group attributes written in write mode are readable after reopen.
    #[test]
    fn file_level_attributes() {
        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
        {
            let file = H5File::create(&path).unwrap();
            file.set_attr_string("title", "Test File").unwrap();
            file.set_attr_numeric("version", &42i32).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([1usize])
                .create("dummy")
                .unwrap();
            ds.write_raw(&[0u8]).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            assert!(file.dataset_names().contains(&"dummy".to_string()));

            // Read file-level attributes
            let names = file.attr_names().unwrap();
            assert!(names.contains(&"title".to_string()));

            let title = file.attr_string("title").unwrap();
            assert_eq!(title, "Test File");
        }
        std::fs::remove_file(&path).ok();
    }

    // A scalar dataset has an empty shape but exactly one element.
    #[test]
    fn scalar_dataset_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_scalar.h5");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
            ds.write_raw(&[std::f64::consts::PI]).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("pi").unwrap();
            assert_eq!(ds.shape(), Vec::<usize>::new());
            assert_eq!(ds.total_elements(), 1);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 1);
            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
        }
        std::fs::remove_file(&path).ok();
    }
916
    #[test]
    fn append_mode_extend_chunked() {
        // End-to-end check that a chunked, extendable dataset can be grown
        // across a close/reopen cycle (append-style workflow).
        let path = std::env::temp_dir().join("hdf5_append_extend.h5");

        // Create with 5 frames
        {
            let file = H5File::create(&path).unwrap();
            // Unlimited first dimension (max_shape None), fixed width of 3;
            // one chunk per frame.
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
                // write_chunk consumes the raw little-endian bytes of one chunk.
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(i as usize, &raw).unwrap();
            }
            // Record the logical shape covering the chunks written so far.
            ds.extend(&[5, 3]).unwrap();
            file.close().unwrap();
        }

        // Reopen and add 5 more frames
        {
            let file = H5File::open_rw(&path).unwrap();
            // Find the stream dataset index (it's the first one)
            let names = file.dataset_names();
            assert!(names.contains(&"stream".to_string()));

            // Write more chunks via the writer directly
            // NOTE(review): this reaches into crate internals (H5FileInner)
            // rather than going through a public H5Dataset handle — presumably
            // because rw-mode dataset handles aren't exposed here; confirm.
            let mut inner = crate::file::borrow_inner_mut(&file.inner);
            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
                let ds_idx = writer.dataset_index("stream").unwrap();
                for i in 5..10u64 {
                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                    writer.write_chunk(ds_idx, i, &raw).unwrap();
                }
                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
            }
            // Release the inner borrow before close(), which needs its own
            // access to the shared inner state.
            drop(inner);
            file.close().unwrap();
        }

        // Read back all 10 frames
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("stream").unwrap();
            assert_eq!(ds.shape(), vec![10, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 30);
            // Values were written as a single running counter 0..30.
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32, "mismatch at {}", i);
            }
        }

        std::fs::remove_file(&path).ok();
    }
976
977    #[test]
978    fn group_hierarchy_roundtrip() {
979        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");
980
981        {
982            let file = H5File::create(&path).unwrap();
983            let root = file.root_group();
984
985            // Create groups
986            let det = root.create_group("detector").unwrap();
987            let raw = det.create_group("raw").unwrap();
988
989            // Create datasets in groups
990            let ds1 = det
991                .new_dataset::<f32>()
992                .shape([10usize])
993                .create("temperature")
994                .unwrap();
995            ds1.write_raw(&[1.0f32; 10]).unwrap();
996
997            let ds2 = raw
998                .new_dataset::<u16>()
999                .shape([4usize, 4])
1000                .create("image")
1001                .unwrap();
1002            ds2.write_raw(&[42u16; 16]).unwrap();
1003
1004            // Root-level dataset
1005            let ds3 = file
1006                .new_dataset::<i32>()
1007                .shape([3usize])
1008                .create("version")
1009                .unwrap();
1010            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1011
1012            file.close().unwrap();
1013        }
1014
1015        {
1016            let file = H5File::open(&path).unwrap();
1017            let names = file.dataset_names();
1018            assert!(names.contains(&"version".to_string()));
1019            assert!(names.contains(&"detector/temperature".to_string()));
1020            assert!(names.contains(&"detector/raw/image".to_string()));
1021
1022            // Read datasets
1023            let ds = file.dataset("version").unwrap();
1024            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1025
1026            let ds = file.dataset("detector/temperature").unwrap();
1027            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1028
1029            let ds = file.dataset("detector/raw/image").unwrap();
1030            assert_eq!(ds.shape(), vec![4, 4]);
1031            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1032
1033            // Group traversal
1034            let root = file.root_group();
1035            let group_names = root.group_names().unwrap();
1036            assert!(group_names.contains(&"detector".to_string()));
1037        }
1038
1039        std::fs::remove_file(&path).ok();
1040    }
1041
1042    #[test]
1043    fn nested_groups_via_file_create_group() {
1044        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");
1045
1046        {
1047            let file = H5File::create(&path).unwrap();
1048
1049            // Use the H5File::create_group convenience method
1050            let grp = file.create_group("sensors").unwrap();
1051            let sub = grp.create_group("accel").unwrap();
1052
1053            let ds = sub
1054                .new_dataset::<f64>()
1055                .shape([3usize])
1056                .create("xyz")
1057                .unwrap();
1058            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1059
1060            file.close().unwrap();
1061        }
1062
1063        {
1064            let file = H5File::open(&path).unwrap();
1065            let names = file.dataset_names();
1066            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1067
1068            let ds = file.dataset("sensors/accel/xyz").unwrap();
1069            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1070
1071            // Open group in read mode
1072            let root = file.root_group();
1073            let sensors = root.group("sensors").unwrap();
1074            assert_eq!(sensors.name(), "/sensors");
1075
1076            let accel = sensors.group("accel").unwrap();
1077            assert_eq!(accel.name(), "/sensors/accel");
1078
1079            // list_groups from root
1080            let top_groups = root.group_names().unwrap();
1081            assert!(top_groups.contains(&"sensors".to_string()));
1082
1083            // list_groups from sensors
1084            let sub_groups = sensors.group_names().unwrap();
1085            assert!(sub_groups.contains(&"accel".to_string()));
1086        }
1087
1088        std::fs::remove_file(&path).ok();
1089    }
1090}
1091
#[cfg(test)]
mod h5py_compat_tests {
    //! Interoperability checks against the reference HDF5 tooling.
    //! Both tests self-skip (with an eprintln! notice) when the external
    //! tool or fixture file is absent, so they pass in minimal environments.

    use super::*;

    /// Verify our files can be read by h5dump (if available).
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // Check if h5dump is available
        // (a spawn error means the binary is not on PATH -> skip the test).
        let h5dump = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if h5dump.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = std::env::temp_dir().join("hdf5_h5dump_validate.h5");

        // Write a comprehensive test file
        {
            let file = H5File::create(&path).unwrap();

            // Contiguous
            let ds = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
            ds.write_raw(&data).unwrap();

            // Chunked + compressed (deflate level 6, unlimited first dim)
            let ds2 = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                let vals: Vec<i32> = vec![i as i32 * 2, i as i32 * 2 + 1];
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds2.write_chunk(i as usize, &raw).unwrap();
            }
            ds2.extend(&[5, 2]).unwrap();

            // Group
            let grp = file.create_group("meta").unwrap();
            let ds3 = grp
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute: scalar variable-length string on "matrix".
            use crate::types::VarLenUnicode;
            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            attr.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Run h5dump and verify exit code
        let output = std::process::Command::new("h5dump")
            .arg("-H") // header only (faster)
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );

        // Full dump (with data) should also work
        // (also exercises decompression of the deflated chunks on read).
        let output2 = std::process::Command::new("h5dump")
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output2.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&output2.stderr),
        );

        std::fs::remove_file(&path).ok();
    }

    /// Read a fixture file written by h5py.
    ///
    /// NOTE(review): the fixture path is POSIX-specific (/tmp) and the file
    /// is presumably produced by an external script — confirm how/where it
    /// is generated before relying on this test in CI.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        // "data": 4x5 f64 matrix; first element ~0.0 and element 19 ~19.0.
        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        assert!((data[0]).abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        // "images": three 64x64 u16 frames; only shape/length are checked.
        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}