Skip to main content

rust_hdf5/
file.rs

1//! HDF5 file handle — the main entry point for the public API.
2//!
3//! ```no_run
4//! use rust_hdf5::H5File;
5//!
6//! // Write
7//! let file = H5File::create("example.h5").unwrap();
8//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
9//! ds.write_raw(&vec![0u8; 200]).unwrap();
10//! drop(file);
11//!
12//! // Read
13//! let file = H5File::open("example.h5").unwrap();
14//! let ds = file.dataset("data").unwrap();
15//! let data = ds.read_raw::<u8>().unwrap();
16//! assert_eq!(data.len(), 200);
17//! ```
18
19use std::path::Path;
20
21use crate::io::locking::FileLocking;
22use crate::io::{Hdf5Reader, Hdf5Writer};
23
24use crate::dataset::{DatasetBuilder, H5Dataset};
25use crate::error::{Hdf5Error, Result};
26use crate::format::messages::filter::FilterPipeline;
27use crate::group::H5Group;
28use crate::types::H5Type;
29
30// ---------------------------------------------------------------------------
31// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
32// the `threadsafe` feature flag.
33// ---------------------------------------------------------------------------
34
/// Reference-counted, interior-mutable handle to the file state.
///
/// Build without the `threadsafe` feature: `Rc<RefCell<_>>`.
#[cfg(not(feature = "threadsafe"))]
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;

/// Reference-counted, interior-mutable handle to the file state.
///
/// Build with the `threadsafe` feature: `Arc<Mutex<_>>`.
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
40
41/// Helper to borrow/lock the inner state immutably.
42#[cfg(not(feature = "threadsafe"))]
43pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
44    inner.borrow()
45}
46
47/// Helper to borrow/lock the inner state mutably.
48#[cfg(not(feature = "threadsafe"))]
49pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
50    inner.borrow_mut()
51}
52
53/// Helper to clone a SharedInner.
54#[cfg(not(feature = "threadsafe"))]
55pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
56    std::rc::Rc::clone(inner)
57}
58
59/// Helper to wrap an H5FileInner in SharedInner.
60#[cfg(not(feature = "threadsafe"))]
61pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
62    std::rc::Rc::new(std::cell::RefCell::new(inner))
63}
64
/// Lock the file state for reading.
///
/// `threadsafe` build: acquires the mutex and panics if it is poisoned.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    let mutex: &std::sync::Mutex<H5FileInner> = inner;
    mutex.lock().unwrap()
}
69
/// Lock the file state for writing.
///
/// `threadsafe` build: identical to `borrow_inner`, since a mutex guard grants
/// exclusive access either way. Panics if the mutex is poisoned.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    let mutex: &std::sync::Mutex<H5FileInner> = inner;
    mutex.lock().unwrap()
}
74
/// Produce another handle to the same file state (bumps the atomic reference
/// count; the state itself is not copied).
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    SharedInner::clone(inner)
}
79
/// Move an `H5FileInner` into a fresh shared, mutex-protected handle.
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    let guarded = std::sync::Mutex::new(inner);
    SharedInner::new(guarded)
}
84
/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// By default, this uses `Rc<RefCell<>>` for zero-overhead single-threaded use.
/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
/// `H5File` `Send + Sync`.
///
/// The variant records which mode the file was opened in; the public
/// `H5File` methods match on it to decide whether an operation is allowed.
pub(crate) enum H5FileInner {
    /// File opened through `Hdf5Writer` (created, or opened for append).
    Writer(Hdf5Writer),
    /// File opened through `Hdf5Reader` (read mode).
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    Closed,
}
96
97/// An HDF5 file opened for reading or writing.
98///
99/// Datasets created from this file hold a shared reference to the underlying
100/// I/O handle, so the file does not need to outlive its datasets (they share
101/// ownership via reference counting).
102pub struct H5File {
103    pub(crate) inner: SharedInner,
104}
105
106impl H5File {
107    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
108    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
109        let writer = Hdf5Writer::create(path.as_ref())?;
110        Ok(Self {
111            inner: new_shared(H5FileInner::Writer(writer)),
112        })
113    }
114
115    /// Open an existing HDF5 file for reading.
116    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
117        let reader = Hdf5Reader::open(path.as_ref())?;
118        Ok(Self {
119            inner: new_shared(H5FileInner::Reader(reader)),
120        })
121    }
122
123    /// Open an existing HDF5 file for appending new datasets.
124    ///
125    /// Existing datasets are preserved. New datasets can be added and will
126    /// be written after the current end of file. Existing chunked datasets
127    /// can be extended with `write_chunk` and `extend_dataset`.
128    ///
129    /// ```no_run
130    /// use rust_hdf5::H5File;
131    /// let file = H5File::open_rw("existing.h5").unwrap();
132    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
133    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
134    /// file.close().unwrap();
135    /// ```
136    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
137        let writer = Hdf5Writer::open_append(path.as_ref())?;
138        Ok(Self {
139            inner: new_shared(H5FileInner::Writer(writer)),
140        })
141    }
142
143    /// Start building open options for an HDF5 file.
144    ///
145    /// Use this to control file-locking behavior explicitly:
146    ///
147    /// ```no_run
148    /// use rust_hdf5::{H5File, FileLocking};
149    /// // Open with locking disabled (e.g. on NFS without lock support).
150    /// let file = H5File::options()
151    ///     .locking(FileLocking::Disabled)
152    ///     .open_rw("existing.h5")
153    ///     .unwrap();
154    /// # let _ = file;
155    /// ```
156    pub fn options() -> H5FileOptions {
157        H5FileOptions::default()
158    }
159
160    /// Return a handle to the root group.
161    ///
162    /// The root group can be used to create datasets and sub-groups.
163    pub fn root_group(&self) -> H5Group {
164        H5Group::new(clone_inner(&self.inner), "/".to_string())
165    }
166
167    /// Create a group in the root of the file.
168    ///
169    /// ```no_run
170    /// use rust_hdf5::H5File;
171    /// let file = H5File::create("groups.h5").unwrap();
172    /// let grp = file.create_group("detector").unwrap();
173    /// ```
174    pub fn create_group(&self, name: &str) -> Result<H5Group> {
175        self.root_group().create_group(name)
176    }
177
178    /// Start building a new dataset with the given element type.
179    ///
180    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
181    /// `.create("name")` to finalize.
182    ///
183    /// ```no_run
184    /// # use rust_hdf5::H5File;
185    /// let file = H5File::create("build.h5").unwrap();
186    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
187    /// ```
188    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
189        DatasetBuilder::new(clone_inner(&self.inner))
190    }
191
192    /// Add a string attribute to the file (root group).
193    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
194        use crate::format::messages::attribute::AttributeMessage;
195        let attr = AttributeMessage::scalar_string(name, value);
196        let mut inner = borrow_inner_mut(&self.inner);
197        match &mut *inner {
198            H5FileInner::Writer(writer) => {
199                writer.add_root_attribute(attr);
200                Ok(())
201            }
202            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
203        }
204    }
205
206    /// Add a numeric attribute to the file (root group).
207    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
208        use crate::format::messages::attribute::AttributeMessage;
209        let es = T::element_size();
210        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
211        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
212        let mut inner = borrow_inner_mut(&self.inner);
213        match &mut *inner {
214            H5FileInner::Writer(writer) => {
215                writer.add_root_attribute(attr);
216                Ok(())
217            }
218            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
219        }
220    }
221
222    /// Return the names of file-level (root group) attributes.
223    pub fn attr_names(&self) -> Result<Vec<String>> {
224        let inner = borrow_inner(&self.inner);
225        match &*inner {
226            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
227            _ => Ok(vec![]),
228        }
229    }
230
231    /// Read a file-level string attribute.
232    pub fn attr_string(&self, name: &str) -> Result<String> {
233        let inner = borrow_inner(&self.inner);
234        match &*inner {
235            H5FileInner::Reader(reader) => {
236                let attr = reader
237                    .root_attr(name)
238                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
239                let end = attr
240                    .data
241                    .iter()
242                    .position(|&b| b == 0)
243                    .unwrap_or(attr.data.len());
244                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
245            }
246            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
247        }
248    }
249
250    /// Check if the file is in write/append mode.
251    pub fn is_writable(&self) -> bool {
252        let inner = borrow_inner(&self.inner);
253        matches!(&*inner, H5FileInner::Writer(_))
254    }
255
256    /// Create a variable-length string dataset and write data.
257    ///
258    /// This is a convenience method for writing h5py-compatible vlen string
259    /// datasets using global heap storage.
260    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
261        let mut inner = borrow_inner_mut(&self.inner);
262        match &mut *inner {
263            H5FileInner::Writer(writer) => {
264                let idx = writer.create_vlen_string_dataset(name, strings)?;
265                // If the name contains '/', assign the dataset to its parent group
266                if let Some(slash_pos) = name.rfind('/') {
267                    let group_path = &name[..slash_pos];
268                    let abs_group_path = if group_path.starts_with('/') {
269                        group_path.to_string()
270                    } else {
271                        format!("/{}", group_path)
272                    };
273                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
274                }
275                Ok(())
276            }
277            H5FileInner::Reader(_) => {
278                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
279            }
280            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
281        }
282    }
283
284    /// Create a chunked, compressed variable-length string dataset.
285    ///
286    /// Like `write_vlen_strings`, but stores the vlen references in chunked
287    /// layout with the given filter pipeline (e.g., `FilterPipeline::deflate(6)`
288    /// or `FilterPipeline::zstd(3)`). `chunk_size` is the number of strings
289    /// per chunk.
290    pub fn write_vlen_strings_compressed(
291        &self,
292        name: &str,
293        strings: &[&str],
294        chunk_size: usize,
295        pipeline: FilterPipeline,
296    ) -> Result<()> {
297        let mut inner = borrow_inner_mut(&self.inner);
298        match &mut *inner {
299            H5FileInner::Writer(writer) => {
300                let idx = writer
301                    .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
302                if let Some(slash_pos) = name.rfind('/') {
303                    let group_path = &name[..slash_pos];
304                    let abs_group_path = if group_path.starts_with('/') {
305                        group_path.to_string()
306                    } else {
307                        format!("/{}", group_path)
308                    };
309                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
310                }
311                Ok(())
312            }
313            H5FileInner::Reader(_) => {
314                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
315            }
316            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
317        }
318    }
319
320    /// Create an empty chunked vlen string dataset ready for incremental appends.
321    ///
322    /// Use `append_vlen_strings` to add data. If `pipeline` is `Some`, chunks
323    /// are compressed (e.g., `Some(FilterPipeline::lz4())`).
324    pub fn create_appendable_vlen_dataset(
325        &self,
326        name: &str,
327        chunk_size: usize,
328        pipeline: Option<FilterPipeline>,
329    ) -> Result<()> {
330        let mut inner = borrow_inner_mut(&self.inner);
331        match &mut *inner {
332            H5FileInner::Writer(writer) => {
333                let idx =
334                    writer.create_appendable_vlen_string_dataset(name, chunk_size, pipeline)?;
335                if let Some(slash_pos) = name.rfind('/') {
336                    let group_path = &name[..slash_pos];
337                    let abs_group_path = if group_path.starts_with('/') {
338                        group_path.to_string()
339                    } else {
340                        format!("/{}", group_path)
341                    };
342                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
343                }
344                Ok(())
345            }
346            H5FileInner::Reader(_) => {
347                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
348            }
349            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
350        }
351    }
352
353    /// Append variable-length strings to an existing chunked vlen string dataset.
354    pub fn append_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
355        let mut inner = borrow_inner_mut(&self.inner);
356        match &mut *inner {
357            H5FileInner::Writer(writer) => {
358                let ds_index = writer
359                    .dataset_index(name)
360                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
361                writer.append_vlen_strings(ds_index, strings)?;
362                Ok(())
363            }
364            H5FileInner::Reader(_) => {
365                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
366            }
367            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
368        }
369    }
370
371    /// Delete a dataset by name. The dataset is unlinked on close;
372    /// file space is not reclaimed.
373    pub fn delete_dataset(&self, name: &str) -> Result<()> {
374        let mut inner = borrow_inner_mut(&self.inner);
375        match &mut *inner {
376            H5FileInner::Writer(writer) => {
377                writer.delete_dataset(name)?;
378                Ok(())
379            }
380            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
381        }
382    }
383
384    /// Delete a group and all its child datasets/sub-groups.
385    /// File space is not reclaimed.
386    pub fn delete_group(&self, name: &str) -> Result<()> {
387        let mut inner = borrow_inner_mut(&self.inner);
388        match &mut *inner {
389            H5FileInner::Writer(writer) => {
390                writer.delete_group(name)?;
391                Ok(())
392            }
393            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
394        }
395    }
396
397    /// Open an existing dataset by name (read mode).
398    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
399        let inner = borrow_inner(&self.inner);
400        match &*inner {
401            H5FileInner::Reader(reader) => {
402                let info = reader
403                    .dataset_info(name)
404                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
405                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
406                let element_size = info.datatype.element_size() as usize;
407                Ok(H5Dataset::new_reader(
408                    clone_inner(&self.inner),
409                    name.to_string(),
410                    shape,
411                    element_size,
412                ))
413            }
414            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
415                "cannot open a dataset by name in write mode; use new_dataset() instead"
416                    .to_string(),
417            )),
418            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
419        }
420    }
421
422    /// Return the names of all datasets in the root group.
423    ///
424    /// Works in both read and write mode: in write mode, returns the names of
425    /// datasets created so far; in read mode, returns the names discovered
426    /// during file open.
427    pub fn dataset_names(&self) -> Vec<String> {
428        let inner = borrow_inner(&self.inner);
429        match &*inner {
430            H5FileInner::Reader(reader) => reader
431                .dataset_names()
432                .iter()
433                .map(|s| s.to_string())
434                .collect(),
435            H5FileInner::Writer(writer) => writer
436                .dataset_names()
437                .iter()
438                .map(|s| s.to_string())
439                .collect(),
440            H5FileInner::Closed => Vec::new(),
441        }
442    }
443
444    /// Explicitly close the file. For a writer, this finalizes the file
445    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
446    ///
447    /// The file is also auto-finalized on drop, but calling `close()` lets
448    /// you handle errors.
449    pub fn close(self) -> Result<()> {
450        let old = {
451            let mut inner = borrow_inner_mut(&self.inner);
452            std::mem::replace(&mut *inner, H5FileInner::Closed)
453        };
454        match old {
455            H5FileInner::Writer(writer) => {
456                writer.close()?;
457                Ok(())
458            }
459            H5FileInner::Reader(_) => Ok(()),
460            H5FileInner::Closed => Ok(()),
461        }
462    }
463
464    /// Flush the file to disk. Only meaningful in write mode.
465    pub fn flush(&self) -> Result<()> {
466        // The underlying writer does not expose a standalone flush; data is
467        // written to disk immediately via pwrite. This is a compatibility
468        // method that does nothing for now.
469        Ok(())
470    }
471}
472
473/// Builder controlling how an [`H5File`] is opened.
474///
475/// The default policy follows the HDF5 C library: an exclusive lock is
476/// acquired for write-mode opens and a shared lock for read-mode opens,
477/// honoring the `HDF5_USE_FILE_LOCKING` environment variable. Calling
478/// [`Self::locking`] overrides the env-var value.
479#[derive(Debug, Default, Clone)]
480pub struct H5FileOptions {
481    locking: Option<FileLocking>,
482}
483
484impl H5FileOptions {
485    /// Construct a fresh options builder with default settings.
486    pub fn new() -> Self {
487        Self::default()
488    }
489
490    /// Override the locking policy. Bypasses the `HDF5_USE_FILE_LOCKING`
491    /// environment variable for the resulting open call.
492    pub fn locking(mut self, policy: FileLocking) -> Self {
493        self.locking = Some(policy);
494        self
495    }
496
497    /// Disable OS-level file locking entirely (equivalent to
498    /// `HDF5_USE_FILE_LOCKING=FALSE`).
499    pub fn no_locking(self) -> Self {
500        self.locking(FileLocking::Disabled)
501    }
502
503    /// Try to acquire the lock but do not fail if the filesystem rejects it
504    /// (equivalent to `HDF5_USE_FILE_LOCKING=BEST_EFFORT`).
505    pub fn best_effort_locking(self) -> Self {
506        self.locking(FileLocking::BestEffort)
507    }
508
509    fn resolved_locking(&self) -> FileLocking {
510        match self.locking {
511            Some(p) => p,
512            None => FileLocking::from_env_or(FileLocking::default()),
513        }
514    }
515
516    /// Create a new HDF5 file at `path` with the configured options.
517    pub fn create<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
518        let writer = Hdf5Writer::create_with_locking(path.as_ref(), self.resolved_locking())?;
519        Ok(H5File {
520            inner: new_shared(H5FileInner::Writer(writer)),
521        })
522    }
523
524    /// Open an existing HDF5 file for reading with the configured options.
525    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
526        let reader = Hdf5Reader::open_with_locking(path.as_ref(), self.resolved_locking())?;
527        Ok(H5File {
528            inner: new_shared(H5FileInner::Reader(reader)),
529        })
530    }
531
532    /// Open an existing HDF5 file for read/write with the configured options.
533    pub fn open_rw<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
534        let writer = Hdf5Writer::open_append_with_locking(path.as_ref(), self.resolved_locking())?;
535        Ok(H5File {
536            inner: new_shared(H5FileInner::Writer(writer)),
537        })
538    }
539}
540
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Scratch-file location for one test, inside the system temp directory.
    fn temp_path(name: &str) -> PathBuf {
        let mut location = std::env::temp_dir();
        location.push(format!("hdf5_file_test_{}.h5", name));
        location
    }

    /// An empty file can be created, closed, reopened for reading and closed.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");

        H5File::create(&path).unwrap().close().unwrap();

        // Should be readable
        H5File::open(&path).unwrap().close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    /// Dropping a freshly created file (no explicit close) leaves a valid file.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");

        // drop auto-finalizes
        drop(H5File::create(&path).unwrap());

        // Verify the file is valid by opening it
        let reopened = H5File::open(&path).unwrap();
        reopened.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    /// Looking up a dataset that was never written is an error.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        drop(H5File::create(&path).unwrap());

        let file = H5File::open(&path).unwrap();
        assert!(file.dataset("nonexistent").is_err());

        std::fs::remove_file(&path).ok();
    }

    /// A 4x4 `u8` dataset of zeros survives a write/close/open/read cycle.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let out = H5File::create(&path).unwrap();
            let builder = out.new_dataset::<u8>().shape([4, 4]);
            let ds = builder.create("data").unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            out.close().unwrap();
        }

        // Read
        {
            let input = H5File::open(&path).unwrap();
            let ds = input.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let bytes = ds.read_raw::<u8>().unwrap();
            assert_eq!(bytes.len(), 16);
            assert!(!bytes.iter().any(|&b| b != 0));
            input.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    /// A 2x3 `f64` dataset reads back with identical values.
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let out = H5File::create(&path).unwrap();
            let builder = out.new_dataset::<f64>().shape([2, 3]);
            let ds = builder.create("matrix").unwrap();
            ds.write_raw(&values).unwrap();
            out.close().unwrap();
        }

        // Read
        {
            let input = H5File::open(&path).unwrap();
            let ds = input.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    /// Two datasets of different element types coexist in one file.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        // Write
        {
            let out = H5File::create(&path).unwrap();

            let int_ds = out.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            int_ds.write_raw(&[10i32, 20, 30]).unwrap();

            let float_builder = out.new_dataset::<f32>().shape([2, 2]);
            let float_ds = float_builder.create("floats").unwrap();
            float_ds.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            out.close().unwrap();
        }

        // Read
        {
            let input = H5File::open(&path).unwrap();

            let int_ds = input.dataset("ints").unwrap();
            assert_eq!(int_ds.shape(), vec![3]);
            assert_eq!(int_ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);

            let float_ds = input.dataset("floats").unwrap();
            assert_eq!(float_ds.shape(), vec![2, 2]);
            assert_eq!(
                float_ds.read_raw::<f32>().unwrap(),
                vec![1.0f32, 2.0, 3.0, 4.0]
            );
        }

        std::fs::remove_file(&path).ok();
    }

    /// Closing once succeeds; a second close is ruled out by the type system.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        H5File::create(&path).unwrap().close().unwrap();
        // File is consumed by close(), so no double-close possible at the type level.
        std::fs::remove_file(&path).ok();
    }
}
695
696#[cfg(test)]
697mod integration_tests {
698    use super::*;
699
700    #[test]
701    fn write_file_for_h5dump() {
702        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
703        let file = H5File::create(&path).unwrap();
704
705        let ds = file
706            .new_dataset::<u8>()
707            .shape([4usize, 4])
708            .create("data_u8")
709            .unwrap();
710        let data: Vec<u8> = (0..16).collect();
711        ds.write_raw(&data).unwrap();
712
713        let ds2 = file
714            .new_dataset::<f64>()
715            .shape([3usize, 2])
716            .create("data_f64")
717            .unwrap();
718        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
719        ds2.write_raw(&fdata).unwrap();
720
721        let ds3 = file
722            .new_dataset::<i32>()
723            .shape([5usize])
724            .create("values")
725            .unwrap();
726        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
727        ds3.write_raw(&idata).unwrap();
728
729        file.close().unwrap();
730
731        // File exists
732        assert!(path.exists());
733    }
734
735    #[test]
736    fn write_chunked_file_for_h5dump() {
737        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
738        let file = H5File::create(&path).unwrap();
739
740        // Create a chunked dataset with unlimited first dimension
741        let ds = file
742            .new_dataset::<f64>()
743            .shape([0usize, 4])
744            .chunk(&[1, 4])
745            .max_shape(&[None, Some(4)])
746            .create("streaming_data")
747            .unwrap();
748
749        // Write 5 frames of data
750        for frame in 0..5u64 {
751            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
752            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
753            ds.write_chunk(frame as usize, &raw).unwrap();
754        }
755
756        // Extend dimensions to reflect the 5 written frames
757        ds.extend(&[5, 4]).unwrap();
758        ds.flush().unwrap();
759
760        file.close().unwrap();
761
762        assert!(path.exists());
763    }
764
765    #[test]
766    fn write_chunked_many_frames_for_h5dump() {
767        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
768        let file = H5File::create(&path).unwrap();
769
770        let ds = file
771            .new_dataset::<i32>()
772            .shape([0usize, 3])
773            .chunk(&[1, 3])
774            .max_shape(&[None, Some(3)])
775            .create("data")
776            .unwrap();
777
778        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
779        for frame in 0..10u64 {
780            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
781            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
782            ds.write_chunk(frame as usize, &raw).unwrap();
783        }
784        ds.extend(&[10, 3]).unwrap();
785        file.close().unwrap();
786
787        assert!(path.exists());
788    }
789
790    #[test]
791    fn write_dataset_with_attributes() {
792        use crate::types::VarLenUnicode;
793
794        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
795        let file = H5File::create(&path).unwrap();
796
797        let ds = file
798            .new_dataset::<f32>()
799            .shape([10usize])
800            .create("temperature")
801            .unwrap();
802        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
803        ds.write_raw(&data).unwrap();
804
805        // Add string attributes
806        let attr = ds
807            .new_attr::<VarLenUnicode>()
808            .shape(())
809            .create("units")
810            .unwrap();
811        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
812            .unwrap();
813
814        let attr2 = ds
815            .new_attr::<VarLenUnicode>()
816            .shape(())
817            .create("description")
818            .unwrap();
819        attr2
820            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
821            .unwrap();
822
823        // Use write_string convenience method
824        let attr3 = ds
825            .new_attr::<VarLenUnicode>()
826            .shape(())
827            .create("source")
828            .unwrap();
829        attr3.write_string("sensor_01").unwrap();
830
831        // Also test parse -> write_scalar pattern
832        let attr4 = ds
833            .new_attr::<VarLenUnicode>()
834            .shape(())
835            .create("label")
836            .unwrap();
837        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
838        attr4.write_scalar(&s).unwrap();
839
840        file.close().unwrap();
841
842        assert!(path.exists());
843    }
844
845    #[test]
846    fn chunked_write_read_roundtrip() {
847        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");
848
849        // Write
850        {
851            let file = H5File::create(&path).unwrap();
852            let ds = file
853                .new_dataset::<i32>()
854                .shape([0usize, 3])
855                .chunk(&[1, 3])
856                .max_shape(&[None, Some(3)])
857                .create("table")
858                .unwrap();
859
860            for frame in 0..8u64 {
861                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
862                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
863                ds.write_chunk(frame as usize, &raw).unwrap();
864            }
865            ds.extend(&[8, 3]).unwrap();
866            file.close().unwrap();
867        }
868
869        // Read
870        {
871            let file = H5File::open(&path).unwrap();
872            let ds = file.dataset("table").unwrap();
873            assert_eq!(ds.shape(), vec![8, 3]);
874            let data = ds.read_raw::<i32>().unwrap();
875            assert_eq!(data.len(), 24);
876            for (i, val) in data.iter().enumerate() {
877                assert_eq!(*val, i as i32);
878            }
879        }
880
881        std::fs::remove_file(&path).ok();
882    }
883
884    #[test]
885    #[cfg(feature = "deflate")]
886    fn compressed_chunked_roundtrip() {
887        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");
888
889        // Write compressed
890        {
891            let file = H5File::create(&path).unwrap();
892            let ds = file
893                .new_dataset::<f64>()
894                .shape([0usize, 4])
895                .chunk(&[1, 4])
896                .max_shape(&[None, Some(4)])
897                .deflate(6)
898                .create("compressed")
899                .unwrap();
900
901            for frame in 0..10u64 {
902                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
903                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
904                ds.write_chunk(frame as usize, &raw).unwrap();
905            }
906            ds.extend(&[10, 4]).unwrap();
907            file.close().unwrap();
908        }
909
910        // Read back and verify
911        {
912            let file = H5File::open(&path).unwrap();
913            let ds = file.dataset("compressed").unwrap();
914            assert_eq!(ds.shape(), vec![10, 4]);
915            let data = ds.read_raw::<f64>().unwrap();
916            assert_eq!(data.len(), 40);
917            for (i, val) in data.iter().enumerate() {
918                assert!(
919                    (val - i as f64).abs() < 1e-10,
920                    "mismatch at {}: {} != {}",
921                    i,
922                    val,
923                    i
924                );
925            }
926        }
927
928        std::fs::remove_file(&path).ok();
929    }
930
931    #[test]
932    #[cfg(feature = "deflate")]
933    fn compressed_chunked_many_frames() {
934        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");
935
936        {
937            let file = H5File::create(&path).unwrap();
938            let ds = file
939                .new_dataset::<i32>()
940                .shape([0usize, 3])
941                .chunk(&[1, 3])
942                .max_shape(&[None, Some(3)])
943                .deflate(6)
944                .create("stream")
945                .unwrap();
946
947            for frame in 0..100u64 {
948                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
949                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
950                ds.write_chunk(frame as usize, &raw).unwrap();
951            }
952            ds.extend(&[100, 3]).unwrap();
953            file.close().unwrap();
954        }
955
956        {
957            let file = H5File::open(&path).unwrap();
958            let ds = file.dataset("stream").unwrap();
959            assert_eq!(ds.shape(), vec![100, 3]);
960            let data = ds.read_raw::<i32>().unwrap();
961            assert_eq!(data.len(), 300);
962            for (i, val) in data.iter().enumerate() {
963                assert_eq!(*val, i as i32, "mismatch at {}", i);
964            }
965        }
966
967        std::fs::remove_file(&path).ok();
968    }
969    #[test]
970    fn append_mode() {
971        let path = std::env::temp_dir().join("hdf5_append.h5");
972
973        // Create initial file
974        {
975            let file = H5File::create(&path).unwrap();
976            let ds = file
977                .new_dataset::<i32>()
978                .shape([3usize])
979                .create("first")
980                .unwrap();
981            ds.write_raw(&[1i32, 2, 3]).unwrap();
982            file.close().unwrap();
983        }
984
985        // Append new dataset
986        {
987            let file = H5File::open_rw(&path).unwrap();
988            let ds = file
989                .new_dataset::<f64>()
990                .shape([2usize])
991                .create("second")
992                .unwrap();
993            ds.write_raw(&[4.0f64, 5.0]).unwrap();
994            file.close().unwrap();
995        }
996
997        // Read back both
998        {
999            let file = H5File::open(&path).unwrap();
1000            let names = file.dataset_names();
1001            assert!(names.contains(&"first".to_string()));
1002            assert!(names.contains(&"second".to_string()));
1003
1004            let ds1 = file.dataset("first").unwrap();
1005            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
1006
1007            let ds2 = file.dataset("second").unwrap();
1008            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
1009        }
1010
1011        std::fs::remove_file(&path).ok();
1012    }
1013
1014    #[test]
1015    fn open_rw_set_attr_preserves_file() {
1016        let path = std::env::temp_dir().join("hdf5_open_rw_attr.h5");
1017        // Create file with a dataset and an attribute
1018        {
1019            let file = H5File::create(&path).unwrap();
1020            let ds = file
1021                .new_dataset::<i32>()
1022                .shape([3usize])
1023                .create("data")
1024                .unwrap();
1025            ds.write_raw(&[10i32, 20, 30]).unwrap();
1026            file.set_attr_string("version", "1.0").unwrap();
1027            file.close().unwrap();
1028        }
1029        // Open rw and modify the attribute
1030        {
1031            let file = H5File::open_rw(&path).unwrap();
1032            file.set_attr_string("version", "2.0").unwrap();
1033            file.close().unwrap();
1034        }
1035        // Verify: dataset intact, attribute updated
1036        {
1037            let file = H5File::open(&path).unwrap();
1038            let ds = file.dataset("data").unwrap();
1039            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
1040            let ver = file.attr_string("version").unwrap();
1041            assert_eq!(ver, "2.0");
1042        }
1043        std::fs::remove_file(&path).ok();
1044    }
1045
1046    #[test]
1047    #[cfg(feature = "deflate")]
1048    fn open_rw_attr_with_compressed_dataset() {
1049        use crate::format::messages::filter::FilterPipeline;
1050        let path = std::env::temp_dir().join("hdf5_open_rw_compressed.h5");
1051        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
1052        // Create file with compressed vlen strings
1053        {
1054            let file = H5File::create(&path).unwrap();
1055            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1056                .unwrap();
1057            file.set_attr_string("version", "1.0").unwrap();
1058            file.close().unwrap();
1059        }
1060        // Open rw and modify attribute only
1061        {
1062            let file = H5File::open_rw(&path).unwrap();
1063            file.set_attr_string("version", "2.0").unwrap();
1064            file.close().unwrap();
1065        }
1066        // Verify: compressed dataset still readable, attribute updated
1067        {
1068            let file = H5File::open(&path).unwrap();
1069            let ds = file.dataset("texts").unwrap();
1070            let strings = ds.read_vlen_strings().unwrap();
1071            assert_eq!(strings.len(), 50);
1072            assert_eq!(strings[0], "test string data");
1073            let ver = file.attr_string("version").unwrap();
1074            assert_eq!(ver, "2.0");
1075        }
1076        std::fs::remove_file(&path).ok();
1077    }
1078
1079    #[test]
1080    #[cfg(feature = "lz4")]
1081    fn append_vlen_strings_basic() {
1082        use crate::format::messages::filter::FilterPipeline;
1083        let path = std::env::temp_dir().join("hdf5_append_vlen.h5");
1084        {
1085            let file = H5File::create(&path).unwrap();
1086            file.create_appendable_vlen_dataset("names", 4, Some(FilterPipeline::lz4()))
1087                .unwrap();
1088            file.append_vlen_strings("names", &["alice", "bob", "charlie"])
1089                .unwrap();
1090            file.append_vlen_strings("names", &["dave", "eve"]).unwrap();
1091            file.close().unwrap();
1092        }
1093        {
1094            let file = H5File::open(&path).unwrap();
1095            let ds = file.dataset("names").unwrap();
1096            let strings = ds.read_vlen_strings().unwrap();
1097            assert_eq!(strings, vec!["alice", "bob", "charlie", "dave", "eve"]);
1098        }
1099        std::fs::remove_file(&path).ok();
1100    }
1101
1102    #[test]
1103    #[cfg(feature = "lz4")]
1104    fn append_vlen_strings_large() {
1105        use crate::format::messages::filter::FilterPipeline;
1106        let path = std::env::temp_dir().join("hdf5_append_vlen_large.h5");
1107        let batch1: Vec<String> = (0..5000).map(|i| format!("node-{:06}", i)).collect();
1108        let batch2: Vec<String> = (5000..7189).map(|i| format!("node-{:06}", i)).collect();
1109        {
1110            let file = H5File::create(&path).unwrap();
1111            file.create_appendable_vlen_dataset("data", 512, Some(FilterPipeline::lz4()))
1112                .unwrap();
1113            let r1: Vec<&str> = batch1.iter().map(|s| s.as_str()).collect();
1114            file.append_vlen_strings("data", &r1).unwrap();
1115            let r2: Vec<&str> = batch2.iter().map(|s| s.as_str()).collect();
1116            file.append_vlen_strings("data", &r2).unwrap();
1117            file.close().unwrap();
1118        }
1119        {
1120            let file = H5File::open(&path).unwrap();
1121            let ds = file.dataset("data").unwrap();
1122            let strings = ds.read_vlen_strings().unwrap();
1123            assert_eq!(strings.len(), 7189);
1124            assert_eq!(strings[0], "node-000000");
1125            assert_eq!(strings[7188], "node-007188");
1126        }
1127        std::fs::remove_file(&path).ok();
1128    }
1129
1130    #[test]
1131    fn append_vlen_strings_uncompressed() {
1132        let path = std::env::temp_dir().join("hdf5_append_vlen_unc.h5");
1133        {
1134            let file = H5File::create(&path).unwrap();
1135            file.create_appendable_vlen_dataset("texts", 8, None)
1136                .unwrap();
1137            file.append_vlen_strings("texts", &["hello", "world"])
1138                .unwrap();
1139            file.append_vlen_strings("texts", &["foo", "bar", "baz"])
1140                .unwrap();
1141            file.close().unwrap();
1142        }
1143        {
1144            let file = H5File::open(&path).unwrap();
1145            let ds = file.dataset("texts").unwrap();
1146            let strings = ds.read_vlen_strings().unwrap();
1147            assert_eq!(strings, vec!["hello", "world", "foo", "bar", "baz"]);
1148        }
1149        std::fs::remove_file(&path).ok();
1150    }
1151
1152    #[test]
1153    fn delete_dataset_roundtrip() {
1154        let path = std::env::temp_dir().join("hdf5_delete_ds.h5");
1155        {
1156            let file = H5File::create(&path).unwrap();
1157            file.write_vlen_strings("keep", &["a", "b"]).unwrap();
1158            file.write_vlen_strings("remove", &["x", "y"]).unwrap();
1159            file.delete_dataset("remove").unwrap();
1160            file.close().unwrap();
1161        }
1162        {
1163            let file = H5File::open(&path).unwrap();
1164            let names = file.dataset_names();
1165            assert!(names.contains(&"keep".to_string()));
1166            assert!(!names.contains(&"remove".to_string()));
1167            let ds = file.dataset("keep").unwrap();
1168            assert_eq!(ds.read_vlen_strings().unwrap(), vec!["a", "b"]);
1169        }
1170        std::fs::remove_file(&path).ok();
1171    }
1172
1173    #[test]
1174    fn delete_group_roundtrip() {
1175        let path = std::env::temp_dir().join("hdf5_delete_grp.h5");
1176        {
1177            let file = H5File::create(&path).unwrap();
1178            let g1 = file.create_group("keep").unwrap();
1179            g1.write_vlen_strings("data", &["a"]).unwrap();
1180            let g2 = file.create_group("remove").unwrap();
1181            g2.write_vlen_strings("data", &["x"]).unwrap();
1182            file.delete_group("remove").unwrap();
1183            file.close().unwrap();
1184        }
1185        {
1186            let file = H5File::open(&path).unwrap();
1187            let names = file.dataset_names();
1188            assert!(names.contains(&"keep/data".to_string()));
1189            assert!(!names.contains(&"remove/data".to_string()));
1190        }
1191        std::fs::remove_file(&path).ok();
1192    }
1193
1194    #[test]
1195    fn open_rw_delete_recreate_group() {
1196        let path = std::env::temp_dir().join("hdf5_rw_delete_recreate.h5");
1197        // Step 1: create file with groups
1198        {
1199            let file = H5File::create(&path).unwrap();
1200            let n = file.create_group("nodes").unwrap();
1201            n.write_vlen_strings("id", &["a", "b", "c"]).unwrap();
1202            let e = file.create_group("edges").unwrap();
1203            e.write_vlen_strings("src", &["x", "y"]).unwrap();
1204            file.close().unwrap();
1205        }
1206        // Step 2: open_rw, delete one group, recreate with new data
1207        {
1208            let file = H5File::open_rw(&path).unwrap();
1209            file.delete_group("nodes").unwrap();
1210            let n = file.create_group("nodes").unwrap();
1211            n.write_vlen_strings("id", &["new1", "new2"]).unwrap();
1212            file.close().unwrap();
1213        }
1214        // Step 3: verify
1215        {
1216            let file = H5File::open(&path).unwrap();
1217            let ds = file.dataset("nodes/id").unwrap();
1218            let s = ds.read_vlen_strings().unwrap();
1219            assert_eq!(s, vec!["new1", "new2"]);
1220            // edges should still be intact
1221            let ds = file.dataset("edges/src").unwrap();
1222            let s = ds.read_vlen_strings().unwrap();
1223            assert_eq!(s, vec!["x", "y"]);
1224        }
1225        std::fs::remove_file(&path).ok();
1226    }
1227
1228    #[test]
1229    fn delete_and_recreate_group() {
1230        let path = std::env::temp_dir().join("hdf5_delete_recreate.h5");
1231        {
1232            let file = H5File::create(&path).unwrap();
1233            let g = file.create_group("nodes").unwrap();
1234            g.write_vlen_strings("id", &["old1", "old2"]).unwrap();
1235            file.delete_group("nodes").unwrap();
1236            let g = file.create_group("nodes").unwrap();
1237            g.write_vlen_strings("id", &["new1", "new2", "new3"])
1238                .unwrap();
1239            file.close().unwrap();
1240        }
1241        {
1242            let file = H5File::open(&path).unwrap();
1243            let ds = file.dataset("nodes/id").unwrap();
1244            let strings = ds.read_vlen_strings().unwrap();
1245            assert_eq!(strings, vec!["new1", "new2", "new3"]);
1246        }
1247        std::fs::remove_file(&path).ok();
1248    }
1249
1250    #[test]
1251    #[cfg(feature = "deflate")]
1252    fn vlen_string_compressed_large_roundtrip() {
1253        use crate::format::messages::filter::FilterPipeline;
1254        let path = std::env::temp_dir().join("hdf5_vlen_large.h5");
1255        // Simulate kodex scenario: 7189 strings, chunk_size 512
1256        let input: Vec<String> = (0..7189)
1257            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
1258            .collect();
1259        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
1260        {
1261            let file = H5File::create(&path).unwrap();
1262            file.create_group("nodes").unwrap();
1263            file.write_vlen_strings_compressed(
1264                "nodes/id",
1265                &input_refs,
1266                512,
1267                FilterPipeline::deflate(6),
1268            )
1269            .unwrap();
1270            file.close().unwrap();
1271        }
1272        // Read back
1273        {
1274            let file = H5File::open(&path).unwrap();
1275            let ds = file.dataset("nodes/id").unwrap();
1276            let strings = ds.read_vlen_strings().unwrap();
1277            assert_eq!(strings.len(), 7189);
1278            assert_eq!(strings[0], input[0]);
1279            assert_eq!(strings[7188], input[7188]);
1280        }
1281        // Also test open_rw then re-read
1282        {
1283            let file = H5File::open_rw(&path).unwrap();
1284            file.set_attr_string("version", "1.0").unwrap();
1285            file.close().unwrap();
1286        }
1287        {
1288            let file = H5File::open(&path).unwrap();
1289            let ds = file.dataset("nodes/id").unwrap();
1290            let strings = ds.read_vlen_strings().unwrap();
1291            assert_eq!(strings.len(), 7189);
1292            assert_eq!(strings[0], input[0]);
1293        }
1294        std::fs::remove_file(&path).ok();
1295    }
1296
1297    #[test]
1298    fn vlen_string_write_read() {
1299        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
1300        {
1301            let file = H5File::create(&path).unwrap();
1302            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
1303                .unwrap();
1304            file.close().unwrap();
1305        }
1306        {
1307            let file = H5File::open(&path).unwrap();
1308            let ds = file.dataset("names").unwrap();
1309            let strings = ds.read_vlen_strings().unwrap();
1310            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
1311        }
1312        std::fs::remove_file(&path).ok();
1313    }
1314
1315    #[test]
1316    #[cfg(feature = "deflate")]
1317    fn vlen_string_deflate_roundtrip() {
1318        use crate::format::messages::filter::FilterPipeline;
1319        let path = std::env::temp_dir().join("hdf5_vlen_deflate.h5");
1320        let input: Vec<&str> = (0..100)
1321            .map(|i| match i % 3 {
1322                0 => "hello world",
1323                1 => "compressed vlen string test",
1324                _ => "rust-hdf5",
1325            })
1326            .collect();
1327        {
1328            let file = H5File::create(&path).unwrap();
1329            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1330                .unwrap();
1331            file.close().unwrap();
1332        }
1333        {
1334            let file = H5File::open(&path).unwrap();
1335            let ds = file.dataset("texts").unwrap();
1336            let strings = ds.read_vlen_strings().unwrap();
1337            assert_eq!(strings.len(), 100);
1338            for (i, s) in strings.iter().enumerate() {
1339                assert_eq!(s, input[i]);
1340            }
1341        }
1342        std::fs::remove_file(&path).ok();
1343    }
1344
1345    #[test]
1346    #[cfg(feature = "zstd")]
1347    fn vlen_string_zstd_roundtrip() {
1348        use crate::format::messages::filter::FilterPipeline;
1349        let path = std::env::temp_dir().join("hdf5_vlen_zstd.h5");
1350        let input: Vec<&str> = (0..200)
1351            .map(|i| match i % 4 {
1352                0 => "zstandard compression test",
1353                1 => "variable length string",
1354                2 => "rust-hdf5 chunked storage",
1355                _ => "hello zstd world",
1356            })
1357            .collect();
1358        {
1359            let file = H5File::create(&path).unwrap();
1360            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
1361                .unwrap();
1362            file.close().unwrap();
1363        }
1364        {
1365            let file = H5File::open(&path).unwrap();
1366            let ds = file.dataset("data").unwrap();
1367            let strings = ds.read_vlen_strings().unwrap();
1368            assert_eq!(strings.len(), 200);
1369            for (i, s) in strings.iter().enumerate() {
1370                assert_eq!(s, input[i]);
1371            }
1372        }
1373        std::fs::remove_file(&path).ok();
1374    }
1375
1376    #[test]
1377    #[cfg(feature = "deflate")]
1378    fn shuffle_deflate_roundtrip() {
1379        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
1380        {
1381            let file = H5File::create(&path).unwrap();
1382            let ds = file
1383                .new_dataset::<f64>()
1384                .shape([0usize, 4])
1385                .chunk(&[1, 4])
1386                .max_shape(&[None, Some(4)])
1387                .shuffle_deflate(6)
1388                .create("data")
1389                .unwrap();
1390            for frame in 0..20u64 {
1391                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
1392                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1393                ds.write_chunk(frame as usize, &raw).unwrap();
1394            }
1395            ds.extend(&[20, 4]).unwrap();
1396            file.close().unwrap();
1397        }
1398        {
1399            let file = H5File::open(&path).unwrap();
1400            let ds = file.dataset("data").unwrap();
1401            assert_eq!(ds.shape(), vec![20, 4]);
1402            let data = ds.read_raw::<f64>().unwrap();
1403            assert_eq!(data.len(), 80);
1404            for (i, val) in data.iter().enumerate() {
1405                assert!((val - i as f64).abs() < 1e-10);
1406            }
1407        }
1408        std::fs::remove_file(&path).ok();
1409    }
1410
1411    #[test]
1412    fn file_level_attributes() {
1413        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
1414        {
1415            let file = H5File::create(&path).unwrap();
1416            file.set_attr_string("title", "Test File").unwrap();
1417            file.set_attr_numeric("version", &42i32).unwrap();
1418            let ds = file
1419                .new_dataset::<u8>()
1420                .shape([1usize])
1421                .create("dummy")
1422                .unwrap();
1423            ds.write_raw(&[0u8]).unwrap();
1424            file.close().unwrap();
1425        }
1426        {
1427            let file = H5File::open(&path).unwrap();
1428            assert!(file.dataset_names().contains(&"dummy".to_string()));
1429
1430            // Read file-level attributes
1431            let names = file.attr_names().unwrap();
1432            assert!(names.contains(&"title".to_string()));
1433
1434            let title = file.attr_string("title").unwrap();
1435            assert_eq!(title, "Test File");
1436        }
1437        std::fs::remove_file(&path).ok();
1438    }
1439
1440    #[test]
1441    fn scalar_dataset_roundtrip() {
1442        let path = std::env::temp_dir().join("hdf5_scalar.h5");
1443        {
1444            let file = H5File::create(&path).unwrap();
1445            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
1446            ds.write_raw(&[std::f64::consts::PI]).unwrap();
1447            file.close().unwrap();
1448        }
1449        {
1450            let file = H5File::open(&path).unwrap();
1451            let ds = file.dataset("pi").unwrap();
1452            assert_eq!(ds.shape(), Vec::<usize>::new());
1453            assert_eq!(ds.total_elements(), 1);
1454            let data = ds.read_raw::<f64>().unwrap();
1455            assert_eq!(data.len(), 1);
1456            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
1457        }
1458        std::fs::remove_file(&path).ok();
1459    }
1460
1461    #[test]
1462    fn append_mode_extend_chunked() {
1463        let path = std::env::temp_dir().join("hdf5_append_extend.h5");
1464
1465        // Create with 5 frames
1466        {
1467            let file = H5File::create(&path).unwrap();
1468            let ds = file
1469                .new_dataset::<i32>()
1470                .shape([0usize, 3])
1471                .chunk(&[1, 3])
1472                .max_shape(&[None, Some(3)])
1473                .create("stream")
1474                .unwrap();
1475            for i in 0..5u64 {
1476                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1477                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1478                ds.write_chunk(i as usize, &raw).unwrap();
1479            }
1480            ds.extend(&[5, 3]).unwrap();
1481            file.close().unwrap();
1482        }
1483
1484        // Reopen and add 5 more frames
1485        {
1486            let file = H5File::open_rw(&path).unwrap();
1487            // Find the stream dataset index (it's the first one)
1488            let names = file.dataset_names();
1489            assert!(names.contains(&"stream".to_string()));
1490
1491            // Write more chunks via the writer directly
1492            let mut inner = crate::file::borrow_inner_mut(&file.inner);
1493            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
1494                let ds_idx = writer.dataset_index("stream").unwrap();
1495                for i in 5..10u64 {
1496                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1497                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1498                    writer.write_chunk(ds_idx, i, &raw).unwrap();
1499                }
1500                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
1501            }
1502            drop(inner);
1503            file.close().unwrap();
1504        }
1505
1506        // Read back all 10 frames
1507        {
1508            let file = H5File::open(&path).unwrap();
1509            let ds = file.dataset("stream").unwrap();
1510            assert_eq!(ds.shape(), vec![10, 3]);
1511            let data = ds.read_raw::<i32>().unwrap();
1512            assert_eq!(data.len(), 30);
1513            for (i, val) in data.iter().enumerate() {
1514                assert_eq!(*val, i as i32, "mismatch at {}", i);
1515            }
1516        }
1517
1518        std::fs::remove_file(&path).ok();
1519    }
1520
1521    #[test]
1522    fn group_hierarchy_roundtrip() {
1523        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");
1524
1525        {
1526            let file = H5File::create(&path).unwrap();
1527            let root = file.root_group();
1528
1529            // Create groups
1530            let det = root.create_group("detector").unwrap();
1531            let raw = det.create_group("raw").unwrap();
1532
1533            // Create datasets in groups
1534            let ds1 = det
1535                .new_dataset::<f32>()
1536                .shape([10usize])
1537                .create("temperature")
1538                .unwrap();
1539            ds1.write_raw(&[1.0f32; 10]).unwrap();
1540
1541            let ds2 = raw
1542                .new_dataset::<u16>()
1543                .shape([4usize, 4])
1544                .create("image")
1545                .unwrap();
1546            ds2.write_raw(&[42u16; 16]).unwrap();
1547
1548            // Root-level dataset
1549            let ds3 = file
1550                .new_dataset::<i32>()
1551                .shape([3usize])
1552                .create("version")
1553                .unwrap();
1554            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1555
1556            file.close().unwrap();
1557        }
1558
1559        {
1560            let file = H5File::open(&path).unwrap();
1561            let names = file.dataset_names();
1562            assert!(names.contains(&"version".to_string()));
1563            assert!(names.contains(&"detector/temperature".to_string()));
1564            assert!(names.contains(&"detector/raw/image".to_string()));
1565
1566            // Read datasets
1567            let ds = file.dataset("version").unwrap();
1568            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1569
1570            let ds = file.dataset("detector/temperature").unwrap();
1571            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1572
1573            let ds = file.dataset("detector/raw/image").unwrap();
1574            assert_eq!(ds.shape(), vec![4, 4]);
1575            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1576
1577            // Group traversal
1578            let root = file.root_group();
1579            let group_names = root.group_names().unwrap();
1580            assert!(group_names.contains(&"detector".to_string()));
1581        }
1582
1583        std::fs::remove_file(&path).ok();
1584    }
1585
1586    #[test]
1587    fn nested_groups_via_file_create_group() {
1588        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");
1589
1590        {
1591            let file = H5File::create(&path).unwrap();
1592
1593            // Use the H5File::create_group convenience method
1594            let grp = file.create_group("sensors").unwrap();
1595            let sub = grp.create_group("accel").unwrap();
1596
1597            let ds = sub
1598                .new_dataset::<f64>()
1599                .shape([3usize])
1600                .create("xyz")
1601                .unwrap();
1602            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1603
1604            file.close().unwrap();
1605        }
1606
1607        {
1608            let file = H5File::open(&path).unwrap();
1609            let names = file.dataset_names();
1610            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1611
1612            let ds = file.dataset("sensors/accel/xyz").unwrap();
1613            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1614
1615            // Open group in read mode
1616            let root = file.root_group();
1617            let sensors = root.group("sensors").unwrap();
1618            assert_eq!(sensors.name(), "/sensors");
1619
1620            let accel = sensors.group("accel").unwrap();
1621            assert_eq!(accel.name(), "/sensors/accel");
1622
1623            // list_groups from root
1624            let top_groups = root.group_names().unwrap();
1625            assert!(top_groups.contains(&"sensors".to_string()));
1626
1627            // list_groups from sensors
1628            let sub_groups = sensors.group_names().unwrap();
1629            assert!(sub_groups.contains(&"accel".to_string()));
1630        }
1631
1632        std::fs::remove_file(&path).ok();
1633    }
1634}
1635
#[cfg(test)]
mod h5py_compat_tests {
    use super::*;

    /// Verify our files can be read by h5dump (if available).
    ///
    /// Writes a file containing a contiguous dataset, a chunked dataset with
    /// deflate compression and a `max_shape`, a dataset inside a group and a
    /// string attribute, then runs `h5dump` on it twice (header-only and
    /// full dump) and asserts that both invocations exit successfully.
    ///
    /// The test returns early (is effectively skipped) when the `h5dump`
    /// process cannot be spawned.
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // Check if h5dump is available: a spawn error means the binary is
        // missing (or not executable), in which case there is nothing to test.
        let h5dump = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if h5dump.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = std::env::temp_dir().join("hdf5_h5dump_validate.h5");

        // Write a comprehensive test file
        {
            let file = H5File::create(&path).unwrap();

            // Contiguous
            let ds = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
            ds.write_raw(&data).unwrap();

            // Chunked + compressed
            let ds2 = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                // One chunk holds a single row of two little-endian i32 values.
                let vals = [i as i32 * 2, i as i32 * 2 + 1];
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds2.write_chunk(i as usize, &raw).unwrap();
            }
            ds2.extend(&[5, 2]).unwrap();

            // Group
            let grp = file.create_group("meta").unwrap();
            let ds3 = grp
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute
            use crate::types::VarLenUnicode;
            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            attr.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Run h5dump and verify exit code.
        // The path is passed as an OS string so a non-UTF-8 temp directory
        // cannot make the test panic before h5dump even runs.
        let output = std::process::Command::new("h5dump")
            .arg("-H") // header only (faster)
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            output.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );

        // Full dump (with data) should also work
        let output2 = std::process::Command::new("h5dump")
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            output2.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&output2.stderr),
        );

        // Best-effort cleanup; a failure to delete the temp file is ignored.
        std::fs::remove_file(&path).ok();
    }

    /// Read an externally generated fixture file, if present.
    ///
    /// The fixture is expected at `/tmp/test_h5py_default.h5` (presumably
    /// produced by an h5py script outside this crate); the test returns
    /// early when the file does not exist. When present, it must contain:
    ///
    /// * `data`: a `4 x 5` dataset readable as `f64`, whose first element is
    ///   approximately `0.0` and whose last element is approximately `19.0`;
    /// * `images`: a `3 x 64 x 64` dataset readable as `u16`.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        // Floating-point values are compared with a small tolerance.
        assert!(data[0].abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}