Skip to main content

rust_hdf5/
file.rs

//! HDF5 file handle — the main entry point for the public API.
//!
//! ```no_run
//! use rust_hdf5::H5File;
//!
//! // Write
//! let file = H5File::create("example.h5").unwrap();
//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
//! ds.write_raw(&vec![0u8; 200]).unwrap();
//! drop(file);
//!
//! // Read
//! let file = H5File::open("example.h5").unwrap();
//! let ds = file.dataset("data").unwrap();
//! let data = ds.read_raw::<u8>().unwrap();
//! assert_eq!(data.len(), 200);
//! ```
18
19use std::path::Path;
20
21use crate::io::locking::FileLocking;
22use crate::io::{Hdf5Reader, Hdf5Writer};
23
24use crate::dataset::{DatasetBuilder, H5Dataset};
25use crate::error::{Hdf5Error, Result};
26use crate::format::messages::filter::FilterPipeline;
27use crate::group::H5Group;
28use crate::types::H5Type;
29
// ---------------------------------------------------------------------------
// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
// the `threadsafe` feature flag.
// ---------------------------------------------------------------------------
34
35#[cfg(not(feature = "threadsafe"))]
36pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;
37
/// Shared handle to the file state when the `threadsafe` feature is on.
///
/// `Arc<Mutex<_>>` gives atomically reference-counted ownership with access
/// serialized through the mutex.
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
40
41/// Helper to borrow/lock the inner state immutably.
42#[cfg(not(feature = "threadsafe"))]
43pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
44    inner.borrow()
45}
46
47/// Helper to borrow/lock the inner state mutably.
48#[cfg(not(feature = "threadsafe"))]
49pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
50    inner.borrow_mut()
51}
52
53/// Helper to clone a SharedInner.
54#[cfg(not(feature = "threadsafe"))]
55pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
56    std::rc::Rc::clone(inner)
57}
58
59/// Helper to wrap an H5FileInner in SharedInner.
60#[cfg(not(feature = "threadsafe"))]
61pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
62    std::rc::Rc::new(std::cell::RefCell::new(inner))
63}
64
/// Lock the file state for reading (`threadsafe` build).
///
/// This takes the same mutex as `borrow_inner_mut`, so even a "shared"
/// borrow is exclusive while the guard lives. Panics if the mutex is
/// poisoned.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    let locked = inner.lock();
    locked.unwrap()
}
69
/// Lock the file state for writing (`threadsafe` build).
///
/// Blocks until the mutex is available. Panics if the mutex is poisoned.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    let locked = inner.lock();
    locked.unwrap()
}
74
/// Produce another handle to the same file state (`threadsafe` build).
///
/// Only the `Arc` reference count is bumped; the state itself is not copied.
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    inner.clone()
}
79
/// Move an `H5FileInner` into a fresh `SharedInner` (`threadsafe` build).
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    let guarded = std::sync::Mutex::new(inner);
    std::sync::Arc::new(guarded)
}
84
85/// The inner state of an HDF5 file, shared with datasets via reference counting.
86///
87/// By default, this uses `Rc<RefCell<>>` for zero-overhead single-threaded use.
88/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
89/// `H5File` `Send + Sync`.
90pub(crate) enum H5FileInner {
91    Writer(Hdf5Writer),
92    Reader(Hdf5Reader),
93    /// Sentinel value used during `close()` to take ownership of the writer.
94    Closed,
95}
96
97/// An HDF5 file opened for reading or writing.
98///
99/// Datasets created from this file hold a shared reference to the underlying
100/// I/O handle, so the file does not need to outlive its datasets (they share
101/// ownership via reference counting).
102pub struct H5File {
103    pub(crate) inner: SharedInner,
104}
105
106impl H5File {
107    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
108    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
109        let writer = Hdf5Writer::create(path.as_ref())?;
110        Ok(Self {
111            inner: new_shared(H5FileInner::Writer(writer)),
112        })
113    }
114
115    /// Open an existing HDF5 file for reading.
116    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
117        let reader = Hdf5Reader::open(path.as_ref())?;
118        Ok(Self {
119            inner: new_shared(H5FileInner::Reader(reader)),
120        })
121    }
122
123    /// Open an existing HDF5 file for appending new datasets.
124    ///
125    /// Existing datasets are preserved. New datasets can be added and will
126    /// be written after the current end of file. Existing chunked datasets
127    /// can be extended with `write_chunk` and `extend_dataset`.
128    ///
129    /// ```no_run
130    /// use rust_hdf5::H5File;
131    /// let file = H5File::open_rw("existing.h5").unwrap();
132    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
133    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
134    /// file.close().unwrap();
135    /// ```
136    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
137        let writer = Hdf5Writer::open_append(path.as_ref())?;
138        Ok(Self {
139            inner: new_shared(H5FileInner::Writer(writer)),
140        })
141    }
142
143    /// Start building open options for an HDF5 file.
144    ///
145    /// Use this to control file-locking behavior explicitly:
146    ///
147    /// ```no_run
148    /// use rust_hdf5::{H5File, FileLocking};
149    /// // Open with locking disabled (e.g. on NFS without lock support).
150    /// let file = H5File::options()
151    ///     .locking(FileLocking::Disabled)
152    ///     .open_rw("existing.h5")
153    ///     .unwrap();
154    /// # let _ = file;
155    /// ```
156    pub fn options() -> H5FileOptions {
157        H5FileOptions::default()
158    }
159
160    /// Return a handle to the root group.
161    ///
162    /// The root group can be used to create datasets and sub-groups.
163    pub fn root_group(&self) -> H5Group {
164        H5Group::new(clone_inner(&self.inner), "/".to_string())
165    }
166
167    /// Create a group in the root of the file.
168    ///
169    /// ```no_run
170    /// use rust_hdf5::H5File;
171    /// let file = H5File::create("groups.h5").unwrap();
172    /// let grp = file.create_group("detector").unwrap();
173    /// ```
174    pub fn create_group(&self, name: &str) -> Result<H5Group> {
175        self.root_group().create_group(name)
176    }
177
178    /// Start building a new dataset with the given element type.
179    ///
180    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
181    /// `.create("name")` to finalize.
182    ///
183    /// ```no_run
184    /// # use rust_hdf5::H5File;
185    /// let file = H5File::create("build.h5").unwrap();
186    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
187    /// ```
188    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
189        DatasetBuilder::new(clone_inner(&self.inner))
190    }
191
192    /// Add a string attribute to the file (root group).
193    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
194        use crate::format::messages::attribute::AttributeMessage;
195        let attr = AttributeMessage::scalar_string(name, value);
196        let mut inner = borrow_inner_mut(&self.inner);
197        match &mut *inner {
198            H5FileInner::Writer(writer) => {
199                writer.add_root_attribute(attr);
200                Ok(())
201            }
202            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
203        }
204    }
205
206    /// Add a numeric attribute to the file (root group).
207    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
208        use crate::format::messages::attribute::AttributeMessage;
209        let es = T::element_size();
210        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
211        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
212        let mut inner = borrow_inner_mut(&self.inner);
213        match &mut *inner {
214            H5FileInner::Writer(writer) => {
215                writer.add_root_attribute(attr);
216                Ok(())
217            }
218            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
219        }
220    }
221
222    /// Return the names of file-level (root group) attributes.
223    pub fn attr_names(&self) -> Result<Vec<String>> {
224        let inner = borrow_inner(&self.inner);
225        match &*inner {
226            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
227            _ => Ok(vec![]),
228        }
229    }
230
231    /// Read a file-level string attribute.
232    pub fn attr_string(&self, name: &str) -> Result<String> {
233        let inner = borrow_inner(&self.inner);
234        match &*inner {
235            H5FileInner::Reader(reader) => {
236                let attr = reader
237                    .root_attr(name)
238                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
239                let end = attr
240                    .data
241                    .iter()
242                    .position(|&b| b == 0)
243                    .unwrap_or(attr.data.len());
244                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
245            }
246            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
247        }
248    }
249
250    /// Check if the file is in write/append mode.
251    pub fn is_writable(&self) -> bool {
252        let inner = borrow_inner(&self.inner);
253        matches!(&*inner, H5FileInner::Writer(_))
254    }
255
256    /// Create a variable-length string dataset and write data.
257    ///
258    /// This is a convenience method for writing h5py-compatible vlen string
259    /// datasets using global heap storage.
260    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
261        let mut inner = borrow_inner_mut(&self.inner);
262        match &mut *inner {
263            H5FileInner::Writer(writer) => {
264                let idx = writer.create_vlen_string_dataset(name, strings)?;
265                // If the name contains '/', assign the dataset to its parent group
266                if let Some(slash_pos) = name.rfind('/') {
267                    let group_path = &name[..slash_pos];
268                    let abs_group_path = if group_path.starts_with('/') {
269                        group_path.to_string()
270                    } else {
271                        format!("/{}", group_path)
272                    };
273                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
274                }
275                Ok(())
276            }
277            H5FileInner::Reader(_) => {
278                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
279            }
280            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
281        }
282    }
283
284    /// Create a chunked, compressed variable-length string dataset.
285    ///
286    /// Like `write_vlen_strings`, but stores the vlen references in chunked
287    /// layout with the given filter pipeline (e.g., `FilterPipeline::deflate(6)`
288    /// or `FilterPipeline::zstd(3)`). `chunk_size` is the number of strings
289    /// per chunk.
290    pub fn write_vlen_strings_compressed(
291        &self,
292        name: &str,
293        strings: &[&str],
294        chunk_size: usize,
295        pipeline: FilterPipeline,
296    ) -> Result<()> {
297        let mut inner = borrow_inner_mut(&self.inner);
298        match &mut *inner {
299            H5FileInner::Writer(writer) => {
300                let idx = writer
301                    .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
302                if let Some(slash_pos) = name.rfind('/') {
303                    let group_path = &name[..slash_pos];
304                    let abs_group_path = if group_path.starts_with('/') {
305                        group_path.to_string()
306                    } else {
307                        format!("/{}", group_path)
308                    };
309                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
310                }
311                Ok(())
312            }
313            H5FileInner::Reader(_) => {
314                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
315            }
316            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
317        }
318    }
319
320    /// Create an empty chunked vlen string dataset ready for incremental appends.
321    ///
322    /// Use `append_vlen_strings` to add data. If `pipeline` is `Some`, chunks
323    /// are compressed (e.g., `Some(FilterPipeline::lz4())`).
324    pub fn create_appendable_vlen_dataset(
325        &self,
326        name: &str,
327        chunk_size: usize,
328        pipeline: Option<FilterPipeline>,
329    ) -> Result<()> {
330        let mut inner = borrow_inner_mut(&self.inner);
331        match &mut *inner {
332            H5FileInner::Writer(writer) => {
333                let idx =
334                    writer.create_appendable_vlen_string_dataset(name, chunk_size, pipeline)?;
335                if let Some(slash_pos) = name.rfind('/') {
336                    let group_path = &name[..slash_pos];
337                    let abs_group_path = if group_path.starts_with('/') {
338                        group_path.to_string()
339                    } else {
340                        format!("/{}", group_path)
341                    };
342                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
343                }
344                Ok(())
345            }
346            H5FileInner::Reader(_) => {
347                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
348            }
349            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
350        }
351    }
352
353    /// Append variable-length strings to an existing chunked vlen string dataset.
354    pub fn append_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
355        let mut inner = borrow_inner_mut(&self.inner);
356        match &mut *inner {
357            H5FileInner::Writer(writer) => {
358                let ds_index = writer
359                    .dataset_index(name)
360                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
361                writer.append_vlen_strings(ds_index, strings)?;
362                Ok(())
363            }
364            H5FileInner::Reader(_) => {
365                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
366            }
367            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
368        }
369    }
370
371    /// Delete a dataset by name. The dataset is unlinked on close;
372    /// file space is not reclaimed.
373    pub fn delete_dataset(&self, name: &str) -> Result<()> {
374        let mut inner = borrow_inner_mut(&self.inner);
375        match &mut *inner {
376            H5FileInner::Writer(writer) => {
377                writer.delete_dataset(name)?;
378                Ok(())
379            }
380            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
381        }
382    }
383
384    /// Delete a group and all its child datasets/sub-groups.
385    /// File space is not reclaimed.
386    pub fn delete_group(&self, name: &str) -> Result<()> {
387        let mut inner = borrow_inner_mut(&self.inner);
388        match &mut *inner {
389            H5FileInner::Writer(writer) => {
390                writer.delete_group(name)?;
391                Ok(())
392            }
393            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
394        }
395    }
396
397    /// Open an existing dataset by name (read mode).
398    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
399        let inner = borrow_inner(&self.inner);
400        match &*inner {
401            H5FileInner::Reader(reader) => {
402                let info = reader
403                    .dataset_info(name)
404                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
405                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
406                let element_size = info.datatype.element_size() as usize;
407                Ok(H5Dataset::new_reader(
408                    clone_inner(&self.inner),
409                    name.to_string(),
410                    shape,
411                    element_size,
412                ))
413            }
414            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
415                "cannot open a dataset by name in write mode; use new_dataset() instead"
416                    .to_string(),
417            )),
418            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
419        }
420    }
421
422    /// Return the names of all datasets in the root group.
423    ///
424    /// Works in both read and write mode: in write mode, returns the names of
425    /// datasets created so far; in read mode, returns the names discovered
426    /// during file open.
427    pub fn dataset_names(&self) -> Vec<String> {
428        let inner = borrow_inner(&self.inner);
429        match &*inner {
430            H5FileInner::Reader(reader) => reader
431                .dataset_names()
432                .iter()
433                .map(|s| s.to_string())
434                .collect(),
435            H5FileInner::Writer(writer) => writer
436                .dataset_names()
437                .iter()
438                .map(|s| s.to_string())
439                .collect(),
440            H5FileInner::Closed => Vec::new(),
441        }
442    }
443
444    /// Explicitly close the file. For a writer, this finalizes the file
445    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
446    ///
447    /// The file is also auto-finalized on drop, but calling `close()` lets
448    /// you handle errors.
449    pub fn close(self) -> Result<()> {
450        let old = {
451            let mut inner = borrow_inner_mut(&self.inner);
452            std::mem::replace(&mut *inner, H5FileInner::Closed)
453        };
454        match old {
455            H5FileInner::Writer(writer) => {
456                writer.close()?;
457                Ok(())
458            }
459            H5FileInner::Reader(_) => Ok(()),
460            H5FileInner::Closed => Ok(()),
461        }
462    }
463
464    /// Flush the file to disk. Only meaningful in write mode.
465    pub fn flush(&self) -> Result<()> {
466        // The underlying writer does not expose a standalone flush; data is
467        // written to disk immediately via pwrite. This is a compatibility
468        // method that does nothing for now.
469        Ok(())
470    }
471}
472
473/// Builder controlling how an [`H5File`] is opened.
474///
475/// The default policy follows the HDF5 C library: an exclusive lock is
476/// acquired for write-mode opens and a shared lock for read-mode opens,
477/// honoring the `HDF5_USE_FILE_LOCKING` environment variable. Calling
478/// [`Self::locking`] overrides the env-var value.
479#[derive(Debug, Default, Clone)]
480pub struct H5FileOptions {
481    locking: Option<FileLocking>,
482}
483
484impl H5FileOptions {
485    /// Construct a fresh options builder with default settings.
486    pub fn new() -> Self {
487        Self::default()
488    }
489
490    /// Override the locking policy. Bypasses the `HDF5_USE_FILE_LOCKING`
491    /// environment variable for the resulting open call.
492    pub fn locking(mut self, policy: FileLocking) -> Self {
493        self.locking = Some(policy);
494        self
495    }
496
497    /// Disable OS-level file locking entirely (equivalent to
498    /// `HDF5_USE_FILE_LOCKING=FALSE`).
499    pub fn no_locking(self) -> Self {
500        self.locking(FileLocking::Disabled)
501    }
502
503    /// Try to acquire the lock but do not fail if the filesystem rejects it
504    /// (equivalent to `HDF5_USE_FILE_LOCKING=BEST_EFFORT`).
505    pub fn best_effort_locking(self) -> Self {
506        self.locking(FileLocking::BestEffort)
507    }
508
509    fn resolved_locking(&self) -> FileLocking {
510        match self.locking {
511            Some(p) => p,
512            None => FileLocking::from_env_or(FileLocking::default()),
513        }
514    }
515
516    /// Create a new HDF5 file at `path` with the configured options.
517    pub fn create<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
518        let writer = Hdf5Writer::create_with_locking(path.as_ref(), self.resolved_locking())?;
519        Ok(H5File {
520            inner: new_shared(H5FileInner::Writer(writer)),
521        })
522    }
523
524    /// Open an existing HDF5 file for reading with the configured options.
525    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
526        let reader = Hdf5Reader::open_with_locking(path.as_ref(), self.resolved_locking())?;
527        Ok(H5File {
528            inner: new_shared(H5FileInner::Reader(reader)),
529        })
530    }
531
532    /// Open an existing HDF5 file for read/write with the configured options.
533    pub fn open_rw<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
534        let writer =
535            Hdf5Writer::open_append_with_locking(path.as_ref(), self.resolved_locking())?;
536        Ok(H5File {
537            inner: new_shared(H5FileInner::Writer(writer)),
538        })
539    }
540}
541
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Build a scratch-file path in the system temp directory.
    ///
    /// The process id is part of the file name so that two test processes
    /// running at the same time do not truncate or lock each other's files.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "hdf5_file_test_{}_{}.h5",
            std::process::id(),
            name
        ))
    }

    /// An empty file that was explicitly closed can be opened for reading.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();

        // Should be readable
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    /// An empty file that was only dropped can still be opened for reading.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        {
            let _file = H5File::create(&path).unwrap();
            // drop auto-finalizes
        }
        // Verify the file is valid by opening it
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    /// Looking up a missing dataset reports `NotFound`, not just any error.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        let result = file.dataset("nonexistent");
        assert!(matches!(result, Err(Hdf5Error::NotFound(_))));

        std::fs::remove_file(&path).ok();
    }

    /// A 4x4 `u8` dataset keeps its shape and contents across close/open.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([4, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let data = ds.read_raw::<u8>().unwrap();
            assert_eq!(data.len(), 16);
            assert!(data.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    /// A 2x3 `f64` dataset reads back exactly the values that were written.
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&values).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    /// Two datasets of different element types coexist in one file.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let file = H5File::create(&path).unwrap();
            let ds1 = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            ds1.write_raw(&[10i32, 20, 30]).unwrap();

            let ds2 = file
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            ds2.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            let ds_ints = file.dataset("ints").unwrap();
            assert_eq!(ds_ints.shape(), vec![3]);
            let ints = ds_ints.read_raw::<i32>().unwrap();
            assert_eq!(ints, vec![10, 20, 30]);

            let ds_floats = file.dataset("floats").unwrap();
            assert_eq!(ds_floats.shape(), vec![2, 2]);
            let floats = ds_floats.read_raw::<f32>().unwrap();
            assert_eq!(floats, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    /// Closing a freshly created file succeeds.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();
        // `close()` consumes the file, so a second close is ruled out at the
        // type level; only the single close can be exercised here.
        std::fs::remove_file(&path).ok();
    }
}
696
697#[cfg(test)]
698mod integration_tests {
699    use super::*;
700
701    #[test]
702    fn write_file_for_h5dump() {
703        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
704        let file = H5File::create(&path).unwrap();
705
706        let ds = file
707            .new_dataset::<u8>()
708            .shape([4usize, 4])
709            .create("data_u8")
710            .unwrap();
711        let data: Vec<u8> = (0..16).collect();
712        ds.write_raw(&data).unwrap();
713
714        let ds2 = file
715            .new_dataset::<f64>()
716            .shape([3usize, 2])
717            .create("data_f64")
718            .unwrap();
719        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
720        ds2.write_raw(&fdata).unwrap();
721
722        let ds3 = file
723            .new_dataset::<i32>()
724            .shape([5usize])
725            .create("values")
726            .unwrap();
727        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
728        ds3.write_raw(&idata).unwrap();
729
730        file.close().unwrap();
731
732        // File exists
733        assert!(path.exists());
734    }
735
736    #[test]
737    fn write_chunked_file_for_h5dump() {
738        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
739        let file = H5File::create(&path).unwrap();
740
741        // Create a chunked dataset with unlimited first dimension
742        let ds = file
743            .new_dataset::<f64>()
744            .shape([0usize, 4])
745            .chunk(&[1, 4])
746            .max_shape(&[None, Some(4)])
747            .create("streaming_data")
748            .unwrap();
749
750        // Write 5 frames of data
751        for frame in 0..5u64 {
752            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
753            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
754            ds.write_chunk(frame as usize, &raw).unwrap();
755        }
756
757        // Extend dimensions to reflect the 5 written frames
758        ds.extend(&[5, 4]).unwrap();
759        ds.flush().unwrap();
760
761        file.close().unwrap();
762
763        assert!(path.exists());
764    }
765
766    #[test]
767    fn write_chunked_many_frames_for_h5dump() {
768        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
769        let file = H5File::create(&path).unwrap();
770
771        let ds = file
772            .new_dataset::<i32>()
773            .shape([0usize, 3])
774            .chunk(&[1, 3])
775            .max_shape(&[None, Some(3)])
776            .create("data")
777            .unwrap();
778
779        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
780        for frame in 0..10u64 {
781            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
782            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
783            ds.write_chunk(frame as usize, &raw).unwrap();
784        }
785        ds.extend(&[10, 3]).unwrap();
786        file.close().unwrap();
787
788        assert!(path.exists());
789    }
790
791    #[test]
792    fn write_dataset_with_attributes() {
793        use crate::types::VarLenUnicode;
794
795        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
796        let file = H5File::create(&path).unwrap();
797
798        let ds = file
799            .new_dataset::<f32>()
800            .shape([10usize])
801            .create("temperature")
802            .unwrap();
803        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
804        ds.write_raw(&data).unwrap();
805
806        // Add string attributes
807        let attr = ds
808            .new_attr::<VarLenUnicode>()
809            .shape(())
810            .create("units")
811            .unwrap();
812        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
813            .unwrap();
814
815        let attr2 = ds
816            .new_attr::<VarLenUnicode>()
817            .shape(())
818            .create("description")
819            .unwrap();
820        attr2
821            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
822            .unwrap();
823
824        // Use write_string convenience method
825        let attr3 = ds
826            .new_attr::<VarLenUnicode>()
827            .shape(())
828            .create("source")
829            .unwrap();
830        attr3.write_string("sensor_01").unwrap();
831
832        // Also test parse -> write_scalar pattern
833        let attr4 = ds
834            .new_attr::<VarLenUnicode>()
835            .shape(())
836            .create("label")
837            .unwrap();
838        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
839        attr4.write_scalar(&s).unwrap();
840
841        file.close().unwrap();
842
843        assert!(path.exists());
844    }
845
846    #[test]
847    fn chunked_write_read_roundtrip() {
848        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");
849
850        // Write
851        {
852            let file = H5File::create(&path).unwrap();
853            let ds = file
854                .new_dataset::<i32>()
855                .shape([0usize, 3])
856                .chunk(&[1, 3])
857                .max_shape(&[None, Some(3)])
858                .create("table")
859                .unwrap();
860
861            for frame in 0..8u64 {
862                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
863                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
864                ds.write_chunk(frame as usize, &raw).unwrap();
865            }
866            ds.extend(&[8, 3]).unwrap();
867            file.close().unwrap();
868        }
869
870        // Read
871        {
872            let file = H5File::open(&path).unwrap();
873            let ds = file.dataset("table").unwrap();
874            assert_eq!(ds.shape(), vec![8, 3]);
875            let data = ds.read_raw::<i32>().unwrap();
876            assert_eq!(data.len(), 24);
877            for (i, val) in data.iter().enumerate() {
878                assert_eq!(*val, i as i32);
879            }
880        }
881
882        std::fs::remove_file(&path).ok();
883    }
884
885    #[test]
886    #[cfg(feature = "deflate")]
887    fn compressed_chunked_roundtrip() {
888        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");
889
890        // Write compressed
891        {
892            let file = H5File::create(&path).unwrap();
893            let ds = file
894                .new_dataset::<f64>()
895                .shape([0usize, 4])
896                .chunk(&[1, 4])
897                .max_shape(&[None, Some(4)])
898                .deflate(6)
899                .create("compressed")
900                .unwrap();
901
902            for frame in 0..10u64 {
903                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
904                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
905                ds.write_chunk(frame as usize, &raw).unwrap();
906            }
907            ds.extend(&[10, 4]).unwrap();
908            file.close().unwrap();
909        }
910
911        // Read back and verify
912        {
913            let file = H5File::open(&path).unwrap();
914            let ds = file.dataset("compressed").unwrap();
915            assert_eq!(ds.shape(), vec![10, 4]);
916            let data = ds.read_raw::<f64>().unwrap();
917            assert_eq!(data.len(), 40);
918            for (i, val) in data.iter().enumerate() {
919                assert!(
920                    (val - i as f64).abs() < 1e-10,
921                    "mismatch at {}: {} != {}",
922                    i,
923                    val,
924                    i
925                );
926            }
927        }
928
929        std::fs::remove_file(&path).ok();
930    }
931
932    #[test]
933    #[cfg(feature = "deflate")]
934    fn compressed_chunked_many_frames() {
935        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");
936
937        {
938            let file = H5File::create(&path).unwrap();
939            let ds = file
940                .new_dataset::<i32>()
941                .shape([0usize, 3])
942                .chunk(&[1, 3])
943                .max_shape(&[None, Some(3)])
944                .deflate(6)
945                .create("stream")
946                .unwrap();
947
948            for frame in 0..100u64 {
949                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
950                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
951                ds.write_chunk(frame as usize, &raw).unwrap();
952            }
953            ds.extend(&[100, 3]).unwrap();
954            file.close().unwrap();
955        }
956
957        {
958            let file = H5File::open(&path).unwrap();
959            let ds = file.dataset("stream").unwrap();
960            assert_eq!(ds.shape(), vec![100, 3]);
961            let data = ds.read_raw::<i32>().unwrap();
962            assert_eq!(data.len(), 300);
963            for (i, val) in data.iter().enumerate() {
964                assert_eq!(*val, i as i32, "mismatch at {}", i);
965            }
966        }
967
968        std::fs::remove_file(&path).ok();
969    }
970    #[test]
971    fn append_mode() {
972        let path = std::env::temp_dir().join("hdf5_append.h5");
973
974        // Create initial file
975        {
976            let file = H5File::create(&path).unwrap();
977            let ds = file
978                .new_dataset::<i32>()
979                .shape([3usize])
980                .create("first")
981                .unwrap();
982            ds.write_raw(&[1i32, 2, 3]).unwrap();
983            file.close().unwrap();
984        }
985
986        // Append new dataset
987        {
988            let file = H5File::open_rw(&path).unwrap();
989            let ds = file
990                .new_dataset::<f64>()
991                .shape([2usize])
992                .create("second")
993                .unwrap();
994            ds.write_raw(&[4.0f64, 5.0]).unwrap();
995            file.close().unwrap();
996        }
997
998        // Read back both
999        {
1000            let file = H5File::open(&path).unwrap();
1001            let names = file.dataset_names();
1002            assert!(names.contains(&"first".to_string()));
1003            assert!(names.contains(&"second".to_string()));
1004
1005            let ds1 = file.dataset("first").unwrap();
1006            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
1007
1008            let ds2 = file.dataset("second").unwrap();
1009            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
1010        }
1011
1012        std::fs::remove_file(&path).ok();
1013    }
1014
1015    #[test]
1016    fn open_rw_set_attr_preserves_file() {
1017        let path = std::env::temp_dir().join("hdf5_open_rw_attr.h5");
1018        // Create file with a dataset and an attribute
1019        {
1020            let file = H5File::create(&path).unwrap();
1021            let ds = file
1022                .new_dataset::<i32>()
1023                .shape([3usize])
1024                .create("data")
1025                .unwrap();
1026            ds.write_raw(&[10i32, 20, 30]).unwrap();
1027            file.set_attr_string("version", "1.0").unwrap();
1028            file.close().unwrap();
1029        }
1030        // Open rw and modify the attribute
1031        {
1032            let file = H5File::open_rw(&path).unwrap();
1033            file.set_attr_string("version", "2.0").unwrap();
1034            file.close().unwrap();
1035        }
1036        // Verify: dataset intact, attribute updated
1037        {
1038            let file = H5File::open(&path).unwrap();
1039            let ds = file.dataset("data").unwrap();
1040            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
1041            let ver = file.attr_string("version").unwrap();
1042            assert_eq!(ver, "2.0");
1043        }
1044        std::fs::remove_file(&path).ok();
1045    }
1046
1047    #[test]
1048    #[cfg(feature = "deflate")]
1049    fn open_rw_attr_with_compressed_dataset() {
1050        use crate::format::messages::filter::FilterPipeline;
1051        let path = std::env::temp_dir().join("hdf5_open_rw_compressed.h5");
1052        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
1053        // Create file with compressed vlen strings
1054        {
1055            let file = H5File::create(&path).unwrap();
1056            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1057                .unwrap();
1058            file.set_attr_string("version", "1.0").unwrap();
1059            file.close().unwrap();
1060        }
1061        // Open rw and modify attribute only
1062        {
1063            let file = H5File::open_rw(&path).unwrap();
1064            file.set_attr_string("version", "2.0").unwrap();
1065            file.close().unwrap();
1066        }
1067        // Verify: compressed dataset still readable, attribute updated
1068        {
1069            let file = H5File::open(&path).unwrap();
1070            let ds = file.dataset("texts").unwrap();
1071            let strings = ds.read_vlen_strings().unwrap();
1072            assert_eq!(strings.len(), 50);
1073            assert_eq!(strings[0], "test string data");
1074            let ver = file.attr_string("version").unwrap();
1075            assert_eq!(ver, "2.0");
1076        }
1077        std::fs::remove_file(&path).ok();
1078    }
1079
1080    #[test]
1081    #[cfg(feature = "lz4")]
1082    fn append_vlen_strings_basic() {
1083        use crate::format::messages::filter::FilterPipeline;
1084        let path = std::env::temp_dir().join("hdf5_append_vlen.h5");
1085        {
1086            let file = H5File::create(&path).unwrap();
1087            file.create_appendable_vlen_dataset("names", 4, Some(FilterPipeline::lz4()))
1088                .unwrap();
1089            file.append_vlen_strings("names", &["alice", "bob", "charlie"])
1090                .unwrap();
1091            file.append_vlen_strings("names", &["dave", "eve"]).unwrap();
1092            file.close().unwrap();
1093        }
1094        {
1095            let file = H5File::open(&path).unwrap();
1096            let ds = file.dataset("names").unwrap();
1097            let strings = ds.read_vlen_strings().unwrap();
1098            assert_eq!(strings, vec!["alice", "bob", "charlie", "dave", "eve"]);
1099        }
1100        std::fs::remove_file(&path).ok();
1101    }
1102
1103    #[test]
1104    #[cfg(feature = "lz4")]
1105    fn append_vlen_strings_large() {
1106        use crate::format::messages::filter::FilterPipeline;
1107        let path = std::env::temp_dir().join("hdf5_append_vlen_large.h5");
1108        let batch1: Vec<String> = (0..5000).map(|i| format!("node-{:06}", i)).collect();
1109        let batch2: Vec<String> = (5000..7189).map(|i| format!("node-{:06}", i)).collect();
1110        {
1111            let file = H5File::create(&path).unwrap();
1112            file.create_appendable_vlen_dataset("data", 512, Some(FilterPipeline::lz4()))
1113                .unwrap();
1114            let r1: Vec<&str> = batch1.iter().map(|s| s.as_str()).collect();
1115            file.append_vlen_strings("data", &r1).unwrap();
1116            let r2: Vec<&str> = batch2.iter().map(|s| s.as_str()).collect();
1117            file.append_vlen_strings("data", &r2).unwrap();
1118            file.close().unwrap();
1119        }
1120        {
1121            let file = H5File::open(&path).unwrap();
1122            let ds = file.dataset("data").unwrap();
1123            let strings = ds.read_vlen_strings().unwrap();
1124            assert_eq!(strings.len(), 7189);
1125            assert_eq!(strings[0], "node-000000");
1126            assert_eq!(strings[7188], "node-007188");
1127        }
1128        std::fs::remove_file(&path).ok();
1129    }
1130
1131    #[test]
1132    fn append_vlen_strings_uncompressed() {
1133        let path = std::env::temp_dir().join("hdf5_append_vlen_unc.h5");
1134        {
1135            let file = H5File::create(&path).unwrap();
1136            file.create_appendable_vlen_dataset("texts", 8, None)
1137                .unwrap();
1138            file.append_vlen_strings("texts", &["hello", "world"])
1139                .unwrap();
1140            file.append_vlen_strings("texts", &["foo", "bar", "baz"])
1141                .unwrap();
1142            file.close().unwrap();
1143        }
1144        {
1145            let file = H5File::open(&path).unwrap();
1146            let ds = file.dataset("texts").unwrap();
1147            let strings = ds.read_vlen_strings().unwrap();
1148            assert_eq!(strings, vec!["hello", "world", "foo", "bar", "baz"]);
1149        }
1150        std::fs::remove_file(&path).ok();
1151    }
1152
1153    #[test]
1154    fn delete_dataset_roundtrip() {
1155        let path = std::env::temp_dir().join("hdf5_delete_ds.h5");
1156        {
1157            let file = H5File::create(&path).unwrap();
1158            file.write_vlen_strings("keep", &["a", "b"]).unwrap();
1159            file.write_vlen_strings("remove", &["x", "y"]).unwrap();
1160            file.delete_dataset("remove").unwrap();
1161            file.close().unwrap();
1162        }
1163        {
1164            let file = H5File::open(&path).unwrap();
1165            let names = file.dataset_names();
1166            assert!(names.contains(&"keep".to_string()));
1167            assert!(!names.contains(&"remove".to_string()));
1168            let ds = file.dataset("keep").unwrap();
1169            assert_eq!(ds.read_vlen_strings().unwrap(), vec!["a", "b"]);
1170        }
1171        std::fs::remove_file(&path).ok();
1172    }
1173
1174    #[test]
1175    fn delete_group_roundtrip() {
1176        let path = std::env::temp_dir().join("hdf5_delete_grp.h5");
1177        {
1178            let file = H5File::create(&path).unwrap();
1179            let g1 = file.create_group("keep").unwrap();
1180            g1.write_vlen_strings("data", &["a"]).unwrap();
1181            let g2 = file.create_group("remove").unwrap();
1182            g2.write_vlen_strings("data", &["x"]).unwrap();
1183            file.delete_group("remove").unwrap();
1184            file.close().unwrap();
1185        }
1186        {
1187            let file = H5File::open(&path).unwrap();
1188            let names = file.dataset_names();
1189            assert!(names.contains(&"keep/data".to_string()));
1190            assert!(!names.contains(&"remove/data".to_string()));
1191        }
1192        std::fs::remove_file(&path).ok();
1193    }
1194
1195    #[test]
1196    fn open_rw_delete_recreate_group() {
1197        let path = std::env::temp_dir().join("hdf5_rw_delete_recreate.h5");
1198        // Step 1: create file with groups
1199        {
1200            let file = H5File::create(&path).unwrap();
1201            let n = file.create_group("nodes").unwrap();
1202            n.write_vlen_strings("id", &["a", "b", "c"]).unwrap();
1203            let e = file.create_group("edges").unwrap();
1204            e.write_vlen_strings("src", &["x", "y"]).unwrap();
1205            file.close().unwrap();
1206        }
1207        // Step 2: open_rw, delete one group, recreate with new data
1208        {
1209            let file = H5File::open_rw(&path).unwrap();
1210            file.delete_group("nodes").unwrap();
1211            let n = file.create_group("nodes").unwrap();
1212            n.write_vlen_strings("id", &["new1", "new2"]).unwrap();
1213            file.close().unwrap();
1214        }
1215        // Step 3: verify
1216        {
1217            let file = H5File::open(&path).unwrap();
1218            let ds = file.dataset("nodes/id").unwrap();
1219            let s = ds.read_vlen_strings().unwrap();
1220            assert_eq!(s, vec!["new1", "new2"]);
1221            // edges should still be intact
1222            let ds = file.dataset("edges/src").unwrap();
1223            let s = ds.read_vlen_strings().unwrap();
1224            assert_eq!(s, vec!["x", "y"]);
1225        }
1226        std::fs::remove_file(&path).ok();
1227    }
1228
1229    #[test]
1230    fn delete_and_recreate_group() {
1231        let path = std::env::temp_dir().join("hdf5_delete_recreate.h5");
1232        {
1233            let file = H5File::create(&path).unwrap();
1234            let g = file.create_group("nodes").unwrap();
1235            g.write_vlen_strings("id", &["old1", "old2"]).unwrap();
1236            file.delete_group("nodes").unwrap();
1237            let g = file.create_group("nodes").unwrap();
1238            g.write_vlen_strings("id", &["new1", "new2", "new3"])
1239                .unwrap();
1240            file.close().unwrap();
1241        }
1242        {
1243            let file = H5File::open(&path).unwrap();
1244            let ds = file.dataset("nodes/id").unwrap();
1245            let strings = ds.read_vlen_strings().unwrap();
1246            assert_eq!(strings, vec!["new1", "new2", "new3"]);
1247        }
1248        std::fs::remove_file(&path).ok();
1249    }
1250
1251    #[test]
1252    #[cfg(feature = "deflate")]
1253    fn vlen_string_compressed_large_roundtrip() {
1254        use crate::format::messages::filter::FilterPipeline;
1255        let path = std::env::temp_dir().join("hdf5_vlen_large.h5");
1256        // Simulate kodex scenario: 7189 strings, chunk_size 512
1257        let input: Vec<String> = (0..7189)
1258            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
1259            .collect();
1260        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
1261        {
1262            let file = H5File::create(&path).unwrap();
1263            file.create_group("nodes").unwrap();
1264            file.write_vlen_strings_compressed(
1265                "nodes/id",
1266                &input_refs,
1267                512,
1268                FilterPipeline::deflate(6),
1269            )
1270            .unwrap();
1271            file.close().unwrap();
1272        }
1273        // Read back
1274        {
1275            let file = H5File::open(&path).unwrap();
1276            let ds = file.dataset("nodes/id").unwrap();
1277            let strings = ds.read_vlen_strings().unwrap();
1278            assert_eq!(strings.len(), 7189);
1279            assert_eq!(strings[0], input[0]);
1280            assert_eq!(strings[7188], input[7188]);
1281        }
1282        // Also test open_rw then re-read
1283        {
1284            let file = H5File::open_rw(&path).unwrap();
1285            file.set_attr_string("version", "1.0").unwrap();
1286            file.close().unwrap();
1287        }
1288        {
1289            let file = H5File::open(&path).unwrap();
1290            let ds = file.dataset("nodes/id").unwrap();
1291            let strings = ds.read_vlen_strings().unwrap();
1292            assert_eq!(strings.len(), 7189);
1293            assert_eq!(strings[0], input[0]);
1294        }
1295        std::fs::remove_file(&path).ok();
1296    }
1297
1298    #[test]
1299    fn vlen_string_write_read() {
1300        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
1301        {
1302            let file = H5File::create(&path).unwrap();
1303            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
1304                .unwrap();
1305            file.close().unwrap();
1306        }
1307        {
1308            let file = H5File::open(&path).unwrap();
1309            let ds = file.dataset("names").unwrap();
1310            let strings = ds.read_vlen_strings().unwrap();
1311            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
1312        }
1313        std::fs::remove_file(&path).ok();
1314    }
1315
1316    #[test]
1317    #[cfg(feature = "deflate")]
1318    fn vlen_string_deflate_roundtrip() {
1319        use crate::format::messages::filter::FilterPipeline;
1320        let path = std::env::temp_dir().join("hdf5_vlen_deflate.h5");
1321        let input: Vec<&str> = (0..100)
1322            .map(|i| match i % 3 {
1323                0 => "hello world",
1324                1 => "compressed vlen string test",
1325                _ => "rust-hdf5",
1326            })
1327            .collect();
1328        {
1329            let file = H5File::create(&path).unwrap();
1330            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1331                .unwrap();
1332            file.close().unwrap();
1333        }
1334        {
1335            let file = H5File::open(&path).unwrap();
1336            let ds = file.dataset("texts").unwrap();
1337            let strings = ds.read_vlen_strings().unwrap();
1338            assert_eq!(strings.len(), 100);
1339            for (i, s) in strings.iter().enumerate() {
1340                assert_eq!(s, input[i]);
1341            }
1342        }
1343        std::fs::remove_file(&path).ok();
1344    }
1345
1346    #[test]
1347    #[cfg(feature = "zstd")]
1348    fn vlen_string_zstd_roundtrip() {
1349        use crate::format::messages::filter::FilterPipeline;
1350        let path = std::env::temp_dir().join("hdf5_vlen_zstd.h5");
1351        let input: Vec<&str> = (0..200)
1352            .map(|i| match i % 4 {
1353                0 => "zstandard compression test",
1354                1 => "variable length string",
1355                2 => "rust-hdf5 chunked storage",
1356                _ => "hello zstd world",
1357            })
1358            .collect();
1359        {
1360            let file = H5File::create(&path).unwrap();
1361            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
1362                .unwrap();
1363            file.close().unwrap();
1364        }
1365        {
1366            let file = H5File::open(&path).unwrap();
1367            let ds = file.dataset("data").unwrap();
1368            let strings = ds.read_vlen_strings().unwrap();
1369            assert_eq!(strings.len(), 200);
1370            for (i, s) in strings.iter().enumerate() {
1371                assert_eq!(s, input[i]);
1372            }
1373        }
1374        std::fs::remove_file(&path).ok();
1375    }
1376
1377    #[test]
1378    #[cfg(feature = "deflate")]
1379    fn shuffle_deflate_roundtrip() {
1380        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
1381        {
1382            let file = H5File::create(&path).unwrap();
1383            let ds = file
1384                .new_dataset::<f64>()
1385                .shape([0usize, 4])
1386                .chunk(&[1, 4])
1387                .max_shape(&[None, Some(4)])
1388                .shuffle_deflate(6)
1389                .create("data")
1390                .unwrap();
1391            for frame in 0..20u64 {
1392                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
1393                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1394                ds.write_chunk(frame as usize, &raw).unwrap();
1395            }
1396            ds.extend(&[20, 4]).unwrap();
1397            file.close().unwrap();
1398        }
1399        {
1400            let file = H5File::open(&path).unwrap();
1401            let ds = file.dataset("data").unwrap();
1402            assert_eq!(ds.shape(), vec![20, 4]);
1403            let data = ds.read_raw::<f64>().unwrap();
1404            assert_eq!(data.len(), 80);
1405            for (i, val) in data.iter().enumerate() {
1406                assert!((val - i as f64).abs() < 1e-10);
1407            }
1408        }
1409        std::fs::remove_file(&path).ok();
1410    }
1411
1412    #[test]
1413    fn file_level_attributes() {
1414        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
1415        {
1416            let file = H5File::create(&path).unwrap();
1417            file.set_attr_string("title", "Test File").unwrap();
1418            file.set_attr_numeric("version", &42i32).unwrap();
1419            let ds = file
1420                .new_dataset::<u8>()
1421                .shape([1usize])
1422                .create("dummy")
1423                .unwrap();
1424            ds.write_raw(&[0u8]).unwrap();
1425            file.close().unwrap();
1426        }
1427        {
1428            let file = H5File::open(&path).unwrap();
1429            assert!(file.dataset_names().contains(&"dummy".to_string()));
1430
1431            // Read file-level attributes
1432            let names = file.attr_names().unwrap();
1433            assert!(names.contains(&"title".to_string()));
1434
1435            let title = file.attr_string("title").unwrap();
1436            assert_eq!(title, "Test File");
1437        }
1438        std::fs::remove_file(&path).ok();
1439    }
1440
1441    #[test]
1442    fn scalar_dataset_roundtrip() {
1443        let path = std::env::temp_dir().join("hdf5_scalar.h5");
1444        {
1445            let file = H5File::create(&path).unwrap();
1446            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
1447            ds.write_raw(&[std::f64::consts::PI]).unwrap();
1448            file.close().unwrap();
1449        }
1450        {
1451            let file = H5File::open(&path).unwrap();
1452            let ds = file.dataset("pi").unwrap();
1453            assert_eq!(ds.shape(), Vec::<usize>::new());
1454            assert_eq!(ds.total_elements(), 1);
1455            let data = ds.read_raw::<f64>().unwrap();
1456            assert_eq!(data.len(), 1);
1457            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
1458        }
1459        std::fs::remove_file(&path).ok();
1460    }
1461
1462    #[test]
1463    fn append_mode_extend_chunked() {
1464        let path = std::env::temp_dir().join("hdf5_append_extend.h5");
1465
1466        // Create with 5 frames
1467        {
1468            let file = H5File::create(&path).unwrap();
1469            let ds = file
1470                .new_dataset::<i32>()
1471                .shape([0usize, 3])
1472                .chunk(&[1, 3])
1473                .max_shape(&[None, Some(3)])
1474                .create("stream")
1475                .unwrap();
1476            for i in 0..5u64 {
1477                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1478                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1479                ds.write_chunk(i as usize, &raw).unwrap();
1480            }
1481            ds.extend(&[5, 3]).unwrap();
1482            file.close().unwrap();
1483        }
1484
1485        // Reopen and add 5 more frames
1486        {
1487            let file = H5File::open_rw(&path).unwrap();
1488            // Find the stream dataset index (it's the first one)
1489            let names = file.dataset_names();
1490            assert!(names.contains(&"stream".to_string()));
1491
1492            // Write more chunks via the writer directly
1493            let mut inner = crate::file::borrow_inner_mut(&file.inner);
1494            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
1495                let ds_idx = writer.dataset_index("stream").unwrap();
1496                for i in 5..10u64 {
1497                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1498                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1499                    writer.write_chunk(ds_idx, i, &raw).unwrap();
1500                }
1501                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
1502            }
1503            drop(inner);
1504            file.close().unwrap();
1505        }
1506
1507        // Read back all 10 frames
1508        {
1509            let file = H5File::open(&path).unwrap();
1510            let ds = file.dataset("stream").unwrap();
1511            assert_eq!(ds.shape(), vec![10, 3]);
1512            let data = ds.read_raw::<i32>().unwrap();
1513            assert_eq!(data.len(), 30);
1514            for (i, val) in data.iter().enumerate() {
1515                assert_eq!(*val, i as i32, "mismatch at {}", i);
1516            }
1517        }
1518
1519        std::fs::remove_file(&path).ok();
1520    }
1521
1522    #[test]
1523    fn group_hierarchy_roundtrip() {
1524        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");
1525
1526        {
1527            let file = H5File::create(&path).unwrap();
1528            let root = file.root_group();
1529
1530            // Create groups
1531            let det = root.create_group("detector").unwrap();
1532            let raw = det.create_group("raw").unwrap();
1533
1534            // Create datasets in groups
1535            let ds1 = det
1536                .new_dataset::<f32>()
1537                .shape([10usize])
1538                .create("temperature")
1539                .unwrap();
1540            ds1.write_raw(&[1.0f32; 10]).unwrap();
1541
1542            let ds2 = raw
1543                .new_dataset::<u16>()
1544                .shape([4usize, 4])
1545                .create("image")
1546                .unwrap();
1547            ds2.write_raw(&[42u16; 16]).unwrap();
1548
1549            // Root-level dataset
1550            let ds3 = file
1551                .new_dataset::<i32>()
1552                .shape([3usize])
1553                .create("version")
1554                .unwrap();
1555            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1556
1557            file.close().unwrap();
1558        }
1559
1560        {
1561            let file = H5File::open(&path).unwrap();
1562            let names = file.dataset_names();
1563            assert!(names.contains(&"version".to_string()));
1564            assert!(names.contains(&"detector/temperature".to_string()));
1565            assert!(names.contains(&"detector/raw/image".to_string()));
1566
1567            // Read datasets
1568            let ds = file.dataset("version").unwrap();
1569            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1570
1571            let ds = file.dataset("detector/temperature").unwrap();
1572            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1573
1574            let ds = file.dataset("detector/raw/image").unwrap();
1575            assert_eq!(ds.shape(), vec![4, 4]);
1576            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1577
1578            // Group traversal
1579            let root = file.root_group();
1580            let group_names = root.group_names().unwrap();
1581            assert!(group_names.contains(&"detector".to_string()));
1582        }
1583
1584        std::fs::remove_file(&path).ok();
1585    }
1586
1587    #[test]
1588    fn nested_groups_via_file_create_group() {
1589        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");
1590
1591        {
1592            let file = H5File::create(&path).unwrap();
1593
1594            // Use the H5File::create_group convenience method
1595            let grp = file.create_group("sensors").unwrap();
1596            let sub = grp.create_group("accel").unwrap();
1597
1598            let ds = sub
1599                .new_dataset::<f64>()
1600                .shape([3usize])
1601                .create("xyz")
1602                .unwrap();
1603            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1604
1605            file.close().unwrap();
1606        }
1607
1608        {
1609            let file = H5File::open(&path).unwrap();
1610            let names = file.dataset_names();
1611            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1612
1613            let ds = file.dataset("sensors/accel/xyz").unwrap();
1614            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1615
1616            // Open group in read mode
1617            let root = file.root_group();
1618            let sensors = root.group("sensors").unwrap();
1619            assert_eq!(sensors.name(), "/sensors");
1620
1621            let accel = sensors.group("accel").unwrap();
1622            assert_eq!(accel.name(), "/sensors/accel");
1623
1624            // list_groups from root
1625            let top_groups = root.group_names().unwrap();
1626            assert!(top_groups.contains(&"sensors".to_string()));
1627
1628            // list_groups from sensors
1629            let sub_groups = sensors.group_names().unwrap();
1630            assert!(sub_groups.contains(&"accel".to_string()));
1631        }
1632
1633        std::fs::remove_file(&path).ok();
1634    }
1635}
1636
#[cfg(test)]
mod h5py_compat_tests {
    use super::*;

    /// Verify our files can be read by h5dump (if available).
    ///
    /// Writes one file containing a contiguous dataset, a chunked dataset
    /// created with `.deflate(6)`, a dataset inside a group, and a string
    /// attribute, then runs `h5dump` on it twice (header-only and full) and
    /// asserts that both invocations exit successfully.
    ///
    /// The test returns early without failing when `h5dump` cannot be
    /// spawned. It is only compiled when the `deflate` feature is enabled.
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // A spawn error here means the tool is not installed; skip quietly.
        let probe = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if probe.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        // Include the process id in the file name so that concurrent runs of
        // the test suite do not overwrite each other's scratch file.
        let path = std::env::temp_dir()
            .join(format!("hdf5_h5dump_validate_{}.h5", std::process::id()));

        // Write a comprehensive test file
        {
            use crate::types::VarLenUnicode;

            let file = H5File::create(&path).unwrap();

            // Contiguous 3x4 dataset holding 0.0..=11.0
            let matrix = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let matrix_values: Vec<f64> = (0..12).map(|i| i as f64).collect();
            matrix.write_raw(&matrix_values).unwrap();

            // Chunked + compressed, starting empty and written chunk by chunk
            let stream = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for chunk_idx in 0..5usize {
                // Each chunk is one row of two consecutive integers,
                // serialized as little-endian bytes.
                let first = chunk_idx as i32 * 2;
                let raw: Vec<u8> = [first, first + 1]
                    .iter()
                    .flat_map(|v| v.to_le_bytes())
                    .collect();
                stream.write_chunk(chunk_idx, &raw).unwrap();
            }
            // Grow the dataset's shape to cover the five rows just written.
            stream.extend(&[5, 2]).unwrap();

            // Group with a small dataset inside it
            let meta = file.create_group("meta").unwrap();
            let flags = meta
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            flags.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute attached to the contiguous dataset
            let units = matrix
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            units.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Run h5dump and verify exit code.
        // The path is passed as an OS string, so a temp directory that is not
        // valid UTF-8 does not make the test panic.
        let header_dump = std::process::Command::new("h5dump")
            .arg("-H") // header only (faster)
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            header_dump.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&header_dump.stdout),
            String::from_utf8_lossy(&header_dump.stderr),
        );

        // Full dump (with data) should also work
        let full_dump = std::process::Command::new("h5dump")
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            full_dump.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&full_dump.stderr),
        );

        // Best-effort cleanup; a failure to delete is not a test failure.
        std::fs::remove_file(&path).ok();
    }

    /// Reads a pre-existing file from a fixed location and checks the shape
    /// and a few values of its `data` and `images` datasets.
    ///
    /// The file name suggests it is generated by h5py outside of this test
    /// (TODO confirm where). When `/tmp/test_h5py_default.h5` does not exist
    /// the test returns early without failing.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        // `data`: 4x5 f64 dataset whose first element is 0 and last is 19.
        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        assert!((data[0]).abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        // `images`: 3x64x64 u16 dataset; only the element count is checked.
        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}