Skip to main content

rust_hdf5/
file.rs

1//! HDF5 file handle — the main entry point for the public API.
2//!
3//! ```no_run
4//! use rust_hdf5::H5File;
5//!
6//! // Write
7//! let file = H5File::create("example.h5").unwrap();
8//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
9//! ds.write_raw(&vec![0u8; 200]).unwrap();
10//! drop(file);
11//!
12//! // Read
13//! let file = H5File::open("example.h5").unwrap();
14//! let ds = file.dataset("data").unwrap();
15//! let data = ds.read_raw::<u8>().unwrap();
16//! assert_eq!(data.len(), 200);
17//! ```
18
19use std::path::Path;
20
21use crate::io::locking::FileLocking;
22use crate::io::{Hdf5Reader, Hdf5Writer};
23
24use crate::dataset::{DatasetBuilder, H5Dataset};
25use crate::error::{Hdf5Error, Result};
26use crate::format::messages::filter::FilterPipeline;
27use crate::group::H5Group;
28use crate::types::H5Type;
29
// ---------------------------------------------------------------------------
// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
// the `threadsafe` feature flag.
// ---------------------------------------------------------------------------

/// Shared, reference-counted handle to the file state when the `threadsafe`
/// feature is disabled: `Rc<RefCell<H5FileInner>>`.
#[cfg(not(feature = "threadsafe"))]
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;

/// Shared, reference-counted handle to the file state when the `threadsafe`
/// feature is enabled: `Arc<Mutex<H5FileInner>>`.
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
40
41/// Helper to borrow/lock the inner state immutably.
42#[cfg(not(feature = "threadsafe"))]
43pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
44    inner.borrow()
45}
46
47/// Helper to borrow/lock the inner state mutably.
48#[cfg(not(feature = "threadsafe"))]
49pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
50    inner.borrow_mut()
51}
52
53/// Helper to clone a SharedInner.
54#[cfg(not(feature = "threadsafe"))]
55pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
56    std::rc::Rc::clone(inner)
57}
58
59/// Helper to wrap an H5FileInner in SharedInner.
60#[cfg(not(feature = "threadsafe"))]
61pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
62    std::rc::Rc::new(std::cell::RefCell::new(inner))
63}
64
/// Lock the shared file state for reading (thread-safe build).
///
/// The state lives behind a `Mutex`, so this takes the same exclusive lock as
/// `borrow_inner_mut`; requesting a second guard on the same thread while one
/// is still held will not return. Panics if the mutex is poisoned.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    std::sync::Mutex::lock(inner).unwrap()
}
69
/// Lock the shared file state for mutation (thread-safe build).
///
/// Panics if the mutex is poisoned. The guard must be dropped before the same
/// thread asks for another one, since the underlying `Mutex` is not reentrant.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    std::sync::Mutex::lock(inner).unwrap()
}
74
/// Produce another handle to the same shared file state (thread-safe build).
///
/// Only the atomic reference count is bumped; the state itself is not copied.
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    SharedInner::clone(inner)
}
79
/// Move a freshly built `H5FileInner` into a new `SharedInner` handle
/// (thread-safe build: `Arc<Mutex<_>>`).
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    let guarded = std::sync::Mutex::new(inner);
    std::sync::Arc::new(guarded)
}
84
/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// By default, this uses `Rc<RefCell<>>` for zero-overhead single-threaded use.
/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
/// `H5File` `Send + Sync`.
///
/// The variant records which mode the file was opened in; `H5File` methods
/// match on it to decide whether an operation is permitted.
pub(crate) enum H5FileInner {
    /// File opened through `Hdf5Writer` (`create` / `open_rw`).
    Writer(Hdf5Writer),
    /// File opened through `Hdf5Reader` (`open`).
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    /// Any handle still sharing this state observes `Closed` afterwards.
    Closed,
}
96
/// An HDF5 file opened for reading or writing.
///
/// Datasets created from this file hold a shared reference to the underlying
/// I/O handle, so the file does not need to outlive its datasets (they share
/// ownership via reference counting).
pub struct H5File {
    /// Reference-counted file state; cloned (via `clone_inner`) into every
    /// group, dataset builder and dataset handle created from this file.
    pub(crate) inner: SharedInner,
}
105
106impl H5File {
107    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
108    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
109        let writer = Hdf5Writer::create(path.as_ref())?;
110        Ok(Self {
111            inner: new_shared(H5FileInner::Writer(writer)),
112        })
113    }
114
115    /// Open an existing HDF5 file for reading.
116    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
117        let reader = Hdf5Reader::open(path.as_ref())?;
118        Ok(Self {
119            inner: new_shared(H5FileInner::Reader(reader)),
120        })
121    }
122
123    /// Open an existing HDF5 file for appending new datasets.
124    ///
125    /// Existing datasets are preserved. New datasets can be added and will
126    /// be written after the current end of file. Existing chunked datasets
127    /// can be extended with `write_chunk` and `extend_dataset`.
128    ///
129    /// ```no_run
130    /// use rust_hdf5::H5File;
131    /// let file = H5File::open_rw("existing.h5").unwrap();
132    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
133    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
134    /// file.close().unwrap();
135    /// ```
136    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
137        let writer = Hdf5Writer::open_append(path.as_ref())?;
138        Ok(Self {
139            inner: new_shared(H5FileInner::Writer(writer)),
140        })
141    }
142
143    /// Start building open options for an HDF5 file.
144    ///
145    /// Use this to control file-locking behavior explicitly:
146    ///
147    /// ```no_run
148    /// use rust_hdf5::{H5File, FileLocking};
149    /// // Open with locking disabled (e.g. on NFS without lock support).
150    /// let file = H5File::options()
151    ///     .locking(FileLocking::Disabled)
152    ///     .open_rw("existing.h5")
153    ///     .unwrap();
154    /// # let _ = file;
155    /// ```
156    pub fn options() -> H5FileOptions {
157        H5FileOptions::default()
158    }
159
160    /// Return a handle to the root group.
161    ///
162    /// The root group can be used to create datasets and sub-groups.
163    pub fn root_group(&self) -> H5Group {
164        H5Group::new(clone_inner(&self.inner), "/".to_string())
165    }
166
167    /// Create a group in the root of the file.
168    ///
169    /// ```no_run
170    /// use rust_hdf5::H5File;
171    /// let file = H5File::create("groups.h5").unwrap();
172    /// let grp = file.create_group("detector").unwrap();
173    /// ```
174    pub fn create_group(&self, name: &str) -> Result<H5Group> {
175        self.root_group().create_group(name)
176    }
177
178    /// Start building a new dataset with the given element type.
179    ///
180    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
181    /// `.create("name")` to finalize.
182    ///
183    /// ```no_run
184    /// # use rust_hdf5::H5File;
185    /// let file = H5File::create("build.h5").unwrap();
186    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
187    /// ```
188    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
189        DatasetBuilder::new(clone_inner(&self.inner))
190    }
191
192    /// Add a string attribute to the file (root group).
193    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
194        use crate::format::messages::attribute::AttributeMessage;
195        let attr = AttributeMessage::scalar_string(name, value);
196        let mut inner = borrow_inner_mut(&self.inner);
197        match &mut *inner {
198            H5FileInner::Writer(writer) => {
199                writer.add_root_attribute(attr);
200                Ok(())
201            }
202            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
203        }
204    }
205
206    /// Add a numeric attribute to the file (root group).
207    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
208        use crate::format::messages::attribute::AttributeMessage;
209        let es = T::element_size();
210        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
211        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
212        let mut inner = borrow_inner_mut(&self.inner);
213        match &mut *inner {
214            H5FileInner::Writer(writer) => {
215                writer.add_root_attribute(attr);
216                Ok(())
217            }
218            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
219        }
220    }
221
222    /// Return the names of file-level (root group) attributes.
223    pub fn attr_names(&self) -> Result<Vec<String>> {
224        let inner = borrow_inner(&self.inner);
225        match &*inner {
226            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
227            _ => Ok(vec![]),
228        }
229    }
230
231    /// Read a file-level string attribute.
232    pub fn attr_string(&self, name: &str) -> Result<String> {
233        let inner = borrow_inner(&self.inner);
234        match &*inner {
235            H5FileInner::Reader(reader) => {
236                let attr = reader
237                    .root_attr(name)
238                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
239                let end = attr
240                    .data
241                    .iter()
242                    .position(|&b| b == 0)
243                    .unwrap_or(attr.data.len());
244                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
245            }
246            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
247        }
248    }
249
250    /// Check if the file is in write/append mode.
251    pub fn is_writable(&self) -> bool {
252        let inner = borrow_inner(&self.inner);
253        matches!(&*inner, H5FileInner::Writer(_))
254    }
255
256    /// Create a variable-length string dataset and write data.
257    ///
258    /// This is a convenience method for writing h5py-compatible vlen string
259    /// datasets using global heap storage.
260    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
261        let mut inner = borrow_inner_mut(&self.inner);
262        match &mut *inner {
263            H5FileInner::Writer(writer) => {
264                let idx = writer.create_vlen_string_dataset(name, strings)?;
265                // If the name contains '/', assign the dataset to its parent group
266                if let Some(slash_pos) = name.rfind('/') {
267                    let group_path = &name[..slash_pos];
268                    let abs_group_path = if group_path.starts_with('/') {
269                        group_path.to_string()
270                    } else {
271                        format!("/{}", group_path)
272                    };
273                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
274                }
275                Ok(())
276            }
277            H5FileInner::Reader(_) => {
278                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
279            }
280            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
281        }
282    }
283
284    /// Create a chunked, compressed variable-length string dataset.
285    ///
286    /// Like `write_vlen_strings`, but stores the vlen references in chunked
287    /// layout with the given filter pipeline (e.g., `FilterPipeline::deflate(6)`
288    /// or `FilterPipeline::zstd(3)`). `chunk_size` is the number of strings
289    /// per chunk.
290    pub fn write_vlen_strings_compressed(
291        &self,
292        name: &str,
293        strings: &[&str],
294        chunk_size: usize,
295        pipeline: FilterPipeline,
296    ) -> Result<()> {
297        let mut inner = borrow_inner_mut(&self.inner);
298        match &mut *inner {
299            H5FileInner::Writer(writer) => {
300                let idx = writer
301                    .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
302                if let Some(slash_pos) = name.rfind('/') {
303                    let group_path = &name[..slash_pos];
304                    let abs_group_path = if group_path.starts_with('/') {
305                        group_path.to_string()
306                    } else {
307                        format!("/{}", group_path)
308                    };
309                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
310                }
311                Ok(())
312            }
313            H5FileInner::Reader(_) => {
314                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
315            }
316            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
317        }
318    }
319
320    /// Create an empty chunked vlen string dataset ready for incremental appends.
321    ///
322    /// Use `append_vlen_strings` to add data. If `pipeline` is `Some`, chunks
323    /// are compressed (e.g., `Some(FilterPipeline::lz4())`).
324    pub fn create_appendable_vlen_dataset(
325        &self,
326        name: &str,
327        chunk_size: usize,
328        pipeline: Option<FilterPipeline>,
329    ) -> Result<()> {
330        let mut inner = borrow_inner_mut(&self.inner);
331        match &mut *inner {
332            H5FileInner::Writer(writer) => {
333                let idx =
334                    writer.create_appendable_vlen_string_dataset(name, chunk_size, pipeline)?;
335                if let Some(slash_pos) = name.rfind('/') {
336                    let group_path = &name[..slash_pos];
337                    let abs_group_path = if group_path.starts_with('/') {
338                        group_path.to_string()
339                    } else {
340                        format!("/{}", group_path)
341                    };
342                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
343                }
344                Ok(())
345            }
346            H5FileInner::Reader(_) => {
347                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
348            }
349            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
350        }
351    }
352
353    /// Append variable-length strings to an existing chunked vlen string dataset.
354    pub fn append_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
355        let mut inner = borrow_inner_mut(&self.inner);
356        match &mut *inner {
357            H5FileInner::Writer(writer) => {
358                let ds_index = writer
359                    .dataset_index(name)
360                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
361                writer.append_vlen_strings(ds_index, strings)?;
362                Ok(())
363            }
364            H5FileInner::Reader(_) => {
365                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
366            }
367            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
368        }
369    }
370
371    /// Delete a dataset by name. The dataset is unlinked on close;
372    /// file space is not reclaimed.
373    pub fn delete_dataset(&self, name: &str) -> Result<()> {
374        let mut inner = borrow_inner_mut(&self.inner);
375        match &mut *inner {
376            H5FileInner::Writer(writer) => {
377                writer.delete_dataset(name)?;
378                Ok(())
379            }
380            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
381        }
382    }
383
384    /// Delete a group and all its child datasets/sub-groups.
385    /// File space is not reclaimed.
386    pub fn delete_group(&self, name: &str) -> Result<()> {
387        let mut inner = borrow_inner_mut(&self.inner);
388        match &mut *inner {
389            H5FileInner::Writer(writer) => {
390                writer.delete_group(name)?;
391                Ok(())
392            }
393            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
394        }
395    }
396
397    /// Open an existing dataset by name (read mode).
398    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
399        let inner = borrow_inner(&self.inner);
400        match &*inner {
401            H5FileInner::Reader(reader) => {
402                let info = reader
403                    .dataset_info(name)
404                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
405                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
406                let element_size = info.datatype.element_size() as usize;
407                Ok(H5Dataset::new_reader(
408                    clone_inner(&self.inner),
409                    name.to_string(),
410                    shape,
411                    element_size,
412                ))
413            }
414            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
415                "cannot open a dataset by name in write mode; use new_dataset() instead"
416                    .to_string(),
417            )),
418            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
419        }
420    }
421
422    /// Return the names of all datasets in the root group.
423    ///
424    /// Works in both read and write mode: in write mode, returns the names of
425    /// datasets created so far; in read mode, returns the names discovered
426    /// during file open.
427    pub fn dataset_names(&self) -> Vec<String> {
428        let inner = borrow_inner(&self.inner);
429        match &*inner {
430            H5FileInner::Reader(reader) => reader
431                .dataset_names()
432                .iter()
433                .map(|s| s.to_string())
434                .collect(),
435            H5FileInner::Writer(writer) => writer
436                .dataset_names()
437                .iter()
438                .map(|s| s.to_string())
439                .collect(),
440            H5FileInner::Closed => Vec::new(),
441        }
442    }
443
444    /// Explicitly close the file. For a writer, this finalizes the file
445    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
446    ///
447    /// The file is also auto-finalized on drop, but calling `close()` lets
448    /// you handle errors.
449    pub fn close(self) -> Result<()> {
450        let old = {
451            let mut inner = borrow_inner_mut(&self.inner);
452            std::mem::replace(&mut *inner, H5FileInner::Closed)
453        };
454        match old {
455            H5FileInner::Writer(writer) => {
456                writer.close()?;
457                Ok(())
458            }
459            H5FileInner::Reader(_) => Ok(()),
460            H5FileInner::Closed => Ok(()),
461        }
462    }
463
    /// Flush the file to disk. Only meaningful in write mode.
    ///
    /// Currently a no-op that always returns `Ok(())`, regardless of the
    /// file's mode; it exists for API compatibility.
    pub fn flush(&self) -> Result<()> {
        // The underlying writer does not expose a standalone flush; data is
        // written to disk immediately via pwrite. This is a compatibility
        // method that does nothing for now.
        Ok(())
    }
471}
472
473/// Builder controlling how an [`H5File`] is opened.
474///
475/// The default policy follows the HDF5 C library: an exclusive lock is
476/// acquired for write-mode opens and a shared lock for read-mode opens,
477/// honoring the `HDF5_USE_FILE_LOCKING` environment variable. Calling
478/// [`Self::locking`] overrides the env-var value.
479#[derive(Debug, Default, Clone)]
480pub struct H5FileOptions {
481    locking: Option<FileLocking>,
482}
483
484impl H5FileOptions {
485    /// Construct a fresh options builder with default settings.
486    pub fn new() -> Self {
487        Self::default()
488    }
489
490    /// Override the locking policy. Bypasses the `HDF5_USE_FILE_LOCKING`
491    /// environment variable for the resulting open call.
492    pub fn locking(mut self, policy: FileLocking) -> Self {
493        self.locking = Some(policy);
494        self
495    }
496
497    /// Disable OS-level file locking entirely (equivalent to
498    /// `HDF5_USE_FILE_LOCKING=FALSE`).
499    pub fn no_locking(self) -> Self {
500        self.locking(FileLocking::Disabled)
501    }
502
503    /// Try to acquire the lock but do not fail if the filesystem rejects it
504    /// (equivalent to `HDF5_USE_FILE_LOCKING=BEST_EFFORT`).
505    pub fn best_effort_locking(self) -> Self {
506        self.locking(FileLocking::BestEffort)
507    }
508
509    fn resolved_locking(&self) -> FileLocking {
510        match self.locking {
511            Some(p) => p,
512            None => FileLocking::from_env_or(FileLocking::default()),
513        }
514    }
515
516    /// Create a new HDF5 file at `path` with the configured options.
517    pub fn create<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
518        let writer = Hdf5Writer::create_with_locking(path.as_ref(), self.resolved_locking())?;
519        Ok(H5File {
520            inner: new_shared(H5FileInner::Writer(writer)),
521        })
522    }
523
524    /// Open an existing HDF5 file for reading with the configured options.
525    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
526        let reader = Hdf5Reader::open_with_locking(path.as_ref(), self.resolved_locking())?;
527        Ok(H5File {
528            inner: new_shared(H5FileInner::Reader(reader)),
529        })
530    }
531
532    /// Open an existing HDF5 file for read/write with the configured options.
533    pub fn open_rw<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
534        let writer = Hdf5Writer::open_append_with_locking(path.as_ref(), self.resolved_locking())?;
535        Ok(H5File {
536            inner: new_shared(H5FileInner::Writer(writer)),
537        })
538    }
539}
540
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Per-process sequence number mixed into every temp file name.
    static NEXT_ID: AtomicU64 = AtomicU64::new(0);

    /// Build a unique path in the system temp directory.
    ///
    /// The process id plus an atomic counter keep paths distinct across
    /// concurrent cargo invocations, and sidestep any flock/LockFileEx race
    /// where the lock of a just-closed file is briefly still visible when the
    /// same path is reopened immediately.
    fn temp_path(name: &str) -> PathBuf {
        let seq = NEXT_ID.fetch_add(1, Ordering::Relaxed);
        let file_name = format!(
            "hdf5_file_test_{}_{}_{}.h5",
            name,
            std::process::id(),
            seq
        );
        let mut path = std::env::temp_dir();
        path.push(file_name);
        path
    }

    /// An empty file that was closed explicitly can be reopened for reading.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        H5File::create(&path).unwrap().close().unwrap();

        // Should be readable
        H5File::open(&path).unwrap().close().unwrap();

        let _ = std::fs::remove_file(&path);
    }

    /// Dropping a writer without calling `close()` still leaves a valid file.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        // drop auto-finalizes
        drop(H5File::create(&path).unwrap());

        // Verify the file is valid by opening it
        let reopened = H5File::open(&path).unwrap();
        reopened.close().unwrap();

        let _ = std::fs::remove_file(&path);
    }

    /// Asking a reader for an unknown dataset yields an error.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        drop(H5File::create(&path).unwrap());

        let reader = H5File::open(&path).unwrap();
        assert!(reader.dataset("nonexistent").is_err());

        let _ = std::fs::remove_file(&path);
    }

    /// A 4x4 `u8` dataset survives a write/close/open/read cycle.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let out = H5File::create(&path).unwrap();
            let ds = out.new_dataset::<u8>().shape([4, 4]).create("data").unwrap();
            let zeros = [0u8; 16];
            ds.write_raw(&zeros).unwrap();
            out.close().unwrap();
        }

        // Read
        {
            let input = H5File::open(&path).unwrap();
            let ds = input.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let bytes = ds.read_raw::<u8>().unwrap();
            assert_eq!(bytes.len(), 16);
            assert!(!bytes.iter().any(|&b| b != 0));
            input.close().unwrap();
        }

        let _ = std::fs::remove_file(&path);
    }

    /// A 2x3 `f64` dataset reads back exactly what was written.
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");
        let expected: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let out = H5File::create(&path).unwrap();
            let ds = out
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&expected).unwrap();
            out.close().unwrap();
        }

        // Read
        {
            let input = H5File::open(&path).unwrap();
            let ds = input.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let actual = ds.read_raw::<f64>().unwrap();
            assert_eq!(actual, expected);
        }

        let _ = std::fs::remove_file(&path);
    }

    /// Two datasets of different element types coexist in one file.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let out = H5File::create(&path).unwrap();

            let int_ds = out.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            int_ds.write_raw(&[10i32, 20, 30]).unwrap();

            let float_ds = out
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            float_ds.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            out.close().unwrap();
        }

        {
            let input = H5File::open(&path).unwrap();

            let int_ds = input.dataset("ints").unwrap();
            assert_eq!(int_ds.shape(), vec![3]);
            assert_eq!(int_ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);

            let float_ds = input.dataset("floats").unwrap();
            assert_eq!(float_ds.shape(), vec![2, 2]);
            assert_eq!(
                float_ds.read_raw::<f32>().unwrap(),
                vec![1.0f32, 2.0, 3.0, 4.0]
            );
        }

        let _ = std::fs::remove_file(&path);
    }

    /// `close()` succeeds on a fresh writer.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        H5File::create(&path).unwrap().close().unwrap();
        // File is consumed by close(), so no double-close possible at the type level.
        let _ = std::fs::remove_file(&path);
    }
}
707
708#[cfg(test)]
709mod integration_tests {
710    use super::*;
711
712    #[test]
713    fn write_file_for_h5dump() {
714        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
715        let file = H5File::create(&path).unwrap();
716
717        let ds = file
718            .new_dataset::<u8>()
719            .shape([4usize, 4])
720            .create("data_u8")
721            .unwrap();
722        let data: Vec<u8> = (0..16).collect();
723        ds.write_raw(&data).unwrap();
724
725        let ds2 = file
726            .new_dataset::<f64>()
727            .shape([3usize, 2])
728            .create("data_f64")
729            .unwrap();
730        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
731        ds2.write_raw(&fdata).unwrap();
732
733        let ds3 = file
734            .new_dataset::<i32>()
735            .shape([5usize])
736            .create("values")
737            .unwrap();
738        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
739        ds3.write_raw(&idata).unwrap();
740
741        file.close().unwrap();
742
743        // File exists
744        assert!(path.exists());
745    }
746
747    #[test]
748    fn write_chunked_file_for_h5dump() {
749        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
750        let file = H5File::create(&path).unwrap();
751
752        // Create a chunked dataset with unlimited first dimension
753        let ds = file
754            .new_dataset::<f64>()
755            .shape([0usize, 4])
756            .chunk(&[1, 4])
757            .max_shape(&[None, Some(4)])
758            .create("streaming_data")
759            .unwrap();
760
761        // Write 5 frames of data
762        for frame in 0..5u64 {
763            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
764            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
765            ds.write_chunk(frame as usize, &raw).unwrap();
766        }
767
768        // Extend dimensions to reflect the 5 written frames
769        ds.extend(&[5, 4]).unwrap();
770        ds.flush().unwrap();
771
772        file.close().unwrap();
773
774        assert!(path.exists());
775    }
776
777    #[test]
778    fn write_chunked_many_frames_for_h5dump() {
779        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
780        let file = H5File::create(&path).unwrap();
781
782        let ds = file
783            .new_dataset::<i32>()
784            .shape([0usize, 3])
785            .chunk(&[1, 3])
786            .max_shape(&[None, Some(3)])
787            .create("data")
788            .unwrap();
789
790        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
791        for frame in 0..10u64 {
792            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
793            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
794            ds.write_chunk(frame as usize, &raw).unwrap();
795        }
796        ds.extend(&[10, 3]).unwrap();
797        file.close().unwrap();
798
799        assert!(path.exists());
800    }
801
802    #[test]
803    fn write_dataset_with_attributes() {
804        use crate::types::VarLenUnicode;
805
806        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
807        let file = H5File::create(&path).unwrap();
808
809        let ds = file
810            .new_dataset::<f32>()
811            .shape([10usize])
812            .create("temperature")
813            .unwrap();
814        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
815        ds.write_raw(&data).unwrap();
816
817        // Add string attributes
818        let attr = ds
819            .new_attr::<VarLenUnicode>()
820            .shape(())
821            .create("units")
822            .unwrap();
823        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
824            .unwrap();
825
826        let attr2 = ds
827            .new_attr::<VarLenUnicode>()
828            .shape(())
829            .create("description")
830            .unwrap();
831        attr2
832            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
833            .unwrap();
834
835        // Use write_string convenience method
836        let attr3 = ds
837            .new_attr::<VarLenUnicode>()
838            .shape(())
839            .create("source")
840            .unwrap();
841        attr3.write_string("sensor_01").unwrap();
842
843        // Also test parse -> write_scalar pattern
844        let attr4 = ds
845            .new_attr::<VarLenUnicode>()
846            .shape(())
847            .create("label")
848            .unwrap();
849        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
850        attr4.write_scalar(&s).unwrap();
851
852        file.close().unwrap();
853
854        assert!(path.exists());
855    }
856
857    #[test]
858    fn chunked_write_read_roundtrip() {
859        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");
860
861        // Write
862        {
863            let file = H5File::create(&path).unwrap();
864            let ds = file
865                .new_dataset::<i32>()
866                .shape([0usize, 3])
867                .chunk(&[1, 3])
868                .max_shape(&[None, Some(3)])
869                .create("table")
870                .unwrap();
871
872            for frame in 0..8u64 {
873                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
874                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
875                ds.write_chunk(frame as usize, &raw).unwrap();
876            }
877            ds.extend(&[8, 3]).unwrap();
878            file.close().unwrap();
879        }
880
881        // Read
882        {
883            let file = H5File::open(&path).unwrap();
884            let ds = file.dataset("table").unwrap();
885            assert_eq!(ds.shape(), vec![8, 3]);
886            let data = ds.read_raw::<i32>().unwrap();
887            assert_eq!(data.len(), 24);
888            for (i, val) in data.iter().enumerate() {
889                assert_eq!(*val, i as i32);
890            }
891        }
892
893        std::fs::remove_file(&path).ok();
894    }
895
896    #[test]
897    #[cfg(feature = "deflate")]
898    fn compressed_chunked_roundtrip() {
899        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");
900
901        // Write compressed
902        {
903            let file = H5File::create(&path).unwrap();
904            let ds = file
905                .new_dataset::<f64>()
906                .shape([0usize, 4])
907                .chunk(&[1, 4])
908                .max_shape(&[None, Some(4)])
909                .deflate(6)
910                .create("compressed")
911                .unwrap();
912
913            for frame in 0..10u64 {
914                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
915                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
916                ds.write_chunk(frame as usize, &raw).unwrap();
917            }
918            ds.extend(&[10, 4]).unwrap();
919            file.close().unwrap();
920        }
921
922        // Read back and verify
923        {
924            let file = H5File::open(&path).unwrap();
925            let ds = file.dataset("compressed").unwrap();
926            assert_eq!(ds.shape(), vec![10, 4]);
927            let data = ds.read_raw::<f64>().unwrap();
928            assert_eq!(data.len(), 40);
929            for (i, val) in data.iter().enumerate() {
930                assert!(
931                    (val - i as f64).abs() < 1e-10,
932                    "mismatch at {}: {} != {}",
933                    i,
934                    val,
935                    i
936                );
937            }
938        }
939
940        std::fs::remove_file(&path).ok();
941    }
942
943    #[test]
944    #[cfg(feature = "deflate")]
945    fn compressed_chunked_many_frames() {
946        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");
947
948        {
949            let file = H5File::create(&path).unwrap();
950            let ds = file
951                .new_dataset::<i32>()
952                .shape([0usize, 3])
953                .chunk(&[1, 3])
954                .max_shape(&[None, Some(3)])
955                .deflate(6)
956                .create("stream")
957                .unwrap();
958
959            for frame in 0..100u64 {
960                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
961                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
962                ds.write_chunk(frame as usize, &raw).unwrap();
963            }
964            ds.extend(&[100, 3]).unwrap();
965            file.close().unwrap();
966        }
967
968        {
969            let file = H5File::open(&path).unwrap();
970            let ds = file.dataset("stream").unwrap();
971            assert_eq!(ds.shape(), vec![100, 3]);
972            let data = ds.read_raw::<i32>().unwrap();
973            assert_eq!(data.len(), 300);
974            for (i, val) in data.iter().enumerate() {
975                assert_eq!(*val, i as i32, "mismatch at {}", i);
976            }
977        }
978
979        std::fs::remove_file(&path).ok();
980    }
981    #[test]
982    fn append_mode() {
983        let path = std::env::temp_dir().join("hdf5_append.h5");
984
985        // Create initial file
986        {
987            let file = H5File::create(&path).unwrap();
988            let ds = file
989                .new_dataset::<i32>()
990                .shape([3usize])
991                .create("first")
992                .unwrap();
993            ds.write_raw(&[1i32, 2, 3]).unwrap();
994            file.close().unwrap();
995        }
996
997        // Append new dataset
998        {
999            let file = H5File::open_rw(&path).unwrap();
1000            let ds = file
1001                .new_dataset::<f64>()
1002                .shape([2usize])
1003                .create("second")
1004                .unwrap();
1005            ds.write_raw(&[4.0f64, 5.0]).unwrap();
1006            file.close().unwrap();
1007        }
1008
1009        // Read back both
1010        {
1011            let file = H5File::open(&path).unwrap();
1012            let names = file.dataset_names();
1013            assert!(names.contains(&"first".to_string()));
1014            assert!(names.contains(&"second".to_string()));
1015
1016            let ds1 = file.dataset("first").unwrap();
1017            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
1018
1019            let ds2 = file.dataset("second").unwrap();
1020            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
1021        }
1022
1023        std::fs::remove_file(&path).ok();
1024    }
1025
1026    #[test]
1027    fn open_rw_set_attr_preserves_file() {
1028        let path = std::env::temp_dir().join("hdf5_open_rw_attr.h5");
1029        // Create file with a dataset and an attribute
1030        {
1031            let file = H5File::create(&path).unwrap();
1032            let ds = file
1033                .new_dataset::<i32>()
1034                .shape([3usize])
1035                .create("data")
1036                .unwrap();
1037            ds.write_raw(&[10i32, 20, 30]).unwrap();
1038            file.set_attr_string("version", "1.0").unwrap();
1039            file.close().unwrap();
1040        }
1041        // Open rw and modify the attribute
1042        {
1043            let file = H5File::open_rw(&path).unwrap();
1044            file.set_attr_string("version", "2.0").unwrap();
1045            file.close().unwrap();
1046        }
1047        // Verify: dataset intact, attribute updated
1048        {
1049            let file = H5File::open(&path).unwrap();
1050            let ds = file.dataset("data").unwrap();
1051            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
1052            let ver = file.attr_string("version").unwrap();
1053            assert_eq!(ver, "2.0");
1054        }
1055        std::fs::remove_file(&path).ok();
1056    }
1057
1058    #[test]
1059    #[cfg(feature = "deflate")]
1060    fn open_rw_attr_with_compressed_dataset() {
1061        use crate::format::messages::filter::FilterPipeline;
1062        let path = std::env::temp_dir().join("hdf5_open_rw_compressed.h5");
1063        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
1064        // Create file with compressed vlen strings
1065        {
1066            let file = H5File::create(&path).unwrap();
1067            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1068                .unwrap();
1069            file.set_attr_string("version", "1.0").unwrap();
1070            file.close().unwrap();
1071        }
1072        // Open rw and modify attribute only
1073        {
1074            let file = H5File::open_rw(&path).unwrap();
1075            file.set_attr_string("version", "2.0").unwrap();
1076            file.close().unwrap();
1077        }
1078        // Verify: compressed dataset still readable, attribute updated
1079        {
1080            let file = H5File::open(&path).unwrap();
1081            let ds = file.dataset("texts").unwrap();
1082            let strings = ds.read_vlen_strings().unwrap();
1083            assert_eq!(strings.len(), 50);
1084            assert_eq!(strings[0], "test string data");
1085            let ver = file.attr_string("version").unwrap();
1086            assert_eq!(ver, "2.0");
1087        }
1088        std::fs::remove_file(&path).ok();
1089    }
1090
1091    #[test]
1092    #[cfg(feature = "lz4")]
1093    fn append_vlen_strings_basic() {
1094        use crate::format::messages::filter::FilterPipeline;
1095        let path = std::env::temp_dir().join("hdf5_append_vlen.h5");
1096        {
1097            let file = H5File::create(&path).unwrap();
1098            file.create_appendable_vlen_dataset("names", 4, Some(FilterPipeline::lz4()))
1099                .unwrap();
1100            file.append_vlen_strings("names", &["alice", "bob", "charlie"])
1101                .unwrap();
1102            file.append_vlen_strings("names", &["dave", "eve"]).unwrap();
1103            file.close().unwrap();
1104        }
1105        {
1106            let file = H5File::open(&path).unwrap();
1107            let ds = file.dataset("names").unwrap();
1108            let strings = ds.read_vlen_strings().unwrap();
1109            assert_eq!(strings, vec!["alice", "bob", "charlie", "dave", "eve"]);
1110        }
1111        std::fs::remove_file(&path).ok();
1112    }
1113
1114    #[test]
1115    #[cfg(feature = "lz4")]
1116    fn append_vlen_strings_large() {
1117        use crate::format::messages::filter::FilterPipeline;
1118        let path = std::env::temp_dir().join("hdf5_append_vlen_large.h5");
1119        let batch1: Vec<String> = (0..5000).map(|i| format!("node-{:06}", i)).collect();
1120        let batch2: Vec<String> = (5000..7189).map(|i| format!("node-{:06}", i)).collect();
1121        {
1122            let file = H5File::create(&path).unwrap();
1123            file.create_appendable_vlen_dataset("data", 512, Some(FilterPipeline::lz4()))
1124                .unwrap();
1125            let r1: Vec<&str> = batch1.iter().map(|s| s.as_str()).collect();
1126            file.append_vlen_strings("data", &r1).unwrap();
1127            let r2: Vec<&str> = batch2.iter().map(|s| s.as_str()).collect();
1128            file.append_vlen_strings("data", &r2).unwrap();
1129            file.close().unwrap();
1130        }
1131        {
1132            let file = H5File::open(&path).unwrap();
1133            let ds = file.dataset("data").unwrap();
1134            let strings = ds.read_vlen_strings().unwrap();
1135            assert_eq!(strings.len(), 7189);
1136            assert_eq!(strings[0], "node-000000");
1137            assert_eq!(strings[7188], "node-007188");
1138        }
1139        std::fs::remove_file(&path).ok();
1140    }
1141
1142    #[test]
1143    fn append_vlen_strings_uncompressed() {
1144        let path = std::env::temp_dir().join("hdf5_append_vlen_unc.h5");
1145        {
1146            let file = H5File::create(&path).unwrap();
1147            file.create_appendable_vlen_dataset("texts", 8, None)
1148                .unwrap();
1149            file.append_vlen_strings("texts", &["hello", "world"])
1150                .unwrap();
1151            file.append_vlen_strings("texts", &["foo", "bar", "baz"])
1152                .unwrap();
1153            file.close().unwrap();
1154        }
1155        {
1156            let file = H5File::open(&path).unwrap();
1157            let ds = file.dataset("texts").unwrap();
1158            let strings = ds.read_vlen_strings().unwrap();
1159            assert_eq!(strings, vec!["hello", "world", "foo", "bar", "baz"]);
1160        }
1161        std::fs::remove_file(&path).ok();
1162    }
1163
1164    #[test]
1165    fn delete_dataset_roundtrip() {
1166        let path = std::env::temp_dir().join("hdf5_delete_ds.h5");
1167        {
1168            let file = H5File::create(&path).unwrap();
1169            file.write_vlen_strings("keep", &["a", "b"]).unwrap();
1170            file.write_vlen_strings("remove", &["x", "y"]).unwrap();
1171            file.delete_dataset("remove").unwrap();
1172            file.close().unwrap();
1173        }
1174        {
1175            let file = H5File::open(&path).unwrap();
1176            let names = file.dataset_names();
1177            assert!(names.contains(&"keep".to_string()));
1178            assert!(!names.contains(&"remove".to_string()));
1179            let ds = file.dataset("keep").unwrap();
1180            assert_eq!(ds.read_vlen_strings().unwrap(), vec!["a", "b"]);
1181        }
1182        std::fs::remove_file(&path).ok();
1183    }
1184
1185    #[test]
1186    fn delete_group_roundtrip() {
1187        let path = std::env::temp_dir().join("hdf5_delete_grp.h5");
1188        {
1189            let file = H5File::create(&path).unwrap();
1190            let g1 = file.create_group("keep").unwrap();
1191            g1.write_vlen_strings("data", &["a"]).unwrap();
1192            let g2 = file.create_group("remove").unwrap();
1193            g2.write_vlen_strings("data", &["x"]).unwrap();
1194            file.delete_group("remove").unwrap();
1195            file.close().unwrap();
1196        }
1197        {
1198            let file = H5File::open(&path).unwrap();
1199            let names = file.dataset_names();
1200            assert!(names.contains(&"keep/data".to_string()));
1201            assert!(!names.contains(&"remove/data".to_string()));
1202        }
1203        std::fs::remove_file(&path).ok();
1204    }
1205
1206    #[test]
1207    fn open_rw_delete_recreate_group() {
1208        let path = std::env::temp_dir().join("hdf5_rw_delete_recreate.h5");
1209        // Step 1: create file with groups
1210        {
1211            let file = H5File::create(&path).unwrap();
1212            let n = file.create_group("nodes").unwrap();
1213            n.write_vlen_strings("id", &["a", "b", "c"]).unwrap();
1214            let e = file.create_group("edges").unwrap();
1215            e.write_vlen_strings("src", &["x", "y"]).unwrap();
1216            file.close().unwrap();
1217        }
1218        // Step 2: open_rw, delete one group, recreate with new data
1219        {
1220            let file = H5File::open_rw(&path).unwrap();
1221            file.delete_group("nodes").unwrap();
1222            let n = file.create_group("nodes").unwrap();
1223            n.write_vlen_strings("id", &["new1", "new2"]).unwrap();
1224            file.close().unwrap();
1225        }
1226        // Step 3: verify
1227        {
1228            let file = H5File::open(&path).unwrap();
1229            let ds = file.dataset("nodes/id").unwrap();
1230            let s = ds.read_vlen_strings().unwrap();
1231            assert_eq!(s, vec!["new1", "new2"]);
1232            // edges should still be intact
1233            let ds = file.dataset("edges/src").unwrap();
1234            let s = ds.read_vlen_strings().unwrap();
1235            assert_eq!(s, vec!["x", "y"]);
1236        }
1237        std::fs::remove_file(&path).ok();
1238    }
1239
1240    #[test]
1241    fn delete_and_recreate_group() {
1242        let path = std::env::temp_dir().join("hdf5_delete_recreate.h5");
1243        {
1244            let file = H5File::create(&path).unwrap();
1245            let g = file.create_group("nodes").unwrap();
1246            g.write_vlen_strings("id", &["old1", "old2"]).unwrap();
1247            file.delete_group("nodes").unwrap();
1248            let g = file.create_group("nodes").unwrap();
1249            g.write_vlen_strings("id", &["new1", "new2", "new3"])
1250                .unwrap();
1251            file.close().unwrap();
1252        }
1253        {
1254            let file = H5File::open(&path).unwrap();
1255            let ds = file.dataset("nodes/id").unwrap();
1256            let strings = ds.read_vlen_strings().unwrap();
1257            assert_eq!(strings, vec!["new1", "new2", "new3"]);
1258        }
1259        std::fs::remove_file(&path).ok();
1260    }
1261
1262    #[test]
1263    #[cfg(feature = "deflate")]
1264    fn vlen_string_compressed_large_roundtrip() {
1265        use crate::format::messages::filter::FilterPipeline;
1266        let path = std::env::temp_dir().join("hdf5_vlen_large.h5");
1267        // Simulate kodex scenario: 7189 strings, chunk_size 512
1268        let input: Vec<String> = (0..7189)
1269            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
1270            .collect();
1271        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
1272        {
1273            let file = H5File::create(&path).unwrap();
1274            file.create_group("nodes").unwrap();
1275            file.write_vlen_strings_compressed(
1276                "nodes/id",
1277                &input_refs,
1278                512,
1279                FilterPipeline::deflate(6),
1280            )
1281            .unwrap();
1282            file.close().unwrap();
1283        }
1284        // Read back
1285        {
1286            let file = H5File::open(&path).unwrap();
1287            let ds = file.dataset("nodes/id").unwrap();
1288            let strings = ds.read_vlen_strings().unwrap();
1289            assert_eq!(strings.len(), 7189);
1290            assert_eq!(strings[0], input[0]);
1291            assert_eq!(strings[7188], input[7188]);
1292        }
1293        // Also test open_rw then re-read
1294        {
1295            let file = H5File::open_rw(&path).unwrap();
1296            file.set_attr_string("version", "1.0").unwrap();
1297            file.close().unwrap();
1298        }
1299        {
1300            let file = H5File::open(&path).unwrap();
1301            let ds = file.dataset("nodes/id").unwrap();
1302            let strings = ds.read_vlen_strings().unwrap();
1303            assert_eq!(strings.len(), 7189);
1304            assert_eq!(strings[0], input[0]);
1305        }
1306        std::fs::remove_file(&path).ok();
1307    }
1308
1309    #[test]
1310    fn vlen_string_write_read() {
1311        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
1312        {
1313            let file = H5File::create(&path).unwrap();
1314            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
1315                .unwrap();
1316            file.close().unwrap();
1317        }
1318        {
1319            let file = H5File::open(&path).unwrap();
1320            let ds = file.dataset("names").unwrap();
1321            let strings = ds.read_vlen_strings().unwrap();
1322            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
1323        }
1324        std::fs::remove_file(&path).ok();
1325    }
1326
1327    #[test]
1328    #[cfg(feature = "deflate")]
1329    fn vlen_string_deflate_roundtrip() {
1330        use crate::format::messages::filter::FilterPipeline;
1331        let path = std::env::temp_dir().join("hdf5_vlen_deflate.h5");
1332        let input: Vec<&str> = (0..100)
1333            .map(|i| match i % 3 {
1334                0 => "hello world",
1335                1 => "compressed vlen string test",
1336                _ => "rust-hdf5",
1337            })
1338            .collect();
1339        {
1340            let file = H5File::create(&path).unwrap();
1341            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1342                .unwrap();
1343            file.close().unwrap();
1344        }
1345        {
1346            let file = H5File::open(&path).unwrap();
1347            let ds = file.dataset("texts").unwrap();
1348            let strings = ds.read_vlen_strings().unwrap();
1349            assert_eq!(strings.len(), 100);
1350            for (i, s) in strings.iter().enumerate() {
1351                assert_eq!(s, input[i]);
1352            }
1353        }
1354        std::fs::remove_file(&path).ok();
1355    }
1356
1357    #[test]
1358    #[cfg(feature = "zstd")]
1359    fn vlen_string_zstd_roundtrip() {
1360        use crate::format::messages::filter::FilterPipeline;
1361        let path = std::env::temp_dir().join("hdf5_vlen_zstd.h5");
1362        let input: Vec<&str> = (0..200)
1363            .map(|i| match i % 4 {
1364                0 => "zstandard compression test",
1365                1 => "variable length string",
1366                2 => "rust-hdf5 chunked storage",
1367                _ => "hello zstd world",
1368            })
1369            .collect();
1370        {
1371            let file = H5File::create(&path).unwrap();
1372            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
1373                .unwrap();
1374            file.close().unwrap();
1375        }
1376        {
1377            let file = H5File::open(&path).unwrap();
1378            let ds = file.dataset("data").unwrap();
1379            let strings = ds.read_vlen_strings().unwrap();
1380            assert_eq!(strings.len(), 200);
1381            for (i, s) in strings.iter().enumerate() {
1382                assert_eq!(s, input[i]);
1383            }
1384        }
1385        std::fs::remove_file(&path).ok();
1386    }
1387
1388    #[test]
1389    #[cfg(feature = "deflate")]
1390    fn shuffle_deflate_roundtrip() {
1391        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
1392        {
1393            let file = H5File::create(&path).unwrap();
1394            let ds = file
1395                .new_dataset::<f64>()
1396                .shape([0usize, 4])
1397                .chunk(&[1, 4])
1398                .max_shape(&[None, Some(4)])
1399                .shuffle_deflate(6)
1400                .create("data")
1401                .unwrap();
1402            for frame in 0..20u64 {
1403                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
1404                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1405                ds.write_chunk(frame as usize, &raw).unwrap();
1406            }
1407            ds.extend(&[20, 4]).unwrap();
1408            file.close().unwrap();
1409        }
1410        {
1411            let file = H5File::open(&path).unwrap();
1412            let ds = file.dataset("data").unwrap();
1413            assert_eq!(ds.shape(), vec![20, 4]);
1414            let data = ds.read_raw::<f64>().unwrap();
1415            assert_eq!(data.len(), 80);
1416            for (i, val) in data.iter().enumerate() {
1417                assert!((val - i as f64).abs() < 1e-10);
1418            }
1419        }
1420        std::fs::remove_file(&path).ok();
1421    }
1422
1423    #[test]
1424    fn file_level_attributes() {
1425        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
1426        {
1427            let file = H5File::create(&path).unwrap();
1428            file.set_attr_string("title", "Test File").unwrap();
1429            file.set_attr_numeric("version", &42i32).unwrap();
1430            let ds = file
1431                .new_dataset::<u8>()
1432                .shape([1usize])
1433                .create("dummy")
1434                .unwrap();
1435            ds.write_raw(&[0u8]).unwrap();
1436            file.close().unwrap();
1437        }
1438        {
1439            let file = H5File::open(&path).unwrap();
1440            assert!(file.dataset_names().contains(&"dummy".to_string()));
1441
1442            // Read file-level attributes
1443            let names = file.attr_names().unwrap();
1444            assert!(names.contains(&"title".to_string()));
1445
1446            let title = file.attr_string("title").unwrap();
1447            assert_eq!(title, "Test File");
1448        }
1449        std::fs::remove_file(&path).ok();
1450    }
1451
1452    #[test]
1453    fn scalar_dataset_roundtrip() {
1454        let path = std::env::temp_dir().join("hdf5_scalar.h5");
1455        {
1456            let file = H5File::create(&path).unwrap();
1457            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
1458            ds.write_raw(&[std::f64::consts::PI]).unwrap();
1459            file.close().unwrap();
1460        }
1461        {
1462            let file = H5File::open(&path).unwrap();
1463            let ds = file.dataset("pi").unwrap();
1464            assert_eq!(ds.shape(), Vec::<usize>::new());
1465            assert_eq!(ds.total_elements(), 1);
1466            let data = ds.read_raw::<f64>().unwrap();
1467            assert_eq!(data.len(), 1);
1468            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
1469        }
1470        std::fs::remove_file(&path).ok();
1471    }
1472
1473    #[test]
1474    fn append_mode_extend_chunked() {
1475        let path = std::env::temp_dir().join("hdf5_append_extend.h5");
1476
1477        // Create with 5 frames
1478        {
1479            let file = H5File::create(&path).unwrap();
1480            let ds = file
1481                .new_dataset::<i32>()
1482                .shape([0usize, 3])
1483                .chunk(&[1, 3])
1484                .max_shape(&[None, Some(3)])
1485                .create("stream")
1486                .unwrap();
1487            for i in 0..5u64 {
1488                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1489                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1490                ds.write_chunk(i as usize, &raw).unwrap();
1491            }
1492            ds.extend(&[5, 3]).unwrap();
1493            file.close().unwrap();
1494        }
1495
1496        // Reopen and add 5 more frames
1497        {
1498            let file = H5File::open_rw(&path).unwrap();
1499            // Find the stream dataset index (it's the first one)
1500            let names = file.dataset_names();
1501            assert!(names.contains(&"stream".to_string()));
1502
1503            // Write more chunks via the writer directly
1504            let mut inner = crate::file::borrow_inner_mut(&file.inner);
1505            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
1506                let ds_idx = writer.dataset_index("stream").unwrap();
1507                for i in 5..10u64 {
1508                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1509                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1510                    writer.write_chunk(ds_idx, i, &raw).unwrap();
1511                }
1512                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
1513            }
1514            drop(inner);
1515            file.close().unwrap();
1516        }
1517
1518        // Read back all 10 frames
1519        {
1520            let file = H5File::open(&path).unwrap();
1521            let ds = file.dataset("stream").unwrap();
1522            assert_eq!(ds.shape(), vec![10, 3]);
1523            let data = ds.read_raw::<i32>().unwrap();
1524            assert_eq!(data.len(), 30);
1525            for (i, val) in data.iter().enumerate() {
1526                assert_eq!(*val, i as i32, "mismatch at {}", i);
1527            }
1528        }
1529
1530        std::fs::remove_file(&path).ok();
1531    }
1532
1533    #[test]
1534    fn group_hierarchy_roundtrip() {
1535        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");
1536
1537        {
1538            let file = H5File::create(&path).unwrap();
1539            let root = file.root_group();
1540
1541            // Create groups
1542            let det = root.create_group("detector").unwrap();
1543            let raw = det.create_group("raw").unwrap();
1544
1545            // Create datasets in groups
1546            let ds1 = det
1547                .new_dataset::<f32>()
1548                .shape([10usize])
1549                .create("temperature")
1550                .unwrap();
1551            ds1.write_raw(&[1.0f32; 10]).unwrap();
1552
1553            let ds2 = raw
1554                .new_dataset::<u16>()
1555                .shape([4usize, 4])
1556                .create("image")
1557                .unwrap();
1558            ds2.write_raw(&[42u16; 16]).unwrap();
1559
1560            // Root-level dataset
1561            let ds3 = file
1562                .new_dataset::<i32>()
1563                .shape([3usize])
1564                .create("version")
1565                .unwrap();
1566            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1567
1568            file.close().unwrap();
1569        }
1570
1571        {
1572            let file = H5File::open(&path).unwrap();
1573            let names = file.dataset_names();
1574            assert!(names.contains(&"version".to_string()));
1575            assert!(names.contains(&"detector/temperature".to_string()));
1576            assert!(names.contains(&"detector/raw/image".to_string()));
1577
1578            // Read datasets
1579            let ds = file.dataset("version").unwrap();
1580            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1581
1582            let ds = file.dataset("detector/temperature").unwrap();
1583            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1584
1585            let ds = file.dataset("detector/raw/image").unwrap();
1586            assert_eq!(ds.shape(), vec![4, 4]);
1587            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1588
1589            // Group traversal
1590            let root = file.root_group();
1591            let group_names = root.group_names().unwrap();
1592            assert!(group_names.contains(&"detector".to_string()));
1593        }
1594
1595        std::fs::remove_file(&path).ok();
1596    }
1597
1598    #[test]
1599    fn nested_groups_via_file_create_group() {
1600        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");
1601
1602        {
1603            let file = H5File::create(&path).unwrap();
1604
1605            // Use the H5File::create_group convenience method
1606            let grp = file.create_group("sensors").unwrap();
1607            let sub = grp.create_group("accel").unwrap();
1608
1609            let ds = sub
1610                .new_dataset::<f64>()
1611                .shape([3usize])
1612                .create("xyz")
1613                .unwrap();
1614            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1615
1616            file.close().unwrap();
1617        }
1618
1619        {
1620            let file = H5File::open(&path).unwrap();
1621            let names = file.dataset_names();
1622            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1623
1624            let ds = file.dataset("sensors/accel/xyz").unwrap();
1625            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1626
1627            // Open group in read mode
1628            let root = file.root_group();
1629            let sensors = root.group("sensors").unwrap();
1630            assert_eq!(sensors.name(), "/sensors");
1631
1632            let accel = sensors.group("accel").unwrap();
1633            assert_eq!(accel.name(), "/sensors/accel");
1634
1635            // list_groups from root
1636            let top_groups = root.group_names().unwrap();
1637            assert!(top_groups.contains(&"sensors".to_string()));
1638
1639            // list_groups from sensors
1640            let sub_groups = sensors.group_names().unwrap();
1641            assert!(sub_groups.contains(&"accel".to_string()));
1642        }
1643
1644        std::fs::remove_file(&path).ok();
1645    }
1646}
1647
/// Interoperability tests against external HDF5 tooling.
///
/// Both tests depend on resources that may be absent (the `h5dump` binary, a
/// pre-generated fixture file) and return early with a "skipping" message on
/// stderr when the resource is not found.
#[cfg(test)]
mod h5py_compat_tests {
    use super::*;

    /// Verify our files can be read by `h5dump` (if available).
    ///
    /// Writes a file containing a contiguous dataset, a chunked + deflate
    /// dataset, a dataset inside a group and a string attribute, then runs
    /// `h5dump` twice (header-only and full dump) and asserts that both
    /// invocations exit successfully.
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // Probe for h5dump: a spawn error is treated as "tool not installed".
        let probe = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if probe.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = std::env::temp_dir().join("hdf5_h5dump_validate.h5");

        // Write a comprehensive test file. The block scope drops every handle
        // before h5dump opens the file.
        {
            let file = H5File::create(&path).unwrap();

            // Contiguous 3x4 f64 dataset holding 0.0..=11.0.
            let ds = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
            ds.write_raw(&data).unwrap();

            // Chunked + compressed dataset, unlimited along the first axis.
            let ds2 = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for i in 0..5i32 {
                // Each chunk is one row of two consecutive integers,
                // serialized as little-endian bytes.
                let row = [i * 2, i * 2 + 1];
                let raw: Vec<u8> = row.iter().flat_map(|v| v.to_le_bytes()).collect();
                // `i` is in 0..5, so the cast to usize is lossless.
                ds2.write_chunk(i as usize, &raw).unwrap();
            }
            // Five rows of two values were written above.
            // NOTE(review): presumably `extend` sets the dataset's current
            // shape to [5, 2]; confirm against `H5Dataset::extend`.
            ds2.extend(&[5, 2]).unwrap();

            // Dataset nested inside a group.
            let grp = file.create_group("meta").unwrap();
            let ds3 = grp
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute attached to the "matrix" dataset.
            use crate::types::VarLenUnicode;
            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            attr.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Run h5dump and verify exit code. The path is passed as an OsStr
        // (`Command::arg` accepts `AsRef<OsStr>`), so a temp directory that
        // is not valid UTF-8 no longer makes the test panic.
        let output = std::process::Command::new("h5dump")
            .arg("-H") // header only (faster)
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            output.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );

        // Full dump (with data) should also work.
        let output2 = std::process::Command::new("h5dump")
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            output2.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&output2.stderr),
        );

        // Best-effort cleanup; a failure to delete is deliberately ignored.
        std::fs::remove_file(&path).ok();
    }

    /// Reads the fixture at `/tmp/test_h5py_default.h5`, if it exists.
    ///
    /// Expects a 4x5 `f64` dataset named `data` whose first element is ~0.0
    /// and whose last element is ~19.0, and a 3x64x64 `u16` dataset named
    /// `images`. The test is skipped when the fixture is missing.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        // Floating-point comparison with a small tolerance.
        assert!(data[0].abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}