Skip to main content

rust_hdf5/
file.rs

//! HDF5 file handle — the main entry point for the public API.
//!
//! ```no_run
//! use rust_hdf5::H5File;
//!
//! // Write
//! let file = H5File::create("example.h5").unwrap();
//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
//! ds.write_raw(&vec![0u8; 200]).unwrap();
//! drop(file);
//!
//! // Read
//! let file = H5File::open("example.h5").unwrap();
//! let ds = file.dataset("data").unwrap();
//! let data = ds.read_raw::<u8>().unwrap();
//! assert_eq!(data.len(), 200);
//! ```

use std::path::Path;

use crate::dataset::{DatasetBuilder, H5Dataset};
use crate::error::{Hdf5Error, Result};
use crate::format::messages::filter::FilterPipeline;
use crate::group::H5Group;
use crate::io::locking::FileLocking;
use crate::io::{Hdf5Reader, Hdf5Writer};
use crate::types::H5Type;

// ---------------------------------------------------------------------------
// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
// the `threadsafe` feature flag.
// ---------------------------------------------------------------------------

/// Shared handle to [`H5FileInner`]: `Rc<RefCell<..>>` in the default
/// single-threaded build (no atomic/lock overhead).
#[cfg(not(feature = "threadsafe"))]
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;

/// Shared handle to [`H5FileInner`]: `Arc<Mutex<..>>` when the `threadsafe`
/// feature is enabled, making `H5File` usable across threads.
#[cfg(feature = "threadsafe")]
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;

/// Helper to borrow/lock the inner state immutably.
///
/// # Panics
/// Panics if the inner state is currently mutably borrowed (`RefCell` rules).
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
    inner.borrow()
}

/// Helper to borrow/lock the inner state mutably.
///
/// # Panics
/// Panics if the inner state is currently borrowed at all (`RefCell` rules).
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
    inner.borrow_mut()
}

/// Helper to clone a SharedInner (cheap reference-count bump, no deep copy).
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    std::rc::Rc::clone(inner)
}

/// Helper to wrap an H5FileInner in SharedInner (initial reference count of 1).
#[cfg(not(feature = "threadsafe"))]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::rc::Rc::new(std::cell::RefCell::new(inner))
}

/// Helper to borrow/lock the inner state immutably (mutex-backed).
///
/// # Panics
/// Panics if the mutex was poisoned by a thread that panicked while holding
/// the lock. The `expect` message names the lock so the panic is diagnosable,
/// unlike the previous bare `unwrap()`.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner
        .lock()
        .expect("H5File inner mutex poisoned by a panicked thread")
}

/// Helper to borrow/lock the inner state mutably (mutex-backed; a `MutexGuard`
/// grants mutable access, so this is the same operation as `borrow_inner`).
///
/// # Panics
/// Panics if the mutex was poisoned by a thread that panicked while holding
/// the lock. The `expect` message names the lock so the panic is diagnosable,
/// unlike the previous bare `unwrap()`.
#[cfg(feature = "threadsafe")]
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner
        .lock()
        .expect("H5File inner mutex poisoned by a panicked thread")
}

/// Helper to clone a SharedInner (cheap atomic reference-count bump).
#[cfg(feature = "threadsafe")]
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    std::sync::Arc::clone(inner)
}

/// Helper to wrap an H5FileInner in SharedInner (initial reference count of 1).
#[cfg(feature = "threadsafe")]
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    std::sync::Arc::new(std::sync::Mutex::new(inner))
}

/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// By default, this uses `Rc<RefCell<>>` for zero-overhead single-threaded use.
/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
/// `H5File` `Send + Sync`.
pub(crate) enum H5FileInner {
    /// Write/append mode (`create`, `open_rw`); owns the writer until `close()`.
    Writer(Hdf5Writer),
    /// Read-only mode (`open`).
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    Closed,
}

/// An HDF5 file opened for reading or writing.
///
/// Datasets created from this file hold a shared reference to the underlying
/// I/O handle, so the file does not need to outlive its datasets (they share
/// ownership via reference counting).
pub struct H5File {
    /// Shared reader/writer state; cloned into each dataset/group handle.
    pub(crate) inner: SharedInner,
}

106impl H5File {
107    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
108    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
109        let writer = Hdf5Writer::create(path.as_ref())?;
110        Ok(Self {
111            inner: new_shared(H5FileInner::Writer(writer)),
112        })
113    }
114
115    /// Open an existing HDF5 file for reading.
116    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
117        let reader = Hdf5Reader::open(path.as_ref())?;
118        Ok(Self {
119            inner: new_shared(H5FileInner::Reader(reader)),
120        })
121    }
122
123    /// Open an existing HDF5 file for appending new datasets.
124    ///
125    /// Existing datasets are preserved. New datasets can be added and will
126    /// be written after the current end of file. Existing chunked datasets
127    /// can be extended with `write_chunk` and `extend_dataset`.
128    ///
129    /// ```no_run
130    /// use rust_hdf5::H5File;
131    /// let file = H5File::open_rw("existing.h5").unwrap();
132    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
133    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
134    /// file.close().unwrap();
135    /// ```
136    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
137        let writer = Hdf5Writer::open_append(path.as_ref())?;
138        Ok(Self {
139            inner: new_shared(H5FileInner::Writer(writer)),
140        })
141    }
142
143    /// Start building open options for an HDF5 file.
144    ///
145    /// Use this to control file-locking behavior explicitly:
146    ///
147    /// ```no_run
148    /// use rust_hdf5::{H5File, FileLocking};
149    /// // Open with locking disabled (e.g. on NFS without lock support).
150    /// let file = H5File::options()
151    ///     .locking(FileLocking::Disabled)
152    ///     .open_rw("existing.h5")
153    ///     .unwrap();
154    /// # let _ = file;
155    /// ```
156    pub fn options() -> H5FileOptions {
157        H5FileOptions::default()
158    }
159
160    /// Return a handle to the root group.
161    ///
162    /// The root group can be used to create datasets and sub-groups.
163    pub fn root_group(&self) -> H5Group {
164        H5Group::new(clone_inner(&self.inner), "/".to_string())
165    }
166
167    /// Create a group in the root of the file.
168    ///
169    /// ```no_run
170    /// use rust_hdf5::H5File;
171    /// let file = H5File::create("groups.h5").unwrap();
172    /// let grp = file.create_group("detector").unwrap();
173    /// ```
174    pub fn create_group(&self, name: &str) -> Result<H5Group> {
175        self.root_group().create_group(name)
176    }
177
178    /// Start building a new dataset with the given element type.
179    ///
180    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
181    /// `.create("name")` to finalize.
182    ///
183    /// ```no_run
184    /// # use rust_hdf5::H5File;
185    /// let file = H5File::create("build.h5").unwrap();
186    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
187    /// ```
188    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
189        DatasetBuilder::new(clone_inner(&self.inner))
190    }
191
192    /// Add a string attribute to the file (root group).
193    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
194        use crate::format::messages::attribute::AttributeMessage;
195        let attr = AttributeMessage::scalar_string(name, value);
196        let mut inner = borrow_inner_mut(&self.inner);
197        match &mut *inner {
198            H5FileInner::Writer(writer) => {
199                writer.add_root_attribute(attr);
200                Ok(())
201            }
202            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
203        }
204    }
205
206    /// Add a numeric attribute to the file (root group).
207    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
208        use crate::format::messages::attribute::AttributeMessage;
209        let es = T::element_size();
210        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
211        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
212        let mut inner = borrow_inner_mut(&self.inner);
213        match &mut *inner {
214            H5FileInner::Writer(writer) => {
215                writer.add_root_attribute(attr);
216                Ok(())
217            }
218            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
219        }
220    }
221
222    /// Return the names of file-level (root group) attributes.
223    pub fn attr_names(&self) -> Result<Vec<String>> {
224        let inner = borrow_inner(&self.inner);
225        match &*inner {
226            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
227            _ => Ok(vec![]),
228        }
229    }
230
231    /// Read a file-level string attribute.
232    pub fn attr_string(&self, name: &str) -> Result<String> {
233        let mut inner = borrow_inner_mut(&self.inner);
234        match &mut *inner {
235            H5FileInner::Reader(reader) => {
236                let attr = reader
237                    .root_attr(name)
238                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?
239                    .clone();
240                Ok(reader.attr_string_value(&attr)?)
241            }
242            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
243        }
244    }
245
246    /// Check if the file is in write/append mode.
247    pub fn is_writable(&self) -> bool {
248        let inner = borrow_inner(&self.inner);
249        matches!(&*inner, H5FileInner::Writer(_))
250    }
251
252    /// Create a variable-length string dataset and write data.
253    ///
254    /// This is a convenience method for writing h5py-compatible vlen string
255    /// datasets using global heap storage.
256    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
257        let mut inner = borrow_inner_mut(&self.inner);
258        match &mut *inner {
259            H5FileInner::Writer(writer) => {
260                let idx = writer.create_vlen_string_dataset(name, strings)?;
261                // If the name contains '/', assign the dataset to its parent group
262                if let Some(slash_pos) = name.rfind('/') {
263                    let group_path = &name[..slash_pos];
264                    let abs_group_path = if group_path.starts_with('/') {
265                        group_path.to_string()
266                    } else {
267                        format!("/{}", group_path)
268                    };
269                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
270                }
271                Ok(())
272            }
273            H5FileInner::Reader(_) => {
274                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
275            }
276            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
277        }
278    }
279
280    /// Create a chunked, compressed variable-length string dataset.
281    ///
282    /// Like `write_vlen_strings`, but stores the vlen references in chunked
283    /// layout with the given filter pipeline (e.g., `FilterPipeline::deflate(6)`
284    /// or `FilterPipeline::zstd(3)`). `chunk_size` is the number of strings
285    /// per chunk.
286    pub fn write_vlen_strings_compressed(
287        &self,
288        name: &str,
289        strings: &[&str],
290        chunk_size: usize,
291        pipeline: FilterPipeline,
292    ) -> Result<()> {
293        let mut inner = borrow_inner_mut(&self.inner);
294        match &mut *inner {
295            H5FileInner::Writer(writer) => {
296                let idx = writer
297                    .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
298                if let Some(slash_pos) = name.rfind('/') {
299                    let group_path = &name[..slash_pos];
300                    let abs_group_path = if group_path.starts_with('/') {
301                        group_path.to_string()
302                    } else {
303                        format!("/{}", group_path)
304                    };
305                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
306                }
307                Ok(())
308            }
309            H5FileInner::Reader(_) => {
310                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
311            }
312            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
313        }
314    }
315
316    /// Create an empty chunked vlen string dataset ready for incremental appends.
317    ///
318    /// Use `append_vlen_strings` to add data. If `pipeline` is `Some`, chunks
319    /// are compressed (e.g., `Some(FilterPipeline::lz4())`).
320    pub fn create_appendable_vlen_dataset(
321        &self,
322        name: &str,
323        chunk_size: usize,
324        pipeline: Option<FilterPipeline>,
325    ) -> Result<()> {
326        let mut inner = borrow_inner_mut(&self.inner);
327        match &mut *inner {
328            H5FileInner::Writer(writer) => {
329                let idx =
330                    writer.create_appendable_vlen_string_dataset(name, chunk_size, pipeline)?;
331                if let Some(slash_pos) = name.rfind('/') {
332                    let group_path = &name[..slash_pos];
333                    let abs_group_path = if group_path.starts_with('/') {
334                        group_path.to_string()
335                    } else {
336                        format!("/{}", group_path)
337                    };
338                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
339                }
340                Ok(())
341            }
342            H5FileInner::Reader(_) => {
343                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
344            }
345            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
346        }
347    }
348
349    /// Append variable-length strings to an existing chunked vlen string dataset.
350    pub fn append_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
351        let mut inner = borrow_inner_mut(&self.inner);
352        match &mut *inner {
353            H5FileInner::Writer(writer) => {
354                let ds_index = writer
355                    .dataset_index(name)
356                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
357                writer.append_vlen_strings(ds_index, strings)?;
358                Ok(())
359            }
360            H5FileInner::Reader(_) => {
361                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
362            }
363            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
364        }
365    }
366
367    /// Delete a dataset by name. The dataset is unlinked on close;
368    /// file space is not reclaimed.
369    pub fn delete_dataset(&self, name: &str) -> Result<()> {
370        let mut inner = borrow_inner_mut(&self.inner);
371        match &mut *inner {
372            H5FileInner::Writer(writer) => {
373                writer.delete_dataset(name)?;
374                Ok(())
375            }
376            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
377        }
378    }
379
380    /// Delete a group and all its child datasets/sub-groups.
381    /// File space is not reclaimed.
382    pub fn delete_group(&self, name: &str) -> Result<()> {
383        let mut inner = borrow_inner_mut(&self.inner);
384        match &mut *inner {
385            H5FileInner::Writer(writer) => {
386                writer.delete_group(name)?;
387                Ok(())
388            }
389            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
390        }
391    }
392
393    /// Open an existing dataset by name (read mode).
394    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
395        let inner = borrow_inner(&self.inner);
396        match &*inner {
397            H5FileInner::Reader(reader) => {
398                let info = reader
399                    .dataset_info(name)
400                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
401                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
402                let element_size = info.datatype.element_size() as usize;
403                Ok(H5Dataset::new_reader(
404                    clone_inner(&self.inner),
405                    name.to_string(),
406                    shape,
407                    element_size,
408                ))
409            }
410            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
411                "cannot open a dataset by name in write mode; use new_dataset() instead"
412                    .to_string(),
413            )),
414            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
415        }
416    }
417
418    /// Return the names of all datasets in the root group.
419    ///
420    /// Works in both read and write mode: in write mode, returns the names of
421    /// datasets created so far; in read mode, returns the names discovered
422    /// during file open.
423    pub fn dataset_names(&self) -> Vec<String> {
424        let inner = borrow_inner(&self.inner);
425        match &*inner {
426            H5FileInner::Reader(reader) => reader
427                .dataset_names()
428                .iter()
429                .map(|s| s.to_string())
430                .collect(),
431            H5FileInner::Writer(writer) => writer
432                .dataset_names()
433                .iter()
434                .map(|s| s.to_string())
435                .collect(),
436            H5FileInner::Closed => Vec::new(),
437        }
438    }
439
440    /// Explicitly close the file. For a writer, this finalizes the file
441    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
442    ///
443    /// The file is also auto-finalized on drop, but calling `close()` lets
444    /// you handle errors.
445    pub fn close(self) -> Result<()> {
446        let old = {
447            let mut inner = borrow_inner_mut(&self.inner);
448            std::mem::replace(&mut *inner, H5FileInner::Closed)
449        };
450        match old {
451            H5FileInner::Writer(writer) => {
452                writer.close()?;
453                Ok(())
454            }
455            H5FileInner::Reader(_) => Ok(()),
456            H5FileInner::Closed => Ok(()),
457        }
458    }
459
    /// Flush the file to disk. Only meaningful in write mode.
    ///
    /// The underlying writer does not expose a standalone flush; data is
    /// written to disk immediately via pwrite. This is a compatibility
    /// method that does nothing for now and always returns `Ok(())`.
    pub fn flush(&self) -> Result<()> {
        Ok(())
    }
467}
468
/// Builder controlling how an [`H5File`] is opened.
///
/// The default policy follows the HDF5 C library: an exclusive lock is
/// acquired for write-mode opens and a shared lock for read-mode opens,
/// honoring the `HDF5_USE_FILE_LOCKING` environment variable. Calling
/// [`Self::locking`] overrides the env-var value.
#[derive(Debug, Default, Clone)]
pub struct H5FileOptions {
    /// Explicit locking override; `None` defers to the environment variable.
    locking: Option<FileLocking>,
}

480impl H5FileOptions {
481    /// Construct a fresh options builder with default settings.
482    pub fn new() -> Self {
483        Self::default()
484    }
485
486    /// Override the locking policy. Bypasses the `HDF5_USE_FILE_LOCKING`
487    /// environment variable for the resulting open call.
488    pub fn locking(mut self, policy: FileLocking) -> Self {
489        self.locking = Some(policy);
490        self
491    }
492
493    /// Disable OS-level file locking entirely (equivalent to
494    /// `HDF5_USE_FILE_LOCKING=FALSE`).
495    pub fn no_locking(self) -> Self {
496        self.locking(FileLocking::Disabled)
497    }
498
499    /// Try to acquire the lock but do not fail if the filesystem rejects it
500    /// (equivalent to `HDF5_USE_FILE_LOCKING=BEST_EFFORT`).
501    pub fn best_effort_locking(self) -> Self {
502        self.locking(FileLocking::BestEffort)
503    }
504
505    fn resolved_locking(&self) -> FileLocking {
506        match self.locking {
507            Some(p) => p,
508            None => FileLocking::from_env_or(FileLocking::default()),
509        }
510    }
511
512    /// Create a new HDF5 file at `path` with the configured options.
513    pub fn create<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
514        let writer = Hdf5Writer::create_with_locking(path.as_ref(), self.resolved_locking())?;
515        Ok(H5File {
516            inner: new_shared(H5FileInner::Writer(writer)),
517        })
518    }
519
520    /// Open an existing HDF5 file for reading with the configured options.
521    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
522        let reader = Hdf5Reader::open_with_locking(path.as_ref(), self.resolved_locking())?;
523        Ok(H5File {
524            inner: new_shared(H5FileInner::Reader(reader)),
525        })
526    }
527
528    /// Open an existing HDF5 file for read/write with the configured options.
529    pub fn open_rw<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
530        let writer = Hdf5Writer::open_append_with_locking(path.as_ref(), self.resolved_locking())?;
531        Ok(H5File {
532            inner: new_shared(H5FileInner::Writer(writer)),
533        })
534    }
535}
536
#[cfg(test)]
fn unique_test_path(name: &str) -> std::path::PathBuf {
    // PID + atomic counter so each test invocation uses a distinct path,
    // preventing collisions across concurrent cargo runs and any
    // flock/LockFileEx race where a previous close()'d file's lock
    // remains briefly visible when reopening the same path.
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);
    let seq = COUNTER.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    let file_name = format!("rust_hdf5_test_{}_{}_{}.h5", name, pid, seq);
    std::env::temp_dir().join(file_name)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Per-test unique temp path (PID + atomic counter) to avoid collisions.
    fn temp_path(name: &str) -> PathBuf {
        super::unique_test_path(name)
    }

    /// An empty file created then closed must be reopenable for reading.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();

        // Should be readable
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    /// Dropping the handle without an explicit close must still finalize.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        {
            let _file = H5File::create(&path).unwrap();
            // drop auto-finalizes
        }
        // Verify the file is valid by opening it
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    /// Looking up a missing dataset name returns an error, not a panic.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        let result = file.dataset("nonexistent");
        assert!(result.is_err());

        std::fs::remove_file(&path).ok();
    }

    /// u8 contiguous dataset round-trip: shape and bytes survive write/read.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([4, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let data = ds.read_raw::<u8>().unwrap();
            assert_eq!(data.len(), 16);
            assert!(data.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    /// f64 round-trip preserves exact values and the 2-D shape.
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&values).unwrap();
            file.close().unwrap();
        }

        // Read
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    /// Several datasets of different element types coexist in one file.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let file = H5File::create(&path).unwrap();
            let ds1 = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            ds1.write_raw(&[10i32, 20, 30]).unwrap();

            let ds2 = file
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            ds2.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            let ds_ints = file.dataset("ints").unwrap();
            assert_eq!(ds_ints.shape(), vec![3]);
            let ints = ds_ints.read_raw::<i32>().unwrap();
            assert_eq!(ints, vec![10, 20, 30]);

            let ds_floats = file.dataset("floats").unwrap();
            assert_eq!(ds_floats.shape(), vec![2, 2]);
            let floats = ds_floats.read_raw::<f32>().unwrap();
            assert_eq!(floats, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    /// `close(self)` consumes the handle, so double-close cannot compile.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();
        // File is consumed by close(), so no double-close possible at the type level.
        std::fs::remove_file(&path).ok();
    }
}

709#[cfg(test)]
710mod integration_tests {
711    use super::*;
712
713    fn temp_path(name: &str) -> std::path::PathBuf {
714        super::unique_test_path(name)
715    }
716
717    #[test]
718    fn write_file_for_h5dump() {
719        let path = temp_path("integration");
720        let file = H5File::create(&path).unwrap();
721
722        let ds = file
723            .new_dataset::<u8>()
724            .shape([4usize, 4])
725            .create("data_u8")
726            .unwrap();
727        let data: Vec<u8> = (0..16).collect();
728        ds.write_raw(&data).unwrap();
729
730        let ds2 = file
731            .new_dataset::<f64>()
732            .shape([3usize, 2])
733            .create("data_f64")
734            .unwrap();
735        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
736        ds2.write_raw(&fdata).unwrap();
737
738        let ds3 = file
739            .new_dataset::<i32>()
740            .shape([5usize])
741            .create("values")
742            .unwrap();
743        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
744        ds3.write_raw(&idata).unwrap();
745
746        file.close().unwrap();
747
748        // File exists
749        assert!(path.exists());
750    }
751
752    #[test]
753    fn write_chunked_file_for_h5dump() {
754        let path = temp_path("chunked");
755        let file = H5File::create(&path).unwrap();
756
757        // Create a chunked dataset with unlimited first dimension
758        let ds = file
759            .new_dataset::<f64>()
760            .shape([0usize, 4])
761            .chunk(&[1, 4])
762            .max_shape(&[None, Some(4)])
763            .create("streaming_data")
764            .unwrap();
765
766        // Write 5 frames of data
767        for frame in 0..5u64 {
768            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
769            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
770            ds.write_chunk(frame as usize, &raw).unwrap();
771        }
772
773        // Extend dimensions to reflect the 5 written frames
774        ds.extend(&[5, 4]).unwrap();
775        ds.flush().unwrap();
776
777        file.close().unwrap();
778
779        assert!(path.exists());
780    }
781
782    #[test]
783    fn write_chunked_many_frames_for_h5dump() {
784        let path = temp_path("chunked_many");
785        let file = H5File::create(&path).unwrap();
786
787        let ds = file
788            .new_dataset::<i32>()
789            .shape([0usize, 3])
790            .chunk(&[1, 3])
791            .max_shape(&[None, Some(3)])
792            .create("data")
793            .unwrap();
794
795        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
796        for frame in 0..10u64 {
797            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
798            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
799            ds.write_chunk(frame as usize, &raw).unwrap();
800        }
801        ds.extend(&[10, 3]).unwrap();
802        file.close().unwrap();
803
804        assert!(path.exists());
805    }
806
807    #[test]
808    fn write_dataset_with_attributes() {
809        use crate::types::VarLenUnicode;
810
811        let path = temp_path("attributes");
812        let file = H5File::create(&path).unwrap();
813
814        let ds = file
815            .new_dataset::<f32>()
816            .shape([10usize])
817            .create("temperature")
818            .unwrap();
819        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
820        ds.write_raw(&data).unwrap();
821
822        // Add string attributes
823        let attr = ds
824            .new_attr::<VarLenUnicode>()
825            .shape(())
826            .create("units")
827            .unwrap();
828        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
829            .unwrap();
830
831        let attr2 = ds
832            .new_attr::<VarLenUnicode>()
833            .shape(())
834            .create("description")
835            .unwrap();
836        attr2
837            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
838            .unwrap();
839
840        // Use write_string convenience method
841        let attr3 = ds
842            .new_attr::<VarLenUnicode>()
843            .shape(())
844            .create("source")
845            .unwrap();
846        attr3.write_string("sensor_01").unwrap();
847
848        // Also test parse -> write_scalar pattern
849        let attr4 = ds
850            .new_attr::<VarLenUnicode>()
851            .shape(())
852            .create("label")
853            .unwrap();
854        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
855        attr4.write_scalar(&s).unwrap();
856
857        file.close().unwrap();
858
859        assert!(path.exists());
860    }
861
862    #[test]
863    fn chunked_write_read_roundtrip() {
864        let path = temp_path("chunked_roundtrip");
865
866        // Write
867        {
868            let file = H5File::create(&path).unwrap();
869            let ds = file
870                .new_dataset::<i32>()
871                .shape([0usize, 3])
872                .chunk(&[1, 3])
873                .max_shape(&[None, Some(3)])
874                .create("table")
875                .unwrap();
876
877            for frame in 0..8u64 {
878                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
879                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
880                ds.write_chunk(frame as usize, &raw).unwrap();
881            }
882            ds.extend(&[8, 3]).unwrap();
883            file.close().unwrap();
884        }
885
886        // Read
887        {
888            let file = H5File::open(&path).unwrap();
889            let ds = file.dataset("table").unwrap();
890            assert_eq!(ds.shape(), vec![8, 3]);
891            let data = ds.read_raw::<i32>().unwrap();
892            assert_eq!(data.len(), 24);
893            for (i, val) in data.iter().enumerate() {
894                assert_eq!(*val, i as i32);
895            }
896        }
897
898        std::fs::remove_file(&path).ok();
899    }
900
901    #[test]
902    #[cfg(feature = "deflate")]
903    fn compressed_chunked_roundtrip() {
904        let path = temp_path("compressed_roundtrip");
905
906        // Write compressed
907        {
908            let file = H5File::create(&path).unwrap();
909            let ds = file
910                .new_dataset::<f64>()
911                .shape([0usize, 4])
912                .chunk(&[1, 4])
913                .max_shape(&[None, Some(4)])
914                .deflate(6)
915                .create("compressed")
916                .unwrap();
917
918            for frame in 0..10u64 {
919                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
920                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
921                ds.write_chunk(frame as usize, &raw).unwrap();
922            }
923            ds.extend(&[10, 4]).unwrap();
924            file.close().unwrap();
925        }
926
927        // Read back and verify
928        {
929            let file = H5File::open(&path).unwrap();
930            let ds = file.dataset("compressed").unwrap();
931            assert_eq!(ds.shape(), vec![10, 4]);
932            let data = ds.read_raw::<f64>().unwrap();
933            assert_eq!(data.len(), 40);
934            for (i, val) in data.iter().enumerate() {
935                assert!(
936                    (val - i as f64).abs() < 1e-10,
937                    "mismatch at {}: {} != {}",
938                    i,
939                    val,
940                    i
941                );
942            }
943        }
944
945        std::fs::remove_file(&path).ok();
946    }
947
948    #[test]
949    #[cfg(feature = "deflate")]
950    fn compressed_chunked_many_frames() {
951        let path = temp_path("compressed_many");
952
953        {
954            let file = H5File::create(&path).unwrap();
955            let ds = file
956                .new_dataset::<i32>()
957                .shape([0usize, 3])
958                .chunk(&[1, 3])
959                .max_shape(&[None, Some(3)])
960                .deflate(6)
961                .create("stream")
962                .unwrap();
963
964            for frame in 0..100u64 {
965                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
966                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
967                ds.write_chunk(frame as usize, &raw).unwrap();
968            }
969            ds.extend(&[100, 3]).unwrap();
970            file.close().unwrap();
971        }
972
973        {
974            let file = H5File::open(&path).unwrap();
975            let ds = file.dataset("stream").unwrap();
976            assert_eq!(ds.shape(), vec![100, 3]);
977            let data = ds.read_raw::<i32>().unwrap();
978            assert_eq!(data.len(), 300);
979            for (i, val) in data.iter().enumerate() {
980                assert_eq!(*val, i as i32, "mismatch at {}", i);
981            }
982        }
983
984        std::fs::remove_file(&path).ok();
985    }
986    #[test]
987    fn append_mode() {
988        let path = temp_path("append");
989
990        // Create initial file
991        {
992            let file = H5File::create(&path).unwrap();
993            let ds = file
994                .new_dataset::<i32>()
995                .shape([3usize])
996                .create("first")
997                .unwrap();
998            ds.write_raw(&[1i32, 2, 3]).unwrap();
999            file.close().unwrap();
1000        }
1001
1002        // Append new dataset
1003        {
1004            let file = H5File::open_rw(&path).unwrap();
1005            let ds = file
1006                .new_dataset::<f64>()
1007                .shape([2usize])
1008                .create("second")
1009                .unwrap();
1010            ds.write_raw(&[4.0f64, 5.0]).unwrap();
1011            file.close().unwrap();
1012        }
1013
1014        // Read back both
1015        {
1016            let file = H5File::open(&path).unwrap();
1017            let names = file.dataset_names();
1018            assert!(names.contains(&"first".to_string()));
1019            assert!(names.contains(&"second".to_string()));
1020
1021            let ds1 = file.dataset("first").unwrap();
1022            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
1023
1024            let ds2 = file.dataset("second").unwrap();
1025            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
1026        }
1027
1028        std::fs::remove_file(&path).ok();
1029    }
1030
1031    #[test]
1032    fn open_rw_set_attr_preserves_file() {
1033        let path = temp_path("open_rw_attr");
1034        // Create file with a dataset and an attribute
1035        {
1036            let file = H5File::create(&path).unwrap();
1037            let ds = file
1038                .new_dataset::<i32>()
1039                .shape([3usize])
1040                .create("data")
1041                .unwrap();
1042            ds.write_raw(&[10i32, 20, 30]).unwrap();
1043            file.set_attr_string("version", "1.0").unwrap();
1044            file.close().unwrap();
1045        }
1046        // Open rw and modify the attribute
1047        {
1048            let file = H5File::open_rw(&path).unwrap();
1049            file.set_attr_string("version", "2.0").unwrap();
1050            file.close().unwrap();
1051        }
1052        // Verify: dataset intact, attribute updated
1053        {
1054            let file = H5File::open(&path).unwrap();
1055            let ds = file.dataset("data").unwrap();
1056            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
1057            let ver = file.attr_string("version").unwrap();
1058            assert_eq!(ver, "2.0");
1059        }
1060        std::fs::remove_file(&path).ok();
1061    }
1062
1063    #[test]
1064    #[cfg(feature = "deflate")]
1065    fn open_rw_attr_with_compressed_dataset() {
1066        use crate::format::messages::filter::FilterPipeline;
1067        let path = temp_path("open_rw_compressed");
1068        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
1069        // Create file with compressed vlen strings
1070        {
1071            let file = H5File::create(&path).unwrap();
1072            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1073                .unwrap();
1074            file.set_attr_string("version", "1.0").unwrap();
1075            file.close().unwrap();
1076        }
1077        // Open rw and modify attribute only
1078        {
1079            let file = H5File::open_rw(&path).unwrap();
1080            file.set_attr_string("version", "2.0").unwrap();
1081            file.close().unwrap();
1082        }
1083        // Verify: compressed dataset still readable, attribute updated
1084        {
1085            let file = H5File::open(&path).unwrap();
1086            let ds = file.dataset("texts").unwrap();
1087            let strings = ds.read_vlen_strings().unwrap();
1088            assert_eq!(strings.len(), 50);
1089            assert_eq!(strings[0], "test string data");
1090            let ver = file.attr_string("version").unwrap();
1091            assert_eq!(ver, "2.0");
1092        }
1093        std::fs::remove_file(&path).ok();
1094    }
1095
1096    #[test]
1097    #[cfg(feature = "lz4")]
1098    fn append_vlen_strings_basic() {
1099        use crate::format::messages::filter::FilterPipeline;
1100        let path = temp_path("append_vlen");
1101        {
1102            let file = H5File::create(&path).unwrap();
1103            file.create_appendable_vlen_dataset("names", 4, Some(FilterPipeline::lz4()))
1104                .unwrap();
1105            file.append_vlen_strings("names", &["alice", "bob", "charlie"])
1106                .unwrap();
1107            file.append_vlen_strings("names", &["dave", "eve"]).unwrap();
1108            file.close().unwrap();
1109        }
1110        {
1111            let file = H5File::open(&path).unwrap();
1112            let ds = file.dataset("names").unwrap();
1113            let strings = ds.read_vlen_strings().unwrap();
1114            assert_eq!(strings, vec!["alice", "bob", "charlie", "dave", "eve"]);
1115        }
1116        std::fs::remove_file(&path).ok();
1117    }
1118
1119    #[test]
1120    #[cfg(feature = "lz4")]
1121    fn append_vlen_strings_large() {
1122        use crate::format::messages::filter::FilterPipeline;
1123        let path = temp_path("append_vlen_large");
1124        let batch1: Vec<String> = (0..5000).map(|i| format!("node-{:06}", i)).collect();
1125        let batch2: Vec<String> = (5000..7189).map(|i| format!("node-{:06}", i)).collect();
1126        {
1127            let file = H5File::create(&path).unwrap();
1128            file.create_appendable_vlen_dataset("data", 512, Some(FilterPipeline::lz4()))
1129                .unwrap();
1130            let r1: Vec<&str> = batch1.iter().map(|s| s.as_str()).collect();
1131            file.append_vlen_strings("data", &r1).unwrap();
1132            let r2: Vec<&str> = batch2.iter().map(|s| s.as_str()).collect();
1133            file.append_vlen_strings("data", &r2).unwrap();
1134            file.close().unwrap();
1135        }
1136        {
1137            let file = H5File::open(&path).unwrap();
1138            let ds = file.dataset("data").unwrap();
1139            let strings = ds.read_vlen_strings().unwrap();
1140            assert_eq!(strings.len(), 7189);
1141            assert_eq!(strings[0], "node-000000");
1142            assert_eq!(strings[7188], "node-007188");
1143        }
1144        std::fs::remove_file(&path).ok();
1145    }
1146
1147    #[test]
1148    fn append_vlen_strings_uncompressed() {
1149        let path = temp_path("append_vlen_unc");
1150        {
1151            let file = H5File::create(&path).unwrap();
1152            file.create_appendable_vlen_dataset("texts", 8, None)
1153                .unwrap();
1154            file.append_vlen_strings("texts", &["hello", "world"])
1155                .unwrap();
1156            file.append_vlen_strings("texts", &["foo", "bar", "baz"])
1157                .unwrap();
1158            file.close().unwrap();
1159        }
1160        {
1161            let file = H5File::open(&path).unwrap();
1162            let ds = file.dataset("texts").unwrap();
1163            let strings = ds.read_vlen_strings().unwrap();
1164            assert_eq!(strings, vec!["hello", "world", "foo", "bar", "baz"]);
1165        }
1166        std::fs::remove_file(&path).ok();
1167    }
1168
1169    #[test]
1170    fn delete_dataset_roundtrip() {
1171        let path = temp_path("delete_ds");
1172        {
1173            let file = H5File::create(&path).unwrap();
1174            file.write_vlen_strings("keep", &["a", "b"]).unwrap();
1175            file.write_vlen_strings("remove", &["x", "y"]).unwrap();
1176            file.delete_dataset("remove").unwrap();
1177            file.close().unwrap();
1178        }
1179        {
1180            let file = H5File::open(&path).unwrap();
1181            let names = file.dataset_names();
1182            assert!(names.contains(&"keep".to_string()));
1183            assert!(!names.contains(&"remove".to_string()));
1184            let ds = file.dataset("keep").unwrap();
1185            assert_eq!(ds.read_vlen_strings().unwrap(), vec!["a", "b"]);
1186        }
1187        std::fs::remove_file(&path).ok();
1188    }
1189
1190    #[test]
1191    fn delete_group_roundtrip() {
1192        let path = temp_path("delete_grp");
1193        {
1194            let file = H5File::create(&path).unwrap();
1195            let g1 = file.create_group("keep").unwrap();
1196            g1.write_vlen_strings("data", &["a"]).unwrap();
1197            let g2 = file.create_group("remove").unwrap();
1198            g2.write_vlen_strings("data", &["x"]).unwrap();
1199            file.delete_group("remove").unwrap();
1200            file.close().unwrap();
1201        }
1202        {
1203            let file = H5File::open(&path).unwrap();
1204            let names = file.dataset_names();
1205            assert!(names.contains(&"keep/data".to_string()));
1206            assert!(!names.contains(&"remove/data".to_string()));
1207        }
1208        std::fs::remove_file(&path).ok();
1209    }
1210
1211    #[test]
1212    fn open_rw_delete_recreate_group() {
1213        let path = temp_path("rw_delete_recreate");
1214        // Step 1: create file with groups
1215        {
1216            let file = H5File::create(&path).unwrap();
1217            let n = file.create_group("nodes").unwrap();
1218            n.write_vlen_strings("id", &["a", "b", "c"]).unwrap();
1219            let e = file.create_group("edges").unwrap();
1220            e.write_vlen_strings("src", &["x", "y"]).unwrap();
1221            file.close().unwrap();
1222        }
1223        // Step 2: open_rw, delete one group, recreate with new data
1224        {
1225            let file = H5File::open_rw(&path).unwrap();
1226            file.delete_group("nodes").unwrap();
1227            let n = file.create_group("nodes").unwrap();
1228            n.write_vlen_strings("id", &["new1", "new2"]).unwrap();
1229            file.close().unwrap();
1230        }
1231        // Step 3: verify
1232        {
1233            let file = H5File::open(&path).unwrap();
1234            let ds = file.dataset("nodes/id").unwrap();
1235            let s = ds.read_vlen_strings().unwrap();
1236            assert_eq!(s, vec!["new1", "new2"]);
1237            // edges should still be intact
1238            let ds = file.dataset("edges/src").unwrap();
1239            let s = ds.read_vlen_strings().unwrap();
1240            assert_eq!(s, vec!["x", "y"]);
1241        }
1242        std::fs::remove_file(&path).ok();
1243    }
1244
1245    #[test]
1246    fn delete_and_recreate_group() {
1247        let path = temp_path("delete_recreate");
1248        {
1249            let file = H5File::create(&path).unwrap();
1250            let g = file.create_group("nodes").unwrap();
1251            g.write_vlen_strings("id", &["old1", "old2"]).unwrap();
1252            file.delete_group("nodes").unwrap();
1253            let g = file.create_group("nodes").unwrap();
1254            g.write_vlen_strings("id", &["new1", "new2", "new3"])
1255                .unwrap();
1256            file.close().unwrap();
1257        }
1258        {
1259            let file = H5File::open(&path).unwrap();
1260            let ds = file.dataset("nodes/id").unwrap();
1261            let strings = ds.read_vlen_strings().unwrap();
1262            assert_eq!(strings, vec!["new1", "new2", "new3"]);
1263        }
1264        std::fs::remove_file(&path).ok();
1265    }
1266
1267    #[test]
1268    #[cfg(feature = "deflate")]
1269    fn vlen_string_compressed_large_roundtrip() {
1270        use crate::format::messages::filter::FilterPipeline;
1271        let path = temp_path("vlen_large");
1272        // Simulate kodex scenario: 7189 strings, chunk_size 512
1273        let input: Vec<String> = (0..7189)
1274            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
1275            .collect();
1276        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
1277        {
1278            let file = H5File::create(&path).unwrap();
1279            file.create_group("nodes").unwrap();
1280            file.write_vlen_strings_compressed(
1281                "nodes/id",
1282                &input_refs,
1283                512,
1284                FilterPipeline::deflate(6),
1285            )
1286            .unwrap();
1287            file.close().unwrap();
1288        }
1289        // Read back
1290        {
1291            let file = H5File::open(&path).unwrap();
1292            let ds = file.dataset("nodes/id").unwrap();
1293            let strings = ds.read_vlen_strings().unwrap();
1294            assert_eq!(strings.len(), 7189);
1295            assert_eq!(strings[0], input[0]);
1296            assert_eq!(strings[7188], input[7188]);
1297        }
1298        // Also test open_rw then re-read
1299        {
1300            let file = H5File::open_rw(&path).unwrap();
1301            file.set_attr_string("version", "1.0").unwrap();
1302            file.close().unwrap();
1303        }
1304        {
1305            let file = H5File::open(&path).unwrap();
1306            let ds = file.dataset("nodes/id").unwrap();
1307            let strings = ds.read_vlen_strings().unwrap();
1308            assert_eq!(strings.len(), 7189);
1309            assert_eq!(strings[0], input[0]);
1310        }
1311        std::fs::remove_file(&path).ok();
1312    }
1313
1314    #[test]
1315    fn vlen_string_write_read() {
1316        let path = temp_path("vlen_wr");
1317        {
1318            let file = H5File::create(&path).unwrap();
1319            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
1320                .unwrap();
1321            file.close().unwrap();
1322        }
1323        {
1324            let file = H5File::open(&path).unwrap();
1325            let ds = file.dataset("names").unwrap();
1326            let strings = ds.read_vlen_strings().unwrap();
1327            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
1328        }
1329        std::fs::remove_file(&path).ok();
1330    }
1331
1332    #[test]
1333    #[cfg(feature = "deflate")]
1334    fn vlen_string_deflate_roundtrip() {
1335        use crate::format::messages::filter::FilterPipeline;
1336        let path = temp_path("vlen_deflate");
1337        let input: Vec<&str> = (0..100)
1338            .map(|i| match i % 3 {
1339                0 => "hello world",
1340                1 => "compressed vlen string test",
1341                _ => "rust-hdf5",
1342            })
1343            .collect();
1344        {
1345            let file = H5File::create(&path).unwrap();
1346            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1347                .unwrap();
1348            file.close().unwrap();
1349        }
1350        {
1351            let file = H5File::open(&path).unwrap();
1352            let ds = file.dataset("texts").unwrap();
1353            let strings = ds.read_vlen_strings().unwrap();
1354            assert_eq!(strings.len(), 100);
1355            for (i, s) in strings.iter().enumerate() {
1356                assert_eq!(s, input[i]);
1357            }
1358        }
1359        std::fs::remove_file(&path).ok();
1360    }
1361
1362    #[test]
1363    #[cfg(feature = "zstd")]
1364    fn vlen_string_zstd_roundtrip() {
1365        use crate::format::messages::filter::FilterPipeline;
1366        let path = temp_path("vlen_zstd");
1367        let input: Vec<&str> = (0..200)
1368            .map(|i| match i % 4 {
1369                0 => "zstandard compression test",
1370                1 => "variable length string",
1371                2 => "rust-hdf5 chunked storage",
1372                _ => "hello zstd world",
1373            })
1374            .collect();
1375        {
1376            let file = H5File::create(&path).unwrap();
1377            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
1378                .unwrap();
1379            file.close().unwrap();
1380        }
1381        {
1382            let file = H5File::open(&path).unwrap();
1383            let ds = file.dataset("data").unwrap();
1384            let strings = ds.read_vlen_strings().unwrap();
1385            assert_eq!(strings.len(), 200);
1386            for (i, s) in strings.iter().enumerate() {
1387                assert_eq!(s, input[i]);
1388            }
1389        }
1390        std::fs::remove_file(&path).ok();
1391    }
1392
1393    #[test]
1394    #[cfg(feature = "deflate")]
1395    fn shuffle_deflate_roundtrip() {
1396        let path = temp_path("shuf_defl");
1397        {
1398            let file = H5File::create(&path).unwrap();
1399            let ds = file
1400                .new_dataset::<f64>()
1401                .shape([0usize, 4])
1402                .chunk(&[1, 4])
1403                .max_shape(&[None, Some(4)])
1404                .shuffle_deflate(6)
1405                .create("data")
1406                .unwrap();
1407            for frame in 0..20u64 {
1408                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
1409                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1410                ds.write_chunk(frame as usize, &raw).unwrap();
1411            }
1412            ds.extend(&[20, 4]).unwrap();
1413            file.close().unwrap();
1414        }
1415        {
1416            let file = H5File::open(&path).unwrap();
1417            let ds = file.dataset("data").unwrap();
1418            assert_eq!(ds.shape(), vec![20, 4]);
1419            let data = ds.read_raw::<f64>().unwrap();
1420            assert_eq!(data.len(), 80);
1421            for (i, val) in data.iter().enumerate() {
1422                assert!((val - i as f64).abs() < 1e-10);
1423            }
1424        }
1425        std::fs::remove_file(&path).ok();
1426    }
1427
1428    #[test]
1429    fn file_level_attributes() {
1430        let path = temp_path("file_attr");
1431        {
1432            let file = H5File::create(&path).unwrap();
1433            file.set_attr_string("title", "Test File").unwrap();
1434            file.set_attr_numeric("version", &42i32).unwrap();
1435            let ds = file
1436                .new_dataset::<u8>()
1437                .shape([1usize])
1438                .create("dummy")
1439                .unwrap();
1440            ds.write_raw(&[0u8]).unwrap();
1441            file.close().unwrap();
1442        }
1443        {
1444            let file = H5File::open(&path).unwrap();
1445            assert!(file.dataset_names().contains(&"dummy".to_string()));
1446
1447            // Read file-level attributes
1448            let names = file.attr_names().unwrap();
1449            assert!(names.contains(&"title".to_string()));
1450
1451            let title = file.attr_string("title").unwrap();
1452            assert_eq!(title, "Test File");
1453        }
1454        std::fs::remove_file(&path).ok();
1455    }
1456
1457    #[test]
1458    fn scalar_dataset_roundtrip() {
1459        let path = temp_path("scalar");
1460        {
1461            let file = H5File::create(&path).unwrap();
1462            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
1463            ds.write_raw(&[std::f64::consts::PI]).unwrap();
1464            file.close().unwrap();
1465        }
1466        {
1467            let file = H5File::open(&path).unwrap();
1468            let ds = file.dataset("pi").unwrap();
1469            assert_eq!(ds.shape(), Vec::<usize>::new());
1470            assert_eq!(ds.total_elements(), 1);
1471            let data = ds.read_raw::<f64>().unwrap();
1472            assert_eq!(data.len(), 1);
1473            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
1474        }
1475        std::fs::remove_file(&path).ok();
1476    }
1477
1478    #[test]
1479    fn append_mode_extend_chunked() {
1480        let path = temp_path("append_extend");
1481
1482        // Create with 5 frames
1483        {
1484            let file = H5File::create(&path).unwrap();
1485            let ds = file
1486                .new_dataset::<i32>()
1487                .shape([0usize, 3])
1488                .chunk(&[1, 3])
1489                .max_shape(&[None, Some(3)])
1490                .create("stream")
1491                .unwrap();
1492            for i in 0..5u64 {
1493                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1494                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1495                ds.write_chunk(i as usize, &raw).unwrap();
1496            }
1497            ds.extend(&[5, 3]).unwrap();
1498            file.close().unwrap();
1499        }
1500
1501        // Reopen and add 5 more frames
1502        {
1503            let file = H5File::open_rw(&path).unwrap();
1504            // Find the stream dataset index (it's the first one)
1505            let names = file.dataset_names();
1506            assert!(names.contains(&"stream".to_string()));
1507
1508            // Write more chunks via the writer directly
1509            let mut inner = crate::file::borrow_inner_mut(&file.inner);
1510            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
1511                let ds_idx = writer.dataset_index("stream").unwrap();
1512                for i in 5..10u64 {
1513                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1514                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1515                    writer.write_chunk(ds_idx, i, &raw).unwrap();
1516                }
1517                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
1518            }
1519            drop(inner);
1520            file.close().unwrap();
1521        }
1522
1523        // Read back all 10 frames
1524        {
1525            let file = H5File::open(&path).unwrap();
1526            let ds = file.dataset("stream").unwrap();
1527            assert_eq!(ds.shape(), vec![10, 3]);
1528            let data = ds.read_raw::<i32>().unwrap();
1529            assert_eq!(data.len(), 30);
1530            for (i, val) in data.iter().enumerate() {
1531                assert_eq!(*val, i as i32, "mismatch at {}", i);
1532            }
1533        }
1534
1535        std::fs::remove_file(&path).ok();
1536    }
1537
1538    #[test]
1539    fn group_hierarchy_roundtrip() {
1540        let path = temp_path("groups_rt");
1541
1542        {
1543            let file = H5File::create(&path).unwrap();
1544            let root = file.root_group();
1545
1546            // Create groups
1547            let det = root.create_group("detector").unwrap();
1548            let raw = det.create_group("raw").unwrap();
1549
1550            // Create datasets in groups
1551            let ds1 = det
1552                .new_dataset::<f32>()
1553                .shape([10usize])
1554                .create("temperature")
1555                .unwrap();
1556            ds1.write_raw(&[1.0f32; 10]).unwrap();
1557
1558            let ds2 = raw
1559                .new_dataset::<u16>()
1560                .shape([4usize, 4])
1561                .create("image")
1562                .unwrap();
1563            ds2.write_raw(&[42u16; 16]).unwrap();
1564
1565            // Root-level dataset
1566            let ds3 = file
1567                .new_dataset::<i32>()
1568                .shape([3usize])
1569                .create("version")
1570                .unwrap();
1571            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1572
1573            file.close().unwrap();
1574        }
1575
1576        {
1577            let file = H5File::open(&path).unwrap();
1578            let names = file.dataset_names();
1579            assert!(names.contains(&"version".to_string()));
1580            assert!(names.contains(&"detector/temperature".to_string()));
1581            assert!(names.contains(&"detector/raw/image".to_string()));
1582
1583            // Read datasets
1584            let ds = file.dataset("version").unwrap();
1585            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1586
1587            let ds = file.dataset("detector/temperature").unwrap();
1588            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1589
1590            let ds = file.dataset("detector/raw/image").unwrap();
1591            assert_eq!(ds.shape(), vec![4, 4]);
1592            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1593
1594            // Group traversal
1595            let root = file.root_group();
1596            let group_names = root.group_names().unwrap();
1597            assert!(group_names.contains(&"detector".to_string()));
1598        }
1599
1600        std::fs::remove_file(&path).ok();
1601    }
1602
1603    #[test]
1604    fn nested_groups_via_file_create_group() {
1605        let path = temp_path("file_create_group");
1606
1607        {
1608            let file = H5File::create(&path).unwrap();
1609
1610            // Use the H5File::create_group convenience method
1611            let grp = file.create_group("sensors").unwrap();
1612            let sub = grp.create_group("accel").unwrap();
1613
1614            let ds = sub
1615                .new_dataset::<f64>()
1616                .shape([3usize])
1617                .create("xyz")
1618                .unwrap();
1619            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1620
1621            file.close().unwrap();
1622        }
1623
1624        {
1625            let file = H5File::open(&path).unwrap();
1626            let names = file.dataset_names();
1627            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1628
1629            let ds = file.dataset("sensors/accel/xyz").unwrap();
1630            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1631
1632            // Open group in read mode
1633            let root = file.root_group();
1634            let sensors = root.group("sensors").unwrap();
1635            assert_eq!(sensors.name(), "/sensors");
1636
1637            let accel = sensors.group("accel").unwrap();
1638            assert_eq!(accel.name(), "/sensors/accel");
1639
1640            // list_groups from root
1641            let top_groups = root.group_names().unwrap();
1642            assert!(top_groups.contains(&"sensors".to_string()));
1643
1644            // list_groups from sensors
1645            let sub_groups = sensors.group_names().unwrap();
1646            assert!(sub_groups.contains(&"accel".to_string()));
1647        }
1648
1649        std::fs::remove_file(&path).ok();
1650    }
1651}
1652
1653#[cfg(test)]
1654mod h5py_compat_tests {
1655    use super::*;
1656
    // Only the deflate-gated h5dump test creates files; without that
    // feature this helper has no callers.
    /// Build a unique temp-file path for this module's tests by delegating
    /// to the file-level `unique_test_path` helper in the parent module.
    #[cfg(feature = "deflate")]
    fn temp_path(name: &str) -> std::path::PathBuf {
        super::unique_test_path(name)
    }
1663
1664    /// Verify our files can be read by h5dump (if available).
1665    #[test]
1666    #[cfg(feature = "deflate")]
1667    fn h5dump_validates_our_files() {
1668        // Check if h5dump is available
1669        let h5dump = std::process::Command::new("h5dump")
1670            .arg("--version")
1671            .output();
1672        if h5dump.is_err() {
1673            eprintln!("skipping: h5dump not found");
1674            return;
1675        }
1676
1677        let path = temp_path("h5dump_validate");
1678
1679        // Write a comprehensive test file
1680        {
1681            let file = H5File::create(&path).unwrap();
1682
1683            // Contiguous
1684            let ds = file
1685                .new_dataset::<f64>()
1686                .shape([3usize, 4])
1687                .create("matrix")
1688                .unwrap();
1689            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
1690            ds.write_raw(&data).unwrap();
1691
1692            // Chunked + compressed
1693            let ds2 = file
1694                .new_dataset::<i32>()
1695                .shape([0usize, 2])
1696                .chunk(&[1, 2])
1697                .max_shape(&[None, Some(2)])
1698                .deflate(6)
1699                .create("stream")
1700                .unwrap();
1701            for i in 0..5u64 {
1702                let vals: Vec<i32> = vec![i as i32 * 2, i as i32 * 2 + 1];
1703                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1704                ds2.write_chunk(i as usize, &raw).unwrap();
1705            }
1706            ds2.extend(&[5, 2]).unwrap();
1707
1708            // Group
1709            let grp = file.create_group("meta").unwrap();
1710            let ds3 = grp
1711                .new_dataset::<u8>()
1712                .shape([4usize])
1713                .create("flags")
1714                .unwrap();
1715            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();
1716
1717            // String attribute
1718            use crate::types::VarLenUnicode;
1719            let attr = ds
1720                .new_attr::<VarLenUnicode>()
1721                .shape(())
1722                .create("units")
1723                .unwrap();
1724            attr.write_string("meters").unwrap();
1725
1726            file.close().unwrap();
1727        }
1728
1729        // Run h5dump and verify exit code
1730        let output = std::process::Command::new("h5dump")
1731            .arg("-H") // header only (faster)
1732            .arg(path.to_str().unwrap())
1733            .output()
1734            .unwrap();
1735
1736        assert!(
1737            output.status.success(),
1738            "h5dump failed:\nstdout: {}\nstderr: {}",
1739            String::from_utf8_lossy(&output.stdout),
1740            String::from_utf8_lossy(&output.stderr),
1741        );
1742
1743        // Full dump (with data) should also work
1744        let output2 = std::process::Command::new("h5dump")
1745            .arg(path.to_str().unwrap())
1746            .output()
1747            .unwrap();
1748
1749        assert!(
1750            output2.status.success(),
1751            "h5dump (full) failed:\nstderr: {}",
1752            String::from_utf8_lossy(&output2.stderr),
1753        );
1754
1755        std::fs::remove_file(&path).ok();
1756    }
1757
1758    #[test]
1759    fn read_h5py_generated_file() {
1760        let path = "/tmp/test_h5py_default.h5";
1761        if !std::path::Path::new(path).exists() {
1762            eprintln!("skipping: h5py test file not found");
1763            return;
1764        }
1765        let file = H5File::open(path).unwrap();
1766
1767        let ds = file.dataset("data").unwrap();
1768        assert_eq!(ds.shape(), vec![4, 5]);
1769        let data = ds.read_raw::<f64>().unwrap();
1770        assert_eq!(data.len(), 20);
1771        assert!((data[0]).abs() < 1e-10);
1772        assert!((data[19] - 19.0).abs() < 1e-10);
1773
1774        let ds2 = file.dataset("images").unwrap();
1775        assert_eq!(ds2.shape(), vec![3, 64, 64]);
1776        let images = ds2.read_raw::<u16>().unwrap();
1777        assert_eq!(images.len(), 3 * 64 * 64);
1778    }
1779}