Skip to main content

rust_hdf5/
file.rs

1//! HDF5 file handle — the main entry point for the public API.
2//!
3//! ```no_run
4//! use rust_hdf5::H5File;
5//!
6//! // Write
7//! let file = H5File::create("example.h5").unwrap();
8//! let ds = file.new_dataset::<u8>().shape(&[10, 20]).create("data").unwrap();
9//! ds.write_raw(&vec![0u8; 200]).unwrap();
10//! drop(file);
11//!
12//! // Read
13//! let file = H5File::open("example.h5").unwrap();
14//! let ds = file.dataset("data").unwrap();
15//! let data = ds.read_raw::<u8>().unwrap();
16//! assert_eq!(data.len(), 200);
17//! ```
18
19use std::path::Path;
20
21use crate::io::locking::FileLocking;
22use crate::io::{Hdf5Reader, Hdf5Writer};
23
24use crate::dataset::{DatasetBuilder, H5Dataset};
25use crate::error::{Hdf5Error, Result};
26use crate::format::messages::filter::FilterPipeline;
27use crate::group::H5Group;
28use crate::types::H5Type;
29
30// ---------------------------------------------------------------------------
31// Thread-safety: choose between Rc<RefCell<>> and Arc<Mutex<>> based on
32// the `threadsafe` feature flag.
33// ---------------------------------------------------------------------------
34
35#[cfg(not(feature = "threadsafe"))]
36pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;
37
38#[cfg(feature = "threadsafe")]
39pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
40
41/// Helper to borrow/lock the inner state immutably.
42#[cfg(not(feature = "threadsafe"))]
43pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
44    inner.borrow()
45}
46
47/// Helper to borrow/lock the inner state mutably.
48#[cfg(not(feature = "threadsafe"))]
49pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
50    inner.borrow_mut()
51}
52
53/// Helper to clone a SharedInner.
54#[cfg(not(feature = "threadsafe"))]
55pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
56    std::rc::Rc::clone(inner)
57}
58
59/// Helper to wrap an H5FileInner in SharedInner.
60#[cfg(not(feature = "threadsafe"))]
61pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
62    std::rc::Rc::new(std::cell::RefCell::new(inner))
63}
64
65#[cfg(feature = "threadsafe")]
66pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
67    inner.lock().unwrap()
68}
69
70#[cfg(feature = "threadsafe")]
71pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
72    inner.lock().unwrap()
73}
74
75#[cfg(feature = "threadsafe")]
76pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
77    std::sync::Arc::clone(inner)
78}
79
80#[cfg(feature = "threadsafe")]
81pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
82    std::sync::Arc::new(std::sync::Mutex::new(inner))
83}
84
/// The inner state of an HDF5 file, shared with datasets via reference counting.
///
/// Values of this type live inside a [`SharedInner`] handle. By default that
/// handle is an `Rc<RefCell<>>` for zero-overhead single-threaded use.
/// Enable the `threadsafe` feature to use `Arc<Mutex<>>` instead, making
/// `H5File` `Send + Sync`.
pub(crate) enum H5FileInner {
    /// The file was created or opened for appending; wraps the writer.
    Writer(Hdf5Writer),
    /// The file was opened read-only; wraps the reader.
    Reader(Hdf5Reader),
    /// Sentinel value used during `close()` to take ownership of the writer.
    Closed,
}
96
/// An HDF5 file opened for reading or writing.
///
/// Datasets created from this file hold a shared reference to the underlying
/// I/O handle, so the file does not need to outlive its datasets (they share
/// ownership via reference counting).
pub struct H5File {
    /// Shared handle to the reader/writer state. Clones of this handle are
    /// passed to the groups, dataset builders and datasets obtained from
    /// this file.
    pub(crate) inner: SharedInner,
}
105
106impl H5File {
107    /// Create a new HDF5 file at `path`. Truncates if the file already exists.
108    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
109        let writer = Hdf5Writer::create(path.as_ref())?;
110        Ok(Self {
111            inner: new_shared(H5FileInner::Writer(writer)),
112        })
113    }
114
115    /// Open an existing HDF5 file for reading.
116    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
117        let reader = Hdf5Reader::open(path.as_ref())?;
118        Ok(Self {
119            inner: new_shared(H5FileInner::Reader(reader)),
120        })
121    }
122
123    /// Open an existing HDF5 file for appending new datasets.
124    ///
125    /// Existing datasets are preserved. New datasets can be added and will
126    /// be written after the current end of file. Existing chunked datasets
127    /// can be extended with `write_chunk` and `extend_dataset`.
128    ///
129    /// ```no_run
130    /// use rust_hdf5::H5File;
131    /// let file = H5File::open_rw("existing.h5").unwrap();
132    /// let ds = file.new_dataset::<f64>().shape(&[100]).create("new_data").unwrap();
133    /// ds.write_raw(&vec![0.0f64; 100]).unwrap();
134    /// file.close().unwrap();
135    /// ```
136    pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
137        let writer = Hdf5Writer::open_append(path.as_ref())?;
138        Ok(Self {
139            inner: new_shared(H5FileInner::Writer(writer)),
140        })
141    }
142
143    /// Start building open options for an HDF5 file.
144    ///
145    /// Use this to control file-locking behavior explicitly:
146    ///
147    /// ```no_run
148    /// use rust_hdf5::{H5File, FileLocking};
149    /// // Open with locking disabled (e.g. on NFS without lock support).
150    /// let file = H5File::options()
151    ///     .locking(FileLocking::Disabled)
152    ///     .open_rw("existing.h5")
153    ///     .unwrap();
154    /// # let _ = file;
155    /// ```
156    pub fn options() -> H5FileOptions {
157        H5FileOptions::default()
158    }
159
160    /// Return a handle to the root group.
161    ///
162    /// The root group can be used to create datasets and sub-groups.
163    pub fn root_group(&self) -> H5Group {
164        H5Group::new(clone_inner(&self.inner), "/".to_string())
165    }
166
167    /// Create a group in the root of the file.
168    ///
169    /// ```no_run
170    /// use rust_hdf5::H5File;
171    /// let file = H5File::create("groups.h5").unwrap();
172    /// let grp = file.create_group("detector").unwrap();
173    /// ```
174    pub fn create_group(&self, name: &str) -> Result<H5Group> {
175        self.root_group().create_group(name)
176    }
177
178    /// Start building a new dataset with the given element type.
179    ///
180    /// This returns a fluent builder. Call `.shape(...)` to set dimensions and
181    /// `.create("name")` to finalize.
182    ///
183    /// ```no_run
184    /// # use rust_hdf5::H5File;
185    /// let file = H5File::create("build.h5").unwrap();
186    /// let ds = file.new_dataset::<f64>().shape(&[3, 4]).create("matrix").unwrap();
187    /// ```
188    pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
189        DatasetBuilder::new(clone_inner(&self.inner))
190    }
191
192    /// Add a string attribute to the file (root group).
193    pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
194        use crate::format::messages::attribute::AttributeMessage;
195        let attr = AttributeMessage::scalar_string(name, value);
196        let mut inner = borrow_inner_mut(&self.inner);
197        match &mut *inner {
198            H5FileInner::Writer(writer) => {
199                writer.add_root_attribute(attr);
200                Ok(())
201            }
202            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
203        }
204    }
205
206    /// Add a numeric attribute to the file (root group).
207    pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
208        use crate::format::messages::attribute::AttributeMessage;
209        let es = T::element_size();
210        let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
211        let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
212        let mut inner = borrow_inner_mut(&self.inner);
213        match &mut *inner {
214            H5FileInner::Writer(writer) => {
215                writer.add_root_attribute(attr);
216                Ok(())
217            }
218            _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
219        }
220    }
221
222    /// Return the names of file-level (root group) attributes.
223    pub fn attr_names(&self) -> Result<Vec<String>> {
224        let inner = borrow_inner(&self.inner);
225        match &*inner {
226            H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
227            _ => Ok(vec![]),
228        }
229    }
230
231    /// Read a file-level string attribute.
232    pub fn attr_string(&self, name: &str) -> Result<String> {
233        let inner = borrow_inner(&self.inner);
234        match &*inner {
235            H5FileInner::Reader(reader) => {
236                let attr = reader
237                    .root_attr(name)
238                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
239                let end = attr
240                    .data
241                    .iter()
242                    .position(|&b| b == 0)
243                    .unwrap_or(attr.data.len());
244                Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
245            }
246            _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
247        }
248    }
249
250    /// Check if the file is in write/append mode.
251    pub fn is_writable(&self) -> bool {
252        let inner = borrow_inner(&self.inner);
253        matches!(&*inner, H5FileInner::Writer(_))
254    }
255
256    /// Create a variable-length string dataset and write data.
257    ///
258    /// This is a convenience method for writing h5py-compatible vlen string
259    /// datasets using global heap storage.
260    pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
261        let mut inner = borrow_inner_mut(&self.inner);
262        match &mut *inner {
263            H5FileInner::Writer(writer) => {
264                let idx = writer.create_vlen_string_dataset(name, strings)?;
265                // If the name contains '/', assign the dataset to its parent group
266                if let Some(slash_pos) = name.rfind('/') {
267                    let group_path = &name[..slash_pos];
268                    let abs_group_path = if group_path.starts_with('/') {
269                        group_path.to_string()
270                    } else {
271                        format!("/{}", group_path)
272                    };
273                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
274                }
275                Ok(())
276            }
277            H5FileInner::Reader(_) => {
278                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
279            }
280            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
281        }
282    }
283
284    /// Create a chunked, compressed variable-length string dataset.
285    ///
286    /// Like `write_vlen_strings`, but stores the vlen references in chunked
287    /// layout with the given filter pipeline (e.g., `FilterPipeline::deflate(6)`
288    /// or `FilterPipeline::zstd(3)`). `chunk_size` is the number of strings
289    /// per chunk.
290    pub fn write_vlen_strings_compressed(
291        &self,
292        name: &str,
293        strings: &[&str],
294        chunk_size: usize,
295        pipeline: FilterPipeline,
296    ) -> Result<()> {
297        let mut inner = borrow_inner_mut(&self.inner);
298        match &mut *inner {
299            H5FileInner::Writer(writer) => {
300                let idx = writer
301                    .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
302                if let Some(slash_pos) = name.rfind('/') {
303                    let group_path = &name[..slash_pos];
304                    let abs_group_path = if group_path.starts_with('/') {
305                        group_path.to_string()
306                    } else {
307                        format!("/{}", group_path)
308                    };
309                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
310                }
311                Ok(())
312            }
313            H5FileInner::Reader(_) => {
314                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
315            }
316            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
317        }
318    }
319
320    /// Create an empty chunked vlen string dataset ready for incremental appends.
321    ///
322    /// Use `append_vlen_strings` to add data. If `pipeline` is `Some`, chunks
323    /// are compressed (e.g., `Some(FilterPipeline::lz4())`).
324    pub fn create_appendable_vlen_dataset(
325        &self,
326        name: &str,
327        chunk_size: usize,
328        pipeline: Option<FilterPipeline>,
329    ) -> Result<()> {
330        let mut inner = borrow_inner_mut(&self.inner);
331        match &mut *inner {
332            H5FileInner::Writer(writer) => {
333                let idx =
334                    writer.create_appendable_vlen_string_dataset(name, chunk_size, pipeline)?;
335                if let Some(slash_pos) = name.rfind('/') {
336                    let group_path = &name[..slash_pos];
337                    let abs_group_path = if group_path.starts_with('/') {
338                        group_path.to_string()
339                    } else {
340                        format!("/{}", group_path)
341                    };
342                    writer.assign_dataset_to_group(&abs_group_path, idx)?;
343                }
344                Ok(())
345            }
346            H5FileInner::Reader(_) => {
347                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
348            }
349            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
350        }
351    }
352
353    /// Append variable-length strings to an existing chunked vlen string dataset.
354    pub fn append_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
355        let mut inner = borrow_inner_mut(&self.inner);
356        match &mut *inner {
357            H5FileInner::Writer(writer) => {
358                let ds_index = writer
359                    .dataset_index(name)
360                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
361                writer.append_vlen_strings(ds_index, strings)?;
362                Ok(())
363            }
364            H5FileInner::Reader(_) => {
365                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
366            }
367            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
368        }
369    }
370
371    /// Delete a dataset by name. The dataset is unlinked on close;
372    /// file space is not reclaimed.
373    pub fn delete_dataset(&self, name: &str) -> Result<()> {
374        let mut inner = borrow_inner_mut(&self.inner);
375        match &mut *inner {
376            H5FileInner::Writer(writer) => {
377                writer.delete_dataset(name)?;
378                Ok(())
379            }
380            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
381        }
382    }
383
384    /// Delete a group and all its child datasets/sub-groups.
385    /// File space is not reclaimed.
386    pub fn delete_group(&self, name: &str) -> Result<()> {
387        let mut inner = borrow_inner_mut(&self.inner);
388        match &mut *inner {
389            H5FileInner::Writer(writer) => {
390                writer.delete_group(name)?;
391                Ok(())
392            }
393            _ => Err(Hdf5Error::InvalidState("cannot delete in read mode".into())),
394        }
395    }
396
397    /// Open an existing dataset by name (read mode).
398    pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
399        let inner = borrow_inner(&self.inner);
400        match &*inner {
401            H5FileInner::Reader(reader) => {
402                let info = reader
403                    .dataset_info(name)
404                    .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
405                let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
406                let element_size = info.datatype.element_size() as usize;
407                Ok(H5Dataset::new_reader(
408                    clone_inner(&self.inner),
409                    name.to_string(),
410                    shape,
411                    element_size,
412                ))
413            }
414            H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
415                "cannot open a dataset by name in write mode; use new_dataset() instead"
416                    .to_string(),
417            )),
418            H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
419        }
420    }
421
422    /// Return the names of all datasets in the root group.
423    ///
424    /// Works in both read and write mode: in write mode, returns the names of
425    /// datasets created so far; in read mode, returns the names discovered
426    /// during file open.
427    pub fn dataset_names(&self) -> Vec<String> {
428        let inner = borrow_inner(&self.inner);
429        match &*inner {
430            H5FileInner::Reader(reader) => reader
431                .dataset_names()
432                .iter()
433                .map(|s| s.to_string())
434                .collect(),
435            H5FileInner::Writer(writer) => writer
436                .dataset_names()
437                .iter()
438                .map(|s| s.to_string())
439                .collect(),
440            H5FileInner::Closed => Vec::new(),
441        }
442    }
443
444    /// Explicitly close the file. For a writer, this finalizes the file
445    /// (writes superblock, headers, etc.). For a reader, this is a no-op.
446    ///
447    /// The file is also auto-finalized on drop, but calling `close()` lets
448    /// you handle errors.
449    pub fn close(self) -> Result<()> {
450        let old = {
451            let mut inner = borrow_inner_mut(&self.inner);
452            std::mem::replace(&mut *inner, H5FileInner::Closed)
453        };
454        match old {
455            H5FileInner::Writer(writer) => {
456                writer.close()?;
457                Ok(())
458            }
459            H5FileInner::Reader(_) => Ok(()),
460            H5FileInner::Closed => Ok(()),
461        }
462    }
463
    /// Flush the file to disk. Only meaningful in write mode.
    ///
    /// Currently a no-op: it does not touch the file state and always returns
    /// `Ok(())`, in every mode.
    pub fn flush(&self) -> Result<()> {
        // The underlying writer does not expose a standalone flush; data is
        // written to disk immediately via pwrite. This is a compatibility
        // method that does nothing for now.
        Ok(())
    }
471}
472
473/// Builder controlling how an [`H5File`] is opened.
474///
475/// The default policy follows the HDF5 C library: an exclusive lock is
476/// acquired for write-mode opens and a shared lock for read-mode opens,
477/// honoring the `HDF5_USE_FILE_LOCKING` environment variable. Calling
478/// [`Self::locking`] overrides the env-var value.
479#[derive(Debug, Default, Clone)]
480pub struct H5FileOptions {
481    locking: Option<FileLocking>,
482}
483
484impl H5FileOptions {
485    /// Construct a fresh options builder with default settings.
486    pub fn new() -> Self {
487        Self::default()
488    }
489
490    /// Override the locking policy. Bypasses the `HDF5_USE_FILE_LOCKING`
491    /// environment variable for the resulting open call.
492    pub fn locking(mut self, policy: FileLocking) -> Self {
493        self.locking = Some(policy);
494        self
495    }
496
497    /// Disable OS-level file locking entirely (equivalent to
498    /// `HDF5_USE_FILE_LOCKING=FALSE`).
499    pub fn no_locking(self) -> Self {
500        self.locking(FileLocking::Disabled)
501    }
502
503    /// Try to acquire the lock but do not fail if the filesystem rejects it
504    /// (equivalent to `HDF5_USE_FILE_LOCKING=BEST_EFFORT`).
505    pub fn best_effort_locking(self) -> Self {
506        self.locking(FileLocking::BestEffort)
507    }
508
509    fn resolved_locking(&self) -> FileLocking {
510        match self.locking {
511            Some(p) => p,
512            None => FileLocking::from_env_or(FileLocking::default()),
513        }
514    }
515
516    /// Create a new HDF5 file at `path` with the configured options.
517    pub fn create<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
518        let writer = Hdf5Writer::create_with_locking(path.as_ref(), self.resolved_locking())?;
519        Ok(H5File {
520            inner: new_shared(H5FileInner::Writer(writer)),
521        })
522    }
523
524    /// Open an existing HDF5 file for reading with the configured options.
525    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
526        let reader = Hdf5Reader::open_with_locking(path.as_ref(), self.resolved_locking())?;
527        Ok(H5File {
528            inner: new_shared(H5FileInner::Reader(reader)),
529        })
530    }
531
532    /// Open an existing HDF5 file for read/write with the configured options.
533    pub fn open_rw<P: AsRef<Path>>(self, path: P) -> Result<H5File> {
534        let writer = Hdf5Writer::open_append_with_locking(path.as_ref(), self.resolved_locking())?;
535        Ok(H5File {
536            inner: new_shared(H5FileInner::Writer(writer)),
537        })
538    }
539}
540
/// Build a scratch-file path in the system temp directory that is unique per
/// call: `rust_hdf5_test_<name>_<pid>_<counter>.h5`.
#[cfg(test)]
fn unique_test_path(name: &str) -> std::path::PathBuf {
    // The process id separates concurrent cargo runs; the process-wide
    // counter separates calls within one run. Distinct paths also avoid any
    // flock/LockFileEx race where a just-closed file's lock is still briefly
    // visible when the same path is reopened.
    use std::sync::atomic::{AtomicU64, Ordering};
    static NEXT_SERIAL: AtomicU64 = AtomicU64::new(0);

    let serial = NEXT_SERIAL.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    let file_name = format!("rust_hdf5_test_{}_{}_{}.h5", name, pid, serial);

    let mut full_path = std::env::temp_dir();
    full_path.push(file_name);
    full_path
}
557
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Unique scratch path for the test identified by `name`.
    fn temp_path(name: &str) -> PathBuf {
        super::unique_test_path(name)
    }

    /// An empty file that is closed explicitly can be reopened for reading.
    #[test]
    fn create_and_close_empty() {
        let scratch = temp_path("create_empty");

        let writer_side = H5File::create(&scratch).unwrap();
        writer_side.close().unwrap();

        // The finalized file must be readable.
        let reader_side = H5File::open(&scratch).unwrap();
        reader_side.close().unwrap();

        std::fs::remove_file(&scratch).ok();
    }

    /// An empty file that is merely dropped can be reopened for reading.
    #[test]
    fn create_and_drop_empty() {
        let scratch = temp_path("drop_empty");

        // Dropping the handle auto-finalizes the file.
        drop(H5File::create(&scratch).unwrap());

        // Opening it again verifies the file is valid.
        let reader_side = H5File::open(&scratch).unwrap();
        reader_side.close().unwrap();

        std::fs::remove_file(&scratch).ok();
    }

    /// Looking up a dataset that was never written yields an error.
    #[test]
    fn dataset_not_found() {
        let scratch = temp_path("ds_not_found");
        drop(H5File::create(&scratch).unwrap());

        let file = H5File::open(&scratch).unwrap();
        let lookup = file.dataset("nonexistent");
        assert!(lookup.is_err());

        std::fs::remove_file(&scratch).ok();
    }

    /// A 4x4 block of zero bytes survives a write/close/open/read cycle.
    #[test]
    fn write_and_read_roundtrip() {
        let scratch = temp_path("write_read_rt");

        // Write phase.
        {
            let file = H5File::create(&scratch).unwrap();
            let builder = file.new_dataset::<u8>().shape([4, 4]);
            let ds = builder.create("data").unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        // Read phase.
        {
            let file = H5File::open(&scratch).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);

            let bytes = ds.read_raw::<u8>().unwrap();
            assert_eq!(bytes.len(), 16);
            assert!(bytes.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&scratch).ok();
    }

    /// A 2x3 `f64` matrix is read back exactly as written.
    #[test]
    fn write_and_read_f64() {
        let scratch = temp_path("write_read_f64");
        let expected: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        // Write phase.
        {
            let file = H5File::create(&scratch).unwrap();
            let builder = file.new_dataset::<f64>().shape([2, 3]);
            let ds = builder.create("matrix").unwrap();
            ds.write_raw(&expected).unwrap();
            file.close().unwrap();
        }

        // Read phase.
        {
            let file = H5File::open(&scratch).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let actual = ds.read_raw::<f64>().unwrap();
            assert_eq!(actual, expected);
        }

        std::fs::remove_file(&scratch).ok();
    }

    /// Two datasets of different element types coexist in one file.
    #[test]
    fn multiple_datasets() {
        let scratch = temp_path("multi_ds");

        // Write phase: one i32 vector and one 2x2 f32 matrix.
        {
            let file = H5File::create(&scratch).unwrap();

            let int_ds = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            int_ds.write_raw(&[10i32, 20, 30]).unwrap();

            let float_builder = file.new_dataset::<f32>().shape([2, 2]);
            let float_ds = float_builder.create("floats").unwrap();
            float_ds.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        // Read phase: both datasets come back with their shapes and values.
        {
            let file = H5File::open(&scratch).unwrap();

            let int_ds = file.dataset("ints").unwrap();
            assert_eq!(int_ds.shape(), vec![3]);
            let int_values = int_ds.read_raw::<i32>().unwrap();
            assert_eq!(int_values, vec![10, 20, 30]);

            let float_ds = file.dataset("floats").unwrap();
            assert_eq!(float_ds.shape(), vec![2, 2]);
            let float_values = float_ds.read_raw::<f32>().unwrap();
            assert_eq!(float_values, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&scratch).ok();
    }

    /// Closing a freshly created file succeeds.
    #[test]
    fn close_is_idempotent() {
        let scratch = temp_path("close_idemp");
        let file = H5File::create(&scratch).unwrap();
        // `close()` consumes the handle, so a second close cannot even be
        // written; the type system rules out double-close.
        file.close().unwrap();
        std::fs::remove_file(&scratch).ok();
    }
}
712
713#[cfg(test)]
714mod integration_tests {
715    use super::*;
716
    /// Unique scratch path for the integration test identified by `name`;
    /// forwards to `unique_test_path` in the parent module.
    fn temp_path(name: &str) -> std::path::PathBuf {
        super::unique_test_path(name)
    }
720
721    #[test]
722    fn write_file_for_h5dump() {
723        let path = temp_path("integration");
724        let file = H5File::create(&path).unwrap();
725
726        let ds = file
727            .new_dataset::<u8>()
728            .shape([4usize, 4])
729            .create("data_u8")
730            .unwrap();
731        let data: Vec<u8> = (0..16).collect();
732        ds.write_raw(&data).unwrap();
733
734        let ds2 = file
735            .new_dataset::<f64>()
736            .shape([3usize, 2])
737            .create("data_f64")
738            .unwrap();
739        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
740        ds2.write_raw(&fdata).unwrap();
741
742        let ds3 = file
743            .new_dataset::<i32>()
744            .shape([5usize])
745            .create("values")
746            .unwrap();
747        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
748        ds3.write_raw(&idata).unwrap();
749
750        file.close().unwrap();
751
752        // File exists
753        assert!(path.exists());
754    }
755
756    #[test]
757    fn write_chunked_file_for_h5dump() {
758        let path = temp_path("chunked");
759        let file = H5File::create(&path).unwrap();
760
761        // Create a chunked dataset with unlimited first dimension
762        let ds = file
763            .new_dataset::<f64>()
764            .shape([0usize, 4])
765            .chunk(&[1, 4])
766            .max_shape(&[None, Some(4)])
767            .create("streaming_data")
768            .unwrap();
769
770        // Write 5 frames of data
771        for frame in 0..5u64 {
772            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
773            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
774            ds.write_chunk(frame as usize, &raw).unwrap();
775        }
776
777        // Extend dimensions to reflect the 5 written frames
778        ds.extend(&[5, 4]).unwrap();
779        ds.flush().unwrap();
780
781        file.close().unwrap();
782
783        assert!(path.exists());
784    }
785
786    #[test]
787    fn write_chunked_many_frames_for_h5dump() {
788        let path = temp_path("chunked_many");
789        let file = H5File::create(&path).unwrap();
790
791        let ds = file
792            .new_dataset::<i32>()
793            .shape([0usize, 3])
794            .chunk(&[1, 3])
795            .max_shape(&[None, Some(3)])
796            .create("data")
797            .unwrap();
798
799        // Write 10 frames (exceeds idx_blk_elmts=4, uses data blocks)
800        for frame in 0..10u64 {
801            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
802            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
803            ds.write_chunk(frame as usize, &raw).unwrap();
804        }
805        ds.extend(&[10, 3]).unwrap();
806        file.close().unwrap();
807
808        assert!(path.exists());
809    }
810
811    #[test]
812    fn write_dataset_with_attributes() {
813        use crate::types::VarLenUnicode;
814
815        let path = temp_path("attributes");
816        let file = H5File::create(&path).unwrap();
817
818        let ds = file
819            .new_dataset::<f32>()
820            .shape([10usize])
821            .create("temperature")
822            .unwrap();
823        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
824        ds.write_raw(&data).unwrap();
825
826        // Add string attributes
827        let attr = ds
828            .new_attr::<VarLenUnicode>()
829            .shape(())
830            .create("units")
831            .unwrap();
832        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
833            .unwrap();
834
835        let attr2 = ds
836            .new_attr::<VarLenUnicode>()
837            .shape(())
838            .create("description")
839            .unwrap();
840        attr2
841            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
842            .unwrap();
843
844        // Use write_string convenience method
845        let attr3 = ds
846            .new_attr::<VarLenUnicode>()
847            .shape(())
848            .create("source")
849            .unwrap();
850        attr3.write_string("sensor_01").unwrap();
851
852        // Also test parse -> write_scalar pattern
853        let attr4 = ds
854            .new_attr::<VarLenUnicode>()
855            .shape(())
856            .create("label")
857            .unwrap();
858        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
859        attr4.write_scalar(&s).unwrap();
860
861        file.close().unwrap();
862
863        assert!(path.exists());
864    }
865
866    #[test]
867    fn chunked_write_read_roundtrip() {
868        let path = temp_path("chunked_roundtrip");
869
870        // Write
871        {
872            let file = H5File::create(&path).unwrap();
873            let ds = file
874                .new_dataset::<i32>()
875                .shape([0usize, 3])
876                .chunk(&[1, 3])
877                .max_shape(&[None, Some(3)])
878                .create("table")
879                .unwrap();
880
881            for frame in 0..8u64 {
882                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
883                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
884                ds.write_chunk(frame as usize, &raw).unwrap();
885            }
886            ds.extend(&[8, 3]).unwrap();
887            file.close().unwrap();
888        }
889
890        // Read
891        {
892            let file = H5File::open(&path).unwrap();
893            let ds = file.dataset("table").unwrap();
894            assert_eq!(ds.shape(), vec![8, 3]);
895            let data = ds.read_raw::<i32>().unwrap();
896            assert_eq!(data.len(), 24);
897            for (i, val) in data.iter().enumerate() {
898                assert_eq!(*val, i as i32);
899            }
900        }
901
902        std::fs::remove_file(&path).ok();
903    }
904
905    #[test]
906    #[cfg(feature = "deflate")]
907    fn compressed_chunked_roundtrip() {
908        let path = temp_path("compressed_roundtrip");
909
910        // Write compressed
911        {
912            let file = H5File::create(&path).unwrap();
913            let ds = file
914                .new_dataset::<f64>()
915                .shape([0usize, 4])
916                .chunk(&[1, 4])
917                .max_shape(&[None, Some(4)])
918                .deflate(6)
919                .create("compressed")
920                .unwrap();
921
922            for frame in 0..10u64 {
923                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
924                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
925                ds.write_chunk(frame as usize, &raw).unwrap();
926            }
927            ds.extend(&[10, 4]).unwrap();
928            file.close().unwrap();
929        }
930
931        // Read back and verify
932        {
933            let file = H5File::open(&path).unwrap();
934            let ds = file.dataset("compressed").unwrap();
935            assert_eq!(ds.shape(), vec![10, 4]);
936            let data = ds.read_raw::<f64>().unwrap();
937            assert_eq!(data.len(), 40);
938            for (i, val) in data.iter().enumerate() {
939                assert!(
940                    (val - i as f64).abs() < 1e-10,
941                    "mismatch at {}: {} != {}",
942                    i,
943                    val,
944                    i
945                );
946            }
947        }
948
949        std::fs::remove_file(&path).ok();
950    }
951
952    #[test]
953    #[cfg(feature = "deflate")]
954    fn compressed_chunked_many_frames() {
955        let path = temp_path("compressed_many");
956
957        {
958            let file = H5File::create(&path).unwrap();
959            let ds = file
960                .new_dataset::<i32>()
961                .shape([0usize, 3])
962                .chunk(&[1, 3])
963                .max_shape(&[None, Some(3)])
964                .deflate(6)
965                .create("stream")
966                .unwrap();
967
968            for frame in 0..100u64 {
969                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
970                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
971                ds.write_chunk(frame as usize, &raw).unwrap();
972            }
973            ds.extend(&[100, 3]).unwrap();
974            file.close().unwrap();
975        }
976
977        {
978            let file = H5File::open(&path).unwrap();
979            let ds = file.dataset("stream").unwrap();
980            assert_eq!(ds.shape(), vec![100, 3]);
981            let data = ds.read_raw::<i32>().unwrap();
982            assert_eq!(data.len(), 300);
983            for (i, val) in data.iter().enumerate() {
984                assert_eq!(*val, i as i32, "mismatch at {}", i);
985            }
986        }
987
988        std::fs::remove_file(&path).ok();
989    }
990    #[test]
991    fn append_mode() {
992        let path = temp_path("append");
993
994        // Create initial file
995        {
996            let file = H5File::create(&path).unwrap();
997            let ds = file
998                .new_dataset::<i32>()
999                .shape([3usize])
1000                .create("first")
1001                .unwrap();
1002            ds.write_raw(&[1i32, 2, 3]).unwrap();
1003            file.close().unwrap();
1004        }
1005
1006        // Append new dataset
1007        {
1008            let file = H5File::open_rw(&path).unwrap();
1009            let ds = file
1010                .new_dataset::<f64>()
1011                .shape([2usize])
1012                .create("second")
1013                .unwrap();
1014            ds.write_raw(&[4.0f64, 5.0]).unwrap();
1015            file.close().unwrap();
1016        }
1017
1018        // Read back both
1019        {
1020            let file = H5File::open(&path).unwrap();
1021            let names = file.dataset_names();
1022            assert!(names.contains(&"first".to_string()));
1023            assert!(names.contains(&"second".to_string()));
1024
1025            let ds1 = file.dataset("first").unwrap();
1026            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);
1027
1028            let ds2 = file.dataset("second").unwrap();
1029            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
1030        }
1031
1032        std::fs::remove_file(&path).ok();
1033    }
1034
1035    #[test]
1036    fn open_rw_set_attr_preserves_file() {
1037        let path = temp_path("open_rw_attr");
1038        // Create file with a dataset and an attribute
1039        {
1040            let file = H5File::create(&path).unwrap();
1041            let ds = file
1042                .new_dataset::<i32>()
1043                .shape([3usize])
1044                .create("data")
1045                .unwrap();
1046            ds.write_raw(&[10i32, 20, 30]).unwrap();
1047            file.set_attr_string("version", "1.0").unwrap();
1048            file.close().unwrap();
1049        }
1050        // Open rw and modify the attribute
1051        {
1052            let file = H5File::open_rw(&path).unwrap();
1053            file.set_attr_string("version", "2.0").unwrap();
1054            file.close().unwrap();
1055        }
1056        // Verify: dataset intact, attribute updated
1057        {
1058            let file = H5File::open(&path).unwrap();
1059            let ds = file.dataset("data").unwrap();
1060            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
1061            let ver = file.attr_string("version").unwrap();
1062            assert_eq!(ver, "2.0");
1063        }
1064        std::fs::remove_file(&path).ok();
1065    }
1066
1067    #[test]
1068    #[cfg(feature = "deflate")]
1069    fn open_rw_attr_with_compressed_dataset() {
1070        use crate::format::messages::filter::FilterPipeline;
1071        let path = temp_path("open_rw_compressed");
1072        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
1073        // Create file with compressed vlen strings
1074        {
1075            let file = H5File::create(&path).unwrap();
1076            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1077                .unwrap();
1078            file.set_attr_string("version", "1.0").unwrap();
1079            file.close().unwrap();
1080        }
1081        // Open rw and modify attribute only
1082        {
1083            let file = H5File::open_rw(&path).unwrap();
1084            file.set_attr_string("version", "2.0").unwrap();
1085            file.close().unwrap();
1086        }
1087        // Verify: compressed dataset still readable, attribute updated
1088        {
1089            let file = H5File::open(&path).unwrap();
1090            let ds = file.dataset("texts").unwrap();
1091            let strings = ds.read_vlen_strings().unwrap();
1092            assert_eq!(strings.len(), 50);
1093            assert_eq!(strings[0], "test string data");
1094            let ver = file.attr_string("version").unwrap();
1095            assert_eq!(ver, "2.0");
1096        }
1097        std::fs::remove_file(&path).ok();
1098    }
1099
1100    #[test]
1101    #[cfg(feature = "lz4")]
1102    fn append_vlen_strings_basic() {
1103        use crate::format::messages::filter::FilterPipeline;
1104        let path = temp_path("append_vlen");
1105        {
1106            let file = H5File::create(&path).unwrap();
1107            file.create_appendable_vlen_dataset("names", 4, Some(FilterPipeline::lz4()))
1108                .unwrap();
1109            file.append_vlen_strings("names", &["alice", "bob", "charlie"])
1110                .unwrap();
1111            file.append_vlen_strings("names", &["dave", "eve"]).unwrap();
1112            file.close().unwrap();
1113        }
1114        {
1115            let file = H5File::open(&path).unwrap();
1116            let ds = file.dataset("names").unwrap();
1117            let strings = ds.read_vlen_strings().unwrap();
1118            assert_eq!(strings, vec!["alice", "bob", "charlie", "dave", "eve"]);
1119        }
1120        std::fs::remove_file(&path).ok();
1121    }
1122
1123    #[test]
1124    #[cfg(feature = "lz4")]
1125    fn append_vlen_strings_large() {
1126        use crate::format::messages::filter::FilterPipeline;
1127        let path = temp_path("append_vlen_large");
1128        let batch1: Vec<String> = (0..5000).map(|i| format!("node-{:06}", i)).collect();
1129        let batch2: Vec<String> = (5000..7189).map(|i| format!("node-{:06}", i)).collect();
1130        {
1131            let file = H5File::create(&path).unwrap();
1132            file.create_appendable_vlen_dataset("data", 512, Some(FilterPipeline::lz4()))
1133                .unwrap();
1134            let r1: Vec<&str> = batch1.iter().map(|s| s.as_str()).collect();
1135            file.append_vlen_strings("data", &r1).unwrap();
1136            let r2: Vec<&str> = batch2.iter().map(|s| s.as_str()).collect();
1137            file.append_vlen_strings("data", &r2).unwrap();
1138            file.close().unwrap();
1139        }
1140        {
1141            let file = H5File::open(&path).unwrap();
1142            let ds = file.dataset("data").unwrap();
1143            let strings = ds.read_vlen_strings().unwrap();
1144            assert_eq!(strings.len(), 7189);
1145            assert_eq!(strings[0], "node-000000");
1146            assert_eq!(strings[7188], "node-007188");
1147        }
1148        std::fs::remove_file(&path).ok();
1149    }
1150
1151    #[test]
1152    fn append_vlen_strings_uncompressed() {
1153        let path = temp_path("append_vlen_unc");
1154        {
1155            let file = H5File::create(&path).unwrap();
1156            file.create_appendable_vlen_dataset("texts", 8, None)
1157                .unwrap();
1158            file.append_vlen_strings("texts", &["hello", "world"])
1159                .unwrap();
1160            file.append_vlen_strings("texts", &["foo", "bar", "baz"])
1161                .unwrap();
1162            file.close().unwrap();
1163        }
1164        {
1165            let file = H5File::open(&path).unwrap();
1166            let ds = file.dataset("texts").unwrap();
1167            let strings = ds.read_vlen_strings().unwrap();
1168            assert_eq!(strings, vec!["hello", "world", "foo", "bar", "baz"]);
1169        }
1170        std::fs::remove_file(&path).ok();
1171    }
1172
1173    #[test]
1174    fn delete_dataset_roundtrip() {
1175        let path = temp_path("delete_ds");
1176        {
1177            let file = H5File::create(&path).unwrap();
1178            file.write_vlen_strings("keep", &["a", "b"]).unwrap();
1179            file.write_vlen_strings("remove", &["x", "y"]).unwrap();
1180            file.delete_dataset("remove").unwrap();
1181            file.close().unwrap();
1182        }
1183        {
1184            let file = H5File::open(&path).unwrap();
1185            let names = file.dataset_names();
1186            assert!(names.contains(&"keep".to_string()));
1187            assert!(!names.contains(&"remove".to_string()));
1188            let ds = file.dataset("keep").unwrap();
1189            assert_eq!(ds.read_vlen_strings().unwrap(), vec!["a", "b"]);
1190        }
1191        std::fs::remove_file(&path).ok();
1192    }
1193
1194    #[test]
1195    fn delete_group_roundtrip() {
1196        let path = temp_path("delete_grp");
1197        {
1198            let file = H5File::create(&path).unwrap();
1199            let g1 = file.create_group("keep").unwrap();
1200            g1.write_vlen_strings("data", &["a"]).unwrap();
1201            let g2 = file.create_group("remove").unwrap();
1202            g2.write_vlen_strings("data", &["x"]).unwrap();
1203            file.delete_group("remove").unwrap();
1204            file.close().unwrap();
1205        }
1206        {
1207            let file = H5File::open(&path).unwrap();
1208            let names = file.dataset_names();
1209            assert!(names.contains(&"keep/data".to_string()));
1210            assert!(!names.contains(&"remove/data".to_string()));
1211        }
1212        std::fs::remove_file(&path).ok();
1213    }
1214
1215    #[test]
1216    fn open_rw_delete_recreate_group() {
1217        let path = temp_path("rw_delete_recreate");
1218        // Step 1: create file with groups
1219        {
1220            let file = H5File::create(&path).unwrap();
1221            let n = file.create_group("nodes").unwrap();
1222            n.write_vlen_strings("id", &["a", "b", "c"]).unwrap();
1223            let e = file.create_group("edges").unwrap();
1224            e.write_vlen_strings("src", &["x", "y"]).unwrap();
1225            file.close().unwrap();
1226        }
1227        // Step 2: open_rw, delete one group, recreate with new data
1228        {
1229            let file = H5File::open_rw(&path).unwrap();
1230            file.delete_group("nodes").unwrap();
1231            let n = file.create_group("nodes").unwrap();
1232            n.write_vlen_strings("id", &["new1", "new2"]).unwrap();
1233            file.close().unwrap();
1234        }
1235        // Step 3: verify
1236        {
1237            let file = H5File::open(&path).unwrap();
1238            let ds = file.dataset("nodes/id").unwrap();
1239            let s = ds.read_vlen_strings().unwrap();
1240            assert_eq!(s, vec!["new1", "new2"]);
1241            // edges should still be intact
1242            let ds = file.dataset("edges/src").unwrap();
1243            let s = ds.read_vlen_strings().unwrap();
1244            assert_eq!(s, vec!["x", "y"]);
1245        }
1246        std::fs::remove_file(&path).ok();
1247    }
1248
1249    #[test]
1250    fn delete_and_recreate_group() {
1251        let path = temp_path("delete_recreate");
1252        {
1253            let file = H5File::create(&path).unwrap();
1254            let g = file.create_group("nodes").unwrap();
1255            g.write_vlen_strings("id", &["old1", "old2"]).unwrap();
1256            file.delete_group("nodes").unwrap();
1257            let g = file.create_group("nodes").unwrap();
1258            g.write_vlen_strings("id", &["new1", "new2", "new3"])
1259                .unwrap();
1260            file.close().unwrap();
1261        }
1262        {
1263            let file = H5File::open(&path).unwrap();
1264            let ds = file.dataset("nodes/id").unwrap();
1265            let strings = ds.read_vlen_strings().unwrap();
1266            assert_eq!(strings, vec!["new1", "new2", "new3"]);
1267        }
1268        std::fs::remove_file(&path).ok();
1269    }
1270
1271    #[test]
1272    #[cfg(feature = "deflate")]
1273    fn vlen_string_compressed_large_roundtrip() {
1274        use crate::format::messages::filter::FilterPipeline;
1275        let path = temp_path("vlen_large");
1276        // Simulate kodex scenario: 7189 strings, chunk_size 512
1277        let input: Vec<String> = (0..7189)
1278            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
1279            .collect();
1280        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
1281        {
1282            let file = H5File::create(&path).unwrap();
1283            file.create_group("nodes").unwrap();
1284            file.write_vlen_strings_compressed(
1285                "nodes/id",
1286                &input_refs,
1287                512,
1288                FilterPipeline::deflate(6),
1289            )
1290            .unwrap();
1291            file.close().unwrap();
1292        }
1293        // Read back
1294        {
1295            let file = H5File::open(&path).unwrap();
1296            let ds = file.dataset("nodes/id").unwrap();
1297            let strings = ds.read_vlen_strings().unwrap();
1298            assert_eq!(strings.len(), 7189);
1299            assert_eq!(strings[0], input[0]);
1300            assert_eq!(strings[7188], input[7188]);
1301        }
1302        // Also test open_rw then re-read
1303        {
1304            let file = H5File::open_rw(&path).unwrap();
1305            file.set_attr_string("version", "1.0").unwrap();
1306            file.close().unwrap();
1307        }
1308        {
1309            let file = H5File::open(&path).unwrap();
1310            let ds = file.dataset("nodes/id").unwrap();
1311            let strings = ds.read_vlen_strings().unwrap();
1312            assert_eq!(strings.len(), 7189);
1313            assert_eq!(strings[0], input[0]);
1314        }
1315        std::fs::remove_file(&path).ok();
1316    }
1317
1318    #[test]
1319    fn vlen_string_write_read() {
1320        let path = temp_path("vlen_wr");
1321        {
1322            let file = H5File::create(&path).unwrap();
1323            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
1324                .unwrap();
1325            file.close().unwrap();
1326        }
1327        {
1328            let file = H5File::open(&path).unwrap();
1329            let ds = file.dataset("names").unwrap();
1330            let strings = ds.read_vlen_strings().unwrap();
1331            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
1332        }
1333        std::fs::remove_file(&path).ok();
1334    }
1335
1336    #[test]
1337    #[cfg(feature = "deflate")]
1338    fn vlen_string_deflate_roundtrip() {
1339        use crate::format::messages::filter::FilterPipeline;
1340        let path = temp_path("vlen_deflate");
1341        let input: Vec<&str> = (0..100)
1342            .map(|i| match i % 3 {
1343                0 => "hello world",
1344                1 => "compressed vlen string test",
1345                _ => "rust-hdf5",
1346            })
1347            .collect();
1348        {
1349            let file = H5File::create(&path).unwrap();
1350            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
1351                .unwrap();
1352            file.close().unwrap();
1353        }
1354        {
1355            let file = H5File::open(&path).unwrap();
1356            let ds = file.dataset("texts").unwrap();
1357            let strings = ds.read_vlen_strings().unwrap();
1358            assert_eq!(strings.len(), 100);
1359            for (i, s) in strings.iter().enumerate() {
1360                assert_eq!(s, input[i]);
1361            }
1362        }
1363        std::fs::remove_file(&path).ok();
1364    }
1365
1366    #[test]
1367    #[cfg(feature = "zstd")]
1368    fn vlen_string_zstd_roundtrip() {
1369        use crate::format::messages::filter::FilterPipeline;
1370        let path = temp_path("vlen_zstd");
1371        let input: Vec<&str> = (0..200)
1372            .map(|i| match i % 4 {
1373                0 => "zstandard compression test",
1374                1 => "variable length string",
1375                2 => "rust-hdf5 chunked storage",
1376                _ => "hello zstd world",
1377            })
1378            .collect();
1379        {
1380            let file = H5File::create(&path).unwrap();
1381            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
1382                .unwrap();
1383            file.close().unwrap();
1384        }
1385        {
1386            let file = H5File::open(&path).unwrap();
1387            let ds = file.dataset("data").unwrap();
1388            let strings = ds.read_vlen_strings().unwrap();
1389            assert_eq!(strings.len(), 200);
1390            for (i, s) in strings.iter().enumerate() {
1391                assert_eq!(s, input[i]);
1392            }
1393        }
1394        std::fs::remove_file(&path).ok();
1395    }
1396
1397    #[test]
1398    #[cfg(feature = "deflate")]
1399    fn shuffle_deflate_roundtrip() {
1400        let path = temp_path("shuf_defl");
1401        {
1402            let file = H5File::create(&path).unwrap();
1403            let ds = file
1404                .new_dataset::<f64>()
1405                .shape([0usize, 4])
1406                .chunk(&[1, 4])
1407                .max_shape(&[None, Some(4)])
1408                .shuffle_deflate(6)
1409                .create("data")
1410                .unwrap();
1411            for frame in 0..20u64 {
1412                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
1413                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1414                ds.write_chunk(frame as usize, &raw).unwrap();
1415            }
1416            ds.extend(&[20, 4]).unwrap();
1417            file.close().unwrap();
1418        }
1419        {
1420            let file = H5File::open(&path).unwrap();
1421            let ds = file.dataset("data").unwrap();
1422            assert_eq!(ds.shape(), vec![20, 4]);
1423            let data = ds.read_raw::<f64>().unwrap();
1424            assert_eq!(data.len(), 80);
1425            for (i, val) in data.iter().enumerate() {
1426                assert!((val - i as f64).abs() < 1e-10);
1427            }
1428        }
1429        std::fs::remove_file(&path).ok();
1430    }
1431
1432    #[test]
1433    fn file_level_attributes() {
1434        let path = temp_path("file_attr");
1435        {
1436            let file = H5File::create(&path).unwrap();
1437            file.set_attr_string("title", "Test File").unwrap();
1438            file.set_attr_numeric("version", &42i32).unwrap();
1439            let ds = file
1440                .new_dataset::<u8>()
1441                .shape([1usize])
1442                .create("dummy")
1443                .unwrap();
1444            ds.write_raw(&[0u8]).unwrap();
1445            file.close().unwrap();
1446        }
1447        {
1448            let file = H5File::open(&path).unwrap();
1449            assert!(file.dataset_names().contains(&"dummy".to_string()));
1450
1451            // Read file-level attributes
1452            let names = file.attr_names().unwrap();
1453            assert!(names.contains(&"title".to_string()));
1454
1455            let title = file.attr_string("title").unwrap();
1456            assert_eq!(title, "Test File");
1457        }
1458        std::fs::remove_file(&path).ok();
1459    }
1460
1461    #[test]
1462    fn scalar_dataset_roundtrip() {
1463        let path = temp_path("scalar");
1464        {
1465            let file = H5File::create(&path).unwrap();
1466            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
1467            ds.write_raw(&[std::f64::consts::PI]).unwrap();
1468            file.close().unwrap();
1469        }
1470        {
1471            let file = H5File::open(&path).unwrap();
1472            let ds = file.dataset("pi").unwrap();
1473            assert_eq!(ds.shape(), Vec::<usize>::new());
1474            assert_eq!(ds.total_elements(), 1);
1475            let data = ds.read_raw::<f64>().unwrap();
1476            assert_eq!(data.len(), 1);
1477            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
1478        }
1479        std::fs::remove_file(&path).ok();
1480    }
1481
1482    #[test]
1483    fn append_mode_extend_chunked() {
1484        let path = temp_path("append_extend");
1485
1486        // Create with 5 frames
1487        {
1488            let file = H5File::create(&path).unwrap();
1489            let ds = file
1490                .new_dataset::<i32>()
1491                .shape([0usize, 3])
1492                .chunk(&[1, 3])
1493                .max_shape(&[None, Some(3)])
1494                .create("stream")
1495                .unwrap();
1496            for i in 0..5u64 {
1497                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1498                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1499                ds.write_chunk(i as usize, &raw).unwrap();
1500            }
1501            ds.extend(&[5, 3]).unwrap();
1502            file.close().unwrap();
1503        }
1504
1505        // Reopen and add 5 more frames
1506        {
1507            let file = H5File::open_rw(&path).unwrap();
1508            // Find the stream dataset index (it's the first one)
1509            let names = file.dataset_names();
1510            assert!(names.contains(&"stream".to_string()));
1511
1512            // Write more chunks via the writer directly
1513            let mut inner = crate::file::borrow_inner_mut(&file.inner);
1514            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
1515                let ds_idx = writer.dataset_index("stream").unwrap();
1516                for i in 5..10u64 {
1517                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
1518                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
1519                    writer.write_chunk(ds_idx, i, &raw).unwrap();
1520                }
1521                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
1522            }
1523            drop(inner);
1524            file.close().unwrap();
1525        }
1526
1527        // Read back all 10 frames
1528        {
1529            let file = H5File::open(&path).unwrap();
1530            let ds = file.dataset("stream").unwrap();
1531            assert_eq!(ds.shape(), vec![10, 3]);
1532            let data = ds.read_raw::<i32>().unwrap();
1533            assert_eq!(data.len(), 30);
1534            for (i, val) in data.iter().enumerate() {
1535                assert_eq!(*val, i as i32, "mismatch at {}", i);
1536            }
1537        }
1538
1539        std::fs::remove_file(&path).ok();
1540    }
1541
1542    #[test]
1543    fn group_hierarchy_roundtrip() {
1544        let path = temp_path("groups_rt");
1545
1546        {
1547            let file = H5File::create(&path).unwrap();
1548            let root = file.root_group();
1549
1550            // Create groups
1551            let det = root.create_group("detector").unwrap();
1552            let raw = det.create_group("raw").unwrap();
1553
1554            // Create datasets in groups
1555            let ds1 = det
1556                .new_dataset::<f32>()
1557                .shape([10usize])
1558                .create("temperature")
1559                .unwrap();
1560            ds1.write_raw(&[1.0f32; 10]).unwrap();
1561
1562            let ds2 = raw
1563                .new_dataset::<u16>()
1564                .shape([4usize, 4])
1565                .create("image")
1566                .unwrap();
1567            ds2.write_raw(&[42u16; 16]).unwrap();
1568
1569            // Root-level dataset
1570            let ds3 = file
1571                .new_dataset::<i32>()
1572                .shape([3usize])
1573                .create("version")
1574                .unwrap();
1575            ds3.write_raw(&[1i32, 0, 0]).unwrap();
1576
1577            file.close().unwrap();
1578        }
1579
1580        {
1581            let file = H5File::open(&path).unwrap();
1582            let names = file.dataset_names();
1583            assert!(names.contains(&"version".to_string()));
1584            assert!(names.contains(&"detector/temperature".to_string()));
1585            assert!(names.contains(&"detector/raw/image".to_string()));
1586
1587            // Read datasets
1588            let ds = file.dataset("version").unwrap();
1589            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);
1590
1591            let ds = file.dataset("detector/temperature").unwrap();
1592            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);
1593
1594            let ds = file.dataset("detector/raw/image").unwrap();
1595            assert_eq!(ds.shape(), vec![4, 4]);
1596            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);
1597
1598            // Group traversal
1599            let root = file.root_group();
1600            let group_names = root.group_names().unwrap();
1601            assert!(group_names.contains(&"detector".to_string()));
1602        }
1603
1604        std::fs::remove_file(&path).ok();
1605    }
1606
1607    #[test]
1608    fn nested_groups_via_file_create_group() {
1609        let path = temp_path("file_create_group");
1610
1611        {
1612            let file = H5File::create(&path).unwrap();
1613
1614            // Use the H5File::create_group convenience method
1615            let grp = file.create_group("sensors").unwrap();
1616            let sub = grp.create_group("accel").unwrap();
1617
1618            let ds = sub
1619                .new_dataset::<f64>()
1620                .shape([3usize])
1621                .create("xyz")
1622                .unwrap();
1623            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();
1624
1625            file.close().unwrap();
1626        }
1627
1628        {
1629            let file = H5File::open(&path).unwrap();
1630            let names = file.dataset_names();
1631            assert!(names.contains(&"sensors/accel/xyz".to_string()));
1632
1633            let ds = file.dataset("sensors/accel/xyz").unwrap();
1634            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);
1635
1636            // Open group in read mode
1637            let root = file.root_group();
1638            let sensors = root.group("sensors").unwrap();
1639            assert_eq!(sensors.name(), "/sensors");
1640
1641            let accel = sensors.group("accel").unwrap();
1642            assert_eq!(accel.name(), "/sensors/accel");
1643
1644            // list_groups from root
1645            let top_groups = root.group_names().unwrap();
1646            assert!(top_groups.contains(&"sensors".to_string()));
1647
1648            // list_groups from sensors
1649            let sub_groups = sensors.group_names().unwrap();
1650            assert!(sub_groups.contains(&"accel".to_string()));
1651        }
1652
1653        std::fs::remove_file(&path).ok();
1654    }
1655}
1656
/// Interoperability tests against tooling outside this crate: files written
/// here are checked with the external `h5dump` program, and a file produced
/// by h5py is read back.
///
/// Both tests skip themselves (with a note on stderr) when the external tool
/// or fixture file they depend on is not present.
#[cfg(test)]
mod h5py_compat_tests {
    use super::*;

    /// Returns the scratch-file path for `name`, delegating to the parent
    /// module's `unique_test_path`.
    fn temp_path(name: &str) -> std::path::PathBuf {
        super::unique_test_path(name)
    }

    /// Verify our files can be read by h5dump (if available).
    ///
    /// Writes a file containing a contiguous dataset, a chunked + deflated
    /// dataset, a dataset inside a group and a string attribute, then runs
    /// `h5dump` twice (header only, then full dump) and requires both runs to
    /// exit successfully.
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // Probe for the tool; failing to spawn it is treated as "not installed".
        let probe = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if probe.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = temp_path("h5dump_validate");

        // Write a comprehensive test file
        {
            use crate::types::VarLenUnicode;

            let file = H5File::create(&path).unwrap();

            // Contiguous
            let matrix = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let values: Vec<f64> = (0..12).map(|i| i as f64).collect();
            matrix.write_raw(&values).unwrap();

            // Chunked + compressed: starts with zero rows and is filled one
            // two-element chunk at a time.
            let stream = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for row in 0..5usize {
                // Row `r` holds the pair (2r, 2r + 1), serialized little-endian.
                let first = row as i32 * 2;
                let raw: Vec<u8> = [first, first + 1]
                    .iter()
                    .flat_map(|v| v.to_le_bytes())
                    .collect();
                stream.write_chunk(row, &raw).unwrap();
            }
            stream.extend(&[5, 2]).unwrap();

            // Group
            let meta = file.create_group("meta").unwrap();
            let flags = meta
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            flags.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // String attribute
            let units = matrix
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            units.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Run h5dump and verify exit code.
        // The path is passed as an OS string so a non-UTF-8 temp directory
        // cannot make the test panic before h5dump even runs.
        let header_dump = std::process::Command::new("h5dump")
            .arg("-H") // header only (faster)
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            header_dump.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&header_dump.stdout),
            String::from_utf8_lossy(&header_dump.stderr),
        );

        // Full dump (with data) should also work
        let full_dump = std::process::Command::new("h5dump")
            .arg(&path)
            .output()
            .unwrap();

        assert!(
            full_dump.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&full_dump.stderr),
        );

        // Best-effort cleanup; a failure to delete is deliberately ignored.
        std::fs::remove_file(&path).ok();
    }

    /// Reads a pre-existing fixture at `/tmp/test_h5py_default.h5` and checks
    /// the shapes and a few values of its `data` and `images` datasets.
    ///
    /// The fixture is not created by this test; when it is missing the test
    /// prints a note and returns without failing.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        // `data`: 4x5 f64 values, checked at the first and last element.
        let data_ds = file.dataset("data").unwrap();
        assert_eq!(data_ds.shape(), vec![4, 5]);
        let data = data_ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        assert!((data[0]).abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        // `images`: 3x64x64 u16 values; only the element count is checked.
        let images_ds = file.dataset("images").unwrap();
        assert_eq!(images_ds.shape(), vec![3, 64, 64]);
        let images = images_ds.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}