1use std::path::Path;
20
21use crate::io::{Hdf5Reader, Hdf5Writer};
22
23use crate::dataset::{DatasetBuilder, H5Dataset};
24use crate::error::{Hdf5Error, Result};
25use crate::format::messages::filter::FilterPipeline;
26use crate::group::H5Group;
27use crate::types::H5Type;
28
#[cfg(not(feature = "threadsafe"))]
/// Shared, interior-mutable file state for single-threaded builds:
/// cheap `Rc` clones, `RefCell` run-time borrow checking.
pub(crate) type SharedInner = std::rc::Rc<std::cell::RefCell<H5FileInner>>;
36
#[cfg(feature = "threadsafe")]
/// Shared, interior-mutable file state for thread-safe builds:
/// `Arc` for cross-thread sharing, `Mutex` for exclusive access.
pub(crate) type SharedInner = std::sync::Arc<std::sync::Mutex<H5FileInner>>;
39
40#[cfg(not(feature = "threadsafe"))]
42pub(crate) fn borrow_inner(inner: &SharedInner) -> std::cell::Ref<'_, H5FileInner> {
43 inner.borrow()
44}
45
46#[cfg(not(feature = "threadsafe"))]
48pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::cell::RefMut<'_, H5FileInner> {
49 inner.borrow_mut()
50}
51
52#[cfg(not(feature = "threadsafe"))]
54pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
55 std::rc::Rc::clone(inner)
56}
57
58#[cfg(not(feature = "threadsafe"))]
60pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
61 std::rc::Rc::new(std::cell::RefCell::new(inner))
62}
63
#[cfg(feature = "threadsafe")]
/// Locks the shared file state for shared (read-style) access.
///
/// # Panics
/// Panics if the mutex is poisoned, i.e. a previous holder panicked while
/// mutating the file state. The `expect` message names the invariant so the
/// failure is diagnosable (a bare `unwrap()` only prints `PoisonError`).
pub(crate) fn borrow_inner(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner
        .lock()
        .expect("H5File state mutex poisoned by a panicking thread")
}
68
#[cfg(feature = "threadsafe")]
/// Locks the shared file state for exclusive (write-style) access.
/// Identical to [`borrow_inner`] — a `Mutex` guard is always exclusive —
/// but kept separate to mirror the `RefCell` API of the non-threadsafe build.
///
/// # Panics
/// Panics if the mutex is poisoned; the `expect` message states why, which a
/// bare `unwrap()` would not.
pub(crate) fn borrow_inner_mut(inner: &SharedInner) -> std::sync::MutexGuard<'_, H5FileInner> {
    inner
        .lock()
        .expect("H5File state mutex poisoned by a panicking thread")
}
73
#[cfg(feature = "threadsafe")]
/// Bumps the `Arc` reference count; the mutex-guarded state is shared,
/// not deep-copied.
pub(crate) fn clone_inner(inner: &SharedInner) -> SharedInner {
    SharedInner::clone(inner)
}
78
#[cfg(feature = "threadsafe")]
/// Wraps freshly created file state in the thread-safe shared container.
pub(crate) fn new_shared(inner: H5FileInner) -> SharedInner {
    use std::sync::{Arc, Mutex};
    Arc::new(Mutex::new(inner))
}
83
/// Internal state of an open file: exactly one of writer, reader, or closed.
/// Methods on `H5File` match on this to decide which operations are legal.
pub(crate) enum H5FileInner {
    /// File opened for writing (via `create` or `open_rw`).
    Writer(Hdf5Writer),
    /// File opened read-only (via `open`).
    Reader(Hdf5Reader),
    /// File has been closed; subsequent operations error or no-op.
    Closed,
}
95
/// Handle to an HDF5 file.
///
/// Groups and datasets created from this handle share the same `SharedInner`
/// state, so closing the file (which swaps the state to `Closed`) is visible
/// to every derived handle.
pub struct H5File {
    /// Shared, interior-mutable file state (`Rc<RefCell<_>>` or
    /// `Arc<Mutex<_>>` depending on the `threadsafe` feature).
    pub(crate) inner: SharedInner,
}
104
105impl H5File {
106 pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
108 let writer = Hdf5Writer::create(path.as_ref())?;
109 Ok(Self {
110 inner: new_shared(H5FileInner::Writer(writer)),
111 })
112 }
113
114 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
116 let reader = Hdf5Reader::open(path.as_ref())?;
117 Ok(Self {
118 inner: new_shared(H5FileInner::Reader(reader)),
119 })
120 }
121
122 pub fn open_rw<P: AsRef<Path>>(path: P) -> Result<Self> {
136 let writer = Hdf5Writer::open_append(path.as_ref())?;
137 Ok(Self {
138 inner: new_shared(H5FileInner::Writer(writer)),
139 })
140 }
141
142 pub fn root_group(&self) -> H5Group {
146 H5Group::new(clone_inner(&self.inner), "/".to_string())
147 }
148
149 pub fn create_group(&self, name: &str) -> Result<H5Group> {
157 self.root_group().create_group(name)
158 }
159
160 pub fn new_dataset<T: H5Type>(&self) -> DatasetBuilder<T> {
171 DatasetBuilder::new(clone_inner(&self.inner))
172 }
173
174 pub fn set_attr_string(&self, name: &str, value: &str) -> Result<()> {
176 use crate::format::messages::attribute::AttributeMessage;
177 let attr = AttributeMessage::scalar_string(name, value);
178 let mut inner = borrow_inner_mut(&self.inner);
179 match &mut *inner {
180 H5FileInner::Writer(writer) => {
181 writer.add_root_attribute(attr);
182 Ok(())
183 }
184 _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
185 }
186 }
187
188 pub fn set_attr_numeric<T: crate::types::H5Type>(&self, name: &str, value: &T) -> Result<()> {
190 use crate::format::messages::attribute::AttributeMessage;
191 let es = T::element_size();
192 let raw = unsafe { std::slice::from_raw_parts(value as *const T as *const u8, es) };
193 let attr = AttributeMessage::scalar_numeric(name, T::hdf5_type(), raw.to_vec());
194 let mut inner = borrow_inner_mut(&self.inner);
195 match &mut *inner {
196 H5FileInner::Writer(writer) => {
197 writer.add_root_attribute(attr);
198 Ok(())
199 }
200 _ => Err(Hdf5Error::InvalidState("cannot write in read mode".into())),
201 }
202 }
203
204 pub fn attr_names(&self) -> Result<Vec<String>> {
206 let inner = borrow_inner(&self.inner);
207 match &*inner {
208 H5FileInner::Reader(reader) => Ok(reader.root_attr_names()),
209 _ => Ok(vec![]),
210 }
211 }
212
213 pub fn attr_string(&self, name: &str) -> Result<String> {
215 let inner = borrow_inner(&self.inner);
216 match &*inner {
217 H5FileInner::Reader(reader) => {
218 let attr = reader
219 .root_attr(name)
220 .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
221 let end = attr
222 .data
223 .iter()
224 .position(|&b| b == 0)
225 .unwrap_or(attr.data.len());
226 Ok(String::from_utf8_lossy(&attr.data[..end]).to_string())
227 }
228 _ => Err(Hdf5Error::InvalidState("not in read mode".into())),
229 }
230 }
231
232 pub fn is_writable(&self) -> bool {
234 let inner = borrow_inner(&self.inner);
235 matches!(&*inner, H5FileInner::Writer(_))
236 }
237
238 pub fn write_vlen_strings(&self, name: &str, strings: &[&str]) -> Result<()> {
243 let mut inner = borrow_inner_mut(&self.inner);
244 match &mut *inner {
245 H5FileInner::Writer(writer) => {
246 let idx = writer.create_vlen_string_dataset(name, strings)?;
247 if let Some(slash_pos) = name.rfind('/') {
249 let group_path = &name[..slash_pos];
250 let abs_group_path = if group_path.starts_with('/') {
251 group_path.to_string()
252 } else {
253 format!("/{}", group_path)
254 };
255 writer.assign_dataset_to_group(&abs_group_path, idx)?;
256 }
257 Ok(())
258 }
259 H5FileInner::Reader(_) => {
260 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
261 }
262 H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
263 }
264 }
265
266 pub fn write_vlen_strings_compressed(
273 &self,
274 name: &str,
275 strings: &[&str],
276 chunk_size: usize,
277 pipeline: FilterPipeline,
278 ) -> Result<()> {
279 let mut inner = borrow_inner_mut(&self.inner);
280 match &mut *inner {
281 H5FileInner::Writer(writer) => {
282 let idx = writer
283 .create_vlen_string_dataset_compressed(name, strings, chunk_size, pipeline)?;
284 if let Some(slash_pos) = name.rfind('/') {
285 let group_path = &name[..slash_pos];
286 let abs_group_path = if group_path.starts_with('/') {
287 group_path.to_string()
288 } else {
289 format!("/{}", group_path)
290 };
291 writer.assign_dataset_to_group(&abs_group_path, idx)?;
292 }
293 Ok(())
294 }
295 H5FileInner::Reader(_) => {
296 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
297 }
298 H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".into())),
299 }
300 }
301
302 pub fn dataset(&self, name: &str) -> Result<H5Dataset> {
304 let inner = borrow_inner(&self.inner);
305 match &*inner {
306 H5FileInner::Reader(reader) => {
307 let info = reader
308 .dataset_info(name)
309 .ok_or_else(|| Hdf5Error::NotFound(name.to_string()))?;
310 let shape: Vec<usize> = info.dataspace.dims.iter().map(|&d| d as usize).collect();
311 let element_size = info.datatype.element_size() as usize;
312 Ok(H5Dataset::new_reader(
313 clone_inner(&self.inner),
314 name.to_string(),
315 shape,
316 element_size,
317 ))
318 }
319 H5FileInner::Writer(_) => Err(Hdf5Error::InvalidState(
320 "cannot open a dataset by name in write mode; use new_dataset() instead"
321 .to_string(),
322 )),
323 H5FileInner::Closed => Err(Hdf5Error::InvalidState("file is closed".to_string())),
324 }
325 }
326
327 pub fn dataset_names(&self) -> Vec<String> {
333 let inner = borrow_inner(&self.inner);
334 match &*inner {
335 H5FileInner::Reader(reader) => reader
336 .dataset_names()
337 .iter()
338 .map(|s| s.to_string())
339 .collect(),
340 H5FileInner::Writer(writer) => writer
341 .dataset_names()
342 .iter()
343 .map(|s| s.to_string())
344 .collect(),
345 H5FileInner::Closed => Vec::new(),
346 }
347 }
348
349 pub fn close(self) -> Result<()> {
355 let old = {
356 let mut inner = borrow_inner_mut(&self.inner);
357 std::mem::replace(&mut *inner, H5FileInner::Closed)
358 };
359 match old {
360 H5FileInner::Writer(writer) => {
361 writer.close()?;
362 Ok(())
363 }
364 H5FileInner::Reader(_) => Ok(()),
365 H5FileInner::Closed => Ok(()),
366 }
367 }
368
369 pub fn flush(&self) -> Result<()> {
371 Ok(())
375 }
376}
377
#[cfg(test)]
mod tests {
    //! Unit tests for the basic `H5File` lifecycle and raw read/write paths.
    //! Each test works on its own uniquely named file in the OS temp dir and
    //! removes it afterwards (best-effort via `.ok()`).
    use super::*;
    use std::path::PathBuf;

    /// Builds a unique temp-file path for the given test name.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("hdf5_file_test_{}.h5", name))
    }

    // An explicitly closed empty file must be reopenable for reading.
    #[test]
    fn create_and_close_empty() {
        let path = temp_path("create_empty");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();

        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    // Dropping the handle (no explicit close) must still leave a readable file.
    #[test]
    fn create_and_drop_empty() {
        let path = temp_path("drop_empty");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        file.close().unwrap();

        std::fs::remove_file(&path).ok();
    }

    // Looking up a missing dataset must return an error, not panic.
    #[test]
    fn dataset_not_found() {
        let path = temp_path("ds_not_found");
        {
            let _file = H5File::create(&path).unwrap();
        }
        let file = H5File::open(&path).unwrap();
        let result = file.dataset("nonexistent");
        assert!(result.is_err());

        std::fs::remove_file(&path).ok();
    }

    // Write a 4x4 u8 dataset, reopen, and read back identical bytes.
    #[test]
    fn write_and_read_roundtrip() {
        let path = temp_path("write_read_rt");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([4, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0u8; 16]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            let data = ds.read_raw::<u8>().unwrap();
            assert_eq!(data.len(), 16);
            assert!(data.iter().all(|&b| b == 0));
            file.close().unwrap();
        }

        std::fs::remove_file(&path).ok();
    }

    // Same roundtrip for f64, including exact value comparison.
    #[test]
    fn write_and_read_f64() {
        let path = temp_path("write_read_f64");

        let values: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&values).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, values);
        }

        std::fs::remove_file(&path).ok();
    }

    // Two datasets of different types must coexist in one file.
    #[test]
    fn multiple_datasets() {
        let path = temp_path("multi_ds");

        {
            let file = H5File::create(&path).unwrap();
            let ds1 = file.new_dataset::<i32>().shape([3]).create("ints").unwrap();
            ds1.write_raw(&[10i32, 20, 30]).unwrap();

            let ds2 = file
                .new_dataset::<f32>()
                .shape([2, 2])
                .create("floats")
                .unwrap();
            ds2.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();

            let ds_ints = file.dataset("ints").unwrap();
            assert_eq!(ds_ints.shape(), vec![3]);
            let ints = ds_ints.read_raw::<i32>().unwrap();
            assert_eq!(ints, vec![10, 20, 30]);

            let ds_floats = file.dataset("floats").unwrap();
            assert_eq!(ds_floats.shape(), vec![2, 2]);
            let floats = ds_floats.read_raw::<f32>().unwrap();
            assert_eq!(floats, vec![1.0f32, 2.0, 3.0, 4.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    // NOTE(review): `close(self)` consumes the handle, so a second call cannot
    // be expressed; this test only exercises one close. The internal
    // `H5FileInner::Closed => Ok(())` arm is what makes closing idempotent.
    #[test]
    fn close_is_idempotent() {
        let path = temp_path("close_idemp");
        let file = H5File::create(&path).unwrap();
        file.close().unwrap();
        std::fs::remove_file(&path).ok();
    }
}
532
#[cfg(test)]
mod integration_tests {
    //! End-to-end tests: contiguous/chunked/compressed datasets, groups,
    //! attributes, variable-length strings, and append mode. The
    //! `*_for_h5dump` tests deliberately leave their output files in the temp
    //! dir (presumably for manual inspection with `h5dump`); the roundtrip
    //! tests clean up after themselves.
    use super::*;

    // Writes u8/f64/i32 datasets and leaves the file for external inspection.
    #[test]
    fn write_file_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_integration.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<u8>()
            .shape([4usize, 4])
            .create("data_u8")
            .unwrap();
        let data: Vec<u8> = (0..16).collect();
        ds.write_raw(&data).unwrap();

        let ds2 = file
            .new_dataset::<f64>()
            .shape([3usize, 2])
            .create("data_f64")
            .unwrap();
        let fdata: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
        ds2.write_raw(&fdata).unwrap();

        let ds3 = file
            .new_dataset::<i32>()
            .shape([5usize])
            .create("values")
            .unwrap();
        let idata: Vec<i32> = vec![-10, -5, 0, 5, 10];
        ds3.write_raw(&idata).unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }

    // Streams 5 frames into an extendable chunked f64 dataset.
    #[test]
    fn write_chunked_file_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_chunked.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<f64>()
            .shape([0usize, 4])
            .chunk(&[1, 4])
            .max_shape(&[None, Some(4)])
            .create("streaming_data")
            .unwrap();

        for frame in 0..5u64 {
            let values: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
            // Chunks are written as raw little-endian bytes.
            let raw: Vec<u8> = values.iter().flat_map(|v| v.to_le_bytes()).collect();
            ds.write_chunk(frame as usize, &raw).unwrap();
        }

        // Grow the logical shape to cover the written chunks.
        ds.extend(&[5, 4]).unwrap();
        ds.flush().unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }

    // Same pattern with 10 i32 frames.
    #[test]
    fn write_chunked_many_frames_for_h5dump() {
        let path = std::env::temp_dir().join("test_hdf5rs_chunked_many.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<i32>()
            .shape([0usize, 3])
            .chunk(&[1, 3])
            .max_shape(&[None, Some(3)])
            .create("data")
            .unwrap();

        for frame in 0..10u64 {
            let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
            let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
            ds.write_chunk(frame as usize, &raw).unwrap();
        }
        ds.extend(&[10, 3]).unwrap();
        file.close().unwrap();

        assert!(path.exists());
    }

    // Attaches several variable-length string attributes to a dataset,
    // exercising write_scalar, write_string, and FromStr paths.
    #[test]
    fn write_dataset_with_attributes() {
        use crate::types::VarLenUnicode;

        let path = std::env::temp_dir().join("test_hdf5rs_attributes.h5");
        let file = H5File::create(&path).unwrap();

        let ds = file
            .new_dataset::<f32>()
            .shape([10usize])
            .create("temperature")
            .unwrap();
        let data: Vec<f32> = (0..10).map(|i| i as f32 * 1.5).collect();
        ds.write_raw(&data).unwrap();

        let attr = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("units")
            .unwrap();
        attr.write_scalar(&VarLenUnicode("kelvin".to_string()))
            .unwrap();

        let attr2 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("description")
            .unwrap();
        attr2
            .write_scalar(&VarLenUnicode("Temperature measurements".to_string()))
            .unwrap();

        let attr3 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("source")
            .unwrap();
        attr3.write_string("sensor_01").unwrap();

        let attr4 = ds
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("label")
            .unwrap();
        let s: VarLenUnicode = "test_label".parse().unwrap_or_default();
        attr4.write_scalar(&s).unwrap();

        file.close().unwrap();

        assert!(path.exists());
    }

    // Chunked write followed by full read-back; values are 0..24 in order.
    #[test]
    fn chunked_write_read_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_chunked_roundtrip.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("table")
                .unwrap();

            for frame in 0..8u64 {
                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[8, 3]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("table").unwrap();
            assert_eq!(ds.shape(), vec![8, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 24);
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32);
            }
        }

        std::fs::remove_file(&path).ok();
    }

    // Deflate-compressed chunked f64 roundtrip (values 0..40).
    #[test]
    #[cfg(feature = "deflate")]
    fn compressed_chunked_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_compressed_roundtrip.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0usize, 4])
                .chunk(&[1, 4])
                .max_shape(&[None, Some(4)])
                .deflate(6)
                .create("compressed")
                .unwrap();

            for frame in 0..10u64 {
                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[10, 4]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("compressed").unwrap();
            assert_eq!(ds.shape(), vec![10, 4]);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 40);
            for (i, val) in data.iter().enumerate() {
                assert!(
                    (val - i as f64).abs() < 1e-10,
                    "mismatch at {}: {} != {}",
                    i,
                    val,
                    i
                );
            }
        }

        std::fs::remove_file(&path).ok();
    }

    // Larger deflate stream: 100 frames of 3 i32 values each.
    #[test]
    #[cfg(feature = "deflate")]
    fn compressed_chunked_many_frames() {
        let path = std::env::temp_dir().join("hdf5_compressed_many.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .deflate(6)
                .create("stream")
                .unwrap();

            for frame in 0..100u64 {
                let vals: Vec<i32> = (0..3).map(|i| (frame * 3 + i) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[100, 3]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("stream").unwrap();
            assert_eq!(ds.shape(), vec![100, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 300);
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32, "mismatch at {}", i);
            }
        }

        std::fs::remove_file(&path).ok();
    }
    // open_rw must be able to add a second dataset without losing the first.
    #[test]
    fn append_mode() {
        let path = std::env::temp_dir().join("hdf5_append.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([3usize])
                .create("first")
                .unwrap();
            ds.write_raw(&[1i32, 2, 3]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open_rw(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2usize])
                .create("second")
                .unwrap();
            ds.write_raw(&[4.0f64, 5.0]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let names = file.dataset_names();
            assert!(names.contains(&"first".to_string()));
            assert!(names.contains(&"second".to_string()));

            let ds1 = file.dataset("first").unwrap();
            assert_eq!(ds1.read_raw::<i32>().unwrap(), vec![1, 2, 3]);

            let ds2 = file.dataset("second").unwrap();
            assert_eq!(ds2.read_raw::<f64>().unwrap(), vec![4.0, 5.0]);
        }

        std::fs::remove_file(&path).ok();
    }

    // Overwriting a root attribute in open_rw must keep existing datasets.
    #[test]
    fn open_rw_set_attr_preserves_file() {
        let path = std::env::temp_dir().join("hdf5_open_rw_attr.h5");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([3usize])
                .create("data")
                .unwrap();
            ds.write_raw(&[10i32, 20, 30]).unwrap();
            file.set_attr_string("version", "1.0").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open_rw(&path).unwrap();
            file.set_attr_string("version", "2.0").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![10, 20, 30]);
            let ver = file.attr_string("version").unwrap();
            assert_eq!(ver, "2.0");
        }
        std::fs::remove_file(&path).ok();
    }

    // Same attribute rewrite, but with a compressed vlen-string dataset present.
    #[test]
    #[cfg(feature = "deflate")]
    fn open_rw_attr_with_compressed_dataset() {
        use crate::format::messages::filter::FilterPipeline;
        let path = std::env::temp_dir().join("hdf5_open_rw_compressed.h5");
        let input: Vec<&str> = (0..50).map(|_| "test string data").collect();
        {
            let file = H5File::create(&path).unwrap();
            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
                .unwrap();
            file.set_attr_string("version", "1.0").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open_rw(&path).unwrap();
            file.set_attr_string("version", "2.0").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("texts").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings.len(), 50);
            assert_eq!(strings[0], "test string data");
            let ver = file.attr_string("version").unwrap();
            assert_eq!(ver, "2.0");
        }
        std::fs::remove_file(&path).ok();
    }

    // 7189 distinct strings inside a group, compressed, surviving an
    // open_rw attribute update in between two reads.
    #[test]
    #[cfg(feature = "deflate")]
    fn vlen_string_compressed_large_roundtrip() {
        use crate::format::messages::filter::FilterPipeline;
        let path = std::env::temp_dir().join("hdf5_vlen_large.h5");
        let input: Vec<String> = (0..7189)
            .map(|i| format!("node-{:08x}-{}", i, "a".repeat(20 + (i % 30))))
            .collect();
        let input_refs: Vec<&str> = input.iter().map(|s| s.as_str()).collect();
        {
            let file = H5File::create(&path).unwrap();
            file.create_group("nodes").unwrap();
            file.write_vlen_strings_compressed(
                "nodes/id",
                &input_refs,
                512,
                FilterPipeline::deflate(6),
            )
            .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("nodes/id").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings.len(), 7189);
            assert_eq!(strings[0], input[0]);
            assert_eq!(strings[7188], input[7188]);
        }
        {
            let file = H5File::open_rw(&path).unwrap();
            file.set_attr_string("version", "1.0").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("nodes/id").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings.len(), 7189);
            assert_eq!(strings[0], input[0]);
        }
        std::fs::remove_file(&path).ok();
    }

    // Minimal uncompressed vlen-string roundtrip.
    #[test]
    fn vlen_string_write_read() {
        let path = std::env::temp_dir().join("hdf5_vlen_wr.h5");
        {
            let file = H5File::create(&path).unwrap();
            file.write_vlen_strings("names", &["alice", "bob", "charlie"])
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("names").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings, vec!["alice", "bob", "charlie"]);
        }
        std::fs::remove_file(&path).ok();
    }

    // Repetitive strings through the deflate pipeline.
    #[test]
    #[cfg(feature = "deflate")]
    fn vlen_string_deflate_roundtrip() {
        use crate::format::messages::filter::FilterPipeline;
        let path = std::env::temp_dir().join("hdf5_vlen_deflate.h5");
        let input: Vec<&str> = (0..100)
            .map(|i| match i % 3 {
                0 => "hello world",
                1 => "compressed vlen string test",
                _ => "rust-hdf5",
            })
            .collect();
        {
            let file = H5File::create(&path).unwrap();
            file.write_vlen_strings_compressed("texts", &input, 16, FilterPipeline::deflate(6))
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("texts").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings.len(), 100);
            for (i, s) in strings.iter().enumerate() {
                assert_eq!(s, input[i]);
            }
        }
        std::fs::remove_file(&path).ok();
    }

    // Same shape of test through the zstd pipeline.
    #[test]
    #[cfg(feature = "zstd")]
    fn vlen_string_zstd_roundtrip() {
        use crate::format::messages::filter::FilterPipeline;
        let path = std::env::temp_dir().join("hdf5_vlen_zstd.h5");
        let input: Vec<&str> = (0..200)
            .map(|i| match i % 4 {
                0 => "zstandard compression test",
                1 => "variable length string",
                2 => "rust-hdf5 chunked storage",
                _ => "hello zstd world",
            })
            .collect();
        {
            let file = H5File::create(&path).unwrap();
            file.write_vlen_strings_compressed("data", &input, 32, FilterPipeline::zstd(3))
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            let strings = ds.read_vlen_strings().unwrap();
            assert_eq!(strings.len(), 200);
            for (i, s) in strings.iter().enumerate() {
                assert_eq!(s, input[i]);
            }
        }
        std::fs::remove_file(&path).ok();
    }

    // Shuffle + deflate filter combination on chunked f64 data.
    #[test]
    #[cfg(feature = "deflate")]
    fn shuffle_deflate_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_shuf_defl.h5");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([0usize, 4])
                .chunk(&[1, 4])
                .max_shape(&[None, Some(4)])
                .shuffle_deflate(6)
                .create("data")
                .unwrap();
            for frame in 0..20u64 {
                let vals: Vec<f64> = (0..4).map(|i| (frame * 4 + i) as f64).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(frame as usize, &raw).unwrap();
            }
            ds.extend(&[20, 4]).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![20, 4]);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 80);
            for (i, val) in data.iter().enumerate() {
                assert!((val - i as f64).abs() < 1e-10);
            }
        }
        std::fs::remove_file(&path).ok();
    }

    // Root-level string and numeric attributes survive a write/read cycle.
    #[test]
    fn file_level_attributes() {
        let path = std::env::temp_dir().join("hdf5_file_attr.h5");
        {
            let file = H5File::create(&path).unwrap();
            file.set_attr_string("title", "Test File").unwrap();
            file.set_attr_numeric("version", &42i32).unwrap();
            let ds = file
                .new_dataset::<u8>()
                .shape([1usize])
                .create("dummy")
                .unwrap();
            ds.write_raw(&[0u8]).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            assert!(file.dataset_names().contains(&"dummy".to_string()));

            let names = file.attr_names().unwrap();
            assert!(names.contains(&"title".to_string()));

            let title = file.attr_string("title").unwrap();
            assert_eq!(title, "Test File");
        }
        std::fs::remove_file(&path).ok();
    }

    // Scalar (0-dimensional) dataset: empty shape, one element.
    #[test]
    fn scalar_dataset_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_scalar.h5");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f64>().scalar().create("pi").unwrap();
            ds.write_raw(&[std::f64::consts::PI]).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("pi").unwrap();
            assert_eq!(ds.shape(), Vec::<usize>::new());
            assert_eq!(ds.total_elements(), 1);
            let data = ds.read_raw::<f64>().unwrap();
            assert_eq!(data.len(), 1);
            assert!((data[0] - std::f64::consts::PI).abs() < 1e-15);
        }
        std::fs::remove_file(&path).ok();
    }

    // Reopen in append mode and extend an existing chunked dataset by going
    // through the writer directly (no public extend-after-reopen API yet).
    #[test]
    fn append_mode_extend_chunked() {
        let path = std::env::temp_dir().join("hdf5_append_extend.h5");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([0usize, 3])
                .chunk(&[1, 3])
                .max_shape(&[None, Some(3)])
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds.write_chunk(i as usize, &raw).unwrap();
            }
            ds.extend(&[5, 3]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open_rw(&path).unwrap();
            let names = file.dataset_names();
            assert!(names.contains(&"stream".to_string()));

            // Reach into the inner writer to append chunks 5..10.
            let mut inner = crate::file::borrow_inner_mut(&file.inner);
            if let crate::file::H5FileInner::Writer(writer) = &mut *inner {
                let ds_idx = writer.dataset_index("stream").unwrap();
                for i in 5..10u64 {
                    let vals: Vec<i32> = (0..3).map(|j| (i * 3 + j) as i32).collect();
                    let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                    writer.write_chunk(ds_idx, i, &raw).unwrap();
                }
                writer.extend_dataset(ds_idx, &[10, 3]).unwrap();
            }
            // The borrow must end before close() re-borrows the inner state.
            drop(inner);
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("stream").unwrap();
            assert_eq!(ds.shape(), vec![10, 3]);
            let data = ds.read_raw::<i32>().unwrap();
            assert_eq!(data.len(), 30);
            for (i, val) in data.iter().enumerate() {
                assert_eq!(*val, i as i32, "mismatch at {}", i);
            }
        }

        std::fs::remove_file(&path).ok();
    }

    // Nested groups with datasets at multiple levels; dataset_names reports
    // slash-separated paths for nested datasets.
    #[test]
    fn group_hierarchy_roundtrip() {
        let path = std::env::temp_dir().join("hdf5_groups_rt.h5");

        {
            let file = H5File::create(&path).unwrap();
            let root = file.root_group();

            let det = root.create_group("detector").unwrap();
            let raw = det.create_group("raw").unwrap();

            let ds1 = det
                .new_dataset::<f32>()
                .shape([10usize])
                .create("temperature")
                .unwrap();
            ds1.write_raw(&[1.0f32; 10]).unwrap();

            let ds2 = raw
                .new_dataset::<u16>()
                .shape([4usize, 4])
                .create("image")
                .unwrap();
            ds2.write_raw(&[42u16; 16]).unwrap();

            let ds3 = file
                .new_dataset::<i32>()
                .shape([3usize])
                .create("version")
                .unwrap();
            ds3.write_raw(&[1i32, 0, 0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let names = file.dataset_names();
            assert!(names.contains(&"version".to_string()));
            assert!(names.contains(&"detector/temperature".to_string()));
            assert!(names.contains(&"detector/raw/image".to_string()));

            let ds = file.dataset("version").unwrap();
            assert_eq!(ds.read_raw::<i32>().unwrap(), vec![1, 0, 0]);

            let ds = file.dataset("detector/temperature").unwrap();
            assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0f32; 10]);

            let ds = file.dataset("detector/raw/image").unwrap();
            assert_eq!(ds.shape(), vec![4, 4]);
            assert_eq!(ds.read_raw::<u16>().unwrap(), vec![42u16; 16]);

            let root = file.root_group();
            let group_names = root.group_names().unwrap();
            assert!(group_names.contains(&"detector".to_string()));
        }

        std::fs::remove_file(&path).ok();
    }

    // Groups created via H5File::create_group; verifies absolute group names.
    #[test]
    fn nested_groups_via_file_create_group() {
        let path = std::env::temp_dir().join("hdf5_file_create_group.h5");

        {
            let file = H5File::create(&path).unwrap();

            let grp = file.create_group("sensors").unwrap();
            let sub = grp.create_group("accel").unwrap();

            let ds = sub
                .new_dataset::<f64>()
                .shape([3usize])
                .create("xyz")
                .unwrap();
            ds.write_raw(&[1.0f64, 2.0, 3.0]).unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let names = file.dataset_names();
            assert!(names.contains(&"sensors/accel/xyz".to_string()));

            let ds = file.dataset("sensors/accel/xyz").unwrap();
            assert_eq!(ds.read_raw::<f64>().unwrap(), vec![1.0, 2.0, 3.0]);

            let root = file.root_group();
            let sensors = root.group("sensors").unwrap();
            assert_eq!(sensors.name(), "/sensors");

            let accel = sensors.group("accel").unwrap();
            assert_eq!(accel.name(), "/sensors/accel");

            let top_groups = root.group_names().unwrap();
            assert!(top_groups.contains(&"sensors".to_string()));

            let sub_groups = sensors.group_names().unwrap();
            assert!(sub_groups.contains(&"accel".to_string()));
        }

        std::fs::remove_file(&path).ok();
    }
}
1301
#[cfg(test)]
mod h5py_compat_tests {
    //! Interoperability checks against reference HDF5 tooling (`h5dump`) and
    //! a file produced by h5py. Both tests skip gracefully when the external
    //! tool or fixture is not available on the host.
    use super::*;

    // Writes contiguous, chunked+deflate, and grouped datasets plus a string
    // attribute, then requires `h5dump` to parse the file without error.
    #[test]
    #[cfg(feature = "deflate")]
    fn h5dump_validates_our_files() {
        // Probe for h5dump first; skip (not fail) if it is not installed.
        let h5dump = std::process::Command::new("h5dump")
            .arg("--version")
            .output();
        if h5dump.is_err() {
            eprintln!("skipping: h5dump not found");
            return;
        }

        let path = std::env::temp_dir().join("hdf5_h5dump_validate.h5");

        {
            let file = H5File::create(&path).unwrap();

            let ds = file
                .new_dataset::<f64>()
                .shape([3usize, 4])
                .create("matrix")
                .unwrap();
            let data: Vec<f64> = (0..12).map(|i| i as f64).collect();
            ds.write_raw(&data).unwrap();

            let ds2 = file
                .new_dataset::<i32>()
                .shape([0usize, 2])
                .chunk(&[1, 2])
                .max_shape(&[None, Some(2)])
                .deflate(6)
                .create("stream")
                .unwrap();
            for i in 0..5u64 {
                let vals: Vec<i32> = vec![i as i32 * 2, i as i32 * 2 + 1];
                let raw: Vec<u8> = vals.iter().flat_map(|v| v.to_le_bytes()).collect();
                ds2.write_chunk(i as usize, &raw).unwrap();
            }
            ds2.extend(&[5, 2]).unwrap();

            let grp = file.create_group("meta").unwrap();
            let ds3 = grp
                .new_dataset::<u8>()
                .shape([4usize])
                .create("flags")
                .unwrap();
            ds3.write_raw(&[1u8, 0, 1, 0]).unwrap();

            // Attribute attached to the first ("matrix") dataset.
            use crate::types::VarLenUnicode;
            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            attr.write_string("meters").unwrap();

            file.close().unwrap();
        }

        // Header-only dump (-H) validates metadata structure.
        let output = std::process::Command::new("h5dump")
            .arg("-H")
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output.status.success(),
            "h5dump failed:\nstdout: {}\nstderr: {}",
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr),
        );

        // Full dump also decodes data (including the compressed chunks).
        let output2 = std::process::Command::new("h5dump")
            .arg(path.to_str().unwrap())
            .output()
            .unwrap();

        assert!(
            output2.status.success(),
            "h5dump (full) failed:\nstderr: {}",
            String::from_utf8_lossy(&output2.stderr),
        );

        std::fs::remove_file(&path).ok();
    }

    // Reads a fixture written by h5py; skips when the fixture is absent.
    #[test]
    fn read_h5py_generated_file() {
        let path = "/tmp/test_h5py_default.h5";
        if !std::path::Path::new(path).exists() {
            eprintln!("skipping: h5py test file not found");
            return;
        }
        let file = H5File::open(path).unwrap();

        let ds = file.dataset("data").unwrap();
        assert_eq!(ds.shape(), vec![4, 5]);
        let data = ds.read_raw::<f64>().unwrap();
        assert_eq!(data.len(), 20);
        assert!((data[0]).abs() < 1e-10);
        assert!((data[19] - 19.0).abs() < 1e-10);

        let ds2 = file.dataset("images").unwrap();
        assert_eq!(ds2.shape(), vec![3, 64, 64]);
        let images = ds2.read_raw::<u16>().unwrap();
        assert_eq!(images.len(), 3 * 64 * 64);
    }
}
1421}