1use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
/// Builder that accumulates the settings for a new dataset with element type `T`.
///
/// Every option starts unset. `create` consumes the builder and fails when no
/// shape has been provided.
pub struct DatasetBuilder<T: H5Type> {
    /// Shared handle to the file state the dataset will be created in.
    file_inner: SharedInner,
    /// Dataset dimensions; an empty vector denotes a scalar. Required by `create`.
    shape: Option<Vec<usize>>,
    /// Chunk dimensions; when set, `create` produces a chunked dataset.
    chunk_dims: Option<Vec<usize>>,
    /// Per-dimension maximum extent; a `None` entry marks that dimension unlimited.
    max_shape: Option<Vec<Option<usize>>>,
    /// Deflate level; consulted only when neither `custom_pipeline` nor
    /// `shuffle_deflate_level` is set.
    deflate_level: Option<u32>,
    /// Shuffle + deflate level; takes precedence over `deflate_level`.
    shuffle_deflate_level: Option<u32>,
    /// Explicit filter pipeline; takes precedence over both level fields.
    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
    /// Path of the group the dataset is created in; `None` or `"/"` means no
    /// explicit group assignment is made.
    group_path: Option<String>,
    /// Raw bytes of the fill value (`T::element_size()` bytes of one `T`).
    fill_value: Option<Vec<u8>>,
    /// Ties the builder to its element type without storing a `T`.
    _marker: std::marker::PhantomData<T>,
}
40
41impl<T: H5Type> DatasetBuilder<T> {
42 pub(crate) fn new(file_inner: SharedInner) -> Self {
43 Self {
44 file_inner,
45 shape: None,
46 chunk_dims: None,
47 max_shape: None,
48 deflate_level: None,
49 shuffle_deflate_level: None,
50 custom_pipeline: None,
51 group_path: None,
52 fill_value: None,
53 _marker: std::marker::PhantomData,
54 }
55 }
56
57 pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
58 Self {
59 file_inner,
60 shape: None,
61 chunk_dims: None,
62 max_shape: None,
63 deflate_level: None,
64 shuffle_deflate_level: None,
65 custom_pipeline: None,
66 group_path: Some(group_path),
67 fill_value: None,
68 _marker: std::marker::PhantomData,
69 }
70 }
71
72 #[must_use]
77 pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
78 self.shape = Some(dims.as_ref().to_vec());
79 self
80 }
81
82 #[must_use]
84 pub fn scalar(mut self) -> Self {
85 self.shape = Some(vec![]);
86 self
87 }
88
89 #[must_use]
95 pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
96 self.chunk_dims = Some(chunk_dims.to_vec());
97 self
98 }
99
100 #[must_use]
104 pub fn resizable(mut self) -> Self {
105 self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
106 self
107 }
108
109 #[must_use]
111 pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
112 self.max_shape = Some(max.to_vec());
113 self
114 }
115
116 #[must_use]
121 pub fn deflate(mut self, level: u32) -> Self {
122 self.deflate_level = Some(level);
123 self
124 }
125
126 #[must_use]
132 pub fn shuffle_deflate(mut self, level: u32) -> Self {
133 self.shuffle_deflate_level = Some(level);
134 self
135 }
136
137 #[must_use]
141 pub fn zstd(mut self, level: u32) -> Self {
142 self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
143 self
144 }
145
146 #[must_use]
151 pub fn filter_pipeline(
152 mut self,
153 pipeline: crate::format::messages::filter::FilterPipeline,
154 ) -> Self {
155 self.custom_pipeline = Some(pipeline);
156 self
157 }
158
159 #[must_use]
176 pub fn fill_value(mut self, value: T) -> Self {
177 let es = T::element_size();
178 let raw = unsafe { std::slice::from_raw_parts(&value as *const T as *const u8, es) };
182 self.fill_value = Some(raw.to_vec());
183 self
184 }
185
186 pub fn create(self, name: &str) -> Result<H5Dataset> {
191 let shape = self.shape.ok_or_else(|| {
192 Hdf5Error::InvalidState("shape must be set before calling create()".into())
193 })?;
194
195 let full_name = if let Some(ref gp) = self.group_path {
197 if gp == "/" {
198 name.to_string()
199 } else {
200 let trimmed = gp.trim_start_matches('/');
201 format!("{}/{}", trimmed, name)
202 }
203 } else {
204 name.to_string()
205 };
206 let group_path = self.group_path.clone();
207 let fill_value = self.fill_value.clone();
208
209 let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
210 let datatype = T::hdf5_type();
211 let element_size = T::element_size();
212
213 if let Some(ref chunk_dims) = self.chunk_dims {
214 let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
216 let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
217 max.iter()
218 .map(|m| m.map_or(u64::MAX, |v| v as u64))
219 .collect()
220 } else {
221 dims_u64.clone()
223 };
224
225 let n_unlimited = max_u64.iter().filter(|&&m| m == u64::MAX).count();
229 let is_btree2 = n_unlimited >= 2;
230 let is_fixed_array = n_unlimited == 0;
231 let wants_filter = self.custom_pipeline.is_some()
232 || self.shuffle_deflate_level.is_some()
233 || self.deflate_level.is_some();
234
235 let index = {
236 let mut inner = borrow_inner_mut(&self.file_inner);
237 match &mut *inner {
238 H5FileInner::Writer(writer) => {
239 let idx = if is_btree2 {
240 if wants_filter {
241 return Err(Hdf5Error::InvalidState(
242 "compression of v2 B-tree (multi-unlimited-dimension) \
243 datasets is not yet supported"
244 .into(),
245 ));
246 }
247 writer.create_btree_v2_dataset(
248 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
249 )?
250 } else if is_fixed_array {
251 if wants_filter {
257 let pipeline = if let Some(p) = self.custom_pipeline {
258 p
259 } else if let Some(level) = self.shuffle_deflate_level {
260 crate::format::messages::filter::FilterPipeline::shuffle_deflate(
261 T::element_size() as u32,
262 level,
263 )
264 } else {
265 crate::format::messages::filter::FilterPipeline::deflate(
267 self.deflate_level.unwrap(),
268 )
269 };
270 writer.create_fixed_array_dataset_with_pipeline(
271 &full_name, datatype, &dims_u64, &chunk_u64, pipeline,
272 )?
273 } else {
274 writer.create_fixed_array_dataset(
275 &full_name, datatype, &dims_u64, &chunk_u64,
276 )?
277 }
278 } else if let Some(pipeline) = self.custom_pipeline {
279 writer.create_chunked_dataset_with_pipeline(
280 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
281 )?
282 } else if let Some(level) = self.shuffle_deflate_level {
283 let pipeline =
284 crate::format::messages::filter::FilterPipeline::shuffle_deflate(
285 T::element_size() as u32,
286 level,
287 );
288 writer.create_chunked_dataset_with_pipeline(
289 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
290 )?
291 } else if let Some(level) = self.deflate_level {
292 writer.create_chunked_dataset_compressed(
293 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
294 )?
295 } else {
296 writer.create_chunked_dataset(
297 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
298 )?
299 };
300 if let Some(ref gp) = group_path {
301 if gp != "/" {
302 writer.assign_dataset_to_group(gp, idx)?;
303 }
304 }
305 if let Some(ref fv) = fill_value {
306 writer.set_dataset_fill_value(idx, fv.clone())?;
307 }
308 idx
309 }
310 H5FileInner::Reader(_) => {
311 return Err(Hdf5Error::InvalidState(
312 "cannot create a dataset in read mode".into(),
313 ));
314 }
315 H5FileInner::Closed => {
316 return Err(Hdf5Error::InvalidState("file is closed".into()));
317 }
318 }
319 };
320
321 Ok(H5Dataset {
322 file_inner: clone_inner(&self.file_inner),
323 info: DatasetInfo::Writer {
324 index,
325 shape,
326 element_size,
327 chunked: true,
328 btree2: is_btree2,
329 fixed_array: is_fixed_array,
330 },
331 })
332 } else {
333 let index = {
335 let mut inner = borrow_inner_mut(&self.file_inner);
336 match &mut *inner {
337 H5FileInner::Writer(writer) => {
338 let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
339 if let Some(ref gp) = group_path {
340 if gp != "/" {
341 writer.assign_dataset_to_group(gp, idx)?;
342 }
343 }
344 if let Some(ref fv) = fill_value {
345 writer.set_dataset_fill_value(idx, fv.clone())?;
346 }
347 idx
348 }
349 H5FileInner::Reader(_) => {
350 return Err(Hdf5Error::InvalidState(
351 "cannot create a dataset in read mode".into(),
352 ));
353 }
354 H5FileInner::Closed => {
355 return Err(Hdf5Error::InvalidState("file is closed".into()));
356 }
357 }
358 };
359
360 Ok(H5Dataset {
361 file_inner: clone_inner(&self.file_inner),
362 info: DatasetInfo::Writer {
363 index,
364 shape,
365 element_size,
366 chunked: false,
367 btree2: false,
368 fixed_array: false,
369 },
370 })
371 }
372 }
373}
374
/// Mode-specific bookkeeping behind an [`H5Dataset`] handle.
enum DatasetInfo {
    /// Dataset created through a builder while the file is being written.
    Writer {
        /// Index handed to the writer's per-dataset methods.
        index: usize,
        /// Shape given at creation time (not updated by later extends/appends
        /// in this file).
        shape: Vec<usize>,
        /// `T::element_size()` of the element type used at creation.
        element_size: usize,
        /// `true` when chunk dimensions were supplied at creation.
        chunked: bool,
        /// `true` when two or more dimensions were unlimited (v2 B-tree chunk index).
        btree2: bool,
        /// `true` when a chunked dataset had no unlimited dimension (fixed-array chunk index).
        fixed_array: bool,
    },
    /// Dataset opened from a file in read mode.
    Reader {
        /// Name used to look the dataset up in the reader.
        name: String,
        /// Shape supplied when the handle was built.
        shape: Vec<usize>,
        /// Element size in bytes supplied when the handle was built.
        element_size: usize,
    },
}
406
/// Handle to a single dataset, in either write mode or read mode.
///
/// Which operations succeed depends on the mode recorded in `info`; calling a
/// write operation on a read-mode handle (or vice versa) returns an error.
pub struct H5Dataset {
    /// Shared handle to the owning file's state.
    file_inner: SharedInner,
    /// Mode-specific identity and metadata of the dataset.
    info: DatasetInfo,
}
420
421impl H5Dataset {
422 pub(crate) fn new_reader(
424 file_inner: SharedInner,
425 name: String,
426 shape: Vec<usize>,
427 element_size: usize,
428 ) -> Self {
429 Self {
430 file_inner,
431 info: DatasetInfo::Reader {
432 name,
433 shape,
434 element_size,
435 },
436 }
437 }
438
439 pub fn shape(&self) -> Vec<usize> {
441 match &self.info {
442 DatasetInfo::Writer { shape, .. } => shape.clone(),
443 DatasetInfo::Reader { shape, .. } => shape.clone(),
444 }
445 }
446
447 pub fn ndims(&self) -> usize {
449 match &self.info {
450 DatasetInfo::Writer { shape, .. } => shape.len(),
451 DatasetInfo::Reader { shape, .. } => shape.len(),
452 }
453 }
454
455 pub fn total_elements(&self) -> usize {
457 match &self.info {
458 DatasetInfo::Writer { shape, .. } => shape.iter().product(),
459 DatasetInfo::Reader { shape, .. } => shape.iter().product(),
460 }
461 }
462
463 pub fn element_size(&self) -> usize {
465 match &self.info {
466 DatasetInfo::Writer { element_size, .. } => *element_size,
467 DatasetInfo::Reader { element_size, .. } => *element_size,
468 }
469 }
470
471 pub fn chunk_dims(&self) -> Option<Vec<usize>> {
473 match &self.info {
474 DatasetInfo::Reader { name, .. } => {
475 let inner = borrow_inner(&self.file_inner);
476 if let H5FileInner::Reader(reader) = &*inner {
477 if let Some(info) = reader.dataset_info(name) {
478 use crate::format::messages::data_layout::DataLayoutMessage;
479 let chunk_dims = match &info.layout {
480 DataLayoutMessage::ChunkedV4 { chunk_dims, .. }
481 | DataLayoutMessage::ChunkedV3 { chunk_dims, .. } => Some(chunk_dims),
482 _ => None,
483 };
484 if let Some(chunk_dims) = chunk_dims {
485 return Some(
487 chunk_dims[..chunk_dims.len() - 1]
488 .iter()
489 .map(|&d| d as usize)
490 .collect(),
491 );
492 }
493 }
494 }
495 None
496 }
497 DatasetInfo::Writer { .. } => None,
498 }
499 }
500
501 pub fn is_chunked(&self) -> bool {
503 match &self.info {
504 DatasetInfo::Writer { chunked, .. } => *chunked,
505 DatasetInfo::Reader { name, .. } => {
506 let inner = borrow_inner(&self.file_inner);
507 match &*inner {
508 H5FileInner::Reader(reader) => {
509 if let Some(info) = reader.dataset_info(name) {
510 use crate::format::messages::data_layout::DataLayoutMessage;
511 matches!(
512 info.layout,
513 DataLayoutMessage::ChunkedV4 { .. }
514 | DataLayoutMessage::ChunkedV3 { .. }
515 )
516 } else {
517 false
518 }
519 }
520 _ => false,
521 }
522 }
523 }
524 }
525
526 pub fn attr_names(&self) -> Result<Vec<String>> {
528 match &self.info {
529 DatasetInfo::Reader { name, .. } => {
530 let inner = borrow_inner(&self.file_inner);
531 match &*inner {
532 H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
533 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
534 }
535 }
536 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
537 "attr_names not available in write mode".into(),
538 )),
539 }
540 }
541
542 pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
544 match &self.info {
545 DatasetInfo::Reader { name, .. } => {
546 let inner = borrow_inner(&self.file_inner);
547 match &*inner {
548 H5FileInner::Reader(reader) => {
549 let attr_msg = reader.dataset_attr(name, attr_name)?.clone();
550 Ok(crate::attribute::H5Attribute::new_reader(
551 clone_inner(&self.file_inner),
552 attr_msg,
553 ))
554 }
555 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
556 }
557 }
558 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
559 "attr() not available in write mode".into(),
560 )),
561 }
562 }
563
564 pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
580 let ds_index = match &self.info {
581 DatasetInfo::Writer { index, .. } => *index,
582 DatasetInfo::Reader { .. } => {
583 usize::MAX
586 }
587 };
588 AttrBuilder::new(&self.file_inner, ds_index)
589 }
590
591 pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
603 match &self.info {
604 DatasetInfo::Writer {
605 index,
606 shape,
607 element_size,
608 chunked,
609 btree2: _,
610 fixed_array: _,
611 } => {
612 if *chunked {
613 return Err(Hdf5Error::InvalidState(
614 "use write_chunk for chunked datasets".into(),
615 ));
616 }
617
618 let total_elements: usize = shape.iter().product();
619 if data.len() != total_elements {
620 return Err(Hdf5Error::InvalidState(format!(
621 "data length {} does not match dataset size {}",
622 data.len(),
623 total_elements,
624 )));
625 }
626
627 if T::element_size() != *element_size {
629 return Err(Hdf5Error::TypeMismatch(format!(
630 "write type has element size {} but dataset expects {}",
631 T::element_size(),
632 element_size,
633 )));
634 }
635
636 let byte_len = data.len() * T::element_size();
640 let raw =
641 unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
642
643 let mut inner = borrow_inner_mut(&self.file_inner);
644 match &mut *inner {
645 H5FileInner::Writer(writer) => {
646 writer.write_dataset_raw(*index, raw)?;
647 Ok(())
648 }
649 _ => Err(Hdf5Error::InvalidState(
650 "file is no longer in write mode".into(),
651 )),
652 }
653 }
654 DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
655 "cannot write to a dataset opened in read mode".into(),
656 )),
657 }
658 }
659
660 pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
668 match &self.info {
669 DatasetInfo::Writer {
670 index,
671 chunked,
672 btree2,
673 fixed_array,
674 ..
675 } => {
676 if !*chunked {
677 return Err(Hdf5Error::InvalidState(
678 "write_chunk is only for chunked datasets".into(),
679 ));
680 }
681 if *btree2 {
682 return Err(Hdf5Error::InvalidState(
683 "this dataset uses a v2 B-tree chunk index; use write_chunk_at \
684 with the chunk's grid coordinates"
685 .into(),
686 ));
687 }
688
689 let mut inner = borrow_inner_mut(&self.file_inner);
690 match &mut *inner {
691 H5FileInner::Writer(writer) => {
692 if *fixed_array {
693 let chunk_dims = writer
696 .dataset_chunk_dims(*index)
697 .ok_or_else(|| {
698 Hdf5Error::InvalidState("dataset has no chunk info".into())
699 })?
700 .to_vec();
701 let dims = writer.dataset_dims(*index).to_vec();
702 let mut grid = vec![0u64; dims.len()];
703 for d in 0..dims.len() {
704 grid[d] = if chunk_dims[d] > 0 {
705 dims[d].div_ceil(chunk_dims[d])
706 } else {
707 1
708 };
709 }
710 if grid.contains(&0) {
713 return Err(Hdf5Error::InvalidState(
714 "dataset has a zero-extent dimension and no chunks".into(),
715 ));
716 }
717 let mut rem = chunk_idx as u64;
718 let mut coords = vec![0u64; dims.len()];
719 for d in (0..dims.len()).rev() {
720 coords[d] = rem % grid[d];
721 rem /= grid[d];
722 }
723 if rem != 0 {
725 return Err(Hdf5Error::InvalidState(format!(
726 "chunk index {chunk_idx} is out of range for this dataset"
727 )));
728 }
729 writer.write_chunk_fixed_array(*index, &coords, data)?;
730 } else {
731 writer.write_chunk(*index, chunk_idx as u64, data)?;
732 }
733 Ok(())
734 }
735 _ => Err(Hdf5Error::InvalidState(
736 "file is no longer in write mode".into(),
737 )),
738 }
739 }
740 DatasetInfo::Reader { .. } => {
741 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
742 }
743 }
744 }
745
746 pub fn write_chunk_at(&self, chunk_coords: &[usize], data: &[u8]) -> Result<()> {
767 match &self.info {
768 DatasetInfo::Writer {
769 index,
770 chunked,
771 btree2,
772 fixed_array,
773 ..
774 } => {
775 if !*chunked {
776 return Err(Hdf5Error::InvalidState(
777 "write_chunk_at is only for chunked datasets".into(),
778 ));
779 }
780 let coords: Vec<u64> = chunk_coords.iter().map(|&c| c as u64).collect();
781 let btree2 = *btree2;
782 let fixed_array = *fixed_array;
783 let mut inner = borrow_inner_mut(&self.file_inner);
784 let writer = match &mut *inner {
785 H5FileInner::Writer(w) => w,
786 _ => {
787 return Err(Hdf5Error::InvalidState(
788 "file is no longer in write mode".into(),
789 ))
790 }
791 };
792 let chunk_dims = writer
793 .dataset_chunk_dims(*index)
794 .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
795 .to_vec();
796 let dims = writer.dataset_dims(*index).to_vec();
797 if coords.len() != dims.len() {
798 return Err(Hdf5Error::InvalidState(format!(
799 "chunk_coords has {} entries but the dataset has {} dimensions",
800 coords.len(),
801 dims.len()
802 )));
803 }
804 if chunk_dims.len() != dims.len() {
805 return Err(Hdf5Error::InvalidState(format!(
806 "dataset chunk shape has {} dimensions but the dataspace has {}",
807 chunk_dims.len(),
808 dims.len()
809 )));
810 }
811
812 let mut new_dims = dims.clone();
816 for d in 0..dims.len() {
817 let needed = coords[d]
818 .checked_add(1)
819 .and_then(|c| c.checked_mul(chunk_dims[d]))
820 .ok_or_else(|| {
821 Hdf5Error::InvalidState(format!(
822 "chunk coordinate {} in dimension {} is too large",
823 coords[d], d
824 ))
825 })?;
826 if needed > new_dims[d] {
827 new_dims[d] = needed;
828 }
829 }
830
831 if fixed_array {
832 writer.write_chunk_fixed_array(*index, &coords, data)?;
834 return Ok(());
835 }
836
837 if btree2 {
838 writer.write_chunk_btree_v2(*index, &coords, data)?;
839 } else {
840 let mut linear = 0u64;
843 for d in 0..dims.len() {
844 let grid = if chunk_dims[d] > 0 {
845 dims[d].div_ceil(chunk_dims[d])
846 } else {
847 1
848 };
849 linear = linear
850 .checked_mul(grid)
851 .and_then(|l| l.checked_add(coords[d]))
852 .ok_or_else(|| {
853 Hdf5Error::InvalidState(
854 "chunk coordinates overflow the array index".into(),
855 )
856 })?;
857 }
858 writer.write_chunk(*index, linear, data)?;
859 }
860
861 if new_dims != dims {
862 writer.extend_dataset(*index, &new_dims)?;
863 }
864 Ok(())
865 }
866 DatasetInfo::Reader { .. } => {
867 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
868 }
869 }
870 }
871
872 pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
878 match &self.info {
879 DatasetInfo::Writer { index, chunked, .. } => {
880 if !*chunked {
881 return Err(Hdf5Error::InvalidState(
882 "write_chunks_batch is only for chunked datasets".into(),
883 ));
884 }
885 let pairs: Vec<(u64, &[u8])> = chunks
886 .iter()
887 .map(|(idx, data)| (*idx as u64, *data))
888 .collect();
889 let mut inner = borrow_inner_mut(&self.file_inner);
890 match &mut *inner {
891 H5FileInner::Writer(writer) => {
892 writer.write_chunks_batch(*index, &pairs)?;
893 Ok(())
894 }
895 _ => Err(Hdf5Error::InvalidState(
896 "file is no longer in write mode".into(),
897 )),
898 }
899 }
900 DatasetInfo::Reader { .. } => {
901 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
902 }
903 }
904 }
905
906 pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
929 match &self.info {
930 DatasetInfo::Writer {
931 index,
932 element_size,
933 chunked,
934 ..
935 } => {
936 if !*chunked {
937 return Err(Hdf5Error::InvalidState(
938 "append is only for chunked datasets".into(),
939 ));
940 }
941 if T::element_size() != *element_size {
942 return Err(Hdf5Error::TypeMismatch(format!(
943 "append type has element size {} but dataset expects {}",
944 T::element_size(),
945 element_size,
946 )));
947 }
948
949 let ds_index = *index;
950 let es = *element_size;
951
952 let mut inner = borrow_inner_mut(&self.file_inner);
953 let writer = match &mut *inner {
954 H5FileInner::Writer(w) => w,
955 _ => {
956 return Err(Hdf5Error::InvalidState(
957 "file is no longer in write mode".into(),
958 ))
959 }
960 };
961
962 let chunk_dims = writer
963 .dataset_chunk_dims(ds_index)
964 .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
965 .to_vec();
966 let dims = writer.dataset_dims(ds_index).to_vec();
967
968 let frame_elems: usize = if dims.len() > 1 {
970 dims[1..].iter().map(|&d| d as usize).product()
971 } else {
972 1
973 };
974
975 if frame_elems == 0 {
976 return Err(Hdf5Error::InvalidState(
977 "cannot append to dataset with zero-size trailing dimensions".into(),
978 ));
979 }
980
981 if !data.len().is_multiple_of(frame_elems) {
982 return Err(Hdf5Error::InvalidState(format!(
983 "data length {} is not a multiple of frame size {}",
984 data.len(),
985 frame_elems,
986 )));
987 }
988
989 let n_new_frames = data.len() / frame_elems;
990 let current_dim0 = dims[0] as usize;
991
992 let chunk_dim0 = chunk_dims[0] as usize;
994 let frame_bytes = frame_elems * es;
995
996 let raw = unsafe {
997 std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
998 };
999
1000 let ds = &mut writer.datasets[ds_index];
1002 let buffered_frames = ds.append_buffered_frames as usize;
1003 let mut combined = std::mem::take(&mut ds.append_buffer);
1004 combined.extend_from_slice(raw);
1005 ds.append_buffered_frames = 0;
1006
1007 let total_frames = buffered_frames + n_new_frames;
1008 let total_bytes = combined.len();
1009
1010 let base_dim0 = current_dim0 - buffered_frames;
1012 let mut byte_pos = 0usize;
1013 let mut frame_pos = 0usize;
1014
1015 while frame_pos < total_frames {
1016 let abs_frame = base_dim0 + frame_pos;
1017 let chunk_idx = abs_frame / chunk_dim0;
1018 let remaining_frames = total_frames - frame_pos;
1019 let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);
1020
1021 if remaining_frames >= frames_to_fill {
1022 let end = byte_pos + frames_to_fill * frame_bytes;
1024 if frames_to_fill == chunk_dim0 {
1025 writer.write_chunk(
1026 ds_index,
1027 chunk_idx as u64,
1028 &combined[byte_pos..end],
1029 )?;
1030 } else {
1031 let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
1037 let mut chunk_buf =
1038 match writer.read_chunk_if_present(ds_index, chunk_idx as u64)? {
1039 Some(existing) => existing,
1040 None => {
1041 return Err(Hdf5Error::InvalidState(format!(
1042 "cannot append into partially-written chunk {}: \
1043 its existing content was not found in the chunk \
1044 index (the file may be inconsistent)",
1045 chunk_idx
1046 )));
1047 }
1048 };
1049 chunk_buf
1050 [offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
1051 .copy_from_slice(&combined[byte_pos..end]);
1052 writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
1053 }
1054 byte_pos = end;
1055 frame_pos += frames_to_fill;
1056 } else {
1057 let ds = &mut writer.datasets[ds_index];
1059 ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
1060 ds.append_buffered_frames = remaining_frames as u64;
1061 frame_pos = total_frames;
1062 }
1063 }
1064
1065 let logical_dim0 = base_dim0 + total_frames;
1067 let mut new_dims: Vec<u64> = dims;
1068 new_dims[0] = logical_dim0 as u64;
1069 writer.extend_dataset(ds_index, &new_dims)?;
1070
1071 Ok(())
1072 }
1073 DatasetInfo::Reader { .. } => {
1074 Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
1075 }
1076 }
1077 }
1078
1079 pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
1081 match &self.info {
1082 DatasetInfo::Writer { index, chunked, .. } => {
1083 if !*chunked {
1084 return Err(Hdf5Error::InvalidState(
1085 "extend is only for chunked datasets".into(),
1086 ));
1087 }
1088
1089 let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
1090 let mut inner = borrow_inner_mut(&self.file_inner);
1091 match &mut *inner {
1092 H5FileInner::Writer(writer) => {
1093 writer.extend_dataset(*index, &dims_u64)?;
1094 Ok(())
1095 }
1096 _ => Err(Hdf5Error::InvalidState(
1097 "file is no longer in write mode".into(),
1098 )),
1099 }
1100 }
1101 DatasetInfo::Reader { .. } => {
1102 Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
1103 }
1104 }
1105 }
1106
1107 pub fn flush(&self) -> Result<()> {
1109 match &self.info {
1110 DatasetInfo::Writer { index, .. } => {
1111 let mut inner = borrow_inner_mut(&self.file_inner);
1112 match &mut *inner {
1113 H5FileInner::Writer(writer) => {
1114 writer.flush_dataset(*index)?;
1115 Ok(())
1116 }
1117 _ => Ok(()),
1118 }
1119 }
1120 DatasetInfo::Reader { .. } => Ok(()),
1121 }
1122 }
1123
1124 pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
1129 match &self.info {
1130 DatasetInfo::Reader {
1131 name, element_size, ..
1132 } => {
1133 if T::element_size() != *element_size {
1134 return Err(Hdf5Error::TypeMismatch(format!(
1135 "read type has element size {} but dataset has element size {}",
1136 T::element_size(),
1137 element_size,
1138 )));
1139 }
1140 let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
1141 let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
1142
1143 let raw = {
1144 let mut inner = borrow_inner_mut(&self.file_inner);
1145 match &mut *inner {
1146 H5FileInner::Reader(reader) => {
1147 reader.read_slice(name, &starts_u64, &counts_u64)?
1148 }
1149 _ => {
1150 return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
1151 }
1152 }
1153 };
1154
1155 if raw.len() % T::element_size() != 0 {
1156 return Err(Hdf5Error::TypeMismatch(format!(
1157 "raw data size {} is not a multiple of element size {}",
1158 raw.len(),
1159 T::element_size(),
1160 )));
1161 }
1162
1163 let count = raw.len() / T::element_size();
1164 let mut result = Vec::<T>::with_capacity(count);
1165 unsafe {
1166 std::ptr::copy_nonoverlapping(
1167 raw.as_ptr(),
1168 result.as_mut_ptr() as *mut u8,
1169 raw.len(),
1170 );
1171 result.set_len(count);
1172 }
1173 Ok(result)
1174 }
1175 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1176 "cannot read_slice from a dataset in write mode".into(),
1177 )),
1178 }
1179 }
1180
1181 pub fn write_slice<T: H5Type>(
1185 &self,
1186 starts: &[usize],
1187 counts: &[usize],
1188 data: &[T],
1189 ) -> Result<()> {
1190 match &self.info {
1191 DatasetInfo::Writer {
1192 index,
1193 element_size,
1194 chunked,
1195 ..
1196 } => {
1197 if *chunked {
1198 return Err(Hdf5Error::InvalidState(
1199 "write_slice is only for contiguous datasets".into(),
1200 ));
1201 }
1202 if T::element_size() != *element_size {
1203 return Err(Hdf5Error::TypeMismatch(format!(
1204 "write type has element size {} but dataset expects {}",
1205 T::element_size(),
1206 element_size,
1207 )));
1208 }
1209
1210 let expected: usize = counts.iter().product();
1211 if data.len() != expected {
1212 return Err(Hdf5Error::InvalidState(format!(
1213 "data length {} does not match slice size {}",
1214 data.len(),
1215 expected,
1216 )));
1217 }
1218
1219 let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
1220 let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
1221
1222 let byte_len = data.len() * T::element_size();
1223 let raw =
1224 unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
1225
1226 let mut inner = borrow_inner_mut(&self.file_inner);
1227 match &mut *inner {
1228 H5FileInner::Writer(writer) => {
1229 writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
1230 Ok(())
1231 }
1232 _ => Err(Hdf5Error::InvalidState(
1233 "file is no longer in write mode".into(),
1234 )),
1235 }
1236 }
1237 DatasetInfo::Reader { .. } => {
1238 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
1239 }
1240 }
1241 }
1242
1243 pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
1248 match &self.info {
1249 DatasetInfo::Reader { name, .. } => {
1250 let mut inner = borrow_inner_mut(&self.file_inner);
1251 match &mut *inner {
1252 H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
1253 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
1254 }
1255 }
1256 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1257 "cannot read vlen strings from a dataset in write mode".into(),
1258 )),
1259 }
1260 }
1261
1262 pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
1274 match &self.info {
1275 DatasetInfo::Reader {
1276 name, element_size, ..
1277 } => {
1278 if T::element_size() != *element_size {
1279 return Err(Hdf5Error::TypeMismatch(format!(
1280 "read type has element size {} but dataset has element size {}",
1281 T::element_size(),
1282 element_size,
1283 )));
1284 }
1285
1286 let raw = {
1287 let mut inner = borrow_inner_mut(&self.file_inner);
1288 match &mut *inner {
1289 H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
1290 _ => {
1291 return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
1292 }
1293 }
1294 };
1295
1296 if raw.len() % T::element_size() != 0 {
1297 return Err(Hdf5Error::TypeMismatch(format!(
1298 "raw data size {} is not a multiple of element size {}",
1299 raw.len(),
1300 T::element_size(),
1301 )));
1302 }
1303
1304 let count = raw.len() / T::element_size();
1305 let mut result = Vec::<T>::with_capacity(count);
1306
1307 unsafe {
1314 std::ptr::copy_nonoverlapping(
1315 raw.as_ptr(),
1316 result.as_mut_ptr() as *mut u8,
1317 raw.len(),
1318 );
1319 result.set_len(count);
1320 }
1321
1322 Ok(result)
1323 }
1324 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1325 "cannot read from a dataset in write mode".into(),
1326 )),
1327 }
1328 }
1329}
1330
1331#[cfg(test)]
1332mod tests {
1333 use crate::H5File;
1334 use std::path::PathBuf;
1335
1336 fn temp_path(name: &str) -> PathBuf {
1337 use std::sync::atomic::{AtomicU64, Ordering};
1341 static COUNTER: AtomicU64 = AtomicU64::new(0);
1342 let n = COUNTER.fetch_add(1, Ordering::Relaxed);
1343 std::env::temp_dir().join(format!(
1344 "hdf5_dataset_test_{}_{}_{}.h5",
1345 name,
1346 std::process::id(),
1347 n
1348 ))
1349 }
1350
1351 #[test]
1352 fn builder_requires_shape() {
1353 let path = temp_path("no_shape");
1354 let file = H5File::create(&path).unwrap();
1355 let result = file.new_dataset::<u8>().create("data");
1356 assert!(result.is_err());
1357 std::fs::remove_file(&path).ok();
1358 }
1359
1360 #[test]
1361 fn write_raw_size_mismatch() {
1362 let path = temp_path("size_mismatch");
1363 let file = H5File::create(&path).unwrap();
1364 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1365 let result = ds.write_raw(&[1u8, 2, 3]);
1367 assert!(result.is_err());
1368 std::fs::remove_file(&path).ok();
1369 }
1370
1371 #[test]
1372 fn roundtrip_u8_1d() {
1373 let path = temp_path("rt_u8_1d");
1374 let data: Vec<u8> = (0..10).collect();
1375
1376 {
1377 let file = H5File::create(&path).unwrap();
1378 let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
1379 ds.write_raw(&data).unwrap();
1380 file.close().unwrap();
1381 }
1382
1383 {
1384 let file = H5File::open(&path).unwrap();
1385 let ds = file.dataset("seq").unwrap();
1386 assert_eq!(ds.shape(), vec![10]);
1387 let readback = ds.read_raw::<u8>().unwrap();
1388 assert_eq!(readback, data);
1389 }
1390
1391 std::fs::remove_file(&path).ok();
1392 }
1393
1394 #[test]
1395 fn roundtrip_i32_2d() {
1396 let path = temp_path("rt_i32_2d");
1397 let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];
1398
1399 {
1400 let file = H5File::create(&path).unwrap();
1401 let ds = file
1402 .new_dataset::<i32>()
1403 .shape([2, 3])
1404 .create("matrix")
1405 .unwrap();
1406 ds.write_raw(&data).unwrap();
1407 file.close().unwrap();
1408 }
1409
1410 {
1411 let file = H5File::open(&path).unwrap();
1412 let ds = file.dataset("matrix").unwrap();
1413 assert_eq!(ds.shape(), vec![2, 3]);
1414 let readback = ds.read_raw::<i32>().unwrap();
1415 assert_eq!(readback, data);
1416 }
1417
1418 std::fs::remove_file(&path).ok();
1419 }
1420
1421 #[test]
1422 fn roundtrip_f64_3d() {
1423 let path = temp_path("rt_f64_3d");
1424 let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();
1425
1426 {
1427 let file = H5File::create(&path).unwrap();
1428 let ds = file
1429 .new_dataset::<f64>()
1430 .shape([2, 3, 4])
1431 .create("cube")
1432 .unwrap();
1433 ds.write_raw(&data).unwrap();
1434 file.close().unwrap();
1435 }
1436
1437 {
1438 let file = H5File::open(&path).unwrap();
1439 let ds = file.dataset("cube").unwrap();
1440 assert_eq!(ds.shape(), vec![2, 3, 4]);
1441 let readback = ds.read_raw::<f64>().unwrap();
1442 assert_eq!(readback, data);
1443 }
1444
1445 std::fs::remove_file(&path).ok();
1446 }
1447
1448 #[test]
1449 fn cannot_read_in_write_mode() {
1450 let path = temp_path("no_read_write");
1451 let file = H5File::create(&path).unwrap();
1452 let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1453 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1454 let result = ds.read_raw::<u8>();
1455 assert!(result.is_err());
1456 std::fs::remove_file(&path).ok();
1457 }
1458
1459 #[test]
1460 fn cannot_write_in_read_mode() {
1461 let path = temp_path("no_write_read");
1462
1463 {
1464 let file = H5File::create(&path).unwrap();
1465 let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1466 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1467 file.close().unwrap();
1468 }
1469
1470 {
1471 let file = H5File::open(&path).unwrap();
1472 let ds = file.dataset("x").unwrap();
1473 let result = ds.write_raw(&[5u8, 6, 7, 8]);
1474 assert!(result.is_err());
1475 }
1476
1477 std::fs::remove_file(&path).ok();
1478 }
1479
1480 #[test]
1481 fn numeric_attr_roundtrip() {
1482 let path = temp_path("num_attr");
1483 {
1484 let file = H5File::create(&path).unwrap();
1485 let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
1486 ds.write_raw(&[1.0f32; 4]).unwrap();
1487
1488 let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
1489 a1.write_numeric(&1.2345f64).unwrap();
1490
1491 let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
1492 a2.write_numeric(&42i32).unwrap();
1493
1494 file.close().unwrap();
1495 }
1496 {
1497 let file = H5File::open(&path).unwrap();
1498 let ds = file.dataset("data").unwrap();
1499
1500 let scale = ds.attr("scale").unwrap();
1501 let val: f64 = scale.read_numeric().unwrap();
1502 assert!((val - 1.2345).abs() < 1e-10);
1503
1504 let count = ds.attr("count").unwrap();
1505 let val: i32 = count.read_numeric().unwrap();
1506 assert_eq!(val, 42);
1507 }
1508 std::fs::remove_file(&path).ok();
1509 }
1510
1511 #[test]
1512 fn cannot_create_dataset_in_read_mode() {
1513 let path = temp_path("no_create_read");
1514
1515 {
1516 let _file = H5File::create(&path).unwrap();
1517 }
1518
1519 {
1520 let file = H5File::open(&path).unwrap();
1521 let result = file.new_dataset::<u8>().shape([4]).create("x");
1522 assert!(result.is_err());
1523 }
1524
1525 std::fs::remove_file(&path).ok();
1526 }
1527
1528 #[test]
1529 fn shape_accessor() {
1530 let path = temp_path("shape_acc");
1531
1532 let file = H5File::create(&path).unwrap();
1533 let ds = file
1534 .new_dataset::<f32>()
1535 .shape([5, 10, 3])
1536 .create("tensor")
1537 .unwrap();
1538 assert_eq!(ds.shape(), vec![5, 10, 3]);
1539
1540 std::fs::remove_file(&path).ok();
1541 }
1542
1543 #[test]
1544 fn slice_roundtrip_2d() {
1545 let path = temp_path("slice_2d");
1546
1547 let data: Vec<i32> = (0..20).collect();
1549 {
1550 let file = H5File::create(&path).unwrap();
1551 let ds = file
1552 .new_dataset::<i32>()
1553 .shape([4, 5])
1554 .create("mat")
1555 .unwrap();
1556 ds.write_raw(&data).unwrap();
1557 file.close().unwrap();
1558 }
1559 {
1560 let file = H5File::open(&path).unwrap();
1561 let ds = file.dataset("mat").unwrap();
1562 let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
1564 assert_eq!(slice, vec![7, 8, 12, 13]);
1567 }
1568
1569 std::fs::remove_file(&path).ok();
1570 }
1571
1572 #[test]
1573 fn write_slice_2d() {
1574 let path = temp_path("write_slice_2d");
1575
1576 {
1577 let file = H5File::create(&path).unwrap();
1578 let ds = file
1579 .new_dataset::<f32>()
1580 .shape([3, 4])
1581 .create("data")
1582 .unwrap();
1583 ds.write_raw(&[0.0f32; 12]).unwrap();
1584 ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
1586 .unwrap();
1587 file.close().unwrap();
1588 }
1589 {
1590 let file = H5File::open(&path).unwrap();
1591 let ds = file.dataset("data").unwrap();
1592 let full = ds.read_raw::<f32>().unwrap();
1593 assert_eq!(
1597 full,
1598 vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
1599 );
1600 }
1601
1602 std::fs::remove_file(&path).ok();
1603 }
1604
1605 #[test]
1606 fn write_slice_out_of_bounds_rejected() {
1607 let path = temp_path("write_slice_oob");
1608 let file = H5File::create(&path).unwrap();
1609 let ds = file.new_dataset::<i32>().shape([4]).create("d").unwrap();
1610 ds.write_raw(&[0i32; 4]).unwrap();
1611 assert!(ds.write_slice(&[2], &[6], &[9i32; 6]).is_err());
1613 assert!(ds.write_slice(&[1], &[2], &[7i32, 8]).is_ok());
1615 std::fs::remove_file(&path).ok();
1616 }
1617
1618 #[test]
1619 fn duplicate_dataset_name_rejected() {
1620 let path = temp_path("dup_name");
1621 let file = H5File::create(&path).unwrap();
1622 let _ = file.new_dataset::<i32>().shape([2]).create("d").unwrap();
1623 assert!(file.new_dataset::<i32>().shape([2]).create("d").is_err());
1624 std::fs::remove_file(&path).ok();
1625 }
1626
1627 #[test]
1628 fn extend_cannot_shrink() {
1629 let path = temp_path("extend_shrink");
1630 let file = H5File::create(&path).unwrap();
1631 let ds = file
1632 .new_dataset::<i32>()
1633 .shape([0])
1634 .chunk(&[2])
1635 .max_shape(&[None])
1636 .create("d")
1637 .unwrap();
1638 ds.append(&[1i32, 2, 3, 4]).unwrap();
1639 assert!(ds.extend(&[2]).is_err());
1641 assert!(ds.extend(&[6]).is_ok());
1643 std::fs::remove_file(&path).ok();
1644 }
1645
1646 #[test]
1647 fn attr_read_roundtrip() {
1648 use crate::types::VarLenUnicode;
1649 let path = temp_path("attr_read");
1650
1651 {
1652 let file = H5File::create(&path).unwrap();
1653 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1654 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1655 let a1 = ds
1656 .new_attr::<VarLenUnicode>()
1657 .shape(())
1658 .create("units")
1659 .unwrap();
1660 a1.write_string("meters").unwrap();
1661 let a2 = ds
1662 .new_attr::<VarLenUnicode>()
1663 .shape(())
1664 .create("desc")
1665 .unwrap();
1666 a2.write_string("test data").unwrap();
1667 file.close().unwrap();
1668 }
1669 {
1670 let file = H5File::open(&path).unwrap();
1671 let ds = file.dataset("data").unwrap();
1672
1673 let names = ds.attr_names().unwrap();
1674 assert!(names.contains(&"units".to_string()));
1675 assert!(names.contains(&"desc".to_string()));
1676
1677 let units = ds.attr("units").unwrap();
1678 assert_eq!(units.read_string().unwrap(), "meters");
1679
1680 let desc = ds.attr("desc").unwrap();
1681 assert_eq!(desc.read_string().unwrap(), "test data");
1682 }
1683
1684 std::fs::remove_file(&path).ok();
1685 }
1686
1687 #[test]
1688 fn type_mismatch_element_size() {
1689 let path = temp_path("type_mismatch");
1690
1691 {
1692 let file = H5File::create(&path).unwrap();
1693 let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
1694 ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
1695 file.close().unwrap();
1696 }
1697
1698 {
1699 let file = H5File::open(&path).unwrap();
1700 let ds = file.dataset("data").unwrap();
1701 let result = ds.read_raw::<u8>();
1703 assert!(result.is_err());
1704 }
1705
1706 std::fs::remove_file(&path).ok();
1707 }
1708
1709 #[test]
1710 fn dataset_survives_file_move() {
1711 let path = temp_path("ds_survives");
1712
1713 let ds = {
1714 let file = H5File::create(&path).unwrap();
1715 file.new_dataset::<u8>().shape([4]).create("x").unwrap()
1716 };
1717 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1719 std::fs::remove_file(&path).ok();
1722 }
1723
1724 #[test]
1725 fn new_attr_scalar_string() {
1726 use crate::types::VarLenUnicode;
1727
1728 let path = temp_path("attr_scalar_string");
1729 {
1730 let file = H5File::create(&path).unwrap();
1731 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1732 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1733
1734 let attr = ds
1735 .new_attr::<VarLenUnicode>()
1736 .shape(())
1737 .create("name")
1738 .unwrap();
1739 attr.write_scalar(&VarLenUnicode("test_value".to_string()))
1740 .unwrap();
1741
1742 file.close().unwrap();
1743 }
1744
1745 {
1747 let file = H5File::open(&path).unwrap();
1748 let ds = file.dataset("data").unwrap();
1749 assert_eq!(ds.shape(), vec![4]);
1750 let readback = ds.read_raw::<u8>().unwrap();
1751 assert_eq!(readback, vec![1u8, 2, 3, 4]);
1752 }
1753
1754 std::fs::remove_file(&path).ok();
1755 }
1756
1757 #[test]
1758 fn all_numeric_types_roundtrip() {
1759 let path = temp_path("all_types");
1760
1761 {
1762 let file = H5File::create(&path).unwrap();
1763
1764 let ds = file.new_dataset::<u8>().shape([2]).create("u8").unwrap();
1765 ds.write_raw(&[1u8, 2]).unwrap();
1766
1767 let ds = file.new_dataset::<i8>().shape([2]).create("i8").unwrap();
1768 ds.write_raw(&[-1i8, 1]).unwrap();
1769
1770 let ds = file.new_dataset::<u16>().shape([2]).create("u16").unwrap();
1771 ds.write_raw(&[100u16, 200]).unwrap();
1772
1773 let ds = file.new_dataset::<i16>().shape([2]).create("i16").unwrap();
1774 ds.write_raw(&[-100i16, 100]).unwrap();
1775
1776 let ds = file.new_dataset::<u32>().shape([2]).create("u32").unwrap();
1777 ds.write_raw(&[1000u32, 2000]).unwrap();
1778
1779 let ds = file.new_dataset::<i32>().shape([2]).create("i32").unwrap();
1780 ds.write_raw(&[-1000i32, 1000]).unwrap();
1781
1782 let ds = file.new_dataset::<u64>().shape([2]).create("u64").unwrap();
1783 ds.write_raw(&[10000u64, 20000]).unwrap();
1784
1785 let ds = file.new_dataset::<i64>().shape([2]).create("i64").unwrap();
1786 ds.write_raw(&[-10000i64, 10000]).unwrap();
1787
1788 let ds = file.new_dataset::<f32>().shape([2]).create("f32").unwrap();
1789 ds.write_raw(&[1.5f32, 2.5]).unwrap();
1790
1791 let ds = file.new_dataset::<f64>().shape([2]).create("f64").unwrap();
1792 ds.write_raw(&[1.23456f64, 7.89012]).unwrap();
1793
1794 file.close().unwrap();
1795 }
1796
1797 {
1798 let file = H5File::open(&path).unwrap();
1799
1800 assert_eq!(
1801 file.dataset("u8").unwrap().read_raw::<u8>().unwrap(),
1802 vec![1u8, 2]
1803 );
1804 assert_eq!(
1805 file.dataset("i8").unwrap().read_raw::<i8>().unwrap(),
1806 vec![-1i8, 1]
1807 );
1808 assert_eq!(
1809 file.dataset("u16").unwrap().read_raw::<u16>().unwrap(),
1810 vec![100u16, 200]
1811 );
1812 assert_eq!(
1813 file.dataset("i16").unwrap().read_raw::<i16>().unwrap(),
1814 vec![-100i16, 100]
1815 );
1816 assert_eq!(
1817 file.dataset("u32").unwrap().read_raw::<u32>().unwrap(),
1818 vec![1000u32, 2000]
1819 );
1820 assert_eq!(
1821 file.dataset("i32").unwrap().read_raw::<i32>().unwrap(),
1822 vec![-1000i32, 1000]
1823 );
1824 assert_eq!(
1825 file.dataset("u64").unwrap().read_raw::<u64>().unwrap(),
1826 vec![10000u64, 20000]
1827 );
1828 assert_eq!(
1829 file.dataset("i64").unwrap().read_raw::<i64>().unwrap(),
1830 vec![-10000i64, 10000]
1831 );
1832 assert_eq!(
1833 file.dataset("f32").unwrap().read_raw::<f32>().unwrap(),
1834 vec![1.5f32, 2.5]
1835 );
1836 assert_eq!(
1837 file.dataset("f64").unwrap().read_raw::<f64>().unwrap(),
1838 vec![1.23456f64, 7.89012]
1839 );
1840 }
1841
1842 std::fs::remove_file(&path).ok();
1843 }
1844
1845 #[test]
1846 fn append_chunked_roundtrip() {
1847 let path = temp_path("append_chunked");
1848
1849 {
1850 let file = H5File::create(&path).unwrap();
1851 let ds = file
1852 .new_dataset::<f64>()
1853 .shape([0, 3])
1854 .chunk(&[1, 3])
1855 .max_shape(&[None, Some(3)])
1856 .create("data")
1857 .unwrap();
1858
1859 ds.append(&[1.0f64, 2.0, 3.0]).unwrap();
1861 ds.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();
1863
1864 file.close().unwrap();
1865 }
1866
1867 {
1868 let file = H5File::open(&path).unwrap();
1869 let ds = file.dataset("data").unwrap();
1870 assert_eq!(ds.shape(), vec![3, 3]);
1871 let all = ds.read_raw::<f64>().unwrap();
1872 assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
1873 }
1874
1875 std::fs::remove_file(&path).ok();
1876 }
1877
1878 #[test]
1879 fn append_1d_chunked() {
1880 let path = temp_path("append_1d");
1881
1882 {
1883 let file = H5File::create(&path).unwrap();
1884 let ds = file
1885 .new_dataset::<i32>()
1886 .shape([0])
1887 .chunk(&[4])
1888 .max_shape(&[None])
1889 .create("values")
1890 .unwrap();
1891
1892 ds.append(&[10i32, 20, 30]).unwrap(); ds.append(&[40i32]).unwrap(); ds.append(&[50i32, 60, 70, 80]).unwrap(); file.close().unwrap();
1897 }
1898
1899 {
1900 let file = H5File::open(&path).unwrap();
1901 let ds = file.dataset("values").unwrap();
1902 assert_eq!(ds.shape(), vec![8]);
1903 let all = ds.read_raw::<i32>().unwrap();
1904 assert_eq!(all, vec![10, 20, 30, 40, 50, 60, 70, 80]);
1905 }
1906
1907 std::fs::remove_file(&path).ok();
1908 }
1909
1910 #[test]
1911 fn append_partial_chunk_flushed_on_close() {
1912 let path = temp_path("append_partial_close");
1913
1914 {
1915 let file = H5File::create(&path).unwrap();
1916 let ds = file
1917 .new_dataset::<f64>()
1918 .shape([0])
1919 .chunk(&[4])
1920 .max_shape(&[None])
1921 .create("vals")
1922 .unwrap();
1923
1924 ds.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
1926 file.close().unwrap();
1927 }
1928
1929 {
1930 let file = H5File::open(&path).unwrap();
1931 let ds = file.dataset("vals").unwrap();
1932 assert_eq!(ds.shape(), vec![5]);
1933 let all = ds.read_raw::<f64>().unwrap();
1934 assert_eq!(all.len(), 5);
1937 assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
1938 }
1939
1940 std::fs::remove_file(&path).ok();
1941 }
1942
1943 #[test]
1944 fn vlen_append_after_reopen_filtered() {
1945 let path = temp_path("vlen_reopen_filtered");
1949 {
1950 let file = H5File::create(&path).unwrap();
1951 file.create_appendable_vlen_dataset(
1952 "strs",
1953 4,
1954 Some(crate::format::messages::filter::FilterPipeline::deflate(6)),
1955 )
1956 .unwrap();
1957 file.append_vlen_strings("strs", &["alpha", "beta", "gamma"])
1958 .unwrap();
1959 file.close().unwrap();
1960 }
1961 {
1962 let file = H5File::open_rw(&path).unwrap();
1963 file.append_vlen_strings("strs", &["delta"]).unwrap();
1964 file.close().unwrap();
1965 }
1966 {
1967 let file = H5File::open(&path).unwrap();
1968 let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
1969 assert_eq!(
1970 got.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
1971 vec!["alpha", "beta", "gamma", "delta"]
1972 );
1973 }
1974 std::fs::remove_file(&path).ok();
1975 }
1976
1977 #[test]
1978 fn vlen_append_after_reopen_data_block() {
1979 let path = temp_path("vlen_reopen_datablk");
1983 let labels: Vec<String> = (0..9).map(|i| format!("s{i}")).collect();
1984 {
1985 let file = H5File::create(&path).unwrap();
1986 file.create_appendable_vlen_dataset("strs", 2, None)
1987 .unwrap();
1988 let refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
1989 file.append_vlen_strings("strs", &refs).unwrap();
1990 file.close().unwrap();
1991 }
1992 {
1993 let file = H5File::open_rw(&path).unwrap();
1994 file.append_vlen_strings("strs", &["s9"]).unwrap();
1995 file.close().unwrap();
1996 }
1997 {
1998 let file = H5File::open(&path).unwrap();
1999 let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
2000 let want: Vec<String> = (0..10).map(|i| format!("s{i}")).collect();
2001 assert_eq!(got, want);
2002 }
2003 std::fs::remove_file(&path).ok();
2004 }
2005
2006 #[test]
2007 fn vlen_append_after_reopen_super_block() {
2008 let path = temp_path("vlen_reopen_super");
2016 let labels: Vec<String> = (0..489).map(|i| format!("v{i}")).collect();
2019 {
2020 let file = H5File::create(&path).unwrap();
2021 file.create_appendable_vlen_dataset("strs", 2, None)
2022 .unwrap();
2023 let refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
2024 file.append_vlen_strings("strs", &refs).unwrap();
2025 file.close().unwrap();
2026 }
2027 {
2028 let file = H5File::open_rw(&path).unwrap();
2029 file.append_vlen_strings("strs", &["v489"]).unwrap();
2030 file.close().unwrap();
2031 }
2032 {
2033 let file = H5File::open(&path).unwrap();
2034 let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
2035 let want: Vec<String> = (0..490).map(|i| format!("v{i}")).collect();
2036 assert_eq!(got, want);
2037 }
2038 std::fs::remove_file(&path).ok();
2039 }
2040
2041 #[test]
2042 fn vlen_append_after_reopen_filtered_data_block() {
2043 let path = temp_path("vlen_reopen_filt_datablk");
2046 let labels: Vec<String> = (0..9).map(|i| format!("item{i:02}")).collect();
2047 {
2048 let file = H5File::create(&path).unwrap();
2049 file.create_appendable_vlen_dataset(
2050 "strs",
2051 2,
2052 Some(crate::format::messages::filter::FilterPipeline::deflate(6)),
2053 )
2054 .unwrap();
2055 let refs: Vec<&str> = labels.iter().map(|s| s.as_str()).collect();
2056 file.append_vlen_strings("strs", &refs).unwrap();
2057 file.close().unwrap();
2058 }
2059 {
2060 let file = H5File::open_rw(&path).unwrap();
2061 file.append_vlen_strings("strs", &["item09"]).unwrap();
2062 file.close().unwrap();
2063 }
2064 {
2065 let file = H5File::open(&path).unwrap();
2066 let got = file.dataset("strs").unwrap().read_vlen_strings().unwrap();
2067 let want: Vec<String> = (0..10).map(|i| format!("item{i:02}")).collect();
2068 assert_eq!(got, want);
2069 }
2070 std::fs::remove_file(&path).ok();
2071 }
2072
2073 #[test]
2074 fn group_nx_class_attribute_roundtrip() {
2075 let path = temp_path("group_nx_class");
2078 {
2079 let file = H5File::create(&path).unwrap();
2080 let entry = file.create_group("entry").unwrap();
2081 entry.set_attr_string("NX_class", "NXentry").unwrap();
2082 let det = entry.create_group("detector").unwrap();
2083 det.set_attr_string("NX_class", "NXdetector").unwrap();
2084 det.set_attr_numeric("frame_count", &7i32).unwrap();
2085 det.new_dataset::<f32>()
2086 .shape([4])
2087 .create("data")
2088 .unwrap()
2089 .write_raw(&[1.0f32; 4])
2090 .unwrap();
2091 file.close().unwrap();
2092 }
2093 {
2094 let file = H5File::open(&path).unwrap();
2095 let entry = file.root_group().group("entry").unwrap();
2096 assert_eq!(entry.attr_string("NX_class").unwrap(), "NXentry");
2097 let det = entry.group("detector").unwrap();
2098 assert_eq!(det.attr_string("NX_class").unwrap(), "NXdetector");
2099 let names = det.attr_names().unwrap();
2100 assert!(names.contains(&"NX_class".to_string()));
2101 assert!(names.contains(&"frame_count".to_string()));
2102 }
2103 std::fs::remove_file(&path).ok();
2104 }
2105
2106 #[test]
2107 fn ea_super_block_roundtrip() {
2108 let path = temp_path("ea_super_rt");
2111 {
2112 let file = H5File::create(&path).unwrap();
2113 let ds = file
2114 .new_dataset::<i32>()
2115 .shape([0])
2116 .chunk(&[1])
2117 .max_shape(&[None])
2118 .create("v")
2119 .unwrap();
2120 ds.append(&(0..2000).collect::<Vec<i32>>()).unwrap();
2121 file.close().unwrap();
2122 }
2123 {
2124 let file = H5File::open(&path).unwrap();
2125 let v = file.dataset("v").unwrap().read_raw::<i32>().unwrap();
2126 assert_eq!(v.len(), 2000);
2127 assert!(v.iter().enumerate().all(|(i, &x)| x == i as i32));
2128 }
2129 std::fs::remove_file(&path).ok();
2130 }
2131
2132 #[test]
2133 fn ea_filtered_super_block_roundtrip() {
2134 let path = temp_path("ea_filt_super");
2136 {
2137 let file = H5File::create(&path).unwrap();
2138 let ds = file
2139 .new_dataset::<i32>()
2140 .shape([0])
2141 .chunk(&[1])
2142 .max_shape(&[None])
2143 .deflate(4)
2144 .create("v")
2145 .unwrap();
2146 ds.append(&(0..600).collect::<Vec<i32>>()).unwrap();
2147 file.close().unwrap();
2148 }
2149 {
2150 let file = H5File::open(&path).unwrap();
2151 let v = file.dataset("v").unwrap().read_raw::<i32>().unwrap();
2152 assert_eq!(v, (0..600).collect::<Vec<i32>>());
2153 }
2154 std::fs::remove_file(&path).ok();
2155 }
2156
2157 #[test]
2158 fn ea_super_block_open_append() {
2159 let path = temp_path("ea_super_append");
2161 {
2162 let file = H5File::create(&path).unwrap();
2163 let ds = file
2164 .new_dataset::<i32>()
2165 .shape([0])
2166 .chunk(&[1])
2167 .max_shape(&[None])
2168 .create("v")
2169 .unwrap();
2170 ds.append(&(0..300).collect::<Vec<i32>>()).unwrap();
2171 file.close().unwrap();
2172 }
2173 {
2174 let mut w = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
2175 let idx = w.dataset_index("v").unwrap();
2176 for c in 300..900u64 {
2177 w.write_chunk(idx, c, &(c as i32).to_le_bytes()).unwrap();
2178 }
2179 w.extend_dataset(idx, &[900]).unwrap();
2180 w.close().unwrap();
2181 }
2182 {
2183 let file = H5File::open(&path).unwrap();
2184 let v = file.dataset("v").unwrap().read_raw::<i32>().unwrap();
2185 assert_eq!(v.len(), 900);
2186 assert!(v.iter().enumerate().all(|(i, &x)| x == i as i32));
2187 }
2188 std::fs::remove_file(&path).ok();
2189 }
2190
2191 #[test]
2192 fn btree_v2_multi_unlimited_roundtrip() {
2193 let path = temp_path("bt2_multi");
2196 {
2197 let file = H5File::create(&path).unwrap();
2198 let ds = file
2199 .new_dataset::<i32>()
2200 .shape([0, 0])
2201 .chunk(&[2, 2])
2202 .max_shape(&[None, None])
2203 .create("grid")
2204 .unwrap();
2205 assert!(ds.is_chunked());
2206 for cr in 0..2usize {
2208 for cc in 0..2usize {
2209 let mut bytes = Vec::new();
2210 for i in 0..2usize {
2211 for j in 0..2usize {
2212 let v = ((cr * 2 + i) * 4 + (cc * 2 + j)) as i32;
2213 bytes.extend_from_slice(&v.to_le_bytes());
2214 }
2215 }
2216 ds.write_chunk_at(&[cr, cc], &bytes).unwrap();
2217 }
2218 }
2219 file.close().unwrap();
2220 }
2221 {
2222 let file = H5File::open(&path).unwrap();
2223 let ds = file.dataset("grid").unwrap();
2224 assert_eq!(ds.shape(), vec![4, 4]);
2225 assert_eq!(ds.read_raw::<i32>().unwrap(), (0..16).collect::<Vec<i32>>());
2226 }
2227 std::fs::remove_file(&path).ok();
2228 }
2229
2230 #[test]
2231 fn subframe_chunking_roundtrip() {
2232 let path = temp_path("subframe");
2236 {
2237 let file = H5File::create(&path).unwrap();
2238 let ds = file
2239 .new_dataset::<i32>()
2240 .shape([0, 8, 8])
2241 .chunk(&[1, 4, 4])
2242 .max_shape(&[None, Some(8), Some(8)])
2243 .create("v")
2244 .unwrap();
2245 for f in 0..3usize {
2246 for cr in 0..2usize {
2247 for cc in 0..2usize {
2248 let mut bytes = Vec::new();
2249 for i in 0..4usize {
2250 for j in 0..4usize {
2251 let v = (f * 64 + (cr * 4 + i) * 8 + (cc * 4 + j)) as i32;
2252 bytes.extend_from_slice(&v.to_le_bytes());
2253 }
2254 }
2255 ds.write_chunk_at(&[f, cr, cc], &bytes).unwrap();
2256 }
2257 }
2258 }
2259 file.close().unwrap();
2260 }
2261 {
2262 let file = H5File::open(&path).unwrap();
2263 let ds = file.dataset("v").unwrap();
2264 assert_eq!(ds.shape(), vec![3, 8, 8]);
2265 assert_eq!(
2266 ds.read_raw::<i32>().unwrap(),
2267 (0..192).collect::<Vec<i32>>()
2268 );
2269 }
2270 std::fs::remove_file(&path).ok();
2271 }
2272
2273 #[test]
2274 fn fill_value_contiguous_roundtrip() {
2275 let path = temp_path("fill_value_contig");
2276 {
2277 let file = H5File::create(&path).unwrap();
2278 let ds = file
2279 .new_dataset::<f32>()
2280 .shape([4])
2281 .fill_value(2.5f32)
2282 .create("data")
2283 .unwrap();
2284 ds.write_raw(&[1.0f32, 2.0, 3.0, 4.0]).unwrap();
2285 file.close().unwrap();
2286 }
2287 {
2289 let writer = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
2290 let idx = writer.dataset_index("data").unwrap();
2291 assert_eq!(
2292 writer.datasets[idx].fill_value,
2293 Some(2.5f32.to_le_bytes().to_vec())
2294 );
2295 }
2296 {
2298 let file = H5File::open(&path).unwrap();
2299 let ds = file.dataset("data").unwrap();
2300 assert_eq!(ds.read_raw::<f32>().unwrap(), vec![1.0, 2.0, 3.0, 4.0]);
2301 }
2302 std::fs::remove_file(&path).ok();
2303 }
2304
2305 #[test]
2306 fn fill_value_chunked_roundtrip() {
2307 let path = temp_path("fill_value_chunked");
2308 {
2309 let file = H5File::create(&path).unwrap();
2310 let ds = file
2311 .new_dataset::<i32>()
2312 .shape([0])
2313 .chunk(&[4])
2314 .max_shape(&[None])
2315 .fill_value(-7i32)
2316 .create("vals")
2317 .unwrap();
2318 ds.append(&[1i32, 2, 3, 4]).unwrap();
2319 file.close().unwrap();
2320 }
2321 {
2322 let writer = crate::io::writer::Hdf5Writer::open_append(&path).unwrap();
2323 let idx = writer.dataset_index("vals").unwrap();
2324 assert_eq!(
2325 writer.datasets[idx].fill_value,
2326 Some((-7i32).to_le_bytes().to_vec())
2327 );
2328 }
2329 std::fs::remove_file(&path).ok();
2330 }
2331
2332 #[test]
2333 fn fill_value_read_missing_chunks() {
2334 fn i32_bytes(vals: &[i32]) -> Vec<u8> {
2337 vals.iter().flat_map(|v| v.to_le_bytes()).collect()
2338 }
2339 let path = temp_path("fill_value_read_missing");
2340 {
2341 let file = H5File::create(&path).unwrap();
2342 let ds = file
2343 .new_dataset::<i32>()
2344 .shape([0])
2345 .chunk(&[2])
2346 .max_shape(&[None])
2347 .fill_value(-1i32)
2348 .create("vals")
2349 .unwrap();
2350 ds.write_chunk(0, &i32_bytes(&[10, 20])).unwrap();
2352 ds.write_chunk(2, &i32_bytes(&[50, 60])).unwrap();
2353 ds.extend(&[6]).unwrap();
2354 file.close().unwrap();
2355 }
2356 {
2357 let file = H5File::open(&path).unwrap();
2358 let ds = file.dataset("vals").unwrap();
2359 let all = ds.read_raw::<i32>().unwrap();
2360 assert_eq!(all, vec![10, 20, -1, -1, 50, 60]);
2361 }
2362 std::fs::remove_file(&path).ok();
2363 }
2364
2365 #[test]
2366 fn fill_value_partial_chunk_padded_with_fill() {
2367 let path = temp_path("fill_value_partial_pad");
2371 {
2372 let file = H5File::create(&path).unwrap();
2373 let ds = file
2374 .new_dataset::<i32>()
2375 .shape([0])
2376 .chunk(&[4])
2377 .max_shape(&[None])
2378 .fill_value(-9i32)
2379 .create("vals")
2380 .unwrap();
2381 ds.append(&[1i32, 2, 3]).unwrap();
2383 file.close().unwrap();
2384 }
2385 let bytes = std::fs::read(&path).unwrap();
2386 let needle: Vec<u8> = [1i32, 2, 3].iter().flat_map(|v| v.to_le_bytes()).collect();
2388 let pos = bytes
2389 .windows(needle.len())
2390 .position(|w| w == needle)
2391 .expect("chunk data [1,2,3] not found in file");
2392 let pad = &bytes[pos + needle.len()..pos + needle.len() + 4];
2393 assert_eq!(
2394 pad,
2395 &(-9i32).to_le_bytes(),
2396 "partial chunk tail must be padded with fill value -9, got {:?}",
2397 pad
2398 );
2399 std::fs::remove_file(&path).ok();
2400 }
2401
2402 #[test]
2403 fn vlen_append_after_reopen_preserves_existing() {
2404 let path = temp_path("vlen_append_reopen");
2407 {
2408 let file = H5File::create(&path).unwrap();
2409 file.create_appendable_vlen_dataset("strs", 4, None)
2410 .unwrap();
2411 file.append_vlen_strings("strs", &["a", "b", "c"]).unwrap();
2413 file.close().unwrap();
2414 }
2415 {
2416 let file = H5File::open_rw(&path).unwrap();
2418 file.append_vlen_strings("strs", &["d"]).unwrap();
2419 file.close().unwrap();
2420 }
2421 {
2422 let file = H5File::open(&path).unwrap();
2423 let ds = file.dataset("strs").unwrap();
2424 let got = ds.read_vlen_strings().unwrap();
2425 assert_eq!(
2426 got.iter().map(|s| s.as_str()).collect::<Vec<_>>(),
2427 vec!["a", "b", "c", "d"]
2428 );
2429 }
2430 std::fs::remove_file(&path).ok();
2431 }
2432
2433 #[test]
2434 fn fill_value_size_mismatch_errors() {
2435 let path = temp_path("fill_value_mismatch");
2436 let mut writer = crate::io::writer::Hdf5Writer::create(&path).unwrap();
2437 let dt = <f64 as crate::types::H5Type>::hdf5_type();
2438 let idx = writer.create_dataset("d", dt, &[4u64]).unwrap();
2439 assert!(writer.set_dataset_fill_value(idx, vec![0u8; 4]).is_err());
2441 writer.set_dataset_fill_value(idx, vec![0u8; 8]).unwrap();
2443 writer.close().unwrap();
2444 std::fs::remove_file(&path).ok();
2445 }
2446}