1use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
12pub struct DatasetBuilder<T: H5Type> {
29 file_inner: SharedInner,
30 shape: Option<Vec<usize>>,
31 chunk_dims: Option<Vec<usize>>,
32 max_shape: Option<Vec<Option<usize>>>,
33 deflate_level: Option<u32>,
34 shuffle_deflate_level: Option<u32>,
35 custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
36 group_path: Option<String>,
37 _marker: std::marker::PhantomData<T>,
38}
39
40impl<T: H5Type> DatasetBuilder<T> {
41 pub(crate) fn new(file_inner: SharedInner) -> Self {
42 Self {
43 file_inner,
44 shape: None,
45 chunk_dims: None,
46 max_shape: None,
47 deflate_level: None,
48 shuffle_deflate_level: None,
49 custom_pipeline: None,
50 group_path: None,
51 _marker: std::marker::PhantomData,
52 }
53 }
54
55 pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
56 Self {
57 file_inner,
58 shape: None,
59 chunk_dims: None,
60 max_shape: None,
61 deflate_level: None,
62 shuffle_deflate_level: None,
63 custom_pipeline: None,
64 group_path: Some(group_path),
65 _marker: std::marker::PhantomData,
66 }
67 }
68
69 #[must_use]
74 pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
75 self.shape = Some(dims.as_ref().to_vec());
76 self
77 }
78
79 #[must_use]
81 pub fn scalar(mut self) -> Self {
82 self.shape = Some(vec![]);
83 self
84 }
85
86 #[must_use]
92 pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
93 self.chunk_dims = Some(chunk_dims.to_vec());
94 self
95 }
96
97 #[must_use]
101 pub fn resizable(mut self) -> Self {
102 self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
103 self
104 }
105
106 #[must_use]
108 pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
109 self.max_shape = Some(max.to_vec());
110 self
111 }
112
113 #[must_use]
118 pub fn deflate(mut self, level: u32) -> Self {
119 self.deflate_level = Some(level);
120 self
121 }
122
123 #[must_use]
129 pub fn shuffle_deflate(mut self, level: u32) -> Self {
130 self.shuffle_deflate_level = Some(level);
131 self
132 }
133
134 #[must_use]
138 pub fn zstd(mut self, level: u32) -> Self {
139 self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
140 self
141 }
142
143 #[must_use]
148 pub fn filter_pipeline(
149 mut self,
150 pipeline: crate::format::messages::filter::FilterPipeline,
151 ) -> Self {
152 self.custom_pipeline = Some(pipeline);
153 self
154 }
155
156 pub fn create(self, name: &str) -> Result<H5Dataset> {
161 let shape = self.shape.ok_or_else(|| {
162 Hdf5Error::InvalidState("shape must be set before calling create()".into())
163 })?;
164
165 let full_name = if let Some(ref gp) = self.group_path {
167 if gp == "/" {
168 name.to_string()
169 } else {
170 let trimmed = gp.trim_start_matches('/');
171 format!("{}/{}", trimmed, name)
172 }
173 } else {
174 name.to_string()
175 };
176 let group_path = self.group_path.clone();
177
178 let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
179 let datatype = T::hdf5_type();
180 let element_size = T::element_size();
181
182 if let Some(ref chunk_dims) = self.chunk_dims {
183 let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
185 let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
186 max.iter()
187 .map(|m| m.map_or(u64::MAX, |v| v as u64))
188 .collect()
189 } else {
190 dims_u64.clone()
192 };
193
194 let index = {
195 let mut inner = borrow_inner_mut(&self.file_inner);
196 match &mut *inner {
197 H5FileInner::Writer(writer) => {
198 let idx = if let Some(pipeline) = self.custom_pipeline {
199 writer.create_chunked_dataset_with_pipeline(
200 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
201 )?
202 } else if let Some(level) = self.shuffle_deflate_level {
203 let pipeline =
204 crate::format::messages::filter::FilterPipeline::shuffle_deflate(
205 T::element_size() as u32,
206 level,
207 );
208 writer.create_chunked_dataset_with_pipeline(
209 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
210 )?
211 } else if let Some(level) = self.deflate_level {
212 writer.create_chunked_dataset_compressed(
213 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
214 )?
215 } else {
216 writer.create_chunked_dataset(
217 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
218 )?
219 };
220 if let Some(ref gp) = group_path {
221 if gp != "/" {
222 writer.assign_dataset_to_group(gp, idx)?;
223 }
224 }
225 idx
226 }
227 H5FileInner::Reader(_) => {
228 return Err(Hdf5Error::InvalidState(
229 "cannot create a dataset in read mode".into(),
230 ));
231 }
232 H5FileInner::Closed => {
233 return Err(Hdf5Error::InvalidState("file is closed".into()));
234 }
235 }
236 };
237
238 Ok(H5Dataset {
239 file_inner: clone_inner(&self.file_inner),
240 info: DatasetInfo::Writer {
241 index,
242 shape,
243 element_size,
244 chunked: true,
245 },
246 })
247 } else {
248 let index = {
250 let mut inner = borrow_inner_mut(&self.file_inner);
251 match &mut *inner {
252 H5FileInner::Writer(writer) => {
253 let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
254 if let Some(ref gp) = group_path {
255 if gp != "/" {
256 writer.assign_dataset_to_group(gp, idx)?;
257 }
258 }
259 idx
260 }
261 H5FileInner::Reader(_) => {
262 return Err(Hdf5Error::InvalidState(
263 "cannot create a dataset in read mode".into(),
264 ));
265 }
266 H5FileInner::Closed => {
267 return Err(Hdf5Error::InvalidState("file is closed".into()));
268 }
269 }
270 };
271
272 Ok(H5Dataset {
273 file_inner: clone_inner(&self.file_inner),
274 info: DatasetInfo::Writer {
275 index,
276 shape,
277 element_size,
278 chunked: false,
279 },
280 })
281 }
282 }
283}
284
/// Mode-specific state carried by an [`H5Dataset`] handle.
enum DatasetInfo {
    /// Handle produced by `DatasetBuilder::create` on a file open for writing.
    Writer {
        /// Index returned by the writer when the dataset was created; used to
        /// address the dataset in all later writer calls.
        index: usize,
        /// Shape requested at creation time.
        shape: Vec<usize>,
        /// `T::element_size()` of the type the dataset was created with.
        element_size: usize,
        /// Whether the dataset was created with chunk dimensions.
        chunked: bool,
    },
    /// Handle produced by `H5Dataset::new_reader`.
    Reader {
        /// Name used to look the dataset up in the reader.
        name: String,
        /// Shape supplied by the caller of `new_reader`.
        shape: Vec<usize>,
        /// Element size supplied by the caller of `new_reader`.
        element_size: usize,
    },
}
312
/// Handle to a single dataset inside an HDF5 file.
///
/// The handle holds its own reference to the shared file state, and the
/// operations it supports depend on whether it was created in write mode or
/// read mode (see the individual methods).
pub struct H5Dataset {
    /// Shared file state, borrowed for every operation.
    file_inner: SharedInner,
    /// Write-mode or read-mode metadata for this dataset.
    info: DatasetInfo,
}
326
327impl H5Dataset {
328 pub(crate) fn new_reader(
330 file_inner: SharedInner,
331 name: String,
332 shape: Vec<usize>,
333 element_size: usize,
334 ) -> Self {
335 Self {
336 file_inner,
337 info: DatasetInfo::Reader {
338 name,
339 shape,
340 element_size,
341 },
342 }
343 }
344
345 pub fn shape(&self) -> Vec<usize> {
347 match &self.info {
348 DatasetInfo::Writer { shape, .. } => shape.clone(),
349 DatasetInfo::Reader { shape, .. } => shape.clone(),
350 }
351 }
352
353 pub fn ndims(&self) -> usize {
355 match &self.info {
356 DatasetInfo::Writer { shape, .. } => shape.len(),
357 DatasetInfo::Reader { shape, .. } => shape.len(),
358 }
359 }
360
361 pub fn total_elements(&self) -> usize {
363 match &self.info {
364 DatasetInfo::Writer { shape, .. } => shape.iter().product(),
365 DatasetInfo::Reader { shape, .. } => shape.iter().product(),
366 }
367 }
368
369 pub fn element_size(&self) -> usize {
371 match &self.info {
372 DatasetInfo::Writer { element_size, .. } => *element_size,
373 DatasetInfo::Reader { element_size, .. } => *element_size,
374 }
375 }
376
377 pub fn chunk_dims(&self) -> Option<Vec<usize>> {
379 match &self.info {
380 DatasetInfo::Reader { name, .. } => {
381 let inner = borrow_inner(&self.file_inner);
382 if let H5FileInner::Reader(reader) = &*inner {
383 if let Some(info) = reader.dataset_info(name) {
384 if let crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 {
385 chunk_dims,
386 ..
387 } = &info.layout
388 {
389 return Some(
391 chunk_dims[..chunk_dims.len() - 1]
392 .iter()
393 .map(|&d| d as usize)
394 .collect(),
395 );
396 }
397 }
398 }
399 None
400 }
401 DatasetInfo::Writer { .. } => None,
402 }
403 }
404
405 pub fn is_chunked(&self) -> bool {
407 match &self.info {
408 DatasetInfo::Writer { chunked, .. } => *chunked,
409 DatasetInfo::Reader { name, .. } => {
410 let inner = borrow_inner(&self.file_inner);
411 match &*inner {
412 H5FileInner::Reader(reader) => {
413 if let Some(info) = reader.dataset_info(name) {
414 matches!(
415 info.layout,
416 crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 { .. }
417 )
418 } else {
419 false
420 }
421 }
422 _ => false,
423 }
424 }
425 }
426 }
427
428 pub fn attr_names(&self) -> Result<Vec<String>> {
430 match &self.info {
431 DatasetInfo::Reader { name, .. } => {
432 let inner = borrow_inner(&self.file_inner);
433 match &*inner {
434 H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
435 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
436 }
437 }
438 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
439 "attr_names not available in write mode".into(),
440 )),
441 }
442 }
443
444 pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
446 match &self.info {
447 DatasetInfo::Reader { name, .. } => {
448 let inner = borrow_inner(&self.file_inner);
449 match &*inner {
450 H5FileInner::Reader(reader) => {
451 let attr_msg = reader.dataset_attr(name, attr_name)?;
452 Ok(crate::attribute::H5Attribute::new_reader(
453 clone_inner(&self.file_inner),
454 attr_msg.name.clone(),
455 attr_msg.data.clone(),
456 ))
457 }
458 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
459 }
460 }
461 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
462 "attr() not available in write mode".into(),
463 )),
464 }
465 }
466
467 pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
483 let ds_index = match &self.info {
484 DatasetInfo::Writer { index, .. } => *index,
485 DatasetInfo::Reader { .. } => {
486 usize::MAX
489 }
490 };
491 AttrBuilder::new(&self.file_inner, ds_index)
492 }
493
494 pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
506 match &self.info {
507 DatasetInfo::Writer {
508 index,
509 shape,
510 element_size,
511 chunked,
512 } => {
513 if *chunked {
514 return Err(Hdf5Error::InvalidState(
515 "use write_chunk for chunked datasets".into(),
516 ));
517 }
518
519 let total_elements: usize = shape.iter().product();
520 if data.len() != total_elements {
521 return Err(Hdf5Error::InvalidState(format!(
522 "data length {} does not match dataset size {}",
523 data.len(),
524 total_elements,
525 )));
526 }
527
528 if T::element_size() != *element_size {
530 return Err(Hdf5Error::TypeMismatch(format!(
531 "write type has element size {} but dataset expects {}",
532 T::element_size(),
533 element_size,
534 )));
535 }
536
537 let byte_len = data.len() * T::element_size();
541 let raw =
542 unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
543
544 let mut inner = borrow_inner_mut(&self.file_inner);
545 match &mut *inner {
546 H5FileInner::Writer(writer) => {
547 writer.write_dataset_raw(*index, raw)?;
548 Ok(())
549 }
550 _ => Err(Hdf5Error::InvalidState(
551 "file is no longer in write mode".into(),
552 )),
553 }
554 }
555 DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
556 "cannot write to a dataset opened in read mode".into(),
557 )),
558 }
559 }
560
561 pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
566 match &self.info {
567 DatasetInfo::Writer { index, chunked, .. } => {
568 if !*chunked {
569 return Err(Hdf5Error::InvalidState(
570 "write_chunk is only for chunked datasets".into(),
571 ));
572 }
573
574 let mut inner = borrow_inner_mut(&self.file_inner);
575 match &mut *inner {
576 H5FileInner::Writer(writer) => {
577 writer.write_chunk(*index, chunk_idx as u64, data)?;
578 Ok(())
579 }
580 _ => Err(Hdf5Error::InvalidState(
581 "file is no longer in write mode".into(),
582 )),
583 }
584 }
585 DatasetInfo::Reader { .. } => {
586 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
587 }
588 }
589 }
590
591 pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
597 match &self.info {
598 DatasetInfo::Writer { index, chunked, .. } => {
599 if !*chunked {
600 return Err(Hdf5Error::InvalidState(
601 "write_chunks_batch is only for chunked datasets".into(),
602 ));
603 }
604 let pairs: Vec<(u64, &[u8])> = chunks
605 .iter()
606 .map(|(idx, data)| (*idx as u64, *data))
607 .collect();
608 let mut inner = borrow_inner_mut(&self.file_inner);
609 match &mut *inner {
610 H5FileInner::Writer(writer) => {
611 writer.write_chunks_batch(*index, &pairs)?;
612 Ok(())
613 }
614 _ => Err(Hdf5Error::InvalidState(
615 "file is no longer in write mode".into(),
616 )),
617 }
618 }
619 DatasetInfo::Reader { .. } => {
620 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
621 }
622 }
623 }
624
625 pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
648 match &self.info {
649 DatasetInfo::Writer {
650 index,
651 element_size,
652 chunked,
653 ..
654 } => {
655 if !*chunked {
656 return Err(Hdf5Error::InvalidState(
657 "append is only for chunked datasets".into(),
658 ));
659 }
660 if T::element_size() != *element_size {
661 return Err(Hdf5Error::TypeMismatch(format!(
662 "append type has element size {} but dataset expects {}",
663 T::element_size(),
664 element_size,
665 )));
666 }
667
668 let ds_index = *index;
669 let es = *element_size;
670
671 let mut inner = borrow_inner_mut(&self.file_inner);
672 let writer = match &mut *inner {
673 H5FileInner::Writer(w) => w,
674 _ => {
675 return Err(Hdf5Error::InvalidState(
676 "file is no longer in write mode".into(),
677 ))
678 }
679 };
680
681 let chunk_dims = writer
682 .dataset_chunk_dims(ds_index)
683 .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
684 .to_vec();
685 let dims = writer.dataset_dims(ds_index).to_vec();
686
687 let frame_elems: usize = if dims.len() > 1 {
689 dims[1..].iter().map(|&d| d as usize).product()
690 } else {
691 1
692 };
693
694 if frame_elems == 0 {
695 return Err(Hdf5Error::InvalidState(
696 "cannot append to dataset with zero-size trailing dimensions".into(),
697 ));
698 }
699
700 if !data.len().is_multiple_of(frame_elems) {
701 return Err(Hdf5Error::InvalidState(format!(
702 "data length {} is not a multiple of frame size {}",
703 data.len(),
704 frame_elems,
705 )));
706 }
707
708 let n_new_frames = data.len() / frame_elems;
709 let current_dim0 = dims[0] as usize;
710
711 let chunk_dim0 = chunk_dims[0] as usize;
713 let chunk_bytes = chunk_dims.iter().map(|&d| d as usize).product::<usize>() * es;
715 let frame_bytes = frame_elems * es;
716
717 let raw = unsafe {
718 std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
719 };
720
721 let ds = &mut writer.datasets[ds_index];
723 let buffered_frames = ds.append_buffered_frames as usize;
724 let mut combined = std::mem::take(&mut ds.append_buffer);
725 combined.extend_from_slice(raw);
726 ds.append_buffered_frames = 0;
727
728 let total_frames = buffered_frames + n_new_frames;
729 let total_bytes = combined.len();
730
731 let base_dim0 = current_dim0 - buffered_frames;
733 let mut byte_pos = 0usize;
734 let mut frame_pos = 0usize;
735
736 while frame_pos < total_frames {
737 let abs_frame = base_dim0 + frame_pos;
738 let chunk_idx = abs_frame / chunk_dim0;
739 let remaining_frames = total_frames - frame_pos;
740 let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);
741
742 if remaining_frames >= frames_to_fill {
743 let end = byte_pos + frames_to_fill * frame_bytes;
745 if frames_to_fill == chunk_dim0 {
746 writer.write_chunk(
747 ds_index,
748 chunk_idx as u64,
749 &combined[byte_pos..end],
750 )?;
751 } else {
752 let mut chunk_buf = vec![0u8; chunk_bytes];
754 let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
755 chunk_buf
756 [offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
757 .copy_from_slice(&combined[byte_pos..end]);
758 writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
759 }
760 byte_pos = end;
761 frame_pos += frames_to_fill;
762 } else {
763 let ds = &mut writer.datasets[ds_index];
765 ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
766 ds.append_buffered_frames = remaining_frames as u64;
767 frame_pos = total_frames;
768 }
769 }
770
771 let logical_dim0 = base_dim0 + total_frames;
773 let mut new_dims: Vec<u64> = dims;
774 new_dims[0] = logical_dim0 as u64;
775 writer.extend_dataset(ds_index, &new_dims)?;
776
777 Ok(())
778 }
779 DatasetInfo::Reader { .. } => {
780 Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
781 }
782 }
783 }
784
785 pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
787 match &self.info {
788 DatasetInfo::Writer { index, chunked, .. } => {
789 if !*chunked {
790 return Err(Hdf5Error::InvalidState(
791 "extend is only for chunked datasets".into(),
792 ));
793 }
794
795 let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
796 let mut inner = borrow_inner_mut(&self.file_inner);
797 match &mut *inner {
798 H5FileInner::Writer(writer) => {
799 writer.extend_dataset(*index, &dims_u64)?;
800 Ok(())
801 }
802 _ => Err(Hdf5Error::InvalidState(
803 "file is no longer in write mode".into(),
804 )),
805 }
806 }
807 DatasetInfo::Reader { .. } => {
808 Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
809 }
810 }
811 }
812
813 pub fn flush(&self) -> Result<()> {
815 match &self.info {
816 DatasetInfo::Writer { index, .. } => {
817 let mut inner = borrow_inner_mut(&self.file_inner);
818 match &mut *inner {
819 H5FileInner::Writer(writer) => {
820 writer.flush_dataset(*index)?;
821 Ok(())
822 }
823 _ => Ok(()),
824 }
825 }
826 DatasetInfo::Reader { .. } => Ok(()),
827 }
828 }
829
830 pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
835 match &self.info {
836 DatasetInfo::Reader {
837 name, element_size, ..
838 } => {
839 if T::element_size() != *element_size {
840 return Err(Hdf5Error::TypeMismatch(format!(
841 "read type has element size {} but dataset has element size {}",
842 T::element_size(),
843 element_size,
844 )));
845 }
846 let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
847 let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
848
849 let raw = {
850 let mut inner = borrow_inner_mut(&self.file_inner);
851 match &mut *inner {
852 H5FileInner::Reader(reader) => {
853 reader.read_slice(name, &starts_u64, &counts_u64)?
854 }
855 _ => {
856 return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
857 }
858 }
859 };
860
861 if raw.len() % T::element_size() != 0 {
862 return Err(Hdf5Error::TypeMismatch(format!(
863 "raw data size {} is not a multiple of element size {}",
864 raw.len(),
865 T::element_size(),
866 )));
867 }
868
869 let count = raw.len() / T::element_size();
870 let mut result = Vec::<T>::with_capacity(count);
871 unsafe {
872 std::ptr::copy_nonoverlapping(
873 raw.as_ptr(),
874 result.as_mut_ptr() as *mut u8,
875 raw.len(),
876 );
877 result.set_len(count);
878 }
879 Ok(result)
880 }
881 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
882 "cannot read_slice from a dataset in write mode".into(),
883 )),
884 }
885 }
886
887 pub fn write_slice<T: H5Type>(
891 &self,
892 starts: &[usize],
893 counts: &[usize],
894 data: &[T],
895 ) -> Result<()> {
896 match &self.info {
897 DatasetInfo::Writer {
898 index,
899 element_size,
900 chunked,
901 ..
902 } => {
903 if *chunked {
904 return Err(Hdf5Error::InvalidState(
905 "write_slice is only for contiguous datasets".into(),
906 ));
907 }
908 if T::element_size() != *element_size {
909 return Err(Hdf5Error::TypeMismatch(format!(
910 "write type has element size {} but dataset expects {}",
911 T::element_size(),
912 element_size,
913 )));
914 }
915
916 let expected: usize = counts.iter().product();
917 if data.len() != expected {
918 return Err(Hdf5Error::InvalidState(format!(
919 "data length {} does not match slice size {}",
920 data.len(),
921 expected,
922 )));
923 }
924
925 let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
926 let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();
927
928 let byte_len = data.len() * T::element_size();
929 let raw =
930 unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };
931
932 let mut inner = borrow_inner_mut(&self.file_inner);
933 match &mut *inner {
934 H5FileInner::Writer(writer) => {
935 writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
936 Ok(())
937 }
938 _ => Err(Hdf5Error::InvalidState(
939 "file is no longer in write mode".into(),
940 )),
941 }
942 }
943 DatasetInfo::Reader { .. } => {
944 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
945 }
946 }
947 }
948
949 pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
954 match &self.info {
955 DatasetInfo::Reader { name, .. } => {
956 let mut inner = borrow_inner_mut(&self.file_inner);
957 match &mut *inner {
958 H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
959 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
960 }
961 }
962 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
963 "cannot read vlen strings from a dataset in write mode".into(),
964 )),
965 }
966 }
967
968 pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
980 match &self.info {
981 DatasetInfo::Reader {
982 name, element_size, ..
983 } => {
984 if T::element_size() != *element_size {
985 return Err(Hdf5Error::TypeMismatch(format!(
986 "read type has element size {} but dataset has element size {}",
987 T::element_size(),
988 element_size,
989 )));
990 }
991
992 let raw = {
993 let mut inner = borrow_inner_mut(&self.file_inner);
994 match &mut *inner {
995 H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
996 _ => {
997 return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
998 }
999 }
1000 };
1001
1002 if raw.len() % T::element_size() != 0 {
1003 return Err(Hdf5Error::TypeMismatch(format!(
1004 "raw data size {} is not a multiple of element size {}",
1005 raw.len(),
1006 T::element_size(),
1007 )));
1008 }
1009
1010 let count = raw.len() / T::element_size();
1011 let mut result = Vec::<T>::with_capacity(count);
1012
1013 unsafe {
1020 std::ptr::copy_nonoverlapping(
1021 raw.as_ptr(),
1022 result.as_mut_ptr() as *mut u8,
1023 raw.len(),
1024 );
1025 result.set_len(count);
1026 }
1027
1028 Ok(result)
1029 }
1030 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
1031 "cannot read from a dataset in write mode".into(),
1032 )),
1033 }
1034 }
1035}
1036
1037#[cfg(test)]
1038mod tests {
1039 use crate::H5File;
1040 use std::path::PathBuf;
1041
1042 fn temp_path(name: &str) -> PathBuf {
1043 use std::sync::atomic::{AtomicU64, Ordering};
1047 static COUNTER: AtomicU64 = AtomicU64::new(0);
1048 let n = COUNTER.fetch_add(1, Ordering::Relaxed);
1049 std::env::temp_dir().join(format!(
1050 "hdf5_dataset_test_{}_{}_{}.h5",
1051 name,
1052 std::process::id(),
1053 n
1054 ))
1055 }
1056
1057 #[test]
1058 fn builder_requires_shape() {
1059 let path = temp_path("no_shape");
1060 let file = H5File::create(&path).unwrap();
1061 let result = file.new_dataset::<u8>().create("data");
1062 assert!(result.is_err());
1063 std::fs::remove_file(&path).ok();
1064 }
1065
1066 #[test]
1067 fn write_raw_size_mismatch() {
1068 let path = temp_path("size_mismatch");
1069 let file = H5File::create(&path).unwrap();
1070 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1071 let result = ds.write_raw(&[1u8, 2, 3]);
1073 assert!(result.is_err());
1074 std::fs::remove_file(&path).ok();
1075 }
1076
1077 #[test]
1078 fn roundtrip_u8_1d() {
1079 let path = temp_path("rt_u8_1d");
1080 let data: Vec<u8> = (0..10).collect();
1081
1082 {
1083 let file = H5File::create(&path).unwrap();
1084 let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
1085 ds.write_raw(&data).unwrap();
1086 file.close().unwrap();
1087 }
1088
1089 {
1090 let file = H5File::open(&path).unwrap();
1091 let ds = file.dataset("seq").unwrap();
1092 assert_eq!(ds.shape(), vec![10]);
1093 let readback = ds.read_raw::<u8>().unwrap();
1094 assert_eq!(readback, data);
1095 }
1096
1097 std::fs::remove_file(&path).ok();
1098 }
1099
1100 #[test]
1101 fn roundtrip_i32_2d() {
1102 let path = temp_path("rt_i32_2d");
1103 let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];
1104
1105 {
1106 let file = H5File::create(&path).unwrap();
1107 let ds = file
1108 .new_dataset::<i32>()
1109 .shape([2, 3])
1110 .create("matrix")
1111 .unwrap();
1112 ds.write_raw(&data).unwrap();
1113 file.close().unwrap();
1114 }
1115
1116 {
1117 let file = H5File::open(&path).unwrap();
1118 let ds = file.dataset("matrix").unwrap();
1119 assert_eq!(ds.shape(), vec![2, 3]);
1120 let readback = ds.read_raw::<i32>().unwrap();
1121 assert_eq!(readback, data);
1122 }
1123
1124 std::fs::remove_file(&path).ok();
1125 }
1126
1127 #[test]
1128 fn roundtrip_f64_3d() {
1129 let path = temp_path("rt_f64_3d");
1130 let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();
1131
1132 {
1133 let file = H5File::create(&path).unwrap();
1134 let ds = file
1135 .new_dataset::<f64>()
1136 .shape([2, 3, 4])
1137 .create("cube")
1138 .unwrap();
1139 ds.write_raw(&data).unwrap();
1140 file.close().unwrap();
1141 }
1142
1143 {
1144 let file = H5File::open(&path).unwrap();
1145 let ds = file.dataset("cube").unwrap();
1146 assert_eq!(ds.shape(), vec![2, 3, 4]);
1147 let readback = ds.read_raw::<f64>().unwrap();
1148 assert_eq!(readback, data);
1149 }
1150
1151 std::fs::remove_file(&path).ok();
1152 }
1153
1154 #[test]
1155 fn cannot_read_in_write_mode() {
1156 let path = temp_path("no_read_write");
1157 let file = H5File::create(&path).unwrap();
1158 let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1159 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1160 let result = ds.read_raw::<u8>();
1161 assert!(result.is_err());
1162 std::fs::remove_file(&path).ok();
1163 }
1164
1165 #[test]
1166 fn cannot_write_in_read_mode() {
1167 let path = temp_path("no_write_read");
1168
1169 {
1170 let file = H5File::create(&path).unwrap();
1171 let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1172 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1173 file.close().unwrap();
1174 }
1175
1176 {
1177 let file = H5File::open(&path).unwrap();
1178 let ds = file.dataset("x").unwrap();
1179 let result = ds.write_raw(&[5u8, 6, 7, 8]);
1180 assert!(result.is_err());
1181 }
1182
1183 std::fs::remove_file(&path).ok();
1184 }
1185
1186 #[test]
1187 fn numeric_attr_roundtrip() {
1188 let path = temp_path("num_attr");
1189 {
1190 let file = H5File::create(&path).unwrap();
1191 let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
1192 ds.write_raw(&[1.0f32; 4]).unwrap();
1193
1194 let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
1195 a1.write_numeric(&1.2345f64).unwrap();
1196
1197 let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
1198 a2.write_numeric(&42i32).unwrap();
1199
1200 file.close().unwrap();
1201 }
1202 {
1203 let file = H5File::open(&path).unwrap();
1204 let ds = file.dataset("data").unwrap();
1205
1206 let scale = ds.attr("scale").unwrap();
1207 let val: f64 = scale.read_numeric().unwrap();
1208 assert!((val - 1.2345).abs() < 1e-10);
1209
1210 let count = ds.attr("count").unwrap();
1211 let val: i32 = count.read_numeric().unwrap();
1212 assert_eq!(val, 42);
1213 }
1214 std::fs::remove_file(&path).ok();
1215 }
1216
1217 #[test]
1218 fn cannot_create_dataset_in_read_mode() {
1219 let path = temp_path("no_create_read");
1220
1221 {
1222 let _file = H5File::create(&path).unwrap();
1223 }
1224
1225 {
1226 let file = H5File::open(&path).unwrap();
1227 let result = file.new_dataset::<u8>().shape([4]).create("x");
1228 assert!(result.is_err());
1229 }
1230
1231 std::fs::remove_file(&path).ok();
1232 }
1233
1234 #[test]
1235 fn shape_accessor() {
1236 let path = temp_path("shape_acc");
1237
1238 let file = H5File::create(&path).unwrap();
1239 let ds = file
1240 .new_dataset::<f32>()
1241 .shape([5, 10, 3])
1242 .create("tensor")
1243 .unwrap();
1244 assert_eq!(ds.shape(), vec![5, 10, 3]);
1245
1246 std::fs::remove_file(&path).ok();
1247 }
1248
1249 #[test]
1250 fn slice_roundtrip_2d() {
1251 let path = temp_path("slice_2d");
1252
1253 let data: Vec<i32> = (0..20).collect();
1255 {
1256 let file = H5File::create(&path).unwrap();
1257 let ds = file
1258 .new_dataset::<i32>()
1259 .shape([4, 5])
1260 .create("mat")
1261 .unwrap();
1262 ds.write_raw(&data).unwrap();
1263 file.close().unwrap();
1264 }
1265 {
1266 let file = H5File::open(&path).unwrap();
1267 let ds = file.dataset("mat").unwrap();
1268 let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
1270 assert_eq!(slice, vec![7, 8, 12, 13]);
1273 }
1274
1275 std::fs::remove_file(&path).ok();
1276 }
1277
1278 #[test]
1279 fn write_slice_2d() {
1280 let path = temp_path("write_slice_2d");
1281
1282 {
1283 let file = H5File::create(&path).unwrap();
1284 let ds = file
1285 .new_dataset::<f32>()
1286 .shape([3, 4])
1287 .create("data")
1288 .unwrap();
1289 ds.write_raw(&[0.0f32; 12]).unwrap();
1290 ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
1292 .unwrap();
1293 file.close().unwrap();
1294 }
1295 {
1296 let file = H5File::open(&path).unwrap();
1297 let ds = file.dataset("data").unwrap();
1298 let full = ds.read_raw::<f32>().unwrap();
1299 assert_eq!(
1303 full,
1304 vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
1305 );
1306 }
1307
1308 std::fs::remove_file(&path).ok();
1309 }
1310
1311 #[test]
1312 fn attr_read_roundtrip() {
1313 use crate::types::VarLenUnicode;
1314 let path = temp_path("attr_read");
1315
1316 {
1317 let file = H5File::create(&path).unwrap();
1318 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1319 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1320 let a1 = ds
1321 .new_attr::<VarLenUnicode>()
1322 .shape(())
1323 .create("units")
1324 .unwrap();
1325 a1.write_string("meters").unwrap();
1326 let a2 = ds
1327 .new_attr::<VarLenUnicode>()
1328 .shape(())
1329 .create("desc")
1330 .unwrap();
1331 a2.write_string("test data").unwrap();
1332 file.close().unwrap();
1333 }
1334 {
1335 let file = H5File::open(&path).unwrap();
1336 let ds = file.dataset("data").unwrap();
1337
1338 let names = ds.attr_names().unwrap();
1339 assert!(names.contains(&"units".to_string()));
1340 assert!(names.contains(&"desc".to_string()));
1341
1342 let units = ds.attr("units").unwrap();
1343 assert_eq!(units.read_string().unwrap(), "meters");
1344
1345 let desc = ds.attr("desc").unwrap();
1346 assert_eq!(desc.read_string().unwrap(), "test data");
1347 }
1348
1349 std::fs::remove_file(&path).ok();
1350 }
1351
1352 #[test]
1353 fn type_mismatch_element_size() {
1354 let path = temp_path("type_mismatch");
1355
1356 {
1357 let file = H5File::create(&path).unwrap();
1358 let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
1359 ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
1360 file.close().unwrap();
1361 }
1362
1363 {
1364 let file = H5File::open(&path).unwrap();
1365 let ds = file.dataset("data").unwrap();
1366 let result = ds.read_raw::<u8>();
1368 assert!(result.is_err());
1369 }
1370
1371 std::fs::remove_file(&path).ok();
1372 }
1373
1374 #[test]
1375 fn dataset_survives_file_move() {
1376 let path = temp_path("ds_survives");
1377
1378 let ds = {
1379 let file = H5File::create(&path).unwrap();
1380 file.new_dataset::<u8>().shape([4]).create("x").unwrap()
1381 };
1382 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1384 std::fs::remove_file(&path).ok();
1387 }
1388
/// Creates a `VarLenUnicode` attribute with unit shape on a dataset, writes
/// a string into it, then reopens the file and checks that the dataset's
/// shape and contents are intact.
///
/// NOTE(review): the attribute value itself is never read back after the
/// reopen; only the dataset is verified. Confirm whether that is intended.
#[test]
fn new_attr_scalar_string() {
    use crate::types::VarLenUnicode;

    let path = temp_path("attr_scalar_string");

    // Write phase: dataset plus one string attribute named "name".
    {
        let writer = H5File::create(&path).unwrap();
        let dataset = writer
            .new_dataset::<u8>()
            .shape([4])
            .create("data")
            .unwrap();
        dataset.write_raw(&[1u8, 2, 3, 4]).unwrap();

        let name_attr = dataset
            .new_attr::<VarLenUnicode>()
            .shape(())
            .create("name")
            .unwrap();
        let value = VarLenUnicode("test_value".to_string());
        name_attr.write_scalar(&value).unwrap();

        writer.close().unwrap();
    }

    // Read phase: the dataset must still open and round-trip its data.
    {
        let reader = H5File::open(&path).unwrap();
        let dataset = reader.dataset("data").unwrap();
        assert_eq!(dataset.shape(), vec![4]);

        let stored = dataset.read_raw::<u8>().unwrap();
        assert_eq!(stored, vec![1u8, 2, 3, 4]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
1421
/// Writes one two-element dataset for each of `u8`, `i8`, `u16`, `i16`,
/// `u32`, `i32`, `u64`, `i64`, `f32` and `f64`, closes the file, reopens it,
/// and checks that every dataset reads back exactly the values written.
#[test]
fn all_numeric_types_roundtrip() {
    // Creates a `[2]`-shaped dataset of `$ty` called `$name` and writes
    // `$values` into it. The macro expands to plain statements in the calling
    // block, so each dataset handle stays alive until that block ends, just
    // like an ordinary `let` binding written inline.
    macro_rules! store {
        ($file:ident, $ty:ty, $name:literal, $values:expr) => {
            let ds = $file.new_dataset::<$ty>().shape([2]).create($name).unwrap();
            ds.write_raw(&$values).unwrap();
        };
    }

    // Opens dataset `$name`, reads it as `$ty`, and compares with `$expected`.
    macro_rules! expect_stored {
        ($file:ident, $ty:ty, $name:literal, $expected:expr) => {
            assert_eq!(
                $file.dataset($name).unwrap().read_raw::<$ty>().unwrap(),
                $expected
            );
        };
    }

    let path = temp_path("all_types");

    // Write phase.
    {
        let file = H5File::create(&path).unwrap();

        store!(file, u8, "u8", [1u8, 2]);
        store!(file, i8, "i8", [-1i8, 1]);
        store!(file, u16, "u16", [100u16, 200]);
        store!(file, i16, "i16", [-100i16, 100]);
        store!(file, u32, "u32", [1000u32, 2000]);
        store!(file, i32, "i32", [-1000i32, 1000]);
        store!(file, u64, "u64", [10000u64, 20000]);
        store!(file, i64, "i64", [-10000i64, 10000]);
        store!(file, f32, "f32", [1.5f32, 2.5]);
        store!(file, f64, "f64", [1.23456f64, 7.89012]);

        file.close().unwrap();
    }

    // Read phase, in the same order the datasets were written.
    {
        let file = H5File::open(&path).unwrap();

        expect_stored!(file, u8, "u8", vec![1u8, 2]);
        expect_stored!(file, i8, "i8", vec![-1i8, 1]);
        expect_stored!(file, u16, "u16", vec![100u16, 200]);
        expect_stored!(file, i16, "i16", vec![-100i16, 100]);
        expect_stored!(file, u32, "u32", vec![1000u32, 2000]);
        expect_stored!(file, i32, "i32", vec![-1000i32, 1000]);
        expect_stored!(file, u64, "u64", vec![10000u64, 20000]);
        expect_stored!(file, i64, "i64", vec![-10000i64, 10000]);
        expect_stored!(file, f32, "f32", vec![1.5f32, 2.5]);
        expect_stored!(file, f64, "f64", vec![1.23456f64, 7.89012]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
1509
/// Starts from an empty `[0, 3]` dataset (chunk `[1, 3]`, max shape
/// `[None, Some(3)]`), appends three values and then six more, and checks
/// that the reopened file reports shape `[3, 3]` with all nine values in
/// append order.
#[test]
fn append_chunked_roundtrip() {
    let path = temp_path("append_chunked");

    // Write phase: two appends of different sizes.
    {
        let writer = H5File::create(&path).unwrap();
        let table = writer
            .new_dataset::<f64>()
            .shape([0, 3])
            .chunk(&[1, 3])
            .max_shape(&[None, Some(3)])
            .create("data")
            .unwrap();

        // Three values, then six values.
        table.append(&[1.0f64, 2.0, 3.0]).unwrap();
        table.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();

        writer.close().unwrap();
    }

    // Read phase: shape and contents after reopening.
    {
        let reader = H5File::open(&path).unwrap();
        let table = reader.dataset("data").unwrap();
        assert_eq!(table.shape(), vec![3, 3]);

        let stored = table.read_raw::<f64>().unwrap();
        assert_eq!(stored, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
1542
/// Appends 3, 1 and 4 values to an initially empty 1-D `i32` dataset with
/// chunk size 4, then checks that the reopened file reports shape `[8]` and
/// returns the values in append order.
#[test]
fn append_1d_chunked() {
    let path = temp_path("append_1d");

    // Write phase: three appends of different lengths.
    {
        let writer = H5File::create(&path).unwrap();
        let series = writer
            .new_dataset::<i32>()
            .shape([0])
            .chunk(&[4])
            .max_shape(&[None])
            .create("values")
            .unwrap();

        series.append(&[10i32, 20, 30]).unwrap();
        series.append(&[40i32]).unwrap();
        series.append(&[50i32, 60, 70, 80]).unwrap();

        writer.close().unwrap();
    }

    // Read phase: shape and contents after reopening.
    {
        let reader = H5File::open(&path).unwrap();
        let series = reader.dataset("values").unwrap();
        assert_eq!(series.shape(), vec![8]);

        let stored = series.read_raw::<i32>().unwrap();
        assert_eq!(stored, vec![10, 20, 30, 40, 50, 60, 70, 80]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
1574
/// Appends five values to a dataset whose chunk size is four, so the last
/// value does not fill a whole chunk, then closes the file and checks that
/// all five values are present after reopening.
#[test]
fn append_partial_chunk_flushed_on_close() {
    let path = temp_path("append_partial_close");

    // Write phase: a single append that is one element longer than a chunk.
    {
        let writer = H5File::create(&path).unwrap();
        let series = writer
            .new_dataset::<f64>()
            .shape([0])
            .chunk(&[4])
            .max_shape(&[None])
            .create("vals")
            .unwrap();

        series.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
        writer.close().unwrap();
    }

    // Read phase: shape, element count, then exact contents.
    {
        let reader = H5File::open(&path).unwrap();
        let series = reader.dataset("vals").unwrap();
        assert_eq!(series.shape(), vec![5]);

        let stored = series.read_raw::<f64>().unwrap();
        // Check the length first so a truncated read fails with a short message.
        assert_eq!(stored.len(), 5);
        assert_eq!(stored, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
    }

    // Best-effort cleanup; a failed removal is deliberately ignored.
    std::fs::remove_file(&path).ok();
}
1607}