1use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
/// Builder for creating a new dataset in a writable HDF5 file.
///
/// Configure shape, chunking, resizability and compression with the
/// chainable setters, then call `create()` to materialize the dataset.
/// `T` fixes the element type via [`H5Type`].
pub struct DatasetBuilder<T: H5Type> {
    // Shared handle to the owning file's reader/writer state.
    file_inner: SharedInner,
    // Dataset dimensions; must be set before `create()` is called.
    shape: Option<Vec<usize>>,
    // Chunk dimensions; presence selects the chunked layout in `create()`.
    chunk_dims: Option<Vec<usize>>,
    // Per-dimension maximum extent; a `None` entry means unlimited.
    max_shape: Option<Vec<Option<usize>>>,
    // Plain deflate compression level, if requested.
    deflate_level: Option<u32>,
    // Shuffle + deflate compression level, if requested.
    shuffle_deflate_level: Option<u32>,
    // Explicit filter pipeline; checked before the level fields in `create()`.
    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
    // Group the dataset is created under; `None` means the file root.
    group_path: Option<String>,
    _marker: std::marker::PhantomData<T>,
}
39
40impl<T: H5Type> DatasetBuilder<T> {
    /// Creates a builder for a dataset placed at the file root.
    pub(crate) fn new(file_inner: SharedInner) -> Self {
        Self {
            file_inner,
            shape: None,
            chunk_dims: None,
            max_shape: None,
            deflate_level: None,
            shuffle_deflate_level: None,
            custom_pipeline: None,
            group_path: None,
            _marker: std::marker::PhantomData,
        }
    }
54
    /// Creates a builder for a dataset placed under the given group path.
    pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
        Self {
            file_inner,
            shape: None,
            chunk_dims: None,
            max_shape: None,
            deflate_level: None,
            shuffle_deflate_level: None,
            custom_pipeline: None,
            group_path: Some(group_path),
            _marker: std::marker::PhantomData,
        }
    }
68
69 #[must_use]
74 pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
75 self.shape = Some(dims.as_ref().to_vec());
76 self
77 }
78
79 #[must_use]
81 pub fn scalar(mut self) -> Self {
82 self.shape = Some(vec![]);
83 self
84 }
85
86 #[must_use]
92 pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
93 self.chunk_dims = Some(chunk_dims.to_vec());
94 self
95 }
96
    /// Makes every dimension unlimited (resizable).
    ///
    /// NOTE(review): the unlimited-max vector is sized from the shape known
    /// *now*, so this must be called after `shape()`; calling it first
    /// produces an empty max-shape vector — confirm the intended call order
    /// with callers.
    #[must_use]
    pub fn resizable(mut self) -> Self {
        self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
        self
    }
105
106 #[must_use]
108 pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
109 self.max_shape = Some(max.to_vec());
110 self
111 }
112
    /// Enables deflate (gzip) compression at the given level.
    ///
    /// Only takes effect for chunked datasets, and only when no custom
    /// pipeline and no shuffle-deflate level were set (see `create()`).
    #[must_use]
    pub fn deflate(mut self, level: u32) -> Self {
        self.deflate_level = Some(level);
        self
    }
122
    /// Enables shuffle + deflate compression at the given level.
    ///
    /// Only takes effect for chunked datasets; overridden by a custom
    /// pipeline but preferred over plain `deflate()` (see `create()`).
    #[must_use]
    pub fn shuffle_deflate(mut self, level: u32) -> Self {
        self.shuffle_deflate_level = Some(level);
        self
    }
133
134 #[must_use]
138 pub fn zstd(mut self, level: u32) -> Self {
139 self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
140 self
141 }
142
143 #[must_use]
148 pub fn filter_pipeline(
149 mut self,
150 pipeline: crate::format::messages::filter::FilterPipeline,
151 ) -> Self {
152 self.custom_pipeline = Some(pipeline);
153 self
154 }
155
    /// Creates the dataset under `name`, consuming the builder.
    ///
    /// The shape must have been set via `shape()`/`scalar()`. A chunked
    /// layout is used when `chunk()` was called, otherwise contiguous.
    ///
    /// # Errors
    /// Fails if no shape was set, if the file is not open in write mode,
    /// or if the underlying writer rejects the dataset parameters.
    pub fn create(self, name: &str) -> Result<H5Dataset> {
        // Shape is mandatory; every other option has a usable default.
        let shape = self.shape.ok_or_else(|| {
            Hdf5Error::InvalidState("shape must be set before calling create()".into())
        })?;

        // Full link path: datasets inside a group become "<group>/<name>".
        let full_name = if let Some(ref gp) = self.group_path {
            if gp == "/" {
                name.to_string()
            } else {
                let trimmed = gp.trim_start_matches('/');
                format!("{}/{}", trimmed, name)
            }
        } else {
            name.to_string()
        };
        let group_path = self.group_path.clone();

        let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
        let datatype = T::hdf5_type();
        let element_size = T::element_size();

        if let Some(ref chunk_dims) = self.chunk_dims {
            // Chunked layout. Max dims default to the current dims (not
            // resizable); `None` entries from `max_shape` become unlimited.
            let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
            let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
                max.iter()
                    .map(|m| m.map_or(u64::MAX, |v| v as u64))
                    .collect()
            } else {
                dims_u64.clone()
            };

            let index = {
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        // Compression precedence: explicit pipeline, then
                        // shuffle+deflate, then plain deflate, then none.
                        let idx = if let Some(pipeline) = self.custom_pipeline {
                            writer.create_chunked_dataset_with_pipeline(
                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
                            )?
                        } else if let Some(level) = self.shuffle_deflate_level {
                            let pipeline =
                                crate::format::messages::filter::FilterPipeline::shuffle_deflate(
                                    T::element_size() as u32,
                                    level,
                                );
                            writer.create_chunked_dataset_with_pipeline(
                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
                            )?
                        } else if let Some(level) = self.deflate_level {
                            writer.create_chunked_dataset_compressed(
                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
                            )?
                        } else {
                            writer.create_chunked_dataset(
                                &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
                            )?
                        };
                        // Link the dataset into its parent group; the root
                        // group needs no explicit link.
                        if let Some(ref gp) = group_path {
                            if gp != "/" {
                                writer.assign_dataset_to_group(gp, idx)?;
                            }
                        }
                        idx
                    }
                    H5FileInner::Reader(_) => {
                        return Err(Hdf5Error::InvalidState(
                            "cannot create a dataset in read mode".into(),
                        ));
                    }
                    H5FileInner::Closed => {
                        return Err(Hdf5Error::InvalidState("file is closed".into()));
                    }
                }
            };

            Ok(H5Dataset {
                file_inner: clone_inner(&self.file_inner),
                info: DatasetInfo::Writer {
                    index,
                    shape,
                    element_size,
                    chunked: true,
                },
            })
        } else {
            // Contiguous layout.
            let index = {
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
                        if let Some(ref gp) = group_path {
                            if gp != "/" {
                                writer.assign_dataset_to_group(gp, idx)?;
                            }
                        }
                        idx
                    }
                    H5FileInner::Reader(_) => {
                        return Err(Hdf5Error::InvalidState(
                            "cannot create a dataset in read mode".into(),
                        ));
                    }
                    H5FileInner::Closed => {
                        return Err(Hdf5Error::InvalidState("file is closed".into()));
                    }
                }
            };

            Ok(H5Dataset {
                file_inner: clone_inner(&self.file_inner),
                info: DatasetInfo::Writer {
                    index,
                    shape,
                    element_size,
                    chunked: false,
                },
            })
        }
    }
283}
284
/// Side-specific state for an open dataset handle.
enum DatasetInfo {
    /// Dataset created through a writer; addressed by index.
    Writer {
        // Index into the writer's dataset table.
        index: usize,
        // Dimensions at creation time.
        shape: Vec<usize>,
        // Size in bytes of one element.
        element_size: usize,
        // Whether the dataset uses the chunked layout.
        chunked: bool,
    },
    /// Dataset opened from a reader; addressed by path name.
    Reader {
        // Full path used to look the dataset up in the reader.
        name: String,
        // Dimensions as recorded in the file.
        shape: Vec<usize>,
        // Size in bytes of one element.
        element_size: usize,
    },
}
312
/// Handle to a single dataset, valid in either read or write mode.
pub struct H5Dataset {
    // Shared handle to the owning file's state.
    file_inner: SharedInner,
    // Read- or write-side addressing and metadata for the dataset.
    info: DatasetInfo,
}
326
327impl H5Dataset {
    /// Wraps an existing dataset found in a file opened for reading.
    pub(crate) fn new_reader(
        file_inner: SharedInner,
        name: String,
        shape: Vec<usize>,
        element_size: usize,
    ) -> Self {
        Self {
            file_inner,
            info: DatasetInfo::Reader {
                name,
                shape,
                element_size,
            },
        }
    }
344
345 pub fn shape(&self) -> Vec<usize> {
347 match &self.info {
348 DatasetInfo::Writer { shape, .. } => shape.clone(),
349 DatasetInfo::Reader { shape, .. } => shape.clone(),
350 }
351 }
352
353 pub fn ndims(&self) -> usize {
355 match &self.info {
356 DatasetInfo::Writer { shape, .. } => shape.len(),
357 DatasetInfo::Reader { shape, .. } => shape.len(),
358 }
359 }
360
361 pub fn total_elements(&self) -> usize {
363 match &self.info {
364 DatasetInfo::Writer { shape, .. } => shape.iter().product(),
365 DatasetInfo::Reader { shape, .. } => shape.iter().product(),
366 }
367 }
368
369 pub fn element_size(&self) -> usize {
371 match &self.info {
372 DatasetInfo::Writer { element_size, .. } => *element_size,
373 DatasetInfo::Reader { element_size, .. } => *element_size,
374 }
375 }
376
    /// Returns the chunk dimensions of a chunked dataset opened in read
    /// mode, or `None` for contiguous datasets.
    ///
    /// NOTE(review): in write mode this always returns `None`, even for
    /// datasets created with `chunk()` — confirm that is intended.
    pub fn chunk_dims(&self) -> Option<Vec<usize>> {
        match &self.info {
            DatasetInfo::Reader { name, .. } => {
                let inner = borrow_inner(&self.file_inner);
                if let H5FileInner::Reader(reader) = &*inner {
                    if let Some(info) = reader.dataset_info(name) {
                        if let crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 {
                            chunk_dims,
                            ..
                        } = &info.layout
                        {
                            // The stored v4 chunk shape carries one extra
                            // trailing entry (presumably the element-size
                            // dimension of the HDF5 layout message) which is
                            // dropped here. This indexing panics if
                            // `chunk_dims` is empty — TODO(review) confirm
                            // the reader guarantees it never is.
                            return Some(
                                chunk_dims[..chunk_dims.len() - 1]
                                    .iter()
                                    .map(|&d| d as usize)
                                    .collect(),
                            );
                        }
                    }
                }
                None
            }
            DatasetInfo::Writer { .. } => None,
        }
    }
404
405 pub fn is_chunked(&self) -> bool {
407 match &self.info {
408 DatasetInfo::Writer { chunked, .. } => *chunked,
409 DatasetInfo::Reader { name, .. } => {
410 let inner = borrow_inner(&self.file_inner);
411 match &*inner {
412 H5FileInner::Reader(reader) => {
413 if let Some(info) = reader.dataset_info(name) {
414 matches!(
415 info.layout,
416 crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 { .. }
417 )
418 } else {
419 false
420 }
421 }
422 _ => false,
423 }
424 }
425 }
426 }
427
428 pub fn attr_names(&self) -> Result<Vec<String>> {
430 match &self.info {
431 DatasetInfo::Reader { name, .. } => {
432 let inner = borrow_inner(&self.file_inner);
433 match &*inner {
434 H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
435 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
436 }
437 }
438 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
439 "attr_names not available in write mode".into(),
440 )),
441 }
442 }
443
444 pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
446 match &self.info {
447 DatasetInfo::Reader { name, .. } => {
448 let inner = borrow_inner(&self.file_inner);
449 match &*inner {
450 H5FileInner::Reader(reader) => {
451 let attr_msg = reader.dataset_attr(name, attr_name)?;
452 Ok(crate::attribute::H5Attribute::new_reader(
453 clone_inner(&self.file_inner),
454 attr_msg.name.clone(),
455 attr_msg.data.clone(),
456 ))
457 }
458 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
459 }
460 }
461 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
462 "attr() not available in write mode".into(),
463 )),
464 }
465 }
466
    /// Starts building a new attribute attached to this dataset.
    pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
        let ds_index = match &self.info {
            DatasetInfo::Writer { index, .. } => *index,
            DatasetInfo::Reader { .. } => {
                // Sentinel: no writer-side dataset index exists in read
                // mode. Presumably `AttrBuilder` rejects creation against
                // this value — TODO(review) confirm.
                usize::MAX
            }
        };
        AttrBuilder::new(&self.file_inner, ds_index)
    }
493
    /// Writes the entire contents of a contiguous dataset in one call.
    ///
    /// `data` must hold exactly `shape.iter().product()` elements and `T`'s
    /// element size must match the dataset's. Chunked datasets must use
    /// `write_chunk`/`append` instead.
    pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                shape,
                element_size,
                chunked,
            } => {
                if *chunked {
                    return Err(Hdf5Error::InvalidState(
                        "use write_chunk for chunked datasets".into(),
                    ));
                }

                let total_elements: usize = shape.iter().product();
                if data.len() != total_elements {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} does not match dataset size {}",
                        data.len(),
                        total_elements,
                    )));
                }

                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "write type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let byte_len = data.len() * T::element_size();
                // SAFETY: reinterprets `data` as bytes. Sound only if
                // `T::element_size() == size_of::<T>()` for every `H5Type`
                // implementor — TODO(review) confirm that invariant.
                let raw =
                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };

                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_dataset_raw(*index, raw)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
                "cannot write to a dataset opened in read mode".into(),
            )),
        }
    }
560
561 pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
566 match &self.info {
567 DatasetInfo::Writer { index, chunked, .. } => {
568 if !*chunked {
569 return Err(Hdf5Error::InvalidState(
570 "write_chunk is only for chunked datasets".into(),
571 ));
572 }
573
574 let mut inner = borrow_inner_mut(&self.file_inner);
575 match &mut *inner {
576 H5FileInner::Writer(writer) => {
577 writer.write_chunk(*index, chunk_idx as u64, data)?;
578 Ok(())
579 }
580 _ => Err(Hdf5Error::InvalidState(
581 "file is no longer in write mode".into(),
582 )),
583 }
584 }
585 DatasetInfo::Reader { .. } => {
586 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
587 }
588 }
589 }
590
591 pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
597 match &self.info {
598 DatasetInfo::Writer { index, chunked, .. } => {
599 if !*chunked {
600 return Err(Hdf5Error::InvalidState(
601 "write_chunks_batch is only for chunked datasets".into(),
602 ));
603 }
604 let pairs: Vec<(u64, &[u8])> = chunks
605 .iter()
606 .map(|(idx, data)| (*idx as u64, *data))
607 .collect();
608 let mut inner = borrow_inner_mut(&self.file_inner);
609 match &mut *inner {
610 H5FileInner::Writer(writer) => {
611 writer.write_chunks_batch(*index, &pairs)?;
612 Ok(())
613 }
614 _ => Err(Hdf5Error::InvalidState(
615 "file is no longer in write mode".into(),
616 )),
617 }
618 }
619 DatasetInfo::Reader { .. } => {
620 Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
621 }
622 }
623 }
624
    /// Appends frames along axis 0 of a resizable chunked dataset.
    ///
    /// A "frame" is one slice along the first dimension (the product of all
    /// trailing dimensions; single elements for 1-D datasets). Data that
    /// does not complete a whole chunk is kept in a per-dataset buffer and
    /// combined with the next append; the logical dimension 0 is advanced
    /// regardless via `extend_dataset`.
    ///
    /// # Errors
    /// Fails for non-chunked datasets, element-size mismatches, zero-size
    /// frames, or data that is not a whole number of frames.
    pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                element_size,
                chunked,
                ..
            } => {
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "append is only for chunked datasets".into(),
                    ));
                }
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "append type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let ds_index = *index;
                let es = *element_size;

                let mut inner = borrow_inner_mut(&self.file_inner);
                let writer = match &mut *inner {
                    H5FileInner::Writer(w) => w,
                    _ => {
                        return Err(Hdf5Error::InvalidState(
                            "file is no longer in write mode".into(),
                        ))
                    }
                };

                let chunk_dims = writer
                    .dataset_chunk_dims(ds_index)
                    .ok_or_else(|| Hdf5Error::InvalidState("dataset has no chunk info".into()))?
                    .to_vec();
                let dims = writer.dataset_dims(ds_index).to_vec();

                // Elements per frame: product of the trailing dimensions.
                let frame_elems: usize = if dims.len() > 1 {
                    dims[1..].iter().map(|&d| d as usize).product()
                } else {
                    1
                };

                if frame_elems == 0 {
                    return Err(Hdf5Error::InvalidState(
                        "cannot append to dataset with zero-size trailing dimensions".into(),
                    ));
                }

                if !data.len().is_multiple_of(frame_elems) {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} is not a multiple of frame size {}",
                        data.len(),
                        frame_elems,
                    )));
                }

                let n_new_frames = data.len() / frame_elems;
                let current_dim0 = dims[0] as usize;

                let chunk_dim0 = chunk_dims[0] as usize;
                let chunk_bytes = chunk_dims.iter().map(|&d| d as usize).product::<usize>() * es;
                let frame_bytes = frame_elems * es;

                // SAFETY: reinterprets `data` as bytes. Sound only if
                // `T::element_size() == size_of::<T>()` for every `H5Type`
                // implementor — TODO(review) confirm that invariant.
                let raw = unsafe {
                    std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
                };

                // Prepend bytes left over from a previous partial append.
                let ds = &mut writer.datasets[ds_index];
                let buffered_frames = ds.append_buffered_frames as usize;
                let mut combined = std::mem::take(&mut ds.append_buffer);
                combined.extend_from_slice(raw);
                ds.append_buffered_frames = 0;

                let total_frames = buffered_frames + n_new_frames;
                let total_bytes = combined.len();

                // First frame index covered by `combined`, in dataset
                // coordinates (dim0 already counts the buffered frames).
                let base_dim0 = current_dim0 - buffered_frames;
                let mut byte_pos = 0usize;
                let mut frame_pos = 0usize;

                while frame_pos < total_frames {
                    let abs_frame = base_dim0 + frame_pos;
                    let chunk_idx = abs_frame / chunk_dim0;
                    let remaining_frames = total_frames - frame_pos;
                    // Frames needed to reach the end of the current chunk.
                    let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);

                    if remaining_frames >= frames_to_fill {
                        let end = byte_pos + frames_to_fill * frame_bytes;
                        if frames_to_fill == chunk_dim0 {
                            // Chunk-aligned and complete: write directly.
                            writer.write_chunk(
                                ds_index,
                                chunk_idx as u64,
                                &combined[byte_pos..end],
                            )?;
                        } else {
                            // Unaligned start: stage into a zero-filled
                            // chunk-sized buffer at the proper offset.
                            // NOTE(review): this writes zeros over the part
                            // of the chunk before `offset_in_chunk`; it is
                            // only correct if that region holds no live data
                            // (e.g. after an unaligned `extend`) — confirm.
                            let mut chunk_buf = vec![0u8; chunk_bytes];
                            let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
                            chunk_buf
                                [offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
                                .copy_from_slice(&combined[byte_pos..end]);
                            writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
                        }
                        byte_pos = end;
                        frame_pos += frames_to_fill;
                    } else {
                        // Not enough data to finish this chunk: buffer the
                        // remainder for the next append call.
                        let ds = &mut writer.datasets[ds_index];
                        ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
                        ds.append_buffered_frames = remaining_frames as u64;
                        frame_pos = total_frames;
                    }
                }

                // Advance the logical extent to include buffered frames.
                let logical_dim0 = base_dim0 + total_frames;
                let mut new_dims: Vec<u64> = dims;
                new_dims[0] = logical_dim0 as u64;
                writer.extend_dataset(ds_index, &new_dims)?;

                Ok(())
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
            }
        }
    }
784
785 pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
787 match &self.info {
788 DatasetInfo::Writer { index, chunked, .. } => {
789 if !*chunked {
790 return Err(Hdf5Error::InvalidState(
791 "extend is only for chunked datasets".into(),
792 ));
793 }
794
795 let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
796 let mut inner = borrow_inner_mut(&self.file_inner);
797 match &mut *inner {
798 H5FileInner::Writer(writer) => {
799 writer.extend_dataset(*index, &dims_u64)?;
800 Ok(())
801 }
802 _ => Err(Hdf5Error::InvalidState(
803 "file is no longer in write mode".into(),
804 )),
805 }
806 }
807 DatasetInfo::Reader { .. } => {
808 Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
809 }
810 }
811 }
812
813 pub fn flush(&self) -> Result<()> {
815 match &self.info {
816 DatasetInfo::Writer { index, .. } => {
817 let mut inner = borrow_inner_mut(&self.file_inner);
818 match &mut *inner {
819 H5FileInner::Writer(writer) => {
820 writer.flush_dataset(*index)?;
821 Ok(())
822 }
823 _ => Ok(()),
824 }
825 }
826 DatasetInfo::Reader { .. } => Ok(()),
827 }
828 }
829
    /// Reads a hyperslab: `counts[d]` elements starting at `starts[d]` in
    /// each dimension `d` (read mode only).
    pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
        match &self.info {
            DatasetInfo::Reader {
                name, element_size, ..
            } => {
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "read type has element size {} but dataset has element size {}",
                        T::element_size(),
                        element_size,
                    )));
                }
                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();

                let raw = {
                    let mut inner = borrow_inner_mut(&self.file_inner);
                    match &mut *inner {
                        H5FileInner::Reader(reader) => {
                            reader.read_slice(name, &starts_u64, &counts_u64)?
                        }
                        _ => {
                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
                        }
                    }
                };

                if raw.len() % T::element_size() != 0 {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "raw data size {} is not a multiple of element size {}",
                        raw.len(),
                        T::element_size(),
                    )));
                }

                let count = raw.len() / T::element_size();
                let mut result = Vec::<T>::with_capacity(count);
                // SAFETY: copies `raw.len()` bytes (checked above to be
                // `count * element_size`) into the Vec's reserved capacity,
                // then marks them initialized. Sound provided
                // `T::element_size() == size_of::<T>()` and any byte pattern
                // is a valid `T` — assumed for `H5Type` implementors;
                // TODO(review) confirm.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        raw.as_ptr(),
                        result.as_mut_ptr() as *mut u8,
                        raw.len(),
                    );
                    result.set_len(count);
                }
                Ok(result)
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read_slice from a dataset in write mode".into(),
            )),
        }
    }
886
    /// Writes a hyperslab into a contiguous dataset: `counts[d]` elements
    /// starting at `starts[d]` in each dimension `d`.
    pub fn write_slice<T: H5Type>(
        &self,
        starts: &[usize],
        counts: &[usize],
        data: &[T],
    ) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                element_size,
                chunked,
                ..
            } => {
                if *chunked {
                    return Err(Hdf5Error::InvalidState(
                        "write_slice is only for contiguous datasets".into(),
                    ));
                }
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "write type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let expected: usize = counts.iter().product();
                if data.len() != expected {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} does not match slice size {}",
                        data.len(),
                        expected,
                    )));
                }

                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();

                let byte_len = data.len() * T::element_size();
                // SAFETY: reinterprets `data` as bytes. Sound only if
                // `T::element_size() == size_of::<T>()` for every `H5Type`
                // implementor — TODO(review) confirm that invariant.
                let raw =
                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };

                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
            }
        }
    }
948
949 pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
954 match &self.info {
955 DatasetInfo::Reader { name, .. } => {
956 let mut inner = borrow_inner_mut(&self.file_inner);
957 match &mut *inner {
958 H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
959 _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
960 }
961 }
962 DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
963 "cannot read vlen strings from a dataset in write mode".into(),
964 )),
965 }
966 }
967
    /// Reads the entire dataset into a `Vec<T>` (read mode only).
    pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
        match &self.info {
            DatasetInfo::Reader {
                name, element_size, ..
            } => {
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "read type has element size {} but dataset has element size {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let raw = {
                    let mut inner = borrow_inner_mut(&self.file_inner);
                    match &mut *inner {
                        H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
                        _ => {
                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
                        }
                    }
                };

                if raw.len() % T::element_size() != 0 {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "raw data size {} is not a multiple of element size {}",
                        raw.len(),
                        T::element_size(),
                    )));
                }

                let count = raw.len() / T::element_size();
                let mut result = Vec::<T>::with_capacity(count);

                // SAFETY: copies exactly `count * element_size` bytes
                // (divisibility checked above) into freshly reserved
                // capacity, then marks it initialized. Sound provided
                // `T::element_size() == size_of::<T>()` and every byte
                // pattern is a valid `T` — assumed for `H5Type`
                // implementors; TODO(review) confirm.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        raw.as_ptr(),
                        result.as_mut_ptr() as *mut u8,
                        raw.len(),
                    );
                    result.set_len(count);
                }

                Ok(result)
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read from a dataset in write mode".into(),
            )),
        }
    }
1035}
1036
1037#[cfg(test)]
1038mod tests {
1039 use crate::H5File;
1040 use std::path::PathBuf;
1041
1042 fn temp_path(name: &str) -> PathBuf {
1043 std::env::temp_dir().join(format!("hdf5_dataset_test_{}.h5", name))
1044 }
1045
    // `create()` must fail when no shape was configured on the builder.
    #[test]
    fn builder_requires_shape() {
        let path = temp_path("no_shape");
        let file = H5File::create(&path).unwrap();
        let result = file.new_dataset::<u8>().create("data");
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }
1054
    // `write_raw` must reject data whose length differs from the dataset size.
    #[test]
    fn write_raw_size_mismatch() {
        let path = temp_path("size_mismatch");
        let file = H5File::create(&path).unwrap();
        let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
        let result = ds.write_raw(&[1u8, 2, 3]);
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }
1065
    // 1-D u8 write-then-read roundtrip across a file close/reopen.
    #[test]
    fn roundtrip_u8_1d() {
        let path = temp_path("rt_u8_1d");
        let data: Vec<u8> = (0..10).collect();

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("seq").unwrap();
            assert_eq!(ds.shape(), vec![10]);
            let readback = ds.read_raw::<u8>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }
1088
    // 2-D i32 write-then-read roundtrip across a file close/reopen.
    #[test]
    fn roundtrip_i32_2d() {
        let path = temp_path("rt_i32_2d");
        let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([2, 3])
                .create("matrix")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("matrix").unwrap();
            assert_eq!(ds.shape(), vec![2, 3]);
            let readback = ds.read_raw::<i32>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }
1115
    // 3-D f64 write-then-read roundtrip across a file close/reopen.
    #[test]
    fn roundtrip_f64_3d() {
        let path = temp_path("rt_f64_3d");
        let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f64>()
                .shape([2, 3, 4])
                .create("cube")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("cube").unwrap();
            assert_eq!(ds.shape(), vec![2, 3, 4]);
            let readback = ds.read_raw::<f64>().unwrap();
            assert_eq!(readback, data);
        }

        std::fs::remove_file(&path).ok();
    }
1142
    // A dataset handle created in write mode must refuse `read_raw`.
    #[test]
    fn cannot_read_in_write_mode() {
        let path = temp_path("no_read_write");
        let file = H5File::create(&path).unwrap();
        let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
        let result = ds.read_raw::<u8>();
        assert!(result.is_err());
        std::fs::remove_file(&path).ok();
    }
1153
    // A dataset opened from a read-mode file must refuse `write_raw`.
    #[test]
    fn cannot_write_in_read_mode() {
        let path = temp_path("no_write_read");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("x").unwrap();
            let result = ds.write_raw(&[5u8, 6, 7, 8]);
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }
1174
    // Numeric (f64 and i32) scalar attributes survive a write/read roundtrip.
    #[test]
    fn numeric_attr_roundtrip() {
        let path = temp_path("num_attr");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1.0f32; 4]).unwrap();

            let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
            a1.write_numeric(&1.2345f64).unwrap();

            let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
            a2.write_numeric(&42i32).unwrap();

            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();

            let scale = ds.attr("scale").unwrap();
            let val: f64 = scale.read_numeric().unwrap();
            assert!((val - 1.2345).abs() < 1e-10);

            let count = ds.attr("count").unwrap();
            let val: i32 = count.read_numeric().unwrap();
            assert_eq!(val, 42);
        }
        std::fs::remove_file(&path).ok();
    }
1205
    // Creating a dataset on a file opened in read mode must fail.
    #[test]
    fn cannot_create_dataset_in_read_mode() {
        let path = temp_path("no_create_read");

        {
            let _file = H5File::create(&path).unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let result = file.new_dataset::<u8>().shape([4]).create("x");
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }
1222
    // `shape()` reports the dimensions the dataset was created with.
    #[test]
    fn shape_accessor() {
        let path = temp_path("shape_acc");

        let file = H5File::create(&path).unwrap();
        let ds = file
            .new_dataset::<f32>()
            .shape([5, 10, 3])
            .create("tensor")
            .unwrap();
        assert_eq!(ds.shape(), vec![5, 10, 3]);

        std::fs::remove_file(&path).ok();
    }
1237
    // `read_slice` returns the expected 2x2 window of a 4x5 matrix.
    #[test]
    fn slice_roundtrip_2d() {
        let path = temp_path("slice_2d");

        let data: Vec<i32> = (0..20).collect();
        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<i32>()
                .shape([4, 5])
                .create("mat")
                .unwrap();
            ds.write_raw(&data).unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("mat").unwrap();
            let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
            assert_eq!(slice, vec![7, 8, 12, 13]);
        }

        std::fs::remove_file(&path).ok();
    }
1266
    // `write_slice` patches a 2x2 window into an otherwise-zero 3x4 dataset.
    #[test]
    fn write_slice_2d() {
        let path = temp_path("write_slice_2d");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file
                .new_dataset::<f32>()
                .shape([3, 4])
                .create("data")
                .unwrap();
            ds.write_raw(&[0.0f32; 12]).unwrap();
            ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
                .unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            let full = ds.read_raw::<f32>().unwrap();
            assert_eq!(
                full,
                vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
            );
        }

        std::fs::remove_file(&path).ok();
    }
1299
    // String attributes are listed by `attr_names` and readable by `attr`.
    #[test]
    fn attr_read_roundtrip() {
        use crate::types::VarLenUnicode;
        let path = temp_path("attr_read");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
            let a1 = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("units")
                .unwrap();
            a1.write_string("meters").unwrap();
            let a2 = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("desc")
                .unwrap();
            a2.write_string("test data").unwrap();
            file.close().unwrap();
        }
        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();

            let names = ds.attr_names().unwrap();
            assert!(names.contains(&"units".to_string()));
            assert!(names.contains(&"desc".to_string()));

            let units = ds.attr("units").unwrap();
            assert_eq!(units.read_string().unwrap(), "meters");

            let desc = ds.attr("desc").unwrap();
            assert_eq!(desc.read_string().unwrap(), "test data");
        }

        std::fs::remove_file(&path).ok();
    }
1340
    // Reading an f64 dataset as u8 must fail the element-size check.
    #[test]
    fn type_mismatch_element_size() {
        let path = temp_path("type_mismatch");

        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            let result = ds.read_raw::<u8>();
            assert!(result.is_err());
        }

        std::fs::remove_file(&path).ok();
    }
1362
    // A dataset handle stays usable after the `H5File` value is dropped
    // (the shared inner state keeps the file open).
    #[test]
    fn dataset_survives_file_move() {
        let path = temp_path("ds_survives");

        let ds = {
            let file = H5File::create(&path).unwrap();
            file.new_dataset::<u8>().shape([4]).create("x").unwrap()
        };
        ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
        std::fs::remove_file(&path).ok();
    }
1377
    // Writing a scalar string attribute must not corrupt the dataset data.
    #[test]
    fn new_attr_scalar_string() {
        use crate::types::VarLenUnicode;

        let path = temp_path("attr_scalar_string");
        {
            let file = H5File::create(&path).unwrap();
            let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
            ds.write_raw(&[1u8, 2, 3, 4]).unwrap();

            let attr = ds
                .new_attr::<VarLenUnicode>()
                .shape(())
                .create("name")
                .unwrap();
            attr.write_scalar(&VarLenUnicode("test_value".to_string()))
                .unwrap();

            file.close().unwrap();
        }

        {
            let file = H5File::open(&path).unwrap();
            let ds = file.dataset("data").unwrap();
            assert_eq!(ds.shape(), vec![4]);
            let readback = ds.read_raw::<u8>().unwrap();
            assert_eq!(readback, vec![1u8, 2, 3, 4]);
        }

        std::fs::remove_file(&path).ok();
    }
1410
1411 #[test]
1412 fn all_numeric_types_roundtrip() {
1413 let path = temp_path("all_types");
1414
1415 {
1416 let file = H5File::create(&path).unwrap();
1417
1418 let ds = file.new_dataset::<u8>().shape([2]).create("u8").unwrap();
1419 ds.write_raw(&[1u8, 2]).unwrap();
1420
1421 let ds = file.new_dataset::<i8>().shape([2]).create("i8").unwrap();
1422 ds.write_raw(&[-1i8, 1]).unwrap();
1423
1424 let ds = file.new_dataset::<u16>().shape([2]).create("u16").unwrap();
1425 ds.write_raw(&[100u16, 200]).unwrap();
1426
1427 let ds = file.new_dataset::<i16>().shape([2]).create("i16").unwrap();
1428 ds.write_raw(&[-100i16, 100]).unwrap();
1429
1430 let ds = file.new_dataset::<u32>().shape([2]).create("u32").unwrap();
1431 ds.write_raw(&[1000u32, 2000]).unwrap();
1432
1433 let ds = file.new_dataset::<i32>().shape([2]).create("i32").unwrap();
1434 ds.write_raw(&[-1000i32, 1000]).unwrap();
1435
1436 let ds = file.new_dataset::<u64>().shape([2]).create("u64").unwrap();
1437 ds.write_raw(&[10000u64, 20000]).unwrap();
1438
1439 let ds = file.new_dataset::<i64>().shape([2]).create("i64").unwrap();
1440 ds.write_raw(&[-10000i64, 10000]).unwrap();
1441
1442 let ds = file.new_dataset::<f32>().shape([2]).create("f32").unwrap();
1443 ds.write_raw(&[1.5f32, 2.5]).unwrap();
1444
1445 let ds = file.new_dataset::<f64>().shape([2]).create("f64").unwrap();
1446 ds.write_raw(&[1.23456f64, 7.89012]).unwrap();
1447
1448 file.close().unwrap();
1449 }
1450
1451 {
1452 let file = H5File::open(&path).unwrap();
1453
1454 assert_eq!(
1455 file.dataset("u8").unwrap().read_raw::<u8>().unwrap(),
1456 vec![1u8, 2]
1457 );
1458 assert_eq!(
1459 file.dataset("i8").unwrap().read_raw::<i8>().unwrap(),
1460 vec![-1i8, 1]
1461 );
1462 assert_eq!(
1463 file.dataset("u16").unwrap().read_raw::<u16>().unwrap(),
1464 vec![100u16, 200]
1465 );
1466 assert_eq!(
1467 file.dataset("i16").unwrap().read_raw::<i16>().unwrap(),
1468 vec![-100i16, 100]
1469 );
1470 assert_eq!(
1471 file.dataset("u32").unwrap().read_raw::<u32>().unwrap(),
1472 vec![1000u32, 2000]
1473 );
1474 assert_eq!(
1475 file.dataset("i32").unwrap().read_raw::<i32>().unwrap(),
1476 vec![-1000i32, 1000]
1477 );
1478 assert_eq!(
1479 file.dataset("u64").unwrap().read_raw::<u64>().unwrap(),
1480 vec![10000u64, 20000]
1481 );
1482 assert_eq!(
1483 file.dataset("i64").unwrap().read_raw::<i64>().unwrap(),
1484 vec![-10000i64, 10000]
1485 );
1486 assert_eq!(
1487 file.dataset("f32").unwrap().read_raw::<f32>().unwrap(),
1488 vec![1.5f32, 2.5]
1489 );
1490 assert_eq!(
1491 file.dataset("f64").unwrap().read_raw::<f64>().unwrap(),
1492 vec![1.23456f64, 7.89012]
1493 );
1494 }
1495
1496 std::fs::remove_file(&path).ok();
1497 }
1498
1499 #[test]
1500 fn append_chunked_roundtrip() {
1501 let path = temp_path("append_chunked");
1502
1503 {
1504 let file = H5File::create(&path).unwrap();
1505 let ds = file
1506 .new_dataset::<f64>()
1507 .shape([0, 3])
1508 .chunk(&[1, 3])
1509 .max_shape(&[None, Some(3)])
1510 .create("data")
1511 .unwrap();
1512
1513 ds.append(&[1.0f64, 2.0, 3.0]).unwrap();
1515 ds.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();
1517
1518 file.close().unwrap();
1519 }
1520
1521 {
1522 let file = H5File::open(&path).unwrap();
1523 let ds = file.dataset("data").unwrap();
1524 assert_eq!(ds.shape(), vec![3, 3]);
1525 let all = ds.read_raw::<f64>().unwrap();
1526 assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
1527 }
1528
1529 std::fs::remove_file(&path).ok();
1530 }
1531
1532 #[test]
1533 fn append_1d_chunked() {
1534 let path = temp_path("append_1d");
1535
1536 {
1537 let file = H5File::create(&path).unwrap();
1538 let ds = file
1539 .new_dataset::<i32>()
1540 .shape([0])
1541 .chunk(&[4])
1542 .max_shape(&[None])
1543 .create("values")
1544 .unwrap();
1545
1546 ds.append(&[10i32, 20, 30]).unwrap(); ds.append(&[40i32]).unwrap(); ds.append(&[50i32, 60, 70, 80]).unwrap(); file.close().unwrap();
1551 }
1552
1553 {
1554 let file = H5File::open(&path).unwrap();
1555 let ds = file.dataset("values").unwrap();
1556 assert_eq!(ds.shape(), vec![8]);
1557 let all = ds.read_raw::<i32>().unwrap();
1558 assert_eq!(all, vec![10, 20, 30, 40, 50, 60, 70, 80]);
1559 }
1560
1561 std::fs::remove_file(&path).ok();
1562 }
1563
1564 #[test]
1565 fn append_partial_chunk_flushed_on_close() {
1566 let path = temp_path("append_partial_close");
1567
1568 {
1569 let file = H5File::create(&path).unwrap();
1570 let ds = file
1571 .new_dataset::<f64>()
1572 .shape([0])
1573 .chunk(&[4])
1574 .max_shape(&[None])
1575 .create("vals")
1576 .unwrap();
1577
1578 ds.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
1580 file.close().unwrap();
1581 }
1582
1583 {
1584 let file = H5File::open(&path).unwrap();
1585 let ds = file.dataset("vals").unwrap();
1586 assert_eq!(ds.shape(), vec![5]);
1587 let all = ds.read_raw::<f64>().unwrap();
1588 assert_eq!(all.len(), 5);
1591 assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
1592 }
1593
1594 std::fs::remove_file(&path).ok();
1595 }
1596}