1use crate::attribute::AttrBuilder;
8use crate::error::{Hdf5Error, Result};
9use crate::file::{borrow_inner, borrow_inner_mut, clone_inner, H5FileInner, SharedInner};
10use crate::types::H5Type;
11
/// Builder for creating a new dataset of element type `T` in a writable file.
///
/// Obtained from the file/group layer (see `new` / `new_in_group`); configure
/// with the chainable setters, then call `create()` to materialize the dataset.
pub struct DatasetBuilder<T: H5Type> {
    // Shared handle to the owning file's state (writer/reader/closed).
    file_inner: SharedInner,
    // Dataset dimensions; must be set before create(). An empty vec means scalar.
    shape: Option<Vec<usize>>,
    // Chunk dimensions; when set, create() uses the chunked layout path.
    chunk_dims: Option<Vec<usize>>,
    // Per-dimension maximum sizes; a `None` entry means unlimited. Only read
    // by create() on the chunked path.
    max_shape: Option<Vec<Option<usize>>>,
    // Deflate (gzip) level; only consulted for chunked datasets.
    deflate_level: Option<u32>,
    // Shuffle+deflate level; create() checks this before plain deflate.
    shuffle_deflate_level: Option<u32>,
    // Explicit filter pipeline; create() gives this precedence over the
    // deflate/shuffle_deflate level shortcuts.
    custom_pipeline: Option<crate::format::messages::filter::FilterPipeline>,
    // Enclosing group path; `None` or "/" means the file root.
    group_path: Option<String>,
    // `T` is only used at create() time to pick the datatype and element size.
    _marker: std::marker::PhantomData<T>,
}
39
40impl<T: H5Type> DatasetBuilder<T> {
41 pub(crate) fn new(file_inner: SharedInner) -> Self {
42 Self {
43 file_inner,
44 shape: None,
45 chunk_dims: None,
46 max_shape: None,
47 deflate_level: None,
48 shuffle_deflate_level: None,
49 custom_pipeline: None,
50 group_path: None,
51 _marker: std::marker::PhantomData,
52 }
53 }
54
55 pub(crate) fn new_in_group(file_inner: SharedInner, group_path: String) -> Self {
56 Self {
57 file_inner,
58 shape: None,
59 chunk_dims: None,
60 max_shape: None,
61 deflate_level: None,
62 shuffle_deflate_level: None,
63 custom_pipeline: None,
64 group_path: Some(group_path),
65 _marker: std::marker::PhantomData,
66 }
67 }
68
69 #[must_use]
74 pub fn shape<S: AsRef<[usize]>>(mut self, dims: S) -> Self {
75 self.shape = Some(dims.as_ref().to_vec());
76 self
77 }
78
79 #[must_use]
81 pub fn scalar(mut self) -> Self {
82 self.shape = Some(vec![]);
83 self
84 }
85
86 #[must_use]
92 pub fn chunk(mut self, chunk_dims: &[usize]) -> Self {
93 self.chunk_dims = Some(chunk_dims.to_vec());
94 self
95 }
96
97 #[must_use]
101 pub fn resizable(mut self) -> Self {
102 self.max_shape = Some(vec![None; self.shape.as_ref().map_or(0, |s| s.len())]);
103 self
104 }
105
106 #[must_use]
108 pub fn max_shape(mut self, max: &[Option<usize>]) -> Self {
109 self.max_shape = Some(max.to_vec());
110 self
111 }
112
113 #[must_use]
118 pub fn deflate(mut self, level: u32) -> Self {
119 self.deflate_level = Some(level);
120 self
121 }
122
123 #[must_use]
129 pub fn shuffle_deflate(mut self, level: u32) -> Self {
130 self.shuffle_deflate_level = Some(level);
131 self
132 }
133
134 #[must_use]
138 pub fn zstd(mut self, level: u32) -> Self {
139 self.custom_pipeline = Some(crate::format::messages::filter::FilterPipeline::zstd(level));
140 self
141 }
142
143 #[must_use]
148 pub fn filter_pipeline(
149 mut self,
150 pipeline: crate::format::messages::filter::FilterPipeline,
151 ) -> Self {
152 self.custom_pipeline = Some(pipeline);
153 self
154 }
155
156 pub fn create(self, name: &str) -> Result<H5Dataset> {
161 let shape = self.shape.ok_or_else(|| {
162 Hdf5Error::InvalidState("shape must be set before calling create()".into())
163 })?;
164
165 let full_name = if let Some(ref gp) = self.group_path {
167 if gp == "/" {
168 name.to_string()
169 } else {
170 let trimmed = gp.trim_start_matches('/');
171 format!("{}/{}", trimmed, name)
172 }
173 } else {
174 name.to_string()
175 };
176 let group_path = self.group_path.clone();
177
178 let dims_u64: Vec<u64> = shape.iter().map(|&d| d as u64).collect();
179 let datatype = T::hdf5_type();
180 let element_size = T::element_size();
181
182 if let Some(ref chunk_dims) = self.chunk_dims {
183 let chunk_u64: Vec<u64> = chunk_dims.iter().map(|&d| d as u64).collect();
185 let max_u64: Vec<u64> = if let Some(ref max) = self.max_shape {
186 max.iter()
187 .map(|m| m.map_or(u64::MAX, |v| v as u64))
188 .collect()
189 } else {
190 dims_u64.clone()
192 };
193
194 let index = {
195 let mut inner = borrow_inner_mut(&self.file_inner);
196 match &mut *inner {
197 H5FileInner::Writer(writer) => {
198 let idx = if let Some(pipeline) = self.custom_pipeline {
199 writer.create_chunked_dataset_with_pipeline(
200 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
201 )?
202 } else if let Some(level) = self.shuffle_deflate_level {
203 let pipeline =
204 crate::format::messages::filter::FilterPipeline::shuffle_deflate(
205 T::element_size() as u32,
206 level,
207 );
208 writer.create_chunked_dataset_with_pipeline(
209 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, pipeline,
210 )?
211 } else if let Some(level) = self.deflate_level {
212 writer.create_chunked_dataset_compressed(
213 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64, level,
214 )?
215 } else {
216 writer.create_chunked_dataset(
217 &full_name, datatype, &dims_u64, &max_u64, &chunk_u64,
218 )?
219 };
220 if let Some(ref gp) = group_path {
221 if gp != "/" {
222 writer.assign_dataset_to_group(gp, idx)?;
223 }
224 }
225 idx
226 }
227 H5FileInner::Reader(_) => {
228 return Err(Hdf5Error::InvalidState(
229 "cannot create a dataset in read mode".into(),
230 ));
231 }
232 H5FileInner::Closed => {
233 return Err(Hdf5Error::InvalidState("file is closed".into()));
234 }
235 }
236 };
237
238 Ok(H5Dataset {
239 file_inner: clone_inner(&self.file_inner),
240 info: DatasetInfo::Writer {
241 index,
242 shape,
243 element_size,
244 chunked: true,
245 },
246 })
247 } else {
248 let index = {
250 let mut inner = borrow_inner_mut(&self.file_inner);
251 match &mut *inner {
252 H5FileInner::Writer(writer) => {
253 let idx = writer.create_dataset(&full_name, datatype, &dims_u64)?;
254 if let Some(ref gp) = group_path {
255 if gp != "/" {
256 writer.assign_dataset_to_group(gp, idx)?;
257 }
258 }
259 idx
260 }
261 H5FileInner::Reader(_) => {
262 return Err(Hdf5Error::InvalidState(
263 "cannot create a dataset in read mode".into(),
264 ));
265 }
266 H5FileInner::Closed => {
267 return Err(Hdf5Error::InvalidState("file is closed".into()));
268 }
269 }
270 };
271
272 Ok(H5Dataset {
273 file_inner: clone_inner(&self.file_inner),
274 info: DatasetInfo::Writer {
275 index,
276 shape,
277 element_size,
278 chunked: false,
279 },
280 })
281 }
282 }
283}
284
/// Internal per-handle state distinguishing write-mode from read-mode datasets.
enum DatasetInfo {
    /// Dataset created in this writer session, addressed by writer index.
    Writer {
        // Index into the writer's internal dataset table.
        index: usize,
        // Dimensions captured at creation time; the handle never refreshes
        // this after extend()/append() (those only update writer state).
        shape: Vec<usize>,
        // Size in bytes of one element, from T::element_size() at creation.
        element_size: usize,
        // True for chunked layout; gates which write APIs are allowed.
        chunked: bool,
    },
    /// Dataset opened from an existing file, addressed by name via the reader.
    Reader {
        // Full dataset name used for reader lookups.
        name: String,
        // Dimensions as resolved when the handle was opened.
        shape: Vec<usize>,
        // Size in bytes of one element, as resolved when opened.
        element_size: usize,
    },
}
312
/// Handle to a single dataset, either created by a writer (`DatasetBuilder`)
/// or opened from an existing file in read mode.
pub struct H5Dataset {
    // Shared handle to the owning file's state (writer/reader/closed); the
    // handle stays valid even if the H5File value itself is dropped or moved.
    file_inner: SharedInner,
    // Mode-specific identity and cached metadata (see DatasetInfo).
    info: DatasetInfo,
}
326
impl H5Dataset {
    /// Builds a read-mode handle. Crate-internal: called by the file layer
    /// after it has resolved the dataset's name, shape and element size.
    pub(crate) fn new_reader(
        file_inner: SharedInner,
        name: String,
        shape: Vec<usize>,
        element_size: usize,
    ) -> Self {
        Self {
            file_inner,
            info: DatasetInfo::Reader {
                name,
                shape,
                element_size,
            },
        }
    }

    /// Returns the dataset dimensions recorded when this handle was created.
    ///
    /// NOTE(review): for write-mode handles this is the creation-time shape;
    /// it is not refreshed after `extend()`/`append()`.
    pub fn shape(&self) -> Vec<usize> {
        match &self.info {
            DatasetInfo::Writer { shape, .. } => shape.clone(),
            DatasetInfo::Reader { shape, .. } => shape.clone(),
        }
    }

    /// Number of dimensions (rank); 0 for scalar datasets.
    pub fn ndims(&self) -> usize {
        match &self.info {
            DatasetInfo::Writer { shape, .. } => shape.len(),
            DatasetInfo::Reader { shape, .. } => shape.len(),
        }
    }

    /// Total element count (product of all dimensions; 1 for scalars).
    pub fn total_elements(&self) -> usize {
        match &self.info {
            DatasetInfo::Writer { shape, .. } => shape.iter().product(),
            DatasetInfo::Reader { shape, .. } => shape.iter().product(),
        }
    }

    /// Size in bytes of a single element.
    pub fn element_size(&self) -> usize {
        match &self.info {
            DatasetInfo::Writer { element_size, .. } => *element_size,
            DatasetInfo::Reader { element_size, .. } => *element_size,
        }
    }

    /// Chunk dimensions of a chunked dataset opened in read mode.
    ///
    /// Returns `None` in write mode, for non-chunked layouts, or when the
    /// dataset cannot be found.
    pub fn chunk_dims(&self) -> Option<Vec<usize>> {
        match &self.info {
            DatasetInfo::Reader { name, .. } => {
                let inner = borrow_inner(&self.file_inner);
                if let H5FileInner::Reader(reader) = &*inner {
                    if let Some(info) = reader.dataset_info(name) {
                        if let crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 {
                            chunk_dims,
                            ..
                        } = &info.layout
                        {
                            // The on-disk chunk shape carries one extra trailing
                            // dimension (the HDF5 layout message stores rank+1
                            // dims, the last being the element size); drop it to
                            // report the logical chunk dimensions.
                            return Some(
                                chunk_dims[..chunk_dims.len() - 1]
                                    .iter()
                                    .map(|&d| d as usize)
                                    .collect(),
                            );
                        }
                    }
                }
                None
            }
            DatasetInfo::Writer { .. } => None,
        }
    }

    /// Whether the dataset uses chunked storage.
    ///
    /// In read mode this inspects the on-disk layout message; unknown datasets
    /// or a non-reader file state report `false`.
    pub fn is_chunked(&self) -> bool {
        match &self.info {
            DatasetInfo::Writer { chunked, .. } => *chunked,
            DatasetInfo::Reader { name, .. } => {
                let inner = borrow_inner(&self.file_inner);
                match &*inner {
                    H5FileInner::Reader(reader) => {
                        if let Some(info) = reader.dataset_info(name) {
                            matches!(
                                info.layout,
                                crate::format::messages::data_layout::DataLayoutMessage::ChunkedV4 { .. }
                            )
                        } else {
                            false
                        }
                    }
                    _ => false,
                }
            }
        }
    }

    /// Lists the names of this dataset's attributes (read mode only).
    ///
    /// # Errors
    /// Returns `InvalidState` in write mode or if the file left read mode.
    pub fn attr_names(&self) -> Result<Vec<String>> {
        match &self.info {
            DatasetInfo::Reader { name, .. } => {
                let inner = borrow_inner(&self.file_inner);
                match &*inner {
                    H5FileInner::Reader(reader) => Ok(reader.dataset_attr_names(name)?),
                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
                }
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "attr_names not available in write mode".into(),
            )),
        }
    }

    /// Opens a named attribute of this dataset (read mode only).
    ///
    /// # Errors
    /// Returns `InvalidState` in write mode or if the file left read mode;
    /// propagates the reader error when the attribute is missing.
    pub fn attr(&self, attr_name: &str) -> Result<crate::attribute::H5Attribute> {
        match &self.info {
            DatasetInfo::Reader { name, .. } => {
                let inner = borrow_inner(&self.file_inner);
                match &*inner {
                    H5FileInner::Reader(reader) => {
                        let attr_msg = reader.dataset_attr(name, attr_name)?;
                        // Clone name/data out of the message so the returned
                        // attribute owns its contents independently of `inner`.
                        Ok(crate::attribute::H5Attribute::new_reader(
                            clone_inner(&self.file_inner),
                            attr_msg.name.clone(),
                            attr_msg.data.clone(),
                        ))
                    }
                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
                }
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "attr() not available in write mode".into(),
            )),
        }
    }

    /// Starts an attribute builder attached to this dataset.
    pub fn new_attr<T: 'static>(&self) -> AttrBuilder<'_, T> {
        let ds_index = match &self.info {
            DatasetInfo::Writer { index, .. } => *index,
            DatasetInfo::Reader { .. } => {
                // Read mode has no writer-side dataset index; usize::MAX is a
                // placeholder — presumably AttrBuilder fails on use in read
                // mode. TODO(review): confirm AttrBuilder's handling.
                usize::MAX
            }
        };
        AttrBuilder::new(&self.file_inner, ds_index)
    }

    /// Writes the entire contents of a contiguous dataset.
    ///
    /// `data` must contain exactly `total_elements()` values and `T` must have
    /// the element size the dataset was created with.
    ///
    /// # Errors
    /// `InvalidState` for chunked datasets, length mismatches, or when the
    /// file is no longer writable; `TypeMismatch` on element-size mismatch.
    pub fn write_raw<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                shape,
                element_size,
                chunked,
            } => {
                if *chunked {
                    return Err(Hdf5Error::InvalidState(
                        "use write_chunk for chunked datasets".into(),
                    ));
                }

                let total_elements: usize = shape.iter().product();
                if data.len() != total_elements {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} does not match dataset size {}",
                        data.len(),
                        total_elements,
                    )));
                }

                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "write type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                // SAFETY: reinterprets the slice as raw bytes; byte_len covers
                // exactly the slice's memory. Assumes H5Type implementors are
                // plain-old-data with size == element_size — TODO confirm the
                // H5Type contract guarantees this.
                let byte_len = data.len() * T::element_size();
                let raw =
                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };

                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_dataset_raw(*index, raw)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => Err(Hdf5Error::InvalidState(
                "cannot write to a dataset opened in read mode".into(),
            )),
        }
    }

    /// Writes one raw (already encoded/filtered as required) chunk by linear
    /// chunk index. Chunked datasets only.
    pub fn write_chunk(&self, chunk_idx: usize, data: &[u8]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer { index, chunked, .. } => {
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "write_chunk is only for chunked datasets".into(),
                    ));
                }

                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_chunk(*index, chunk_idx as u64, data)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
            }
        }
    }

    /// Writes several chunks in one writer call; each pair is
    /// `(chunk_index, raw_chunk_bytes)`. Chunked datasets only.
    pub fn write_chunks_batch(&self, chunks: &[(usize, &[u8])]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer { index, chunked, .. } => {
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "write_chunks_batch is only for chunked datasets".into(),
                    ));
                }
                // Writer API takes u64 chunk indices; convert once up front.
                let pairs: Vec<(u64, &[u8])> = chunks
                    .iter()
                    .map(|(idx, data)| (*idx as u64, *data))
                    .collect();
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_chunks_batch(*index, &pairs)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
            }
        }
    }

    /// Appends whole "frames" along dimension 0 of a chunked dataset.
    ///
    /// A frame is one slice of the trailing dimensions (`shape[1..]` elements;
    /// one element for 1-D datasets). `data.len()` must be a whole number of
    /// frames. Full chunks are written immediately; a trailing partial chunk
    /// is buffered in the writer (`append_buffer`) until later appends
    /// complete it or `flush()` is called. The dataset's dimension 0 is
    /// extended to cover everything appended, including buffered frames.
    ///
    /// # Errors
    /// `InvalidState` for non-chunked datasets, zero-size trailing dims,
    /// non-frame-aligned lengths, or a non-writable file; `TypeMismatch` on
    /// element-size mismatch.
    pub fn append<T: H5Type>(&self, data: &[T]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                element_size,
                chunked,
                ..
            } => {
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "append is only for chunked datasets".into(),
                    ));
                }
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "append type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let ds_index = *index;
                let es = *element_size;

                let mut inner = borrow_inner_mut(&self.file_inner);
                let writer = match &mut *inner {
                    H5FileInner::Writer(w) => w,
                    _ => {
                        return Err(Hdf5Error::InvalidState(
                            "file is no longer in write mode".into(),
                        ))
                    }
                };

                let chunk_dims = writer
                    .dataset_chunk_dims(ds_index)
                    .ok_or_else(|| {
                        Hdf5Error::InvalidState("dataset has no chunk info".into())
                    })?
                    .to_vec();
                // NOTE(review): dims[0] includes frames still sitting in the
                // append buffer (extend_dataset below records the logical size).
                let dims = writer.dataset_dims(ds_index).to_vec();

                // Elements per frame = product of trailing dims; 1-D datasets
                // append single elements.
                let frame_elems: usize = if dims.len() > 1 {
                    dims[1..].iter().map(|&d| d as usize).product()
                } else {
                    1
                };

                if frame_elems == 0 {
                    return Err(Hdf5Error::InvalidState(
                        "cannot append to dataset with zero-size trailing dimensions".into(),
                    ));
                }

                if data.len() % frame_elems != 0 {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} is not a multiple of frame size {}",
                        data.len(),
                        frame_elems,
                    )));
                }

                let n_new_frames = data.len() / frame_elems;
                let current_dim0 = dims[0] as usize;

                // Frames per chunk along dim 0, and byte sizes used below.
                // (Assumes writer-side chunk dims are the logical chunk shape,
                // without the on-disk trailing element-size dimension.)
                let chunk_dim0 = chunk_dims[0] as usize;
                let chunk_bytes = chunk_dims.iter().map(|&d| d as usize).product::<usize>() * es;
                let frame_bytes = frame_elems * es;

                // SAFETY: views `data` as raw bytes; length covers exactly the
                // slice. Same POD assumption as write_raw.
                let raw = unsafe {
                    std::slice::from_raw_parts(data.as_ptr() as *const u8, data.len() * es)
                };

                // Prepend any frames buffered by a previous append, then clear
                // the buffer — unwritten remainder is re-buffered below.
                let ds = &mut writer.datasets[ds_index];
                let buffered_frames = ds.append_buffered_frames as usize;
                let mut combined = std::mem::take(&mut ds.append_buffer);
                combined.extend_from_slice(raw);
                ds.append_buffered_frames = 0;

                let total_frames = buffered_frames + n_new_frames;
                let total_bytes = combined.len();

                // First frame in `combined` sits at this absolute dim-0 index.
                let base_dim0 = current_dim0 - buffered_frames;
                let mut byte_pos = 0usize;
                let mut frame_pos = 0usize;

                while frame_pos < total_frames {
                    let abs_frame = base_dim0 + frame_pos;
                    let chunk_idx = abs_frame / chunk_dim0;
                    let remaining_frames = total_frames - frame_pos;
                    // Frames needed to reach the next chunk boundary.
                    let frames_to_fill = chunk_dim0 - (abs_frame % chunk_dim0);

                    if remaining_frames >= frames_to_fill {
                        let end = byte_pos + frames_to_fill * frame_bytes;
                        if frames_to_fill == chunk_dim0 {
                            // Chunk-aligned and fully covered: write directly.
                            writer.write_chunk(ds_index, chunk_idx as u64, &combined[byte_pos..end])?;
                        } else {
                            // Completing the tail of a chunk: stage into a
                            // zero-filled chunk-sized buffer at the right offset.
                            let mut chunk_buf = vec![0u8; chunk_bytes];
                            let offset_in_chunk = (abs_frame % chunk_dim0) * frame_bytes;
                            chunk_buf[offset_in_chunk..offset_in_chunk + frames_to_fill * frame_bytes]
                                .copy_from_slice(&combined[byte_pos..end]);
                            writer.write_chunk(ds_index, chunk_idx as u64, &chunk_buf)?;
                        }
                        byte_pos = end;
                        frame_pos += frames_to_fill;
                    } else {
                        // Not enough data to finish this chunk: buffer the
                        // remainder for the next append/flush and stop.
                        let ds = &mut writer.datasets[ds_index];
                        ds.append_buffer = combined[byte_pos..total_bytes].to_vec();
                        ds.append_buffered_frames = remaining_frames as u64;
                        frame_pos = total_frames;
                    }
                }

                // Record the logical size including any still-buffered frames.
                let logical_dim0 = base_dim0 + total_frames;
                let mut new_dims: Vec<u64> = dims;
                new_dims[0] = logical_dim0 as u64;
                writer.extend_dataset(ds_index, &new_dims)?;

                Ok(())
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot append in read mode".into()))
            }
        }
    }

    /// Sets new (larger) dimensions on a chunked dataset.
    pub fn extend(&self, new_dims: &[usize]) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer { index, chunked, .. } => {
                if !*chunked {
                    return Err(Hdf5Error::InvalidState(
                        "extend is only for chunked datasets".into(),
                    ));
                }

                let dims_u64: Vec<u64> = new_dims.iter().map(|&d| d as u64).collect();
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.extend_dataset(*index, &dims_u64)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot extend in read mode".into()))
            }
        }
    }

    /// Flushes writer-side state for this dataset (e.g. buffered append data).
    /// A no-op in read mode or when the file is no longer a writer.
    pub fn flush(&self) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer { index, .. } => {
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.flush_dataset(*index)?;
                        Ok(())
                    }
                    // Deliberately lenient: flushing after close/reopen is a no-op.
                    _ => Ok(()),
                }
            }
            DatasetInfo::Reader { .. } => Ok(()),
        }
    }

    /// Reads a hyperslab (`starts`/`counts` per dimension) from a dataset
    /// opened in read mode, returning `counts.iter().product()` elements.
    ///
    /// # Errors
    /// `TypeMismatch` when `T`'s size differs from the dataset's element size
    /// or the raw byte count is not element-aligned; `InvalidState` in write
    /// mode or when the file is not a reader.
    pub fn read_slice<T: H5Type>(&self, starts: &[usize], counts: &[usize]) -> Result<Vec<T>> {
        match &self.info {
            DatasetInfo::Reader {
                name, element_size, ..
            } => {
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "read type has element size {} but dataset has element size {}",
                        T::element_size(),
                        element_size,
                    )));
                }
                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();

                let raw = {
                    // Mutable borrow: the reader presumably seeks/caches while
                    // reading — TODO confirm why read needs &mut.
                    let mut inner = borrow_inner_mut(&self.file_inner);
                    match &mut *inner {
                        H5FileInner::Reader(reader) => {
                            reader.read_slice(name, &starts_u64, &counts_u64)?
                        }
                        _ => {
                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()))
                        }
                    }
                };

                if raw.len() % T::element_size() != 0 {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "raw data size {} is not a multiple of element size {}",
                        raw.len(),
                        T::element_size(),
                    )));
                }

                let count = raw.len() / T::element_size();
                let mut result = Vec::<T>::with_capacity(count);
                // SAFETY: capacity holds `count` elements = raw.len() bytes;
                // bytes are copied before set_len so every element is
                // initialized. Assumes any bit pattern is valid for T (POD).
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        raw.as_ptr(),
                        result.as_mut_ptr() as *mut u8,
                        raw.len(),
                    );
                    result.set_len(count);
                }
                Ok(result)
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read_slice from a dataset in write mode".into(),
            )),
        }
    }

    /// Writes a hyperslab (`starts`/`counts` per dimension) into a contiguous
    /// dataset; `data` must hold exactly `counts.iter().product()` elements.
    pub fn write_slice<T: H5Type>(
        &self,
        starts: &[usize],
        counts: &[usize],
        data: &[T],
    ) -> Result<()> {
        match &self.info {
            DatasetInfo::Writer {
                index,
                element_size,
                chunked,
                ..
            } => {
                if *chunked {
                    return Err(Hdf5Error::InvalidState(
                        "write_slice is only for contiguous datasets".into(),
                    ));
                }
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "write type has element size {} but dataset expects {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let expected: usize = counts.iter().product();
                if data.len() != expected {
                    return Err(Hdf5Error::InvalidState(format!(
                        "data length {} does not match slice size {}",
                        data.len(),
                        expected,
                    )));
                }

                let starts_u64: Vec<u64> = starts.iter().map(|&s| s as u64).collect();
                let counts_u64: Vec<u64> = counts.iter().map(|&c| c as u64).collect();

                // SAFETY: same byte view as write_raw; byte_len covers exactly
                // the slice's memory (POD assumption on H5Type).
                let byte_len = data.len() * T::element_size();
                let raw =
                    unsafe { std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len) };

                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Writer(writer) => {
                        writer.write_slice(*index, &starts_u64, &counts_u64, raw)?;
                        Ok(())
                    }
                    _ => Err(Hdf5Error::InvalidState(
                        "file is no longer in write mode".into(),
                    )),
                }
            }
            DatasetInfo::Reader { .. } => {
                Err(Hdf5Error::InvalidState("cannot write in read mode".into()))
            }
        }
    }

    /// Reads a variable-length string dataset (read mode only).
    pub fn read_vlen_strings(&self) -> Result<Vec<String>> {
        match &self.info {
            DatasetInfo::Reader { name, .. } => {
                let mut inner = borrow_inner_mut(&self.file_inner);
                match &mut *inner {
                    H5FileInner::Reader(reader) => Ok(reader.read_vlen_strings(name)?),
                    _ => Err(Hdf5Error::InvalidState("file is not in read mode".into())),
                }
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read vlen strings from a dataset in write mode".into(),
            )),
        }
    }

    /// Reads the entire dataset as a flat `Vec<T>` (read mode only).
    ///
    /// # Errors
    /// `TypeMismatch` when `T`'s size differs from the dataset's element size
    /// or the raw byte count is not element-aligned; `InvalidState` in write
    /// mode or when the file is not a reader.
    pub fn read_raw<T: H5Type>(&self) -> Result<Vec<T>> {
        match &self.info {
            DatasetInfo::Reader {
                name, element_size, ..
            } => {
                if T::element_size() != *element_size {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "read type has element size {} but dataset has element size {}",
                        T::element_size(),
                        element_size,
                    )));
                }

                let raw = {
                    let mut inner = borrow_inner_mut(&self.file_inner);
                    match &mut *inner {
                        H5FileInner::Reader(reader) => reader.read_dataset_raw(name)?,
                        _ => {
                            return Err(Hdf5Error::InvalidState("file is not in read mode".into()));
                        }
                    }
                };

                if raw.len() % T::element_size() != 0 {
                    return Err(Hdf5Error::TypeMismatch(format!(
                        "raw data size {} is not a multiple of element size {}",
                        raw.len(),
                        T::element_size(),
                    )));
                }

                let count = raw.len() / T::element_size();
                let mut result = Vec::<T>::with_capacity(count);

                // SAFETY: capacity holds `count` elements = raw.len() bytes;
                // the copy fills every byte before set_len, so no element is
                // left uninitialized. Assumes any bit pattern is valid for T
                // (POD), consistent with the unsafe byte views in the write
                // paths.
                unsafe {
                    std::ptr::copy_nonoverlapping(
                        raw.as_ptr(),
                        result.as_mut_ptr() as *mut u8,
                        raw.len(),
                    );
                    result.set_len(count);
                }

                Ok(result)
            }
            DatasetInfo::Writer { .. } => Err(Hdf5Error::InvalidState(
                "cannot read from a dataset in write mode".into(),
            )),
        }
    }
}
1033
1034#[cfg(test)]
1035mod tests {
1036 use crate::H5File;
1037 use std::path::PathBuf;
1038
1039 fn temp_path(name: &str) -> PathBuf {
1040 std::env::temp_dir().join(format!("hdf5_dataset_test_{}.h5", name))
1041 }
1042
1043 #[test]
1044 fn builder_requires_shape() {
1045 let path = temp_path("no_shape");
1046 let file = H5File::create(&path).unwrap();
1047 let result = file.new_dataset::<u8>().create("data");
1048 assert!(result.is_err());
1049 std::fs::remove_file(&path).ok();
1050 }
1051
1052 #[test]
1053 fn write_raw_size_mismatch() {
1054 let path = temp_path("size_mismatch");
1055 let file = H5File::create(&path).unwrap();
1056 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1057 let result = ds.write_raw(&[1u8, 2, 3]);
1059 assert!(result.is_err());
1060 std::fs::remove_file(&path).ok();
1061 }
1062
1063 #[test]
1064 fn roundtrip_u8_1d() {
1065 let path = temp_path("rt_u8_1d");
1066 let data: Vec<u8> = (0..10).collect();
1067
1068 {
1069 let file = H5File::create(&path).unwrap();
1070 let ds = file.new_dataset::<u8>().shape([10]).create("seq").unwrap();
1071 ds.write_raw(&data).unwrap();
1072 file.close().unwrap();
1073 }
1074
1075 {
1076 let file = H5File::open(&path).unwrap();
1077 let ds = file.dataset("seq").unwrap();
1078 assert_eq!(ds.shape(), vec![10]);
1079 let readback = ds.read_raw::<u8>().unwrap();
1080 assert_eq!(readback, data);
1081 }
1082
1083 std::fs::remove_file(&path).ok();
1084 }
1085
1086 #[test]
1087 fn roundtrip_i32_2d() {
1088 let path = temp_path("rt_i32_2d");
1089 let data: Vec<i32> = vec![-1, 0, 1, 2, 3, 4];
1090
1091 {
1092 let file = H5File::create(&path).unwrap();
1093 let ds = file
1094 .new_dataset::<i32>()
1095 .shape([2, 3])
1096 .create("matrix")
1097 .unwrap();
1098 ds.write_raw(&data).unwrap();
1099 file.close().unwrap();
1100 }
1101
1102 {
1103 let file = H5File::open(&path).unwrap();
1104 let ds = file.dataset("matrix").unwrap();
1105 assert_eq!(ds.shape(), vec![2, 3]);
1106 let readback = ds.read_raw::<i32>().unwrap();
1107 assert_eq!(readback, data);
1108 }
1109
1110 std::fs::remove_file(&path).ok();
1111 }
1112
1113 #[test]
1114 fn roundtrip_f64_3d() {
1115 let path = temp_path("rt_f64_3d");
1116 let data: Vec<f64> = (0..24).map(|i| i as f64 * 0.5).collect();
1117
1118 {
1119 let file = H5File::create(&path).unwrap();
1120 let ds = file
1121 .new_dataset::<f64>()
1122 .shape([2, 3, 4])
1123 .create("cube")
1124 .unwrap();
1125 ds.write_raw(&data).unwrap();
1126 file.close().unwrap();
1127 }
1128
1129 {
1130 let file = H5File::open(&path).unwrap();
1131 let ds = file.dataset("cube").unwrap();
1132 assert_eq!(ds.shape(), vec![2, 3, 4]);
1133 let readback = ds.read_raw::<f64>().unwrap();
1134 assert_eq!(readback, data);
1135 }
1136
1137 std::fs::remove_file(&path).ok();
1138 }
1139
1140 #[test]
1141 fn cannot_read_in_write_mode() {
1142 let path = temp_path("no_read_write");
1143 let file = H5File::create(&path).unwrap();
1144 let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1145 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1146 let result = ds.read_raw::<u8>();
1147 assert!(result.is_err());
1148 std::fs::remove_file(&path).ok();
1149 }
1150
1151 #[test]
1152 fn cannot_write_in_read_mode() {
1153 let path = temp_path("no_write_read");
1154
1155 {
1156 let file = H5File::create(&path).unwrap();
1157 let ds = file.new_dataset::<u8>().shape([4]).create("x").unwrap();
1158 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1159 file.close().unwrap();
1160 }
1161
1162 {
1163 let file = H5File::open(&path).unwrap();
1164 let ds = file.dataset("x").unwrap();
1165 let result = ds.write_raw(&[5u8, 6, 7, 8]);
1166 assert!(result.is_err());
1167 }
1168
1169 std::fs::remove_file(&path).ok();
1170 }
1171
1172 #[test]
1173 fn numeric_attr_roundtrip() {
1174 let path = temp_path("num_attr");
1175 {
1176 let file = H5File::create(&path).unwrap();
1177 let ds = file.new_dataset::<f32>().shape([4]).create("data").unwrap();
1178 ds.write_raw(&[1.0f32; 4]).unwrap();
1179
1180 let a1 = ds.new_attr::<f64>().shape(()).create("scale").unwrap();
1181 a1.write_numeric(&1.2345f64).unwrap();
1182
1183 let a2 = ds.new_attr::<i32>().shape(()).create("count").unwrap();
1184 a2.write_numeric(&42i32).unwrap();
1185
1186 file.close().unwrap();
1187 }
1188 {
1189 let file = H5File::open(&path).unwrap();
1190 let ds = file.dataset("data").unwrap();
1191
1192 let scale = ds.attr("scale").unwrap();
1193 let val: f64 = scale.read_numeric().unwrap();
1194 assert!((val - 1.2345).abs() < 1e-10);
1195
1196 let count = ds.attr("count").unwrap();
1197 let val: i32 = count.read_numeric().unwrap();
1198 assert_eq!(val, 42);
1199 }
1200 std::fs::remove_file(&path).ok();
1201 }
1202
1203 #[test]
1204 fn cannot_create_dataset_in_read_mode() {
1205 let path = temp_path("no_create_read");
1206
1207 {
1208 let _file = H5File::create(&path).unwrap();
1209 }
1210
1211 {
1212 let file = H5File::open(&path).unwrap();
1213 let result = file.new_dataset::<u8>().shape([4]).create("x");
1214 assert!(result.is_err());
1215 }
1216
1217 std::fs::remove_file(&path).ok();
1218 }
1219
1220 #[test]
1221 fn shape_accessor() {
1222 let path = temp_path("shape_acc");
1223
1224 let file = H5File::create(&path).unwrap();
1225 let ds = file
1226 .new_dataset::<f32>()
1227 .shape([5, 10, 3])
1228 .create("tensor")
1229 .unwrap();
1230 assert_eq!(ds.shape(), vec![5, 10, 3]);
1231
1232 std::fs::remove_file(&path).ok();
1233 }
1234
1235 #[test]
1236 fn slice_roundtrip_2d() {
1237 let path = temp_path("slice_2d");
1238
1239 let data: Vec<i32> = (0..20).collect();
1241 {
1242 let file = H5File::create(&path).unwrap();
1243 let ds = file
1244 .new_dataset::<i32>()
1245 .shape([4, 5])
1246 .create("mat")
1247 .unwrap();
1248 ds.write_raw(&data).unwrap();
1249 file.close().unwrap();
1250 }
1251 {
1252 let file = H5File::open(&path).unwrap();
1253 let ds = file.dataset("mat").unwrap();
1254 let slice = ds.read_slice::<i32>(&[1, 2], &[2, 2]).unwrap();
1256 assert_eq!(slice, vec![7, 8, 12, 13]);
1259 }
1260
1261 std::fs::remove_file(&path).ok();
1262 }
1263
1264 #[test]
1265 fn write_slice_2d() {
1266 let path = temp_path("write_slice_2d");
1267
1268 {
1269 let file = H5File::create(&path).unwrap();
1270 let ds = file
1271 .new_dataset::<f32>()
1272 .shape([3, 4])
1273 .create("data")
1274 .unwrap();
1275 ds.write_raw(&[0.0f32; 12]).unwrap();
1276 ds.write_slice(&[1, 1], &[2, 2], &[10.0f32, 20.0, 30.0, 40.0])
1278 .unwrap();
1279 file.close().unwrap();
1280 }
1281 {
1282 let file = H5File::open(&path).unwrap();
1283 let ds = file.dataset("data").unwrap();
1284 let full = ds.read_raw::<f32>().unwrap();
1285 assert_eq!(
1289 full,
1290 vec![0.0, 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0, 30.0, 40.0, 0.0,]
1291 );
1292 }
1293
1294 std::fs::remove_file(&path).ok();
1295 }
1296
1297 #[test]
1298 fn attr_read_roundtrip() {
1299 use crate::types::VarLenUnicode;
1300 let path = temp_path("attr_read");
1301
1302 {
1303 let file = H5File::create(&path).unwrap();
1304 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1305 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1306 let a1 = ds
1307 .new_attr::<VarLenUnicode>()
1308 .shape(())
1309 .create("units")
1310 .unwrap();
1311 a1.write_string("meters").unwrap();
1312 let a2 = ds
1313 .new_attr::<VarLenUnicode>()
1314 .shape(())
1315 .create("desc")
1316 .unwrap();
1317 a2.write_string("test data").unwrap();
1318 file.close().unwrap();
1319 }
1320 {
1321 let file = H5File::open(&path).unwrap();
1322 let ds = file.dataset("data").unwrap();
1323
1324 let names = ds.attr_names().unwrap();
1325 assert!(names.contains(&"units".to_string()));
1326 assert!(names.contains(&"desc".to_string()));
1327
1328 let units = ds.attr("units").unwrap();
1329 assert_eq!(units.read_string().unwrap(), "meters");
1330
1331 let desc = ds.attr("desc").unwrap();
1332 assert_eq!(desc.read_string().unwrap(), "test data");
1333 }
1334
1335 std::fs::remove_file(&path).ok();
1336 }
1337
1338 #[test]
1339 fn type_mismatch_element_size() {
1340 let path = temp_path("type_mismatch");
1341
1342 {
1343 let file = H5File::create(&path).unwrap();
1344 let ds = file.new_dataset::<f64>().shape([4]).create("data").unwrap();
1345 ds.write_raw(&[1.0f64, 2.0, 3.0, 4.0]).unwrap();
1346 file.close().unwrap();
1347 }
1348
1349 {
1350 let file = H5File::open(&path).unwrap();
1351 let ds = file.dataset("data").unwrap();
1352 let result = ds.read_raw::<u8>();
1354 assert!(result.is_err());
1355 }
1356
1357 std::fs::remove_file(&path).ok();
1358 }
1359
1360 #[test]
1361 fn dataset_survives_file_move() {
1362 let path = temp_path("ds_survives");
1363
1364 let ds = {
1365 let file = H5File::create(&path).unwrap();
1366 file.new_dataset::<u8>().shape([4]).create("x").unwrap()
1367 };
1368 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1370 std::fs::remove_file(&path).ok();
1373 }
1374
1375 #[test]
1376 fn new_attr_scalar_string() {
1377 use crate::types::VarLenUnicode;
1378
1379 let path = temp_path("attr_scalar_string");
1380 {
1381 let file = H5File::create(&path).unwrap();
1382 let ds = file.new_dataset::<u8>().shape([4]).create("data").unwrap();
1383 ds.write_raw(&[1u8, 2, 3, 4]).unwrap();
1384
1385 let attr = ds
1386 .new_attr::<VarLenUnicode>()
1387 .shape(())
1388 .create("name")
1389 .unwrap();
1390 attr.write_scalar(&VarLenUnicode("test_value".to_string()))
1391 .unwrap();
1392
1393 file.close().unwrap();
1394 }
1395
1396 {
1398 let file = H5File::open(&path).unwrap();
1399 let ds = file.dataset("data").unwrap();
1400 assert_eq!(ds.shape(), vec![4]);
1401 let readback = ds.read_raw::<u8>().unwrap();
1402 assert_eq!(readback, vec![1u8, 2, 3, 4]);
1403 }
1404
1405 std::fs::remove_file(&path).ok();
1406 }
1407
1408 #[test]
1409 fn all_numeric_types_roundtrip() {
1410 let path = temp_path("all_types");
1411
1412 {
1413 let file = H5File::create(&path).unwrap();
1414
1415 let ds = file.new_dataset::<u8>().shape([2]).create("u8").unwrap();
1416 ds.write_raw(&[1u8, 2]).unwrap();
1417
1418 let ds = file.new_dataset::<i8>().shape([2]).create("i8").unwrap();
1419 ds.write_raw(&[-1i8, 1]).unwrap();
1420
1421 let ds = file.new_dataset::<u16>().shape([2]).create("u16").unwrap();
1422 ds.write_raw(&[100u16, 200]).unwrap();
1423
1424 let ds = file.new_dataset::<i16>().shape([2]).create("i16").unwrap();
1425 ds.write_raw(&[-100i16, 100]).unwrap();
1426
1427 let ds = file.new_dataset::<u32>().shape([2]).create("u32").unwrap();
1428 ds.write_raw(&[1000u32, 2000]).unwrap();
1429
1430 let ds = file.new_dataset::<i32>().shape([2]).create("i32").unwrap();
1431 ds.write_raw(&[-1000i32, 1000]).unwrap();
1432
1433 let ds = file.new_dataset::<u64>().shape([2]).create("u64").unwrap();
1434 ds.write_raw(&[10000u64, 20000]).unwrap();
1435
1436 let ds = file.new_dataset::<i64>().shape([2]).create("i64").unwrap();
1437 ds.write_raw(&[-10000i64, 10000]).unwrap();
1438
1439 let ds = file.new_dataset::<f32>().shape([2]).create("f32").unwrap();
1440 ds.write_raw(&[1.5f32, 2.5]).unwrap();
1441
1442 let ds = file.new_dataset::<f64>().shape([2]).create("f64").unwrap();
1443 ds.write_raw(&[1.23456f64, 7.89012]).unwrap();
1444
1445 file.close().unwrap();
1446 }
1447
1448 {
1449 let file = H5File::open(&path).unwrap();
1450
1451 assert_eq!(
1452 file.dataset("u8").unwrap().read_raw::<u8>().unwrap(),
1453 vec![1u8, 2]
1454 );
1455 assert_eq!(
1456 file.dataset("i8").unwrap().read_raw::<i8>().unwrap(),
1457 vec![-1i8, 1]
1458 );
1459 assert_eq!(
1460 file.dataset("u16").unwrap().read_raw::<u16>().unwrap(),
1461 vec![100u16, 200]
1462 );
1463 assert_eq!(
1464 file.dataset("i16").unwrap().read_raw::<i16>().unwrap(),
1465 vec![-100i16, 100]
1466 );
1467 assert_eq!(
1468 file.dataset("u32").unwrap().read_raw::<u32>().unwrap(),
1469 vec![1000u32, 2000]
1470 );
1471 assert_eq!(
1472 file.dataset("i32").unwrap().read_raw::<i32>().unwrap(),
1473 vec![-1000i32, 1000]
1474 );
1475 assert_eq!(
1476 file.dataset("u64").unwrap().read_raw::<u64>().unwrap(),
1477 vec![10000u64, 20000]
1478 );
1479 assert_eq!(
1480 file.dataset("i64").unwrap().read_raw::<i64>().unwrap(),
1481 vec![-10000i64, 10000]
1482 );
1483 assert_eq!(
1484 file.dataset("f32").unwrap().read_raw::<f32>().unwrap(),
1485 vec![1.5f32, 2.5]
1486 );
1487 assert_eq!(
1488 file.dataset("f64").unwrap().read_raw::<f64>().unwrap(),
1489 vec![1.23456f64, 7.89012]
1490 );
1491 }
1492
1493 std::fs::remove_file(&path).ok();
1494 }
1495
1496 #[test]
1497 fn append_chunked_roundtrip() {
1498 let path = temp_path("append_chunked");
1499
1500 {
1501 let file = H5File::create(&path).unwrap();
1502 let ds = file
1503 .new_dataset::<f64>()
1504 .shape(&[0, 3])
1505 .chunk(&[1, 3])
1506 .max_shape(&[None, Some(3)])
1507 .create("data")
1508 .unwrap();
1509
1510 ds.append(&[1.0f64, 2.0, 3.0]).unwrap();
1512 ds.append(&[4.0f64, 5.0, 6.0, 7.0, 8.0, 9.0]).unwrap();
1514
1515 file.close().unwrap();
1516 }
1517
1518 {
1519 let file = H5File::open(&path).unwrap();
1520 let ds = file.dataset("data").unwrap();
1521 assert_eq!(ds.shape(), vec![3, 3]);
1522 let all = ds.read_raw::<f64>().unwrap();
1523 assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]);
1524 }
1525
1526 std::fs::remove_file(&path).ok();
1527 }
1528
1529 #[test]
1530 fn append_1d_chunked() {
1531 let path = temp_path("append_1d");
1532
1533 {
1534 let file = H5File::create(&path).unwrap();
1535 let ds = file
1536 .new_dataset::<i32>()
1537 .shape(&[0])
1538 .chunk(&[4])
1539 .max_shape(&[None])
1540 .create("values")
1541 .unwrap();
1542
1543 ds.append(&[10i32, 20, 30]).unwrap(); ds.append(&[40i32]).unwrap(); ds.append(&[50i32, 60, 70, 80]).unwrap(); file.close().unwrap();
1548 }
1549
1550 {
1551 let file = H5File::open(&path).unwrap();
1552 let ds = file.dataset("values").unwrap();
1553 assert_eq!(ds.shape(), vec![8]);
1554 let all = ds.read_raw::<i32>().unwrap();
1555 assert_eq!(all, vec![10, 20, 30, 40, 50, 60, 70, 80]);
1556 }
1557
1558 std::fs::remove_file(&path).ok();
1559 }
1560
1561 #[test]
1562 fn append_partial_chunk_flushed_on_close() {
1563 let path = temp_path("append_partial_close");
1564
1565 {
1566 let file = H5File::create(&path).unwrap();
1567 let ds = file
1568 .new_dataset::<f64>()
1569 .shape(&[0])
1570 .chunk(&[4])
1571 .max_shape(&[None])
1572 .create("vals")
1573 .unwrap();
1574
1575 ds.append(&[1.0f64, 2.0, 3.0, 4.0, 5.0]).unwrap();
1577 file.close().unwrap();
1578 }
1579
1580 {
1581 let file = H5File::open(&path).unwrap();
1582 let ds = file.dataset("vals").unwrap();
1583 assert_eq!(ds.shape(), vec![5]);
1584 let all = ds.read_raw::<f64>().unwrap();
1585 assert_eq!(all.len(), 5);
1588 assert_eq!(all, vec![1.0, 2.0, 3.0, 4.0, 5.0]);
1589 }
1590
1591 std::fs::remove_file(&path).ok();
1592 }
1593}