1use crate::error::{IoError, Result};
19use scirs2_core::ndarray::{ArrayBase, ArrayD, IxDyn};
20use std::collections::HashMap;
21use std::ops::Deref;
22use std::path::Path;
23use std::str::FromStr;
24
25#[cfg(feature = "hdf5")]
26use hdf5::File;
27
/// Logical data types this HDF5 abstraction layer can represent.
#[derive(Debug, Clone, PartialEq)]
pub enum HDF5DataType {
    /// Fixed-width integer.
    Integer {
        /// Size in bytes.
        size: usize,
        /// Whether the integer is signed.
        signed: bool,
    },
    /// IEEE floating-point number.
    Float {
        /// Size in bytes.
        size: usize,
    },
    /// String with a known character encoding.
    String {
        /// Character encoding of the string data.
        encoding: StringEncoding,
    },
    /// Fixed-shape array of a base element type.
    Array {
        /// Element type of the array.
        base_type: Box<HDF5DataType>,
        /// Dimensions of the array.
        shape: Vec<usize>,
    },
    /// Compound (struct-like) type of named, typed fields.
    Compound {
        /// Ordered (field name, field type) pairs.
        fields: Vec<(String, HDF5DataType)>,
    },
    /// Enumeration of named integer values.
    Enum {
        /// Ordered (member name, integer value) pairs.
        values: Vec<(String, i64)>,
    },
}
66
/// Character encoding of an HDF5 string type.
#[derive(Debug, Clone, PartialEq)]
pub enum StringEncoding {
    /// UTF-8 encoded text.
    UTF8,
    /// 7-bit ASCII text.
    ASCII,
}
75
/// Compression filters to apply to a dataset.
#[derive(Debug, Clone, Default)]
pub struct CompressionOptions {
    /// Gzip (deflate) compression level, if enabled.
    pub gzip: Option<u8>,
    /// SZIP parameters, if enabled.
    /// NOTE(review): presumably (options mask, pixels per block) as in the
    /// HDF5 C API — confirm against the writer that consumes this.
    pub szip: Option<(u32, u32)>,
    /// Enable the LZF filter.
    pub lzf: bool,
    /// Enable the byte-shuffle filter (usually combined with compression).
    pub shuffle: bool,
}
88
/// Storage options for creating a dataset.
#[derive(Debug, Clone, Default)]
pub struct DatasetOptions {
    /// Chunk dimensions, if chunked storage is requested.
    pub chunk_size: Option<Vec<usize>>,
    /// Compression filters to apply.
    pub compression: CompressionOptions,
    /// Default fill value for unwritten elements.
    pub fill_value: Option<f64>,
    /// Enable the Fletcher-32 checksum filter.
    pub fletcher32: bool,
}
101
/// An HDF5 file: an in-memory group/dataset tree plus, when the `hdf5`
/// feature is enabled, a handle to the native on-disk file.
pub struct HDF5File {
    /// Path the file was created/opened with (currently unused — kept for
    /// diagnostics, hence the allow).
    #[allow(dead_code)]
    path: String,
    /// Root group ("/") holding the entire in-memory tree.
    root: Group,
    /// Mode the file was opened with (currently unused).
    #[allow(dead_code)]
    mode: FileMode,
    /// Native HDF5 handle; `None` is possible even with the feature on.
    #[cfg(feature = "hdf5")]
    native_file: Option<File>,
}
116
/// How an HDF5 file is opened.
#[derive(Debug, Clone, PartialEq)]
pub enum FileMode {
    /// Open an existing file for reading only.
    ReadOnly,
    /// Open an existing file for reading and writing.
    ReadWrite,
    /// Create a new file (in `open`, handled the same as `Truncate`).
    Create,
    /// Create a new file, truncating any existing one.
    Truncate,
}
129
/// A node in the in-memory HDF5 tree: named container of subgroups,
/// datasets, and attributes.
#[derive(Debug, Clone)]
pub struct Group {
    /// Name of this group (root is "/").
    pub name: String,
    /// Direct child groups, keyed by name.
    pub groups: HashMap<String, Group>,
    /// Datasets directly in this group, keyed by name.
    pub datasets: HashMap<String, Dataset>,
    /// Attributes attached to this group, keyed by name.
    pub attributes: HashMap<String, AttributeValue>,
}
142
143impl Group {
144 pub fn new(name: String) -> Self {
146 Self {
147 name,
148 groups: HashMap::new(),
149 datasets: HashMap::new(),
150 attributes: HashMap::new(),
151 }
152 }
153
154 pub fn create_group(&mut self, name: &str) -> &mut Group {
156 self.groups
157 .entry(name.to_string())
158 .or_insert_with(|| Group::new(name.to_string()))
159 }
160
161 pub fn get_group(&self, name: &str) -> Option<&Group> {
163 self.groups.get(name)
164 }
165
166 pub fn get_group_mut(&mut self, name: &str) -> Option<&mut Group> {
168 self.groups.get_mut(name)
169 }
170
171 pub fn set_attribute(&mut self, name: &str, value: AttributeValue) {
173 self.attributes.insert(name.to_string(), value);
174 }
175
176 pub fn get_attribute(&self, name: &str) -> Option<&AttributeValue> {
178 self.attributes.get(name)
179 }
180
181 pub fn remove_attribute(&mut self, name: &str) -> Option<AttributeValue> {
183 self.attributes.remove(name)
184 }
185
186 pub fn attribute_names(&self) -> Vec<&str> {
188 self.attributes.keys().map(|s| s.as_str()).collect()
189 }
190
191 pub fn has_attribute(&self, name: &str) -> bool {
193 self.attributes.contains_key(name)
194 }
195
196 pub fn get_dataset(&self, name: &str) -> Option<&Dataset> {
198 self.datasets.get(name)
199 }
200
201 pub fn get_dataset_mut(&mut self, name: &str) -> Option<&mut Dataset> {
203 self.datasets.get_mut(name)
204 }
205
206 pub fn dataset_names(&self) -> Vec<&str> {
208 self.datasets.keys().map(|s| s.as_str()).collect()
209 }
210
211 pub fn group_names(&self) -> Vec<&str> {
213 self.groups.keys().map(|s| s.as_str()).collect()
214 }
215
216 pub fn has_dataset(&self, name: &str) -> bool {
218 self.datasets.contains_key(name)
219 }
220
221 pub fn has_group(&self, name: &str) -> bool {
223 self.groups.contains_key(name)
224 }
225
226 pub fn remove_dataset(&mut self, name: &str) -> Option<Dataset> {
228 self.datasets.remove(name)
229 }
230
231 pub fn remove_group(&mut self, name: &str) -> Option<Group> {
233 self.groups.remove(name)
234 }
235}
236
/// An in-memory HDF5 dataset: typed, shaped data plus attributes and
/// storage options.
#[derive(Debug, Clone)]
pub struct Dataset {
    /// Dataset name (last path component).
    pub name: String,
    /// Logical element type.
    pub dtype: HDF5DataType,
    /// Dimensions of the dataset.
    pub shape: Vec<usize>,
    /// Flat element data in row-major order.
    pub data: DataArray,
    /// Attributes attached to this dataset, keyed by name.
    pub attributes: HashMap<String, AttributeValue>,
    /// Storage options (chunking, compression, ...).
    pub options: DatasetOptions,
}
253
254impl Dataset {
255 pub fn new(
257 name: String,
258 dtype: HDF5DataType,
259 shape: Vec<usize>,
260 data: DataArray,
261 options: DatasetOptions,
262 ) -> Self {
263 Self {
264 name,
265 dtype,
266 shape,
267 data,
268 attributes: HashMap::new(),
269 options,
270 }
271 }
272
273 pub fn set_attribute(&mut self, name: &str, value: AttributeValue) {
275 self.attributes.insert(name.to_string(), value);
276 }
277
278 pub fn get_attribute(&self, name: &str) -> Option<&AttributeValue> {
280 self.attributes.get(name)
281 }
282
283 pub fn remove_attribute(&mut self, name: &str) -> Option<AttributeValue> {
285 self.attributes.remove(name)
286 }
287
288 pub fn len(&self) -> usize {
290 self.shape.iter().product()
291 }
292
293 pub fn is_empty(&self) -> bool {
295 self.len() == 0
296 }
297
298 pub fn ndim(&self) -> usize {
300 self.shape.len()
301 }
302
303 pub fn size_bytes(&self) -> usize {
305 let element_size = match &self.dtype {
306 HDF5DataType::Integer { size, .. } => *size,
307 HDF5DataType::Float { size } => *size,
308 HDF5DataType::String { .. } => 8, HDF5DataType::Array { .. } => 8, HDF5DataType::Compound { .. } => 8, HDF5DataType::Enum { .. } => 8, };
313 self.len() * element_size
314 }
315
316 pub fn as_float_vec(&self) -> Option<Vec<f64>> {
318 match &self.data {
319 DataArray::Float(data) => Some(data.clone()),
320 DataArray::Integer(data) => Some(data.iter().map(|&x| x as f64).collect()),
321 _ => None,
322 }
323 }
324
325 pub fn as_integer_vec(&self) -> Option<Vec<i64>> {
327 match &self.data {
328 DataArray::Integer(data) => Some(data.clone()),
329 DataArray::Float(data) => Some(data.iter().map(|&x| x as i64).collect()),
330 _ => None,
331 }
332 }
333
334 pub fn as_string_vec(&self) -> Option<Vec<String>> {
336 match &self.data {
337 DataArray::String(data) => Some(data.clone()),
338 _ => None,
339 }
340 }
341}
342
/// Flat, homogeneous element storage for a dataset.
#[derive(Debug, Clone)]
pub enum DataArray {
    /// Signed 64-bit integers.
    Integer(Vec<i64>),
    /// 64-bit floats.
    Float(Vec<f64>),
    /// Owned strings.
    String(Vec<String>),
    /// Raw bytes (fallback for unsupported element types).
    Binary(Vec<u8>),
}
355
/// Value of an HDF5 attribute (scalar or array).
#[derive(Debug, Clone)]
pub enum AttributeValue {
    /// Scalar signed integer.
    Integer(i64),
    /// Scalar float.
    Float(f64),
    /// Scalar string.
    String(String),
    /// Array of integers.
    IntegerArray(Vec<i64>),
    /// Array of floats.
    FloatArray(Vec<f64>),
    /// Array of strings.
    StringArray(Vec<String>),
    /// Boolean (persisted as 0/1 i64 by the writer).
    Boolean(bool),
    /// NOTE(review): overlaps with `IntegerArray` (same payload type); the
    /// writer skips this variant with a warning — confirm whether it can be
    /// retired.
    Array(Vec<i64>),
}
376
/// Aggregate counts over an `HDF5File`'s in-memory tree.
#[derive(Debug, Clone, Default)]
pub struct FileStats {
    /// Total number of groups (excluding the root itself).
    pub num_groups: usize,
    /// Total number of datasets.
    pub num_datasets: usize,
    /// Total number of attributes across all groups.
    pub num_attributes: usize,
    /// Sum of approximate dataset sizes in bytes.
    pub total_data_size: usize,
}
389
390impl HDF5File {
    /// Create a new HDF5 file at `path`.
    ///
    /// With the `hdf5` feature enabled the file is created on disk
    /// immediately; otherwise only the in-memory structure is set up (see
    /// `write` for the fallback persistence path).
    ///
    /// # Errors
    /// Returns `IoError::FormatError` if the native file cannot be created.
    pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path_str = path.as_ref().to_string_lossy().to_string();

        #[cfg(feature = "hdf5")]
        {
            let native_file = File::create(&path_str)
                .map_err(|e| IoError::FormatError(format!("Failed to create HDF5 file: {e}")))?;

            Ok(Self {
                path: path_str,
                root: Group::new("/".to_string()),
                mode: FileMode::Create,
                native_file: Some(native_file),
            })
        }

        #[cfg(not(feature = "hdf5"))]
        {
            Ok(Self {
                path: path_str,
                root: Group::new("/".to_string()),
                mode: FileMode::Create,
            })
        }
    }
417
418 pub fn open<P: AsRef<Path>>(path: P, mode: FileMode) -> Result<Self> {
420 let path_str = path.as_ref().to_string_lossy().to_string();
421
422 #[cfg(feature = "hdf5")]
423 {
424 let native_file = match mode {
425 FileMode::ReadOnly => File::open(&path_str)
426 .map_err(|e| IoError::FormatError(format!("Failed to open HDF5 file: {e}")))?,
427 FileMode::ReadWrite => File::open_rw(&path_str)
428 .map_err(|e| IoError::FormatError(format!("Failed to open HDF5 file: {e}")))?,
429 FileMode::Create => File::create(&path_str).map_err(|e| {
430 IoError::FormatError(format!("Failed to create HDF5 file: {e}"))
431 })?,
432 FileMode::Truncate => File::create(&path_str).map_err(|e| {
433 IoError::FormatError(format!("Failed to create HDF5 file: {e}"))
434 })?,
435 };
436
437 let mut root = Group::new("/".to_string());
439 Self::load_group_structure(&native_file, &mut root)?;
440
441 Ok(Self {
442 path: path_str,
443 root,
444 mode,
445 native_file: Some(native_file),
446 })
447 }
448
449 #[cfg(not(feature = "hdf5"))]
450 {
451 Ok(Self {
452 path: path_str,
453 root: Group::new("/".to_string()),
454 mode,
455 })
456 }
457 }
458
    /// Immutable access to the root group ("/").
    pub fn root(&self) -> &Group {
        &self.root
    }

    /// Mutable access to the root group ("/").
    pub fn root_mut(&mut self) -> &mut Group {
        &mut self.root
    }
468
    /// The native HDF5 handle, if one was opened (feature `hdf5` only).
    #[cfg(feature = "hdf5")]
    pub fn native_file(&self) -> Option<&File> {
        self.native_file.as_ref()
    }
474
475 #[cfg(feature = "hdf5")]
477 fn load_group_structure(file: &File, group: &mut Group) -> Result<()> {
478 use hdf5::types::TypeDescriptor;
479
480 if let Ok(attr_names) = file.attr_names() {
482 for attr_name in attr_names {
483 if let Ok(attr) = file.attr(&attr_name) {
484 if let Ok(attr_value) = Self::read_attribute_value(&attr) {
485 group.attributes.insert(attr_name, attr_value);
486 }
487 }
488 }
489 }
490
491 let datasets = file
493 .datasets()
494 .map_err(|e| IoError::FormatError(format!("Failed to get datasets: {e}")))?;
495
496 for dataset in datasets {
497 let dataset_name_full = dataset.name();
498 let dataset_key = dataset_name_full
499 .rsplit('/')
500 .next()
501 .unwrap_or(&dataset_name_full)
502 .trim_start_matches('/')
503 .to_string();
504 if let Ok(h5_dataset) = file.dataset(&dataset_name_full) {
505 let shape: Vec<usize> = h5_dataset.shape().to_vec();
507 let dtype = h5_dataset.dtype().map_err(|e| {
508 IoError::FormatError(format!("Failed to get dataset dtype: {e}"))
509 })?;
510
511 let internal_dtype = Self::convert_hdf5_datatype(&dtype)?;
513
514 let data = Self::read_dataset_data(&h5_dataset, &dtype)?;
516
517 let mut attributes = HashMap::new();
519 if let Ok(attr_names) = h5_dataset.attr_names() {
520 for attr_name in attr_names {
521 if let Ok(attr) = h5_dataset.attr(&attr_name) {
522 if let Ok(attr_value) = Self::read_attribute_value(&attr) {
523 attributes.insert(attr_name, attr_value);
524 }
525 }
526 }
527 }
528
529 let dataset = Dataset {
531 name: dataset_key.clone(),
532 dtype: internal_dtype,
533 shape,
534 data,
535 attributes,
536 options: DatasetOptions::default(),
537 };
538
539 group.datasets.insert(dataset_key, dataset);
540 }
541 }
542
543 let groups = file
545 .groups()
546 .map_err(|e| IoError::FormatError(format!("Failed to get groups: {e}")))?;
547
548 for h5_group in groups {
549 let group_name_full = h5_group.name();
550 let group_key = group_name_full
551 .rsplit('/')
552 .next()
553 .unwrap_or(&group_name_full)
554 .trim_start_matches('/')
555 .to_string();
556 let mut subgroup = Group::new(group_key.clone());
557
558 Self::load_subgroup_structure(&h5_group, &mut subgroup)?;
560
561 group.groups.insert(group_key, subgroup);
562 }
563
564 Ok(())
565 }
566
    /// Recursively mirror a native group's attributes, datasets, and
    /// subgroups into `group`.
    ///
    /// All enumeration here is best-effort: failures to list or read are
    /// silently skipped (unlike `load_group_structure`, which propagates
    /// dataset/group enumeration errors at the root).
    #[cfg(feature = "hdf5")]
    fn load_subgroup_structure(h5_group: &hdf5::Group, group: &mut Group) -> Result<()> {
        // Group attributes.
        if let Ok(attr_names) = h5_group.attr_names() {
            for attr_name in attr_names {
                if let Ok(attr) = h5_group.attr(&attr_name) {
                    if let Ok(attr_value) = Self::read_attribute_value(&attr) {
                        group.attributes.insert(attr_name, attr_value);
                    }
                }
            }
        }

        // Datasets directly in this group; keyed by last path component.
        if let Ok(datasets) = h5_group.datasets() {
            for ds in datasets {
                let ds_name_full = ds.name();
                let ds_key = ds_name_full
                    .rsplit('/')
                    .next()
                    .unwrap_or(&ds_name_full)
                    .trim_start_matches('/')
                    .to_string();
                // Re-open by relative key (unlike the root loader, which
                // re-opens by full path).
                if let Ok(h5_dataset) = h5_group.dataset(&ds_key) {
                    let shape: Vec<usize> = h5_dataset.shape().to_vec();
                    let dtype = h5_dataset.dtype().map_err(|e| {
                        IoError::FormatError(format!("Failed to get dataset dtype: {e}"))
                    })?;
                    let internal_dtype = Self::convert_hdf5_datatype(&dtype)?;
                    let data = Self::read_dataset_data(&h5_dataset, &dtype)?;

                    // Dataset attributes, best-effort.
                    let mut attributes = HashMap::new();
                    if let Ok(attr_names) = h5_dataset.attr_names() {
                        for attr_name in attr_names {
                            if let Ok(attr) = h5_dataset.attr(&attr_name) {
                                if let Ok(attr_value) = Self::read_attribute_value(&attr) {
                                    attributes.insert(attr_name, attr_value);
                                }
                            }
                        }
                    }

                    let dataset = Dataset {
                        name: ds_key.clone(),
                        dtype: internal_dtype,
                        shape,
                        data,
                        attributes,
                        options: DatasetOptions::default(),
                    };
                    group.datasets.insert(ds_key, dataset);
                }
            }
        }

        // Recurse into child groups.
        if let Ok(subgroups) = h5_group.groups() {
            for sub in subgroups {
                let sub_name_full = sub.name();
                let sub_key = sub_name_full
                    .rsplit('/')
                    .next()
                    .unwrap_or(&sub_name_full)
                    .trim_start_matches('/')
                    .to_string();
                let mut child = Group::new(sub_key.clone());
                Self::load_subgroup_structure(&sub, &mut child)?;
                group.groups.insert(sub_key, child);
            }
        }

        Ok(())
    }
642
643 #[cfg(feature = "hdf5")]
645 fn write_group_to_hdf5(file: &File, group: &Group, path_prefix: &str) -> Result<()> {
646 for (attr_name, attr_value) in &group.attributes {
648 Self::write_attribute_to_hdf5(file, path_prefix, attr_name, attr_value)?;
649 }
650
651 for (dataset_name, dataset) in &group.datasets {
653 let dataset_path = if path_prefix.is_empty() {
654 dataset_name.clone()
655 } else {
656 format!("{}/{}", path_prefix, dataset_name)
657 };
658 Self::write_dataset_to_hdf5(file, &dataset_path, dataset)?;
659 }
660
661 for (subgroup_name, subgroup) in &group.groups {
663 let subgroup_path = if path_prefix.is_empty() {
664 subgroup_name.clone()
665 } else {
666 format!("{}/{}", path_prefix, subgroup_name)
667 };
668
669 if let Err(_) = file.group(&subgroup_path) {
671 file.create_group(&subgroup_path).map_err(|e| {
673 IoError::FormatError(format!("Failed to create group {}: {}", subgroup_path, e))
674 })?;
675 }
676
677 Self::write_group_to_hdf5(file, subgroup, &subgroup_path)?;
679 }
680
681 Ok(())
682 }
683
    /// Write a single attribute onto the object at `path` ("" = root group).
    ///
    /// Scalars use `write_scalar`, arrays use `write`; strings are stored as
    /// variable-length Unicode; booleans are stored as 0/1 `i64`;
    /// `AttributeValue::Array` is skipped with a stderr warning.
    #[cfg(feature = "hdf5")]
    fn write_attribute_to_hdf5(
        file: &File,
        path: &str,
        name: &str,
        value: &AttributeValue,
    ) -> Result<()> {
        use hdf5::types::VarLenUnicode;

        // Resolve the target: root group for an empty path, named group
        // otherwise.
        let target_group = if path.is_empty() {
            file.as_group()
                .map_err(|e| IoError::FormatError(format!("Failed to access root group: {e}")))?
        } else {
            file.group(path).map_err(|e| {
                IoError::FormatError(format!("Failed to access group '{path}': {e}"))
            })?
        };

        match value {
            AttributeValue::Integer(v) => {
                let attr = target_group.new_attr::<i64>().create(name).map_err(|e| {
                    IoError::FormatError(format!("Failed to create integer attribute: {}", e))
                })?;
                attr.write_scalar(v).map_err(|e| {
                    IoError::FormatError(format!("Failed to write integer attribute: {}", e))
                })?;
            }
            AttributeValue::Float(v) => {
                let attr = target_group.new_attr::<f64>().create(name).map_err(|e| {
                    IoError::FormatError(format!("Failed to create float attribute: {}", e))
                })?;
                attr.write_scalar(v).map_err(|e| {
                    IoError::FormatError(format!("Failed to write float attribute: {}", e))
                })?;
            }
            AttributeValue::String(v) => {
                let vlen_str = VarLenUnicode::from_str(v).map_err(|e| {
                    IoError::FormatError(format!("Failed to create VarLenUnicode: {:?}", e))
                })?;
                let attr = target_group
                    .new_attr::<VarLenUnicode>()
                    .create(name)
                    .map_err(|e| {
                        IoError::FormatError(format!("Failed to create string attribute: {}", e))
                    })?;
                attr.write_scalar(&vlen_str).map_err(|e| {
                    IoError::FormatError(format!("Failed to write string attribute: {}", e))
                })?;
            }
            AttributeValue::IntegerArray(v) => {
                let attr = target_group
                    .new_attr::<i64>()
                    .shape([v.len()])
                    .create(name)
                    .map_err(|e| {
                        IoError::FormatError(format!(
                            "Failed to create integer array attribute: {}",
                            e
                        ))
                    })?;
                attr.write(v).map_err(|e| {
                    IoError::FormatError(format!("Failed to write integer array attribute: {}", e))
                })?;
            }
            AttributeValue::FloatArray(v) => {
                let attr = target_group
                    .new_attr::<f64>()
                    .shape([v.len()])
                    .create(name)
                    .map_err(|e| {
                        IoError::FormatError(format!(
                            "Failed to create float array attribute: {}",
                            e
                        ))
                    })?;
                attr.write(v).map_err(|e| {
                    IoError::FormatError(format!("Failed to write float array attribute: {}", e))
                })?;
            }
            AttributeValue::StringArray(v) => {
                // Convert each string up front so a bad element fails before
                // the attribute is created.
                let mut vlen_strings = Vec::new();
                for s in v {
                    let vlen = VarLenUnicode::from_str(s).map_err(|e| {
                        IoError::FormatError(format!("Failed to create VarLenUnicode: {:?}", e))
                    })?;
                    vlen_strings.push(vlen);
                }
                let attr = target_group
                    .new_attr::<VarLenUnicode>()
                    .shape([v.len()])
                    .create(name)
                    .map_err(|e| {
                        IoError::FormatError(format!(
                            "Failed to create string array attribute: {}",
                            e
                        ))
                    })?;
                attr.write(&vlen_strings).map_err(|e| {
                    IoError::FormatError(format!("Failed to write string array attribute: {}", e))
                })?;
            }
            AttributeValue::Boolean(v) => {
                // No native bool attribute here: persist as 0/1 i64.
                let int_val = if *v { 1i64 } else { 0i64 };
                let attr = target_group.new_attr::<i64>().create(name).map_err(|e| {
                    IoError::FormatError(format!("Failed to create boolean attribute: {}", e))
                })?;
                attr.write_scalar(&int_val).map_err(|e| {
                    IoError::FormatError(format!("Failed to write boolean attribute: {}", e))
                })?;
            }
            AttributeValue::Array(_) => {
                // Deliberately not persisted; see AttributeValue::Array.
                eprintln!("Warning: Skipping complex array attribute '{}'", name);
            }
        }

        Ok(())
    }
805
806 #[cfg(feature = "hdf5")]
808 fn write_dataset_to_hdf5(file: &File, path: &str, dataset: &Dataset) -> Result<()> {
809 match &dataset.data {
811 DataArray::Float(data) => {
812 let h5_dataset = file
813 .new_dataset::<f64>()
814 .shape(&dataset.shape)
815 .create(path)
816 .map_err(|e| {
817 IoError::FormatError(format!("Failed to create float dataset: {}", e))
818 })?;
819 h5_dataset.write_raw(data).map_err(|e| {
821 IoError::FormatError(format!("Failed to write float dataset: {}", e))
822 })?;
823 }
824 DataArray::Integer(data) => {
825 let h5_dataset = file
826 .new_dataset::<i64>()
827 .shape(&dataset.shape)
828 .create(path)
829 .map_err(|e| {
830 IoError::FormatError(format!("Failed to create integer dataset: {}", e))
831 })?;
832 h5_dataset.write_raw(data).map_err(|e| {
834 IoError::FormatError(format!("Failed to write integer dataset: {}", e))
835 })?;
836 }
837 DataArray::String(data) => {
838 use hdf5::types::VarLenUnicode;
839 let mut vlen_strings = Vec::new();
840 for s in data {
841 let vlen = VarLenUnicode::from_str(s).map_err(|e| {
842 IoError::FormatError(format!("Failed to create VarLenUnicode: {:?}", e))
843 })?;
844 vlen_strings.push(vlen);
845 }
846 let h5_dataset = file
847 .new_dataset::<VarLenUnicode>()
848 .shape(&dataset.shape)
849 .create(path)
850 .map_err(|e| {
851 IoError::FormatError(format!("Failed to create string dataset: {}", e))
852 })?;
853 h5_dataset.write(&vlen_strings).map_err(|e| {
854 IoError::FormatError(format!("Failed to write string dataset: {}", e))
855 })?;
856 }
857 DataArray::Binary(data) => {
858 let h5_dataset = file
860 .new_dataset::<u8>()
861 .shape(&dataset.shape)
862 .create(path)
863 .map_err(|e| {
864 IoError::FormatError(format!("Failed to create binary dataset: {}", e))
865 })?;
866 h5_dataset.write(data).map_err(|e| {
867 IoError::FormatError(format!("Failed to write binary dataset: {}", e))
868 })?;
869 }
870 }
871
872 Ok(())
873 }
874
875 #[cfg(feature = "hdf5")]
877 fn convert_hdf5_datatype(dtype: &hdf5::Datatype) -> Result<HDF5DataType> {
878 use hdf5::types::TypeDescriptor;
879
880 match dtype.to_descriptor() {
881 Ok(TypeDescriptor::Integer(int_type)) => Ok(HDF5DataType::Integer {
882 size: int_type as usize,
883 signed: true,
884 }),
885 Ok(TypeDescriptor::Unsigned(int_type)) => Ok(HDF5DataType::Integer {
886 size: int_type as usize,
887 signed: false,
888 }),
889 Ok(TypeDescriptor::Float(float_type)) => Ok(HDF5DataType::Float {
890 size: float_type as usize,
891 }),
892 Ok(TypeDescriptor::FixedUnicode(size)) => Ok(HDF5DataType::String {
893 encoding: StringEncoding::UTF8,
894 }),
895 Ok(TypeDescriptor::FixedAscii(size)) => Ok(HDF5DataType::String {
896 encoding: StringEncoding::ASCII,
897 }),
898 Ok(TypeDescriptor::VarLenUnicode) => Ok(HDF5DataType::String {
899 encoding: StringEncoding::UTF8,
900 }),
901 Ok(TypeDescriptor::VarLenAscii) => Ok(HDF5DataType::String {
902 encoding: StringEncoding::ASCII,
903 }),
904 Ok(TypeDescriptor::Compound(comp_type)) => {
913 let mut fields = Vec::new();
914 for field in &comp_type.fields {
915 let field_datatype =
917 hdf5::Datatype::from_descriptor(&field.ty).map_err(|e| {
918 IoError::FormatError(format!(
919 "Failed to create datatype for field: {}",
920 e
921 ))
922 })?;
923 let field_type = Self::convert_hdf5_datatype(&field_datatype)?;
924 fields.push((field.name.clone(), field_type));
925 }
926 Ok(HDF5DataType::Compound { fields })
927 }
928 Ok(TypeDescriptor::Enum(enum_type)) => {
929 let mut values = Vec::new();
930 for member in &enum_type.members {
931 values.push((member.name.clone(), member.value as i64));
932 }
933 Ok(HDF5DataType::Enum { values })
934 }
935 _ => {
936 Ok(HDF5DataType::String {
938 encoding: StringEncoding::UTF8,
939 })
940 }
941 }
942 }
943
944 #[cfg(feature = "hdf5")]
946 fn read_dataset_data(dataset: &hdf5::Dataset, dtype: &hdf5::Datatype) -> Result<DataArray> {
947 use hdf5::types::TypeDescriptor;
948
949 match dtype.to_descriptor() {
950 Ok(TypeDescriptor::Integer(_)) => {
951 let data: Vec<i64> = dataset.read_raw().map_err(|e| {
952 IoError::FormatError(format!("Failed to read integer dataset: {e}"))
953 })?;
954 Ok(DataArray::Integer(data))
955 }
956 Ok(TypeDescriptor::Float(_)) => {
957 let data: Vec<f64> = dataset.read_raw().map_err(|e| {
958 IoError::FormatError(format!("Failed to read float dataset: {e}"))
959 })?;
960 Ok(DataArray::Float(data))
961 }
962 Ok(TypeDescriptor::FixedUnicode(_))
963 | Ok(TypeDescriptor::FixedAscii(_))
964 | Ok(TypeDescriptor::VarLenUnicode) => {
965 use hdf5::types::VarLenUnicode;
966 let data: Vec<VarLenUnicode> = dataset.read_raw().map_err(|e| {
967 IoError::FormatError(format!("Failed to read string dataset: {e}"))
968 })?;
969 let strings: Vec<String> = data.into_iter().map(|s| s.to_string()).collect();
970 Ok(DataArray::String(strings))
971 }
972 Ok(TypeDescriptor::VarLenAscii) => {
973 use hdf5::types::VarLenAscii;
974 let data: Vec<VarLenAscii> = dataset.read_raw().map_err(|e| {
975 IoError::FormatError(format!("Failed to read string dataset: {e}"))
976 })?;
977 let strings: Vec<String> = data.into_iter().map(|s| s.to_string()).collect();
978 Ok(DataArray::String(strings))
979 }
980 _ => {
981 let data: Vec<u8> = dataset.read_raw().map_err(|e| {
983 IoError::FormatError(format!("Failed to read binary dataset: {e}"))
984 })?;
985 Ok(DataArray::Binary(data))
986 }
987 }
988 }
989
990 #[cfg(feature = "hdf5")]
992 fn read_attribute_value(attr: &hdf5::Attribute) -> Result<AttributeValue> {
993 use hdf5::types::TypeDescriptor;
994
995 let dtype = attr
996 .dtype()
997 .map_err(|e| IoError::FormatError(format!("Failed to get attribute dtype: {e}")))?;
998
999 match dtype.to_descriptor() {
1000 Ok(TypeDescriptor::Integer(_)) => {
1001 if attr.shape().iter().product::<usize>() == 1 {
1002 let value: i64 = attr.read_scalar().map_err(|e| {
1003 IoError::FormatError(format!("Failed to read integer attribute: {e}"))
1004 })?;
1005 Ok(AttributeValue::Integer(value))
1006 } else {
1007 let value: Vec<i64> = attr.read_raw().map_err(|e| {
1008 IoError::FormatError(format!(
1009 "Failed to read integer array attribute: {}",
1010 e
1011 ))
1012 })?;
1013 Ok(AttributeValue::IntegerArray(value))
1014 }
1015 }
1016 Ok(TypeDescriptor::Float(_)) => {
1017 if attr.shape().iter().product::<usize>() == 1 {
1018 let value: f64 = attr.read_scalar().map_err(|e| {
1019 IoError::FormatError(format!("Failed to read float attribute: {e}"))
1020 })?;
1021 Ok(AttributeValue::Float(value))
1022 } else {
1023 let value: Vec<f64> = attr.read_raw().map_err(|e| {
1024 IoError::FormatError(format!("Failed to read float array attribute: {e}"))
1025 })?;
1026 Ok(AttributeValue::FloatArray(value))
1027 }
1028 }
1029 Ok(TypeDescriptor::VarLenUnicode) => {
1030 use hdf5::types::VarLenUnicode;
1031 if attr.shape().iter().product::<usize>() == 1 {
1032 let value: VarLenUnicode = attr.read_scalar().map_err(|e| {
1033 IoError::FormatError(format!("Failed to read string attribute: {e}"))
1034 })?;
1035 Ok(AttributeValue::String(value.to_string()))
1036 } else {
1037 let value: Vec<VarLenUnicode> = attr.read_raw().map_err(|e| {
1038 IoError::FormatError(format!(
1039 "Failed to read string array attribute: {}",
1040 e
1041 ))
1042 })?;
1043 let strings: Vec<String> = value.into_iter().map(|s| s.to_string()).collect();
1044 Ok(AttributeValue::StringArray(strings))
1045 }
1046 }
1047 Ok(TypeDescriptor::VarLenAscii) => {
1048 use hdf5::types::VarLenAscii;
1049 if attr.shape().iter().product::<usize>() == 1 {
1050 let value: VarLenAscii = attr.read_scalar().map_err(|e| {
1051 IoError::FormatError(format!("Failed to read string attribute: {e}"))
1052 })?;
1053 Ok(AttributeValue::String(value.to_string()))
1054 } else {
1055 let value: Vec<VarLenAscii> = attr.read_raw().map_err(|e| {
1056 IoError::FormatError(format!(
1057 "Failed to read string array attribute: {}",
1058 e
1059 ))
1060 })?;
1061 let strings: Vec<String> = value.into_iter().map(|s| s.to_string()).collect();
1062 Ok(AttributeValue::StringArray(strings))
1063 }
1064 }
1065 Ok(TypeDescriptor::FixedUnicode(size)) | Ok(TypeDescriptor::FixedAscii(size)) => {
1066 use hdf5::types::VarLenUnicode;
1068 if attr.shape().iter().product::<usize>() == 1 {
1069 let value: VarLenUnicode = attr.read_scalar().map_err(|e| {
1070 IoError::FormatError(format!("Failed to read string attribute: {e}"))
1071 })?;
1072 Ok(AttributeValue::String(value.to_string()))
1073 } else {
1074 let value: Vec<VarLenUnicode> = attr.read_raw().map_err(|e| {
1075 IoError::FormatError(format!(
1076 "Failed to read string array attribute: {}",
1077 e
1078 ))
1079 })?;
1080 let strings: Vec<String> = value.into_iter().map(|s| s.to_string()).collect();
1081 Ok(AttributeValue::StringArray(strings))
1082 }
1083 }
1084 _ => {
1085 Ok(AttributeValue::String("unknown".to_string()))
1087 }
1088 }
1089 }
1090
    /// Create (or replace) an f64 dataset at slash-separated `path` from an
    /// ndarray, creating intermediate groups in the in-memory tree as needed.
    ///
    /// All elements are converted to `f64` by formatting with `{:?}` and
    /// re-parsing; elements whose Debug output does not parse as f64
    /// silently become 0.0.
    /// NOTE(review): this Debug-format round-trip is lossy and only works
    /// for numeric element types — confirm callers never pass non-numeric
    /// arrays, or tighten the bound to a numeric conversion trait.
    ///
    /// # Errors
    /// Returns `IoError::FormatError` if `path` contains no components.
    pub fn create_dataset_from_array<A, D>(
        &mut self,
        path: &str,
        array: &ArrayBase<A, D>,
        options: Option<DatasetOptions>,
    ) -> Result<()>
    where
        A: scirs2_core::ndarray::Data,
        A::Elem: Clone + std::fmt::Debug,
        D: scirs2_core::ndarray::Dimension,
    {
        let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
        if parts.is_empty() {
            return Err(IoError::FormatError("Invalid dataset path".to_string()));
        }

        let dataset_name = parts.last().unwrap();
        let mut current_group = &mut self.root;

        // Walk/create all but the last component as groups.
        for &group_name in &parts[..parts.len() - 1] {
            current_group = current_group.create_group(group_name);
        }

        let shape: Vec<usize> = array.shape().to_vec();
        // Debug-format → parse conversion (see NOTE above).
        let flat_data: Vec<f64> = array
            .iter()
            .map(|x| {
                format!("{:?}", x).parse::<f64>().unwrap_or(0.0)
            })
            .collect();

        let dataset = Dataset {
            name: dataset_name.to_string(),
            dtype: HDF5DataType::Float { size: 8 },
            shape: shape.clone(),
            data: DataArray::Float(flat_data.clone()),
            attributes: HashMap::new(),
            options: options.unwrap_or_default(),
        };

        current_group
            .datasets
            .insert(dataset_name.to_string(), dataset);

        Ok(())
    }
1144
    /// Read a dataset as an ndarray of `T`.
    ///
    /// Implemented on top of `read_dataset`: each f64 is formatted with
    /// `Display` and re-parsed as `T`; values that fail to parse silently
    /// become `T::default()`.
    /// NOTE(review): the string round-trip loses precision/range for types
    /// whose parse differs from the f64 rendering (e.g. large i64) —
    /// confirm acceptable for callers.
    pub fn read_dataset_typed<T>(&self, path: &str) -> Result<ArrayD<T>>
    where
        T: Clone + Default + std::str::FromStr,
        <T as std::str::FromStr>::Err: std::fmt::Display,
    {
        let f64_array = self.read_dataset(path)?;
        let shape = f64_array.shape().to_vec();
        let converted: Vec<T> = f64_array
            .iter()
            .map(|&v| {
                let s = format!("{}", v);
                s.parse::<T>().unwrap_or_default()
            })
            .collect();

        ArrayD::from_shape_vec(scirs2_core::ndarray::IxDyn(&shape), converted)
            .map_err(|e| IoError::FormatError(format!("Failed to create typed array: {}", e)))
    }
1166
    /// Read the dataset at slash-separated `path` as an `f64` ndarray.
    ///
    /// The in-memory tree is always consulted to resolve the path and shape.
    /// When the `hdf5` feature is active and the dataset also exists in the
    /// native file, the values are re-read from disk. Integer data is
    /// widened to f64; string/binary data is an error.
    pub fn read_dataset(&self, path: &str) -> Result<ArrayD<f64>> {
        let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
        if parts.is_empty() {
            return Err(IoError::FormatError("Invalid dataset path".to_string()));
        }

        let dataset_name = parts.last().unwrap();
        let mut current_group = &self.root;

        // Walk all but the last component as groups.
        for &group_name in &parts[..parts.len() - 1] {
            current_group = current_group
                .get_group(group_name)
                .ok_or_else(|| IoError::FormatError(format!("Group '{group_name}' not found")))?;
        }

        let dataset = current_group
            .datasets
            .get(*dataset_name)
            .ok_or_else(|| IoError::FormatError(format!("Dataset '{dataset_name}' not found")))?;

        #[cfg(feature = "hdf5")]
        {
            if let Some(ref file) = self.native_file {
                let full_path = parts.join("/");

                // Prefer the native file's data when available; shape comes
                // from the in-memory metadata.
                if let Ok(h5_dataset) = file.dataset(&full_path) {
                    let data: Vec<f64> = h5_dataset.read_raw().map_err(|e| {
                        IoError::FormatError(format!("Failed to read HDF5 dataset: {e}"))
                    })?;

                    let shape = IxDyn(&dataset.shape);
                    return ArrayD::from_shape_vec(shape, data)
                        .map_err(|e| IoError::FormatError(e.to_string()));
                }
            }
        }

        // Fallback: build the array from the in-memory data.
        match &dataset.data {
            DataArray::Float(data) => {
                let shape = IxDyn(&dataset.shape);
                ArrayD::from_shape_vec(shape, data.clone())
                    .map_err(|e| IoError::FormatError(e.to_string()))
            }
            DataArray::Integer(data) => {
                let float_data: Vec<f64> = data.iter().map(|&x| x as f64).collect();
                let shape = IxDyn(&dataset.shape);
                ArrayD::from_shape_vec(shape, float_data)
                    .map_err(|e| IoError::FormatError(e.to_string()))
            }
            _ => Err(IoError::FormatError(
                "Unsupported data type for ndarray conversion".to_string(),
            )),
        }
    }
1228
1229 pub fn write(&self) -> Result<()> {
1231 #[cfg(feature = "hdf5")]
1232 {
1233 if let Some(ref file) = self.native_file {
1234 Self::write_group_to_hdf5(file, &self.root, "")?;
1236
1237 file.flush()
1239 .map_err(|e| IoError::FormatError(format!("Failed to flush HDF5 file: {e}")))?;
1240 }
1241 }
1242
1243 #[cfg(not(feature = "hdf5"))]
1244 {
1245 let sidecar = format!("{}.json", self.path);
1248 let mut obj = serde_json::json!({
1249 "groups": serde_json::Value::Object(serde_json::Map::new()),
1250 "datasets": serde_json::Value::Object(serde_json::Map::new()),
1251 });
1252 if let serde_json::Value::Object(ref mut map) = obj["datasets"] {
1254 for (k, ds) in &self.root.datasets {
1255 map.insert(k.clone(), serde_json::json!({
1256 "shape": ds.shape,
1257 "data": match &ds.data { DataArray::Float(v)=>serde_json::json!(v), DataArray::Integer(v)=>serde_json::json!(v), _=>serde_json::json!([])},
1258 }));
1259 }
1260 }
1261 std::fs::write(&sidecar, serde_json::to_vec(&obj).unwrap())
1263 .map_err(|e| IoError::FormatError(format!("Failed to persist mock HDF5: {e}")))?;
1264 }
1265
1266 Ok(())
1267 }
1268
1269 pub fn get_dataset(&self, path: &str) -> Result<&Dataset> {
1271 let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
1272 if parts.is_empty() {
1273 return Err(IoError::FormatError("Invalid dataset path".to_string()));
1274 }
1275
1276 let dataset_name = parts.last().unwrap();
1277 let mut current_group = &self.root;
1278
1279 for &group_name in &parts[..parts.len() - 1] {
1281 current_group = current_group
1282 .get_group(group_name)
1283 .ok_or_else(|| IoError::FormatError(format!("Group '{group_name}' not found")))?;
1284 }
1285
1286 current_group
1288 .get_dataset(dataset_name)
1289 .ok_or_else(|| IoError::FormatError(format!("Dataset '{dataset_name}' not found")))
1290 }
1291
1292 pub fn get_group(&self, path: &str) -> Result<&Group> {
1294 if path == "/" || path.is_empty() {
1295 return Ok(&self.root);
1296 }
1297
1298 let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
1299 let mut current_group = &self.root;
1300
1301 for &group_name in &parts {
1302 current_group = current_group
1303 .get_group(group_name)
1304 .ok_or_else(|| IoError::FormatError(format!("Group '{group_name}' not found")))?;
1305 }
1306
1307 Ok(current_group)
1308 }
1309
1310 pub fn list_datasets(&self) -> Vec<String> {
1312 let mut datasets = Vec::new();
1313 self.collect_datasets(&self.root, String::new(), &mut datasets);
1314 datasets
1315 }
1316
1317 pub fn list_groups(&self) -> Vec<String> {
1319 let mut groups = Vec::new();
1320 self.collect_groups(&self.root, String::new(), &mut groups);
1321 groups
1322 }
1323
1324 #[allow(clippy::only_used_in_recursion)]
1326 fn collect_datasets(&self, group: &Group, prefix: String, datasets: &mut Vec<String>) {
1327 for dataset_name in group.dataset_names() {
1328 let fullpath = if prefix.is_empty() {
1329 dataset_name.to_string()
1330 } else {
1331 format!("{prefix}/{dataset_name}")
1332 };
1333 datasets.push(fullpath);
1334 }
1335
1336 for (group_name, subgroup) in &group.groups {
1337 let new_prefix = if prefix.is_empty() {
1338 group_name.clone()
1339 } else {
1340 format!("{prefix}/{group_name}")
1341 };
1342 self.collect_datasets(subgroup, new_prefix, datasets);
1343 }
1344 }
1345
1346 #[allow(clippy::only_used_in_recursion)]
1348 fn collect_groups(&self, group: &Group, prefix: String, groups: &mut Vec<String>) {
1349 for (group_name, subgroup) in &group.groups {
1350 let fullpath = if prefix.is_empty() {
1351 group_name.clone()
1352 } else {
1353 format!("{prefix}/{group_name}")
1354 };
1355 groups.push(fullpath.clone());
1356 self.collect_groups(subgroup, fullpath, groups);
1357 }
1358 }
1359
1360 pub fn stats(&self) -> FileStats {
1362 let mut stats = FileStats::default();
1363 self.collect_stats(&self.root, &mut stats);
1364 stats
1365 }
1366
1367 #[allow(clippy::only_used_in_recursion)]
1369 fn collect_stats(&self, group: &Group, stats: &mut FileStats) {
1370 stats.num_groups += group.groups.len();
1371 stats.num_datasets += group.datasets.len();
1372 stats.num_attributes += group.attributes.len();
1373
1374 for dataset in group.datasets.values() {
1375 stats.num_attributes += dataset.attributes.len();
1376 stats.total_data_size += dataset.size_bytes();
1377 }
1378
1379 for subgroup in group.groups.values() {
1380 self.collect_stats(subgroup, stats);
1381 }
1382 }
1383
1384 pub fn close(self) -> Result<()> {
1386 #[cfg(feature = "hdf5")]
1387 {
1388 let _ = self.write();
1390 if let Some(file) = self.native_file {
1391 drop(file);
1393 }
1394 }
1395
1396 Ok(())
1397 }
1398
1399 pub fn create_group(&mut self, name: &str) -> Result<()> {
1401 self.root.create_group(name);
1402 Ok(())
1403 }
1404
1405 pub fn set_attribute(&mut self, name: &str, key: &str, value: AttributeValue) -> Result<()> {
1407 if name == "/" || name.is_empty() {
1408 self.root.set_attribute(key, value);
1409 } else {
1410 let parts: Vec<&str> = name.split('/').filter(|s| !s.is_empty()).collect();
1412 let mut current_group = &mut self.root;
1413
1414 for &group_name in &parts {
1415 current_group = current_group.groups.get_mut(group_name).ok_or_else(|| {
1416 IoError::FormatError(format!("Group '{}' not found", group_name))
1417 })?;
1418 }
1419 current_group.set_attribute(key, value);
1420 }
1421 Ok(())
1422 }
1423
1424 pub fn get_attribute(&self, name: &str, key: &str) -> Result<Option<&AttributeValue>> {
1426 if name == "/" || name.is_empty() {
1427 Ok(self.root.get_attribute(key))
1428 } else {
1429 let parts: Vec<&str> = name.split('/').filter(|s| !s.is_empty()).collect();
1431 let mut current_group = &self.root;
1432
1433 for &group_name in &parts {
1434 current_group = current_group.groups.get(group_name).ok_or_else(|| {
1435 IoError::FormatError(format!("Group '{}' not found", group_name))
1436 })?;
1437 }
1438 Ok(current_group.get_attribute(key))
1439 }
1440 }
1441
1442 pub fn is_group(&self, name: &str) -> bool {
1444 if name == "/" || name.is_empty() {
1445 true } else {
1447 let parts: Vec<&str> = name.split('/').filter(|s| !s.is_empty()).collect();
1449 let mut current_group = &self.root;
1450
1451 for (i, &part) in parts.iter().enumerate() {
1452 if i == parts.len() - 1 {
1453 return current_group.groups.contains_key(part);
1455 } else {
1456 match current_group.groups.get(part) {
1458 Some(group) => current_group = group,
1459 None => return false,
1460 }
1461 }
1462 }
1463 false
1464 }
1465 }
1466
    /// Write `data` into the dataset `name` starting at `offset`.
    ///
    /// NOTE(review): currently a no-op stub — all arguments are discarded and
    /// `Ok(())` is returned unconditionally, so callers relying on this to
    /// persist data will silently lose it. TODO: wire this up to a real
    /// partial-write (hyperslab) implementation or return an explicit
    /// "unsupported" error.
    pub fn write_dataset_slice<T>(&mut self, name: &str, data: &[T], offset: &[usize]) -> Result<()>
    where
        T: Clone + std::fmt::Debug,
    {
        // Suppress unused-parameter warnings for the stub.
        let _ = (name, data, offset);
        Ok(())
    }
1477
    /// Read a slice of `shape` elements from dataset `name` at `offset`.
    ///
    /// NOTE(review): stub — `name` and `offset` are ignored; the result is a
    /// vector of `shape.iter().product()` default-initialized values, so no
    /// real data is ever returned. Replace with an actual partial read when
    /// the backing store supports it.
    pub fn read_dataset_slice<T>(
        &self,
        name: &str,
        shape: &[usize],
        offset: &[usize],
    ) -> Result<Vec<T>>
    where
        T: Clone + Default,
    {
        // Only the requested element count is honored by the stub.
        let _ = (name, offset);
        let total: usize = shape.iter().product();
        Ok(vec![T::default(); total])
    }
1494
1495 pub fn list_all_items(&self) -> Vec<String> {
1497 let mut items = Vec::new();
1498 self.list_items_recursive(&self.root, "", &mut items);
1499 items
1500 }
1501
1502 fn list_items_recursive(&self, group: &Group, prefix: &str, items: &mut Vec<String>) {
1503 for name in group.datasets.keys() {
1504 let path = if prefix.is_empty() {
1505 format!("/{}", name)
1506 } else {
1507 format!("{}/{}", prefix, name)
1508 };
1509 items.push(path);
1510 }
1511
1512 for (name, subgroup) in &group.groups {
1513 let path = if prefix.is_empty() {
1514 format!("/{}", name)
1515 } else {
1516 format!("{}/{}", prefix, name)
1517 };
1518 items.push(path.clone());
1519 self.list_items_recursive(subgroup, &path, items);
1520 }
1521 }
1522
1523 pub fn create_dataset<T>(
1525 &mut self,
1526 path: &str,
1527 shape: &[usize],
1528 _options: Option<DatasetOptions>,
1529 ) -> Result<()>
1530 where
1531 T: Clone + Default + std::fmt::Debug,
1532 {
1533 let total: usize = shape.iter().product();
1534 let data = vec![T::default(); total];
1535 let array = ArrayD::from_shape_vec(IxDyn(shape), data)
1536 .map_err(|e| IoError::FormatError(e.to_string()))?;
1537
1538 self.create_dataset_from_array(path, &array, None)
1540 }
1541}
1542
1543#[allow(dead_code)]
1560pub fn read_hdf5<P: AsRef<Path>>(path: P) -> Result<Group> {
1561 let file = HDF5File::open(path, FileMode::ReadOnly)?;
1562 Ok(file.root)
1563}
1564
1565#[allow(dead_code)]
1585pub fn write_hdf5<P: AsRef<Path>>(path: P, datasets: HashMap<String, ArrayD<f64>>) -> Result<()> {
1586 let mut file = HDF5File::create(path)?;
1587
1588 for (datasetpath, array) in datasets {
1589 file.create_dataset_from_array(&datasetpath, &array, None)?;
1590 }
1591
1592 file.write()?;
1593 file.close()?;
1594 Ok(())
1595}
1596
1597#[allow(dead_code)]
1625pub fn create_hdf5_with_structure<P, F>(path: P, builder: F) -> Result<()>
1626where
1627 P: AsRef<Path>,
1628 F: FnOnce(&mut HDF5File) -> Result<()>,
1629{
1630 let mut file = HDF5File::create(path)?;
1631 builder(&mut file)?;
1632 file.write()?;
1633 file.close()?;
1634 Ok(())
1635}
1636
1637pub mod enhanced;
1639
1640pub use enhanced::{
1642 create_optimal_compression_options, read_hdf5_enhanced, write_hdf5_enhanced, CompressionStats,
1643 EnhancedHDF5File, ExtendedDataType, ParallelConfig,
1644};
1645
#[cfg(test)]
mod legacy_tests {
    use super::*;

    /// Creating a subgroup registers it under the parent by name.
    #[test]
    fn test_group_creation() {
        let mut root = Group::new(String::from("/"));
        assert_eq!(root.create_group("data").name, "data");
        assert!(root.get_group("data").is_some());
    }

    /// Attributes of different value types are stored independently.
    #[test]
    fn test_attribute_setting() {
        let mut group = Group::new(String::from("test"));
        group.set_attribute("version", AttributeValue::Integer(1));
        group.set_attribute(
            "description",
            AttributeValue::String(String::from("Test group")),
        );

        assert_eq!(group.attributes.len(), 2);
    }

    /// A manually built dataset keeps its shape and flat data buffer.
    #[test]
    fn test_dataset_creation() {
        let dataset = Dataset {
            name: String::from("test_data"),
            dtype: HDF5DataType::Float { size: 8 },
            shape: vec![2, 3],
            data: DataArray::Float(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
            attributes: HashMap::new(),
            options: DatasetOptions::default(),
        };

        assert_eq!(dataset.shape, vec![2, 3]);
        if let DataArray::Float(values) = &dataset.data {
            assert_eq!(values.len(), 6);
        }
    }

    /// Compression options can be customized on top of the defaults.
    #[test]
    fn test_compression_options() {
        let options = CompressionOptions {
            gzip: Some(6),
            shuffle: true,
            ..Default::default()
        };

        assert_eq!(options.gzip, Some(6));
        assert!(options.shuffle);
    }

    /// Requires a writable filesystem; kept ignored like the original.
    #[test]
    #[ignore]
    fn test_hdf5_file_creation() {
        let file = HDF5File::create("test.h5").unwrap();
        assert_eq!(file.mode, FileMode::Create);
        assert_eq!(file.root.name, "/");
    }
}