use crate::error::{IoError, Result};
use scirs2_core::ndarray::{ArrayBase, ArrayD, IxDyn};
use std::collections::HashMap;
use std::path::Path;
use std::str::FromStr;
#[cfg(feature = "hdf5")]
use hdf5::File;
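/// In-memory description of an HDF5 datatype, covering the subset of the
/// HDF5 type system this module models.
///
/// A sketch of constructing a few variants (illustrative only):
///
/// ```ignore
/// // 8-byte float, as used for `f64` datasets.
/// let f = HDF5DataType::Float { size: 8 };
/// // Signed 32-bit integer.
/// let i = HDF5DataType::Integer { size: 4, signed: true };
/// // A compound record built from the two.
/// let rec = HDF5DataType::Compound {
///     fields: vec![("x".to_string(), f), ("n".to_string(), i)],
/// };
/// ```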
#[derive(Debug, Clone, PartialEq)]
pub enum HDF5DataType {
Integer {
size: usize,
signed: bool,
},
Float {
size: usize,
},
String {
encoding: StringEncoding,
},
Array {
base_type: Box<HDF5DataType>,
shape: Vec<usize>,
},
Compound {
fields: Vec<(String, HDF5DataType)>,
},
Enum {
values: Vec<(String, i64)>,
},
}
#[derive(Debug, Clone, PartialEq)]
pub enum StringEncoding {
UTF8,
ASCII,
}
#[derive(Debug, Clone, Default)]
pub struct CompressionOptions {
pub gzip: Option<u8>,
pub szip: Option<(u32, u32)>,
pub lzf: bool,
pub shuffle: bool,
}
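/// Per-dataset creation options: chunking, compression filters, fill value,
/// and the Fletcher-32 checksum filter.
///
/// A sketch enabling gzip with byte shuffling (the values are illustrative):
///
/// ```ignore
/// let options = DatasetOptions {
///     chunk_size: Some(vec![64, 64]),
///     compression: CompressionOptions {
///         gzip: Some(6), // 0-9; higher compresses more but is slower
///         shuffle: true, // byte-shuffle before compression
///         ..Default::default()
///     },
///     ..Default::default()
/// };
/// ```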
#[derive(Debug, Clone, Default)]
pub struct DatasetOptions {
pub chunk_size: Option<Vec<usize>>,
pub compression: CompressionOptions,
pub fill_value: Option<f64>,
pub fletcher32: bool,
}
pub struct HDF5File {
#[allow(dead_code)]
path: String,
root: Group,
#[allow(dead_code)]
mode: FileMode,
#[cfg(feature = "hdf5")]
native_file: Option<File>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum FileMode {
ReadOnly,
ReadWrite,
Create,
Truncate,
}
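/// A node in the in-memory HDF5 tree: subgroups, datasets, and attributes,
/// each keyed by name.
///
/// A small sketch of building a tree by hand, with no file I/O:
///
/// ```ignore
/// let mut root = Group::new("/".to_string());
/// let exp = root.create_group("experiment");
/// exp.set_attribute("run", AttributeValue::Integer(42));
/// assert!(root.has_group("experiment"));
/// ```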
#[derive(Debug, Clone)]
pub struct Group {
pub name: String,
pub groups: HashMap<String, Group>,
pub datasets: HashMap<String, Dataset>,
pub attributes: HashMap<String, AttributeValue>,
}
impl Group {
pub fn new(name: String) -> Self {
Self {
name,
groups: HashMap::new(),
datasets: HashMap::new(),
attributes: HashMap::new(),
}
}
pub fn create_group(&mut self, name: &str) -> &mut Group {
self.groups
.entry(name.to_string())
.or_insert_with(|| Group::new(name.to_string()))
}
pub fn get_group(&self, name: &str) -> Option<&Group> {
self.groups.get(name)
}
pub fn get_group_mut(&mut self, name: &str) -> Option<&mut Group> {
self.groups.get_mut(name)
}
pub fn set_attribute(&mut self, name: &str, value: AttributeValue) {
self.attributes.insert(name.to_string(), value);
}
pub fn get_attribute(&self, name: &str) -> Option<&AttributeValue> {
self.attributes.get(name)
}
pub fn remove_attribute(&mut self, name: &str) -> Option<AttributeValue> {
self.attributes.remove(name)
}
pub fn attribute_names(&self) -> Vec<&str> {
self.attributes.keys().map(|s| s.as_str()).collect()
}
pub fn has_attribute(&self, name: &str) -> bool {
self.attributes.contains_key(name)
}
pub fn get_dataset(&self, name: &str) -> Option<&Dataset> {
self.datasets.get(name)
}
pub fn get_dataset_mut(&mut self, name: &str) -> Option<&mut Dataset> {
self.datasets.get_mut(name)
}
pub fn dataset_names(&self) -> Vec<&str> {
self.datasets.keys().map(|s| s.as_str()).collect()
}
pub fn group_names(&self) -> Vec<&str> {
self.groups.keys().map(|s| s.as_str()).collect()
}
pub fn has_dataset(&self, name: &str) -> bool {
self.datasets.contains_key(name)
}
pub fn has_group(&self, name: &str) -> bool {
self.groups.contains_key(name)
}
pub fn remove_dataset(&mut self, name: &str) -> Option<Dataset> {
self.datasets.remove(name)
}
pub fn remove_group(&mut self, name: &str) -> Option<Group> {
self.groups.remove(name)
}
}
#[derive(Debug, Clone)]
pub struct Dataset {
pub name: String,
pub dtype: HDF5DataType,
pub shape: Vec<usize>,
pub data: DataArray,
pub attributes: HashMap<String, AttributeValue>,
pub options: DatasetOptions,
}
impl Dataset {
pub fn new(
name: String,
dtype: HDF5DataType,
shape: Vec<usize>,
data: DataArray,
options: DatasetOptions,
) -> Self {
Self {
name,
dtype,
shape,
data,
attributes: HashMap::new(),
options,
}
}
pub fn set_attribute(&mut self, name: &str, value: AttributeValue) {
self.attributes.insert(name.to_string(), value);
}
pub fn get_attribute(&self, name: &str) -> Option<&AttributeValue> {
self.attributes.get(name)
}
pub fn remove_attribute(&mut self, name: &str) -> Option<AttributeValue> {
self.attributes.remove(name)
}
pub fn len(&self) -> usize {
self.shape.iter().product()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn ndim(&self) -> usize {
self.shape.len()
}
pub fn size_bytes(&self) -> usize {
let element_size = match &self.dtype {
HDF5DataType::Integer { size, .. } => *size,
HDF5DataType::Float { size } => *size,
HDF5DataType::String { .. }
| HDF5DataType::Array { .. }
| HDF5DataType::Compound { .. }
| HDF5DataType::Enum { .. } => 8,
};
self.len() * element_size
}
pub fn as_float_vec(&self) -> Option<Vec<f64>> {
match &self.data {
DataArray::Float(data) => Some(data.clone()),
DataArray::Integer(data) => Some(data.iter().map(|&x| x as f64).collect()),
_ => None,
}
}
pub fn as_integer_vec(&self) -> Option<Vec<i64>> {
match &self.data {
DataArray::Integer(data) => Some(data.clone()),
DataArray::Float(data) => Some(data.iter().map(|&x| x as i64).collect()),
_ => None,
}
}
pub fn as_string_vec(&self) -> Option<Vec<String>> {
match &self.data {
DataArray::String(data) => Some(data.clone()),
_ => None,
}
}
}
#[derive(Debug, Clone)]
pub enum DataArray {
Integer(Vec<i64>),
Float(Vec<f64>),
String(Vec<String>),
Binary(Vec<u8>),
}
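/// Scalar and array attribute values supported by this module. Note that
/// the legacy `Array` variant is skipped with a warning when writing to a
/// native file; prefer `IntegerArray` for integer arrays.
///
/// ```ignore
/// let units = AttributeValue::String("meters".to_string());
/// let bounds = AttributeValue::FloatArray(vec![0.0, 1.0]);
/// ```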
#[derive(Debug, Clone)]
pub enum AttributeValue {
Integer(i64),
Float(f64),
String(String),
IntegerArray(Vec<i64>),
FloatArray(Vec<f64>),
StringArray(Vec<String>),
Boolean(bool),
Array(Vec<i64>),
}
#[derive(Debug, Clone, Default)]
pub struct FileStats {
pub num_groups: usize,
pub num_datasets: usize,
pub num_attributes: usize,
pub total_data_size: usize,
}
impl HDF5File {
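/// Creates a new HDF5 file at `path`. With the `hdf5` feature enabled this
/// opens a native handle; without it the file is modeled purely in memory
/// and [`write`](Self::write) persists a JSON sidecar instead.
///
/// A sketch of the typical create-populate-write flow, assuming a
/// hypothetical output path `out.h5`:
///
/// ```ignore
/// let mut file = HDF5File::create("out.h5")?;
/// let data = ArrayD::from_shape_vec(IxDyn(&[2, 2]), vec![1.0, 2.0, 3.0, 4.0])?;
/// file.create_dataset_from_array("group/data", &data, None)?;
/// file.write()?;
/// file.close()?;
/// ```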
pub fn create<P: AsRef<Path>>(path: P) -> Result<Self> {
let path_str = path.as_ref().to_string_lossy().to_string();
#[cfg(feature = "hdf5")]
{
let native_file = File::create(&path_str)
.map_err(|e| IoError::FormatError(format!("Failed to create HDF5 file: {e}")))?;
Ok(Self {
path: path_str,
root: Group::new("/".to_string()),
mode: FileMode::Create,
native_file: Some(native_file),
})
}
#[cfg(not(feature = "hdf5"))]
{
Ok(Self {
path: path_str,
root: Group::new("/".to_string()),
mode: FileMode::Create,
})
}
}
pub fn open<P: AsRef<Path>>(path: P, mode: FileMode) -> Result<Self> {
let path_str = path.as_ref().to_string_lossy().to_string();
#[cfg(feature = "hdf5")]
{
let native_file = match mode {
FileMode::ReadOnly => File::open(&path_str)
.map_err(|e| IoError::FormatError(format!("Failed to open HDF5 file: {e}")))?,
FileMode::ReadWrite => File::open_rw(&path_str)
.map_err(|e| IoError::FormatError(format!("Failed to open HDF5 file: {e}")))?,
FileMode::Create => File::create(&path_str).map_err(|e| {
IoError::FormatError(format!("Failed to create HDF5 file: {e}"))
})?,
FileMode::Truncate => File::create(&path_str).map_err(|e| {
IoError::FormatError(format!("Failed to create HDF5 file: {e}"))
})?,
};
let mut root = Group::new("/".to_string());
Self::load_group_structure(&native_file, &mut root)?;
Ok(Self {
path: path_str,
root,
mode,
native_file: Some(native_file),
})
}
#[cfg(not(feature = "hdf5"))]
{
Ok(Self {
path: path_str,
root: Group::new("/".to_string()),
mode,
})
}
}
pub fn root(&self) -> &Group {
&self.root
}
pub fn root_mut(&mut self) -> &mut Group {
&mut self.root
}
#[cfg(feature = "hdf5")]
pub fn native_file(&self) -> Option<&File> {
self.native_file.as_ref()
}
#[cfg(feature = "hdf5")]
fn load_group_structure(file: &File, group: &mut Group) -> Result<()> {
if let Ok(attr_names) = file.attr_names() {
for attr_name in attr_names {
if let Ok(attr) = file.attr(&attr_name) {
if let Ok(attr_value) = Self::read_attribute_value(&attr) {
group.attributes.insert(attr_name, attr_value);
}
}
}
}
let datasets = file
.datasets()
.map_err(|e| IoError::FormatError(format!("Failed to get datasets: {e}")))?;
for dataset in datasets {
let dataset_name_full = dataset.name();
let dataset_key = dataset_name_full
.rsplit('/')
.next()
.unwrap_or(&dataset_name_full)
.trim_start_matches('/')
.to_string();
if let Ok(h5_dataset) = file.dataset(&dataset_name_full) {
let shape: Vec<usize> = h5_dataset.shape().to_vec();
let dtype = h5_dataset.dtype().map_err(|e| {
IoError::FormatError(format!("Failed to get dataset dtype: {e}"))
})?;
let internal_dtype = Self::convert_hdf5_datatype(&dtype)?;
let data = Self::read_dataset_data(&h5_dataset, &dtype)?;
let mut attributes = HashMap::new();
if let Ok(attr_names) = h5_dataset.attr_names() {
for attr_name in attr_names {
if let Ok(attr) = h5_dataset.attr(&attr_name) {
if let Ok(attr_value) = Self::read_attribute_value(&attr) {
attributes.insert(attr_name, attr_value);
}
}
}
}
let dataset = Dataset {
name: dataset_key.clone(),
dtype: internal_dtype,
shape,
data,
attributes,
options: DatasetOptions::default(),
};
group.datasets.insert(dataset_key, dataset);
}
}
let groups = file
.groups()
.map_err(|e| IoError::FormatError(format!("Failed to get groups: {e}")))?;
for h5_group in groups {
let group_name_full = h5_group.name();
let group_key = group_name_full
.rsplit('/')
.next()
.unwrap_or(&group_name_full)
.trim_start_matches('/')
.to_string();
let mut subgroup = Group::new(group_key.clone());
Self::load_subgroup_structure(&h5_group, &mut subgroup)?;
group.groups.insert(group_key, subgroup);
}
Ok(())
}
#[cfg(feature = "hdf5")]
fn load_subgroup_structure(h5_group: &hdf5::Group, group: &mut Group) -> Result<()> {
if let Ok(attr_names) = h5_group.attr_names() {
for attr_name in attr_names {
if let Ok(attr) = h5_group.attr(&attr_name) {
if let Ok(attr_value) = Self::read_attribute_value(&attr) {
group.attributes.insert(attr_name, attr_value);
}
}
}
}
if let Ok(datasets) = h5_group.datasets() {
for ds in datasets {
let ds_name_full = ds.name();
let ds_key = ds_name_full
.rsplit('/')
.next()
.unwrap_or(&ds_name_full)
.trim_start_matches('/')
.to_string();
if let Ok(h5_dataset) = h5_group.dataset(&ds_key) {
let shape: Vec<usize> = h5_dataset.shape().to_vec();
let dtype = h5_dataset.dtype().map_err(|e| {
IoError::FormatError(format!("Failed to get dataset dtype: {e}"))
})?;
let internal_dtype = Self::convert_hdf5_datatype(&dtype)?;
let data = Self::read_dataset_data(&h5_dataset, &dtype)?;
let mut attributes = HashMap::new();
if let Ok(attr_names) = h5_dataset.attr_names() {
for attr_name in attr_names {
if let Ok(attr) = h5_dataset.attr(&attr_name) {
if let Ok(attr_value) = Self::read_attribute_value(&attr) {
attributes.insert(attr_name, attr_value);
}
}
}
}
let dataset = Dataset {
name: ds_key.clone(),
dtype: internal_dtype,
shape,
data,
attributes,
options: DatasetOptions::default(),
};
group.datasets.insert(ds_key, dataset);
}
}
}
if let Ok(subgroups) = h5_group.groups() {
for sub in subgroups {
let sub_name_full = sub.name();
let sub_key = sub_name_full
.rsplit('/')
.next()
.unwrap_or(&sub_name_full)
.trim_start_matches('/')
.to_string();
let mut child = Group::new(sub_key.clone());
Self::load_subgroup_structure(&sub, &mut child)?;
group.groups.insert(sub_key, child);
}
}
Ok(())
}
#[cfg(feature = "hdf5")]
fn write_group_to_hdf5(file: &File, group: &Group, path_prefix: &str) -> Result<()> {
for (attr_name, attr_value) in &group.attributes {
Self::write_attribute_to_hdf5(file, path_prefix, attr_name, attr_value)?;
}
for (dataset_name, dataset) in &group.datasets {
let dataset_path = if path_prefix.is_empty() {
dataset_name.clone()
} else {
format!("{}/{}", path_prefix, dataset_name)
};
Self::write_dataset_to_hdf5(file, &dataset_path, dataset)?;
}
for (subgroup_name, subgroup) in &group.groups {
let subgroup_path = if path_prefix.is_empty() {
subgroup_name.clone()
} else {
format!("{}/{}", path_prefix, subgroup_name)
};
if file.group(&subgroup_path).is_err() {
file.create_group(&subgroup_path).map_err(|e| {
IoError::FormatError(format!("Failed to create group {}: {}", subgroup_path, e))
})?;
}
Self::write_group_to_hdf5(file, subgroup, &subgroup_path)?;
}
Ok(())
}
#[cfg(feature = "hdf5")]
fn write_attribute_to_hdf5(
file: &File,
path: &str,
name: &str,
value: &AttributeValue,
) -> Result<()> {
use hdf5::types::VarLenUnicode;
let target_group = if path.is_empty() {
file.as_group()
.map_err(|e| IoError::FormatError(format!("Failed to access root group: {e}")))?
} else {
file.group(path).map_err(|e| {
IoError::FormatError(format!("Failed to access group '{path}': {e}"))
})?
};
match value {
AttributeValue::Integer(v) => {
let attr = target_group.new_attr::<i64>().create(name).map_err(|e| {
IoError::FormatError(format!("Failed to create integer attribute: {}", e))
})?;
attr.write_scalar(v).map_err(|e| {
IoError::FormatError(format!("Failed to write integer attribute: {}", e))
})?;
}
AttributeValue::Float(v) => {
let attr = target_group.new_attr::<f64>().create(name).map_err(|e| {
IoError::FormatError(format!("Failed to create float attribute: {}", e))
})?;
attr.write_scalar(v).map_err(|e| {
IoError::FormatError(format!("Failed to write float attribute: {}", e))
})?;
}
AttributeValue::String(v) => {
let vlen_str = VarLenUnicode::from_str(v).map_err(|e| {
IoError::FormatError(format!("Failed to create VarLenUnicode: {:?}", e))
})?;
let attr = target_group
.new_attr::<VarLenUnicode>()
.create(name)
.map_err(|e| {
IoError::FormatError(format!("Failed to create string attribute: {}", e))
})?;
attr.write_scalar(&vlen_str).map_err(|e| {
IoError::FormatError(format!("Failed to write string attribute: {}", e))
})?;
}
AttributeValue::IntegerArray(v) => {
let attr = target_group
.new_attr::<i64>()
.shape([v.len()])
.create(name)
.map_err(|e| {
IoError::FormatError(format!(
"Failed to create integer array attribute: {}",
e
))
})?;
attr.write(v).map_err(|e| {
IoError::FormatError(format!("Failed to write integer array attribute: {}", e))
})?;
}
AttributeValue::FloatArray(v) => {
let attr = target_group
.new_attr::<f64>()
.shape([v.len()])
.create(name)
.map_err(|e| {
IoError::FormatError(format!(
"Failed to create float array attribute: {}",
e
))
})?;
attr.write(v).map_err(|e| {
IoError::FormatError(format!("Failed to write float array attribute: {}", e))
})?;
}
AttributeValue::StringArray(v) => {
let mut vlen_strings = Vec::new();
for s in v {
let vlen = VarLenUnicode::from_str(s).map_err(|e| {
IoError::FormatError(format!("Failed to create VarLenUnicode: {:?}", e))
})?;
vlen_strings.push(vlen);
}
let attr = target_group
.new_attr::<VarLenUnicode>()
.shape([v.len()])
.create(name)
.map_err(|e| {
IoError::FormatError(format!(
"Failed to create string array attribute: {}",
e
))
})?;
attr.write(&vlen_strings).map_err(|e| {
IoError::FormatError(format!("Failed to write string array attribute: {}", e))
})?;
}
AttributeValue::Boolean(v) => {
let int_val = if *v { 1i64 } else { 0i64 };
let attr = target_group.new_attr::<i64>().create(name).map_err(|e| {
IoError::FormatError(format!("Failed to create boolean attribute: {}", e))
})?;
attr.write_scalar(&int_val).map_err(|e| {
IoError::FormatError(format!("Failed to write boolean attribute: {}", e))
})?;
}
AttributeValue::Array(_) => {
eprintln!("Warning: Skipping complex array attribute '{}'", name);
}
}
Ok(())
}
#[cfg(feature = "hdf5")]
fn write_dataset_to_hdf5(file: &File, path: &str, dataset: &Dataset) -> Result<()> {
match &dataset.data {
DataArray::Float(data) => {
let h5_dataset = file
.new_dataset::<f64>()
.shape(&dataset.shape)
.create(path)
.map_err(|e| {
IoError::FormatError(format!("Failed to create float dataset: {}", e))
})?;
h5_dataset.write_raw(data).map_err(|e| {
IoError::FormatError(format!("Failed to write float dataset: {}", e))
})?;
}
DataArray::Integer(data) => {
let h5_dataset = file
.new_dataset::<i64>()
.shape(&dataset.shape)
.create(path)
.map_err(|e| {
IoError::FormatError(format!("Failed to create integer dataset: {}", e))
})?;
h5_dataset.write_raw(data).map_err(|e| {
IoError::FormatError(format!("Failed to write integer dataset: {}", e))
})?;
}
DataArray::String(data) => {
use hdf5::types::VarLenUnicode;
let mut vlen_strings = Vec::new();
for s in data {
let vlen = VarLenUnicode::from_str(s).map_err(|e| {
IoError::FormatError(format!("Failed to create VarLenUnicode: {:?}", e))
})?;
vlen_strings.push(vlen);
}
let h5_dataset = file
.new_dataset::<VarLenUnicode>()
.shape(&dataset.shape)
.create(path)
.map_err(|e| {
IoError::FormatError(format!("Failed to create string dataset: {}", e))
})?;
h5_dataset.write_raw(&vlen_strings).map_err(|e| {
IoError::FormatError(format!("Failed to write string dataset: {}", e))
})?;
}
DataArray::Binary(data) => {
let h5_dataset = file
.new_dataset::<u8>()
.shape(&dataset.shape)
.create(path)
.map_err(|e| {
IoError::FormatError(format!("Failed to create binary dataset: {}", e))
})?;
h5_dataset.write_raw(data).map_err(|e| {
IoError::FormatError(format!("Failed to write binary dataset: {}", e))
})?;
}
}
Ok(())
}
#[cfg(feature = "hdf5")]
fn convert_hdf5_datatype(dtype: &hdf5::Datatype) -> Result<HDF5DataType> {
use hdf5::types::TypeDescriptor;
match dtype.to_descriptor() {
Ok(TypeDescriptor::Integer(int_type)) => Ok(HDF5DataType::Integer {
size: int_type as usize,
signed: true,
}),
Ok(TypeDescriptor::Unsigned(int_type)) => Ok(HDF5DataType::Integer {
size: int_type as usize,
signed: false,
}),
Ok(TypeDescriptor::Float(float_type)) => Ok(HDF5DataType::Float {
size: float_type as usize,
}),
Ok(TypeDescriptor::FixedUnicode(_)) => Ok(HDF5DataType::String {
encoding: StringEncoding::UTF8,
}),
Ok(TypeDescriptor::FixedAscii(_)) => Ok(HDF5DataType::String {
encoding: StringEncoding::ASCII,
}),
Ok(TypeDescriptor::VarLenUnicode) => Ok(HDF5DataType::String {
encoding: StringEncoding::UTF8,
}),
Ok(TypeDescriptor::VarLenAscii) => Ok(HDF5DataType::String {
encoding: StringEncoding::ASCII,
}),
Ok(TypeDescriptor::Compound(comp_type)) => {
let mut fields = Vec::new();
for field in &comp_type.fields {
let field_datatype =
hdf5::Datatype::from_descriptor(&field.ty).map_err(|e| {
IoError::FormatError(format!(
"Failed to create datatype for field: {}",
e
))
})?;
let field_type = Self::convert_hdf5_datatype(&field_datatype)?;
fields.push((field.name.clone(), field_type));
}
Ok(HDF5DataType::Compound { fields })
}
Ok(TypeDescriptor::Enum(enum_type)) => {
let mut values = Vec::new();
for member in &enum_type.members {
values.push((member.name.clone(), member.value as i64));
}
Ok(HDF5DataType::Enum { values })
}
_ => {
// Fallback: treat unknown or unsupported descriptors as UTF-8 strings.
Ok(HDF5DataType::String {
encoding: StringEncoding::UTF8,
})
}
}
}
#[cfg(feature = "hdf5")]
fn read_dataset_data(dataset: &hdf5::Dataset, dtype: &hdf5::Datatype) -> Result<DataArray> {
use hdf5::types::TypeDescriptor;
match dtype.to_descriptor() {
Ok(TypeDescriptor::Integer(_)) => {
let data: Vec<i64> = dataset.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read integer dataset: {e}"))
})?;
Ok(DataArray::Integer(data))
}
Ok(TypeDescriptor::Float(_)) => {
let data: Vec<f64> = dataset.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read float dataset: {e}"))
})?;
Ok(DataArray::Float(data))
}
Ok(TypeDescriptor::FixedUnicode(_))
| Ok(TypeDescriptor::FixedAscii(_))
| Ok(TypeDescriptor::VarLenUnicode) => {
use hdf5::types::VarLenUnicode;
let data: Vec<VarLenUnicode> = dataset.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read string dataset: {e}"))
})?;
let strings: Vec<String> = data.into_iter().map(|s| s.to_string()).collect();
Ok(DataArray::String(strings))
}
Ok(TypeDescriptor::VarLenAscii) => {
use hdf5::types::VarLenAscii;
let data: Vec<VarLenAscii> = dataset.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read string dataset: {e}"))
})?;
let strings: Vec<String> = data.into_iter().map(|s| s.to_string()).collect();
Ok(DataArray::String(strings))
}
_ => {
let data: Vec<u8> = dataset.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read binary dataset: {e}"))
})?;
Ok(DataArray::Binary(data))
}
}
}
#[cfg(feature = "hdf5")]
fn read_attribute_value(attr: &hdf5::Attribute) -> Result<AttributeValue> {
use hdf5::types::TypeDescriptor;
let dtype = attr
.dtype()
.map_err(|e| IoError::FormatError(format!("Failed to get attribute dtype: {e}")))?;
match dtype.to_descriptor() {
Ok(TypeDescriptor::Integer(_)) => {
if attr.shape().iter().product::<usize>() == 1 {
let value: i64 = attr.read_scalar().map_err(|e| {
IoError::FormatError(format!("Failed to read integer attribute: {e}"))
})?;
Ok(AttributeValue::Integer(value))
} else {
let value: Vec<i64> = attr.read_raw().map_err(|e| {
IoError::FormatError(format!(
"Failed to read integer array attribute: {}",
e
))
})?;
Ok(AttributeValue::IntegerArray(value))
}
}
Ok(TypeDescriptor::Float(_)) => {
if attr.shape().iter().product::<usize>() == 1 {
let value: f64 = attr.read_scalar().map_err(|e| {
IoError::FormatError(format!("Failed to read float attribute: {e}"))
})?;
Ok(AttributeValue::Float(value))
} else {
let value: Vec<f64> = attr.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read float array attribute: {e}"))
})?;
Ok(AttributeValue::FloatArray(value))
}
}
Ok(TypeDescriptor::VarLenUnicode) => {
use hdf5::types::VarLenUnicode;
if attr.shape().iter().product::<usize>() == 1 {
let value: VarLenUnicode = attr.read_scalar().map_err(|e| {
IoError::FormatError(format!("Failed to read string attribute: {e}"))
})?;
Ok(AttributeValue::String(value.to_string()))
} else {
let value: Vec<VarLenUnicode> = attr.read_raw().map_err(|e| {
IoError::FormatError(format!(
"Failed to read string array attribute: {}",
e
))
})?;
let strings: Vec<String> = value.into_iter().map(|s| s.to_string()).collect();
Ok(AttributeValue::StringArray(strings))
}
}
Ok(TypeDescriptor::VarLenAscii) => {
use hdf5::types::VarLenAscii;
if attr.shape().iter().product::<usize>() == 1 {
let value: VarLenAscii = attr.read_scalar().map_err(|e| {
IoError::FormatError(format!("Failed to read string attribute: {e}"))
})?;
Ok(AttributeValue::String(value.to_string()))
} else {
let value: Vec<VarLenAscii> = attr.read_raw().map_err(|e| {
IoError::FormatError(format!(
"Failed to read string array attribute: {}",
e
))
})?;
let strings: Vec<String> = value.into_iter().map(|s| s.to_string()).collect();
Ok(AttributeValue::StringArray(strings))
}
}
Ok(TypeDescriptor::FixedUnicode(_)) | Ok(TypeDescriptor::FixedAscii(_)) => {
use hdf5::types::VarLenUnicode;
if attr.shape().iter().product::<usize>() == 1 {
let value: VarLenUnicode = attr.read_scalar().map_err(|e| {
IoError::FormatError(format!("Failed to read string attribute: {e}"))
})?;
Ok(AttributeValue::String(value.to_string()))
} else {
let value: Vec<VarLenUnicode> = attr.read_raw().map_err(|e| {
IoError::FormatError(format!(
"Failed to read string array attribute: {}",
e
))
})?;
let strings: Vec<String> = value.into_iter().map(|s| s.to_string()).collect();
Ok(AttributeValue::StringArray(strings))
}
}
_ => {
// Fallback for attribute types this reader does not support.
Ok(AttributeValue::String("unknown".to_string()))
}
}
}
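/// Stores `array` as a dataset at `path`, creating intermediate groups on
/// demand. Elements are flattened to `f64` storage regardless of their
/// source type.
///
/// A minimal sketch; `sensors/temperatures` is a hypothetical path:
///
/// ```ignore
/// let array = ArrayD::from_shape_vec(IxDyn(&[3]), vec![21.5, 22.0, 21.8])?;
/// file.create_dataset_from_array("sensors/temperatures", &array, None)?;
/// ```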
pub fn create_dataset_from_array<A, D>(
&mut self,
path: &str,
array: &ArrayBase<A, D>,
options: Option<DatasetOptions>,
) -> Result<()>
where
A: scirs2_core::ndarray::Data,
A::Elem: Clone + std::fmt::Debug,
D: scirs2_core::ndarray::Dimension,
{
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
if parts.is_empty() {
return Err(IoError::FormatError("Invalid dataset path".to_string()));
}
let dataset_name = parts.last().expect("Operation failed");
let mut current_group = &mut self.root;
for &group_name in &parts[..parts.len() - 1] {
current_group = current_group.create_group(group_name);
}
let shape: Vec<usize> = array.shape().to_vec();
let flat_data: Vec<f64> = array
.iter()
.map(|x| {
format!("{:?}", x).parse::<f64>().unwrap_or(0.0)
})
.collect();
let dataset = Dataset {
name: dataset_name.to_string(),
dtype: HDF5DataType::Float { size: 8 },
shape: shape.clone(),
data: DataArray::Float(flat_data.clone()),
attributes: HashMap::new(),
options: options.unwrap_or_default(),
};
current_group
.datasets
.insert(dataset_name.to_string(), dataset);
Ok(())
}
pub fn read_dataset_typed<T>(&self, path: &str) -> Result<ArrayD<T>>
where
T: Clone + Default + std::str::FromStr,
{
let f64_array = self.read_dataset(path)?;
let shape = f64_array.shape().to_vec();
// Display/parse round-trip: integral values print without a trailing
// ".0", so parsing into integer types succeeds; anything unparsable
// falls back to T::default().
let converted: Vec<T> = f64_array
.iter()
.map(|&v| v.to_string().parse::<T>().unwrap_or_default())
.collect();
ArrayD::from_shape_vec(scirs2_core::ndarray::IxDyn(&shape), converted)
.map_err(|e| IoError::FormatError(format!("Failed to create typed array: {e}")))
}
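/// Reads the dataset at `path` as an `f64` array. With the `hdf5` feature
/// enabled, the native file is consulted first; otherwise (or if the native
/// read misses) the in-memory copy is used, widening integers to `f64`.
///
/// A minimal sketch; `sensors/temperatures` is a hypothetical path:
///
/// ```ignore
/// let values = file.read_dataset("sensors/temperatures")?;
/// assert_eq!(values.ndim(), 1);
/// ```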
pub fn read_dataset(&self, path: &str) -> Result<ArrayD<f64>> {
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
if parts.is_empty() {
return Err(IoError::FormatError("Invalid dataset path".to_string()));
}
let dataset_name = parts.last().expect("Operation failed");
let mut current_group = &self.root;
for &group_name in &parts[..parts.len() - 1] {
current_group = current_group
.get_group(group_name)
.ok_or_else(|| IoError::FormatError(format!("Group '{group_name}' not found")))?;
}
let dataset = current_group
.datasets
.get(*dataset_name)
.ok_or_else(|| IoError::FormatError(format!("Dataset '{dataset_name}' not found")))?;
#[cfg(feature = "hdf5")]
{
if let Some(ref file) = self.native_file {
let full_path = parts.join("/");
if let Ok(h5_dataset) = file.dataset(&full_path) {
let data: Vec<f64> = h5_dataset.read_raw().map_err(|e| {
IoError::FormatError(format!("Failed to read HDF5 dataset: {e}"))
})?;
let shape = IxDyn(&dataset.shape);
return ArrayD::from_shape_vec(shape, data)
.map_err(|e| IoError::FormatError(e.to_string()));
}
}
}
match &dataset.data {
DataArray::Float(data) => {
let shape = IxDyn(&dataset.shape);
ArrayD::from_shape_vec(shape, data.clone())
.map_err(|e| IoError::FormatError(e.to_string()))
}
DataArray::Integer(data) => {
let float_data: Vec<f64> = data.iter().map(|&x| x as f64).collect();
let shape = IxDyn(&dataset.shape);
ArrayD::from_shape_vec(shape, float_data)
.map_err(|e| IoError::FormatError(e.to_string()))
}
_ => Err(IoError::FormatError(
"Unsupported data type for ndarray conversion".to_string(),
)),
}
}
pub fn write(&self) -> Result<()> {
#[cfg(feature = "hdf5")]
{
if let Some(ref file) = self.native_file {
Self::write_group_to_hdf5(file, &self.root, "")?;
file.flush()
.map_err(|e| IoError::FormatError(format!("Failed to flush HDF5 file: {e}")))?;
}
}
#[cfg(not(feature = "hdf5"))]
{
let sidecar = format!("{}.json", self.path);
let mut obj = serde_json::json!({
"groups": serde_json::Value::Object(serde_json::Map::new()),
"datasets": serde_json::Value::Object(serde_json::Map::new()),
});
if let serde_json::Value::Object(ref mut map) = obj["datasets"] {
for (k, ds) in &self.root.datasets {
map.insert(k.clone(), serde_json::json!({
"shape": ds.shape,
"data": match &ds.data { DataArray::Float(v)=>serde_json::json!(v), DataArray::Integer(v)=>serde_json::json!(v), _=>serde_json::json!([])},
}));
}
}
std::fs::write(
&sidecar,
serde_json::to_vec(&obj).expect("Operation failed"),
)
.map_err(|e| IoError::FormatError(format!("Failed to persist mock HDF5: {e}")))?;
}
Ok(())
}
pub fn get_dataset(&self, path: &str) -> Result<&Dataset> {
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
if parts.is_empty() {
return Err(IoError::FormatError("Invalid dataset path".to_string()));
}
let dataset_name = parts.last().expect("Operation failed");
let mut current_group = &self.root;
for &group_name in &parts[..parts.len() - 1] {
current_group = current_group
.get_group(group_name)
.ok_or_else(|| IoError::FormatError(format!("Group '{group_name}' not found")))?;
}
current_group
.get_dataset(dataset_name)
.ok_or_else(|| IoError::FormatError(format!("Dataset '{dataset_name}' not found")))
}
pub fn get_group(&self, path: &str) -> Result<&Group> {
if path == "/" || path.is_empty() {
return Ok(&self.root);
}
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
let mut current_group = &self.root;
for &group_name in &parts {
current_group = current_group
.get_group(group_name)
.ok_or_else(|| IoError::FormatError(format!("Group '{group_name}' not found")))?;
}
Ok(current_group)
}
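/// Returns the slash-separated path of every dataset in the file, collected
/// by a depth-first walk of the group tree.
///
/// ```ignore
/// for path in file.list_datasets() {
///     println!("dataset: {path}");
/// }
/// ```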
pub fn list_datasets(&self) -> Vec<String> {
let mut datasets = Vec::new();
self.collect_datasets(&self.root, String::new(), &mut datasets);
datasets
}
pub fn list_groups(&self) -> Vec<String> {
let mut groups = Vec::new();
self.collect_groups(&self.root, String::new(), &mut groups);
groups
}
#[allow(clippy::only_used_in_recursion)]
fn collect_datasets(&self, group: &Group, prefix: String, datasets: &mut Vec<String>) {
for dataset_name in group.dataset_names() {
let fullpath = if prefix.is_empty() {
dataset_name.to_string()
} else {
format!("{prefix}/{dataset_name}")
};
datasets.push(fullpath);
}
for (group_name, subgroup) in &group.groups {
let new_prefix = if prefix.is_empty() {
group_name.clone()
} else {
format!("{prefix}/{group_name}")
};
self.collect_datasets(subgroup, new_prefix, datasets);
}
}
#[allow(clippy::only_used_in_recursion)]
fn collect_groups(&self, group: &Group, prefix: String, groups: &mut Vec<String>) {
for (group_name, subgroup) in &group.groups {
let fullpath = if prefix.is_empty() {
group_name.clone()
} else {
format!("{prefix}/{group_name}")
};
groups.push(fullpath.clone());
self.collect_groups(subgroup, fullpath, groups);
}
}
pub fn stats(&self) -> FileStats {
let mut stats = FileStats::default();
self.collect_stats(&self.root, &mut stats);
stats
}
#[allow(clippy::only_used_in_recursion)]
fn collect_stats(&self, group: &Group, stats: &mut FileStats) {
stats.num_groups += group.groups.len();
stats.num_datasets += group.datasets.len();
stats.num_attributes += group.attributes.len();
for dataset in group.datasets.values() {
stats.num_attributes += dataset.attributes.len();
stats.total_data_size += dataset.size_bytes();
}
for subgroup in group.groups.values() {
self.collect_stats(subgroup, stats);
}
}
pub fn close(self) -> Result<()> {
#[cfg(feature = "hdf5")]
{
self.write()?;
// Dropping the handle closes the underlying native file.
drop(self.native_file);
}
Ok(())
}
pub fn create_group(&mut self, name: &str) -> Result<()> {
self.root.create_group(name);
Ok(())
}
pub fn set_attribute(&mut self, name: &str, key: &str, value: AttributeValue) -> Result<()> {
if name == "/" || name.is_empty() {
self.root.set_attribute(key, value);
} else {
let parts: Vec<&str> = name.split('/').filter(|s| !s.is_empty()).collect();
let mut current_group = &mut self.root;
for &group_name in &parts {
current_group = current_group.groups.get_mut(group_name).ok_or_else(|| {
IoError::FormatError(format!("Group '{}' not found", group_name))
})?;
}
current_group.set_attribute(key, value);
}
Ok(())
}
pub fn get_attribute(&self, name: &str, key: &str) -> Result<Option<&AttributeValue>> {
if name == "/" || name.is_empty() {
Ok(self.root.get_attribute(key))
} else {
let parts: Vec<&str> = name.split('/').filter(|s| !s.is_empty()).collect();
let mut current_group = &self.root;
for &group_name in &parts {
current_group = current_group.groups.get(group_name).ok_or_else(|| {
IoError::FormatError(format!("Group '{}' not found", group_name))
})?;
}
Ok(current_group.get_attribute(key))
}
}
pub fn is_group(&self, name: &str) -> bool {
if name == "/" || name.is_empty() {
return true;
}
let parts: Vec<&str> = name.split('/').filter(|s| !s.is_empty()).collect();
let mut current_group = &self.root;
for (i, &part) in parts.iter().enumerate() {
if i == parts.len() - 1 {
return current_group.groups.contains_key(part);
}
match current_group.groups.get(part) {
Some(group) => current_group = group,
None => return false,
}
}
false
}
pub fn write_dataset_slice<T>(&mut self, name: &str, data: &[T], offset: &[usize]) -> Result<()>
where
T: Clone + std::fmt::Debug,
{
// Placeholder: hyperslab writes are not implemented yet; the arguments
// are accepted and ignored so callers can compile against the API.
let _ = (name, data, offset);
Ok(())
}
pub fn read_dataset_slice<T>(
&self,
name: &str,
shape: &[usize],
offset: &[usize],
) -> Result<Vec<T>>
where
T: Clone + Default,
{
// Placeholder: returns a default-filled buffer of the requested size
// instead of reading a real hyperslab.
let _ = (name, offset);
let total: usize = shape.iter().product();
Ok(vec![T::default(); total])
}
pub fn list_all_items(&self) -> Vec<String> {
let mut items = Vec::new();
self.list_items_recursive(&self.root, "", &mut items);
items
}
fn list_items_recursive(&self, group: &Group, prefix: &str, items: &mut Vec<String>) {
for name in group.datasets.keys() {
let path = if prefix.is_empty() {
format!("/{}", name)
} else {
format!("{}/{}", prefix, name)
};
items.push(path);
}
for (name, subgroup) in &group.groups {
let path = if prefix.is_empty() {
format!("/{}", name)
} else {
format!("{}/{}", prefix, name)
};
items.push(path.clone());
self.list_items_recursive(subgroup, &path, items);
}
}
pub fn create_dataset<T>(
&mut self,
path: &str,
shape: &[usize],
options: Option<DatasetOptions>,
) -> Result<()>
where
T: Clone + Default + std::fmt::Debug,
{
let total: usize = shape.iter().product();
let data = vec![T::default(); total];
let array = ArrayD::from_shape_vec(IxDyn(shape), data)
.map_err(|e| IoError::FormatError(e.to_string()))?;
self.create_dataset_from_array(path, &array, options)
}
}
#[allow(dead_code)]
pub fn read_hdf5<P: AsRef<Path>>(path: P) -> Result<Group> {
let file = HDF5File::open(path, FileMode::ReadOnly)?;
Ok(file.root)
}
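/// Convenience wrapper: create a file, store each array under its map key as
/// the dataset path, then flush and close.
///
/// A sketch with a hypothetical dataset path `results/values`:
///
/// ```ignore
/// let mut datasets = HashMap::new();
/// datasets.insert(
///     "results/values".to_string(),
///     ArrayD::from_shape_vec(IxDyn(&[2]), vec![1.0, 2.0])?,
/// );
/// write_hdf5("results.h5", datasets)?;
/// ```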
#[allow(dead_code)]
pub fn write_hdf5<P: AsRef<Path>>(path: P, datasets: HashMap<String, ArrayD<f64>>) -> Result<()> {
let mut file = HDF5File::create(path)?;
for (datasetpath, array) in datasets {
file.create_dataset_from_array(&datasetpath, &array, None)?;
}
file.write()?;
file.close()?;
Ok(())
}
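/// Builds a file through a closure, which is convenient when the structure
/// is computed rather than known up front.
///
/// A sketch with a hypothetical group `raw`:
///
/// ```ignore
/// create_hdf5_with_structure("structured.h5", |file| {
///     file.create_group("raw")?;
///     file.set_attribute("raw", "source", AttributeValue::String("demo".into()))?;
///     Ok(())
/// })?;
/// ```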
#[allow(dead_code)]
pub fn create_hdf5_with_structure<P, F>(path: P, builder: F) -> Result<()>
where
P: AsRef<Path>,
F: FnOnce(&mut HDF5File) -> Result<()>,
{
let mut file = HDF5File::create(path)?;
builder(&mut file)?;
file.write()?;
file.close()?;
Ok(())
}
pub mod enhanced;
pub use enhanced::{
create_optimal_compression_options, read_hdf5_enhanced, write_hdf5_enhanced, CompressionStats,
EnhancedHDF5File, ExtendedDataType, ParallelConfig,
};
#[cfg(test)]
mod legacy_tests {
use super::*;
#[test]
fn test_group_creation() {
let mut root = Group::new("/".to_string());
let subgroup = root.create_group("data");
assert_eq!(subgroup.name, "data");
assert!(root.get_group("data").is_some());
}
#[test]
fn test_attribute_setting() {
let mut group = Group::new("test".to_string());
group.set_attribute("version", AttributeValue::Integer(1));
group.set_attribute(
"description",
AttributeValue::String("Test group".to_string()),
);
assert_eq!(group.attributes.len(), 2);
}
#[test]
fn test_dataset_creation() {
let dataset = Dataset {
name: "test_data".to_string(),
dtype: HDF5DataType::Float { size: 8 },
shape: vec![2, 3],
data: DataArray::Float(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]),
attributes: HashMap::new(),
options: DatasetOptions::default(),
};
assert_eq!(dataset.shape, vec![2, 3]);
if let DataArray::Float(data) = &dataset.data {
assert_eq!(data.len(), 6);
}
}
#[test]
fn test_compression_options() {
let options = CompressionOptions {
gzip: Some(6),
shuffle: true,
..Default::default()
};
assert_eq!(options.gzip, Some(6));
assert!(options.shuffle);
}
#[test]
fn test_hdf5_file_creation() {
let file = HDF5File::create("test.h5").expect("Operation failed");
assert_eq!(file.mode, FileMode::Create);
assert_eq!(file.root.name, "/");
}
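#[test]
fn test_in_memory_roundtrip() {
// A sketch exercising the in-memory tree; like test_hdf5_file_creation
// above, this writes a scratch file in the working directory when the
// `hdf5` feature is enabled.
let mut file = HDF5File::create("roundtrip.h5").expect("create should succeed");
file.create_group("measurements").expect("group creation");
file.set_attribute(
"measurements",
"units",
AttributeValue::String("meters".to_string()),
)
.expect("attribute set");
let attr = file
.get_attribute("measurements", "units")
.expect("group exists");
assert!(matches!(attr, Some(AttributeValue::String(s)) if s.as_str() == "meters"));
let data = ArrayD::from_shape_vec(IxDyn(&[2, 2]), vec![1.0, 2.0, 3.0, 4.0])
.expect("shape matches data");
file.create_dataset_from_array("measurements/grid", &data, None)
.expect("dataset creation");
assert_eq!(file.list_datasets(), vec!["measurements/grid".to_string()]);
let read = file.read_dataset("measurements/grid").expect("read back");
assert_eq!(read.shape(), &[2, 2]);
}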
}