scirs2_io/matlab/
v73_enhanced.rs

1//! Enhanced MATLAB v7.3+ format support
2//!
3//! This module provides comprehensive support for MATLAB v7.3+ files,
4//! which are based on HDF5 format with MATLAB-specific conventions.
5
6use crate::error::{IoError, Result};
7use crate::matlab::MatType;
8#[allow(unused_imports)]
9use scirs2_core::ndarray::{ArrayD, IxDyn};
10use std::collections::HashMap;
11use std::path::Path;
12
13#[cfg(feature = "hdf5")]
14use crate::hdf5::{AttributeValue, CompressionOptions, DatasetOptions, FileMode, HDF5File};
15
/// MATLAB v7.3+ specific features
///
/// A set of independent boolean switches, one per optional v7.3 capability.
/// A value of this type is handed to [`V73MatFile::new`].
///
/// NOTE(review): within this file the flags are only stored by `V73MatFile`;
/// no read or write path consults them yet.
#[derive(Debug, Clone)]
pub struct V73Features {
    /// Enable subsref subsasgn support for partial I/O
    pub enable_partial_io: bool,
    /// Support for MATLAB objects
    pub support_objects: bool,
    /// Support for function handles
    pub support_function_handles: bool,
    /// Support for tables
    pub support_tables: bool,
    /// Support for tall arrays
    pub support_tall_arrays: bool,
    /// Support for categorical arrays
    pub support_categorical: bool,
    /// Support for datetime arrays
    pub support_datetime: bool,
    /// Support for string arrays (different from char arrays)
    pub support_string_arrays: bool,
}
36
37impl Default for V73Features {
38    fn default() -> Self {
39        Self {
40            enable_partial_io: true,
41            support_objects: true,
42            support_function_handles: true,
43            support_tables: true,
44            support_tall_arrays: false, // Requires special handling
45            support_categorical: true,
46            support_datetime: true,
47            support_string_arrays: true,
48        }
49    }
50}
51
/// Extended MATLAB data types for v7.3+
///
/// Wraps the crate's standard [`MatType`] and adds the richer value kinds
/// that `V73MatFile` knows how to write (tables, categoricals, datetimes,
/// string arrays, function handles, objects and complex arrays).
#[derive(Debug, Clone)]
pub enum ExtendedMatType {
    /// Standard MatType (boxed, so the payload lives behind a pointer)
    Standard(Box<MatType>),
    /// MATLAB table
    Table(MatlabTable),
    /// MATLAB categorical array
    Categorical(CategoricalArray),
    /// MATLAB datetime array
    DateTime(DateTimeArray),
    /// MATLAB string array (not char array)
    StringArray(Vec<String>),
    /// Function handle
    FunctionHandle(FunctionHandle),
    /// MATLAB object
    Object(MatlabObject),
    /// Complex double array (written as separate real and imaginary datasets)
    ComplexDouble(ArrayD<scirs2_core::numeric::Complex<f64>>),
    /// Complex single array (written as separate real and imaginary datasets)
    ComplexSingle(ArrayD<scirs2_core::numeric::Complex<f32>>),
}
74
/// MATLAB table representation
///
/// NOTE(review): nothing in this type enforces that the keys of `data`
/// match the entries of `variable_names`; presumably callers keep the two
/// in sync — confirm.
#[derive(Debug, Clone)]
pub struct MatlabTable {
    /// Variable names
    pub variable_names: Vec<String>,
    /// Row names (optional)
    pub row_names: Option<Vec<String>>,
    /// Table data (column-oriented), keyed by column name
    pub data: HashMap<String, MatType>,
    /// Table properties (free-form key/value strings)
    pub properties: HashMap<String, String>,
}
87
/// MATLAB categorical array
///
/// Stores the category labels once and represents each element by an index.
#[derive(Debug, Clone)]
pub struct CategoricalArray {
    /// Category names
    pub categories: Vec<String>,
    /// Data indices (0-based)
    // presumably each index refers to an entry of `categories` — not validated here
    pub data: ArrayD<u32>,
    /// Whether the categories are ordered
    pub ordered: bool,
}
98
/// MATLAB datetime array
///
/// Time points are kept as floating-point serial date numbers together
/// with optional time-zone text and a display format string.
#[derive(Debug, Clone)]
pub struct DateTimeArray {
    /// Serial date numbers (days since January 0, 0000)
    pub data: ArrayD<f64>,
    /// Time zone information (`None` when no zone is attached)
    pub timezone: Option<String>,
    /// Date format
    pub format: String,
}
109
/// MATLAB function handle
///
/// Carries the textual form of the function, a free-form type tag, and an
/// optional set of captured workspace variables.
#[derive(Debug, Clone)]
pub struct FunctionHandle {
    /// Function name or anonymous function string
    pub function: String,
    /// Function type (simple, nested, anonymous, etc.)
    pub function_type: String,
    /// Workspace variables (for nested/anonymous functions), keyed by variable name
    pub workspace: Option<HashMap<String, MatType>>,
}
120
/// MATLAB object
///
/// The type is recursive: `superclass_data` may hold another boxed
/// `MatlabObject`, forming a chain of superclass records.
#[derive(Debug, Clone)]
pub struct MatlabObject {
    /// Class name
    pub class_name: String,
    /// Object properties, keyed by property name
    pub properties: HashMap<String, MatType>,
    /// Superclass data (`None` when there is no superclass record)
    pub superclass_data: Option<Box<MatlabObject>>,
}
131
/// Enhanced v7.3 MAT file handler
///
/// Holds the feature switches and, when the `hdf5` feature is compiled in,
/// the optional compression settings used by some of the dataset writers.
pub struct V73MatFile {
    // Stored by `new`; not read anywhere in this file yet, hence the lint allowance.
    #[allow(dead_code)]
    features: V73Features,
    // `None` until `with_compression` is called; writers that honour this
    // setting fall back to `CompressionOptions::default()` in that case.
    #[cfg(feature = "hdf5")]
    compression: Option<CompressionOptions>,
}
139
140impl V73MatFile {
141    /// Create a new v7.3 MAT file handler
142    pub fn new(features: V73Features) -> Self {
143        Self {
144            features,
145            #[cfg(feature = "hdf5")]
146            compression: None,
147        }
148    }
149
150    /// Set compression options
151    #[cfg(feature = "hdf5")]
152    pub fn with_compression(mut self, compression: CompressionOptions) -> Self {
153        self.compression = Some(compression);
154        self
155    }
156
157    /// Write extended MATLAB types to v7.3 file
158    #[cfg(feature = "hdf5")]
159    pub fn write_extended<P: AsRef<Path>>(
160        &self,
161        path: P,
162        vars: &HashMap<String, ExtendedMatType>,
163    ) -> Result<()> {
164        let mut hdf5_file = HDF5File::create(path)?;
165
166        // Add MATLAB v7.3 file signature
167        hdf5_file.set_attribute(
168            "/",
169            "MATLAB_version",
170            AttributeValue::String("7.3".to_string()),
171        )?;
172
173        for (name, ext_type) in vars {
174            self.write_extended_type(&mut hdf5_file, name, ext_type)?;
175        }
176
177        hdf5_file.close()?;
178        Ok(())
179    }
180
181    /// Read extended MATLAB types from v7.3 file
182    #[cfg(feature = "hdf5")]
183    pub fn read_extended<P: AsRef<Path>>(
184        &self,
185        path: P,
186    ) -> Result<HashMap<String, ExtendedMatType>> {
187        let hdf5_file = HDF5File::open(path, FileMode::ReadOnly)?;
188        let mut vars = HashMap::new();
189
190        // Get all top-level datasets and groups
191        let items = hdf5_file.list_all_items();
192
193        for item in items {
194            if let Ok(ext_type) = self.read_extended_type(&hdf5_file, &item) {
195                vars.insert(item.trim_start_matches('/').to_string(), ext_type);
196            }
197        }
198
199        Ok(vars)
200    }
201
202    /// Write an extended type to HDF5
203    #[cfg(feature = "hdf5")]
204    fn write_extended_type(
205        &self,
206        file: &mut HDF5File,
207        name: &str,
208        ext_type: &ExtendedMatType,
209    ) -> Result<()> {
210        match ext_type {
211            ExtendedMatType::Standard(mat_type) => self.write_standard_type(file, name, &mat_type),
212            ExtendedMatType::Table(table) => self.write_table(file, name, table),
213            ExtendedMatType::Categorical(cat_array) => {
214                self.write_categorical(file, name, cat_array)
215            }
216            ExtendedMatType::DateTime(dt_array) => self.write_datetime(file, name, dt_array),
217            ExtendedMatType::StringArray(strings) => self.write_string_array(file, name, strings),
218            ExtendedMatType::FunctionHandle(func_handle) => {
219                self.write_function_handle(file, name, func_handle)
220            }
221            ExtendedMatType::Object(object) => self.write_object(file, name, object),
222            ExtendedMatType::ComplexDouble(array) => self.write_complex_double(file, name, array),
223            ExtendedMatType::ComplexSingle(array) => self.write_complex_single(file, name, array),
224        }
225    }
226
227    /// Write a MATLAB table
228    #[cfg(feature = "hdf5")]
229    fn write_table(&self, file: &mut HDF5File, name: &str, table: &MatlabTable) -> Result<()> {
230        // Create a group for the table
231        file.create_group(name)?;
232        file.set_attribute(
233            name,
234            "MATLAB_class",
235            AttributeValue::String("table".to_string()),
236        )?;
237
238        // Write variable names
239        let var_names_data: Vec<u16> = table
240            .variable_names
241            .iter()
242            .flat_map(|s| s.encode_utf16())
243            .collect();
244        // Convert Vec to ArrayD and use create_dataset_from_array
245        let var_names_array = scirs2_core::ndarray::Array1::from_vec(var_names_data).into_dyn();
246        file.create_dataset_from_array(
247            &format!("{}/varnames", name),
248            &var_names_array,
249            Some(DatasetOptions::default()),
250        )?;
251
252        // Write table data
253        for (var_name, var_data) in &table.data {
254            let var_path = format!("{}/{}", name, var_name);
255            self.write_standard_type(file, &var_path, var_data)?;
256        }
257
258        // Write row names if present
259        if let Some(ref row_names) = table.row_names {
260            let row_names_data: Vec<u16> =
261                row_names.iter().flat_map(|s| s.encode_utf16()).collect();
262            let row_names_array = scirs2_core::ndarray::Array1::from_vec(row_names_data).into_dyn();
263            file.create_dataset_from_array(
264                &format!("{}/rownames", name),
265                &row_names_array,
266                Some(DatasetOptions::default()),
267            )?;
268        }
269
270        // Write properties
271        for (prop_name, prop_value) in &table.properties {
272            file.set_attribute(
273                name,
274                &format!("property_{}", prop_name),
275                AttributeValue::String(prop_value.clone()),
276            )?;
277        }
278
279        Ok(())
280    }
281
282    /// Write a categorical array
283    #[cfg(feature = "hdf5")]
284    fn write_categorical(
285        &self,
286        file: &mut HDF5File,
287        name: &str,
288        cat_array: &CategoricalArray,
289    ) -> Result<()> {
290        // Create a group for the categorical _array
291        file.create_group(name)?;
292        file.set_attribute(
293            name,
294            "MATLAB_class",
295            AttributeValue::String("categorical".to_string()),
296        )?;
297
298        // Write categories
299        let cats_data: Vec<u16> = cat_array
300            .categories
301            .iter()
302            .flat_map(|s| s.encode_utf16())
303            .collect();
304        let cats_array = scirs2_core::ndarray::Array1::from_vec(cats_data).into_dyn();
305        file.create_dataset_from_array(
306            &format!("{}/categories", name),
307            &cats_array,
308            Some(DatasetOptions::default()),
309        )?;
310
311        // Write data indices
312        file.create_dataset_from_array(
313            &format!("{}/data", name),
314            &cat_array.data,
315            Some(DatasetOptions::default()),
316        )?;
317
318        // Write ordered flag
319        file.set_attribute(name, "ordered", AttributeValue::Boolean(cat_array.ordered))?;
320
321        Ok(())
322    }
323
324    /// Write a datetime array
325    #[cfg(feature = "hdf5")]
326    fn write_datetime(
327        &self,
328        file: &mut HDF5File,
329        name: &str,
330        dt_array: &DateTimeArray,
331    ) -> Result<()> {
332        // Create dataset for datetime data
333        file.create_dataset_from_array(
334            name,
335            &dt_array.data,
336            Some(DatasetOptions {
337                compression: self.compression.clone().unwrap_or_default(),
338                ..Default::default()
339            }),
340        )?;
341
342        file.set_attribute(
343            name,
344            "MATLAB_class",
345            AttributeValue::String("datetime".to_string()),
346        )?;
347
348        // Write timezone if present
349        if let Some(ref tz) = dt_array.timezone {
350            file.set_attribute(name, "timezone", AttributeValue::String(tz.clone()))?;
351        }
352
353        // Write format
354        file.set_attribute(
355            name,
356            "format",
357            AttributeValue::String(dt_array.format.clone()),
358        )?;
359
360        Ok(())
361    }
362
363    /// Write a string array
364    #[cfg(feature = "hdf5")]
365    fn write_string_array(
366        &self,
367        file: &mut HDF5File,
368        name: &str,
369        strings: &[String],
370    ) -> Result<()> {
371        // Create a group for the string array
372        file.create_group(name)?;
373        file.set_attribute(
374            name,
375            "MATLAB_class",
376            AttributeValue::String("string".to_string()),
377        )?;
378
379        // Write each string as a separate dataset
380        for (i, string) in strings.iter().enumerate() {
381            let string_data: Vec<u16> = string.encode_utf16().collect();
382            let string_array = scirs2_core::ndarray::Array1::from_vec(string_data).into_dyn();
383            file.create_dataset_from_array(
384                &format!("{}/string_{}", name, i),
385                &string_array,
386                Some(DatasetOptions::default()),
387            )?;
388        }
389
390        // Write array size
391        file.set_attribute(
392            name,
393            "size",
394            AttributeValue::Array(vec![strings.len() as i64]),
395        )?;
396
397        Ok(())
398    }
399
400    /// Write a function handle
401    #[cfg(feature = "hdf5")]
402    fn write_function_handle(
403        &self,
404        file: &mut HDF5File,
405        name: &str,
406        func_handle: &FunctionHandle,
407    ) -> Result<()> {
408        // Create a group for the function _handle
409        file.create_group(name)?;
410        file.set_attribute(
411            name,
412            "MATLAB_class",
413            AttributeValue::String("function_handle".to_string()),
414        )?;
415
416        // Write function string
417        let func_data: Vec<u16> = func_handle.function.encode_utf16().collect();
418        let func_array = scirs2_core::ndarray::Array1::from_vec(func_data).into_dyn();
419        file.create_dataset_from_array(
420            &format!("{}/function", name),
421            &func_array,
422            Some(DatasetOptions::default()),
423        )?;
424
425        // Write function type
426        file.set_attribute(
427            name,
428            "type",
429            AttributeValue::String(func_handle.function_type.clone()),
430        )?;
431
432        // Write workspace if present
433        if let Some(ref workspace) = func_handle.workspace {
434            let ws_group = format!("{}/workspace", name);
435            file.create_group(&ws_group)?;
436
437            for (var_name, var_data) in workspace {
438                let var_path = format!("{}/{}", ws_group, var_name);
439                self.write_standard_type(file, &var_path, var_data)?;
440            }
441        }
442
443        Ok(())
444    }
445
446    /// Write a MATLAB object
447    #[cfg(feature = "hdf5")]
448    fn write_object(&self, file: &mut HDF5File, name: &str, object: &MatlabObject) -> Result<()> {
449        // Create a group for the object
450        file.create_group(name)?;
451        file.set_attribute(
452            name,
453            "MATLAB_class",
454            AttributeValue::String(object.class_name.clone()),
455        )?;
456        file.set_attribute(name, "MATLAB_object", AttributeValue::Boolean(true))?;
457
458        // Write properties
459        let props_group = format!("{}/properties", name);
460        file.create_group(&props_group)?;
461
462        for (prop_name, prop_data) in &object.properties {
463            let prop_path = format!("{}/{}", props_group, prop_name);
464            self.write_standard_type(file, &prop_path, prop_data)?;
465        }
466
467        // Write superclass data if present
468        if let Some(ref superclass) = object.superclass_data {
469            let super_path = format!("{}/superclass", name);
470            self.write_object(file, &super_path, superclass)?;
471        }
472
473        Ok(())
474    }
475
476    /// Write complex double array
477    #[cfg(feature = "hdf5")]
478    fn write_complex_double(
479        &self,
480        file: &mut HDF5File,
481        name: &str,
482        array: &ArrayD<scirs2_core::numeric::Complex<f64>>,
483    ) -> Result<()> {
484        // Split into real and imaginary parts
485        let real_part = array.mapv(|x| x.re);
486        let imag_part = array.mapv(|x| x.im);
487
488        // Create a group for the _complex array
489        file.create_group(name)?;
490        file.set_attribute(
491            name,
492            "MATLAB_class",
493            AttributeValue::String("double".to_string()),
494        )?;
495        file.set_attribute(name, "MATLAB_complex", AttributeValue::Boolean(true))?;
496
497        // Write real and imaginary parts
498        file.create_dataset_from_array(
499            &format!("{}/real", name),
500            &real_part,
501            Some(DatasetOptions {
502                compression: self.compression.clone().unwrap_or_default(),
503                ..Default::default()
504            }),
505        )?;
506        file.create_dataset_from_array(
507            &format!("{}/imag", name),
508            &imag_part,
509            Some(DatasetOptions {
510                compression: self.compression.clone().unwrap_or_default(),
511                ..Default::default()
512            }),
513        )?;
514
515        Ok(())
516    }
517
518    /// Write complex single array
519    #[cfg(feature = "hdf5")]
520    fn write_complex_single(
521        &self,
522        file: &mut HDF5File,
523        name: &str,
524        array: &ArrayD<scirs2_core::numeric::Complex<f32>>,
525    ) -> Result<()> {
526        // Split into real and imaginary parts
527        let real_part = array.mapv(|x| x.re);
528        let imag_part = array.mapv(|x| x.im);
529
530        // Create a group for the _complex array
531        file.create_group(name)?;
532        file.set_attribute(
533            name,
534            "MATLAB_class",
535            AttributeValue::String("single".to_string()),
536        )?;
537        file.set_attribute(name, "MATLAB_complex", AttributeValue::Boolean(true))?;
538
539        // Write real and imaginary parts
540        file.create_dataset_from_array(
541            &format!("{}/real", name),
542            &real_part,
543            Some(DatasetOptions {
544                compression: self.compression.clone().unwrap_or_default(),
545                ..Default::default()
546            }),
547        )?;
548        file.create_dataset_from_array(
549            &format!("{}/imag", name),
550            &imag_part,
551            Some(DatasetOptions {
552                compression: self.compression.clone().unwrap_or_default(),
553                ..Default::default()
554            }),
555        )?;
556
557        Ok(())
558    }
559
560    /// Write standard MatType (helper)
561    #[cfg(feature = "hdf5")]
562    fn write_standard_type(
563        &self,
564        file: &mut HDF5File,
565        name: &str,
566        mat_type: &MatType,
567    ) -> Result<()> {
568        // Delegate to existing implementation
569        // This would use the existing write_mat_type_to_hdf5 logic
570        Err(IoError::Other("Not implemented yet".to_string()))
571    }
572
573    /// Read an extended type from HDF5
574    #[cfg(feature = "hdf5")]
575    fn read_extended_type(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
576        // Check MATLAB_class attribute to determine type
577        if let Ok(Some(class_attr)) = file.get_attribute(name, "MATLAB_class") {
578            match class_attr {
579                AttributeValue::String(class_name) => {
580                    match class_name.as_str() {
581                        "table" => self.read_table(file, name),
582                        "categorical" => self.read_categorical(file, name),
583                        "datetime" => self.read_datetime(file, name),
584                        "string" => self.read_string_array(file, name),
585                        "function_handle" => self.read_function_handle(file, name),
586                        _ => {
587                            // Check if it's an object
588                            if let Ok(Some(AttributeValue::Boolean(true))) =
589                                file.get_attribute(name, "MATLAB_object")
590                            {
591                                self.read_object(file, name)
592                            } else {
593                                // Try to read as standard type
594                                Err(IoError::Other(
595                                    "Standard type reading not implemented".to_string(),
596                                ))
597                            }
598                        }
599                    }
600                }
601                _ => Err(IoError::Other("Invalid MATLAB_class attribute".to_string())),
602            }
603        } else {
604            Err(IoError::Other("Missing MATLAB_class attribute".to_string()))
605        }
606    }
607
608    // Read implementations would follow similar patterns...
609    #[cfg(feature = "hdf5")]
610    fn read_table(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
611        Err(IoError::Other(
612            "Table reading not implemented yet".to_string(),
613        ))
614    }
615
616    #[cfg(feature = "hdf5")]
617    fn read_categorical(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
618        Err(IoError::Other(
619            "Categorical reading not implemented yet".to_string(),
620        ))
621    }
622
623    #[cfg(feature = "hdf5")]
624    fn read_datetime(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
625        Err(IoError::Other(
626            "DateTime reading not implemented yet".to_string(),
627        ))
628    }
629
630    #[cfg(feature = "hdf5")]
631    fn read_string_array(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
632        Err(IoError::Other(
633            "String array reading not implemented yet".to_string(),
634        ))
635    }
636
637    #[cfg(feature = "hdf5")]
638    fn read_function_handle(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
639        Err(IoError::Other(
640            "Function handle reading not implemented yet".to_string(),
641        ))
642    }
643
644    #[cfg(feature = "hdf5")]
645    fn read_object(&self, file: &HDF5File, name: &str) -> Result<ExtendedMatType> {
646        Err(IoError::Other(
647            "Object reading not implemented yet".to_string(),
648        ))
649    }
650}
651
/// Partial I/O support for large variables
///
/// A field-less marker type; its functions are associated functions that
/// take the file path directly rather than operating on an instance.
pub struct PartialIoSupport;
654
655impl PartialIoSupport {
656    /// Read a slice of a large array without loading the entire array
657    #[cfg(feature = "hdf5")]
658    pub fn read_array_slice<T, P: AsRef<Path>>(
659        path: P,
660        var_name: &str,
661        start: &[usize],
662        count: &[usize],
663    ) -> Result<ArrayD<T>>
664    where
665        T: Default + Clone,
666    {
667        Err(IoError::Other(
668            "Partial I/O not implemented yet".to_string(),
669        ))
670    }
671
672    /// Write to a slice of an existing array
673    #[cfg(feature = "hdf5")]
674    pub fn write_array_slice<T, P: AsRef<Path>>(
675        path: P,
676        var_name: &str,
677        data: &ArrayD<T>,
678        start: &[usize],
679    ) -> Result<()>
680    where
681        T: Default + Clone,
682    {
683        Err(IoError::Other(
684            "Partial I/O not implemented yet".to_string(),
685        ))
686    }
687}
688
#[cfg(test)]
mod tests {
    use super::*;

    /// The default feature set switches on partial I/O, objects and tables.
    #[test]
    fn test_v73_features_default() {
        let V73Features {
            enable_partial_io,
            support_objects,
            support_tables,
            ..
        } = V73Features::default();

        assert!(enable_partial_io);
        assert!(support_objects);
        assert!(support_tables);
    }

    /// A table built from two names and two columns reports both counts as 2.
    #[test]
    fn test_matlab_table_creation() {
        // Two 2x1 double columns, keyed by column name.
        let columns: HashMap<String, MatType> = vec![
            (
                "x".to_string(),
                MatType::Double(ArrayD::zeros(IxDyn(&[2, 1]))),
            ),
            (
                "y".to_string(),
                MatType::Double(ArrayD::ones(IxDyn(&[2, 1]))),
            ),
        ]
        .into_iter()
        .collect();

        let table = MatlabTable {
            variable_names: vec!["x".to_string(), "y".to_string()],
            row_names: Some(vec!["row1".to_string(), "row2".to_string()]),
            data: columns,
            properties: HashMap::new(),
        };

        assert_eq!(table.variable_names.len(), 2);
        assert_eq!(table.data.len(), 2);
    }
}