scirs2_io/matlab/
mod.rs

1//! MATLAB file format (.mat) handling module
2//!
3//! This module provides functionality for reading and writing MATLAB .mat files.
4//! Supports MATLAB v5 format (Level 5 MAT-File) and enhanced v7.3+ format.
5//!
6//! # Submodules
7//!
//! - `enhanced`: Enhanced MATLAB format support with v7.3+ and HDF5 integration
//! - `v73_enhanced`: Extended MATLAB v7.3+ format support with additional data types
//! - `write_impl`: Internal implementation details for MAT file writing
10
11/// Enhanced MATLAB format support with v7.3+ and HDF5 integration
12pub mod enhanced;
13/// Extended MATLAB v7.3+ format support with additional data types
14pub mod v73_enhanced;
15/// Internal MAT file writing implementation
16mod write_impl;
17
18use byteorder::{ByteOrder, LittleEndian};
19use scirs2_core::ndarray::{Array, ArrayD, IxDyn};
20use std::collections::HashMap;
21use std::fs::File;
22use std::io::{BufReader, BufWriter, Read, Write};
23use std::path::Path;
24
25use crate::error::{IoError, Result};
26
/// MATLAB data types
///
/// Each variant carries the value of one MATLAB variable. The numeric and
/// logical variants wrap a dynamically-dimensioned `ArrayD`, `Char` wraps a
/// `String`, `Cell` and `Struct` nest further `MatType` values, and the sparse
/// variants wrap `crate::sparse::SparseMatrix`.
#[derive(Debug, Clone)]
pub enum MatType {
    /// Double-precision floating point array (`f64` elements)
    Double(ArrayD<f64>),
    /// Single-precision floating point array (`f32` elements)
    Single(ArrayD<f32>),
    /// 8-bit signed integer array
    Int8(ArrayD<i8>),
    /// 16-bit signed integer array
    Int16(ArrayD<i16>),
    /// 32-bit signed integer array
    Int32(ArrayD<i32>),
    /// 64-bit signed integer array
    Int64(ArrayD<i64>),
    /// 8-bit unsigned integer array
    UInt8(ArrayD<u8>),
    /// 16-bit unsigned integer array
    UInt16(ArrayD<u16>),
    /// 32-bit unsigned integer array
    UInt32(ArrayD<u32>),
    /// 64-bit unsigned integer array
    UInt64(ArrayD<u64>),
    /// Logical/boolean array
    Logical(ArrayD<bool>),
    /// Character array, held as a single Rust `String`
    Char(String),
    /// Cell array, held as a flat list of nested values
    Cell(Vec<MatType>),
    /// Structure, held as a map from field name to nested value
    Struct(HashMap<String, MatType>),
    /// Sparse double matrix
    SparseDouble(crate::sparse::SparseMatrix<f64>),
    /// Sparse single matrix
    SparseSingle(crate::sparse::SparseMatrix<f32>),
    /// Sparse logical matrix
    SparseLogical(crate::sparse::SparseMatrix<bool>),
}
65
/// MATLAB header information
///
/// Holds the two 16-bit fields that `read_mat` decodes (as little-endian)
/// from bytes 124..128 of the 128-byte file header.
#[derive(Debug, Clone)]
struct MatHeader {
    /// Version of the MAT file (stored by `read_mat` but not inspected there)
    _version: u16,
    /// Endianness indicator; `read_mat` rejects any value other than
    /// `0x4D49` and `0x494D`
    endian_indicator: u16,
}
74
// MATLAB data type identifiers (miTYPE values)
//
// These are the tags found in the first word of a data element; they describe
// how the element's payload bytes are stored.
const MI_INT8: i32 = 1; // miINT8
const _MI_UINT8: i32 = 2; // miUINT8
const _MI_INT16: i32 = 3; // miINT16
const _MI_UINT16: i32 = 4; // miUINT16
const MI_INT32: i32 = 5; // miINT32
const MI_UINT32: i32 = 6; // miUINT32
const _MI_SINGLE: i32 = 7; // miSINGLE
const _MI_DOUBLE: i32 = 9; // miDOUBLE
const _MI_INT64: i32 = 12; // miINT64
const _MI_UINT64: i32 = 13; // miUINT64
const MI_MATRIX: i32 = 14; // miMATRIX
const _MI_COMPRESSED: i32 = 15; // miCOMPRESSED
const MI_UTF8: i32 = 16; // miUTF8
const _MI_UTF16: i32 = 17; // miUTF16
const _MI_UTF32: i32 = 18; // miUTF32
91
// MATLAB array type values (mxCLASS values)
//
// `MatrixFlags::from_u32` extracts one of these from the low byte of the
// array-flags word; `parse_matrixdata` matches on it to pick the `MatType`
// variant to build.
const _MX_CELL_CLASS: i32 = 1; // mxCELL_CLASS
const _MX_STRUCT_CLASS: i32 = 2; // mxSTRUCT_CLASS
const _MX_OBJECT_CLASS: i32 = 3; // mxOBJECT_CLASS
const MX_CHAR_CLASS: i32 = 4; // mxCHAR_CLASS
const _MX_SPARSE_CLASS: i32 = 5; // mxSPARSE_CLASS
const MX_DOUBLE_CLASS: i32 = 6; // mxDOUBLE_CLASS
const MX_SINGLE_CLASS: i32 = 7; // mxSINGLE_CLASS
const MX_INT8_CLASS: i32 = 8; // mxINT8_CLASS
const MX_UINT8_CLASS: i32 = 9; // mxUINT8_CLASS
const MX_INT16_CLASS: i32 = 10; // mxINT16_CLASS
const MX_UINT16_CLASS: i32 = 11; // mxUINT16_CLASS
const MX_INT32_CLASS: i32 = 12; // mxINT32_CLASS
const MX_UINT32_CLASS: i32 = 13; // mxUINT32_CLASS
const MX_INT64_CLASS: i32 = 14; // mxINT64_CLASS
const MX_UINT64_CLASS: i32 = 15; // mxUINT64_CLASS
108
/// Matrix flags for MATLAB data
///
/// Decoded form of the 32-bit array-flags word: the low byte carries the
/// array class and three bits of the second byte carry boolean attributes.
#[derive(Debug, Clone)]
struct MatrixFlags {
    /// Class type (double, single, etc.)
    class_type: i32,
    /// Whether the matrix is complex
    is_complex: bool,
    /// Whether the matrix is a global variable
    _is_global: bool,
    /// Whether the matrix is logical
    is_logical: bool,
}

impl MatrixFlags {
    /// Bits of the flags word that hold the array class.
    const CLASS_MASK: u32 = 0xFF;
    /// Bit set when the array has an imaginary part.
    const COMPLEX_BIT: u32 = 0x800;
    /// Bit set when the array is a global variable.
    const GLOBAL_BIT: u32 = 0x400;
    /// Bit set when the array is logical.
    const LOGICAL_BIT: u32 = 0x200;

    /// Parse matrix flags from a u32
    ///
    /// Bits outside the class byte and the three attribute bits are ignored.
    fn from_u32(flags: u32) -> Self {
        MatrixFlags {
            class_type: (flags & Self::CLASS_MASK) as i32,
            is_complex: flags & Self::COMPLEX_BIT != 0,
            is_logical: flags & Self::LOGICAL_BIT != 0,
            _is_global: flags & Self::GLOBAL_BIT != 0,
        }
    }

    /// Convert to a u32 for writing
    ///
    /// The class is masked to its byte so that a negative or oversized
    /// `class_type` can never spill into the attribute bits.
    fn _to_u32(&self) -> u32 {
        let mut flags = (self.class_type as u32) & Self::CLASS_MASK;
        if self.is_complex {
            flags |= Self::COMPLEX_BIT;
        }
        if self._is_global {
            flags |= Self::GLOBAL_BIT;
        }
        if self.is_logical {
            flags |= Self::LOGICAL_BIT;
        }
        flags
    }
}
153
/// Data element for MATLAB file
///
/// Pairs a data type tag with the element's raw payload bytes. Nothing in the
/// visible part of this file constructs it; the `_` prefix presumably exists
/// to silence unused-item warnings.
#[derive(Debug, Clone)]
struct _DataElement {
    /// Data type
    data_type: i32,
    /// Data (as bytes)
    data: Vec<u8>,
}
162
/// Matrix array for MATLAB file
///
/// Groups the pieces of one matrix element: flags, dimensions, name and the
/// undecoded real/imaginary payloads. Nothing in the visible part of this
/// file constructs it; the `_` prefix presumably exists to silence
/// unused-item warnings.
#[derive(Debug, Clone)]
struct _MatrixArray {
    /// Matrix flags
    flags: MatrixFlags,
    /// Array dimensions
    dims: Vec<i32>,
    /// Array name
    name: String,
    /// Real data (raw bytes, not yet decoded)
    realdata: Vec<u8>,
    /// Imaginary data (if complex)
    imagdata: Option<Vec<u8>>,
}
177
178/// Reads a MATLAB .mat file
179///
180/// # Arguments
181///
182/// * `path` - Path to the .mat file
183///
184/// # Returns
185///
186/// * A HashMap mapping variable names to their values
187///
188/// # Example
189///
190/// ```no_run
191/// use scirs2_io::matlab::read_mat;
192/// use std::path::Path;
193///
194/// let vars = read_mat(Path::new("data.mat")).unwrap();
195/// for (name_, value) in vars.iter() {
196///     println!("Variable: {}", name_);
197/// }
198/// ```
199#[allow(dead_code)]
200pub fn read_mat<P: AsRef<Path>>(path: P) -> Result<HashMap<String, MatType>> {
201    let file = File::open(path).map_err(|e| IoError::FileError(e.to_string()))?;
202    let mut reader = BufReader::new(file);
203
204    // Read the MAT file header (128 bytes total)
205    let mut headerbytes = [0u8; 128];
206    reader
207        .read_exact(&mut headerbytes)
208        .map_err(|e| IoError::FileError(format!("Failed to read MAT header: {e}")))?;
209
210    // Check magic string "MATLAB"
211    let magic = std::str::from_utf8(&headerbytes[0..6])
212        .map_err(|_| IoError::FormatError("Invalid MAT file header".to_string()))?;
213
214    if magic != "MATLAB" {
215        return Err(IoError::FormatError("Not a valid MATLAB file".to_string()));
216    }
217
218    // Parse version and endianness from last 4 bytes (positions 124-128)
219    let subsystemdata_offset = &headerbytes[124..128];
220    let version = LittleEndian::read_u16(&subsystemdata_offset[0..2]);
221    let endian_indicator = LittleEndian::read_u16(&subsystemdata_offset[2..4]);
222
223    let header = MatHeader {
224        _version: version,
225        endian_indicator,
226    };
227
228    // Check endianness indicator
229    if header.endian_indicator != 0x4D49 && header.endian_indicator != 0x494D {
230        return Err(IoError::FormatError(
231            "Invalid endianness indicator".to_string(),
232        ));
233    }
234
235    // Read data elements
236    let mut variables = HashMap::<String, MatType>::new();
237
238    // Read data elements until EOF
239    while let Ok(element_type) = read_i32(&mut reader) {
240        // Check if we've reached EOF
241        if element_type == 0 {
242            break;
243        }
244
245        // Read element size
246        let element_size = read_i32(&mut reader)?;
247
248        // Handle different data types
249        match element_type {
250            MI_MATRIX => {
251                // Read matrix data
252                let mut matrixdata = vec![0u8; element_size as usize];
253                reader
254                    .read_exact(&mut matrixdata)
255                    .map_err(|e| IoError::FileError(format!("Failed to read matrix data: {e}")))?;
256
257                // Parse matrix data
258                if let Ok((name, mat_type)) = parse_matrixdata(&matrixdata) {
259                    variables.insert(name, mat_type);
260                }
261            }
262            _ => {
263                // Skip unknown element types
264                reader
265                    .by_ref()
266                    .take(element_size as u64)
267                    .read_to_end(&mut vec![])
268                    .map_err(|e| IoError::FileError(format!("Failed to skip element: {e}")))?;
269            }
270        }
271    }
272
273    Ok(variables)
274}
275
276/// Parse matrix data from byte array
277#[allow(dead_code)]
278fn parse_matrixdata(data: &[u8]) -> Result<(String, MatType)> {
279    let mut cursor = 0;
280
281    // Read array flags
282    let array_flags_type = LittleEndian::read_i32(&data[cursor..cursor + 4]);
283    cursor += 4;
284
285    let array_flags_size = LittleEndian::read_i32(&data[cursor..cursor + 4]);
286    cursor += 4;
287
288    if array_flags_type != MI_UINT32 || array_flags_size != 8 {
289        return Err(IoError::FormatError("Invalid array flags".to_string()));
290    }
291
292    let flags = MatrixFlags::from_u32(LittleEndian::read_u32(&data[cursor..cursor + 4]));
293    cursor += 8; // Skip flags (4 bytes) and reserved (4 bytes)
294
295    // Read dimensions
296    let dimensions_type = LittleEndian::read_i32(&data[cursor..cursor + 4]);
297    cursor += 4;
298
299    let dimensions_size = LittleEndian::read_i32(&data[cursor..cursor + 4]);
300    cursor += 4;
301
302    if dimensions_type != MI_INT32 {
303        return Err(IoError::FormatError("Invalid dimensions type".to_string()));
304    }
305
306    let num_dims = dimensions_size / 4;
307    let mut dims = Vec::with_capacity(num_dims as usize);
308    for i in 0..num_dims {
309        dims.push(LittleEndian::read_i32(
310            &data[cursor + (i * 4) as usize..cursor + ((i + 1) * 4) as usize],
311        ));
312    }
313    cursor += dimensions_size as usize;
314
315    // Pad to 8-byte boundary
316    if cursor % 8 != 0 {
317        cursor += 8 - (cursor % 8);
318    }
319
320    // Read array name
321    let name_type = LittleEndian::read_i32(&data[cursor..cursor + 4]);
322    cursor += 4;
323
324    let name_size = LittleEndian::read_i32(&data[cursor..cursor + 4]);
325    cursor += 4;
326
327    if name_type != MI_INT8 && name_type != MI_UTF8 {
328        return Err(IoError::FormatError("Invalid name type".to_string()));
329    }
330
331    let name = std::str::from_utf8(&data[cursor..cursor + name_size as usize])
332        .map_err(|_| IoError::FormatError("Invalid name encoding".to_string()))?
333        .to_string();
334
335    cursor += name_size as usize;
336
337    // Pad to 8-byte boundary
338    if cursor % 8 != 0 {
339        cursor += 8 - (cursor % 8);
340    }
341
342    // Read data
343    let data_type = LittleEndian::read_i32(&data[cursor..cursor + 4]);
344    cursor += 4;
345
346    let data_size = LittleEndian::read_i32(&data[cursor..cursor + 4]);
347    cursor += 4;
348
349    let realdata = &data[cursor..cursor + data_size as usize];
350    cursor += data_size as usize;
351
352    // Pad to 8-byte boundary
353    if cursor % 8 != 0 {
354        cursor += 8 - (cursor % 8);
355    }
356
357    // Read imaginary part if complex
358    let _imagdata = if flags.is_complex {
359        let imag_type = LittleEndian::read_i32(&data[cursor..cursor + 4]);
360        cursor += 4;
361
362        let imag_size = LittleEndian::read_i32(&data[cursor..cursor + 4]);
363        cursor += 4;
364
365        if imag_type != data_type {
366            return Err(IoError::FormatError(
367                "Mismatched imaginary type".to_string(),
368            ));
369        }
370
371        Some(&data[cursor..cursor + imag_size as usize])
372    } else {
373        None
374    };
375
376    // Convert to appropriate MatType based on class type
377    let mat_type = match flags.class_type {
378        MX_DOUBLE_CLASS => {
379            let data_vec = bytes_to_f64_vec(realdata);
380            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
381                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
382            MatType::Double(ndarray)
383        }
384        MX_SINGLE_CLASS => {
385            let data_vec = bytes_to_f32_vec(realdata);
386            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
387                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
388            MatType::Single(ndarray)
389        }
390        MX_INT8_CLASS => {
391            let data_vec = realdata.to_vec();
392            let ndarray = Array::from_shape_vec(
393                IxDyn(&convert_dims(&dims)),
394                data_vec.into_iter().map(|b| b as i8).collect(),
395            )
396            .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
397            MatType::Int8(ndarray)
398        }
399        MX_UINT8_CLASS => {
400            if flags.is_logical {
401                // Handle as logical data
402                let data_vec: Vec<bool> = realdata.iter().map(|&b| b != 0).collect();
403                let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
404                    .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
405                MatType::Logical(ndarray)
406            } else {
407                // Handle as regular uint8 data
408                let data_vec = realdata.to_vec();
409                let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
410                    .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
411                MatType::UInt8(ndarray)
412            }
413        }
414        MX_INT16_CLASS => {
415            let data_vec = bytes_to_i16_vec(realdata);
416            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
417                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
418            MatType::Int16(ndarray)
419        }
420        MX_UINT16_CLASS => {
421            let data_vec = bytes_to_u16_vec(realdata);
422            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
423                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
424            MatType::UInt16(ndarray)
425        }
426        MX_INT32_CLASS => {
427            let data_vec = bytes_to_i32_vec(realdata);
428            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
429                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
430            MatType::Int32(ndarray)
431        }
432        MX_UINT32_CLASS => {
433            let data_vec = bytes_to_u32_vec(realdata);
434            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
435                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
436            MatType::UInt32(ndarray)
437        }
438        MX_INT64_CLASS => {
439            let data_vec = bytes_to_i64_vec(realdata);
440            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
441                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
442            MatType::Int64(ndarray)
443        }
444        MX_UINT64_CLASS => {
445            let data_vec = bytes_to_u64_vec(realdata);
446            let ndarray = Array::from_shape_vec(IxDyn(&convert_dims(&dims)), data_vec)
447                .map_err(|e| IoError::FormatError(format!("Failed to create array: {e}")))?;
448            MatType::UInt64(ndarray)
449        }
450        MX_CHAR_CLASS => {
451            // Convert to string
452            let chars: Vec<u16> = bytes_to_u16_vec(realdata);
453            let utf16_chars: Vec<u16> = chars.into_iter().collect();
454            let string = String::from_utf16_lossy(&utf16_chars);
455            MatType::Char(string)
456        }
457        _ => {
458            // Unsupported class type
459            return Err(IoError::FormatError(format!(
460                "Unsupported class type: {}",
461                flags.class_type
462            )));
463        }
464    };
465
466    Ok((name, mat_type))
467}
468
/// Convert MATLAB dimensions to ndarray dimensions
///
/// The extents are returned in reverse order: MATLAB lays data out
/// column-major while ndarray defaults to row-major, so reversing the shape
/// lets the stored element order be used unchanged. Each extent is converted
/// with a plain `as usize` cast.
#[allow(dead_code)]
fn convert_dims(dims: &[i32]) -> Vec<usize> {
    let mut shape = Vec::with_capacity(dims.len());
    for &extent in dims.iter().rev() {
        shape.push(extent as usize);
    }
    shape
}
476
477/// Read an i32 from the reader
478#[allow(dead_code)]
479fn read_i32<R: Read>(reader: &mut R) -> Result<i32> {
480    let mut buffer = [0u8; 4];
481    match reader.read_exact(&mut buffer) {
482        Ok(_) => Ok(LittleEndian::read_i32(&buffer)),
483        Err(_) => Ok(0), // EOF
484    }
485}
486
/// Convert bytes to f64 vector
///
/// Decodes consecutive 8-byte groups as little-endian `f64` values. Trailing
/// bytes that do not fill a whole group are ignored.
#[allow(dead_code)]
fn bytes_to_f64_vec(bytes: &[u8]) -> Vec<f64> {
    bytes
        .chunks_exact(8)
        .map(|chunk| {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(chunk);
            f64::from_le_bytes(raw)
        })
        .collect()
}
498
/// Convert bytes to f32 vector
///
/// Decodes consecutive 4-byte groups as little-endian `f32` values. Trailing
/// bytes that do not fill a whole group are ignored.
#[allow(dead_code)]
fn bytes_to_f32_vec(bytes: &[u8]) -> Vec<f32> {
    bytes
        .chunks_exact(4)
        .map(|chunk| {
            let mut raw = [0u8; 4];
            raw.copy_from_slice(chunk);
            f32::from_le_bytes(raw)
        })
        .collect()
}
510
/// Convert bytes to i16 vector
///
/// Decodes consecutive 2-byte groups as little-endian `i16` values. A trailing
/// odd byte is ignored.
#[allow(dead_code)]
fn bytes_to_i16_vec(bytes: &[u8]) -> Vec<i16> {
    bytes
        .chunks_exact(2)
        .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]]))
        .collect()
}
522
/// Convert bytes to u16 vector
///
/// Decodes consecutive 2-byte groups as little-endian `u16` values. A trailing
/// odd byte is ignored.
#[allow(dead_code)]
fn bytes_to_u16_vec(bytes: &[u8]) -> Vec<u16> {
    bytes
        .chunks_exact(2)
        .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
        .collect()
}
534
/// Convert bytes to i32 vector
///
/// Decodes consecutive 4-byte groups as little-endian `i32` values. Trailing
/// bytes that do not fill a whole group are ignored.
#[allow(dead_code)]
fn bytes_to_i32_vec(bytes: &[u8]) -> Vec<i32> {
    bytes
        .chunks_exact(4)
        .map(|chunk| {
            let mut raw = [0u8; 4];
            raw.copy_from_slice(chunk);
            i32::from_le_bytes(raw)
        })
        .collect()
}
546
/// Convert bytes to u32 vector
///
/// Decodes consecutive 4-byte groups as little-endian `u32` values. Trailing
/// bytes that do not fill a whole group are ignored.
#[allow(dead_code)]
fn bytes_to_u32_vec(bytes: &[u8]) -> Vec<u32> {
    bytes
        .chunks_exact(4)
        .map(|chunk| {
            let mut raw = [0u8; 4];
            raw.copy_from_slice(chunk);
            u32::from_le_bytes(raw)
        })
        .collect()
}
558
/// Convert bytes to i64 vector
///
/// Decodes consecutive 8-byte groups as little-endian `i64` values. Trailing
/// bytes that do not fill a whole group are ignored.
#[allow(dead_code)]
fn bytes_to_i64_vec(bytes: &[u8]) -> Vec<i64> {
    bytes
        .chunks_exact(8)
        .map(|chunk| {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(chunk);
            i64::from_le_bytes(raw)
        })
        .collect()
}
570
/// Convert bytes to u64 vector
///
/// Decodes consecutive 8-byte groups as little-endian `u64` values. Trailing
/// bytes that do not fill a whole group are ignored.
#[allow(dead_code)]
fn bytes_to_u64_vec(bytes: &[u8]) -> Vec<u64> {
    bytes
        .chunks_exact(8)
        .map(|chunk| {
            let mut raw = [0u8; 8];
            raw.copy_from_slice(chunk);
            u64::from_le_bytes(raw)
        })
        .collect()
}
582
583/// Writes data to a MATLAB .mat file
584///
585/// # Arguments
586///
587/// * `path` - Path where the .mat file should be written
588/// * `vars` - A HashMap mapping variable names to their values
589///
590/// # Example
591///
592/// ```no_run
593/// use scirs2_io::matlab::{write_mat, MatType};
594/// use scirs2_core::ndarray::Array;
595/// use std::collections::HashMap;
596/// use std::path::Path;
597///
598/// let mut vars = HashMap::new();
599/// let data = Array::linspace(0.0, 10.0, 100).into_dyn();
600/// vars.insert("x".to_string(), MatType::Double(data));
601///
602/// write_mat(Path::new("output.mat"), &vars).unwrap();
603/// ```
604#[allow(dead_code)]
605pub fn write_mat<P: AsRef<Path>>(path: P, vars: &HashMap<String, MatType>) -> Result<()> {
606    let file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
607    let mut writer = BufWriter::new(file);
608
609    // Write MAT file header
610    write_impl::write_mat_header(&mut writer)?;
611
612    // Write each variable
613    for (name, mat_type) in vars {
614        write_impl::write_variable(&mut writer, name, mat_type)?;
615    }
616
617    writer
618        .flush()
619        .map_err(|e| IoError::FileError(format!("Failed to flush writer: {e}")))?;
620    Ok(())
621}