use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::Path;
use std::str::FromStr;
pub mod npy_npz;
pub mod text;
#[cfg(feature = "bson")]
pub mod bson_format;
#[cfg(feature = "matlab")]
pub mod matlab;
#[cfg(feature = "messagepack")]
pub mod messagepack;
#[cfg(feature = "netcdf")]
pub mod netcdf;
#[cfg(feature = "parquet")]
pub mod parquet;
pub use npy_npz::*;
pub use text::*;
#[cfg(feature = "bson")]
pub use bson_format::{from_bson_document, from_bson_file, to_bson_document, to_bson_file};
#[cfg(feature = "matlab")]
pub use matlab::{read_mat, write_mat};
#[cfg(feature = "messagepack")]
pub use messagepack::{
from_messagepack, from_messagepack_bytes, to_messagepack, to_messagepack_bytes,
};
#[cfg(feature = "netcdf")]
pub use netcdf::{read_netcdf, write_netcdf};
#[cfg(feature = "parquet")]
pub use parquet::{read_parquet, write_parquet};
/// Owned shape-plus-data snapshot of an array, used as the serde payload for
/// the JSON, binary and pickle formats in this module.
///
/// The field names and their order are part of the serialized layout (the
/// JSON test in this file expects `{"shape":[...],"data":[...]}`), so they
/// must not be renamed or reordered.
#[derive(Serialize, Deserialize)]
struct SerializedArray<T> {
// Dimensions of the array, as returned by `Array::shape()`.
shape: Vec<usize>,
// Flat element list, as returned by `Array::to_vec()`.
data: Vec<T>,
}
/// Selects the encoding used by the `Array` serialization helpers
/// (`to_string`, `to_file`, `from_string`, `from_file`) in this module.
///
/// Not every format is accepted by every helper: the string-based helpers
/// only handle `Json` and `Csv`, and several feature-gated formats are only
/// reachable through their dedicated free functions.
#[derive(Clone, Copy, Debug)]
pub enum SerializeFormat {
/// JSON object with `shape` and `data` keys (string and file helpers).
Json,
/// Comma-separated values (string and file helpers).
Csv,
/// Binary encoding produced with `oxicode` (file helpers only).
Binary,
/// Delegated to the `npy_npz` submodule (file helpers only).
Npy,
/// Delegated to the `npy_npz` submodule (file helpers only).
Npz,
/// Pickle encoding produced with `serde_pickle` (file helpers only).
Pickle,
/// Rejected by all helpers in this module: "not yet implemented" for
/// `Array` (de)serialization.
#[cfg(feature = "parquet")]
Parquet,
/// MessagePack bytes via `to_messagepack_bytes` / `from_messagepack_bytes`
/// (file helpers only).
#[cfg(feature = "messagepack")]
MessagePack,
/// Rejected by the helpers in this module; use `to_bson_file()` /
/// `from_bson_file()` instead.
#[cfg(feature = "bson")]
Bson,
/// Rejected by the helpers in this module; use `write_netcdf()` /
/// `read_netcdf()` instead.
#[cfg(feature = "netcdf")]
NetCdf,
/// Rejected by the helpers in this module; use `write_mat()` /
/// `read_mat()` instead.
#[cfg(feature = "matlab")]
Matlab,
}
impl<T: Clone + Serialize> Array<T> {
pub fn to_string(&self, format: SerializeFormat) -> Result<String> {
let serialized = SerializedArray {
shape: self.shape(),
data: self.to_vec(),
};
match format {
SerializeFormat::Json => serde_json::to_string(&serialized).map_err(|e| {
NumRs2Error::SerializationError(format!("JSON serialization error: {}", e))
}),
SerializeFormat::Csv => {
let mut writer = csv::Writer::from_writer(vec![]);
let data = self.to_vec();
writer.serialize(&data).map_err(|e| {
NumRs2Error::SerializationError(format!("CSV serialization error: {}", e))
})?;
let csv_bytes = writer.into_inner().map_err(|e| {
NumRs2Error::SerializationError(format!("CSV serialization error: {}", e))
})?;
String::from_utf8(csv_bytes).map_err(|e| {
NumRs2Error::SerializationError(format!("CSV serialization error: {}", e))
})
}
SerializeFormat::Binary => Err(NumRs2Error::SerializationError(
"Binary serialization to string not supported".to_string(),
)),
SerializeFormat::Npy => Err(NumRs2Error::SerializationError(
"NPY format serialization to string not supported".to_string(),
)),
SerializeFormat::Npz => Err(NumRs2Error::SerializationError(
"NPZ format serialization to string not supported".to_string(),
)),
SerializeFormat::Pickle => Err(NumRs2Error::SerializationError(
"Pickle format serialization to string not supported (use to_file instead)"
.to_string(),
)),
#[cfg(feature = "parquet")]
SerializeFormat::Parquet => Err(NumRs2Error::SerializationError(
"Parquet format serialization to string not supported (use to_file instead)"
.to_string(),
)),
#[cfg(feature = "messagepack")]
SerializeFormat::MessagePack => Err(NumRs2Error::SerializationError(
"MessagePack format serialization to string not supported (use to_file instead)"
.to_string(),
)),
#[cfg(feature = "bson")]
SerializeFormat::Bson => Err(NumRs2Error::SerializationError(
"BSON format serialization to string not supported (use to_file instead)"
.to_string(),
)),
#[cfg(feature = "netcdf")]
SerializeFormat::NetCdf => Err(NumRs2Error::SerializationError(
"NetCDF format serialization to string not supported (use to_file instead)"
.to_string(),
)),
#[cfg(feature = "matlab")]
SerializeFormat::Matlab => Err(NumRs2Error::SerializationError(
"MATLAB format serialization to string not supported (use to_file instead)"
.to_string(),
)),
}
}
/// Writes the array to `path` in the requested `format`.
///
/// * `Json`, `Binary` and `Pickle` encode a shape-plus-data snapshot.
/// * `Csv` writes one record per row, as produced by `to_row_vectors`
///   (so only 1-D and 2-D arrays are accepted).
/// * `Npy` / `Npz` are delegated to `npy_npz::serialize_to_file`.
/// * `MessagePack` (feature-gated) writes the bytes from
///   `to_messagepack_bytes`.
/// * `Parquet`, `Bson`, `NetCdf` and `Matlab` (feature-gated) are rejected.
///
/// The destination file is created only once the call is known to be able to
/// write something, so a rejected format or a failing up-front conversion
/// does not truncate an existing file. The buffered writer is flushed
/// explicitly so that late write errors are reported instead of being
/// dropped.
///
/// # Errors
///
/// * `NumRs2Error::IOError` if the file cannot be created, written or
///   flushed.
/// * `NumRs2Error::SerializationError` if encoding fails or the format is
///   not supported by this method.
/// * Whatever `to_row_vectors`, `npy_npz::serialize_to_file` or
///   `to_messagepack_bytes` return on failure.
pub fn to_file<P: AsRef<Path>>(&self, path: P, format: SerializeFormat) -> Result<()> {
    // Opened lazily by the arms that really write: `File::create` truncates,
    // so calling it before validation would destroy existing data on a call
    // that is about to fail anyway.
    let open_writer = || -> Result<BufWriter<File>> {
        File::create(&path)
            .map(BufWriter::new)
            .map_err(|e| NumRs2Error::IOError(format!("Failed to create file: {}", e)))
    };
    // Owned snapshot for the serde-based formats; built on demand because it
    // clones every element.
    let snapshot = || SerializedArray {
        shape: self.shape(),
        data: self.to_vec(),
    };

    let mut writer = match format {
        SerializeFormat::Json => {
            let json = serde_json::to_string(&snapshot()).map_err(|e| {
                NumRs2Error::SerializationError(format!("JSON serialization error: {}", e))
            })?;
            let mut out = open_writer()?;
            out.write_all(json.as_bytes())
                .map_err(|e| NumRs2Error::IOError(format!("Failed to write to file: {}", e)))?;
            out
        }
        SerializeFormat::Csv => {
            // Fails for arrays with more than two dimensions; do it before
            // the file exists.
            let rows = self.to_row_vectors()?;
            let mut out = open_writer()?;
            {
                // Borrow the buffered writer so it can still be flushed (and
                // its errors observed) after the CSV layer is done.
                let mut csv_writer = csv::Writer::from_writer(&mut out);
                for row in rows {
                    csv_writer.serialize(row).map_err(|e| {
                        NumRs2Error::SerializationError(format!(
                            "CSV serialization error: {}",
                            e
                        ))
                    })?;
                }
                csv_writer.flush().map_err(|e| {
                    NumRs2Error::IOError(format!("Failed to flush CSV writer: {}", e))
                })?;
            }
            out
        }
        SerializeFormat::Binary => {
            let config = oxicode::config::standard();
            let serialized = snapshot();
            let mut out = open_writer()?;
            oxicode::serde::encode_into_std_write(&serialized, &mut out, config).map_err(
                |e| {
                    NumRs2Error::SerializationError(format!(
                        "Binary serialization error: {}",
                        e
                    ))
                },
            )?;
            out
        }
        SerializeFormat::Npy | SerializeFormat::Npz => {
            let mut out = open_writer()?;
            npy_npz::serialize_to_file(self, &mut out, format)?;
            out
        }
        SerializeFormat::Pickle => {
            let serialized = snapshot();
            let mut out = open_writer()?;
            serde_pickle::to_writer(&mut out, &serialized, serde_pickle::SerOptions::new())
                .map_err(|e| {
                    NumRs2Error::SerializationError(format!(
                        "Pickle serialization error: {}",
                        e
                    ))
                })?;
            out
        }
        #[cfg(feature = "parquet")]
        SerializeFormat::Parquet => {
            return Err(NumRs2Error::SerializationError(
                "Parquet format not yet implemented for Array serialization".to_string(),
            ));
        }
        #[cfg(feature = "messagepack")]
        SerializeFormat::MessagePack => {
            let bytes = to_messagepack_bytes(self)?;
            let mut out = open_writer()?;
            out.write_all(&bytes)
                .map_err(|e| NumRs2Error::IOError(format!("Failed to write to file: {}", e)))?;
            out
        }
        #[cfg(feature = "bson")]
        SerializeFormat::Bson => {
            return Err(NumRs2Error::SerializationError(
                "BSON format: use to_bson_file() function instead".to_string(),
            ));
        }
        #[cfg(feature = "netcdf")]
        SerializeFormat::NetCdf => {
            return Err(NumRs2Error::SerializationError(
                "NetCDF format: use write_netcdf() function instead".to_string(),
            ));
        }
        #[cfg(feature = "matlab")]
        SerializeFormat::Matlab => {
            return Err(NumRs2Error::SerializationError(
                "MATLAB format: use write_mat() function instead".to_string(),
            ));
        }
    };

    // `BufWriter`'s `Drop` ignores flush failures; flush here so that an error
    // while writing the tail of the file reaches the caller.
    writer
        .flush()
        .map_err(|e| NumRs2Error::IOError(format!("Failed to flush file: {}", e)))
}
/// Splits the array into per-row vectors suitable for CSV output.
///
/// * A 1-D array yields a single row holding all elements.
/// * A 2-D array of shape `[rows, cols]` yields `rows` vectors of `cols`
///   elements each, taken consecutively from `to_vec()` (i.e. element
///   `(i, j)` is read from flat position `i * cols + j`).
///
/// # Errors
///
/// Returns `NumRs2Error::DimensionMismatch` for any other dimensionality.
pub fn to_row_vectors(&self) -> Result<Vec<Vec<T>>> {
    match self.ndim() {
        1 => Ok(vec![self.to_vec()]),
        2 => {
            let shape = self.shape();
            let (rows, cols) = (shape[0], shape[1]);
            // `to_vec()` already hands back owned clones, so move them into
            // the rows instead of cloning each element a second time.
            let mut remaining = self.to_vec().into_iter();
            let result: Vec<Vec<T>> = (0..rows)
                .map(|_| remaining.by_ref().take(cols).collect())
                .collect();
            Ok(result)
        }
        _ => Err(NumRs2Error::DimensionMismatch(
            "Cannot convert arrays with more than 2 dimensions to CSV rows".to_string(),
        )),
    }
}
}
impl<T: Clone + for<'a> Deserialize<'a> + std::str::FromStr> Array<T>
where
<T as FromStr>::Err: std::fmt::Debug,
{
/// Parses an array from its text representation.
///
/// * `SerializeFormat::Json` expects an object with `shape` and `data` keys
///   and rebuilds the array with that shape.
/// * `SerializeFormat::Csv` parses every field of every record (no header
///   row) with `T::from_str` and returns them as one flat array; record
///   boundaries are not turned into a second dimension (unlike `from_file`).
///
/// # Errors
///
/// Returns `NumRs2Error::DeserializationError` when the input cannot be
/// decoded, when a CSV field fails to parse, when the JSON `shape` does not
/// describe exactly `data.len()` elements, and for every other format, none
/// of which can be read from a string here.
pub fn from_string(s: &str, format: SerializeFormat) -> Result<Self> {
    match format {
        SerializeFormat::Json => {
            let serialized: SerializedArray<T> = serde_json::from_str(s).map_err(|e| {
                NumRs2Error::DeserializationError(format!("JSON deserialization error: {}", e))
            })?;
            // The payload is external input: make sure the declared shape
            // accounts for exactly the elements present before reshaping.
            // NOTE(review): `Array::reshape` is not visible here; this assumes
            // it requires a matching element count - confirm.
            let expected_len = serialized
                .shape
                .iter()
                .try_fold(1usize, |acc, &dim| acc.checked_mul(dim));
            if expected_len != Some(serialized.data.len()) {
                return Err(NumRs2Error::DeserializationError(format!(
                    "Deserialized shape {:?} does not match data length {}",
                    serialized.shape,
                    serialized.data.len()
                )));
            }
            Ok(Array::from_vec(serialized.data).reshape(&serialized.shape))
        }
        SerializeFormat::Csv => {
            let mut reader = csv::ReaderBuilder::new()
                .has_headers(false)
                .from_reader(s.as_bytes());
            let mut data = Vec::new();
            for result in reader.records() {
                let record = result.map_err(|e| {
                    NumRs2Error::DeserializationError(format!("CSV reading error: {}", e))
                })?;
                for field in record.iter() {
                    // Surface the parser's own diagnostic; the `Debug` bound
                    // on `FromStr::Err` exists for exactly this.
                    let value = field.parse::<T>().map_err(|e| {
                        NumRs2Error::DeserializationError(format!(
                            "Failed to parse CSV field: {} ({:?})",
                            field, e
                        ))
                    })?;
                    data.push(value);
                }
            }
            Ok(Array::from_vec(data))
        }
        SerializeFormat::Binary => Err(NumRs2Error::DeserializationError(
            "Binary deserialization from string not supported".to_string(),
        )),
        SerializeFormat::Npy => Err(NumRs2Error::DeserializationError(
            "NPY format deserialization from string not supported".to_string(),
        )),
        SerializeFormat::Npz => Err(NumRs2Error::DeserializationError(
            "NPZ format deserialization from string not supported".to_string(),
        )),
        SerializeFormat::Pickle => Err(NumRs2Error::DeserializationError(
            "Pickle format deserialization from string not supported (use from_file instead)"
                .to_string(),
        )),
        #[cfg(feature = "parquet")]
        SerializeFormat::Parquet => Err(NumRs2Error::DeserializationError(
            "Parquet format deserialization from string not supported (use from_file instead)"
                .to_string(),
        )),
        #[cfg(feature = "messagepack")]
        SerializeFormat::MessagePack => Err(NumRs2Error::DeserializationError(
            "MessagePack format deserialization from string not supported (use from_file instead)"
                .to_string(),
        )),
        #[cfg(feature = "bson")]
        SerializeFormat::Bson => Err(NumRs2Error::DeserializationError(
            "BSON format deserialization from string not supported (use from_file instead)"
                .to_string(),
        )),
        #[cfg(feature = "netcdf")]
        SerializeFormat::NetCdf => Err(NumRs2Error::DeserializationError(
            "NetCDF format deserialization from string not supported (use from_file instead)"
                .to_string(),
        )),
        #[cfg(feature = "matlab")]
        SerializeFormat::Matlab => Err(NumRs2Error::DeserializationError(
            "MATLAB format deserialization from string not supported (use from_file instead)"
                .to_string(),
        )),
    }
}
/// Reads an array from the file at `path`, interpreting it as `format`.
///
/// * `Json`, `Binary` and `Pickle` decode a shape-plus-data snapshot and
///   rebuild the array with the stored shape.
/// * `Csv` (no header row) parses every field with `T::from_str` and returns
///   a 2-D array of shape `[records, fields_per_record]`; all records must
///   have the same number of fields.
/// * `Npy` / `Npz` are delegated to `npy_npz::deserialize_from_file`.
/// * `MessagePack` (feature-gated) reads the whole file and decodes it with
///   `from_messagepack_bytes`.
/// * `Parquet`, `Bson`, `NetCdf` and `Matlab` (feature-gated) are rejected.
///
/// # Errors
///
/// * `NumRs2Error::IOError` if the file cannot be opened or read.
/// * `NumRs2Error::DeserializationError` if decoding fails, a CSV field does
///   not parse, the CSV is empty or ragged, a stored shape does not describe
///   exactly the stored number of elements, or the format is not supported
///   by this method.
pub fn from_file<P: AsRef<Path>>(path: P, format: SerializeFormat) -> Result<Self> {
    // Shared tail of the snapshot-based formats. File contents are external
    // input, so the declared shape is checked against the element count
    // before reshaping.
    // NOTE(review): `Array::reshape` is not visible here; this assumes it
    // requires a matching element count - confirm.
    let rebuild = |serialized: SerializedArray<T>| -> Result<Self> {
        let expected_len = serialized
            .shape
            .iter()
            .try_fold(1usize, |acc, &dim| acc.checked_mul(dim));
        if expected_len != Some(serialized.data.len()) {
            return Err(NumRs2Error::DeserializationError(format!(
                "Deserialized shape {:?} does not match data length {}",
                serialized.shape,
                serialized.data.len()
            )));
        }
        Ok(Array::from_vec(serialized.data).reshape(&serialized.shape))
    };

    let file = File::open(path)
        .map_err(|e| NumRs2Error::IOError(format!("Failed to open file: {}", e)))?;
    let reader = BufReader::new(file);
    match format {
        SerializeFormat::Json => {
            let serialized: SerializedArray<T> =
                serde_json::from_reader(reader).map_err(|e| {
                    NumRs2Error::DeserializationError(format!(
                        "JSON deserialization error: {}",
                        e
                    ))
                })?;
            rebuild(serialized)
        }
        SerializeFormat::Csv => {
            let mut csv_reader = csv::ReaderBuilder::new()
                .has_headers(false)
                .from_reader(reader);
            let mut all_rows: Vec<Vec<T>> = Vec::new();
            for result in csv_reader.records() {
                let record = result.map_err(|e| {
                    NumRs2Error::DeserializationError(format!("CSV reading error: {}", e))
                })?;
                let mut row = Vec::with_capacity(record.len());
                for field in record.iter() {
                    // Surface the parser's own diagnostic; the `Debug` bound
                    // on `FromStr::Err` exists for exactly this.
                    let value = field.parse::<T>().map_err(|e| {
                        NumRs2Error::DeserializationError(format!(
                            "Failed to parse CSV field: {} ({:?})",
                            field, e
                        ))
                    })?;
                    row.push(value);
                }
                all_rows.push(row);
            }
            if all_rows.is_empty() {
                return Err(NumRs2Error::DeserializationError(
                    "CSV file contained no data".to_string(),
                ));
            }
            // Every record must be as wide as the first one, otherwise the
            // data cannot form a rectangular array.
            let row_length = all_rows[0].len();
            for (i, row) in all_rows.iter().enumerate().skip(1) {
                if row.len() != row_length {
                    return Err(NumRs2Error::DeserializationError(format!(
                        "CSV file has inconsistent row lengths: row 0 has length {}, row {} has length {}",
                        row_length,
                        i,
                        row.len()
                    )));
                }
            }
            let rows_count = all_rows.len();
            let mut data = Vec::with_capacity(rows_count * row_length);
            for row in all_rows {
                data.extend(row);
            }
            Ok(Array::from_vec(data).reshape(&[rows_count, row_length]))
        }
        SerializeFormat::Binary => {
            let config = oxicode::config::standard();
            let mut reader = reader;
            let (serialized, _len): (SerializedArray<T>, usize) =
                oxicode::serde::decode_from_std_read(&mut reader, config).map_err(|e| {
                    NumRs2Error::DeserializationError(format!(
                        "Binary deserialization error: {}",
                        e
                    ))
                })?;
            rebuild(serialized)
        }
        SerializeFormat::Npy | SerializeFormat::Npz => {
            npy_npz::deserialize_from_file(reader, format)
        }
        SerializeFormat::Pickle => {
            let serialized: SerializedArray<T> =
                serde_pickle::from_reader(reader, serde_pickle::DeOptions::new()).map_err(
                    |e| {
                        NumRs2Error::DeserializationError(format!(
                            "Pickle deserialization error: {}",
                            e
                        ))
                    },
                )?;
            rebuild(serialized)
        }
        #[cfg(feature = "parquet")]
        SerializeFormat::Parquet => Err(NumRs2Error::DeserializationError(
            "Parquet format not yet implemented for Array deserialization".to_string(),
        )),
        #[cfg(feature = "messagepack")]
        SerializeFormat::MessagePack => {
            use std::io::Read;
            let mut bytes = Vec::new();
            let mut reader = reader;
            reader
                .read_to_end(&mut bytes)
                .map_err(|e| NumRs2Error::IOError(format!("Failed to read file: {}", e)))?;
            from_messagepack_bytes(&bytes)
        }
        #[cfg(feature = "bson")]
        SerializeFormat::Bson => Err(NumRs2Error::DeserializationError(
            "BSON format: use from_bson_file() function instead".to_string(),
        )),
        #[cfg(feature = "netcdf")]
        SerializeFormat::NetCdf => Err(NumRs2Error::DeserializationError(
            "NetCDF format: use read_netcdf() function instead".to_string(),
        )),
        #[cfg(feature = "matlab")]
        SerializeFormat::Matlab => Err(NumRs2Error::DeserializationError(
            "MATLAB format: use read_mat() function instead".to_string(),
        )),
    }
}
}
pub fn vec_to_array<T: Clone>(vec: Vec<T>, shape: Option<&[usize]>) -> Result<Array<T>> {
let array = Array::from_vec(vec);
match shape {
Some(shape) => Ok(array.reshape(shape)),
None => Ok(array),
}
}
/// Converts a vector of equally long rows into a 2-D array.
///
/// An empty outer vector yields the array built from an empty flat vector.
/// Otherwise the rows are concatenated in order and the result is reshaped
/// to `[number_of_rows, length_of_first_row]`.
///
/// # Errors
///
/// Returns `NumRs2Error::DimensionMismatch` naming the first row whose length
/// differs from that of row 0.
pub fn vec2d_to_array<T: Clone>(vec: Vec<Vec<T>>) -> Result<Array<T>> {
    let (n_rows, n_cols) = match vec.first() {
        Some(first_row) => (vec.len(), first_row.len()),
        None => return Ok(Array::from_vec(Vec::new())),
    };

    let ragged = vec
        .iter()
        .enumerate()
        .find(|(_, candidate)| candidate.len() != n_cols);
    if let Some((index, offender)) = ragged {
        return Err(NumRs2Error::DimensionMismatch(format!(
            "Row 0 has length {}, but row {} has length {}",
            n_cols,
            index,
            offender.len()
        )));
    }

    let mut flat = Vec::with_capacity(n_rows * n_cols);
    flat.extend(vec.into_iter().flatten());
    Ok(Array::from_vec(flat).reshape(&[n_rows, n_cols]))
}
/// Converts a 2-D array into a vector of row vectors.
///
/// For an array of shape `[rows, cols]` the result has `rows` entries of
/// `cols` elements each, taken consecutively from `to_vec()` (i.e. element
/// `(i, j)` is read from flat position `i * cols + j`).
///
/// # Errors
///
/// Returns `NumRs2Error::DimensionMismatch` when the array is not
/// two-dimensional.
pub fn array_to_vec2d<T: Clone>(array: &Array<T>) -> Result<Vec<Vec<T>>> {
    if array.ndim() != 2 {
        return Err(NumRs2Error::DimensionMismatch(format!(
            "Expected 2D array, got {}D",
            array.ndim()
        )));
    }
    let shape = array.shape();
    let (rows, cols) = (shape[0], shape[1]);
    // `to_vec()` already hands back owned clones, so move them into the rows
    // instead of cloning each element a second time.
    let mut remaining = array.to_vec().into_iter();
    let result: Vec<Vec<T>> = (0..rows)
        .map(|_| remaining.by_ref().take(cols).collect())
        .collect();
    Ok(result)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The 2x2 integer fixture `[[1, 2], [3, 4]]` shared by several tests.
    fn square() -> Array<i32> {
        Array::from_vec(vec![1, 2, 3, 4]).reshape(&[2, 2])
    }

    /// JSON output has the exact `shape`/`data` layout and round-trips.
    #[test]
    fn test_json_serialization() {
        let encoded = square()
            .to_string(SerializeFormat::Json)
            .expect("Failed to serialize to JSON");
        assert_eq!(encoded, r#"{"shape":[2,2],"data":[1,2,3,4]}"#);

        let decoded = Array::<i32>::from_string(&encoded, SerializeFormat::Json)
            .expect("Failed to deserialize from JSON");
        assert_eq!(decoded.shape(), vec![2, 2]);
        assert_eq!(decoded.to_vec(), vec![1, 2, 3, 4]);
    }

    /// A flat vector plus an explicit shape becomes an array of that shape.
    #[test]
    fn test_vec_to_array() {
        let flat = vec![1, 2, 3, 4];
        let dims: &[usize] = &[2, 2];
        let built = vec_to_array(flat, Some(dims)).expect("Failed to convert vec to array");
        assert_eq!(built.shape(), vec![2, 2]);
        assert_eq!(built.to_vec(), vec![1, 2, 3, 4]);
    }

    /// Nested rows are flattened in order and shaped `[rows, cols]`.
    #[test]
    fn test_vec2d_to_array() {
        let nested = vec![vec![1, 2], vec![3, 4]];
        let built = vec2d_to_array(nested).expect("Failed to convert 2D vec to array");
        assert_eq!(built.shape(), vec![2, 2]);
        assert_eq!(built.to_vec(), vec![1, 2, 3, 4]);
    }

    /// A 2-D array converts back into its rows.
    #[test]
    fn test_array_to_vec2d() {
        let source = square();
        let nested = array_to_vec2d(&source).expect("Failed to convert array to 2D vec");
        assert_eq!(nested, vec![vec![1, 2], vec![3, 4]]);
    }

    /// The CSV row view of a 2-D array is one vector per row.
    #[test]
    fn test_csv_serialization() {
        let as_rows = square()
            .to_row_vectors()
            .expect("Failed to convert to row vectors");
        assert_eq!(as_rows, vec![vec![1, 2], vec![3, 4]]);
    }
}