pub mod parser;
use std::fmt::Display;
use std::fs;
use std::io::Write;
use std::path::Path;
use std::str::FromStr;
use ferray_core::Array;
use ferray_core::dimension::Ix2;
use ferray_core::dtype::Element;
use ferray_core::error::{FerrayError, FerrayResult};
use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
/// Formatting options consumed by [`savetxt`] and [`savetxt_to_writer`].
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Character written between adjacent cells of a row.
    pub delimiter: char,
    /// Optional cell pattern. When set, every `{}` in the pattern is replaced
    /// by the cell's `Display` text; when `None`, the `Display` text is
    /// written as-is.
    pub fmt: Option<String>,
    /// Optional text written verbatim (followed by `newline`) before the data.
    pub header: Option<String>,
    /// Optional text written verbatim (followed by `newline`) after the data.
    pub footer: Option<String>,
    /// Line terminator written after the header, each row, and the footer.
    pub newline: String,
}

impl Default for SaveTxtOptions {
    /// Comma-delimited output with `"\n"` line endings and no custom cell
    /// format, header, or footer.
    fn default() -> Self {
        let newline = String::from("\n");
        Self {
            newline,
            footer: None,
            header: None,
            fmt: None,
            delimiter: ',',
        }
    }
}
/// Save a 2-D array as delimited text in the file at `path`.
///
/// The file is created (truncating any existing file) and its contents are
/// produced by [`savetxt_to_writer`] according to `opts`.
///
/// # Errors
///
/// Returns an I/O error if the file cannot be created, or whatever error
/// [`savetxt_to_writer`] reports while writing.
pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
    path: P,
    array: &Array<T, Ix2>,
    opts: &SaveTxtOptions,
) -> FerrayResult<()> {
    let path = path.as_ref();
    let file = std::fs::File::create(path).map_err(|e| {
        FerrayError::io_error(format!(
            "failed to create file '{}': {e}",
            path.display()
        ))
    })?;
    // `savetxt_to_writer` issues a separate write for every cell and
    // delimiter; buffering keeps each of those from hitting the file directly.
    // It also flushes the writer before returning, so errors from buffered
    // writes are still reported to the caller.
    let mut writer = std::io::BufWriter::new(file);
    savetxt_to_writer(&mut writer, array, opts)
}
pub fn savetxt_to_writer<T: Element + Display, W: Write>(
writer: &mut W,
array: &Array<T, Ix2>,
opts: &SaveTxtOptions,
) -> FerrayResult<()> {
let shape = array.shape();
let nrows = shape[0];
let ncols = shape[1];
if let Some(ref header) = opts.header {
write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
let slice = array
.as_slice()
.ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
for row in 0..nrows {
for col in 0..ncols {
if col > 0 {
write!(writer, "{}", opts.delimiter)
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
let val = &slice[row * ncols + col];
if let Some(ref fmt_str) = opts.fmt {
let formatted = fmt_str.replace("{}", &val.to_string());
write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
} else {
write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
}
}
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
if let Some(ref footer) = opts.footer {
write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
writer
.flush()
.map_err(|e| FerrayError::io_error(e.to_string()))?;
Ok(())
}
/// Read the text file at `path` and parse it into a 2-D array of `T`.
///
/// The whole file is read into memory and handed to [`loadtxt_from_str`]
/// together with `delimiter` and `skiprows`.
///
/// # Errors
///
/// Returns an I/O error if the file cannot be read as UTF-8 text, or any
/// error produced by [`loadtxt_from_str`].
pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
where
    T: Element + FromStr,
    T::Err: Display,
    P: AsRef<Path>,
{
    let source = path.as_ref();
    match fs::read_to_string(source) {
        Ok(text) => loadtxt_from_str(&text, delimiter, skiprows),
        Err(e) => Err(FerrayError::io_error(format!(
            "failed to read file '{}': {e}",
            source.display()
        ))),
    }
}
/// Parse delimited text into a 2-D array of `T`.
///
/// `delimiter` and `skiprows` are forwarded to the grid parser through
/// `TextParseOptions`; all other parser options keep their defaults. Each
/// cell is converted with `T::from_str`. If the parser reports zero rows the
/// result is an empty array of shape `[0, 0]`.
///
/// # Errors
///
/// Returns any error from the grid parser, or an I/O error naming the
/// offending cell and its zero-based row/column within the parsed grid when
/// a cell fails to parse as `T`.
pub fn loadtxt_from_str<T>(
    content: &str,
    delimiter: char,
    skiprows: usize,
) -> FerrayResult<Array<T, Ix2>>
where
    T: Element + FromStr,
    T::Err: Display,
{
    let parse_opts = TextParseOptions {
        delimiter,
        skiprows,
        ..Default::default()
    };
    let (cells, nrows, ncols) = parse_text_grid(content, &parse_opts)?;
    if nrows == 0 {
        return Array::from_vec(Ix2::new([0, 0]), vec![]);
    }

    let mut values: Vec<T> = Vec::new();
    for (i, cell) in cells.iter().enumerate() {
        match cell.parse::<T>() {
            Ok(value) => values.push(value),
            Err(e) => {
                // Cells are stored row-major, so the flat index splits into
                // a (row, col) pair.
                let (row, col) = (i / ncols, i % ncols);
                return Err(FerrayError::io_error(format!(
                    "failed to parse value '{cell}' at row {row}, col {col}: {e}"
                )));
            }
        }
    }
    Array::from_vec(Ix2::new([nrows, ncols]), values)
}
/// Read the text file at `path` and parse it into a 2-D `f64` array,
/// substituting `filling_value` for missing cells.
///
/// The whole file is read into memory and handed to [`genfromtxt_from_str`]
/// with the remaining arguments unchanged.
///
/// # Errors
///
/// Returns an I/O error if the file cannot be read as UTF-8 text, or any
/// error produced by [`genfromtxt_from_str`].
pub fn genfromtxt<P: AsRef<Path>>(
    path: P,
    delimiter: char,
    filling_value: f64,
    skiprows: usize,
    missing_values: &[&str],
) -> FerrayResult<Array<f64, Ix2>> {
    let source = path.as_ref();
    match fs::read_to_string(source) {
        Ok(text) => {
            genfromtxt_from_str(&text, delimiter, filling_value, skiprows, missing_values)
        }
        Err(e) => Err(FerrayError::io_error(format!(
            "failed to read file '{}': {e}",
            source.display()
        ))),
    }
}
/// Parse delimited text into a 2-D `f64` array, tolerating missing cells.
///
/// The parser is given a list of missing-value markers made of the built-in
/// defaults (`""`, `NA`, `N/A`, `nan`, `NaN`, `NAN`, `--`, `null`) plus any
/// entries of `missing_values` not already present. Every cell the parser
/// reports as missing becomes `filling_value`; every other cell is parsed as
/// `f64`. If the parser reports zero rows the result has shape `[0, 0]`.
///
/// # Errors
///
/// Returns any error from the grid parser, or an I/O error naming the
/// offending cell and its zero-based row/column within the parsed grid when
/// a present cell fails to parse as `f64`.
pub fn genfromtxt_from_str(
    content: &str,
    delimiter: char,
    filling_value: f64,
    skiprows: usize,
    missing_values: &[&str],
) -> FerrayResult<Array<f64, Ix2>> {
    const DEFAULT_MISSING: [&str; 8] = ["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];

    let parse_opts = TextParseOptions {
        delimiter,
        skiprows,
        ..Default::default()
    };

    // Start from the defaults and append caller-supplied markers, skipping
    // duplicates so each marker appears once.
    let mut all_missing: Vec<&str> = DEFAULT_MISSING.to_vec();
    for &marker in missing_values {
        if !all_missing.contains(&marker) {
            all_missing.push(marker);
        }
    }

    let (cells, nrows, ncols) =
        parse_text_grid_with_missing(content, &parse_opts, &all_missing)?;
    if nrows == 0 {
        return Array::from_vec(Ix2::new([0, 0]), vec![]);
    }

    let mut values: Vec<f64> = Vec::new();
    for (i, cell) in cells.iter().enumerate() {
        let value = match cell {
            Some(s) => s.parse::<f64>().map_err(|e| {
                // Cells are stored row-major, so the flat index splits into
                // a (row, col) pair.
                let (row, col) = (i / ncols, i % ncols);
                FerrayError::io_error(format!(
                    "failed to parse value '{s}' at row {row}, col {col}: {e}"
                ))
            })?,
            None => filling_value,
        };
        values.push(value);
    }
    Array::from_vec(Ix2::new([nrows, ncols]), values)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Path to `file_name` inside a per-process scratch directory under the
    /// system temp dir. Directory creation is best-effort.
    fn scratch_file(file_name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!("ferray_io_text_{}", std::process::id()));
        let _ = std::fs::create_dir_all(&dir);
        dir.join(file_name)
    }

    /// Run `savetxt_to_writer` into an in-memory buffer and return the text.
    fn render(arr: &Array<f64, Ix2>, opts: &SaveTxtOptions) -> String {
        let mut buf = Vec::new();
        savetxt_to_writer(&mut buf, arr, opts).unwrap();
        String::from_utf8(buf).unwrap()
    }

    /// Two comma-separated rows of floats load as a 2x3 array in row-major order.
    #[test]
    fn loadtxt_simple_csv() {
        let arr = loadtxt_from_str::<f64>("1.0,2.0,3.0\n4.0,5.0,6.0\n", ',', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    /// With `skiprows = 1`, only the two numeric rows end up in the array.
    #[test]
    fn loadtxt_with_skiprows() {
        let text = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
        let arr = loadtxt_from_str::<f64>(text, ',', 1).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    /// A tab delimiter works and cells parse as `i32`.
    #[test]
    fn loadtxt_tab_delimited() {
        let arr = loadtxt_from_str::<i32>("1\t2\t3\n4\t5\t6\n", '\t', 0).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
    }

    /// Cells parse as `i64`.
    #[test]
    fn loadtxt_integers() {
        let arr = loadtxt_from_str::<i64>("10,20\n30,40\n", ',', 0).unwrap();
        assert_eq!(arr.as_slice().unwrap(), &[10i64, 20, 30, 40]);
    }

    /// Saving with default options and loading back yields the same shape and data.
    #[test]
    fn loadtxt_file_roundtrip() {
        let data = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
        let original = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), data.clone()).unwrap();
        let path = scratch_file("test.csv");

        savetxt(&path, &original, &SaveTxtOptions::default()).unwrap();
        let loaded = loadtxt::<f64, _>(&path, ',', 0).unwrap();

        assert_eq!(loaded.shape(), &[2, 3]);
        assert_eq!(loaded.as_slice().unwrap(), &data[..]);
        let _ = std::fs::remove_file(&path);
    }

    /// A custom delimiter replaces the default comma in the output.
    #[test]
    fn savetxt_custom_delimiter() {
        let arr =
            Array::<f64, Ix2>::from_vec(Ix2::new([2, 2]), vec![1.0f64, 2.0, 3.0, 4.0]).unwrap();
        let opts = SaveTxtOptions {
            delimiter: '\t',
            ..Default::default()
        };
        let output = render(&arr, &opts);
        assert!(output.contains('\t'));
        assert!(!output.contains(','));
    }

    /// Header and footer are emitted as the first and last lines of the output.
    #[test]
    fn savetxt_with_header_footer() {
        let arr = Array::<f64, Ix2>::from_vec(Ix2::new([1, 2]), vec![1.0f64, 2.0]).unwrap();
        let opts = SaveTxtOptions {
            header: Some("# my header".to_string()),
            footer: Some("# end".to_string()),
            ..Default::default()
        };
        let output = render(&arr, &opts);
        assert!(output.starts_with("# my header\n"));
        assert!(output.ends_with("# end\n"));
    }

    /// Empty cells are replaced by the NaN filling value.
    #[test]
    fn genfromtxt_missing_nan() {
        let text = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
        let arr = genfromtxt_from_str(text, ',', f64::NAN, 0, &[]).unwrap();
        assert_eq!(arr.shape(), &[3, 3]);
        let values = arr.as_slice().unwrap();
        assert_eq!(values[0], 1.0);
        assert!(values[4].is_nan());
        assert!(values[8].is_nan());
    }

    /// `NA` cells are replaced by the supplied filling value.
    #[test]
    fn genfromtxt_na_marker() {
        let text = "1.0,NA,3.0\n4.0,5.0,NA\n";
        let arr = genfromtxt_from_str(text, ',', -999.0, 0, &["NA"]).unwrap();
        assert_eq!(arr.shape(), &[2, 3]);
        let values = arr.as_slice().unwrap();
        assert_eq!(values[1], -999.0);
        assert_eq!(values[5], -999.0);
    }

    /// With `skiprows = 1`, the column-name line is not part of the data.
    #[test]
    fn genfromtxt_with_skiprows() {
        let text = "col1,col2\n1.0,2.0\n3.0,4.0\n";
        let arr = genfromtxt_from_str(text, ',', f64::NAN, 1, &[]).unwrap();
        assert_eq!(arr.shape(), &[2, 2]);
        assert_eq!(arr.as_slice().unwrap()[0], 1.0);
    }

    /// The file-based entry point reads from disk and fills the empty cell.
    #[test]
    fn genfromtxt_file() {
        let path = scratch_file("genfromtxt_test.csv");
        std::fs::write(&path, "1.0,2.0\n,4.0\n").unwrap();

        let arr = genfromtxt(&path, ',', f64::NAN, 0, &[]).unwrap();

        assert_eq!(arr.shape(), &[2, 2]);
        assert!(arr.as_slice().unwrap()[2].is_nan());
        let _ = std::fs::remove_file(&path);
    }

    /// Empty input produces an empty `[0, 0]` array rather than an error.
    #[test]
    fn loadtxt_empty() {
        let arr = loadtxt_from_str::<f64>("", ',', 0).unwrap();
        assert_eq!(arr.shape(), &[0, 0]);
    }
}