pub mod parser;
use std::fmt::Display;
use std::fs;
use std::io::Write;
use std::path::Path;
use std::str::FromStr;
use ferray_core::Array;
use ferray_core::dimension::{Ix1, Ix2};
use ferray_core::dtype::Element;
use ferray_core::error::{FerrayError, FerrayResult};
use self::parser::{TextParseOptions, parse_text_grid, parse_text_grid_with_missing};
/// Options controlling how `savetxt`, `savetxt_1d` and their `*_to_writer`
/// variants lay out their text output.
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
/// Character written between adjacent columns of a row. Only the 2-D writer
/// emits it; the 1-D writer puts every element on its own line.
pub delimiter: char,
/// Optional per-value format specifier handed to `format_value`. When `None`,
/// values are written using their `Display` implementation.
pub fmt: Option<String>,
/// Optional text written before the data, followed by `newline`.
pub header: Option<String>,
/// Optional text written after the data, followed by `newline`.
pub footer: Option<String>,
/// Line terminator written after the header, after every data line, and
/// after the footer.
pub newline: String,
}
impl Default for SaveTxtOptions {
fn default() -> Self {
Self {
delimiter: ',',
fmt: None,
header: None,
footer: None,
newline: "\n".to_string(),
}
}
}
/// Renders `val` according to a small subset of Rust-style and printf-style
/// format specifiers.
///
/// Recognised forms of `fmt_str`:
///
/// * `{:.N}`, `{:.Ne}`, `{:.NE}` — fixed or scientific notation with `N`
///   fractional digits. Applied only when the value's `Display` text parses as
///   an `f64`.
/// * Any string containing `{}` — every `{}` is replaced by the value's
///   `Display` text.
/// * `%.Nf`, `%.Ne`, `%.NE`, `%.Ng` — precision `N`; anything between `%` and
///   the `.` (width, flags) is ignored, and `g` is rendered like `f`.
/// * `%e`, `%E` — scientific notation with default precision; `%f` and `%g`
///   without a precision use the plain `f64` `Display` output.
///
/// Every other specifier (including brace specs this function does not
/// understand, such as `{:>8}`) falls back to the value's `Display` output so
/// that the value itself is never dropped from the output.
fn format_value<T: Display>(val: &T, fmt_str: &str) -> String {
    if fmt_str.contains('{') {
        let precision_spec = fmt_str
            .strip_prefix("{:.")
            .and_then(|rest| rest.strip_suffix('}'));
        if let Some(prec_str) = precision_spec {
            // Split an optional trailing exponent marker off the digit run,
            // remembering its case so `E` is not rendered as `e`.
            let (digits, exponent) = if let Some(d) = prec_str.strip_suffix('e') {
                (d, Some('e'))
            } else if let Some(d) = prec_str.strip_suffix('E') {
                (d, Some('E'))
            } else {
                (prec_str, None)
            };
            if let Ok(prec) = digits.parse::<usize>() {
                if let Ok(v) = val.to_string().parse::<f64>() {
                    return match exponent {
                        Some('e') => format!("{v:.prec$e}"),
                        Some('E') => format!("{v:.prec$E}"),
                        _ => format!("{v:.prec$}"),
                    };
                }
            }
        }
        // Substitute only when there is a placeholder to fill; otherwise
        // returning `fmt_str` would emit the spec text instead of the value.
        return if fmt_str.contains("{}") {
            fmt_str.replace("{}", &val.to_string())
        } else {
            val.to_string()
        };
    }

    if let Some(spec) = fmt_str.strip_prefix('%') {
        // The conversion character is the last character of the spec.
        let conversion = ['e', 'E', 'f', 'g']
            .iter()
            .find_map(|&mode| spec.strip_suffix(mode).map(|body| (body, mode)));
        let (body, mode) = match conversion {
            Some(parts) => parts,
            None => return val.to_string(),
        };
        if let Ok(v) = val.to_string().parse::<f64>() {
            match body.find('.') {
                Some(dot_pos) => {
                    if let Ok(prec) = body[dot_pos + 1..].parse::<usize>() {
                        return match mode {
                            'e' => format!("{v:.prec$e}"),
                            'E' => format!("{v:.prec$E}"),
                            _ => format!("{v:.prec$}"),
                        };
                    }
                }
                None if body.is_empty() => {
                    return match mode {
                        'e' => format!("{v:e}"),
                        'E' => format!("{v:E}"),
                        _ => format!("{v}"),
                    };
                }
                // Width without precision (e.g. `%5f`) is not supported.
                None => {}
            }
        }
    }

    val.to_string()
}
/// Saves a 2-D array as delimited text in the file at `path`.
///
/// The file is created with `File::create` and the formatting itself is
/// delegated to [`savetxt_to_writer`].
///
/// # Errors
///
/// Returns an I/O error if the file cannot be created, or whatever error
/// [`savetxt_to_writer`] reports while writing.
pub fn savetxt<T: Element + Display, P: AsRef<Path>>(
    path: P,
    array: &Array<T, Ix2>,
    opts: &SaveTxtOptions,
) -> FerrayResult<()> {
    let path = path.as_ref();
    let file = std::fs::File::create(path).map_err(|e| {
        FerrayError::io_error(format!(
            "failed to create file '{}': {e}",
            path.display()
        ))
    })?;
    // The writer issues one small write per cell and delimiter; buffering
    // avoids a system call for each of them. `savetxt_to_writer` flushes
    // explicitly before returning, so buffered write errors are still reported.
    let mut writer = std::io::BufWriter::new(file);
    savetxt_to_writer(&mut writer, array, opts)
}
/// Saves a 1-D array to the file at `path`, one element per line.
///
/// The file is created with `File::create` and the formatting itself is
/// delegated to [`savetxt_1d_to_writer`].
///
/// # Errors
///
/// Returns an I/O error if the file cannot be created, or whatever error
/// [`savetxt_1d_to_writer`] reports while writing.
pub fn savetxt_1d<T: Element + Display, P: AsRef<Path>>(
    path: P,
    array: &Array<T, Ix1>,
    opts: &SaveTxtOptions,
) -> FerrayResult<()> {
    let path = path.as_ref();
    let file = std::fs::File::create(path).map_err(|e| {
        FerrayError::io_error(format!(
            "failed to create file '{}': {e}",
            path.display()
        ))
    })?;
    // The writer issues two small writes per element; buffering avoids a
    // system call for each of them. `savetxt_1d_to_writer` flushes explicitly
    // before returning, so buffered write errors are still reported.
    let mut writer = std::io::BufWriter::new(file);
    savetxt_1d_to_writer(&mut writer, array, opts)
}
pub fn savetxt_1d_to_writer<T: Element + Display, W: Write>(
writer: &mut W,
array: &Array<T, Ix1>,
opts: &SaveTxtOptions,
) -> FerrayResult<()> {
if let Some(ref header) = opts.header {
write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
let slice = array
.as_slice()
.ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
for val in slice {
let formatted = if let Some(ref fmt_str) = opts.fmt {
format_value(val, fmt_str)
} else {
format!("{val}")
};
writer
.write_all(formatted.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
if let Some(ref footer) = opts.footer {
write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
writer
.flush()
.map_err(|e| FerrayError::io_error(e.to_string()))?;
Ok(())
}
/// Loads a delimited text file and returns its values as a 1-D array.
///
/// The file is first read as a 2-D grid with [`loadtxt`]; the grid's elements
/// are then copied, in the order produced by the array's iterator, into a
/// vector of length `rows * cols`.
///
/// # Errors
///
/// Propagates any error from [`loadtxt`] or from building the 1-D array.
pub fn loadtxt_1d<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix1>>
where
    T: Element + FromStr,
    T::Err: Display,
    P: AsRef<Path>,
{
    let grid = loadtxt::<T, _>(path, delimiter, skiprows)?;
    let dims = grid.shape();
    let total = dims[0] * dims[1];
    let mut flat: Vec<T> = Vec::new();
    flat.extend(grid.iter().cloned());
    Array::<T, Ix1>::from_vec(Ix1::new([total]), flat)
}
pub fn savetxt_to_writer<T: Element + Display, W: Write>(
writer: &mut W,
array: &Array<T, Ix2>,
opts: &SaveTxtOptions,
) -> FerrayResult<()> {
let shape = array.shape();
let nrows = shape[0];
let ncols = shape[1];
if let Some(ref header) = opts.header {
write!(writer, "{header}").map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
let slice = array
.as_slice()
.ok_or_else(|| FerrayError::io_error("cannot save non-contiguous array as text"))?;
for row in 0..nrows {
for col in 0..ncols {
if col > 0 {
write!(writer, "{}", opts.delimiter)
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
let val = &slice[row * ncols + col];
if let Some(ref fmt_str) = opts.fmt {
let formatted = format_value(val, fmt_str);
write!(writer, "{formatted}").map_err(|e| FerrayError::io_error(e.to_string()))?;
} else {
write!(writer, "{val}").map_err(|e| FerrayError::io_error(e.to_string()))?;
}
}
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
if let Some(ref footer) = opts.footer {
write!(writer, "{footer}").map_err(|e| FerrayError::io_error(e.to_string()))?;
writer
.write_all(opts.newline.as_bytes())
.map_err(|e| FerrayError::io_error(e.to_string()))?;
}
writer
.flush()
.map_err(|e| FerrayError::io_error(e.to_string()))?;
Ok(())
}
/// Reads the file at `path` into memory and parses it as a 2-D array with
/// [`loadtxt_from_str`].
///
/// # Errors
///
/// Returns an I/O error naming the path if the file cannot be read as a
/// string, and otherwise propagates any error from [`loadtxt_from_str`].
pub fn loadtxt<T, P>(path: P, delimiter: char, skiprows: usize) -> FerrayResult<Array<T, Ix2>>
where
    T: Element + FromStr,
    T::Err: Display,
    P: AsRef<Path>,
{
    let file_path = path.as_ref();
    let text = match fs::read_to_string(file_path) {
        Ok(text) => text,
        Err(e) => {
            return Err(FerrayError::io_error(format!(
                "failed to read file '{}': {e}",
                file_path.display()
            )));
        }
    };
    loadtxt_from_str(&text, delimiter, skiprows)
}
/// Parses delimited text into a 2-D array of `T`.
///
/// `content` is split into a grid by `parse_text_grid` using `delimiter` and
/// `skiprows` (all other parse options keep their defaults). Each cell is then
/// converted with `str::parse::<T>`. A grid with zero rows yields an empty
/// `0 x 0` array.
///
/// # Errors
///
/// Propagates any error from `parse_text_grid`. If a cell fails to parse, an
/// I/O error is returned naming the offending text together with its row and
/// column index within the parsed grid.
pub fn loadtxt_from_str<T>(
    content: &str,
    delimiter: char,
    skiprows: usize,
) -> FerrayResult<Array<T, Ix2>>
where
    T: Element + FromStr,
    T::Err: Display,
{
    let parse_opts = TextParseOptions {
        delimiter,
        skiprows,
        ..Default::default()
    };
    let (cells, nrows, ncols) = parse_text_grid(content, &parse_opts)?;
    if nrows == 0 {
        return Array::from_vec(Ix2::new([0, 0]), vec![]);
    }

    let mut values: Vec<T> = Vec::new();
    for (idx, cell) in cells.iter().enumerate() {
        match cell.parse::<T>() {
            Ok(parsed) => values.push(parsed),
            Err(e) => {
                // Recover the grid position from the flat cell index.
                let (row, col) = (idx / ncols, idx % ncols);
                return Err(FerrayError::io_error(format!(
                    "failed to parse value '{cell}' at row {row}, col {col}: {e}"
                )));
            }
        }
    }
    Array::from_vec(Ix2::new([nrows, ncols]), values)
}
/// Reads the file at `path` into memory and parses it as a 2-D `f64` array
/// with [`genfromtxt_from_str`], substituting `filling_value` for missing
/// cells.
///
/// # Errors
///
/// Returns an I/O error naming the path if the file cannot be read as a
/// string, and otherwise propagates any error from [`genfromtxt_from_str`].
pub fn genfromtxt<P: AsRef<Path>>(
    path: P,
    delimiter: char,
    filling_value: f64,
    skiprows: usize,
    missing_values: &[&str],
) -> FerrayResult<Array<f64, Ix2>> {
    let file_path = path.as_ref();
    let text = match fs::read_to_string(file_path) {
        Ok(text) => text,
        Err(e) => {
            return Err(FerrayError::io_error(format!(
                "failed to read file '{}': {e}",
                file_path.display()
            )));
        }
    };
    genfromtxt_from_str(&text, delimiter, filling_value, skiprows, missing_values)
}
/// Parses delimited text into a 2-D `f64` array, tolerating missing cells.
///
/// A built-in list of missing-value markers (`""`, `NA`, `N/A`, `nan`, `NaN`,
/// `NAN`, `--`, `null`) is extended with any entries of `missing_values` not
/// already present, and handed to `parse_text_grid_with_missing`. Cells that
/// come back as `None` become `filling_value`; all other cells are parsed as
/// `f64`. A grid with zero rows yields an empty `0 x 0` array.
///
/// # Errors
///
/// Propagates any error from `parse_text_grid_with_missing`. If a present
/// cell fails to parse, an I/O error is returned naming the offending text
/// together with its row and column index within the parsed grid.
pub fn genfromtxt_from_str(
    content: &str,
    delimiter: char,
    filling_value: f64,
    skiprows: usize,
    missing_values: &[&str],
) -> FerrayResult<Array<f64, Ix2>> {
    let parse_opts = TextParseOptions {
        delimiter,
        skiprows,
        ..Default::default()
    };

    let mut markers: Vec<&str> = vec!["", "NA", "N/A", "nan", "NaN", "NAN", "--", "null"];
    for &extra in missing_values {
        if !markers.contains(&extra) {
            markers.push(extra);
        }
    }

    let (cells, nrows, ncols) = parse_text_grid_with_missing(content, &parse_opts, &markers)?;
    if nrows == 0 {
        return Array::from_vec(Ix2::new([0, 0]), vec![]);
    }

    let mut values: Vec<f64> = Vec::new();
    for (idx, cell) in cells.iter().enumerate() {
        let value = match cell {
            None => filling_value,
            Some(text) => match text.parse::<f64>() {
                Ok(parsed) => parsed,
                Err(e) => {
                    // Recover the grid position from the flat cell index.
                    let (row, col) = (idx / ncols, idx % ncols);
                    return Err(FerrayError::io_error(format!(
                        "failed to parse value '{text}' at row {row}, col {col}: {e}"
                    )));
                }
            },
        };
        values.push(value);
    }
    Array::from_vec(Ix2::new([nrows, ncols]), values)
}
/// Builds a 2-D array by applying `regex` to each line of `content`.
///
/// Every line on which the pattern matches contributes one row whose columns
/// are the pattern's capture groups, each parsed as `T`. A group that did not
/// participate in the match is treated as the empty string. Lines that do not
/// match, and matching lines where any group fails to parse, are skipped
/// without error.
///
/// # Errors
///
/// Returns an invalid-value error if `regex` does not compile or contains no
/// capture groups; otherwise propagates any error from building the array.
pub fn fromregex<T>(content: &str, regex: &str) -> FerrayResult<Array<T, Ix2>>
where
    T: Element + FromStr,
    T::Err: Display,
{
    let pattern = regex::Regex::new(regex)
        .map_err(|e| FerrayError::invalid_value(format!("fromregex: invalid regex: {e}")))?;

    // `captures_len` counts the implicit whole-match group as well.
    let group_count = pattern.captures_len().saturating_sub(1);
    if group_count == 0 {
        return Err(FerrayError::invalid_value(
            "fromregex: regex must contain at least one capture group",
        ));
    }

    let mut values: Vec<T> = Vec::new();
    let mut row_count = 0usize;
    for line in content.lines() {
        let caps = match pattern.captures(line) {
            Some(caps) => caps,
            None => continue,
        };
        // Collecting into `Option` stops at the first group that fails to
        // parse, so a partially parsed row never reaches `values`.
        let row: Option<Vec<T>> = (1..=group_count)
            .map(|g| caps.get(g).map_or("", |m| m.as_str()).parse::<T>().ok())
            .collect();
        if let Some(row) = row {
            values.extend(row);
            row_count += 1;
        }
    }
    Array::from_vec(Ix2::new([row_count, group_count]), values)
}
/// Reads the file at `path` into memory and extracts a 2-D array from it with
/// [`fromregex`].
///
/// # Errors
///
/// Returns an I/O error naming the path if the file cannot be read as a
/// string, and otherwise propagates any error from [`fromregex`].
pub fn fromregex_from_file<T, P>(path: P, regex: &str) -> FerrayResult<Array<T, Ix2>>
where
    T: Element + FromStr,
    T::Err: Display,
    P: AsRef<Path>,
{
    let file_path = path.as_ref();
    let text = match fs::read_to_string(file_path) {
        Ok(text) => text,
        Err(e) => {
            return Err(FerrayError::io_error(format!(
                "fromregex: failed to read file '{}': {e}",
                file_path.display()
            )));
        }
    };
    fromregex::<T>(&text, regex)
}
// Unit tests for the text save/load helpers in this module.
// `float_cmp` is allowed because several assertions below compare `f64`
// values with `assert_eq!` directly.
#[cfg(test)]
#[allow(clippy::float_cmp)] mod tests {
use super::*;
// With default options each element is written on its own line using
// `Display`, so `3.0` and `4.0` appear as `3` and `4`.
#[test]
fn savetxt_1d_writes_one_value_per_line() {
let arr = Array::<f64, Ix1>::from_vec(Ix1::new([4]), vec![1.5, 2.5, 3.0, 4.0]).unwrap();
let mut buf: Vec<u8> = Vec::new();
let opts = SaveTxtOptions::default();
savetxt_1d_to_writer(&mut buf, &arr, &opts).unwrap();
let s = String::from_utf8(buf).unwrap();
assert_eq!(s, "1.5\n2.5\n3\n4\n");
}
// A 1-D array written to a temporary file reads back with the same shape
// and values.
#[test]
fn savetxt_1d_then_loadtxt_1d_roundtrip() {
let arr =
Array::<f64, Ix1>::from_vec(Ix1::new([5]), vec![1.0, -2.5, 3.5, 0.0, 7.25]).unwrap();
let dir = tempfile::tempdir().unwrap();
let p = dir.path().join("vec.txt");
let opts = SaveTxtOptions::default();
savetxt_1d(&p, &arr, &opts).unwrap();
let back: Array<f64, Ix1> = loadtxt_1d(&p, ',', 0).unwrap();
assert_eq!(back.shape(), &[5]);
assert_eq!(back.as_slice().unwrap(), arr.as_slice().unwrap());
}
// A 3x2 input is flattened row by row into six elements.
#[test]
fn loadtxt_1d_flattens_multicolumn_input() {
let dir = tempfile::tempdir().unwrap();
let p = dir.path().join("rect.txt");
std::fs::write(&p, "1,2\n3,4\n5,6\n").unwrap();
let v: Array<i64, Ix1> = loadtxt_1d(&p, ',', 0).unwrap();
assert_eq!(v.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
}
// Two comma-separated rows of three floats parse into a 2x3 array.
#[test]
fn loadtxt_simple_csv() {
let content = "1.0,2.0,3.0\n4.0,5.0,6.0\n";
let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
assert_eq!(arr.shape(), &[2, 3]);
assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
}
// Two non-data lines precede the data; with `skiprows = 1` the result is
// still the 2x2 numeric block.
#[test]
fn loadtxt_with_skiprows() {
let content = "# header\nname,value\n1.0,10.0\n2.0,20.0\n";
let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 1).unwrap();
assert_eq!(arr.shape(), &[2, 2]);
assert_eq!(arr.as_slice().unwrap()[0], 1.0);
}
// A tab can be used as the delimiter, here with an integer element type.
#[test]
fn loadtxt_tab_delimited() {
let content = "1\t2\t3\n4\t5\t6\n";
let arr: Array<i32, Ix2> = loadtxt_from_str(content, '\t', 0).unwrap();
assert_eq!(arr.shape(), &[2, 3]);
assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3, 4, 5, 6]);
}
// Integer text parses into an `i64` array.
#[test]
fn loadtxt_integers() {
let content = "10,20\n30,40\n";
let arr: Array<i64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
assert_eq!(arr.as_slice().unwrap(), &[10i64, 20, 30, 40]);
}
// A 2x3 array saved with default options loads back unchanged from disk.
#[test]
fn loadtxt_file_roundtrip() {
let data = vec![1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0];
let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 3]), data.clone()).unwrap();
let dir = tempfile::TempDir::new().unwrap();
let path = dir.path().join("test.csv");
savetxt(&path, &arr, &SaveTxtOptions::default()).unwrap();
let loaded: Array<f64, Ix2> = loadtxt(&path, ',', 0).unwrap();
assert_eq!(loaded.shape(), &[2, 3]);
assert_eq!(loaded.as_slice().unwrap(), &data[..]);
}
// Overriding the delimiter replaces the default comma in the output.
#[test]
fn savetxt_custom_delimiter() {
let data = vec![1.0f64, 2.0, 3.0, 4.0];
let arr = Array::<f64, Ix2>::from_vec(Ix2::new([2, 2]), data).unwrap();
let mut buf = Vec::new();
let opts = SaveTxtOptions {
delimiter: '\t',
..Default::default()
};
savetxt_to_writer(&mut buf, &arr, &opts).unwrap();
let output = String::from_utf8(buf).unwrap();
assert!(output.contains('\t'));
assert!(!output.contains(','));
}
// The header is the first line of the output and the footer the last, each
// terminated by the newline string.
#[test]
fn savetxt_with_header_footer() {
let data = vec![1.0f64, 2.0];
let arr = Array::<f64, Ix2>::from_vec(Ix2::new([1, 2]), data).unwrap();
let mut buf = Vec::new();
let opts = SaveTxtOptions {
header: Some("# my header".to_string()),
footer: Some("# end".to_string()),
..Default::default()
};
savetxt_to_writer(&mut buf, &arr, &opts).unwrap();
let output = String::from_utf8(buf).unwrap();
assert!(output.starts_with("# my header\n"));
assert!(output.ends_with("# end\n"));
}
// Empty cells (middle of row 1, end of row 2) are replaced by the NaN
// filling value.
#[test]
fn genfromtxt_missing_nan() {
let content = "1.0,2.0,3.0\n4.0,,6.0\n7.0,8.0,\n";
let arr = genfromtxt_from_str(content, ',', f64::NAN, 0, &[]).unwrap();
assert_eq!(arr.shape(), &[3, 3]);
let slice = arr.as_slice().unwrap();
assert_eq!(slice[0], 1.0);
assert!(slice[4].is_nan()); assert!(slice[8].is_nan()); }
// `NA` cells are replaced by the filling value. `NA` is passed explicitly
// here although it is also part of the built-in marker list.
#[test]
fn genfromtxt_na_marker() {
let content = "1.0,NA,3.0\n4.0,5.0,NA\n";
let arr = genfromtxt_from_str(content, ',', -999.0, 0, &["NA"]).unwrap();
assert_eq!(arr.shape(), &[2, 3]);
let slice = arr.as_slice().unwrap();
assert_eq!(slice[1], -999.0);
assert_eq!(slice[5], -999.0);
}
// Skipping one row drops the column-name line before parsing.
#[test]
fn genfromtxt_with_skiprows() {
let content = "col1,col2\n1.0,2.0\n3.0,4.0\n";
let arr = genfromtxt_from_str(content, ',', f64::NAN, 1, &[]).unwrap();
assert_eq!(arr.shape(), &[2, 2]);
assert_eq!(arr.as_slice().unwrap()[0], 1.0);
}
// The file-based entry point fills the empty first cell of row 1 with NaN.
#[test]
fn genfromtxt_file() {
let content = "1.0,2.0\n,4.0\n";
let dir = tempfile::TempDir::new().unwrap();
let path = dir.path().join("genfromtxt_test.csv");
std::fs::write(&path, content).unwrap();
let arr = genfromtxt(&path, ',', f64::NAN, 0, &[]).unwrap();
assert_eq!(arr.shape(), &[2, 2]);
assert!(arr.as_slice().unwrap()[2].is_nan());
}
// Empty input produces an empty 0x0 array rather than an error.
#[test]
fn loadtxt_empty() {
let content = "";
let arr: Array<f64, Ix2> = loadtxt_from_str(content, ',', 0).unwrap();
assert_eq!(arr.shape(), &[0, 0]);
}
// One capture group yields one column; the non-matching line is ignored.
#[test]
fn fromregex_basic_one_group() {
let s = "value=10\nvalue=20\nirrelevant\nvalue=30\n";
let arr: Array<i32, Ix2> = fromregex(s, r"^value=(\d+)$").unwrap();
assert_eq!(arr.shape(), &[3, 1]);
assert_eq!(arr.as_slice().unwrap(), &[10, 20, 30]);
}
// Two capture groups yield two columns per matching line.
#[test]
fn fromregex_multiple_groups() {
let s = "1,2\n3,4\n5,6\n";
let arr: Array<f64, Ix2> = fromregex(s, r"^([\d.]+),([\d.]+)$").unwrap();
assert_eq!(arr.shape(), &[3, 2]);
assert_eq!(arr.as_slice().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
}
// A pattern without capture groups is rejected.
#[test]
fn fromregex_no_groups_errs() {
let r: FerrayResult<Array<i32, Ix2>> = fromregex("a\nb\n", r"^[ab]$");
assert!(r.is_err());
}
// A pattern that does not compile is rejected.
#[test]
fn fromregex_invalid_regex_errs() {
let r: FerrayResult<Array<i32, Ix2>> = fromregex("", r"(unclosed");
assert!(r.is_err());
}
// A line that matches the pattern but whose group does not parse as the
// element type is skipped instead of causing an error.
#[test]
fn fromregex_skips_unparseable_rows() {
let s = "v=10\nv=foo\nv=20\n";
let arr: Array<i32, Ix2> = fromregex(s, r"^v=(\S+)$").unwrap();
assert_eq!(arr.shape(), &[2, 1]);
assert_eq!(arr.as_slice().unwrap(), &[10, 20]);
}
// The file-based entry point reads the file and applies the same extraction.
#[test]
fn fromregex_from_file_roundtrip() {
let dir = tempfile::TempDir::new().unwrap();
let path = dir.path().join("regex_test.txt");
std::fs::write(&path, "x=1\nx=2\nx=3\n").unwrap();
let arr: Array<i32, Ix2> = fromregex_from_file(&path, r"^x=(\d+)$").unwrap();
assert_eq!(arr.shape(), &[3, 1]);
assert_eq!(arr.as_slice().unwrap(), &[1, 2, 3]);
}
}