use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use num_traits::{Num, Zero};
use regex::Regex;
use std::collections::HashMap;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;
use std::str::FromStr;
/// Boxed per-column conversion callback: receives the raw field text and returns
/// the replacement text, or an error.
///
/// `genfromtxt` looks these up in `GenFromTxtOptions::converters` and parses the
/// returned string into the target element type.
pub type ConverterFn = Box<dyn Fn(&str) -> Result<String>>;
/// Settings that control how [`loadtxt`] reads a text file.
#[derive(Debug, Clone)]
pub struct LoadTxtOptions {
    /// Marker that introduces a comment; lines beginning with it are ignored.
    pub comments: String,
    /// Field separator. `None` splits each line on runs of whitespace.
    pub delimiter: Option<String>,
    /// Number of lines discarded from the start of the file before parsing.
    pub skiprows: usize,
    /// Zero-based indices of the columns to keep, in the order listed.
    /// `None` keeps every column.
    pub usecols: Option<Vec<usize>>,
    /// Upper bound on the number of data rows read. `None` reads to the end.
    pub max_rows: Option<usize>,
    /// Minimum number of dimensions of the returned array; leading axes of
    /// length 1 are added until this is reached.
    pub ndmin: usize,
}

impl Default for LoadTxtOptions {
    /// `#` comments, whitespace-separated fields, nothing skipped, every column
    /// and every row kept, no minimum dimensionality.
    fn default() -> Self {
        LoadTxtOptions {
            comments: String::from("#"),
            delimiter: None,
            skiprows: 0,
            usecols: None,
            max_rows: None,
            ndmin: 0,
        }
    }
}
/// Settings that control how [`savetxt`] writes an array to a text file.
#[derive(Debug, Clone)]
pub struct SaveTxtOptions {
    /// Format specification for each value (defaults to `"%.18e"`).
    ///
    /// NOTE(review): `savetxt` formats values through `Display` and does not
    /// read this field.
    pub fmt: String,
    /// String placed between adjacent values of a row.
    pub delimiter: String,
    /// String written after each data row.
    pub newline: String,
    /// Optional text written before the data, prefixed with `comments`.
    pub header: Option<String>,
    /// Optional text written after the data, prefixed with `comments`.
    pub footer: Option<String>,
    /// Prefix put in front of the header and footer text.
    pub comments: String,
}

impl Default for SaveTxtOptions {
    /// Space-separated values, `"\n"` line endings, no header or footer, and
    /// `"# "` as the comment prefix.
    fn default() -> Self {
        SaveTxtOptions {
            fmt: String::from("%.18e"),
            delimiter: String::from(" "),
            newline: String::from("\n"),
            header: None,
            footer: None,
            comments: String::from("# "),
        }
    }
}
/// Settings for [`genfromtxt`].
///
/// Several fields are stored but never read by `genfromtxt`; they are marked
/// individually below.
pub struct GenFromTxtOptions {
    /// Name of the element type (defaults to `"f64"`). Not read by
    /// `genfromtxt`, whose element type is its generic parameter.
    pub dtype: String,
    /// Marker that introduces a comment; lines beginning with it are ignored.
    pub comments: String,
    /// Field separator. `None` splits each line on runs of whitespace.
    pub delimiter: Option<String>,
    /// Number of lines discarded from the start of the file.
    pub skip_header: usize,
    /// Number of lines discarded from the end of the file.
    pub skip_footer: usize,
    /// Per-column conversion callbacks, applied to non-missing fields before
    /// parsing. Keys are positions among the selected columns (i.e. after
    /// `usecols` has been applied).
    pub converters: HashMap<usize, ConverterFn>,
    /// Extra strings treated as missing for a given column (same key
    /// convention as `converters`).
    pub missing_values: HashMap<usize, Vec<String>>,
    /// Replacement text parsed in place of a missing field for a given column
    /// (same key convention as `converters`). Columns without an entry are
    /// filled with zero.
    pub filling_values: HashMap<usize, String>,
    /// Zero-based indices of the columns to keep, in the order listed.
    /// `None` keeps every column.
    pub usecols: Option<Vec<usize>>,
    /// Column names. Not read by `genfromtxt`.
    pub names: Option<Vec<String>>,
    /// Not read by `genfromtxt`; presumably names to avoid when building
    /// column names, as in NumPy - TODO confirm.
    pub excludelist: Option<Vec<String>>,
    /// Strings treated as missing in every column.
    pub default_missing: Vec<String>,
    /// Not read by `genfromtxt`; presumably the substitute for spaces in
    /// column names - TODO confirm.
    pub replace_space: Option<char>,
    /// Not read by `genfromtxt`.
    pub case_sensitive: bool,
    /// Not read by `genfromtxt`; presumably characters to delete from column
    /// names - TODO confirm.
    pub deletechars: String,
    /// When `true`, surrounding whitespace is trimmed from each field before
    /// it is examined.
    pub autostrip: bool,
    /// Upper bound on the number of data rows read. `None` reads every row.
    pub max_rows: Option<usize>,
    /// Text encoding name (defaults to `"utf-8"`). Not read by `genfromtxt`.
    pub encoding: String,
}

impl Default for GenFromTxtOptions {
    /// Defaults: `f64` data, `#` comments, whitespace-separated fields, nothing
    /// skipped, no converters or per-column overrides, and the common
    /// "not available" spellings treated as missing.
    fn default() -> Self {
        // Spellings recognised as missing in every column.
        let default_missing: Vec<String> = ["", "N/A", "NA", "NULL", "nan", "NaN", "NAN"]
            .iter()
            .map(|marker| marker.to_string())
            .collect();

        GenFromTxtOptions {
            dtype: String::from("f64"),
            comments: String::from("#"),
            delimiter: None,
            skip_header: 0,
            skip_footer: 0,
            converters: HashMap::new(),
            missing_values: HashMap::new(),
            filling_values: HashMap::new(),
            usecols: None,
            names: None,
            excludelist: None,
            default_missing,
            replace_space: Some('_'),
            case_sensitive: true,
            deletechars: String::new(),
            autostrip: false,
            max_rows: None,
            encoding: String::from("utf-8"),
        }
    }
}
pub fn loadtxt<T>(fname: &Path, options: LoadTxtOptions) -> Result<Array<T>>
where
T: Clone + Default + FromStr + Zero,
<T as FromStr>::Err: std::fmt::Debug,
{
let file = File::open(fname)
.map_err(|e| NumRs2Error::IOError(format!("Failed to open file {:?}: {}", fname, e)))?;
let reader = BufReader::new(file);
let mut lines = reader.lines();
for _ in 0..options.skiprows {
if lines.next().is_none() {
return Err(NumRs2Error::IOError(
"File ended during skip rows".to_string(),
));
}
}
let mut rows = Vec::new();
let mut rows_read = 0;
for (line_num, line_result) in lines.enumerate() {
let line = line_result
.map_err(|e| NumRs2Error::IOError(format!("Error reading line {}: {}", line_num, e)))?;
let trimmed = line.trim();
if trimmed.is_empty() || trimmed.starts_with(&options.comments) {
continue;
}
if let Some(max_rows) = options.max_rows {
if rows_read >= max_rows {
break;
}
}
let values: Vec<&str> = if let Some(ref delimiter) = options.delimiter {
line.split(delimiter).collect()
} else {
line.split_whitespace().collect()
};
if values.is_empty() {
continue;
}
let selected_values: Vec<&str> = if let Some(ref usecols) = options.usecols {
usecols
.iter()
.filter_map(|&col_idx| values.get(col_idx))
.copied()
.collect()
} else {
values
};
let mut row = Vec::with_capacity(selected_values.len());
for (col_idx, value_str) in selected_values.iter().enumerate() {
let parsed_value = value_str.trim().parse::<T>().map_err(|e| {
NumRs2Error::ConversionError(format!(
"Failed to parse '{}' at line {}, column {}: {:?}",
value_str,
line_num + options.skiprows,
col_idx,
e
))
})?;
row.push(parsed_value);
}
if !row.is_empty() {
rows.push(row);
rows_read += 1;
}
}
if rows.is_empty() {
return Err(NumRs2Error::IOError("No data found in file".to_string()));
}
let row_length = rows[0].len();
for (i, row) in rows.iter().enumerate() {
if row.len() != row_length {
return Err(NumRs2Error::DimensionMismatch(format!(
"Row {} has {} columns, expected {}",
i,
row.len(),
row_length
)));
}
}
let total_elements = rows.len() * row_length;
let mut data = Vec::with_capacity(total_elements);
for row in rows {
data.extend(row);
}
let shape = if row_length == 1 {
vec![data.len()]
} else {
vec![data.len() / row_length, row_length]
};
let mut array = Array::from_vec(data).reshape(&shape);
while array.ndim() < options.ndmin {
let new_shape = {
let mut shape = vec![1];
shape.extend(array.shape());
shape
};
array = array.reshape(&new_shape);
}
Ok(array)
}
/// Writes a 1-D or 2-D array to a text file, one row per line.
///
/// A 1-D array is written as a single column. Values within a row are joined
/// with `options.delimiter`, and every row is terminated with
/// `options.newline`. Values are formatted through their `Display`
/// implementation; `options.fmt` is not applied.
///
/// `options.header` and `options.footer`, when present, are written before and
/// after the data. Each of their lines is prefixed with `options.comments` and
/// terminated with `options.newline`.
///
/// # Errors
///
/// * `DimensionMismatch` - the array is neither 1-D nor 2-D. This is checked
///   before the file is created, so an existing file is left untouched.
/// * `IOError` - the file cannot be created or written.
/// * Any error returned by the array's element accessor.
// `X` is upper-case, presumably to mirror NumPy's `savetxt(fname, X, ...)`.
#[allow(non_snake_case)]
pub fn savetxt<T>(fname: &Path, X: &Array<T>, options: SaveTxtOptions) -> Result<()>
where
    T: Clone + Display + Zero,
{
    // Validate first so that a rejected array does not truncate the target file.
    let ndim = X.ndim();
    let (rows, cols) = match ndim {
        1 => (X.size(), 1),
        2 => {
            let shape = X.shape();
            (shape[0], shape[1])
        }
        other => {
            return Err(NumRs2Error::DimensionMismatch(format!(
                "Can only save 1D or 2D arrays to text files, got {}D",
                other
            )))
        }
    };

    let file = File::create(fname)
        .map_err(|e| NumRs2Error::IOError(format!("Failed to create file {:?}: {}", fname, e)))?;
    let mut writer = BufWriter::new(file);

    if let Some(ref header) = options.header {
        // Prefix every header line so the whole header reads back as comments.
        for text in header.split('\n') {
            write!(writer, "{}{}{}", options.comments, text, options.newline)
                .map_err(|e| NumRs2Error::IOError(format!("Failed to write header: {}", e)))?;
        }
    }

    for row in 0..rows {
        let mut line_values = Vec::with_capacity(cols);
        for col in 0..cols {
            // Index the input directly; a 1-D array is addressed by row only.
            let index = if ndim == 1 {
                vec![row]
            } else {
                vec![row, col]
            };
            let value = X.get(&index)?;
            line_values.push(value.to_string());
        }
        let line = line_values.join(&options.delimiter);
        write!(writer, "{}{}", line, options.newline)
            .map_err(|e| NumRs2Error::IOError(format!("Failed to write data: {}", e)))?;
    }

    if let Some(ref footer) = options.footer {
        for text in footer.split('\n') {
            write!(writer, "{}{}{}", options.comments, text, options.newline)
                .map_err(|e| NumRs2Error::IOError(format!("Failed to write footer: {}", e)))?;
        }
    }

    // Flush explicitly: errors raised while dropping a `BufWriter` are lost.
    writer
        .flush()
        .map_err(|e| NumRs2Error::IOError(format!("Failed to flush output: {}", e)))?;
    Ok(())
}
/// Reads a delimited text file into an [`Array`], tolerating missing values.
///
/// The whole file is read into memory. The first `options.skip_header` and the
/// last `options.skip_footer` lines are discarded, and each remaining line is
/// processed as follows:
///
/// 1. Everything from the first occurrence of `options.comments` onward is
///    dropped. An empty marker disables comment handling.
/// 2. Lines that are blank after that step are skipped.
/// 3. The remainder is split on `options.delimiter`, or on runs of whitespace
///    when the delimiter is `None`.
/// 4. If `options.usecols` is set, the listed columns are picked in the given
///    order.
/// 5. Each field (trimmed first when `options.autostrip` is set) is checked
///    against `options.default_missing` and the column's entry in
///    `options.missing_values`. Surrounding whitespace is ignored for this
///    comparison.
/// 6. A missing field becomes the column's `options.filling_values` entry
///    parsed as `T`, or `T::zero()` when there is none.
/// 7. Any other field is passed through the column's `options.converters`
///    callback, if any, then trimmed and parsed as `T`.
///
/// Column keys for `converters`, `missing_values` and `filling_values` are
/// positions among the selected columns, i.e. after `usecols` is applied.
///
/// Reading stops once `options.max_rows` data rows have been collected. The
/// result is one-dimensional when every row holds a single value and
/// two-dimensional (`rows x columns`) otherwise.
///
/// # Errors
///
/// * `IOError` - the file cannot be opened or read, is empty, has no lines
///   left after skipping header and footer, or contains no data rows.
/// * `ConversionError` - a field or a filling value cannot be parsed as `T`.
/// * `DimensionMismatch` - a `usecols` index does not exist on some line, or
///   rows differ in length.
/// * Any error returned by a converter callback.
///
/// Line numbers in error messages are 1-based positions in the file.
pub fn genfromtxt<T>(fname: &Path, options: GenFromTxtOptions) -> Result<Array<T>>
where
    T: Clone + Default + FromStr + Zero + Num,
    <T as FromStr>::Err: std::fmt::Debug,
{
    // Returns the part of `line` that precedes the first `marker`.
    // An empty marker means "no comment syntax", so the line is kept whole.
    fn strip_comment<'a>(line: &'a str, marker: &str) -> &'a str {
        if marker.is_empty() {
            return line;
        }
        line.find(marker).map_or(line, |at| &line[..at])
    }

    let file = File::open(fname)
        .map_err(|e| NumRs2Error::IOError(format!("Failed to open file {:?}: {}", fname, e)))?;
    // All lines are needed up front so the footer can be cut off.
    let lines: Vec<String> = BufReader::new(file)
        .lines()
        .collect::<std::io::Result<Vec<_>>>()
        .map_err(|e| NumRs2Error::IOError(format!("Failed to read file: {}", e)))?;
    if lines.is_empty() {
        return Err(NumRs2Error::IOError("File is empty".to_string()));
    }
    if options.skip_header >= lines.len() {
        return Err(NumRs2Error::IOError(
            "skip_header is larger than file length".to_string(),
        ));
    }
    let end_line = lines.len().saturating_sub(options.skip_footer);
    if options.skip_header >= end_line {
        return Err(NumRs2Error::IOError(
            "No data lines available after skipping header and footer".to_string(),
        ));
    }

    let mut rows: Vec<Vec<T>> = Vec::new();
    for (offset, line) in lines[options.skip_header..end_line].iter().enumerate() {
        // 1-based position of this line in the file.
        let line_no = options.skip_header + offset + 1;

        let body = strip_comment(line, &options.comments);
        if body.trim().is_empty() {
            continue;
        }
        if let Some(limit) = options.max_rows {
            if rows.len() >= limit {
                break;
            }
        }

        let fields: Vec<&str> = if let Some(ref delimiter) = options.delimiter {
            body.split(delimiter.as_str()).collect()
        } else {
            body.split_whitespace().collect()
        };
        if fields.is_empty() {
            continue;
        }

        let selected: Vec<&str> = if let Some(ref wanted) = options.usecols {
            let mut picked = Vec::with_capacity(wanted.len());
            for &col in wanted {
                match fields.get(col) {
                    Some(field) => picked.push(*field),
                    // A missing column is a caller error; dropping it silently
                    // would shift the remaining columns.
                    None => {
                        return Err(NumRs2Error::DimensionMismatch(format!(
                            "usecols index {} is out of range at line {} ({} columns available)",
                            col,
                            line_no,
                            fields.len()
                        )))
                    }
                }
            }
            picked
        } else {
            fields
        };

        let mut row = Vec::with_capacity(selected.len());
        for (col_idx, field) in selected.iter().enumerate() {
            let text: &str = if options.autostrip { field.trim() } else { field };
            // Padding around a field must not hide a missing-value marker, so
            // both the raw and the trimmed text are compared.
            let bare = text.trim();
            let is_marker = |marker: &String| marker.as_str() == text || marker.as_str() == bare;
            let is_missing = options.default_missing.iter().any(is_marker)
                || options
                    .missing_values
                    .get(&col_idx)
                    .map_or(false, |markers| markers.iter().any(is_marker));

            let parsed_value = if is_missing {
                match options.filling_values.get(&col_idx) {
                    Some(filling) => filling.parse::<T>().map_err(|e| {
                        NumRs2Error::ConversionError(format!(
                            "Failed to parse filling value '{}': {:?}",
                            filling, e
                        ))
                    })?,
                    None => T::zero(),
                }
            } else {
                // Only allocate when a converter actually produces new text.
                let converted;
                let to_parse: &str = match options.converters.get(&col_idx) {
                    Some(converter) => {
                        converted = converter(text)?;
                        converted.as_str()
                    }
                    None => text,
                };
                // Trim before parsing: numeric `FromStr` impls reject padding.
                to_parse.trim().parse::<T>().map_err(|e| {
                    NumRs2Error::ConversionError(format!(
                        "Failed to parse '{}' at line {}, column {}: {:?}",
                        to_parse, line_no, col_idx, e
                    ))
                })?
            };
            row.push(parsed_value);
        }
        if !row.is_empty() {
            rows.push(row);
        }
    }

    if rows.is_empty() {
        return Err(NumRs2Error::IOError("No data found in file".to_string()));
    }

    // Flatten row-major while checking that every row matches the first one.
    let row_length = rows[0].len();
    let row_count = rows.len();
    let mut data = Vec::with_capacity(row_count * row_length);
    for (i, row) in rows.into_iter().enumerate() {
        if row.len() != row_length {
            return Err(NumRs2Error::DimensionMismatch(format!(
                "Row {} has {} columns, expected {}",
                i,
                row.len(),
                row_length
            )));
        }
        data.extend(row);
    }

    // A single column collapses to a 1-D array.
    let shape = if row_length == 1 {
        vec![data.len()]
    } else {
        vec![row_count, row_length]
    };
    Ok(Array::from_vec(data).reshape(&shape))
}
/// Guesses the field delimiter of a text file from its first lines.
///
/// Up to `sample_lines` lines (10 when `None`) are read. Blank lines and lines
/// starting with `#` are ignored. For each candidate delimiter - `","`,
/// `"\t"`, `";"`, `"|"`, `" "` - the score is the average number of fields per
/// line, counting only lines that the candidate splits into more than one
/// field. The candidate with the highest score wins.
///
/// Ties are resolved in favour of the candidate listed first above, so the
/// result is deterministic. If no candidate splits any line, a single space is
/// returned.
///
/// # Errors
///
/// `IOError` if the file cannot be opened or read, or if no lines were sampled
/// (empty file, or `sample_lines` of zero).
pub fn detect_delimiter(fname: &Path, sample_lines: Option<usize>) -> Result<String> {
    // Order matters: earlier candidates win ties.
    const CANDIDATES: [&str; 5] = [",", "\t", ";", "|", " "];

    let file = File::open(fname)
        .map_err(|e| NumRs2Error::IOError(format!("Failed to open file {:?}: {}", fname, e)))?;
    let sample_size = sample_lines.unwrap_or(10);
    let sample: Vec<String> = BufReader::new(file)
        .lines()
        .take(sample_size)
        .collect::<std::io::Result<Vec<_>>>()
        .map_err(|e| NumRs2Error::IOError(format!("Failed to read file: {}", e)))?;
    if sample.is_empty() {
        return Err(NumRs2Error::IOError("File is empty".to_string()));
    }

    let mut best: Option<(&str, f64)> = None;
    for &candidate in CANDIDATES.iter() {
        let mut total_fields = 0usize;
        let mut split_lines = 0usize;
        for line in &sample {
            let trimmed = line.trim();
            if trimmed.is_empty() || trimmed.starts_with('#') {
                continue;
            }
            let fields = line.split(candidate).count();
            if fields > 1 {
                total_fields += fields;
                split_lines += 1;
            }
        }
        if split_lines == 0 {
            continue;
        }
        let score = total_fields as f64 / split_lines as f64;
        // Strictly greater: an equal score never displaces an earlier candidate.
        let improves = match best {
            Some((_, top)) => score > top,
            None => true,
        };
        if improves {
            best = Some((candidate, score));
        }
    }

    Ok(best.map_or(" ", |(delimiter, _)| delimiter).to_string())
}
pub fn fromregex<T>(
fname: &Path,
regexp: &str,
dtype: &str,
encoding: Option<&str>,
) -> Result<Array<T>>
where
T: Clone + Default + FromStr + Zero,
<T as FromStr>::Err: std::fmt::Debug,
{
let _encoding = encoding.unwrap_or("utf-8");
let regex = Regex::new(regexp).map_err(|e| {
NumRs2Error::InvalidOperation(format!("Invalid regular expression '{}': {}", regexp, e))
})?;
let file = File::open(fname)
.map_err(|e| NumRs2Error::IOError(format!("Failed to open file {:?}: {}", fname, e)))?;
let reader = BufReader::new(file);
let mut rows = Vec::new();
let mut expected_columns = None;
for (line_num, line_result) in reader.lines().enumerate() {
let line = line_result
.map_err(|e| NumRs2Error::IOError(format!("Error reading line {}: {}", line_num, e)))?;
if line.trim().is_empty() {
continue;
}
if let Some(captures) = regex.captures(&line) {
let mut row = Vec::new();
for i in 1..captures.len() {
if let Some(capture) = captures.get(i) {
let value_str = capture.as_str().trim();
let parsed_value = value_str.parse::<T>().map_err(|e| {
NumRs2Error::ConversionError(format!(
"Failed to parse '{}' as {} at line {}, capture group {}: {:?}",
value_str,
dtype,
line_num + 1,
i,
e
))
})?;
row.push(parsed_value);
}
}
if let Some(expected) = expected_columns {
if row.len() != expected {
return Err(NumRs2Error::DimensionMismatch(format!(
"Line {} has {} capture groups, expected {}",
line_num + 1,
row.len(),
expected
)));
}
} else {
expected_columns = Some(row.len());
}
if !row.is_empty() {
rows.push(row);
}
}
}
if rows.is_empty() {
return Err(NumRs2Error::IOError(
"No data found matching the regular expression".to_string(),
));
}
let row_length = rows[0].len();
let total_elements = rows.len() * row_length;
let mut data = Vec::with_capacity(total_elements);
for row in rows {
data.extend(row);
}
let shape = if row_length == 1 {
vec![data.len()]
} else {
vec![data.len() / row_length, row_length]
};
Ok(Array::from_vec(data).reshape(&shape))
}
pub fn savez_compressed<T: Clone + serde::Serialize>(
fname: &Path,
arrays: &HashMap<String, Array<T>>,
) -> Result<()> {
use std::fs::File;
if arrays.is_empty() {
return Err(NumRs2Error::InvalidOperation(
"Cannot save empty array collection".to_string(),
));
}
let file = File::create(fname)
.map_err(|e| NumRs2Error::IOError(format!("Failed to create NPZ file: {}", e)))?;
crate::io::npy_npz::save_npz_arrays(arrays, file, true)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::NamedTempFile;

    /// Creates a temporary file containing `lines`, each followed by a newline.
    fn temp_file_with(lines: &[&str]) -> NamedTempFile {
        let mut file = NamedTempFile::new().expect("Failed to create temp file");
        for line in lines {
            writeln!(file, "{}", line).expect("Failed to write to temp file");
        }
        file
    }

    /// Whitespace-separated rows load as a 2-D array in row-major order.
    #[test]
    fn test_loadtxt_basic() {
        let input = temp_file_with(&["1.0 2.0 3.0", "4.0 5.0 6.0"]);
        let loaded = loadtxt::<f64>(input.path(), LoadTxtOptions::default())
            .expect("Failed to load text file");
        assert_eq!(loaded.shape(), &[2, 3]);
        assert_eq!(loaded.to_vec(), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    /// Lines starting with the comment marker are ignored.
    #[test]
    fn test_loadtxt_with_comments() {
        let input = temp_file_with(&[
            "# This is a comment",
            "1.0 2.0",
            "# Another comment",
            "3.0 4.0",
        ]);
        let loaded = loadtxt::<f64>(input.path(), LoadTxtOptions::default())
            .expect("Failed to load text file");
        assert_eq!(loaded.shape(), &[2, 2]);
        assert_eq!(loaded.to_vec(), vec![1.0, 2.0, 3.0, 4.0]);
    }

    /// An explicit delimiter replaces whitespace splitting.
    #[test]
    fn test_loadtxt_with_delimiter() {
        let input = temp_file_with(&["1.0,2.0,3.0", "4.0,5.0,6.0"]);
        let comma_separated = LoadTxtOptions {
            delimiter: Some(",".to_string()),
            ..Default::default()
        };
        let loaded =
            loadtxt::<f64>(input.path(), comma_separated).expect("Failed to load text file");
        assert_eq!(loaded.shape(), &[2, 3]);
        assert_eq!(loaded.to_vec(), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
    }

    /// A 2x2 array is written as two lines holding the expected digits.
    #[test]
    fn test_savetxt_basic() {
        let matrix = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0]).reshape(&[2, 2]);
        let output = NamedTempFile::new().expect("Failed to create temp file");
        savetxt(output.path(), &matrix, SaveTxtOptions::default())
            .expect("Failed to save text file");

        let content = fs::read_to_string(output.path()).expect("Failed to read saved file");
        let lines: Vec<&str> = content.trim().split('\n').collect();
        assert_eq!(lines.len(), 2);
        assert!(lines[0].contains("1") && lines[0].contains("2"));
        assert!(lines[1].contains("3") && lines[1].contains("4"));
    }

    /// Default missing markers are replaced by zero.
    #[test]
    fn test_genfromtxt_with_missing() {
        let input = temp_file_with(&["1.0 2.0 3.0", "4.0 nan 6.0", "7.0 8.0 N/A"]);
        let loaded = genfromtxt::<f64>(input.path(), GenFromTxtOptions::default())
            .expect("Failed to load with genfromtxt");
        assert_eq!(loaded.shape(), &[3, 3]);
        let expected = vec![1.0, 2.0, 3.0, 4.0, 0.0, 6.0, 7.0, 8.0, 0.0];
        assert_eq!(loaded.to_vec(), expected);
    }

    /// A comma-separated sample is recognised as such.
    #[test]
    fn test_detect_delimiter() {
        let input = temp_file_with(&["1,2,3", "4,5,6", "7,8,9"]);
        let found = detect_delimiter(input.path(), Some(3)).expect("Failed to detect delimiter");
        assert_eq!(found, ",");
    }

    /// Two capture groups per line give a two-column array.
    #[test]
    fn test_fromregex() {
        let input = temp_file_with(&[
            "Value: 1.5, Count: 10",
            "Value: 2.3, Count: 20",
            "Value: 4.7, Count: 15",
        ]);
        let pattern = r"Value: ([0-9.]+), Count: ([0-9]+)";
        let parsed = fromregex::<f64>(input.path(), pattern, "f64", None)
            .expect("Failed to parse with regex");
        assert_eq!(parsed.shape(), &[3, 2]);
        let expected = vec![1.5, 10.0, 2.3, 20.0, 4.7, 15.0];
        assert_eq!(parsed.to_vec(), expected);
    }

    /// A single capture group gives a 1-D array.
    #[test]
    fn test_fromregex_single_column() {
        let input = temp_file_with(&[
            "Temperature: 23.5°C",
            "Temperature: 25.1°C",
            "Temperature: 22.8°C",
        ]);
        let pattern = r"Temperature: ([0-9.]+)°C";
        let parsed = fromregex::<f64>(input.path(), pattern, "f64", None)
            .expect("Failed to parse with regex");
        assert_eq!(parsed.shape(), &[3]);
        let expected = vec![23.5, 25.1, 22.8];
        assert_eq!(parsed.to_vec(), expected);
    }
}