use std::collections::HashSet;
use std::convert::TryFrom;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use runmat_accelerate_api::HostTensorView;
use runmat_builtins::{LogicalArray, Tensor, Value};
use runmat_filesystem::File;
use runmat_macros::runtime_builtin;
use crate::builtins::common::spec::{
BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
ReductionNaN, ResidencyPolicy, ShapeRequirements,
};
use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
const BUILTIN_NAME: &str = "readmatrix";
// GPU metadata registered for the `readmatrix` builtin.
// As stated in `notes`, the builtin runs on the host only: no precisions or provider
// hooks are declared, and the residency policy is `GatherImmediately`.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::io::tabular::readmatrix")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
name: "readmatrix",
op_kind: GpuOpKind::Custom("io-readmatrix"),
supported_precisions: &[],
broadcast: BroadcastSemantics::None,
provider_hooks: &[],
constant_strategy: ConstantStrategy::InlineLiteral,
residency: ResidencyPolicy::GatherImmediately,
nan_mode: ReductionNaN::Include,
two_pass_threshold: None,
workgroup_size: None,
accepts_nan_mode: false,
notes: "Runs entirely on the host; acceleration providers are not involved.",
};
/// Creates a [`RuntimeError`] with the given `message`, tagged with this builtin's name.
fn readmatrix_error(message: impl Into<String>) -> RuntimeError {
    let builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
    builder.build()
}
/// Creates a [`RuntimeError`] with the given `message`, tagged with this builtin's name
/// and carrying `source` as the underlying cause.
fn readmatrix_error_with_source<E>(message: impl Into<String>, source: E) -> RuntimeError
where
    E: std::error::Error + Send + Sync + 'static,
{
    let tagged = build_runtime_error(message).with_builtin(BUILTIN_NAME);
    tagged.with_source(source).build()
}
/// Re-wraps an error raised by a helper so that it is attributed to `readmatrix`.
///
/// The original message is reused, the original identifier (if any) is carried over,
/// and the incoming error is attached as the source of the new one.
fn map_control_flow(err: RuntimeError) -> RuntimeError {
    // Copy out what we need first: `err` is moved into the builder as the source.
    let original_identifier = err.identifier().map(|id| id.to_string());
    let text = err.message().to_string();
    let rebuilt = build_runtime_error(text)
        .with_builtin(BUILTIN_NAME)
        .with_source(err);
    match original_identifier {
        Some(id) => rebuilt.with_identifier(id).build(),
        None => rebuilt.build(),
    }
}
// Fusion metadata registered for the `readmatrix` builtin.
// No elementwise or reduction descriptor is provided; per `notes`, the builtin is not
// eligible for fusion and executes as a standalone host operation.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::io::tabular::readmatrix")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
name: "readmatrix",
shape: ShapeRequirements::Any,
constant_strategy: ConstantStrategy::InlineLiteral,
elementwise: None,
reduction: None,
emits_nan: false,
notes: "Not eligible for fusion; executes as a standalone host operation.",
};
// Entry point of the `readmatrix` builtin.
//
// `path` is the file name (string scalar or 1-by-N character vector, see `resolve_path`).
// `rest` holds an optional leading options struct followed by name/value pairs
// (see `parse_options`).
//
// Steps: gather the path value, parse and validate the options, resolve the path,
// read the file into a numeric tensor, then convert the tensor to the requested
// output representation. Any failing step returns its error unchanged via `?`.
#[runtime_builtin(
name = "readmatrix",
category = "io/tabular",
summary = "Import numeric data from delimited text files into a RunMat matrix.",
keywords = "readmatrix,csv,delimited text,numeric import,table",
accel = "cpu",
type_resolver(crate::builtins::io::type_resolvers::readmatrix_type),
builtin_path = "crate::builtins::io::tabular::readmatrix"
)]
async fn readmatrix_builtin(path: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
// Errors from gathering are re-attributed to this builtin by `map_control_flow`.
let path_value = gather_if_needed_async(&path)
.await
.map_err(map_control_flow)?;
let options = parse_options(&rest).await?;
// Cross-option checks (range ordering, separator conflicts) run before any file I/O.
options.validate()?;
let resolved = resolve_path(&path_value)?;
let tensor = read_numeric_matrix(&resolved, &options)?;
finalize_output(tensor, &options)
}
/// Builds [`ReadMatrixOptions`] from the arguments that follow the file name.
///
/// A leading struct argument is applied field by field; everything after it must be
/// name/value pairs, which are applied in order.
///
/// # Errors
/// Fails when a name has no matching value, when a name is not a string scalar, or
/// when an individual option rejects its value.
async fn parse_options(args: &[Value]) -> BuiltinResult<ReadMatrixOptions> {
    let mut options = ReadMatrixOptions::default();
    let pairs = match args.first() {
        Some(Value::Struct(struct_value)) => {
            parse_struct_options(struct_value, &mut options).await?;
            &args[1..]
        }
        _ => args,
    };
    for pair in pairs.chunks(2) {
        // A trailing chunk with a single element is a name without a value.
        let (raw_name, raw_value) = match pair {
            [name, value] => (name, value),
            _ => {
                return Err(readmatrix_error(
                    "readmatrix: name/value inputs must appear in pairs",
                ))
            }
        };
        let gathered_name = gather_if_needed_async(raw_name)
            .await
            .map_err(map_control_flow)?;
        let option_name = option_name_from_value(&gathered_name)?;
        apply_option(&mut options, &option_name, raw_value).await?;
    }
    Ok(options)
}
/// Applies every field of an options struct to `options`, using the field name as the
/// option name and stopping at the first option that fails.
async fn parse_struct_options(
    struct_value: &runmat_builtins::StructValue,
    options: &mut ReadMatrixOptions,
) -> BuiltinResult<()> {
    for (field_name, field_value) in struct_value.fields.iter() {
        apply_option(options, field_name, field_value).await?;
    }
    Ok(())
}
/// Applies a single named option to `options`.
///
/// Option names are matched case-insensitively after trimming surrounding whitespace.
/// Previously only `Like` was trimmed, so a padded name such as `" Delimiter "` was
/// silently dropped; all names now go through the same normalization.
///
/// The `Like` prototype is stored as given (it is not gathered), so its original
/// representation stays available to `finalize_like`. Every other value is gathered
/// before it is interpreted.
///
/// Unrecognized option names are ignored, matching the existing behavior.
///
/// # Errors
/// Returns the gathering error (re-attributed to this builtin) or the validation error
/// produced by the specific option parser.
async fn apply_option(
    options: &mut ReadMatrixOptions,
    name: &str,
    value: &Value,
) -> BuiltinResult<()> {
    let key = name.trim().to_ascii_lowercase();
    if key == "like" {
        return options.set_like(value.clone());
    }
    let gathered = gather_if_needed_async(value)
        .await
        .map_err(map_control_flow)?;
    match key.as_str() {
        "delimiter" => options.delimiter = Some(parse_delimiter(&gathered)?),
        "numheaderlines" => {
            options.num_header_lines = value_to_usize(&gathered, "NumHeaderLines")?;
        }
        "treatasmissing" => {
            for token in parse_treat_as_missing(&gathered).await? {
                options.add_missing_token(&token);
            }
        }
        "decimalseparator" => {
            options.decimal_separator = parse_separator_char(&gathered, "DecimalSeparator")?;
        }
        "thousandsseparator" => {
            options.thousands_separator =
                Some(parse_separator_char(&gathered, "ThousandsSeparator")?);
        }
        "emptyvalue" => options.empty_value = Some(value_to_f64(&gathered, "EmptyValue")?),
        "outputtype" => {
            let text = value_to_string_scalar(&gathered, "OutputType")?;
            options.set_output_type(&text)?;
        }
        "range" => options.range = Some(parse_range(&gathered)?),
        // Unknown options are deliberately left as a no-op (pre-existing behavior).
        _ => {}
    }
    Ok(())
}
/// Extracts an option name from `value`.
///
/// Delegates to `value_to_string_scalar` with the context label `"option name"`, so the
/// accepted inputs and error messages are those of that helper.
fn option_name_from_value(value: &Value) -> BuiltinResult<String> {
value_to_string_scalar(value, "option name")
}
/// Interprets the value of the `Delimiter` option.
///
/// Keywords are matched case-insensitively after trimming:
/// `tab` / `\t` (backslash followed by `t`), `space` / `whitespace`, `comma`,
/// `semicolon` / `semi`, and `pipe` / `bar`. The `\t`, `semi` and `bar` spellings are
/// accepted for compatibility with the names MATLAB documents for this option.
///
/// Any other text is used literally: a single character becomes a character delimiter
/// and longer text becomes a multi-character string delimiter (untrimmed).
///
/// # Errors
/// Fails when the value is not a string scalar or is the empty string.
fn parse_delimiter(value: &Value) -> BuiltinResult<Delimiter> {
    let text = value_to_string_scalar(value, "Delimiter")?;
    if text.is_empty() {
        return Err(readmatrix_error("readmatrix: Delimiter cannot be empty"));
    }
    let keyword = text.trim().to_ascii_lowercase();
    let delimiter = match keyword.as_str() {
        "tab" | "\\t" => Delimiter::Char('\t'),
        "space" | "whitespace" => Delimiter::Whitespace,
        "comma" => Delimiter::Char(','),
        "semicolon" | "semi" => Delimiter::Char(';'),
        "pipe" | "bar" => Delimiter::Char('|'),
        _ => {
            // Literal delimiter: decide between single-character and string forms.
            let mut chars = text.chars();
            match (chars.next(), chars.next()) {
                (Some(only), None) => Delimiter::Char(only),
                _ => Delimiter::String(text),
            }
        }
    };
    Ok(delimiter)
}
/// Parses a separator option (`option_name`) that must be exactly one character.
///
/// # Errors
/// Fails when the value is not a string scalar, does not contain exactly one
/// character, or is a carriage return / line feed.
fn parse_separator_char(value: &Value, option_name: &str) -> BuiltinResult<char> {
    let text = value_to_string_scalar(value, option_name)?;
    let mut chars = text.chars();
    let separator = match (chars.next(), chars.next()) {
        (Some(only), None) => only,
        // Covers both the empty string and anything longer than one character.
        _ => {
            return Err(readmatrix_error(format!(
                "readmatrix: {option_name} must be a single character"
            )))
        }
    };
    if matches!(separator, '\n' | '\r') {
        return Err(readmatrix_error(format!(
            "readmatrix: {option_name} cannot be a newline character"
        )));
    }
    Ok(separator)
}
/// Converts `value` into an owned `String`.
///
/// Accepts a string value, a single-row character array, or a string array with exactly
/// one element. `context` names the argument in error messages.
fn value_to_string_scalar(value: &Value, context: &str) -> BuiltinResult<String> {
    match value {
        Value::String(text) => Ok(text.to_owned()),
        Value::CharArray(chars) if chars.rows == 1 => Ok(chars.data.iter().collect::<String>()),
        Value::StringArray(array) => match array.data.as_slice() {
            [only] => Ok(only.clone()),
            _ => Err(readmatrix_error(format!(
                "readmatrix: {context} must be a scalar string array"
            ))),
        },
        _ => Err(readmatrix_error(format!(
            "readmatrix: expected {context} as a string scalar or character vector"
        ))),
    }
}
fn value_to_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
match value {
Value::Int(i) => {
let num = i.to_i64();
if num < 0 {
Err(readmatrix_error(format!(
"readmatrix: {context} must be a non-negative integer"
)))
} else {
Ok(num as usize)
}
}
Value::Num(n) => {
if !n.is_finite() {
return Err(readmatrix_error(format!(
"readmatrix: {context} must be a finite non-negative integer"
)));
}
if *n < 0.0 {
return Err(readmatrix_error(format!(
"readmatrix: {context} must be a non-negative integer"
)));
}
if (n.round() - n).abs() > f64::EPSILON {
return Err(readmatrix_error(format!(
"readmatrix: {context} must be an integer value"
)));
}
Ok(n.round() as usize)
}
_ => Err(readmatrix_error(format!(
"readmatrix: {context} must be provided as a numeric scalar"
))),
}
}
/// Converts a numeric scalar into an `f64`.
///
/// Integer values are converted directly. Floating-point values and one-element tensors
/// must be finite. `context` names the argument in error messages.
fn value_to_f64(value: &Value, context: &str) -> BuiltinResult<f64> {
    let candidate = match value {
        Value::Int(i) => return Ok(i.to_f64()),
        Value::Num(n) => *n,
        Value::Tensor(t) if t.data.len() == 1 => t.data[0],
        // Non-scalar tensors and every non-numeric value share the same message.
        _ => {
            return Err(readmatrix_error(format!(
                "readmatrix: {context} must be a numeric scalar"
            )))
        }
    };
    if candidate.is_finite() {
        Ok(candidate)
    } else {
        Err(readmatrix_error(format!(
            "readmatrix: {context} must be a finite numeric scalar"
        )))
    }
}
/// Collects the tokens listed in a `TreatAsMissing` option value.
///
/// Accepts strings, single-row character arrays, string arrays (every element), numeric
/// scalars (rendered as text) and cell arrays whose elements are any of these, including
/// nested cells. Returns the tokens as text; the order follows the worklist traversal
/// rather than the input order.
///
/// # Errors
/// Fails for tensors with more than one element and for any other value kind. Errors
/// raised while gathering cell elements are re-attributed to this builtin.
async fn parse_treat_as_missing(value: &Value) -> BuiltinResult<Vec<String>> {
let mut out = Vec::new();
// Explicit worklist instead of recursion, so nested cells need no recursive async call.
let mut stack = vec![value.clone()];
while let Some(value) = stack.pop() {
match value {
Value::String(s) => out.push(s),
Value::CharArray(ca) if ca.rows == 1 => out.push(ca.data.iter().collect()),
Value::StringArray(sa) => out.extend(sa.data),
Value::Num(n) => out.push(format_numeric_token(n)),
Value::Int(i) => out.push(format!("{}", i.to_i64())),
Value::Tensor(t) => {
if t.data.len() == 1 {
out.push(format_numeric_token(t.data[0]));
} else {
return Err(readmatrix_error(
"readmatrix: TreatAsMissing entries must be scalar values",
));
}
}
Value::Cell(cell) => {
for handle in &cell.data {
// NOTE(review): presumably `as_raw()` yields a pointer that stays valid and
// unaliased-for-writes while `cell` is alive — confirm against the handle type.
let inner = unsafe { &*handle.as_raw() };
let gathered = gather_if_needed_async(inner)
.await
.map_err(map_control_flow)?;
// Each element is re-examined by the outer loop, which handles nested cells.
stack.push(gathered);
}
}
_ => {
return Err(readmatrix_error(
"readmatrix: TreatAsMissing values must be strings or numeric scalars",
))
}
}
}
Ok(out)
}
/// Renders a numeric missing-value marker as text.
///
/// Zero of either sign is rendered as `"0"`; every other value uses the default
/// `Display` formatting of `f64`.
fn format_numeric_token(value: f64) -> String {
    if value != 0.0 {
        return value.to_string();
    }
    String::from("0")
}
/// Parsed options controlling how `readmatrix` reads and converts a file.
#[derive(Clone)]
struct ReadMatrixOptions {
/// Explicit field delimiter; when `None`, `read_numeric_matrix` tries to detect one
/// and falls back to whitespace splitting.
delimiter: Option<Delimiter>,
/// Number of leading lines of the file to skip before reading data.
num_header_lines: usize,
/// Character treated as the decimal point inside numeric tokens.
decimal_separator: char,
/// Character removed from numeric tokens before parsing, if any.
thousands_separator: Option<char>,
/// Normalized (trimmed, ASCII-lowercased) tokens that are read as NaN.
treat_as_missing: HashSet<String>,
/// Value used for empty fields and for padding short rows; NaN when `None`.
empty_value: Option<f64>,
/// Optional rectangular sub-range applied to the parsed rows.
range: Option<RangeSpec>,
/// Representation the result is converted to by `finalize_output`.
output_template: OutputTemplate,
}
impl Default for ReadMatrixOptions {
/// Defaults: no explicit delimiter, no header lines, `.` as decimal separator, no
/// thousands separator, no custom missing tokens, no empty-value override, no range,
/// and double output.
fn default() -> Self {
Self {
delimiter: None,
num_header_lines: 0,
decimal_separator: '.',
thousands_separator: None,
treat_as_missing: HashSet::new(),
empty_value: None,
range: None,
output_template: OutputTemplate::Double,
}
}
}
impl ReadMatrixOptions {
    /// Registers `token` (after normalization) as a marker for missing data.
    fn add_missing_token(&mut self, token: &str) {
        self.treat_as_missing.insert(normalize_missing_token(token));
    }

    /// Reports whether `token`, once normalized, was registered as a missing marker.
    fn is_missing_token(&self, token: &str) -> bool {
        // The emptiness check skips normalization when no tokens were configured.
        !self.treat_as_missing.is_empty()
            && self
                .treat_as_missing
                .contains(&normalize_missing_token(token))
    }

    /// Value substituted for empty fields; NaN unless `EmptyValue` was supplied.
    fn empty_value(&self) -> f64 {
        match self.empty_value {
            Some(configured) => configured,
            None => f64::NAN,
        }
    }

    /// Checks constraints that span several options.
    ///
    /// # Errors
    /// Fails when the range is inverted or when the thousands separator equals the
    /// decimal separator.
    fn validate(&self) -> BuiltinResult<()> {
        if let Some(range) = self.range.as_ref() {
            range.validate()?;
        }
        if self.thousands_separator == Some(self.decimal_separator) {
            return Err(readmatrix_error(
                "readmatrix: DecimalSeparator and ThousandsSeparator must differ",
            ));
        }
        Ok(())
    }

    /// Selects the output type by name (`double` or `logical`, case-insensitive).
    ///
    /// # Errors
    /// Fails when a `Like` prototype was already set or the name is not supported.
    fn set_output_type(&mut self, spec: &str) -> BuiltinResult<()> {
        if let OutputTemplate::Like(_) = self.output_template {
            return Err(readmatrix_error(
                "readmatrix: cannot combine 'Like' with OutputType",
            ));
        }
        let chosen = if spec.eq_ignore_ascii_case("double") {
            OutputTemplate::Double
        } else if spec.eq_ignore_ascii_case("logical") {
            OutputTemplate::Logical
        } else {
            return Err(readmatrix_error(format!(
                "readmatrix: unsupported OutputType '{}'",
                spec
            )));
        };
        self.output_template = chosen;
        Ok(())
    }

    /// Stores `proto` as the prototype whose representation the output should follow.
    ///
    /// # Errors
    /// Fails when a prototype was already set or when the output type was changed away
    /// from the default double.
    fn set_like(&mut self, proto: Value) -> BuiltinResult<()> {
        match self.output_template {
            OutputTemplate::Like(_) => Err(readmatrix_error(
                "readmatrix: multiple 'Like' specifications are not supported",
            )),
            OutputTemplate::Logical => Err(readmatrix_error(
                "readmatrix: cannot combine 'Like' with OutputType overrides",
            )),
            OutputTemplate::Double => {
                self.output_template = OutputTemplate::Like(proto);
                Ok(())
            }
        }
    }
}
/// How a line of text is split into fields (see `split_fields`).
#[derive(Clone)]
enum Delimiter {
/// Split on a single character, honoring double-quoted sections.
Char(char),
/// Split on every occurrence of a multi-character string.
String(String),
/// Split on runs of whitespace.
Whitespace,
}
/// Representation requested for the result (consumed by `finalize_output`).
#[derive(Clone)]
enum OutputTemplate {
/// Return the numeric tensor unchanged.
Double,
/// Convert the tensor to a logical array.
Logical,
/// Follow the kind of the given prototype value (see `finalize_like`).
Like(Value),
}
/// Rectangular selection of parsed data, expressed with zero-based inclusive indices.
#[derive(Clone)]
struct RangeSpec {
/// First row to keep.
start_row: usize,
/// First column to keep.
start_col: usize,
/// Last row to keep; `None` means through the last available row.
end_row: Option<usize>,
/// Last column to keep; `None` means through the last available column.
end_col: Option<usize>,
}
impl RangeSpec {
    /// Ensures that explicit end coordinates do not precede the start coordinates.
    fn validate(&self) -> BuiltinResult<()> {
        if matches!(self.end_row, Some(end) if end < self.start_row) {
            return Err(readmatrix_error(
                "readmatrix: Range end row must be >= start row",
            ));
        }
        if matches!(self.end_col, Some(end) if end < self.start_col) {
            return Err(readmatrix_error(
                "readmatrix: Range end column must be >= start column",
            ));
        }
        Ok(())
    }
}
/// Canonical form used to compare missing-value tokens: surrounding whitespace removed
/// and ASCII letters lowercased.
fn normalize_missing_token(token: &str) -> String {
    let mut normalized = token.trim().to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
/// Interprets the value of the `Range` option.
///
/// Textual inputs (string, single-row character array, one-element string array) are
/// parsed as spreadsheet-style references; tensors are parsed as numeric index vectors.
fn parse_range(value: &Value) -> BuiltinResult<RangeSpec> {
    match value {
        Value::Tensor(_) => parse_range_numeric(value),
        Value::String(text) => parse_range_string(text),
        Value::CharArray(chars) if chars.rows == 1 => {
            parse_range_string(&chars.data.iter().collect::<String>())
        }
        Value::StringArray(array) => match array.data.as_slice() {
            [only] => parse_range_string(only),
            _ => Err(readmatrix_error(
                "readmatrix: Range string array inputs must be scalar",
            )),
        },
        _ => Err(readmatrix_error(
            "readmatrix: Range must be provided as a string or numeric vector",
        )),
    }
}
/// Parses a textual range such as `B2` or `B2:C3` into a [`RangeSpec`].
///
/// The start reference must name a column; a missing start row means the first row.
/// When an end reference is present it must name a column as well, while its row is
/// optional.
fn parse_range_string(text: &str) -> BuiltinResult<RangeSpec> {
    let spec = text.trim();
    if spec.is_empty() {
        return Err(readmatrix_error("readmatrix: Range string cannot be empty"));
    }
    let mut pieces = spec.split(':');
    // `split` always yields at least one piece, so the fallback is never used.
    let start_token = pieces.next().unwrap_or(spec);
    let end_token = pieces.next();
    if pieces.next().is_some() {
        return Err(readmatrix_error(format!(
            "readmatrix: invalid Range specification '{}'",
            text
        )));
    }
    let start = parse_cell_reference(start_token)?;
    let start_col = match start.col {
        Some(col) => col,
        None => {
            return Err(readmatrix_error(
                "readmatrix: Range must specify a starting column",
            ))
        }
    };
    let (end_row, end_col) = match end_token {
        None => (None, None),
        Some(token) => {
            let end = parse_cell_reference(token)?;
            if end.col.is_none() {
                return Err(readmatrix_error(
                    "readmatrix: Range end must include a column reference",
                ));
            }
            (end.row, end.col)
        }
    };
    Ok(RangeSpec {
        start_row: start.row.unwrap_or(0),
        start_col,
        end_row,
        end_col,
    })
}
/// Parses a numeric range given as `[row col]` or `[row1 col1 row2 col2]`.
///
/// Each element is validated by `positive_index`, which also converts it to a
/// zero-based index. With two elements the range is open-ended.
fn parse_range_numeric(value: &Value) -> BuiltinResult<RangeSpec> {
    let elements = match value {
        Value::Tensor(tensor) => &tensor.data,
        _ => {
            return Err(readmatrix_error(
                "readmatrix: numeric Range must be provided as a vector with 2 or 4 elements",
            ))
        }
    };
    if !matches!(elements.len(), 2 | 4) {
        return Err(readmatrix_error(
            "readmatrix: numeric Range must contain exactly 2 or 4 elements",
        ));
    }
    // Collecting into a `Result` stops at the first invalid element.
    let indices = elements
        .iter()
        .enumerate()
        .map(|(position, raw)| positive_index(*raw, position))
        .collect::<BuiltinResult<Vec<usize>>>()?;
    let (end_row, end_col) = match indices.as_slice() {
        [_, _, row, col] => (Some(*row), Some(*col)),
        _ => (None, None),
    };
    Ok(RangeSpec {
        start_row: indices[0],
        start_col: indices[1],
        end_row,
        end_col,
    })
}
/// Converts a one-based numeric range element into a zero-based index.
///
/// `position` is the zero-based position of the element within the range vector and is
/// only used (as `position + 1`) in the "too large" error message.
///
/// # Errors
/// Fails for non-finite values, values below 1, fractional values, and values that do
/// not fit in `usize`.
fn positive_index(value: f64, position: usize) -> BuiltinResult<usize> {
    let below_one = || readmatrix_error("readmatrix: Range indices must be >= 1");
    if !value.is_finite() {
        return Err(readmatrix_error("readmatrix: Range indices must be finite"));
    }
    if value < 1.0 {
        return Err(below_one());
    }
    let nearest = value.round();
    if (nearest - value).abs() > f64::EPSILON {
        return Err(readmatrix_error(
            "readmatrix: Range indices must be integers",
        ));
    }
    match (nearest as i64) - 1 {
        negative if negative < 0 => Err(below_one()),
        zero_based => usize::try_from(zero_based).map_err(|_| {
            readmatrix_error(format!(
                "readmatrix: Range index {} is too large to fit in usize",
                position + 1
            ))
        }),
    }
}
/// One endpoint of a textual range, as produced by `parse_cell_reference`.
#[derive(Clone, Copy)]
struct CellReference {
/// Zero-based row, or `None` when the reference contained no digits.
row: Option<usize>,
/// Zero-based column, or `None` when the reference contained no letters.
col: Option<usize>,
}
/// Parses one spreadsheet-style reference such as `B2`, `$B$2`, `B` or `2`.
///
/// `$` markers are ignored. Letters give the column and digits give the row; both are
/// returned zero-based, and either may be absent. Letters must come before digits:
/// previously the two were gathered independently of their order, so malformed input
/// like `A1B2` was accepted as column `AB`, row `12`. Such input is now rejected.
///
/// # Errors
/// Fails for empty references, unexpected characters, letters following digits, row
/// numbers that are zero or do not fit in `usize`, and column designators that overflow.
fn parse_cell_reference(token: &str) -> BuiltinResult<CellReference> {
    let invalid_component = || {
        readmatrix_error(format!(
            "readmatrix: invalid Range component '{}'",
            token
        ))
    };
    let mut letters = String::new();
    let mut digits = String::new();
    for ch in token.trim().chars() {
        if ch == '$' {
            continue;
        }
        if ch.is_ascii_alphabetic() {
            // A column letter after the row digits means the reference is malformed.
            if !digits.is_empty() {
                return Err(invalid_component());
            }
            letters.push(ch.to_ascii_uppercase());
        } else if ch.is_ascii_digit() {
            digits.push(ch);
        } else {
            return Err(invalid_component());
        }
    }
    if letters.is_empty() && digits.is_empty() {
        return Err(readmatrix_error(
            "readmatrix: Range references cannot be empty",
        ));
    }
    let col = if letters.is_empty() {
        None
    } else {
        Some(column_index_from_letters(&letters)?)
    };
    let row = if digits.is_empty() {
        None
    } else {
        let parsed = digits.parse::<usize>().map_err(|_| {
            readmatrix_error(format!(
                "readmatrix: invalid row index '{}' in Range component '{}'",
                digits, token
            ))
        })?;
        if parsed == 0 {
            return Err(readmatrix_error("readmatrix: Range rows must be >= 1"));
        }
        // Rows are written one-based; store them zero-based.
        Some(parsed - 1)
    };
    Ok(CellReference { row, col })
}
/// Converts an uppercase column designator (`A`, `Z`, `AA`, ...) into a zero-based
/// column index using bijective base-26 arithmetic.
///
/// # Errors
/// Fails when a character is not an ASCII uppercase letter, when the value overflows
/// `usize`, or when `letters` is empty.
fn column_index_from_letters(letters: &str) -> BuiltinResult<usize> {
    let one_based = letters.chars().try_fold(0usize, |acc, ch| {
        if !ch.is_ascii_uppercase() {
            return Err(readmatrix_error(format!(
                "readmatrix: invalid column designator '{}' in Range",
                letters
            )));
        }
        // 'A' maps to 1, 'B' to 2, and so on.
        let digit = usize::from(ch as u8 - b'A') + 1;
        acc.checked_mul(26)
            .and_then(|scaled| scaled.checked_add(digit))
            .ok_or_else(|| readmatrix_error("readmatrix: Range column index overflowed"))
    })?;
    one_based
        .checked_sub(1)
        .ok_or_else(|| readmatrix_error("readmatrix: Range column index underflowed"))
}
fn apply_range(
rows: &[Vec<f64>],
max_cols: usize,
range: &RangeSpec,
default_fill: f64,
) -> (Vec<Vec<f64>>, usize) {
if rows.is_empty() || max_cols == 0 {
return (Vec::new(), 0);
}
if range.start_row >= rows.len() || range.start_col >= max_cols {
return (Vec::new(), 0);
}
let last_row = rows.len().saturating_sub(1);
let mut end_row = range.end_row.unwrap_or(last_row);
if end_row > last_row {
end_row = last_row;
}
if end_row < range.start_row {
return (Vec::new(), 0);
}
let last_col = max_cols.saturating_sub(1);
let mut end_col = range.end_col.unwrap_or(last_col);
if end_col > last_col {
end_col = last_col;
}
if end_col < range.start_col {
return (Vec::new(), 0);
}
let mut subset = Vec::new();
let mut subset_max_cols = 0usize;
for row_idx in range.start_row..=end_row {
if row_idx >= rows.len() {
break;
}
let row = &rows[row_idx];
let mut extracted = Vec::with_capacity(end_col - range.start_col + 1);
for col_idx in range.start_col..=end_col {
if col_idx >= max_cols {
break;
}
let value = row.get(col_idx).copied().unwrap_or(default_fill);
extracted.push(value);
}
subset_max_cols = subset_max_cols.max(extracted.len());
subset.push(extracted);
}
if subset_max_cols == 0 {
(Vec::new(), 0)
} else {
(subset, subset_max_cols)
}
}
/// Converts the parsed tensor into the representation requested by the options:
/// unchanged for double output, a logical array for logical output, or whatever the
/// `Like` prototype dictates.
fn finalize_output(tensor: Tensor, options: &ReadMatrixOptions) -> BuiltinResult<Value> {
    let template = &options.output_template;
    if let OutputTemplate::Like(proto) = template {
        return finalize_like(tensor, proto);
    }
    if matches!(template, OutputTemplate::Logical) {
        return tensor_to_logical(tensor);
    }
    Ok(Value::Tensor(tensor))
}
/// Converts a numeric tensor into a logical array of the same shape.
///
/// Elements equal to zero become 0 and every other element (including NaN) becomes 1.
fn tensor_to_logical(tensor: Tensor) -> BuiltinResult<Value> {
    let bits: Vec<_> = tensor
        .data
        .iter()
        .map(|value| if *value == 0.0 { 0 } else { 1 })
        .collect();
    LogicalArray::new(bits, tensor.shape.clone())
        .map(Value::LogicalArray)
        .map_err(|e| readmatrix_error(format!("readmatrix: {e}")))
}
/// Shapes the result after the `Like` prototype.
///
/// Logical prototypes yield a logical array and GPU prototypes trigger an upload
/// attempt. Every other prototype kind leaves the result as a host tensor.
fn finalize_like(tensor: Tensor, proto: &Value) -> BuiltinResult<Value> {
    if let Value::GpuTensor(handle) = proto {
        return tensor_to_gpu(tensor, handle);
    }
    if matches!(proto, Value::LogicalArray(_) | Value::Bool(_)) {
        return tensor_to_logical(tensor);
    }
    Ok(Value::Tensor(tensor))
}
/// Tries to upload `tensor` through the registered acceleration provider.
///
/// This is best effort: when no provider is registered or the upload fails, the host
/// tensor is returned instead of an error. The prototype handle is not inspected.
fn tensor_to_gpu(
    tensor: Tensor,
    _handle: &runmat_accelerate_api::GpuTensorHandle,
) -> BuiltinResult<Value> {
    let uploaded = runmat_accelerate_api::provider().and_then(|provider| {
        let view = HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        };
        provider.upload(&view).ok()
    });
    Ok(match uploaded {
        Some(gpu_handle) => Value::GpuTensor(gpu_handle),
        None => Value::Tensor(tensor),
    })
}
/// Reads the file at `path` and parses it into a numeric tensor.
///
/// Processing steps:
/// 1. Skip the first `num_header_lines` lines, drop trailing `\r`, and discard lines
///    that are blank.
/// 2. Pick the delimiter: the explicit option, otherwise the detected one, otherwise
///    whitespace.
/// 3. Parse every field into an `f64`, remembering the widest row.
/// 4. Apply the optional range.
/// 5. Store the rows column-major, padding short rows with the empty value.
///
/// A byte-order mark is stripped before the blank-line check. Previously it was
/// stripped only afterwards, and because U+FEFF is not whitespace for `trim`, a first
/// line holding only a BOM produced a spurious data row.
///
/// Returns a 0-by-0 tensor when no data remains after filtering or range selection.
///
/// # Errors
/// Fails when the file cannot be opened or read, when a field cannot be parsed as a
/// number, or when tensor construction fails.
fn read_numeric_matrix(path: &Path, options: &ReadMatrixOptions) -> BuiltinResult<Tensor> {
    let empty_matrix = || {
        Tensor::new(Vec::new(), vec![0, 0])
            .map_err(|e| readmatrix_error(format!("readmatrix: {e}")))
    };
    let file = File::open(path).map_err(|err| {
        readmatrix_error_with_source(
            format!("readmatrix: unable to read '{}': {err}", path.display()),
            err,
        )
    })?;
    let reader = BufReader::new(file);
    let mut data_lines: Vec<(usize, String)> = Vec::new();
    for (idx, line_result) in reader.lines().enumerate() {
        let line_number = idx + 1;
        let line = line_result.map_err(|err| {
            readmatrix_error_with_source(
                format!("readmatrix: error reading '{}': {err}", path.display()),
                err,
            )
        })?;
        if line_number <= options.num_header_lines {
            continue;
        }
        let mut cleaned = line.trim_end_matches('\r');
        // Until the first data line is recorded, strip a leading BOM so that a BOM-only
        // line counts as blank and the first data line never carries one.
        if data_lines.is_empty() {
            cleaned = cleaned.trim_start_matches('\u{FEFF}');
        }
        if cleaned.trim().is_empty() {
            continue;
        }
        data_lines.push((line_number, cleaned.to_string()));
    }
    if data_lines.is_empty() {
        return empty_matrix();
    }
    let delimiter = options
        .delimiter
        .clone()
        .or_else(|| detect_delimiter(&data_lines))
        .unwrap_or(Delimiter::Whitespace);
    let mut rows: Vec<Vec<f64>> = Vec::new();
    let mut max_cols = 0usize;
    for (line_number, text) in &data_lines {
        let fields = split_fields(text, &delimiter);
        if fields.is_empty() {
            continue;
        }
        let mut row = Vec::with_capacity(fields.len());
        for (index, field) in fields.iter().enumerate() {
            // Line and column numbers are reported one-based in parse errors.
            row.push(parse_numeric_token(field, options, *line_number, index + 1)?);
        }
        max_cols = max_cols.max(row.len());
        rows.push(row);
    }
    if rows.is_empty() {
        return empty_matrix();
    }
    let default_fill = options.empty_value();
    if let Some(range) = &options.range {
        let (subset_rows, subset_cols) = apply_range(&rows, max_cols, range, default_fill);
        rows = subset_rows;
        max_cols = subset_cols;
    }
    if rows.is_empty() || max_cols == 0 {
        return empty_matrix();
    }
    let row_count = rows.len();
    // Column-major layout: element (r, c) lives at index c * row_count + r. Cells that a
    // short row does not provide keep the pre-filled default value.
    let mut data = vec![default_fill; row_count * max_cols];
    for (row_index, row) in rows.iter().enumerate() {
        for (col_index, value) in row.iter().take(max_cols).enumerate() {
            data[col_index * row_count + row_index] = *value;
        }
    }
    Tensor::new(data, vec![row_count, max_cols])
        .map_err(|e| readmatrix_error(format!("readmatrix: {e}")))
}
/// Guesses the delimiter from the first 32 data lines.
///
/// For each candidate (`,`, tab, `;`, `|`) the average number of fields is computed
/// over the sampled lines that contain it; the candidate with the highest average wins
/// and ties keep the earlier candidate. If no candidate occurs, whitespace is chosen
/// when at least one sampled line has several whitespace-separated tokens. Otherwise
/// `None` is returned.
fn detect_delimiter(lines: &[(usize, String)]) -> Option<Delimiter> {
    const SAMPLE_LIMIT: usize = 32;
    const CANDIDATES: [char; 4] = [',', '\t', ';', '|'];
    let sample = || {
        lines
            .iter()
            .take(SAMPLE_LIMIT)
            .map(|(_, line)| line.as_str())
    };
    let mut best: Option<(f64, char)> = None;
    for candidate in CANDIDATES {
        let (total_fields, hits) = sample()
            .filter(|line| line.contains(candidate))
            .map(|line| line.split(candidate).count())
            .filter(|fields| *fields >= 2)
            .fold((0usize, 0usize), |(total, count), fields| {
                (total + fields, count + 1)
            });
        if hits == 0 {
            continue;
        }
        let average = total_fields as f64 / hits as f64;
        if average < 2.0 {
            continue;
        }
        match best {
            // Only a strictly better average replaces the current leader.
            Some((top, _)) if average <= top => {}
            _ => best = Some((average, candidate)),
        }
    }
    if let Some((_, winner)) = best {
        return Some(Delimiter::Char(winner));
    }
    if sample().any(|line| line.split_whitespace().count() > 1) {
        Some(Delimiter::Whitespace)
    } else {
        None
    }
}
/// Splits one line into owned field strings according to `delimiter`.
///
/// Only the single-character form is quote-aware; string and whitespace delimiters use
/// plain splitting.
fn split_fields(line: &str, delimiter: &Delimiter) -> Vec<String> {
    match delimiter {
        Delimiter::Whitespace => line.split_whitespace().map(String::from).collect(),
        Delimiter::String(pattern) => line.split(pattern.as_str()).map(String::from).collect(),
        Delimiter::Char(ch) => split_with_char_delim(line, *ch),
    }
}
/// Splits `line` on `delimiter`, treating double-quoted sections as opaque.
///
/// Quote characters are removed from the output. Inside a quoted section, a doubled
/// quote (`""`) produces one literal quote, and delimiters do not split. The result
/// always contains at least one field, even for an empty line.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut rest = line.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            // Escaped quote inside a quoted section: emit one quote, skip its partner.
            '"' if quoted && rest.peek() == Some(&'"') => {
                field.push('"');
                rest.next();
            }
            '"' => quoted = !quoted,
            _ if ch == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            _ => field.push(ch),
        }
    }
    fields.push(field);
    fields
}
/// Parses one field into an `f64`.
///
/// Order of interpretation: blank (also after removing one pair of surrounding quotes)
/// gives the empty value; a configured missing token gives NaN; the token is then
/// normalized for separators and checked against the NaN / infinity spellings before
/// falling back to standard float parsing.
///
/// `line_number` and `column_number` are used only in the error message.
fn parse_numeric_token(
    token: &str,
    options: &ReadMatrixOptions,
    line_number: usize,
    column_number: usize,
) -> BuiltinResult<f64> {
    let inner = unquote(token.trim()).trim();
    if inner.is_empty() {
        return Ok(options.empty_value());
    }
    if options.is_missing_token(inner) {
        return Ok(f64::NAN);
    }
    let normalized = normalize_numeric_token(inner, options);
    if normalized.is_empty() {
        return Ok(options.empty_value());
    }
    match normalized.to_ascii_lowercase().as_str() {
        "nan" => Ok(f64::NAN),
        "inf" | "+inf" | "infinity" | "+infinity" => Ok(f64::INFINITY),
        "-inf" | "-infinity" => Ok(f64::NEG_INFINITY),
        _ => normalized.parse::<f64>().map_err(|_| {
            readmatrix_error(format!(
                "readmatrix: unable to parse numeric value '{}' on line {} column {}",
                inner, line_number, column_number
            ))
        }),
    }
}
/// Rewrites a numeric token into the form expected by Rust's float parser: thousands
/// separators are removed and the configured decimal separator becomes `.`.
///
/// A thousands separator equal to the decimal separator is ignored.
fn normalize_numeric_token(token: &str, options: &ReadMatrixOptions) -> String {
    let decimal = options.decimal_separator;
    let grouping = options.thousands_separator.filter(|sep| *sep != decimal);
    let mut normalized = String::with_capacity(token.len());
    for ch in token.chars() {
        if Some(ch) == grouping {
            continue;
        }
        normalized.push(if ch == decimal { '.' } else { ch });
    }
    normalized
}
/// Removes one pair of matching surrounding quotes (`"..."` or `'...'`) from `token`.
///
/// Tokens that are not wrapped in a matching pair are returned unchanged.
fn unquote(token: &str) -> &str {
    for quote in ['"', '\''] {
        let stripped = token
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = stripped {
            return inner;
        }
    }
    token
}
/// Extracts the file path from the first argument.
///
/// Accepts a string, a single-row character array, or a string array with exactly one
/// element; the text is then checked by `normalize_path`.
fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
    match value {
        Value::String(text) => normalize_path(text),
        Value::CharArray(chars) => {
            if chars.rows != 1 {
                return Err(readmatrix_error(
                    "readmatrix: expected a 1-by-N character vector for the file name",
                ));
            }
            normalize_path(&chars.data.iter().collect::<String>())
        }
        Value::StringArray(array) => match array.data.as_slice() {
            [only] => normalize_path(only),
            _ => Err(readmatrix_error(
                "readmatrix: string array inputs must be scalar",
            )),
        },
        other => Err(readmatrix_error(format!(
            "readmatrix: expected filename as string scalar or character vector, got {other:?}"
        ))),
    }
}
/// Turns a non-empty file name into a `PathBuf`; an empty name is rejected.
fn normalize_path(raw: &str) -> BuiltinResult<PathBuf> {
    match raw {
        "" => Err(readmatrix_error("readmatrix: filename must not be empty")),
        name => Ok(PathBuf::from(name)),
    }
}
#[cfg(test)]
pub(crate) mod tests {
use super::*;
use futures::executor::block_on;
use runmat_time::unix_timestamp_ms;
use std::fs;
use crate::builtins::common::test_support;
use runmat_accelerate_api::HostTensorView;
use runmat_builtins::{CharArray, IntValue, LogicalArray, StringArray, Tensor};
fn unique_path(prefix: &str) -> PathBuf {
let millis = unix_timestamp_ms();
let mut path = std::env::temp_dir();
path.push(format!("runmat_{prefix}_{}_{}", std::process::id(), millis));
path
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_reads_csv_data() {
let path = unique_path("readmatrix_csv");
fs::write(&path, "1,2,3\n4,5,6\n").expect("write sample file");
let result = block_on(readmatrix_builtin(
Value::from(path.to_string_lossy().to_string()),
Vec::new(),
))
.expect("readmatrix");
match result {
Value::Tensor(t) => {
assert_eq!(t.shape, vec![2, 3]);
assert_eq!(t.data, vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
}
other => panic!("expected tensor result, got {other:?}"),
}
let _ = fs::remove_file(&path);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_skips_header_lines() {
let path = unique_path("readmatrix_header");
fs::write(&path, "time,value\n0,10\n1,12\n").expect("write sample file");
let args = vec![Value::from("NumHeaderLines"), Value::Int(IntValue::I32(1))];
let result = block_on(readmatrix_builtin(
Value::from(path.to_string_lossy().to_string()),
args,
))
.expect("readmatrix");
match result {
Value::Tensor(t) => {
assert_eq!(t.shape, vec![2, 2]);
assert_eq!(t.data, vec![0.0, 1.0, 10.0, 12.0]);
}
other => panic!("expected tensor result, got {other:?}"),
}
let _ = fs::remove_file(&path);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_respects_delimiter_option() {
let path = unique_path("readmatrix_tab");
fs::write(&path, "1\t2\t3\n4\t5\t6\n").expect("write sample file");
let args = vec![Value::from("Delimiter"), Value::from("tab")];
let result = block_on(readmatrix_builtin(
Value::from(path.to_string_lossy().to_string()),
args,
))
.expect("readmatrix");
match result {
Value::Tensor(t) => {
assert_eq!(t.shape, vec![2, 3]);
assert_eq!(t.data, vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
}
other => panic!("expected tensor result, got {other:?}"),
}
let _ = fs::remove_file(&path);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_respects_range_string() {
let path = unique_path("readmatrix_range_string");
fs::write(&path, "11,12,13\n21,22,23\n31,32,33\n").expect("write sample file");
let args = vec![Value::from("Range"), Value::from("B2:C3")];
let result = block_on(readmatrix_builtin(
Value::from(path.to_string_lossy().to_string()),
args,
))
.expect("readmatrix");
match result {
Value::Tensor(t) => {
assert_eq!(t.shape, vec![2, 2]);
assert_eq!(t.data, vec![22.0, 32.0, 23.0, 33.0]);
}
other => panic!("expected tensor result, got {other:?}"),
}
let _ = fs::remove_file(&path);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_respects_range_numeric_vector() {
let path = unique_path("readmatrix_range_numeric");
fs::write(&path, "11,12,13\n21,22,23\n31,32,33\n").expect("write sample file");
let range = Tensor::new(vec![2.0, 2.0, 3.0, 3.0], vec![1, 4]).expect("range tensor");
let args = vec![Value::from("Range"), Value::Tensor(range)];
let result = block_on(readmatrix_builtin(
Value::from(path.to_string_lossy().to_string()),
args,
))
.expect("readmatrix");
match result {
Value::Tensor(t) => {
assert_eq!(t.shape, vec![2, 2]);
assert_eq!(t.data, vec![22.0, 32.0, 23.0, 33.0]);
}
other => panic!("expected tensor result, got {other:?}"),
}
let _ = fs::remove_file(&path);
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_treats_custom_missing_tokens() {
let path = unique_path("readmatrix_missing");
fs::write(&path, "1,NA,3\nNA,5,missing\n").expect("write file");
let strings = StringArray::new(vec!["NA".to_string(), "missing".to_string()], vec![1, 2])
.expect("string array");
let args = vec![Value::from("TreatAsMissing"), Value::StringArray(strings)];
let result = block_on(readmatrix_builtin(
Value::from(path.to_string_lossy().to_string()),
args,
))
.expect("readmatrix");
match result {
Value::Tensor(t) => {
assert_eq!(t.shape, vec![2, 3]);
assert!(t.data[1].is_nan()); assert!(t.data[2].is_nan()); assert!(t.data[5].is_nan()); }
other => panic!("expected tensor result, got {other:?}"),
}
let _ = fs::remove_file(&path);
}
/// With `;` as delimiter, `,` as decimal separator and `.` as thousands separator,
/// `1.234,56` and `7.890,12` must parse as 1234.56 and 7890.12.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_uses_decimal_and_thousands_separators() {
    let path = unique_path("readmatrix_decimal");
    fs::write(&path, "1.234,56;7.890,12\n").expect("write sample file");
    let args = vec![
        Value::from("Delimiter"),
        Value::from(";"),
        Value::from("DecimalSeparator"),
        Value::from(","),
        Value::from("ThousandsSeparator"),
        Value::from("."),
    ];
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        args,
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![1, 2]);
            // Compare with a tolerance: the values are not exactly representable in binary.
            assert!((t.data[0] - 1234.56).abs() < 1e-9);
            assert!((t.data[1] - 7890.12).abs() < 1e-9);
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// Empty fields must be filled with the number supplied through `EmptyValue`.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_applies_empty_value() {
    let path = unique_path("readmatrix_empty_value");
    fs::write(&path, "1,,3\n4,,6\n").expect("write sample file");
    let args = vec![Value::from("EmptyValue"), Value::Num(0.0)];
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        args,
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![2, 3]);
            // The middle column is empty in both rows and must become 0.0.
            assert_eq!(t.data, vec![1.0, 4.0, 0.0, 0.0, 3.0, 6.0]);
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// Options passed as a struct (`Delimiter`, `NumHeaderLines`) must be honoured:
/// the single header line is skipped and the two data rows are returned.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_accepts_struct_options() {
    let path = unique_path("readmatrix_struct_opts");
    fs::write(&path, "header1,header2\n9,10\n11,12\n").expect("write sample file");
    let mut options_struct = runmat_builtins::StructValue::new();
    options_struct
        .fields
        .insert("Delimiter".to_string(), Value::from(","));
    options_struct
        .fields
        .insert("NumHeaderLines".to_string(), Value::Int(IntValue::I32(1)));
    let args = vec![Value::Struct(options_struct)];
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        args,
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![2, 2]);
            assert_eq!(t.data, vec![9.0, 11.0, 10.0, 12.0]);
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// A field that is not a number (`abc`) must make the call fail with a parse error message.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_errors_on_non_numeric_field() {
    let path = unique_path("readmatrix_error");
    fs::write(&path, "1,abc,3\n").expect("write sample file");
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        Vec::new(),
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    let err = outcome.expect_err("readmatrix should fail");
    let message = err.message().to_string();
    assert!(
        message.contains("unable to parse numeric value"),
        "unexpected error message: {message}"
    );
}
/// A freshly created file with nothing written to it must yield a 0x0 tensor with no data.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_returns_empty_on_no_data() {
    let path = unique_path("readmatrix_empty");
    File::create(&path).expect("create file");
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        Vec::new(),
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![0, 0]);
            assert!(t.data.is_empty());
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// `OutputType = "logical"` must return a logical array in which only the zero cells are 0.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_output_type_logical() {
    let path = unique_path("readmatrix_output_logical");
    fs::write(&path, "0,1,-3\nNaN,0,5\n").expect("write sample file");
    let args = vec![Value::from("OutputType"), Value::from("logical")];
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        args,
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::LogicalArray(arr) => {
            assert_eq!(arr.shape, vec![2, 3]);
            // Column by column: (0, NaN), (1, 0), (-3, 5); the test expects NaN to map to 1.
            assert_eq!(arr.data, vec![0, 1, 1, 0, 1, 1]);
        }
        other => panic!("expected logical array, got {other:?}"),
    }
}
/// Passing a logical array as the `Like` prototype must make the result a logical array.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_like_logical_proto() {
    let path = unique_path("readmatrix_like_logical");
    fs::write(&path, "1,0\n0,5\n").expect("write sample file");
    let proto = LogicalArray::new(vec![1], vec![1]).expect("logical prototype");
    let args = vec![Value::from("Like"), Value::LogicalArray(proto)];
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        args,
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::LogicalArray(arr) => {
            assert_eq!(arr.shape, vec![2, 2]);
            // Column by column: (1, 0), (0, 5); the non-zero 5 is expected to become 1.
            assert_eq!(arr.data, vec![1, 0, 0, 1]);
        }
        other => panic!("expected logical array, got {other:?}"),
    }
}
/// Passing a GPU tensor as the `Like` prototype must return a GPU tensor whose gathered
/// contents match the file.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_like_gpu_proto() {
    test_support::with_test_provider(|provider| {
        let path = unique_path("readmatrix_like_gpu");
        fs::write(&path, "1,2\n3,4\n").expect("write sample file");
        let proto_tensor = Tensor::new(vec![0.0, 0.0], vec![1, 2]).expect("tensor");
        let view = HostTensorView {
            data: &proto_tensor.data,
            shape: &proto_tensor.shape,
        };
        let handle = provider.upload(&view).expect("upload prototype");
        // The handle is not used again, so it is moved into the argument list instead of cloned.
        let args = vec![Value::from("Like"), Value::GpuTensor(handle)];
        let outcome = block_on(readmatrix_builtin(
            Value::from(path.to_string_lossy().to_string()),
            args,
        ));
        // Remove the fixture before anything below can panic, so a failing run does not leak it.
        let _ = fs::remove_file(&path);
        let result = outcome.expect("readmatrix");
        assert!(
            matches!(result, Value::GpuTensor(_)),
            "expected GPU tensor result, got {result:?}"
        );
        let gathered = test_support::gather(result).expect("gather result");
        assert_eq!(gathered.shape, vec![2, 2]);
        assert_eq!(gathered.data, vec![1.0, 3.0, 2.0, 4.0]);
    });
}
/// The file name may be supplied as a 1xN character array instead of a string value.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_accepts_character_vector_path() {
    let path = unique_path("readmatrix_char_path");
    fs::write(&path, "1 2 3\n").expect("write sample file");
    let text = path.to_string_lossy().to_string();
    let chars: Vec<char> = text.chars().collect();
    // Capture the length first: `chars` is moved into the array constructor.
    let len = chars.len();
    let char_array = CharArray::new(chars, 1, len).expect("char array");
    let outcome = block_on(readmatrix_builtin(Value::CharArray(char_array), Vec::new()));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![1, 3]);
            assert_eq!(t.data, vec![1.0, 2.0, 3.0]);
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// Numeric fields wrapped in double quotes must still be parsed as numbers.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_handles_quoted_fields() {
    let path = unique_path("readmatrix_quotes");
    fs::write(&path, "\"1\",\"2\",\"3\"\n").expect("write sample file");
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        Vec::new(),
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![1, 3]);
            assert_eq!(t.data, vec![1.0, 2.0, 3.0]);
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// The literals `-Inf`, `Inf` and `NaN` must be read as the matching IEEE special values.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_preserves_negative_infinity() {
    let path = unique_path("readmatrix_infinity");
    fs::write(&path, "-Inf,Inf,NaN\n").expect("write sample file");
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        Vec::new(),
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![1, 3]);
            // Check the sign explicitly: `is_infinite` alone accepts both infinities.
            assert!(t.data[0].is_infinite() && t.data[0].is_sign_negative());
            assert!(t.data[1].is_infinite() && t.data[1].is_sign_positive());
            assert!(t.data[2].is_nan());
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
/// A space-separated file must be split into columns without an explicit `Delimiter` option.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn readmatrix_supports_whitespace_delimiter() {
    let path = unique_path("readmatrix_whitespace");
    fs::write(&path, "1 2 3\n4 5 6\n").expect("write sample file");
    let outcome = block_on(readmatrix_builtin(
        Value::from(path.to_string_lossy().to_string()),
        Vec::new(),
    ));
    // Remove the fixture before anything below can panic, so a failing run does not leak it.
    let _ = fs::remove_file(&path);
    match outcome.expect("readmatrix") {
        Value::Tensor(t) => {
            assert_eq!(t.shape, vec![2, 3]);
            assert_eq!(t.data, vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
        }
        other => panic!("expected tensor result, got {other:?}"),
    }
}
}