runmat_runtime/builtins/io/tabular/
csvread.rs

1//! MATLAB-compatible `csvread` builtin for RunMat.
2//!
3//! `csvread` is largely superseded by `readmatrix`, but MATLAB users still rely on
4//! its terse API for numeric CSV imports. This implementation mirrors MATLAB's
5//! zero-based range semantics while integrating with the modern builtin template.
6
7use std::io::{BufRead, BufReader};
8use std::path::{Path, PathBuf};
9
10use runmat_builtins::{Tensor, Value};
11use runmat_filesystem::File;
12use runmat_macros::runtime_builtin;
13
14use crate::builtins::common::fs::expand_user_path;
15use crate::builtins::common::spec::{
16    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
17    ReductionNaN, ResidencyPolicy, ShapeRequirements,
18};
19use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
20
// Builtin name passed to `with_builtin` when constructing errors in this module and
// forwarded to `expand_user_path` when resolving the filename.
21const BUILTIN_NAME: &str = "csvread";
22
// GPU specification registered for `csvread`. No precisions or provider hooks are
// listed and residency is `GatherImmediately`; per the `notes` field the builtin runs
// entirely on the host.
23#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::io::tabular::csvread")]
24pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
25    name: "csvread",
26    op_kind: GpuOpKind::Custom("io-csvread"),
27    supported_precisions: &[],
28    broadcast: BroadcastSemantics::None,
29    provider_hooks: &[],
30    constant_strategy: ConstantStrategy::InlineLiteral,
31    residency: ResidencyPolicy::GatherImmediately,
32    nan_mode: ReductionNaN::Include,
33    two_pass_threshold: None,
34    workgroup_size: None,
35    accepts_nan_mode: false,
36    notes: "Runs entirely on the host; acceleration providers are not involved.",
37};
38
39fn csvread_error(message: impl Into<String>) -> RuntimeError {
40    build_runtime_error(message)
41        .with_builtin(BUILTIN_NAME)
42        .build()
43}
44
45fn csvread_error_with_source<E>(message: impl Into<String>, source: E) -> RuntimeError
46where
47    E: std::error::Error + Send + Sync + 'static,
48{
49    build_runtime_error(message)
50        .with_builtin(BUILTIN_NAME)
51        .with_source(source)
52        .build()
53}
54
55fn map_control_flow(err: RuntimeError) -> RuntimeError {
56    let identifier = err.identifier().map(|value| value.to_string());
57    let message = err.message().to_string();
58    let mut builder = build_runtime_error(message)
59        .with_builtin(BUILTIN_NAME)
60        .with_source(err);
61    if let Some(identifier) = identifier {
62        builder = builder.with_identifier(identifier);
63    }
64    builder.build()
65}
66
// Fusion specification registered for `csvread`. No elementwise or reduction kernel is
// provided; per the `notes` field the builtin is not eligible for fusion.
67#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::io::tabular::csvread")]
68pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
69    name: "csvread",
70    shape: ShapeRequirements::Any,
71    constant_strategy: ConstantStrategy::InlineLiteral,
72    elementwise: None,
73    reduction: None,
74    emits_nan: false,
75    notes: "Not eligible for fusion; executes as a standalone host operation.",
76};
77
78#[runtime_builtin(
79    name = "csvread",
80    category = "io/tabular",
81    summary = "Read numeric data from a comma-separated text file.",
82    keywords = "csvread,csv,dlmread,numeric import,range",
83    accel = "cpu",
84    type_resolver(crate::builtins::io::type_resolvers::tensor_type),
85    builtin_path = "crate::builtins::io::tabular::csvread"
86)]
87async fn csvread_builtin(path: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
88    let gathered_path = gather_if_needed_async(&path)
89        .await
90        .map_err(map_control_flow)?;
91    let options = parse_arguments(&rest).await?;
92    let resolved = resolve_path(&gathered_path)?;
93    let (rows, max_cols, skipped_rows) = read_csv_rows(&resolved, &options)?;
94    let start_row = if options.range.is_none() {
95        options.start_row.saturating_sub(skipped_rows)
96    } else {
97        options.start_row
98    };
99    let subset = if let Some(range) = options.range {
100        apply_range(&rows, max_cols, &range, 0.0)
101    } else {
102        apply_offsets(&rows, max_cols, start_row, options.start_col, 0.0)
103    };
104    let tensor = rows_to_tensor(subset.rows, subset.row_count, subset.col_count, 0.0)?;
105    Ok(Value::Tensor(tensor))
106}
107
/// Options parsed from the arguments that follow the filename.
108#[derive(Debug, Default)]
109struct CsvReadOptions {
    /// Non-negative row offset taken from the first trailing argument.
110    start_row: usize,
    /// Non-negative column offset taken from the second trailing argument.
111    start_col: usize,
    /// Explicit range from the third trailing argument; when present, the builtin
    /// selects by range instead of by the offsets above.
112    range: Option<RangeSpec>,
113}
114
115async fn parse_arguments(args: &[Value]) -> BuiltinResult<CsvReadOptions> {
116    let mut gathered = Vec::with_capacity(args.len());
117    for value in args {
118        gathered.push(
119            gather_if_needed_async(value)
120                .await
121                .map_err(map_control_flow)?,
122        );
123    }
124    match gathered.len() {
125        0 => Ok(CsvReadOptions::default()),
126        2 => {
127            let start_row = value_to_start_index(&gathered[0], "row")?;
128            let start_col = value_to_start_index(&gathered[1], "col")?;
129            Ok(CsvReadOptions {
130                start_row,
131                start_col,
132                range: None,
133            })
134        }
135        3 => {
136            let start_row = value_to_start_index(&gathered[0], "row")?;
137            let start_col = value_to_start_index(&gathered[1], "col")?;
138            let range = parse_range(&gathered[2])?;
139            Ok(CsvReadOptions {
140                start_row,
141                start_col,
142                range: Some(range),
143            })
144        }
145        _ => Err(csvread_error(
146            "csvread: expected csvread(filename[, row, col[, range]])",
147        )),
148    }
149}
150
151fn value_to_start_index(value: &Value, name: &str) -> BuiltinResult<usize> {
152    match value {
153        Value::Int(i) => {
154            let raw = i.to_i64();
155            if raw < 0 {
156                return Err(csvread_error(format!(
157                    "csvread: {name} must be a non-negative integer"
158                )));
159            }
160            usize::try_from(raw).map_err(|_| csvread_error(format!("csvread: {name} is too large")))
161        }
162        Value::Num(n) => {
163            if !n.is_finite() {
164                return Err(csvread_error(format!(
165                    "csvread: {name} must be a finite integer"
166                )));
167            }
168            if *n < 0.0 {
169                return Err(csvread_error(format!(
170                    "csvread: {name} must be a non-negative integer"
171                )));
172            }
173            let rounded = n.round();
174            if (rounded - n).abs() > f64::EPSILON {
175                return Err(csvread_error(format!("csvread: {name} must be an integer")));
176            }
177            usize::try_from(rounded as i64)
178                .map_err(|_| csvread_error(format!("csvread: {name} is too large")))
179        }
180        _ => Err(csvread_error(format!(
181            "csvread: expected {name} as a numeric scalar, got {value:?}"
182        ))),
183    }
184}
185
186fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
187    match value {
188        Value::String(s) => normalize_path(s),
189        Value::CharArray(ca) if ca.rows == 1 => {
190            let text: String = ca.data.iter().collect();
191            normalize_path(&text)
192        }
193        Value::StringArray(sa) => {
194            if sa.data.len() == 1 {
195                normalize_path(&sa.data[0])
196            } else {
197                Err(csvread_error("csvread: string array inputs must be scalar"))
198            }
199        }
200        Value::CharArray(_) => Err(csvread_error(
201            "csvread: expected a 1-by-N character vector for the file name",
202        )),
203        other => Err(csvread_error(format!(
204            "csvread: expected filename as string scalar or character vector, got {other:?}"
205        ))),
206    }
207}
208
209fn normalize_path(raw: &str) -> BuiltinResult<PathBuf> {
210    if raw.trim().is_empty() {
211        return Err(csvread_error("csvread: filename must not be empty"));
212    }
213    let expanded = expand_user_path(raw, BUILTIN_NAME).map_err(csvread_error)?;
214    Ok(Path::new(&expanded).to_path_buf())
215}
216
217fn read_csv_rows(
218    path: &Path,
219    options: &CsvReadOptions,
220) -> BuiltinResult<(Vec<Vec<f64>>, usize, usize)> {
221    let file = File::open(path).map_err(|err| {
222        csvread_error_with_source(
223            format!("csvread: unable to open '{}': {err}", path.display()),
224            err,
225        )
226    })?;
227    let mut reader = BufReader::new(file);
228    let mut buffer = String::new();
229    let mut rows = Vec::new();
230    let mut max_cols = 0usize;
231    let mut line_index = 0usize;
232    let mut skipped_rows = 0usize;
233
234    loop {
235        buffer.clear();
236        let bytes = reader.read_line(&mut buffer).map_err(|err| {
237            csvread_error_with_source(
238                format!("csvread: failed to read '{}': {err}", path.display()),
239                err,
240            )
241        })?;
242        if bytes == 0 {
243            break;
244        }
245        line_index += 1;
246        if buffer.trim().is_empty() {
247            continue;
248        }
249        if buffer.ends_with('\n') {
250            buffer.pop();
251            if buffer.ends_with('\r') {
252                buffer.pop();
253            }
254        } else if buffer.ends_with('\r') {
255            buffer.pop();
256        }
257        if options.range.is_none() && options.start_row > 0 && line_index <= options.start_row {
258            skipped_rows += 1;
259            continue;
260        }
261        let parse_start_col = if options.range.is_none() {
262            options.start_col
263        } else {
264            0
265        };
266        let parsed = parse_csv_row(&buffer, line_index, parse_start_col)?;
267        max_cols = max_cols.max(parsed.len());
268        rows.push(parsed);
269    }
270
271    Ok((rows, max_cols, skipped_rows))
272}
273
274fn parse_csv_row(line: &str, line_index: usize, parse_start_col: usize) -> BuiltinResult<Vec<f64>> {
275    let mut values = Vec::new();
276    for (col_index, raw_field) in line.split(',').enumerate() {
277        if col_index < parse_start_col {
278            // Respect csvread(..., row, col) offsets by skipping validation for
279            // columns that will be dropped before materializing the output.
280            values.push(0.0);
281            continue;
282        }
283        let trimmed = raw_field.trim();
284        if trimmed.is_empty() {
285            values.push(0.0);
286            continue;
287        }
288        let unwrapped = if trimmed.starts_with('"') && trimmed.ends_with('"') && trimmed.len() >= 2
289        {
290            &trimmed[1..trimmed.len() - 1]
291        } else {
292            trimmed
293        };
294        let lowered = unwrapped.to_ascii_lowercase();
295        let value = match lowered.as_str() {
296            "nan" => f64::NAN,
297            "inf" | "+inf" => f64::INFINITY,
298            "-inf" => f64::NEG_INFINITY,
299            _ => unwrapped.parse::<f64>().map_err(|_| {
300                csvread_error(format!(
301                    "csvread: nonnumeric token '{}' at row {} column {}",
302                    unwrapped,
303                    line_index,
304                    col_index + 1
305                ))
306            })?,
307        };
308        values.push(value);
309    }
310    Ok(values)
311}
312
/// Rectangular selection of cells expressed with zero-based indices.
313#[derive(Clone, Copy, Debug)]
314struct RangeSpec {
    /// First row of the selection.
315    start_row: usize,
    /// First column of the selection.
316    start_col: usize,
    /// Last row (inclusive); `None` means through the last available row.
317    end_row: Option<usize>,
    /// Last column (inclusive); `None` means through the last available column.
318    end_col: Option<usize>,
319}
320
321fn parse_range(value: &Value) -> BuiltinResult<RangeSpec> {
322    match value {
323        Value::String(s) => parse_range_string(s),
324        Value::CharArray(ca) if ca.rows == 1 => {
325            let text: String = ca.data.iter().collect();
326            parse_range_string(&text)
327        }
328        Value::StringArray(sa) => {
329            if sa.data.len() == 1 {
330                parse_range_string(&sa.data[0])
331            } else {
332                Err(csvread_error(
333                    "csvread: Range string array inputs must be scalar",
334                ))
335            }
336        }
337        Value::Tensor(_) => parse_range_numeric(value),
338        _ => Err(csvread_error(
339            "csvread: Range must be provided as a string or numeric vector",
340        )),
341    }
342}
343
344fn parse_range_string(text: &str) -> BuiltinResult<RangeSpec> {
345    let trimmed = text.trim();
346    if trimmed.is_empty() {
347        return Err(csvread_error("csvread: Range string cannot be empty"));
348    }
349    let parts: Vec<&str> = trimmed.split(':').collect();
350    if parts.len() > 2 {
351        return Err(csvread_error(format!(
352            "csvread: invalid Range specification '{trimmed}'"
353        )));
354    }
355    let start = parse_cell_reference(parts[0])?;
356    if start.col.is_none() {
357        return Err(csvread_error(
358            "csvread: Range must specify a starting column",
359        ));
360    }
361    let end = if parts.len() == 2 {
362        Some(parse_cell_reference(parts[1])?)
363    } else {
364        None
365    };
366    if let Some(ref end_ref) = end {
367        if end_ref.col.is_none() {
368            return Err(csvread_error(
369                "csvread: Range end must include a column reference",
370            ));
371        }
372    }
373    let start_row = start.row.unwrap_or(0);
374    let start_col = start.col.unwrap();
375    let end_row = end.as_ref().and_then(|r| r.row);
376    let end_col = end.as_ref().and_then(|r| r.col);
377    Ok(RangeSpec {
378        start_row,
379        start_col,
380        end_row,
381        end_col,
382    })
383}
384
385fn parse_range_numeric(value: &Value) -> BuiltinResult<RangeSpec> {
386    let elements = match value {
387        Value::Tensor(t) => t.data.clone(),
388        _ => {
389            return Err(csvread_error(
390                "csvread: numeric Range must be provided as a vector with 2 or 4 elements",
391            ));
392        }
393    };
394    if elements.len() != 2 && elements.len() != 4 {
395        return Err(csvread_error(
396            "csvread: numeric Range must contain exactly 2 or 4 elements",
397        ));
398    }
399    let mut indices = Vec::with_capacity(elements.len());
400    for (idx, element) in elements.iter().enumerate() {
401        indices.push(non_negative_index(*element, idx)?);
402    }
403    let start_row = indices[0];
404    let start_col = indices[1];
405    let (end_row, end_col) = if indices.len() == 4 {
406        (Some(indices[2]), Some(indices[3]))
407    } else {
408        (None, None)
409    };
410    Ok(RangeSpec {
411        start_row,
412        start_col,
413        end_row,
414        end_col,
415    })
416}
417
418fn non_negative_index(value: f64, position: usize) -> BuiltinResult<usize> {
419    if !value.is_finite() {
420        return Err(csvread_error("csvread: Range indices must be finite"));
421    }
422    if value < 0.0 {
423        return Err(csvread_error("csvread: Range indices must be non-negative"));
424    }
425    let rounded = value.round();
426    if (rounded - value).abs() > f64::EPSILON {
427        return Err(csvread_error("csvread: Range indices must be integers"));
428    }
429    usize::try_from(rounded as i64).map_err(|_| {
430        csvread_error(format!(
431            "csvread: Range index {} is too large to fit in usize",
432            position + 1
433        ))
434    })
435}
436
/// One endpoint of a spreadsheet-style range after parsing.
437#[derive(Clone, Copy)]
438struct CellReference {
    /// Zero-based row, or `None` when the reference contained no digits.
439    row: Option<usize>,
    /// Zero-based column, or `None` when the reference contained no letters.
440    col: Option<usize>,
441}
442
443fn parse_cell_reference(token: &str) -> BuiltinResult<CellReference> {
444    let mut letters = String::new();
445    let mut digits = String::new();
446    for ch in token.trim().chars() {
447        if ch == '$' {
448            continue;
449        }
450        if ch.is_ascii_alphabetic() {
451            letters.push(ch.to_ascii_uppercase());
452        } else if ch.is_ascii_digit() {
453            digits.push(ch);
454        } else {
455            return Err(csvread_error(format!(
456                "csvread: invalid Range component '{token}'"
457            )));
458        }
459    }
460    if letters.is_empty() && digits.is_empty() {
461        return Err(csvread_error("csvread: Range references cannot be empty"));
462    }
463    let col = if letters.is_empty() {
464        None
465    } else {
466        Some(column_index_from_letters(&letters)?)
467    };
468    let row = if digits.is_empty() {
469        None
470    } else {
471        let parsed = digits.parse::<usize>().map_err(|_| {
472            csvread_error(format!(
473                "csvread: invalid row index '{}' in Range component '{token}'",
474                digits
475            ))
476        })?;
477        if parsed == 0 {
478            return Err(csvread_error("csvread: Range rows must be >= 1"));
479        }
480        Some(parsed - 1)
481    };
482    Ok(CellReference { row, col })
483}
484
485fn column_index_from_letters(letters: &str) -> BuiltinResult<usize> {
486    let mut value: usize = 0;
487    for ch in letters.chars() {
488        if !ch.is_ascii_uppercase() {
489            return Err(csvread_error(format!(
490                "csvread: invalid column designator '{letters}' in Range"
491            )));
492        }
493        let digit = (ch as u8 - b'A' + 1) as usize;
494        value = value
495            .checked_mul(26)
496            .and_then(|v| v.checked_add(digit))
497            .ok_or_else(|| csvread_error("csvread: Range column index overflowed"))?;
498    }
499    value
500        .checked_sub(1)
501        .ok_or_else(|| csvread_error("csvread: Range column index underflowed"))
502}
503
/// Rows selected by `apply_offsets` or `apply_range`, together with their dimensions.
504struct SubsetResult {
    /// Selected cells, one inner vector per row.
505    rows: Vec<Vec<f64>>,
    /// Number of selected rows.
506    row_count: usize,
    /// Width of the widest selected row.
507    col_count: usize,
508}
509
510fn apply_offsets(
511    rows: &[Vec<f64>],
512    max_cols: usize,
513    start_row: usize,
514    start_col: usize,
515    default_fill: f64,
516) -> SubsetResult {
517    if rows.is_empty() || max_cols == 0 {
518        return SubsetResult {
519            rows: Vec::new(),
520            row_count: 0,
521            col_count: 0,
522        };
523    }
524    if start_row >= rows.len() {
525        return SubsetResult {
526            rows: Vec::new(),
527            row_count: 0,
528            col_count: 0,
529        };
530    }
531    if start_col >= max_cols {
532        return SubsetResult {
533            rows: Vec::new(),
534            row_count: 0,
535            col_count: 0,
536        };
537    }
538
539    let mut subset_rows = Vec::new();
540    let mut col_count = 0usize;
541    for row in rows.iter().skip(start_row) {
542        if start_col >= row.len() && row.len() < max_cols {
543            // Entire row missing required columns; fill zeros of remaining width.
544            let width = max_cols - start_col;
545            subset_rows.push(vec![default_fill; width]);
546            col_count = col_count.max(width);
547            continue;
548        }
549        let mut extracted = Vec::with_capacity(max_cols - start_col);
550        for col_idx in start_col..max_cols {
551            let value = row.get(col_idx).copied().unwrap_or(default_fill);
552            extracted.push(value);
553        }
554        col_count = col_count.max(extracted.len());
555        subset_rows.push(extracted);
556    }
557    let row_count = subset_rows.len();
558    SubsetResult {
559        rows: subset_rows,
560        row_count,
561        col_count,
562    }
563}
564
565fn apply_range(
566    rows: &[Vec<f64>],
567    max_cols: usize,
568    range: &RangeSpec,
569    default_fill: f64,
570) -> SubsetResult {
571    if rows.is_empty() || max_cols == 0 {
572        return SubsetResult {
573            rows: Vec::new(),
574            row_count: 0,
575            col_count: 0,
576        };
577    }
578    if range.start_row >= rows.len() || range.start_col >= max_cols {
579        return SubsetResult {
580            rows: Vec::new(),
581            row_count: 0,
582            col_count: 0,
583        };
584    }
585    let last_row = rows.len().saturating_sub(1);
586    let mut end_row = range.end_row.unwrap_or(last_row);
587    if end_row > last_row {
588        end_row = last_row;
589    }
590    if end_row < range.start_row {
591        return SubsetResult {
592            rows: Vec::new(),
593            row_count: 0,
594            col_count: 0,
595        };
596    }
597
598    let last_col = max_cols.saturating_sub(1);
599    let mut end_col = range.end_col.unwrap_or(last_col);
600    if end_col > last_col {
601        end_col = last_col;
602    }
603    if end_col < range.start_col {
604        return SubsetResult {
605            rows: Vec::new(),
606            row_count: 0,
607            col_count: 0,
608        };
609    }
610
611    let mut subset_rows = Vec::new();
612    let mut col_count = 0usize;
613    for row_idx in range.start_row..=end_row {
614        if row_idx >= rows.len() {
615            break;
616        }
617        let row = &rows[row_idx];
618        let mut extracted = Vec::with_capacity(end_col - range.start_col + 1);
619        for col_idx in range.start_col..=end_col {
620            if col_idx >= max_cols {
621                break;
622            }
623            let value = row.get(col_idx).copied().unwrap_or(default_fill);
624            extracted.push(value);
625        }
626        col_count = col_count.max(extracted.len());
627        subset_rows.push(extracted);
628    }
629    let row_count = subset_rows.len();
630    SubsetResult {
631        rows: subset_rows,
632        row_count,
633        col_count,
634    }
635}
636
637fn rows_to_tensor(
638    rows: Vec<Vec<f64>>,
639    row_count: usize,
640    col_count: usize,
641    default_fill: f64,
642) -> BuiltinResult<Tensor> {
643    if row_count == 0 || col_count == 0 {
644        return Tensor::new(Vec::new(), vec![0, 0])
645            .map_err(|e| csvread_error(format!("csvread: {e}")));
646    }
647    let mut data = vec![default_fill; row_count * col_count];
648    for (row_idx, row) in rows.iter().enumerate().take(row_count) {
649        for col_idx in 0..col_count {
650            let value = row.get(col_idx).copied().unwrap_or(default_fill);
651            data[row_idx + col_idx * row_count] = value;
652        }
653    }
654    Tensor::new(data, vec![row_count, col_count])
655        .map_err(|e| csvread_error(format!("csvread: {e}")))
656}
657
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use runmat_time::unix_timestamp_ns;
    use std::fs;
    use std::sync::atomic::{AtomicUsize, Ordering};

    use runmat_builtins::{CharArray, IntValue, Tensor as BuiltinTensor};

    /// Synchronous wrapper that drives the async builtin to completion.
    fn csvread_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
        futures::executor::block_on(super::csvread_builtin(path, rest))
    }

    static UNIQUE_COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Builds a temp-dir path that is unique per process, timestamp and call.
    fn unique_path(prefix: &str) -> PathBuf {
        let nanos = unix_timestamp_ns();
        let seq = UNIQUE_COUNTER.fetch_add(1, Ordering::Relaxed);
        let mut path = std::env::temp_dir();
        path.push(format!(
            "runmat_csvread_{prefix}_{}_{}_{}",
            std::process::id(),
            nanos,
            seq
        ));
        path
    }

    /// Writes `lines` (joined with `\n`) to a fresh temporary `.csv` file.
    fn write_temp_file(lines: &[&str]) -> PathBuf {
        let path = unique_path("input").with_extension("csv");
        let contents = lines.join("\n");
        fs::write(&path, contents).expect("write temp csv");
        path
    }

    /// Temporary CSV fixture that removes its file on drop, so the file is cleaned up
    /// even when a test assertion panics.
    struct TempCsv {
        path: PathBuf,
    }

    impl TempCsv {
        fn new(lines: &[&str]) -> Self {
            Self {
                path: write_temp_file(lines),
            }
        }

        /// The fixture path as a string `Value` suitable for the filename argument.
        fn as_value(&self) -> Value {
            Value::from(self.path.to_string_lossy().to_string())
        }
    }

    impl Drop for TempCsv {
        fn drop(&mut self) {
            fs::remove_file(&self.path).ok();
        }
    }

    /// Asserts that `result` is a tensor with the given shape and column-major data.
    fn assert_tensor(result: Value, shape: &[usize], data: &[f64]) {
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, shape.to_vec());
                assert_eq!(t.data, data.to_vec());
            }
            other => panic!("expected tensor, got {other:?}"),
        }
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_basic_csv_roundtrip() {
        let csv = TempCsv::new(&["1,2,3", "4,5,6"]);
        let result = csvread_builtin(csv.as_value(), Vec::new()).expect("csvread");
        assert_tensor(result, &[2, 3], &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_offsets() {
        let csv = TempCsv::new(&["0,1,2", "3,4,5", "6,7,8"]);
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(1))];
        let result = csvread_builtin(csv.as_value(), args).expect("csv");
        assert_tensor(result, &[2, 2], &[4.0, 7.0, 5.0, 8.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_numeric_range() {
        let csv = TempCsv::new(&["1,2,3", "4,5,6", "7,8,9"]);
        let args = vec![
            Value::Int(IntValue::I32(0)),
            Value::Int(IntValue::I32(0)),
            Value::from(BuiltinTensor::new(vec![1.0, 1.0, 2.0, 2.0], vec![4, 1]).expect("tensor")),
        ];
        let result = csvread_builtin(csv.as_value(), args).expect("csv");
        assert_tensor(result, &[2, 2], &[5.0, 8.0, 6.0, 9.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_string_range() {
        let csv = TempCsv::new(&["1,2,3", "4,5,6", "7,8,9"]);
        let args = vec![
            Value::Int(IntValue::I32(0)),
            Value::Int(IntValue::I32(0)),
            Value::from("B2:C3"),
        ];
        let result = csvread_builtin(csv.as_value(), args).expect("csv");
        assert_tensor(result, &[2, 2], &[5.0, 8.0, 6.0, 9.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_empty_fields_become_zero() {
        let csv = TempCsv::new(&["1,,3", ",5,", "7,8,"]);
        let result = csvread_builtin(csv.as_value(), Vec::new()).expect("csv");
        assert_tensor(
            result,
            &[3, 3],
            &[1.0, 0.0, 7.0, 0.0, 5.0, 8.0, 3.0, 0.0, 0.0],
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_errors_on_text() {
        let csv = TempCsv::new(&["1,2,3", "4,error,6"]);
        let err = csvread_builtin(csv.as_value(), Vec::new()).expect_err("should fail");
        let message = err.message().to_string();
        assert!(
            message.contains("nonnumeric token 'error'"),
            "unexpected error: {message}"
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_accepts_char_array_filename() {
        let csv = TempCsv::new(&["1,2"]);
        let path_string = csv.path.to_string_lossy().to_string();
        let data: Vec<char> = path_string.chars().collect();
        let cols = data.len();
        let chars = CharArray::new(data, 1, cols).expect("char array");
        let result = csvread_builtin(Value::CharArray(chars), Vec::new()).expect("csv");
        assert_tensor(result, &[1, 2], &[1.0, 2.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_header_and_row_labels_using_offsets() {
        let csv = TempCsv::new(&["Name,Jan,Feb", "alpha,1,2", "beta,3,4"]);
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(1))];
        let result = csvread_builtin(csv.as_value(), args).expect("csv");
        assert_tensor(result, &[2, 2], &[1.0, 3.0, 2.0, 4.0]);
    }
}
runmat_runtime/builtins/io/tabular/csvread.rs

runmat_runtime/builtins/io/tabular/
csvread.rs