runmat_runtime/builtins/io/tabular/
csvread.rs

1//! MATLAB-compatible `csvread` builtin for RunMat.
2//!
3//! `csvread` is largely superseded by `readmatrix`, but MATLAB users still rely on
4//! its terse API for numeric CSV imports. This implementation mirrors MATLAB's
5//! zero-based range semantics while integrating with the modern builtin template.
6
7use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::path::{Path, PathBuf};
10
11use runmat_builtins::{Tensor, Value};
12use runmat_macros::runtime_builtin;
13
14use crate::builtins::common::fs::expand_user_path;
15use crate::builtins::common::spec::{
16    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
17    ReductionNaN, ResidencyPolicy, ShapeRequirements,
18};
19#[cfg(feature = "doc_export")]
20use crate::register_builtin_doc_text;
21use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
22
23#[cfg(feature = "doc_export")]
24pub const DOC_MD: &str = r#"---
25title: "csvread"
26category: "io/tabular"
27keywords: ["csvread", "csv", "comma-separated values", "numeric import", "range", "header"]
28summary: "Read numeric data from a comma-separated text file with MATLAB-compatible zero-based ranges."
29references:
30  - https://www.mathworks.com/help/matlab/ref/csvread.html
31gpu_support:
32  elementwise: false
33  reduction: false
34  precisions: []
35  broadcasting: "none"
36  notes: "Performs host-side file I/O and parsing. Acceleration providers are not involved, and results remain on the CPU."
37fusion:
38  elementwise: false
39  reduction: false
40  max_inputs: 1
41  constants: "inline"
42requires_feature: null
43tested:
44  unit: "builtins::io::tabular::csvread::tests"
45  integration: "builtins::io::tabular::csvread::tests::csvread_basic_csv_roundtrip"
46---
47
48# What does the `csvread` function do in MATLAB / RunMat?
49`csvread(filename)` reads numeric data from a comma-separated text file and returns a dense double-precision matrix. It is a legacy convenience wrapper preserved for MATLAB compatibility, and RunMat intentionally mirrors the original zero-based semantics.
50
51## How does the `csvread` function behave in MATLAB / RunMat?
52- Accepts character vectors or string scalars for the file name. String arrays must contain exactly one element.
53- `csvread(filename, row, col)` starts reading at the zero-based row `row` and column `col`, skipping any data before that offset.
- `csvread(filename, row, col, range)` reads only the rectangle described by `range`. Numeric ranges contain either four elements `[r1 c1 r2 c2]` (zero-based, inclusive) or two elements `[r1 c1]`, in which case reading continues through the last row and column of the file. Excel-style ranges use the familiar `"B2:D6"` A1 notation, which RunMat converts to zero-based indices internally.
55- Empty fields (two consecutive commas or a trailing comma) are interpreted as `0`. Tokens such as `NaN`, `Inf`, and `-Inf` are accepted (case-insensitive).
56- Any other nonnumeric token raises an error that identifies the offending row and column.
57- Results are dense double-precision tensors using column-major layout. An empty file produces a `0×0` tensor.
58- Paths can contain `~` to reference the home directory; RunMat expands the token before opening the file.
59
60## `csvread` Function GPU Execution Behaviour
61`csvread` performs all work on the host CPU. Arguments are gathered from the GPU when necessary, and the resulting tensor is returned in host memory. To keep data on the GPU, call `gpuArray` on the output or switch to `readmatrix` with the `'Like'` option. No provider hooks are required.
62
63## Examples of using the `csvread` function in MATLAB / RunMat
64
65### Import Entire CSV File
66```matlab
67writematrix([1 2 3; 4 5 6], "scores.csv");
68M = csvread("scores.csv");
69delete("scores.csv");
70```
71Expected output:
72```matlab
73M =
74     1     2     3
75     4     5     6
76```
77
78### Skip Header Row And Column Using Zero-Based Offsets
79```matlab
80fid = fopen("with_header.csv", "w");
81fprintf(fid, "Name,Jan,Feb\nalpha,1,2\nbeta,3,4\n");
82fclose(fid);
83
84M = csvread("with_header.csv", 1, 1);
85delete("with_header.csv");
86```
87Expected output:
88```matlab
89M =
90     1     2
91     3     4
92```
93
94### Read A Specific Range With Numeric Vector Syntax
95```matlab
96fid = fopen("measurements.csv", "w");
97fprintf(fid, "10,11,12,13\n14,15,16,17\n18,19,20,21\n22,23,24,25\n");
98fclose(fid);
99
100block = csvread("measurements.csv", 0, 0, [1 1 2 3]);
101delete("measurements.csv");
102```
103Expected output:
104```matlab
105block =
106    15    16    17
107    19    20    21
108```
109
110### Read A Block Using Excel-Style Range Notation
111```matlab
112fid = fopen("measurements2.csv", "w");
113fprintf(fid, "10,11,12\n14,15,16\n18,19,20\n");
114fclose(fid);
115
116sub = csvread("measurements2.csv", 0, 0, "B2:C3");
117delete("measurements2.csv");
118```
119Expected output:
120```matlab
121sub =
122    15    16
123    19    20
124```
125
126### Handle Empty Fields As Zeros
127```matlab
128fid = fopen("with_blanks.csv", "w");
129fprintf(fid, "1,,3\n,5,\n7,8,\n");
130fclose(fid);
131
132M = csvread("with_blanks.csv");
133delete("with_blanks.csv");
134```
135Expected output:
136```matlab
137M =
138     1     0     3
139     0     5     0
140     7     8     0
141```
142
143### Read Numeric Data From A File In The Home Directory
144```matlab
145homePath = fullfile(getenv("HOME"), "runmat_csvread_home.csv");
146fid = fopen(homePath, "w");
147fprintf(fid, "9,10\n11,12\n");
148fclose(fid);
149
150M = csvread(fullfile("~", "runmat_csvread_home.csv"));
151delete(homePath);
152```
153Expected output:
154```matlab
155M =
156     9    10
157    11    12
158```
159
160### Detect Errors When Text Appears In Numeric Columns
161```matlab
162fid = fopen("bad.csv", "w");
163fprintf(fid, "1,2,3\n4,error,6\n");
164fclose(fid);
165
166try
167    csvread("bad.csv");
168catch err
169    disp(err.message);
170end
171delete("bad.csv");
172```
173Expected output:
174```matlab
175csvread: nonnumeric token 'error' at row 2, column 2
176```
177
178## GPU residency in RunMat (Do I need `gpuArray`?)
179
180`csvread` always returns a host-resident tensor because it performs file I/O and parsing on the CPU. If you need the data on the GPU, wrap the call with `gpuArray(csvread(...))` or switch to `readmatrix` with the `'Like'` option so that RunMat can place the result directly on the desired device.
181
182## FAQ
183
184### Why does `csvread` complain about text data?
185`csvread` is limited to numeric CSV content. If a field contains letters, quoted strings, or other tokens that cannot be parsed as numbers, the builtin raises an error. Switch to `readmatrix` or `readtable` when the file mixes text and numbers.
186
187### Are the row and column offsets zero-based?
188Yes. `csvread(filename, row, col)` treats `row` and `col` as zero-based counts to skip from the start of the file before reading results.
189
190### How are Excel-style ranges interpreted?
191Excel ranges such as `"B2:D5"` use the familiar 1-based row numbering and column letters. The builtin converts them internally to zero-based indices and includes both endpoints.
192
193### Can I read files with quoted numeric fields?
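Only in the simplest form. A field wrapped in a single pair of double quotes (for example `"3.5"`) has the quotes removed before it is parsed as a number. Quoted fields that contain commas or escaped quotes are not supported, because each line is split on every comma first. Remove the quotes before calling `csvread`, or switch to `readmatrix`, which has full CSV parsing support.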
195
196### What happens to empty cells?
197Empty cells (two consecutive commas or a trailing delimiter) become zero, matching MATLAB's `csvread` behaviour.
198
199### Does `csvread` support custom delimiters?
200No. `csvread` always uses comma separation. Use `dlmread` or `readmatrix` for other delimiters.
201
202### How do I keep the results on the GPU?
203`csvread` returns a host tensor. Call `gpuArray(csvread(...))` after reading, or prefer `readmatrix` with `'Like', gpuArray.zeros(1)` to keep residency on the GPU automatically.
204
205### What if the file is empty?
206An empty file results in a `0×0` double tensor. MATLAB behaves the same way.
207
208### Does `csvread` change the working directory?
209No. Relative paths are resolved against the current working directory and do not modify it.
210
211## See Also
212[readmatrix](./readmatrix), [writematrix](./writematrix), [gpuArray](../../acceleration/gpu/gpuArray), [gather](../../acceleration/gpu/gather)
213
214## Source & Feedback
215- The full source code for the implementation of the `csvread` function is available at: [`crates/runmat-runtime/src/builtins/io/tabular/csvread.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/io/tabular/csvread.rs)
216- Found a bug or behavioural difference? Please [open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with details and a minimal repro.
217"#;
218
/// GPU execution metadata for `csvread`.
///
/// The spec lists no supported precisions and no provider hooks, and selects
/// `ResidencyPolicy::GatherImmediately`; as the `notes` field states, the
/// builtin runs entirely on the host.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "csvread",
    op_kind: GpuOpKind::Custom("io-csvread"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs entirely on the host; acceleration providers are not involved.",
};

// Pass the spec to the registration macro (defined outside this file;
// presumably it records the spec in a builtin registry).
register_builtin_gpu_spec!(GPU_SPEC);
235
/// Fusion metadata for `csvread`.
///
/// Both `elementwise` and `reduction` are `None`; as the `notes` field states,
/// the builtin is not eligible for fusion and executes as a standalone host
/// operation.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "csvread",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Not eligible for fusion; executes as a standalone host operation.",
};

// Pass the spec to the registration macro (defined outside this file).
register_builtin_fusion_spec!(FUSION_SPEC);

// The markdown help text is only registered when the `doc_export` feature is
// enabled, matching the `cfg` on `DOC_MD` itself.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("csvread", DOC_MD);
250
251#[runtime_builtin(
252    name = "csvread",
253    category = "io/tabular",
254    summary = "Read numeric data from a comma-separated text file.",
255    keywords = "csvread,csv,dlmread,numeric import,range",
256    accel = "cpu"
257)]
258fn csvread_builtin(path: Value, rest: Vec<Value>) -> Result<Value, String> {
259    let gathered_path = gather_if_needed(&path).map_err(|e| format!("csvread: {e}"))?;
260    let options = parse_arguments(&rest)?;
261    let resolved = resolve_path(&gathered_path)?;
262    let (rows, max_cols) = read_csv_rows(&resolved)?;
263    let subset = if let Some(range) = options.range {
264        apply_range(&rows, max_cols, &range, 0.0)
265    } else {
266        apply_offsets(&rows, max_cols, options.start_row, options.start_col, 0.0)
267    };
268    let tensor = rows_to_tensor(subset.rows, subset.row_count, subset.col_count, 0.0)?;
269    Ok(Value::Tensor(tensor))
270}
271
/// Optional arguments accepted after the filename, as produced by
/// `parse_arguments`. The derived `Default` (zero offsets, no range) is what
/// `csvread(filename)` uses.
#[derive(Debug, Default)]
struct CsvReadOptions {
    /// Row offset taken from the `row` argument.
    start_row: usize,
    /// Column offset taken from the `col` argument.
    start_col: usize,
    /// Explicit range from the fourth argument, if one was given.
    range: Option<RangeSpec>,
}
278
279fn parse_arguments(args: &[Value]) -> Result<CsvReadOptions, String> {
280    let mut gathered = Vec::with_capacity(args.len());
281    for value in args {
282        gathered.push(gather_if_needed(value).map_err(|e| format!("csvread: {e}"))?);
283    }
284    match gathered.len() {
285        0 => Ok(CsvReadOptions::default()),
286        2 => {
287            let start_row = value_to_start_index(&gathered[0], "row")?;
288            let start_col = value_to_start_index(&gathered[1], "col")?;
289            Ok(CsvReadOptions {
290                start_row,
291                start_col,
292                range: None,
293            })
294        }
295        3 => {
296            let start_row = value_to_start_index(&gathered[0], "row")?;
297            let start_col = value_to_start_index(&gathered[1], "col")?;
298            let range = parse_range(&gathered[2])?;
299            Ok(CsvReadOptions {
300                start_row,
301                start_col,
302                range: Some(range),
303            })
304        }
305        _ => Err("csvread: expected csvread(filename[, row, col[, range]])".to_string()),
306    }
307}
308
309fn value_to_start_index(value: &Value, name: &str) -> Result<usize, String> {
310    match value {
311        Value::Int(i) => {
312            let raw = i.to_i64();
313            if raw < 0 {
314                return Err(format!("csvread: {name} must be a non-negative integer"));
315            }
316            usize::try_from(raw).map_err(|_| format!("csvread: {name} is too large"))
317        }
318        Value::Num(n) => {
319            if !n.is_finite() {
320                return Err(format!("csvread: {name} must be a finite integer"));
321            }
322            if *n < 0.0 {
323                return Err(format!("csvread: {name} must be a non-negative integer"));
324            }
325            let rounded = n.round();
326            if (rounded - n).abs() > f64::EPSILON {
327                return Err(format!("csvread: {name} must be an integer"));
328            }
329            usize::try_from(rounded as i64).map_err(|_| format!("csvread: {name} is too large"))
330        }
331        _ => Err(format!(
332            "csvread: expected {name} as a numeric scalar, got {value:?}"
333        )),
334    }
335}
336
337fn resolve_path(value: &Value) -> Result<PathBuf, String> {
338    match value {
339        Value::String(s) => normalize_path(s),
340        Value::CharArray(ca) if ca.rows == 1 => {
341            let text: String = ca.data.iter().collect();
342            normalize_path(&text)
343        }
344        Value::StringArray(sa) => {
345            if sa.data.len() == 1 {
346                normalize_path(&sa.data[0])
347            } else {
348                Err("csvread: string array inputs must be scalar".to_string())
349            }
350        }
351        Value::CharArray(_) => {
352            Err("csvread: expected a 1-by-N character vector for the file name".to_string())
353        }
354        other => Err(format!(
355            "csvread: expected filename as string scalar or character vector, got {other:?}"
356        )),
357    }
358}
359
360fn normalize_path(raw: &str) -> Result<PathBuf, String> {
361    if raw.trim().is_empty() {
362        return Err("csvread: filename must not be empty".to_string());
363    }
364    let expanded = expand_user_path(raw, "csvread").map_err(|e| format!("csvread: {e}"))?;
365    Ok(Path::new(&expanded).to_path_buf())
366}
367
/// Reads every non-blank line of `path` and parses it into a row of numbers.
///
/// Returns the parsed rows together with the length of the longest row. Rows
/// may be ragged; callers pad them later.
///
/// A UTF-8 byte-order mark at the very start of the file is discarded. The
/// mark is not whitespace, so without this step the first field of a file
/// saved with a BOM would be rejected as nonnumeric.
///
/// Blank (whitespace-only) lines are skipped, but they still advance the line
/// counter, so row numbers in error messages refer to physical lines of the
/// file.
///
/// # Errors
/// Returns a `csvread: ...` message when the file cannot be opened or read, or
/// when a field cannot be parsed as a number.
fn read_csv_rows(path: &Path) -> Result<(Vec<Vec<f64>>, usize), String> {
    let file = File::open(path)
        .map_err(|e| format!("csvread: unable to open '{}': {e}", path.display()))?;
    let mut reader = BufReader::new(file);
    let mut buffer = String::new();
    let mut rows = Vec::new();
    let mut max_cols = 0usize;
    let mut line_index = 0usize;

    loop {
        // One buffer is reused for every line to avoid a per-line allocation.
        buffer.clear();
        let bytes = reader
            .read_line(&mut buffer)
            .map_err(|e| format!("csvread: failed to read '{}': {}", path.display(), e))?;
        if bytes == 0 {
            break;
        }
        line_index += 1;

        // Drop a single trailing "\n", "\r\n" or "\r".
        let line = buffer.as_str();
        let line = line.strip_suffix('\n').unwrap_or(line);
        let line = line.strip_suffix('\r').unwrap_or(line);
        // Only the first line of the file can carry a byte-order mark.
        let line = if line_index == 1 {
            line.strip_prefix('\u{feff}').unwrap_or(line)
        } else {
            line
        };
        if line.trim().is_empty() {
            continue;
        }

        let parsed = parse_csv_row(line, line_index)?;
        max_cols = max_cols.max(parsed.len());
        rows.push(parsed);
    }

    Ok((rows, max_cols))
}

/// Splits one line on commas and converts every field to `f64`.
///
/// Fields are trimmed; an empty field becomes `0.0`. A field wrapped in one
/// pair of double quotes has the quotes removed before parsing. `nan`, `inf`,
/// `+inf` and `-inf` are recognised case-insensitively; everything else goes
/// through `str::parse::<f64>`.
///
/// `line_index` is the one-based line number used in error messages.
///
/// # Errors
/// Returns a message naming the offending token with its row and one-based
/// column when a field is not numeric.
fn parse_csv_row(line: &str, line_index: usize) -> Result<Vec<f64>, String> {
    let mut values = Vec::new();
    for (col_index, raw_field) in line.split(',').enumerate() {
        let trimmed = raw_field.trim();
        if trimmed.is_empty() {
            values.push(0.0);
            continue;
        }
        // The length guard keeps a lone `"` from being treated as a quoted field.
        let is_quoted = trimmed.len() >= 2 && trimmed.starts_with('"') && trimmed.ends_with('"');
        let unwrapped = if is_quoted {
            &trimmed[1..trimmed.len() - 1]
        } else {
            trimmed
        };
        let lowered = unwrapped.to_ascii_lowercase();
        let value = match lowered.as_str() {
            "nan" => f64::NAN,
            "inf" | "+inf" => f64::INFINITY,
            "-inf" => f64::NEG_INFINITY,
            _ => unwrapped.parse::<f64>().map_err(|_| {
                format!(
                    "csvread: nonnumeric token '{}' at row {}, column {}",
                    unwrapped,
                    line_index,
                    col_index + 1
                )
            })?,
        };
        values.push(value);
    }
    Ok(values)
}
437
/// Rectangle of cells requested through the `range` argument.
///
/// Indices are used directly to index the parsed rows, so they are
/// zero-based. An end bound of `None` is replaced by the last available row
/// or column in `apply_range`.
#[derive(Clone, Copy, Debug)]
struct RangeSpec {
    /// First row to read.
    start_row: usize,
    /// First column to read.
    start_col: usize,
    /// Last row to read (inclusive), when one was given.
    end_row: Option<usize>,
    /// Last column to read (inclusive), when one was given.
    end_col: Option<usize>,
}
445
446fn parse_range(value: &Value) -> Result<RangeSpec, String> {
447    match value {
448        Value::String(s) => parse_range_string(s),
449        Value::CharArray(ca) if ca.rows == 1 => {
450            let text: String = ca.data.iter().collect();
451            parse_range_string(&text)
452        }
453        Value::StringArray(sa) => {
454            if sa.data.len() == 1 {
455                parse_range_string(&sa.data[0])
456            } else {
457                Err("csvread: Range string array inputs must be scalar".to_string())
458            }
459        }
460        Value::Tensor(_) => parse_range_numeric(value),
461        _ => Err("csvread: Range must be provided as a string or numeric vector".to_string()),
462    }
463}
464
465fn parse_range_string(text: &str) -> Result<RangeSpec, String> {
466    let trimmed = text.trim();
467    if trimmed.is_empty() {
468        return Err("csvread: Range string cannot be empty".to_string());
469    }
470    let parts: Vec<&str> = trimmed.split(':').collect();
471    if parts.len() > 2 {
472        return Err(format!("csvread: invalid Range specification '{trimmed}'"));
473    }
474    let start = parse_cell_reference(parts[0])?;
475    if start.col.is_none() {
476        return Err("csvread: Range must specify a starting column".to_string());
477    }
478    let end = if parts.len() == 2 {
479        Some(parse_cell_reference(parts[1])?)
480    } else {
481        None
482    };
483    if let Some(ref end_ref) = end {
484        if end_ref.col.is_none() {
485            return Err("csvread: Range end must include a column reference".to_string());
486        }
487    }
488    let start_row = start.row.unwrap_or(0);
489    let start_col = start.col.unwrap();
490    let end_row = end.as_ref().and_then(|r| r.row);
491    let end_col = end.as_ref().and_then(|r| r.col);
492    Ok(RangeSpec {
493        start_row,
494        start_col,
495        end_row,
496        end_col,
497    })
498}
499
500fn parse_range_numeric(value: &Value) -> Result<RangeSpec, String> {
501    let elements = match value {
502        Value::Tensor(t) => t.data.clone(),
503        _ => {
504            return Err(
505                "csvread: numeric Range must be provided as a vector with 2 or 4 elements"
506                    .to_string(),
507            )
508        }
509    };
510    if elements.len() != 2 && elements.len() != 4 {
511        return Err("csvread: numeric Range must contain exactly 2 or 4 elements".to_string());
512    }
513    let mut indices = Vec::with_capacity(elements.len());
514    for (idx, element) in elements.iter().enumerate() {
515        indices.push(non_negative_index(*element, idx)?);
516    }
517    let start_row = indices[0];
518    let start_col = indices[1];
519    let (end_row, end_col) = if indices.len() == 4 {
520        (Some(indices[2]), Some(indices[3]))
521    } else {
522        (None, None)
523    };
524    Ok(RangeSpec {
525        start_row,
526        start_col,
527        end_row,
528        end_col,
529    })
530}
531
/// Converts one element of a numeric range into an index.
///
/// The value must be finite, non-negative, and integral (within
/// `f64::EPSILON`). `position` is the zero-based element position and is
/// reported one-based in the overflow message.
///
/// # Errors
/// Returns a `csvread: ...` message describing the first rule the value
/// violates.
fn non_negative_index(value: f64, position: usize) -> Result<usize, String> {
    if value.is_nan() || value.is_infinite() {
        return Err("csvread: Range indices must be finite".to_string());
    }
    if value < 0.0 {
        return Err("csvread: Range indices must be non-negative".to_string());
    }
    let nearest = value.round();
    let is_fractional = (nearest - value).abs() > f64::EPSILON;
    if is_fractional {
        return Err("csvread: Range indices must be integers".to_string());
    }
    match usize::try_from(nearest as i64) {
        Ok(index) => Ok(index),
        Err(_) => Err(format!(
            "csvread: Range index {} is too large to fit in usize",
            position + 1
        )),
    }
}
550
/// One endpoint of an A1-style range, converted to zero-based indices.
///
/// Either part may be absent: `"C"` has no row and `"7"` has no column.
#[derive(Clone, Copy, Debug)]
struct CellReference {
    /// Zero-based row, when the reference contained digits.
    row: Option<usize>,
    /// Zero-based column, when the reference contained letters.
    col: Option<usize>,
}

/// Parses a single cell reference such as `B2`, `$B$2`, `C`, or `7`.
///
/// `$` markers are ignored and letters are case-insensitive. Column letters
/// must come before row digits: a letter that follows a digit (for example
/// `A1B2` or `2B`) is rejected instead of being silently merged into the
/// column designator.
///
/// # Errors
/// Returns a `csvread: ...` message for unexpected characters, misplaced
/// letters, an empty reference, a row of `0`, or a row/column that overflows
/// `usize`.
fn parse_cell_reference(token: &str) -> Result<CellReference, String> {
    let mut letters = String::new();
    let mut digits = String::new();
    for ch in token.trim().chars() {
        match ch {
            '$' => continue,
            c if c.is_ascii_alphabetic() => {
                // Once row digits have started, no further column letters are allowed.
                if !digits.is_empty() {
                    return Err(format!("csvread: invalid Range component '{token}'"));
                }
                letters.push(c.to_ascii_uppercase());
            }
            c if c.is_ascii_digit() => digits.push(c),
            _ => return Err(format!("csvread: invalid Range component '{token}'")),
        }
    }
    if letters.is_empty() && digits.is_empty() {
        return Err("csvread: Range references cannot be empty".to_string());
    }

    let col = if letters.is_empty() {
        None
    } else {
        Some(column_index_from_letters(&letters)?)
    };
    let row = if digits.is_empty() {
        None
    } else {
        let parsed = digits.parse::<usize>().map_err(|_| {
            format!(
                "csvread: invalid row index '{}' in Range component '{token}'",
                digits
            )
        })?;
        // A1 rows are one-based; row 0 does not exist.
        if parsed == 0 {
            return Err("csvread: Range rows must be >= 1".to_string());
        }
        Some(parsed - 1)
    };
    Ok(CellReference { row, col })
}

/// Converts upper-case column letters (`A`, `Z`, `AA`, ...) into a zero-based
/// column index using bijective base-26 arithmetic.
///
/// # Errors
/// Returns a `csvread: ...` message when a character is not `A`-`Z`, when the
/// index overflows `usize`, or when `letters` is empty.
fn column_index_from_letters(letters: &str) -> Result<usize, String> {
    let mut value: usize = 0;
    for ch in letters.chars() {
        if !ch.is_ascii_uppercase() {
            return Err(format!(
                "csvread: invalid column designator '{letters}' in Range"
            ));
        }
        // 'A' maps to 1 so that "AA" follows "Z".
        let digit = usize::from(ch as u8 - b'A' + 1);
        value = value
            .checked_mul(26)
            .and_then(|v| v.checked_add(digit))
            .ok_or_else(|| "csvread: Range column index overflowed".to_string())?;
    }
    value
        .checked_sub(1)
        .ok_or_else(|| "csvread: Range column index underflowed".to_string())
}
615
/// Rectangular selection cut out of the parsed rows.
struct SubsetResult {
    /// Selected values, one inner vector per output row.
    rows: Vec<Vec<f64>>,
    /// Number of entries in `rows`.
    row_count: usize,
    /// Length of the longest selected row.
    col_count: usize,
}

/// Selects everything from (`start_row`, `start_col`) to the end of the data.
///
/// `max_cols` is the width of the widest row. Rows shorter than that are
/// padded with `default_fill`, so every returned row has
/// `max_cols - start_col` entries. An empty result is returned when there is
/// no data or when either offset lies past the available rows or columns.
fn apply_offsets(
    rows: &[Vec<f64>],
    max_cols: usize,
    start_row: usize,
    start_col: usize,
    default_fill: f64,
) -> SubsetResult {
    let nothing_to_read = rows.is_empty()
        || max_cols == 0
        || start_row >= rows.len()
        || start_col >= max_cols;
    if nothing_to_read {
        return SubsetResult {
            rows: Vec::new(),
            row_count: 0,
            col_count: 0,
        };
    }

    let selected: Vec<Vec<f64>> = rows
        .iter()
        .skip(start_row)
        .map(|row| {
            // `get` returns `None` past the end of a short row, which is
            // where the fill value comes in.
            (start_col..max_cols)
                .map(|col_idx| row.get(col_idx).copied().unwrap_or(default_fill))
                .collect()
        })
        .collect();
    let widest = selected.iter().map(Vec::len).max().unwrap_or(0);

    SubsetResult {
        row_count: selected.len(),
        col_count: widest,
        rows: selected,
    }
}
676
677fn apply_range(
678    rows: &[Vec<f64>],
679    max_cols: usize,
680    range: &RangeSpec,
681    default_fill: f64,
682) -> SubsetResult {
683    if rows.is_empty() || max_cols == 0 {
684        return SubsetResult {
685            rows: Vec::new(),
686            row_count: 0,
687            col_count: 0,
688        };
689    }
690    if range.start_row >= rows.len() || range.start_col >= max_cols {
691        return SubsetResult {
692            rows: Vec::new(),
693            row_count: 0,
694            col_count: 0,
695        };
696    }
697    let last_row = rows.len().saturating_sub(1);
698    let mut end_row = range.end_row.unwrap_or(last_row);
699    if end_row > last_row {
700        end_row = last_row;
701    }
702    if end_row < range.start_row {
703        return SubsetResult {
704            rows: Vec::new(),
705            row_count: 0,
706            col_count: 0,
707        };
708    }
709
710    let last_col = max_cols.saturating_sub(1);
711    let mut end_col = range.end_col.unwrap_or(last_col);
712    if end_col > last_col {
713        end_col = last_col;
714    }
715    if end_col < range.start_col {
716        return SubsetResult {
717            rows: Vec::new(),
718            row_count: 0,
719            col_count: 0,
720        };
721    }
722
723    let mut subset_rows = Vec::new();
724    let mut col_count = 0usize;
725    for row_idx in range.start_row..=end_row {
726        if row_idx >= rows.len() {
727            break;
728        }
729        let row = &rows[row_idx];
730        let mut extracted = Vec::with_capacity(end_col - range.start_col + 1);
731        for col_idx in range.start_col..=end_col {
732            if col_idx >= max_cols {
733                break;
734            }
735            let value = row.get(col_idx).copied().unwrap_or(default_fill);
736            extracted.push(value);
737        }
738        col_count = col_count.max(extracted.len());
739        subset_rows.push(extracted);
740    }
741    let row_count = subset_rows.len();
742    SubsetResult {
743        rows: subset_rows,
744        row_count,
745        col_count,
746    }
747}
748
749fn rows_to_tensor(
750    rows: Vec<Vec<f64>>,
751    row_count: usize,
752    col_count: usize,
753    default_fill: f64,
754) -> Result<Tensor, String> {
755    if row_count == 0 || col_count == 0 {
756        return Tensor::new(Vec::new(), vec![0, 0]).map_err(|e| format!("csvread: {e}"));
757    }
758    let mut data = vec![default_fill; row_count * col_count];
759    for (row_idx, row) in rows.iter().enumerate().take(row_count) {
760        for col_idx in 0..col_count {
761            let value = row.get(col_idx).copied().unwrap_or(default_fill);
762            data[row_idx + col_idx * row_count] = value;
763        }
764    }
765    Tensor::new(data, vec![row_count, col_count]).map_err(|e| format!("csvread: {e}"))
766}
767
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    use runmat_builtins::{CharArray, IntValue, Tensor as BuiltinTensor};

    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    // Disambiguates paths created within the same nanosecond.
    static UNIQUE_COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Builds a temp-dir path that is unique per process, timestamp, and call.
    fn unique_path(prefix: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let seq = UNIQUE_COUNTER.fetch_add(1, Ordering::Relaxed);
        let file_name = format!(
            "runmat_csvread_{prefix}_{}_{}_{}",
            std::process::id(),
            nanos,
            seq
        );
        std::env::temp_dir().join(file_name)
    }

    /// Writes `lines` (joined with `\n`, no trailing newline) to a fresh CSV file.
    fn write_temp_file(lines: &[&str]) -> PathBuf {
        let path = unique_path("input").with_extension("csv");
        fs::write(&path, lines.join("\n")).expect("write temp csv");
        path
    }

    /// Wraps a filesystem path as the string value passed to the builtin.
    fn path_value(path: &Path) -> Value {
        Value::from(path.to_string_lossy().to_string())
    }

    /// Shorthand for an `i32` integer argument.
    fn int(value: i32) -> Value {
        Value::Int(IntValue::I32(value))
    }

    /// Asserts that `value` is a tensor with the given shape and column-major data.
    fn expect_tensor(value: Value, shape: &[usize], data: &[f64]) {
        match value {
            Value::Tensor(t) => {
                assert_eq!(t.shape, shape.to_vec());
                assert_eq!(t.data, data.to_vec());
            }
            other => panic!("expected tensor, got {other:?}"),
        }
    }

    #[test]
    fn csvread_basic_csv_roundtrip() {
        let path = write_temp_file(&["1,2,3", "4,5,6"]);
        let result = csvread_builtin(path_value(&path), Vec::new()).expect("csvread");
        expect_tensor(result, &[2, 3], &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_with_offsets() {
        let path = write_temp_file(&["0,1,2", "3,4,5", "6,7,8"]);
        let args = vec![int(1), int(1)];
        let result = csvread_builtin(path_value(&path), args).expect("csv");
        expect_tensor(result, &[2, 2], &[4.0, 7.0, 5.0, 8.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_with_numeric_range() {
        let path = write_temp_file(&["1,2,3", "4,5,6", "7,8,9"]);
        let range = BuiltinTensor::new(vec![1.0, 1.0, 2.0, 2.0], vec![4, 1]).expect("tensor");
        let args = vec![int(0), int(0), Value::from(range)];
        let result = csvread_builtin(path_value(&path), args).expect("csv");
        expect_tensor(result, &[2, 2], &[5.0, 8.0, 6.0, 9.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_with_string_range() {
        let path = write_temp_file(&["1,2,3", "4,5,6", "7,8,9"]);
        let args = vec![int(0), int(0), Value::from("B2:C3")];
        let result = csvread_builtin(path_value(&path), args).expect("csv");
        expect_tensor(result, &[2, 2], &[5.0, 8.0, 6.0, 9.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_empty_fields_become_zero() {
        let path = write_temp_file(&["1,,3", ",5,", "7,8,"]);
        let result = csvread_builtin(path_value(&path), Vec::new()).expect("csv");
        expect_tensor(
            result,
            &[3, 3],
            &[1.0, 0.0, 7.0, 0.0, 5.0, 8.0, 3.0, 0.0, 0.0],
        );
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_errors_on_text() {
        let path = write_temp_file(&["1,2,3", "4,error,6"]);
        let err = csvread_builtin(path_value(&path), Vec::new()).expect_err("should fail");
        assert!(
            err.contains("nonnumeric token 'error'"),
            "unexpected error: {err}"
        );
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_accepts_char_array_filename() {
        let path = write_temp_file(&["1,2"]);
        let data: Vec<char> = path.to_string_lossy().chars().collect();
        let cols = data.len();
        let chars = CharArray::new(data, 1, cols).expect("char array");
        let result = csvread_builtin(Value::CharArray(chars), Vec::new()).expect("csv");
        expect_tensor(result, &[1, 2], &[1.0, 2.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}
runmat_runtime/builtins/io/tabular/csvread.rs

runmat_runtime/builtins/io/tabular/
csvread.rs