1use std::fs::File;
8use std::io::{BufRead, BufReader};
9use std::path::{Path, PathBuf};
10
11use runmat_builtins::{Tensor, Value};
12use runmat_macros::runtime_builtin;
13
14use crate::builtins::common::fs::expand_user_path;
15use crate::builtins::common::spec::{
16 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
17 ReductionNaN, ResidencyPolicy, ShapeRequirements,
18};
19#[cfg(feature = "doc_export")]
20use crate::register_builtin_doc_text;
21use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
22
23#[cfg(feature = "doc_export")]
24pub const DOC_MD: &str = r#"---
25title: "csvread"
26category: "io/tabular"
27keywords: ["csvread", "csv", "comma-separated values", "numeric import", "range", "header"]
28summary: "Read numeric data from a comma-separated text file with MATLAB-compatible zero-based ranges."
29references:
30 - https://www.mathworks.com/help/matlab/ref/csvread.html
31gpu_support:
32 elementwise: false
33 reduction: false
34 precisions: []
35 broadcasting: "none"
36 notes: "Performs host-side file I/O and parsing. Acceleration providers are not involved, and results remain on the CPU."
37fusion:
38 elementwise: false
39 reduction: false
40 max_inputs: 1
41 constants: "inline"
42requires_feature: null
43tested:
44 unit: "builtins::io::tabular::csvread::tests"
45 integration: "builtins::io::tabular::csvread::tests::csvread_basic_csv_roundtrip"
46---
47
48# What does the `csvread` function do in MATLAB / RunMat?
49`csvread(filename)` reads numeric data from a comma-separated text file and returns a dense double-precision matrix. It is a legacy convenience wrapper preserved for MATLAB compatibility, and RunMat intentionally mirrors the original zero-based semantics.
50
51## How does the `csvread` function behave in MATLAB / RunMat?
52- Accepts character vectors or string scalars for the file name. String arrays must contain exactly one element.
53- `csvread(filename, row, col)` starts reading at the zero-based row `row` and column `col`, skipping any data before that offset.
- `csvread(filename, row, col, range)` reads only the rectangle described by `range`. Numeric ranges contain four elements `[r1 c1 r2 c2]` (zero-based, inclusive); a two-element `[r1 c1]` vector is also accepted and reads from that cell to the end of the data. Excel-style ranges use the familiar `"B2:D6"` A1 notation, which RunMat converts to zero-based indices internally.
55- Empty fields (two consecutive commas or a trailing comma) are interpreted as `0`. Tokens such as `NaN`, `Inf`, and `-Inf` are accepted (case-insensitive).
56- Any other nonnumeric token raises an error that identifies the offending row and column.
57- Results are dense double-precision tensors using column-major layout. An empty file produces a `0×0` tensor.
58- Paths can contain `~` to reference the home directory; RunMat expands the token before opening the file.
59
60## `csvread` Function GPU Execution Behaviour
61`csvread` performs all work on the host CPU. Arguments are gathered from the GPU when necessary, and the resulting tensor is returned in host memory. To keep data on the GPU, call `gpuArray` on the output or switch to `readmatrix` with the `'Like'` option. No provider hooks are required.
62
63## Examples of using the `csvread` function in MATLAB / RunMat
64
65### Import Entire CSV File
66```matlab
67writematrix([1 2 3; 4 5 6], "scores.csv");
68M = csvread("scores.csv");
69delete("scores.csv");
70```
71Expected output:
72```matlab
73M =
74 1 2 3
75 4 5 6
76```
77
78### Skip Header Row And Column Using Zero-Based Offsets
79```matlab
80fid = fopen("with_header.csv", "w");
81fprintf(fid, "Name,Jan,Feb\nalpha,1,2\nbeta,3,4\n");
82fclose(fid);
83
84M = csvread("with_header.csv", 1, 1);
85delete("with_header.csv");
86```
87Expected output:
88```matlab
89M =
90 1 2
91 3 4
92```
93
94### Read A Specific Range With Numeric Vector Syntax
95```matlab
96fid = fopen("measurements.csv", "w");
97fprintf(fid, "10,11,12,13\n14,15,16,17\n18,19,20,21\n22,23,24,25\n");
98fclose(fid);
99
100block = csvread("measurements.csv", 0, 0, [1 1 2 3]);
101delete("measurements.csv");
102```
103Expected output:
104```matlab
105block =
106 15 16 17
107 19 20 21
108```
109
110### Read A Block Using Excel-Style Range Notation
111```matlab
112fid = fopen("measurements2.csv", "w");
113fprintf(fid, "10,11,12\n14,15,16\n18,19,20\n");
114fclose(fid);
115
116sub = csvread("measurements2.csv", 0, 0, "B2:C3");
117delete("measurements2.csv");
118```
119Expected output:
120```matlab
121sub =
122 15 16
123 19 20
124```
125
126### Handle Empty Fields As Zeros
127```matlab
128fid = fopen("with_blanks.csv", "w");
129fprintf(fid, "1,,3\n,5,\n7,8,\n");
130fclose(fid);
131
132M = csvread("with_blanks.csv");
133delete("with_blanks.csv");
134```
135Expected output:
136```matlab
137M =
138 1 0 3
139 0 5 0
140 7 8 0
141```
142
143### Read Numeric Data From A File In The Home Directory
144```matlab
145homePath = fullfile(getenv("HOME"), "runmat_csvread_home.csv");
146fid = fopen(homePath, "w");
147fprintf(fid, "9,10\n11,12\n");
148fclose(fid);
149
150M = csvread(fullfile("~", "runmat_csvread_home.csv"));
151delete(homePath);
152```
153Expected output:
154```matlab
155M =
156 9 10
157 11 12
158```
159
160### Detect Errors When Text Appears In Numeric Columns
161```matlab
162fid = fopen("bad.csv", "w");
163fprintf(fid, "1,2,3\n4,error,6\n");
164fclose(fid);
165
166try
167 csvread("bad.csv");
168catch err
169 disp(err.message);
170end
171delete("bad.csv");
172```
173Expected output:
174```matlab
175csvread: nonnumeric token 'error' at row 2, column 2
176```
177
178## GPU residency in RunMat (Do I need `gpuArray`?)
179
180`csvread` always returns a host-resident tensor because it performs file I/O and parsing on the CPU. If you need the data on the GPU, wrap the call with `gpuArray(csvread(...))` or switch to `readmatrix` with the `'Like'` option so that RunMat can place the result directly on the desired device.
181
182## FAQ
183
184### Why does `csvread` complain about text data?
185`csvread` is limited to numeric CSV content. If a field contains letters, quoted strings, or other tokens that cannot be parsed as numbers, the builtin raises an error. Switch to `readmatrix` or `readtable` when the file mixes text and numbers.
186
187### Are the row and column offsets zero-based?
188Yes. `csvread(filename, row, col)` treats `row` and `col` as zero-based counts to skip from the start of the file before reading results.
189
190### How are Excel-style ranges interpreted?
191Excel ranges such as `"B2:D5"` use the familiar 1-based row numbering and column letters. The builtin converts them internally to zero-based indices and includes both endpoints.
192
193### Can I read files with quoted numeric fields?
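Only in a limited form. A field wrapped in a single pair of double quotes (for example `"42"`) has the quotes stripped before it is parsed, but quoted fields that contain commas or escaped quotes are not handled. For files that rely on CSV quoting, switch to `readmatrix`, which has full CSV parsing support.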
195
196### What happens to empty cells?
197Empty cells (two consecutive commas or a trailing delimiter) become zero, matching MATLAB's `csvread` behaviour.
198
199### Does `csvread` support custom delimiters?
200No. `csvread` always uses comma separation. Use `dlmread` or `readmatrix` for other delimiters.
201
202### How do I keep the results on the GPU?
203`csvread` returns a host tensor. Call `gpuArray(csvread(...))` after reading, or prefer `readmatrix` with `'Like', gpuArray.zeros(1)` to keep residency on the GPU automatically.
204
205### What if the file is empty?
206An empty file results in a `0×0` double tensor. MATLAB behaves the same way.
207
208### Does `csvread` change the working directory?
209No. Relative paths are resolved against the current working directory and do not modify it.
210
211## See Also
212[readmatrix](./readmatrix), [writematrix](./writematrix), [gpuArray](../../acceleration/gpu/gpuArray), [gather](../../acceleration/gpu/gather)
213
214## Source & Feedback
215- The full source code for the implementation of the `csvread` function is available at: [`crates/runmat-runtime/src/builtins/io/tabular/csvread.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/io/tabular/csvread.rs)
216- Found a bug or behavioural difference? Please [open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with details and a minimal repro.
217"#;
218
/// GPU metadata for the `csvread` builtin.
///
/// The spec declares no supported precisions and no provider hooks, and uses
/// the `GatherImmediately` residency policy; per its own `notes` field the
/// builtin runs entirely on the host.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "csvread",
    op_kind: GpuOpKind::Custom("io-csvread"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs entirely on the host; acceleration providers are not involved.",
};

// Hands the spec to the registration macro (defined elsewhere; presumably it
// adds the spec to the runtime's GPU spec registry).
register_builtin_gpu_spec!(GPU_SPEC);
235
/// Fusion metadata for the `csvread` builtin.
///
/// Neither an elementwise nor a reduction descriptor is provided; per the
/// `notes` field the builtin is not eligible for fusion.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "csvread",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Not eligible for fusion; executes as a standalone host operation.",
};

// Hands the spec to the registration macro (defined elsewhere).
register_builtin_fusion_spec!(FUSION_SPEC);

// Only compiled with the `doc_export` feature: passes the Markdown held in
// `DOC_MD` to the doc-text registration macro under the name "csvread".
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("csvread", DOC_MD);
250
251#[runtime_builtin(
252 name = "csvread",
253 category = "io/tabular",
254 summary = "Read numeric data from a comma-separated text file.",
255 keywords = "csvread,csv,dlmread,numeric import,range",
256 accel = "cpu"
257)]
258fn csvread_builtin(path: Value, rest: Vec<Value>) -> Result<Value, String> {
259 let gathered_path = gather_if_needed(&path).map_err(|e| format!("csvread: {e}"))?;
260 let options = parse_arguments(&rest)?;
261 let resolved = resolve_path(&gathered_path)?;
262 let (rows, max_cols) = read_csv_rows(&resolved)?;
263 let subset = if let Some(range) = options.range {
264 apply_range(&rows, max_cols, &range, 0.0)
265 } else {
266 apply_offsets(&rows, max_cols, options.start_row, options.start_col, 0.0)
267 };
268 let tensor = rows_to_tensor(subset.rows, subset.row_count, subset.col_count, 0.0)?;
269 Ok(Value::Tensor(tensor))
270}
271
/// Optional arguments of a `csvread` call, as produced by `parse_arguments`.
///
/// The derived `Default` (zero offsets, no range) corresponds to the
/// one-argument form `csvread(filename)`.
#[derive(Debug, Default)]
struct CsvReadOptions {
    /// Zero-based index of the first row to return.
    start_row: usize,
    /// Zero-based index of the first column to return.
    start_col: usize,
    /// Explicit range from the four-argument form. When present,
    /// `csvread_builtin` selects by this range instead of the offsets.
    range: Option<RangeSpec>,
}
278
279fn parse_arguments(args: &[Value]) -> Result<CsvReadOptions, String> {
280 let mut gathered = Vec::with_capacity(args.len());
281 for value in args {
282 gathered.push(gather_if_needed(value).map_err(|e| format!("csvread: {e}"))?);
283 }
284 match gathered.len() {
285 0 => Ok(CsvReadOptions::default()),
286 2 => {
287 let start_row = value_to_start_index(&gathered[0], "row")?;
288 let start_col = value_to_start_index(&gathered[1], "col")?;
289 Ok(CsvReadOptions {
290 start_row,
291 start_col,
292 range: None,
293 })
294 }
295 3 => {
296 let start_row = value_to_start_index(&gathered[0], "row")?;
297 let start_col = value_to_start_index(&gathered[1], "col")?;
298 let range = parse_range(&gathered[2])?;
299 Ok(CsvReadOptions {
300 start_row,
301 start_col,
302 range: Some(range),
303 })
304 }
305 _ => Err("csvread: expected csvread(filename[, row, col[, range]])".to_string()),
306 }
307}
308
309fn value_to_start_index(value: &Value, name: &str) -> Result<usize, String> {
310 match value {
311 Value::Int(i) => {
312 let raw = i.to_i64();
313 if raw < 0 {
314 return Err(format!("csvread: {name} must be a non-negative integer"));
315 }
316 usize::try_from(raw).map_err(|_| format!("csvread: {name} is too large"))
317 }
318 Value::Num(n) => {
319 if !n.is_finite() {
320 return Err(format!("csvread: {name} must be a finite integer"));
321 }
322 if *n < 0.0 {
323 return Err(format!("csvread: {name} must be a non-negative integer"));
324 }
325 let rounded = n.round();
326 if (rounded - n).abs() > f64::EPSILON {
327 return Err(format!("csvread: {name} must be an integer"));
328 }
329 usize::try_from(rounded as i64).map_err(|_| format!("csvread: {name} is too large"))
330 }
331 _ => Err(format!(
332 "csvread: expected {name} as a numeric scalar, got {value:?}"
333 )),
334 }
335}
336
337fn resolve_path(value: &Value) -> Result<PathBuf, String> {
338 match value {
339 Value::String(s) => normalize_path(s),
340 Value::CharArray(ca) if ca.rows == 1 => {
341 let text: String = ca.data.iter().collect();
342 normalize_path(&text)
343 }
344 Value::StringArray(sa) => {
345 if sa.data.len() == 1 {
346 normalize_path(&sa.data[0])
347 } else {
348 Err("csvread: string array inputs must be scalar".to_string())
349 }
350 }
351 Value::CharArray(_) => {
352 Err("csvread: expected a 1-by-N character vector for the file name".to_string())
353 }
354 other => Err(format!(
355 "csvread: expected filename as string scalar or character vector, got {other:?}"
356 )),
357 }
358}
359
360fn normalize_path(raw: &str) -> Result<PathBuf, String> {
361 if raw.trim().is_empty() {
362 return Err("csvread: filename must not be empty".to_string());
363 }
364 let expanded = expand_user_path(raw, "csvread").map_err(|e| format!("csvread: {e}"))?;
365 Ok(Path::new(&expanded).to_path_buf())
366}
367
368fn read_csv_rows(path: &Path) -> Result<(Vec<Vec<f64>>, usize), String> {
369 let file = File::open(path)
370 .map_err(|e| format!("csvread: unable to open '{}': {e}", path.display()))?;
371 let mut reader = BufReader::new(file);
372 let mut buffer = String::new();
373 let mut rows = Vec::new();
374 let mut max_cols = 0usize;
375 let mut line_index = 0usize;
376
377 loop {
378 buffer.clear();
379 let bytes = reader
380 .read_line(&mut buffer)
381 .map_err(|e| format!("csvread: failed to read '{}': {}", path.display(), e))?;
382 if bytes == 0 {
383 break;
384 }
385 line_index += 1;
386 if buffer.trim().is_empty() {
387 continue;
388 }
389 if buffer.ends_with('\n') {
390 buffer.pop();
391 if buffer.ends_with('\r') {
392 buffer.pop();
393 }
394 } else if buffer.ends_with('\r') {
395 buffer.pop();
396 }
397 let parsed = parse_csv_row(&buffer, line_index)?;
398 max_cols = max_cols.max(parsed.len());
399 rows.push(parsed);
400 }
401
402 Ok((rows, max_cols))
403}
404
/// Splits `line` on commas and converts each field to an `f64`.
///
/// * Fields are trimmed; an empty field becomes `0.0`.
/// * One surrounding pair of double quotes is removed before conversion.
/// * `nan`, `inf`, `+inf` and `-inf` are recognised in any ASCII letter case.
///
/// `line_index` is used only for the error message, where it is reported
/// as-is alongside a 1-based column number.
///
/// # Errors
/// Returns a message naming the first token that is not numeric.
fn parse_csv_row(line: &str, line_index: usize) -> Result<Vec<f64>, String> {
    line.split(',')
        .enumerate()
        .map(|(col_index, raw_field)| -> Result<f64, String> {
            let cell = raw_field.trim();
            if cell.is_empty() {
                return Ok(0.0);
            }
            // A lone `"` has nothing left after the prefix is removed, so it
            // stays as-is, exactly like the length >= 2 requirement.
            let unwrapped = cell
                .strip_prefix('"')
                .and_then(|rest| rest.strip_suffix('"'))
                .unwrap_or(cell);

            if unwrapped.eq_ignore_ascii_case("nan") {
                Ok(f64::NAN)
            } else if unwrapped.eq_ignore_ascii_case("inf")
                || unwrapped.eq_ignore_ascii_case("+inf")
            {
                Ok(f64::INFINITY)
            } else if unwrapped.eq_ignore_ascii_case("-inf") {
                Ok(f64::NEG_INFINITY)
            } else {
                unwrapped.parse::<f64>().map_err(|_| {
                    format!(
                        "csvread: nonnumeric token '{}' at row {}, column {}",
                        unwrapped,
                        line_index,
                        col_index + 1
                    )
                })
            }
        })
        .collect()
}
437
/// A rectangular selection in zero-based, inclusive coordinates.
#[derive(Clone, Copy, Debug)]
struct RangeSpec {
    /// First row of the selection.
    start_row: usize,
    /// First column of the selection.
    start_col: usize,
    /// Last row of the selection; `None` makes `apply_range` read through the
    /// last row of the data.
    end_row: Option<usize>,
    /// Last column of the selection; `None` makes `apply_range` read through
    /// the last column of the data.
    end_col: Option<usize>,
}
445
446fn parse_range(value: &Value) -> Result<RangeSpec, String> {
447 match value {
448 Value::String(s) => parse_range_string(s),
449 Value::CharArray(ca) if ca.rows == 1 => {
450 let text: String = ca.data.iter().collect();
451 parse_range_string(&text)
452 }
453 Value::StringArray(sa) => {
454 if sa.data.len() == 1 {
455 parse_range_string(&sa.data[0])
456 } else {
457 Err("csvread: Range string array inputs must be scalar".to_string())
458 }
459 }
460 Value::Tensor(_) => parse_range_numeric(value),
461 _ => Err("csvread: Range must be provided as a string or numeric vector".to_string()),
462 }
463}
464
465fn parse_range_string(text: &str) -> Result<RangeSpec, String> {
466 let trimmed = text.trim();
467 if trimmed.is_empty() {
468 return Err("csvread: Range string cannot be empty".to_string());
469 }
470 let parts: Vec<&str> = trimmed.split(':').collect();
471 if parts.len() > 2 {
472 return Err(format!("csvread: invalid Range specification '{trimmed}'"));
473 }
474 let start = parse_cell_reference(parts[0])?;
475 if start.col.is_none() {
476 return Err("csvread: Range must specify a starting column".to_string());
477 }
478 let end = if parts.len() == 2 {
479 Some(parse_cell_reference(parts[1])?)
480 } else {
481 None
482 };
483 if let Some(ref end_ref) = end {
484 if end_ref.col.is_none() {
485 return Err("csvread: Range end must include a column reference".to_string());
486 }
487 }
488 let start_row = start.row.unwrap_or(0);
489 let start_col = start.col.unwrap();
490 let end_row = end.as_ref().and_then(|r| r.row);
491 let end_col = end.as_ref().and_then(|r| r.col);
492 Ok(RangeSpec {
493 start_row,
494 start_col,
495 end_row,
496 end_col,
497 })
498}
499
500fn parse_range_numeric(value: &Value) -> Result<RangeSpec, String> {
501 let elements = match value {
502 Value::Tensor(t) => t.data.clone(),
503 _ => {
504 return Err(
505 "csvread: numeric Range must be provided as a vector with 2 or 4 elements"
506 .to_string(),
507 )
508 }
509 };
510 if elements.len() != 2 && elements.len() != 4 {
511 return Err("csvread: numeric Range must contain exactly 2 or 4 elements".to_string());
512 }
513 let mut indices = Vec::with_capacity(elements.len());
514 for (idx, element) in elements.iter().enumerate() {
515 indices.push(non_negative_index(*element, idx)?);
516 }
517 let start_row = indices[0];
518 let start_col = indices[1];
519 let (end_row, end_col) = if indices.len() == 4 {
520 (Some(indices[2]), Some(indices[3]))
521 } else {
522 (None, None)
523 };
524 Ok(RangeSpec {
525 start_row,
526 start_col,
527 end_row,
528 end_col,
529 })
530}
531
/// Converts one element of a numeric range into a `usize` index.
///
/// `position` is the zero-based position of the element within the range
/// vector; it appears (1-based) in the "too large" error message only.
///
/// # Errors
/// Fails when `value` is not finite, is negative, is not an integer (within
/// `f64::EPSILON`), or does not fit the target integer type.
fn non_negative_index(value: f64, position: usize) -> Result<usize, String> {
    if !value.is_finite() {
        return Err("csvread: Range indices must be finite".to_string());
    }
    if value < 0.0 {
        return Err("csvread: Range indices must be non-negative".to_string());
    }
    let rounded = value.round();
    if (rounded - value).abs() > f64::EPSILON {
        return Err("csvread: Range indices must be integers".to_string());
    }
    let too_large = || {
        format!(
            "csvread: Range index {} is too large to fit in usize",
            position + 1
        )
    };
    // Float-to-integer `as` casts saturate, so without this guard a value such
    // as 1e300 would silently turn into `i64::MAX`. `i64::MAX as f64` is
    // exactly 2^63, the first value the cast cannot represent.
    if rounded >= i64::MAX as f64 {
        return Err(too_large());
    }
    usize::try_from(rounded as i64).map_err(|_| too_large())
}
550
/// One endpoint of a spreadsheet-style range, as produced by
/// `parse_cell_reference`. Either part may be absent.
#[derive(Clone, Copy)]
struct CellReference {
    /// Zero-based row (the written row number minus one); `None` when the
    /// reference contains no digits.
    row: Option<usize>,
    /// Zero-based column derived from the column letters; `None` when the
    /// reference contains no letters.
    col: Option<usize>,
}
556
557fn parse_cell_reference(token: &str) -> Result<CellReference, String> {
558 let mut letters = String::new();
559 let mut digits = String::new();
560 for ch in token.trim().chars() {
561 if ch == '$' {
562 continue;
563 }
564 if ch.is_ascii_alphabetic() {
565 letters.push(ch.to_ascii_uppercase());
566 } else if ch.is_ascii_digit() {
567 digits.push(ch);
568 } else {
569 return Err(format!("csvread: invalid Range component '{token}'"));
570 }
571 }
572 if letters.is_empty() && digits.is_empty() {
573 return Err("csvread: Range references cannot be empty".to_string());
574 }
575 let col = if letters.is_empty() {
576 None
577 } else {
578 Some(column_index_from_letters(&letters)?)
579 };
580 let row = if digits.is_empty() {
581 None
582 } else {
583 let parsed = digits.parse::<usize>().map_err(|_| {
584 format!(
585 "csvread: invalid row index '{}' in Range component '{token}'",
586 digits
587 )
588 })?;
589 if parsed == 0 {
590 return Err("csvread: Range rows must be >= 1".to_string());
591 }
592 Some(parsed - 1)
593 };
594 Ok(CellReference { row, col })
595}
596
/// Converts spreadsheet column letters to a zero-based column index
/// (`A` is 0, `Z` is 25, `AA` is 26, and so on).
///
/// `letters` must consist of ASCII uppercase letters only.
///
/// # Errors
/// Fails when a character is not an ASCII uppercase letter, when the value
/// overflows `usize`, or when `letters` is empty (reported as an underflow).
fn column_index_from_letters(letters: &str) -> Result<usize, String> {
    // Accumulate the bijective base-26 value, where A = 1 and Z = 26.
    let one_based = letters
        .chars()
        .try_fold(0usize, |acc, ch| -> Result<usize, String> {
            if !ch.is_ascii_uppercase() {
                return Err(format!(
                    "csvread: invalid column designator '{letters}' in Range"
                ));
            }
            let digit = usize::from(ch as u8 - b'A') + 1;
            acc.checked_mul(26)
                .and_then(|scaled| scaled.checked_add(digit))
                .ok_or_else(|| "csvread: Range column index overflowed".to_string())
        })?;

    one_based
        .checked_sub(1)
        .ok_or_else(|| "csvread: Range column index underflowed".to_string())
}
615
/// Rows selected by `apply_offsets` or `apply_range`, together with the
/// dimensions that `rows_to_tensor` uses for the output tensor.
struct SubsetResult {
    /// Selected rows, one inner vector per row.
    rows: Vec<Vec<f64>>,
    /// Number of rows in the selection; 0 for an empty selection.
    row_count: usize,
    /// Number of columns in the selection; 0 for an empty selection.
    col_count: usize,
}
621
622fn apply_offsets(
623 rows: &[Vec<f64>],
624 max_cols: usize,
625 start_row: usize,
626 start_col: usize,
627 default_fill: f64,
628) -> SubsetResult {
629 if rows.is_empty() || max_cols == 0 {
630 return SubsetResult {
631 rows: Vec::new(),
632 row_count: 0,
633 col_count: 0,
634 };
635 }
636 if start_row >= rows.len() {
637 return SubsetResult {
638 rows: Vec::new(),
639 row_count: 0,
640 col_count: 0,
641 };
642 }
643 if start_col >= max_cols {
644 return SubsetResult {
645 rows: Vec::new(),
646 row_count: 0,
647 col_count: 0,
648 };
649 }
650
651 let mut subset_rows = Vec::new();
652 let mut col_count = 0usize;
653 for row in rows.iter().skip(start_row) {
654 if start_col >= row.len() && row.len() < max_cols {
655 let width = max_cols - start_col;
657 subset_rows.push(vec![default_fill; width]);
658 col_count = col_count.max(width);
659 continue;
660 }
661 let mut extracted = Vec::with_capacity(max_cols - start_col);
662 for col_idx in start_col..max_cols {
663 let value = row.get(col_idx).copied().unwrap_or(default_fill);
664 extracted.push(value);
665 }
666 col_count = col_count.max(extracted.len());
667 subset_rows.push(extracted);
668 }
669 let row_count = subset_rows.len();
670 SubsetResult {
671 rows: subset_rows,
672 row_count,
673 col_count,
674 }
675}
676
677fn apply_range(
678 rows: &[Vec<f64>],
679 max_cols: usize,
680 range: &RangeSpec,
681 default_fill: f64,
682) -> SubsetResult {
683 if rows.is_empty() || max_cols == 0 {
684 return SubsetResult {
685 rows: Vec::new(),
686 row_count: 0,
687 col_count: 0,
688 };
689 }
690 if range.start_row >= rows.len() || range.start_col >= max_cols {
691 return SubsetResult {
692 rows: Vec::new(),
693 row_count: 0,
694 col_count: 0,
695 };
696 }
697 let last_row = rows.len().saturating_sub(1);
698 let mut end_row = range.end_row.unwrap_or(last_row);
699 if end_row > last_row {
700 end_row = last_row;
701 }
702 if end_row < range.start_row {
703 return SubsetResult {
704 rows: Vec::new(),
705 row_count: 0,
706 col_count: 0,
707 };
708 }
709
710 let last_col = max_cols.saturating_sub(1);
711 let mut end_col = range.end_col.unwrap_or(last_col);
712 if end_col > last_col {
713 end_col = last_col;
714 }
715 if end_col < range.start_col {
716 return SubsetResult {
717 rows: Vec::new(),
718 row_count: 0,
719 col_count: 0,
720 };
721 }
722
723 let mut subset_rows = Vec::new();
724 let mut col_count = 0usize;
725 for row_idx in range.start_row..=end_row {
726 if row_idx >= rows.len() {
727 break;
728 }
729 let row = &rows[row_idx];
730 let mut extracted = Vec::with_capacity(end_col - range.start_col + 1);
731 for col_idx in range.start_col..=end_col {
732 if col_idx >= max_cols {
733 break;
734 }
735 let value = row.get(col_idx).copied().unwrap_or(default_fill);
736 extracted.push(value);
737 }
738 col_count = col_count.max(extracted.len());
739 subset_rows.push(extracted);
740 }
741 let row_count = subset_rows.len();
742 SubsetResult {
743 rows: subset_rows,
744 row_count,
745 col_count,
746 }
747}
748
749fn rows_to_tensor(
750 rows: Vec<Vec<f64>>,
751 row_count: usize,
752 col_count: usize,
753 default_fill: f64,
754) -> Result<Tensor, String> {
755 if row_count == 0 || col_count == 0 {
756 return Tensor::new(Vec::new(), vec![0, 0]).map_err(|e| format!("csvread: {e}"));
757 }
758 let mut data = vec![default_fill; row_count * col_count];
759 for (row_idx, row) in rows.iter().enumerate().take(row_count) {
760 for col_idx in 0..col_count {
761 let value = row.get(col_idx).copied().unwrap_or(default_fill);
762 data[row_idx + col_idx * row_count] = value;
763 }
764 }
765 Tensor::new(data, vec![row_count, col_count]).map_err(|e| format!("csvread: {e}"))
766}
767
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    use runmat_builtins::{CharArray, IntValue, Tensor as BuiltinTensor};

    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    // Keeps generated paths distinct within one test process.
    static UNIQUE_COUNTER: AtomicUsize = AtomicUsize::new(0);

    /// Builds a temp-dir path from the process id, the current time in
    /// nanoseconds and a per-process sequence number.
    fn unique_path(prefix: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let seq = UNIQUE_COUNTER.fetch_add(1, Ordering::Relaxed);
        std::env::temp_dir().join(format!(
            "runmat_csvread_{prefix}_{}_{}_{}",
            std::process::id(),
            nanos,
            seq
        ))
    }

    /// Writes `lines` joined by `\n` to a fresh `.csv` file and returns its path.
    fn write_temp_file(lines: &[&str]) -> PathBuf {
        let path = unique_path("input").with_extension("csv");
        fs::write(&path, lines.join("\n")).expect("write temp csv");
        path
    }

    /// Wraps a path as the filename argument of the builtin.
    fn file_arg(path: &Path) -> Value {
        Value::from(path.to_string_lossy().to_string())
    }

    /// Wraps an `i32` as an integer argument.
    fn int_arg(raw: i32) -> Value {
        Value::Int(IntValue::I32(raw))
    }

    /// Asserts that `value` is a tensor with the given shape and column-major data.
    fn assert_tensor(value: Value, shape: Vec<usize>, data: Vec<f64>) {
        match value {
            Value::Tensor(t) => {
                assert_eq!(t.shape, shape);
                assert_eq!(t.data, data);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
    }

    #[test]
    fn csvread_basic_csv_roundtrip() {
        let path = write_temp_file(&["1,2,3", "4,5,6"]);
        let result = csvread_builtin(file_arg(&path), Vec::new()).expect("csvread");
        assert_tensor(result, vec![2, 3], vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_with_offsets() {
        let path = write_temp_file(&["0,1,2", "3,4,5", "6,7,8"]);
        let args = vec![int_arg(1), int_arg(1)];
        let result = csvread_builtin(file_arg(&path), args).expect("csv");
        assert_tensor(result, vec![2, 2], vec![4.0, 7.0, 5.0, 8.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_with_numeric_range() {
        let path = write_temp_file(&["1,2,3", "4,5,6", "7,8,9"]);
        let range = BuiltinTensor::new(vec![1.0, 1.0, 2.0, 2.0], vec![4, 1]).expect("tensor");
        let args = vec![int_arg(0), int_arg(0), Value::from(range)];
        let result = csvread_builtin(file_arg(&path), args).expect("csv");
        assert_tensor(result, vec![2, 2], vec![5.0, 8.0, 6.0, 9.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_with_string_range() {
        let path = write_temp_file(&["1,2,3", "4,5,6", "7,8,9"]);
        let args = vec![int_arg(0), int_arg(0), Value::from("B2:C3")];
        let result = csvread_builtin(file_arg(&path), args).expect("csv");
        assert_tensor(result, vec![2, 2], vec![5.0, 8.0, 6.0, 9.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_empty_fields_become_zero() {
        let path = write_temp_file(&["1,,3", ",5,", "7,8,"]);
        let result = csvread_builtin(file_arg(&path), Vec::new()).expect("csv");
        assert_tensor(
            result,
            vec![3, 3],
            vec![1.0, 0.0, 7.0, 0.0, 5.0, 8.0, 3.0, 0.0, 0.0],
        );
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_errors_on_text() {
        let path = write_temp_file(&["1,2,3", "4,error,6"]);
        let err = csvread_builtin(file_arg(&path), Vec::new()).expect_err("should fail");
        assert!(
            err.contains("nonnumeric token 'error'"),
            "unexpected error: {err}"
        );
        fs::remove_file(path).ok();
    }

    #[test]
    fn csvread_accepts_char_array_filename() {
        let path = write_temp_file(&["1,2"]);
        let data: Vec<char> = path.to_string_lossy().chars().collect();
        let cols = data.len();
        let chars = CharArray::new(data, 1, cols).expect("char array");
        let result = csvread_builtin(Value::CharArray(chars), Vec::new()).expect("csv");
        assert_tensor(result, vec![1, 2], vec![1.0, 2.0]);
        fs::remove_file(path).ok();
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}