1use std::io::{BufRead, BufReader};
8use std::path::{Path, PathBuf};
9
10use runmat_builtins::{Tensor, Value};
11use runmat_filesystem::File;
12use runmat_macros::runtime_builtin;
13
14use crate::builtins::common::fs::expand_user_path;
15use crate::builtins::common::spec::{
16 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
17 ReductionNaN, ResidencyPolicy, ShapeRequirements,
18};
19use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
20
21const BUILTIN_NAME: &str = "csvread";
22
/// GPU dispatch metadata registered for the `csvread` builtin.
///
/// The spec declares a custom `io-csvread` op kind with no supported
/// precisions and no provider hooks; per `notes`, the builtin runs entirely
/// on the host and acceleration providers are not involved.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::io::tabular::csvread")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "csvread",
    op_kind: GpuOpKind::Custom("io-csvread"),
    // Empty lists: no precision is advertised and no provider hook is requested.
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs entirely on the host; acceleration providers are not involved.",
};
38
39fn csvread_error(message: impl Into<String>) -> RuntimeError {
40 build_runtime_error(message)
41 .with_builtin(BUILTIN_NAME)
42 .build()
43}
44
45fn csvread_error_with_source<E>(message: impl Into<String>, source: E) -> RuntimeError
46where
47 E: std::error::Error + Send + Sync + 'static,
48{
49 build_runtime_error(message)
50 .with_builtin(BUILTIN_NAME)
51 .with_source(source)
52 .build()
53}
54
55fn map_control_flow(err: RuntimeError) -> RuntimeError {
56 let identifier = err.identifier().map(|value| value.to_string());
57 let message = err.message().to_string();
58 let mut builder = build_runtime_error(message)
59 .with_builtin(BUILTIN_NAME)
60 .with_source(err);
61 if let Some(identifier) = identifier {
62 builder = builder.with_identifier(identifier);
63 }
64 builder.build()
65}
66
/// Fusion metadata registered for the `csvread` builtin.
///
/// No elementwise or reduction kernel is provided; per `notes`, the builtin is
/// not eligible for fusion and executes as a standalone host operation.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::io::tabular::csvread")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "csvread",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    // Neither fusion template is supplied.
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Not eligible for fusion; executes as a standalone host operation.",
};
77
78#[runtime_builtin(
79 name = "csvread",
80 category = "io/tabular",
81 summary = "Read numeric data from a comma-separated text file.",
82 keywords = "csvread,csv,dlmread,numeric import,range",
83 accel = "cpu",
84 type_resolver(crate::builtins::io::type_resolvers::tensor_type),
85 builtin_path = "crate::builtins::io::tabular::csvread"
86)]
87async fn csvread_builtin(path: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
88 let gathered_path = gather_if_needed_async(&path)
89 .await
90 .map_err(map_control_flow)?;
91 let options = parse_arguments(&rest).await?;
92 let resolved = resolve_path(&gathered_path)?;
93 let (rows, max_cols, skipped_rows) = read_csv_rows(&resolved, &options)?;
94 let start_row = if options.range.is_none() {
95 options.start_row.saturating_sub(skipped_rows)
96 } else {
97 options.start_row
98 };
99 let subset = if let Some(range) = options.range {
100 apply_range(&rows, max_cols, &range, 0.0)
101 } else {
102 apply_offsets(&rows, max_cols, start_row, options.start_col, 0.0)
103 };
104 let tensor = rows_to_tensor(subset.rows, subset.row_count, subset.col_count, 0.0)?;
105 Ok(Value::Tensor(tensor))
106}
107
/// Optional arguments of a `csvread` call after parsing.
///
/// The default value (both offsets 0, no range) is used when only the file
/// name is supplied.
#[derive(Debug, Default)]
struct CsvReadOptions {
    /// Row offset parsed from the `row` argument (non-negative).
    start_row: usize,
    /// Column offset parsed from the `col` argument (non-negative).
    start_col: usize,
    /// Explicit range from the third optional argument; when present, the
    /// offsets above are not used to select data.
    range: Option<RangeSpec>,
}
114
115async fn parse_arguments(args: &[Value]) -> BuiltinResult<CsvReadOptions> {
116 let mut gathered = Vec::with_capacity(args.len());
117 for value in args {
118 gathered.push(
119 gather_if_needed_async(value)
120 .await
121 .map_err(map_control_flow)?,
122 );
123 }
124 match gathered.len() {
125 0 => Ok(CsvReadOptions::default()),
126 2 => {
127 let start_row = value_to_start_index(&gathered[0], "row")?;
128 let start_col = value_to_start_index(&gathered[1], "col")?;
129 Ok(CsvReadOptions {
130 start_row,
131 start_col,
132 range: None,
133 })
134 }
135 3 => {
136 let start_row = value_to_start_index(&gathered[0], "row")?;
137 let start_col = value_to_start_index(&gathered[1], "col")?;
138 let range = parse_range(&gathered[2])?;
139 Ok(CsvReadOptions {
140 start_row,
141 start_col,
142 range: Some(range),
143 })
144 }
145 _ => Err(csvread_error(
146 "csvread: expected csvread(filename[, row, col[, range]])",
147 )),
148 }
149}
150
151fn value_to_start_index(value: &Value, name: &str) -> BuiltinResult<usize> {
152 match value {
153 Value::Int(i) => {
154 let raw = i.to_i64();
155 if raw < 0 {
156 return Err(csvread_error(format!(
157 "csvread: {name} must be a non-negative integer"
158 )));
159 }
160 usize::try_from(raw).map_err(|_| csvread_error(format!("csvread: {name} is too large")))
161 }
162 Value::Num(n) => {
163 if !n.is_finite() {
164 return Err(csvread_error(format!(
165 "csvread: {name} must be a finite integer"
166 )));
167 }
168 if *n < 0.0 {
169 return Err(csvread_error(format!(
170 "csvread: {name} must be a non-negative integer"
171 )));
172 }
173 let rounded = n.round();
174 if (rounded - n).abs() > f64::EPSILON {
175 return Err(csvread_error(format!("csvread: {name} must be an integer")));
176 }
177 usize::try_from(rounded as i64)
178 .map_err(|_| csvread_error(format!("csvread: {name} is too large")))
179 }
180 _ => Err(csvread_error(format!(
181 "csvread: expected {name} as a numeric scalar, got {value:?}"
182 ))),
183 }
184}
185
186fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
187 match value {
188 Value::String(s) => normalize_path(s),
189 Value::CharArray(ca) if ca.rows == 1 => {
190 let text: String = ca.data.iter().collect();
191 normalize_path(&text)
192 }
193 Value::StringArray(sa) => {
194 if sa.data.len() == 1 {
195 normalize_path(&sa.data[0])
196 } else {
197 Err(csvread_error("csvread: string array inputs must be scalar"))
198 }
199 }
200 Value::CharArray(_) => Err(csvread_error(
201 "csvread: expected a 1-by-N character vector for the file name",
202 )),
203 other => Err(csvread_error(format!(
204 "csvread: expected filename as string scalar or character vector, got {other:?}"
205 ))),
206 }
207}
208
209fn normalize_path(raw: &str) -> BuiltinResult<PathBuf> {
210 if raw.trim().is_empty() {
211 return Err(csvread_error("csvread: filename must not be empty"));
212 }
213 let expanded = expand_user_path(raw, BUILTIN_NAME).map_err(csvread_error)?;
214 Ok(Path::new(&expanded).to_path_buf())
215}
216
217fn read_csv_rows(
218 path: &Path,
219 options: &CsvReadOptions,
220) -> BuiltinResult<(Vec<Vec<f64>>, usize, usize)> {
221 let file = File::open(path).map_err(|err| {
222 csvread_error_with_source(
223 format!("csvread: unable to open '{}': {err}", path.display()),
224 err,
225 )
226 })?;
227 let mut reader = BufReader::new(file);
228 let mut buffer = String::new();
229 let mut rows = Vec::new();
230 let mut max_cols = 0usize;
231 let mut line_index = 0usize;
232 let mut skipped_rows = 0usize;
233
234 loop {
235 buffer.clear();
236 let bytes = reader.read_line(&mut buffer).map_err(|err| {
237 csvread_error_with_source(
238 format!("csvread: failed to read '{}': {err}", path.display()),
239 err,
240 )
241 })?;
242 if bytes == 0 {
243 break;
244 }
245 line_index += 1;
246 if buffer.trim().is_empty() {
247 continue;
248 }
249 if buffer.ends_with('\n') {
250 buffer.pop();
251 if buffer.ends_with('\r') {
252 buffer.pop();
253 }
254 } else if buffer.ends_with('\r') {
255 buffer.pop();
256 }
257 if options.range.is_none() && options.start_row > 0 && line_index <= options.start_row {
258 skipped_rows += 1;
259 continue;
260 }
261 let parse_start_col = if options.range.is_none() {
262 options.start_col
263 } else {
264 0
265 };
266 let parsed = parse_csv_row(&buffer, line_index, parse_start_col)?;
267 max_cols = max_cols.max(parsed.len());
268 rows.push(parsed);
269 }
270
271 Ok((rows, max_cols, skipped_rows))
272}
273
274fn parse_csv_row(line: &str, line_index: usize, parse_start_col: usize) -> BuiltinResult<Vec<f64>> {
275 let mut values = Vec::new();
276 for (col_index, raw_field) in line.split(',').enumerate() {
277 if col_index < parse_start_col {
278 values.push(0.0);
281 continue;
282 }
283 let trimmed = raw_field.trim();
284 if trimmed.is_empty() {
285 values.push(0.0);
286 continue;
287 }
288 let unwrapped = if trimmed.starts_with('"') && trimmed.ends_with('"') && trimmed.len() >= 2
289 {
290 &trimmed[1..trimmed.len() - 1]
291 } else {
292 trimmed
293 };
294 let lowered = unwrapped.to_ascii_lowercase();
295 let value = match lowered.as_str() {
296 "nan" => f64::NAN,
297 "inf" | "+inf" => f64::INFINITY,
298 "-inf" => f64::NEG_INFINITY,
299 _ => unwrapped.parse::<f64>().map_err(|_| {
300 csvread_error(format!(
301 "csvread: nonnumeric token '{}' at row {} column {}",
302 unwrapped,
303 line_index,
304 col_index + 1
305 ))
306 })?,
307 };
308 values.push(value);
309 }
310 Ok(values)
311}
312
/// Rectangular selection applied to the parsed rows.
///
/// All indices are zero-based. The end bounds are inclusive and, when absent,
/// extend to the last available row or column.
#[derive(Clone, Copy, Debug)]
struct RangeSpec {
    /// First row of the selection.
    start_row: usize,
    /// First column of the selection.
    start_col: usize,
    /// Last row of the selection (inclusive); `None` means "through the last row".
    end_row: Option<usize>,
    /// Last column of the selection (inclusive); `None` means "through the last column".
    end_col: Option<usize>,
}
320
321fn parse_range(value: &Value) -> BuiltinResult<RangeSpec> {
322 match value {
323 Value::String(s) => parse_range_string(s),
324 Value::CharArray(ca) if ca.rows == 1 => {
325 let text: String = ca.data.iter().collect();
326 parse_range_string(&text)
327 }
328 Value::StringArray(sa) => {
329 if sa.data.len() == 1 {
330 parse_range_string(&sa.data[0])
331 } else {
332 Err(csvread_error(
333 "csvread: Range string array inputs must be scalar",
334 ))
335 }
336 }
337 Value::Tensor(_) => parse_range_numeric(value),
338 _ => Err(csvread_error(
339 "csvread: Range must be provided as a string or numeric vector",
340 )),
341 }
342}
343
344fn parse_range_string(text: &str) -> BuiltinResult<RangeSpec> {
345 let trimmed = text.trim();
346 if trimmed.is_empty() {
347 return Err(csvread_error("csvread: Range string cannot be empty"));
348 }
349 let parts: Vec<&str> = trimmed.split(':').collect();
350 if parts.len() > 2 {
351 return Err(csvread_error(format!(
352 "csvread: invalid Range specification '{trimmed}'"
353 )));
354 }
355 let start = parse_cell_reference(parts[0])?;
356 if start.col.is_none() {
357 return Err(csvread_error(
358 "csvread: Range must specify a starting column",
359 ));
360 }
361 let end = if parts.len() == 2 {
362 Some(parse_cell_reference(parts[1])?)
363 } else {
364 None
365 };
366 if let Some(ref end_ref) = end {
367 if end_ref.col.is_none() {
368 return Err(csvread_error(
369 "csvread: Range end must include a column reference",
370 ));
371 }
372 }
373 let start_row = start.row.unwrap_or(0);
374 let start_col = start.col.unwrap();
375 let end_row = end.as_ref().and_then(|r| r.row);
376 let end_col = end.as_ref().and_then(|r| r.col);
377 Ok(RangeSpec {
378 start_row,
379 start_col,
380 end_row,
381 end_col,
382 })
383}
384
385fn parse_range_numeric(value: &Value) -> BuiltinResult<RangeSpec> {
386 let elements = match value {
387 Value::Tensor(t) => t.data.clone(),
388 _ => {
389 return Err(csvread_error(
390 "csvread: numeric Range must be provided as a vector with 2 or 4 elements",
391 ));
392 }
393 };
394 if elements.len() != 2 && elements.len() != 4 {
395 return Err(csvread_error(
396 "csvread: numeric Range must contain exactly 2 or 4 elements",
397 ));
398 }
399 let mut indices = Vec::with_capacity(elements.len());
400 for (idx, element) in elements.iter().enumerate() {
401 indices.push(non_negative_index(*element, idx)?);
402 }
403 let start_row = indices[0];
404 let start_col = indices[1];
405 let (end_row, end_col) = if indices.len() == 4 {
406 (Some(indices[2]), Some(indices[3]))
407 } else {
408 (None, None)
409 };
410 Ok(RangeSpec {
411 start_row,
412 start_col,
413 end_row,
414 end_col,
415 })
416}
417
418fn non_negative_index(value: f64, position: usize) -> BuiltinResult<usize> {
419 if !value.is_finite() {
420 return Err(csvread_error("csvread: Range indices must be finite"));
421 }
422 if value < 0.0 {
423 return Err(csvread_error("csvread: Range indices must be non-negative"));
424 }
425 let rounded = value.round();
426 if (rounded - value).abs() > f64::EPSILON {
427 return Err(csvread_error("csvread: Range indices must be integers"));
428 }
429 usize::try_from(rounded as i64).map_err(|_| {
430 csvread_error(format!(
431 "csvread: Range index {} is too large to fit in usize",
432 position + 1
433 ))
434 })
435}
436
/// One side of a spreadsheet-style range (for example `B2`, `B` or `2`),
/// with zero-based indices.
#[derive(Clone, Copy)]
struct CellReference {
    /// Zero-based row, or `None` when the reference has no digits.
    row: Option<usize>,
    /// Zero-based column, or `None` when the reference has no letters.
    col: Option<usize>,
}
442
443fn parse_cell_reference(token: &str) -> BuiltinResult<CellReference> {
444 let mut letters = String::new();
445 let mut digits = String::new();
446 for ch in token.trim().chars() {
447 if ch == '$' {
448 continue;
449 }
450 if ch.is_ascii_alphabetic() {
451 letters.push(ch.to_ascii_uppercase());
452 } else if ch.is_ascii_digit() {
453 digits.push(ch);
454 } else {
455 return Err(csvread_error(format!(
456 "csvread: invalid Range component '{token}'"
457 )));
458 }
459 }
460 if letters.is_empty() && digits.is_empty() {
461 return Err(csvread_error("csvread: Range references cannot be empty"));
462 }
463 let col = if letters.is_empty() {
464 None
465 } else {
466 Some(column_index_from_letters(&letters)?)
467 };
468 let row = if digits.is_empty() {
469 None
470 } else {
471 let parsed = digits.parse::<usize>().map_err(|_| {
472 csvread_error(format!(
473 "csvread: invalid row index '{}' in Range component '{token}'",
474 digits
475 ))
476 })?;
477 if parsed == 0 {
478 return Err(csvread_error("csvread: Range rows must be >= 1"));
479 }
480 Some(parsed - 1)
481 };
482 Ok(CellReference { row, col })
483}
484
485fn column_index_from_letters(letters: &str) -> BuiltinResult<usize> {
486 let mut value: usize = 0;
487 for ch in letters.chars() {
488 if !ch.is_ascii_uppercase() {
489 return Err(csvread_error(format!(
490 "csvread: invalid column designator '{letters}' in Range"
491 )));
492 }
493 let digit = (ch as u8 - b'A' + 1) as usize;
494 value = value
495 .checked_mul(26)
496 .and_then(|v| v.checked_add(digit))
497 .ok_or_else(|| csvread_error("csvread: Range column index overflowed"))?;
498 }
499 value
500 .checked_sub(1)
501 .ok_or_else(|| csvread_error("csvread: Range column index underflowed"))
502}
503
/// Rows selected by `apply_offsets` or `apply_range`, together with the
/// dimensions of the selection.
struct SubsetResult {
    /// Selected rows, one inner vector per row.
    rows: Vec<Vec<f64>>,
    /// Number of selected rows.
    row_count: usize,
    /// Width of the selection (the longest selected row).
    col_count: usize,
}
509
510fn apply_offsets(
511 rows: &[Vec<f64>],
512 max_cols: usize,
513 start_row: usize,
514 start_col: usize,
515 default_fill: f64,
516) -> SubsetResult {
517 if rows.is_empty() || max_cols == 0 {
518 return SubsetResult {
519 rows: Vec::new(),
520 row_count: 0,
521 col_count: 0,
522 };
523 }
524 if start_row >= rows.len() {
525 return SubsetResult {
526 rows: Vec::new(),
527 row_count: 0,
528 col_count: 0,
529 };
530 }
531 if start_col >= max_cols {
532 return SubsetResult {
533 rows: Vec::new(),
534 row_count: 0,
535 col_count: 0,
536 };
537 }
538
539 let mut subset_rows = Vec::new();
540 let mut col_count = 0usize;
541 for row in rows.iter().skip(start_row) {
542 if start_col >= row.len() && row.len() < max_cols {
543 let width = max_cols - start_col;
545 subset_rows.push(vec![default_fill; width]);
546 col_count = col_count.max(width);
547 continue;
548 }
549 let mut extracted = Vec::with_capacity(max_cols - start_col);
550 for col_idx in start_col..max_cols {
551 let value = row.get(col_idx).copied().unwrap_or(default_fill);
552 extracted.push(value);
553 }
554 col_count = col_count.max(extracted.len());
555 subset_rows.push(extracted);
556 }
557 let row_count = subset_rows.len();
558 SubsetResult {
559 rows: subset_rows,
560 row_count,
561 col_count,
562 }
563}
564
565fn apply_range(
566 rows: &[Vec<f64>],
567 max_cols: usize,
568 range: &RangeSpec,
569 default_fill: f64,
570) -> SubsetResult {
571 if rows.is_empty() || max_cols == 0 {
572 return SubsetResult {
573 rows: Vec::new(),
574 row_count: 0,
575 col_count: 0,
576 };
577 }
578 if range.start_row >= rows.len() || range.start_col >= max_cols {
579 return SubsetResult {
580 rows: Vec::new(),
581 row_count: 0,
582 col_count: 0,
583 };
584 }
585 let last_row = rows.len().saturating_sub(1);
586 let mut end_row = range.end_row.unwrap_or(last_row);
587 if end_row > last_row {
588 end_row = last_row;
589 }
590 if end_row < range.start_row {
591 return SubsetResult {
592 rows: Vec::new(),
593 row_count: 0,
594 col_count: 0,
595 };
596 }
597
598 let last_col = max_cols.saturating_sub(1);
599 let mut end_col = range.end_col.unwrap_or(last_col);
600 if end_col > last_col {
601 end_col = last_col;
602 }
603 if end_col < range.start_col {
604 return SubsetResult {
605 rows: Vec::new(),
606 row_count: 0,
607 col_count: 0,
608 };
609 }
610
611 let mut subset_rows = Vec::new();
612 let mut col_count = 0usize;
613 for row_idx in range.start_row..=end_row {
614 if row_idx >= rows.len() {
615 break;
616 }
617 let row = &rows[row_idx];
618 let mut extracted = Vec::with_capacity(end_col - range.start_col + 1);
619 for col_idx in range.start_col..=end_col {
620 if col_idx >= max_cols {
621 break;
622 }
623 let value = row.get(col_idx).copied().unwrap_or(default_fill);
624 extracted.push(value);
625 }
626 col_count = col_count.max(extracted.len());
627 subset_rows.push(extracted);
628 }
629 let row_count = subset_rows.len();
630 SubsetResult {
631 rows: subset_rows,
632 row_count,
633 col_count,
634 }
635}
636
637fn rows_to_tensor(
638 rows: Vec<Vec<f64>>,
639 row_count: usize,
640 col_count: usize,
641 default_fill: f64,
642) -> BuiltinResult<Tensor> {
643 if row_count == 0 || col_count == 0 {
644 return Tensor::new(Vec::new(), vec![0, 0])
645 .map_err(|e| csvread_error(format!("csvread: {e}")));
646 }
647 let mut data = vec![default_fill; row_count * col_count];
648 for (row_idx, row) in rows.iter().enumerate().take(row_count) {
649 for col_idx in 0..col_count {
650 let value = row.get(col_idx).copied().unwrap_or(default_fill);
651 data[row_idx + col_idx * row_count] = value;
652 }
653 }
654 Tensor::new(data, vec![row_count, col_count])
655 .map_err(|e| csvread_error(format!("csvread: {e}")))
656}
657
// Tests for `csvread`: each test writes a temporary CSV file, runs the
// builtin synchronously and checks the resulting tensor (stored column by
// column) or the reported error.
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use runmat_time::unix_timestamp_ns;
    use std::fs;
    use std::sync::atomic::{AtomicUsize, Ordering};

    use runmat_builtins::{CharArray, IntValue, Tensor as BuiltinTensor};

    // Synchronous wrapper that blocks on the async builtin.
    fn csvread_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
        futures::executor::block_on(super::csvread_builtin(path, rest))
    }

    // Sequence number that keeps temp-file names distinct within this process.
    static UNIQUE_COUNTER: AtomicUsize = AtomicUsize::new(0);

    // Builds a temp-dir path from the prefix, process id, a timestamp and the
    // sequence counter.
    fn unique_path(prefix: &str) -> PathBuf {
        let nanos = unix_timestamp_ns();
        let seq = UNIQUE_COUNTER.fetch_add(1, Ordering::Relaxed);
        let mut path = std::env::temp_dir();
        path.push(format!(
            "runmat_csvread_{prefix}_{}_{}_{}",
            std::process::id(),
            nanos,
            seq
        ));
        path
    }

    // Writes `lines` joined by "\n" (no trailing newline) to a fresh `.csv`
    // file and returns its path.
    fn write_temp_file(lines: &[&str]) -> PathBuf {
        let path = unique_path("input").with_extension("csv");
        let contents = lines.join("\n");
        fs::write(&path, contents).expect("write temp csv");
        path
    }

    // A plain numeric file is read in full.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_basic_csv_roundtrip() {
        let path = write_temp_file(&["1,2,3", "4,5,6"]);
        let result = csvread_builtin(Value::from(path.to_string_lossy().to_string()), Vec::new())
            .expect("csvread");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 3]);
                assert_eq!(t.data, vec![1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }

    // Row/column offsets of (1, 1) drop the first row and the first column.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_offsets() {
        let path = write_temp_file(&["0,1,2", "3,4,5", "6,7,8"]);
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(1))];
        let result =
            csvread_builtin(Value::from(path.to_string_lossy().to_string()), args).expect("csv");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 2]);
                assert_eq!(t.data, vec![4.0, 7.0, 5.0, 8.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }

    // A numeric range [1 1 2 2] uses zero-based, inclusive bounds.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_numeric_range() {
        let path = write_temp_file(&["1,2,3", "4,5,6", "7,8,9"]);
        let args = vec![
            Value::Int(IntValue::I32(0)),
            Value::Int(IntValue::I32(0)),
            Value::from(BuiltinTensor::new(vec![1.0, 1.0, 2.0, 2.0], vec![4, 1]).expect("tensor")),
        ];
        let result =
            csvread_builtin(Value::from(path.to_string_lossy().to_string()), args).expect("csv");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 2]);
                assert_eq!(t.data, vec![5.0, 8.0, 6.0, 9.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }

    // The spreadsheet range "B2:C3" selects the same cells as [1 1 2 2].
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_string_range() {
        let path = write_temp_file(&["1,2,3", "4,5,6", "7,8,9"]);
        let args = vec![
            Value::Int(IntValue::I32(0)),
            Value::Int(IntValue::I32(0)),
            Value::from("B2:C3"),
        ];
        let result =
            csvread_builtin(Value::from(path.to_string_lossy().to_string()), args).expect("csv");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 2]);
                assert_eq!(t.data, vec![5.0, 8.0, 6.0, 9.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }

    // Empty fields (including trailing ones) are read as 0.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_empty_fields_become_zero() {
        let path = write_temp_file(&["1,,3", ",5,", "7,8,"]);
        let result = csvread_builtin(Value::from(path.to_string_lossy().to_string()), Vec::new())
            .expect("csv");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![3, 3]);
                assert_eq!(t.data, vec![1.0, 0.0, 7.0, 0.0, 5.0, 8.0, 3.0, 0.0, 0.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }

    // A non-numeric field is reported as an error naming the token.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_errors_on_text() {
        let path = write_temp_file(&["1,2,3", "4,error,6"]);
        let err = csvread_builtin(Value::from(path.to_string_lossy().to_string()), Vec::new())
            .expect_err("should fail");
        let message = err.message().to_string();
        assert!(
            message.contains("nonnumeric token 'error'"),
            "unexpected error: {message}"
        );
        fs::remove_file(path).ok();
    }

    // The file name may be given as a 1-by-N character array.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_accepts_char_array_filename() {
        let path = write_temp_file(&["1,2"]);
        let path_string = path.to_string_lossy().to_string();
        let data: Vec<char> = path_string.chars().collect();
        let cols = data.len();
        let chars = CharArray::new(data, 1, cols).expect("char array");
        let result = csvread_builtin(Value::CharArray(chars), Vec::new()).expect("csv");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![1, 2]);
                assert_eq!(t.data, vec![1.0, 2.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }

    // With offsets (1, 1), a text header row and text row labels are skipped
    // rather than rejected as non-numeric.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn csvread_with_header_and_row_labels_using_offsets() {
        let path = write_temp_file(&["Name,Jan,Feb", "alpha,1,2", "beta,3,4"]);
        let args = vec![Value::Int(IntValue::I32(1)), Value::Int(IntValue::I32(1))];
        let result =
            csvread_builtin(Value::from(path.to_string_lossy().to_string()), args).expect("csv");
        match result {
            Value::Tensor(t) => {
                assert_eq!(t.shape, vec![2, 2]);
                assert_eq!(t.data, vec![1.0, 3.0, 2.0, 4.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        fs::remove_file(path).ok();
    }
}