1use std::path::{Path, PathBuf};
4
5use runmat_builtins::{
6 BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
7 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
8 CellArray, StructValue, Tensor, Value,
9};
10use runmat_filesystem as fs;
11use runmat_macros::runtime_builtin;
12
13use crate::builtins::common::fs::expand_user_path;
14use crate::builtins::common::spec::{
15 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
16 ReductionNaN, ResidencyPolicy, ShapeRequirements,
17};
18use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
19
20const BUILTIN_NAME: &str = "importdata";
21
/// Output parameter list referenced by every `importdata` signature: a single
/// required output `A` of unconstrained type.
const IMPORTDATA_OUTPUTS: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "A",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Imported numeric matrix or import structure.",
}];
/// Input parameter list for the one-argument form `importdata(filename)`.
const IMPORTDATA_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "filename",
    ty: BuiltinParamType::StringScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "File to import.",
}];
/// Input parameter list for `importdata(filename, delimiterIn)`: the required
/// file name followed by an optional delimiter string.
const IMPORTDATA_INPUTS_DELIMITER: [BuiltinParamDescriptor; 2] = [
    BuiltinParamDescriptor {
        name: "filename",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "File to import.",
    },
    BuiltinParamDescriptor {
        name: "delimiterIn",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Optional,
        default: None,
        description: "Delimiter to use for text files.",
    },
];
/// Input parameter list for `importdata(filename, delimiterIn, headerlinesIn)`:
/// the required file name, an optional delimiter string, and an optional
/// integer header-line count.
const IMPORTDATA_INPUTS_DELIMITER_HEADER: [BuiltinParamDescriptor; 3] = [
    BuiltinParamDescriptor {
        name: "filename",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "File to import.",
    },
    BuiltinParamDescriptor {
        name: "delimiterIn",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Optional,
        default: None,
        description: "Delimiter to use for text files.",
    },
    BuiltinParamDescriptor {
        name: "headerlinesIn",
        ty: BuiltinParamType::IntegerScalar,
        arity: BuiltinParamArity::Optional,
        default: None,
        description: "Number of header lines to skip.",
    },
];
/// The three call forms advertised for `importdata`, ordered from fewest to
/// most inputs. All of them share the same output list.
const IMPORTDATA_SIGNATURES: [BuiltinSignatureDescriptor; 3] = [
    BuiltinSignatureDescriptor {
        label: "A = importdata(filename)",
        inputs: &IMPORTDATA_INPUTS_FILENAME,
        outputs: &IMPORTDATA_OUTPUTS,
    },
    BuiltinSignatureDescriptor {
        label: "A = importdata(filename, delimiterIn)",
        inputs: &IMPORTDATA_INPUTS_DELIMITER,
        outputs: &IMPORTDATA_OUTPUTS,
    },
    BuiltinSignatureDescriptor {
        label: "A = importdata(filename, delimiterIn, headerlinesIn)",
        inputs: &IMPORTDATA_INPUTS_DELIMITER_HEADER,
        outputs: &IMPORTDATA_OUTPUTS,
    },
];
92
/// Error descriptor for malformed call arguments (file name, delimiter, or
/// header-line count).
const IMPORTDATA_ERROR_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.IMPORTDATA.ARGUMENT",
    identifier: Some("RunMat:importdata:InvalidArgument"),
    when: "Filename, delimiter, or header line arguments are malformed.",
    message: "importdata: invalid argument",
};
/// Error descriptor for failures while reading the input file.
const IMPORTDATA_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.IMPORTDATA.IO",
    identifier: Some("RunMat:importdata:Io"),
    when: "The input file cannot be read.",
    message: "importdata: unable to read file",
};
/// Error descriptor for text content that cannot be turned into numeric/header
/// data. Also used in this file to wrap tensor and cell construction failures.
const IMPORTDATA_ERROR_PARSE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.IMPORTDATA.PARSE",
    identifier: Some("RunMat:importdata:Parse"),
    when: "Text content cannot be imported as supported numeric/header data.",
    message: "importdata: unable to parse text data",
};
/// All error descriptors of the builtin, collected for the public descriptor.
const IMPORTDATA_ERRORS: [BuiltinErrorDescriptor; 3] = [
    IMPORTDATA_ERROR_ARGUMENT,
    IMPORTDATA_ERROR_IO,
    IMPORTDATA_ERROR_PARSE,
];
116
/// Public descriptor for `importdata`, bundling its signatures and error table.
/// It is referenced by path from the `descriptor(...)` argument of the
/// `runtime_builtin` attribute on the builtin entry point.
pub const IMPORTDATA_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &IMPORTDATA_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &IMPORTDATA_ERRORS,
};
123
// GPU spec registered for `importdata`. It declares a custom op kind with no
// supported precisions and no provider hooks; per the `notes` field the
// builtin runs on the host.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::io::importdata")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "importdata",
    op_kind: GpuOpKind::Custom("io-importdata"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Runs on the host; file import is not an acceleration operation.",
};
139
// Fusion spec registered for `importdata`. No elementwise or reduction
// template is provided; per the `notes` field the builtin is not eligible for
// fusion.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::io::importdata")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "importdata",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Not eligible for fusion; performs host-side file I/O.",
};
150
151fn importdata_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
152 importdata_error_with(error, error.message)
153}
154
155fn importdata_error_with(
156 error: &'static BuiltinErrorDescriptor,
157 message: impl Into<String>,
158) -> RuntimeError {
159 let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
160 if let Some(identifier) = error.identifier {
161 builder = builder.with_identifier(identifier);
162 }
163 builder.build()
164}
165
166fn importdata_error_with_source<E>(
167 error: &'static BuiltinErrorDescriptor,
168 message: impl Into<String>,
169 source: E,
170) -> RuntimeError
171where
172 E: std::error::Error + Send + Sync + 'static,
173{
174 let mut builder = build_runtime_error(message)
175 .with_builtin(BUILTIN_NAME)
176 .with_source(source);
177 if let Some(identifier) = error.identifier {
178 builder = builder.with_identifier(identifier);
179 }
180 builder.build()
181}
182
183fn map_control_flow(err: RuntimeError) -> RuntimeError {
184 let identifier = err.identifier().map(|value| value.to_string());
185 let message = err.message().to_string();
186 let mut builder = build_runtime_error(message)
187 .with_builtin(BUILTIN_NAME)
188 .with_source(err);
189 if let Some(identifier) = identifier {
190 builder = builder.with_identifier(identifier);
191 }
192 builder.build()
193}
194
195#[runtime_builtin(
196 name = "importdata",
197 category = "io/import",
198 summary = "Import numeric text data with optional headers.",
199 keywords = "importdata,text,csv,delimited,header,numeric import",
200 accel = "cpu",
201 type_resolver(crate::builtins::io::type_resolvers::importdata_type),
202 descriptor(crate::builtins::io::importdata::IMPORTDATA_DESCRIPTOR),
203 builtin_path = "crate::builtins::io::importdata"
204)]
205async fn importdata_builtin(filename: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
206 if rest.len() > 2 {
207 return Err(importdata_error(&IMPORTDATA_ERROR_ARGUMENT));
208 }
209 let filename = gather_if_needed_async(&filename)
210 .await
211 .map_err(map_control_flow)?;
212 let path = resolve_path(&filename)?;
213
214 let delimiter = if let Some(value) = rest.first() {
215 let gathered = gather_if_needed_async(value)
216 .await
217 .map_err(map_control_flow)?;
218 Some(parse_delimiter_arg(&gathered)?)
219 } else {
220 None
221 };
222 let header_lines = if let Some(value) = rest.get(1) {
223 let gathered = gather_if_needed_async(value)
224 .await
225 .map_err(map_control_flow)?;
226 Some(parse_header_lines(&gathered)?)
227 } else {
228 None
229 };
230
231 let text = fs::read_to_string_async(&path).await.map_err(|err| {
232 importdata_error_with_source(
233 &IMPORTDATA_ERROR_IO,
234 format!("importdata: unable to read \"{}\" ({err})", path.display()),
235 err,
236 )
237 })?;
238 import_text_data(&text, delimiter.as_deref(), header_lines)
239}
240
/// Intermediate result of parsing a text file, before conversion to runtime
/// values.
#[derive(Debug, Clone)]
struct ImportedText {
    /// Parsed numeric rows, one inner vector per data record.
    data: Vec<Vec<f64>>,
    /// Header records as split text fields, one inner vector per header line.
    textdata: Vec<Vec<String>>,
    /// Column labels taken from the last header record (may be empty).
    colheaders: Vec<String>,
    /// Leading text label of each data record (empty when rows have no label).
    rowheaders: Vec<String>,
}
248
249fn import_text_data(
250 text: &str,
251 delimiter: Option<&str>,
252 header_lines: Option<usize>,
253) -> BuiltinResult<Value> {
254 let lines: Vec<&str> = text.lines().collect();
255 let nonempty: Vec<(usize, &str)> = lines
256 .iter()
257 .copied()
258 .enumerate()
259 .filter(|(_, line)| !line.trim().is_empty())
260 .collect();
261 if nonempty.is_empty() {
262 return Ok(Value::Tensor(Tensor::new(Vec::new(), vec![0, 0]).map_err(
263 |err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")),
264 )?));
265 }
266
267 let delimiter = delimiter
268 .map(Delimiter::Explicit)
269 .unwrap_or_else(|| detect_delimiter(nonempty.iter().map(|(_, line)| *line)));
270 let records: Vec<(usize, Vec<String>)> = nonempty
271 .iter()
272 .map(|(idx, line)| (*idx, split_record(line, &delimiter)))
273 .collect();
274
275 let data_start = header_lines.unwrap_or_else(|| infer_header_lines(&records));
276 if data_start > records.len() {
277 return Err(importdata_error_with(
278 &IMPORTDATA_ERROR_ARGUMENT,
279 "importdata: headerlinesIn exceeds number of non-empty lines",
280 ));
281 }
282
283 let header_records: Vec<Vec<String>> = records[..data_start]
284 .iter()
285 .map(|(_, record)| record.clone())
286 .collect();
287 let data_records = &records[data_start..];
288
289 let imported = parse_numeric_records(data_records, &header_records)?;
290 let tensor = rows_to_tensor(&imported.data)?;
291 if imported.textdata.is_empty()
292 && imported.colheaders.is_empty()
293 && imported.rowheaders.is_empty()
294 {
295 return Ok(Value::Tensor(tensor));
296 }
297
298 let mut out = StructValue::new();
299 out.insert("data", Value::Tensor(tensor));
300 if !imported.textdata.is_empty() {
301 out.insert("textdata", cell_from_rows(&imported.textdata)?);
302 }
303 if !imported.colheaders.is_empty() {
304 out.insert("colheaders", cell_from_row(&imported.colheaders)?);
305 }
306 if !imported.rowheaders.is_empty() {
307 out.insert("rowheaders", cell_from_col(&imported.rowheaders)?);
308 }
309 Ok(Value::Struct(out))
310}
311
312fn parse_numeric_records(
313 data_records: &[(usize, Vec<String>)],
314 header_records: &[Vec<String>],
315) -> BuiltinResult<ImportedText> {
316 if data_records.is_empty() {
317 return Ok(ImportedText {
318 data: Vec::new(),
319 textdata: header_records.to_vec(),
320 colheaders: header_records.last().cloned().unwrap_or_default(),
321 rowheaders: Vec::new(),
322 });
323 }
324
325 let first = &data_records[0].1;
326 let row_header_cols = infer_row_header_cols(data_records);
327 let numeric_cols = first.len().saturating_sub(row_header_cols);
328 if numeric_cols == 0 {
329 return Err(importdata_error_with(
330 &IMPORTDATA_ERROR_PARSE,
331 "importdata: no numeric columns found",
332 ));
333 }
334
335 let mut rows = Vec::with_capacity(data_records.len());
336 let mut rowheaders = Vec::new();
337 for (line_idx, record) in data_records {
338 let expected_cols = row_header_cols + numeric_cols;
339 if record.len() != expected_cols {
340 return Err(importdata_error_with(
341 &IMPORTDATA_ERROR_PARSE,
342 format!(
343 "importdata: row {} has {} columns, expected {}",
344 line_idx + 1,
345 record.len(),
346 expected_cols
347 ),
348 ));
349 }
350 if row_header_cols > 0 {
351 rowheaders.push(record[..row_header_cols].join(" "));
352 }
353 let mut row = Vec::with_capacity(numeric_cols);
354 for (col, token) in record[row_header_cols..row_header_cols + numeric_cols]
355 .iter()
356 .enumerate()
357 {
358 row.push(parse_numeric_token(token).ok_or_else(|| {
359 importdata_error_with(
360 &IMPORTDATA_ERROR_PARSE,
361 format!(
362 "importdata: nonnumeric token '{}' at row {}, column {}",
363 token,
364 line_idx + 1,
365 row_header_cols + col + 1
366 ),
367 )
368 })?);
369 }
370 rows.push(row);
371 }
372
373 let mut colheaders = Vec::new();
374 if let Some(last_header) = header_records.last() {
375 if last_header.len() >= row_header_cols + numeric_cols {
376 colheaders = last_header[row_header_cols..row_header_cols + numeric_cols].to_vec();
377 } else if last_header.len() == numeric_cols {
378 colheaders = last_header.clone();
379 }
380 }
381
382 Ok(ImportedText {
383 data: rows,
384 textdata: header_records.to_vec(),
385 colheaders,
386 rowheaders,
387 })
388}
389
390fn infer_row_header_cols(records: &[(usize, Vec<String>)]) -> usize {
391 let Some(first) = records.first() else {
392 return 0;
393 };
394 if first.1.len() < 2 || parse_numeric_token(&first.1[0]).is_some() {
395 return 0;
396 }
397 if records.iter().all(|(_, row)| {
398 row.len() == first.1.len()
399 && parse_numeric_token(&row[0]).is_none()
400 && row[1..]
401 .iter()
402 .all(|token| parse_numeric_token(token).is_some())
403 }) {
404 1
405 } else {
406 0
407 }
408}
409
410fn infer_header_lines(records: &[(usize, Vec<String>)]) -> usize {
411 records
412 .iter()
413 .position(|(_, row)| is_numeric_data_row(row))
414 .unwrap_or(records.len())
415}
416
417fn is_numeric_data_row(row: &[String]) -> bool {
418 if row.is_empty() {
419 return false;
420 }
421 if row.iter().all(|token| parse_numeric_token(token).is_some()) {
422 return true;
423 }
424 row.len() > 1
425 && parse_numeric_token(&row[0]).is_none()
426 && row[1..]
427 .iter()
428 .all(|token| parse_numeric_token(token).is_some())
429}
430
431fn rows_to_tensor(rows: &[Vec<f64>]) -> BuiltinResult<Tensor> {
432 let row_count = rows.len();
433 let col_count = rows.first().map(|row| row.len()).unwrap_or(0);
434 if rows.iter().any(|row| row.len() != col_count) {
435 return Err(importdata_error_with(
436 &IMPORTDATA_ERROR_PARSE,
437 "importdata: numeric rows have inconsistent column counts",
438 ));
439 }
440 let mut data = Vec::with_capacity(row_count * col_count);
441 for col in 0..col_count {
442 for row in rows {
443 data.push(row[col]);
444 }
445 }
446 Tensor::new(data, vec![row_count, col_count])
447 .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
448}
449
/// How a line of text is split into fields.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Delimiter<'a> {
    /// Split on runs of whitespace.
    Whitespace,
    /// Split on every occurrence of the given string outside double quotes.
    Explicit(&'a str),
}
455
456fn detect_delimiter<'a>(lines: impl Iterator<Item = &'a str>) -> Delimiter<'static> {
457 let candidates = [",", "\t", ";", "|"];
458 let sample: Vec<&str> = lines.take(12).collect();
459 let mut best: Option<(&str, usize, usize)> = None;
460 for candidate in candidates {
461 let counts: Vec<usize> = sample
462 .iter()
463 .map(|line| split_record(line, &Delimiter::Explicit(candidate)).len())
464 .filter(|count| *count > 1)
465 .collect();
466 if counts.is_empty() {
467 continue;
468 }
469 let consistent = counts.iter().filter(|count| **count == counts[0]).count();
470 let score = (consistent, counts[0]);
471 if best
472 .map(|(_, best_consistent, best_cols)| score > (best_consistent, best_cols))
473 .unwrap_or(true)
474 {
475 best = Some((candidate, consistent, counts[0]));
476 }
477 }
478 best.map(|(candidate, _, _)| Delimiter::Explicit(candidate))
479 .unwrap_or(Delimiter::Whitespace)
480}
481
482fn split_record(line: &str, delimiter: &Delimiter<'_>) -> Vec<String> {
483 match delimiter {
484 Delimiter::Whitespace => line
485 .split_whitespace()
486 .map(|token| unquote(token.trim()))
487 .filter(|token| !token.is_empty())
488 .collect(),
489 Delimiter::Explicit(delimiter) => split_explicit(line, delimiter),
490 }
491}
492
/// Splits `line` on every occurrence of `delimiter` that is outside double
/// quotes.
///
/// Double quotes delimit quoted sections and are removed from the output; a
/// doubled quote (`""`) inside a quoted section yields one literal quote.
/// Every field is trimmed of surrounding whitespace. An empty `delimiter`
/// returns the whole trimmed line as a single field.
///
/// Fields are not unquoted a second time after scanning: the scan has already
/// consumed the delimiting quotes, so any quote still present is a literal one
/// produced by `""` and must be kept (e.g. `"""a"""` is the value `"a"`).
fn split_explicit(line: &str, delimiter: &str) -> Vec<String> {
    if delimiter.is_empty() {
        return vec![line.trim().to_string()];
    }

    let mut fields = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    // Unconsumed tail of the line; always starts on a char boundary.
    let mut rest = line;

    while let Some(ch) = rest.chars().next() {
        if ch == '"' {
            let after = &rest[ch.len_utf8()..];
            match after.strip_prefix('"') {
                // Escaped quote inside a quoted section.
                Some(after_pair) if in_quotes => {
                    current.push('"');
                    rest = after_pair;
                }
                // Opening or closing quote: toggle state, emit nothing.
                _ => {
                    in_quotes = !in_quotes;
                    rest = after;
                }
            }
            continue;
        }

        if !in_quotes {
            if let Some(after) = rest.strip_prefix(delimiter) {
                fields.push(current.trim().to_string());
                current.clear();
                rest = after;
                continue;
            }
        }

        current.push(ch);
        rest = &rest[ch.len_utf8()..];
    }

    fields.push(current.trim().to_string());
    fields
}
527
/// Trims `token` and, if what remains is wrapped in a pair of double quotes,
/// removes that pair and collapses each doubled quote inside to a single one.
/// Tokens that are not fully wrapped are returned trimmed but otherwise as-is.
fn unquote(token: &str) -> String {
    let trimmed = token.trim();
    let inner = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    match inner {
        Some(inner) => inner.replace("\"\"", "\""),
        None => trimmed.to_string(),
    }
}
536
/// Parses a single field as `f64`.
///
/// A blank field counts as a missing value and yields NaN. The spellings
/// `nan`, `inf`/`infinity` (optionally with `+`) and `-inf`/`-infinity` are
/// matched case-insensitively; anything else goes through `str::parse::<f64>`.
/// Returns `None` when the token is not numeric.
fn parse_numeric_token(token: &str) -> Option<f64> {
    let text = token.trim();
    if text.is_empty() {
        return Some(f64::NAN);
    }

    let lowered = text.to_ascii_lowercase();
    if lowered == "nan" {
        Some(f64::NAN)
    } else if matches!(lowered.as_str(), "inf" | "+inf" | "infinity" | "+infinity") {
        Some(f64::INFINITY)
    } else if matches!(lowered.as_str(), "-inf" | "-infinity") {
        Some(f64::NEG_INFINITY)
    } else {
        text.parse::<f64>().ok()
    }
}
549
550fn parse_delimiter_arg(value: &Value) -> BuiltinResult<String> {
551 let text = string_scalar(value, "delimiterIn")?;
552 match text.as_str() {
553 "\\t" => Ok("\t".to_string()),
554 "\\n" => Ok("\n".to_string()),
555 "\\r" => Ok("\r".to_string()),
556 _ => Ok(text),
557 }
558}
559
560fn parse_header_lines(value: &Value) -> BuiltinResult<usize> {
561 let raw = match value {
562 Value::Num(n) => *n,
563 Value::Int(i) => i.to_i64() as f64,
564 Value::Tensor(t) if t.data.len() == 1 => t.data[0],
565 _ => {
566 return Err(importdata_error_with(
567 &IMPORTDATA_ERROR_ARGUMENT,
568 "importdata: headerlinesIn must be a nonnegative integer scalar",
569 ));
570 }
571 };
572 if !raw.is_finite() || raw < 0.0 || raw.fract() != 0.0 {
573 return Err(importdata_error_with(
574 &IMPORTDATA_ERROR_ARGUMENT,
575 "importdata: headerlinesIn must be a nonnegative integer scalar",
576 ));
577 }
578 Ok(raw as usize)
579}
580
581fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
582 match value {
583 Value::String(s) => normalize_path(s),
584 Value::CharArray(ca) if ca.rows == 1 => {
585 let text: String = ca.data.iter().collect();
586 normalize_path(&text)
587 }
588 Value::StringArray(sa) if sa.data.len() == 1 => normalize_path(&sa.data[0]),
589 _ => Err(importdata_error(&IMPORTDATA_ERROR_ARGUMENT)),
590 }
591}
592
593fn normalize_path(raw: &str) -> BuiltinResult<PathBuf> {
594 if raw.trim().is_empty() {
595 return Err(importdata_error_with(
596 &IMPORTDATA_ERROR_ARGUMENT,
597 "importdata: filename must not be empty",
598 ));
599 }
600 let expanded = expand_user_path(raw, BUILTIN_NAME)
601 .map_err(|msg| importdata_error_with(&IMPORTDATA_ERROR_ARGUMENT, msg))?;
602 Ok(Path::new(&expanded).to_path_buf())
603}
604
605fn string_scalar(value: &Value, context: &str) -> BuiltinResult<String> {
606 match value {
607 Value::String(s) => Ok(s.clone()),
608 Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
609 Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
610 _ => Err(importdata_error_with(
611 &IMPORTDATA_ERROR_ARGUMENT,
612 format!("importdata: expected {context} as a string scalar or character vector"),
613 )),
614 }
615}
616
617fn cell_from_rows(rows: &[Vec<String>]) -> BuiltinResult<Value> {
618 let row_count = rows.len();
619 let col_count = rows.iter().map(|row| row.len()).max().unwrap_or(0);
620 let mut values = Vec::with_capacity(row_count * col_count);
621 for row in rows {
622 for col in 0..col_count {
623 values.push(Value::String(row.get(col).cloned().unwrap_or_default()));
624 }
625 }
626 CellArray::new(values, row_count, col_count)
627 .map(Value::Cell)
628 .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
629}
630
631fn cell_from_row(values: &[String]) -> BuiltinResult<Value> {
632 CellArray::new(
633 values.iter().cloned().map(Value::String).collect(),
634 1,
635 values.len(),
636 )
637 .map(Value::Cell)
638 .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
639}
640
641fn cell_from_col(values: &[String]) -> BuiltinResult<Value> {
642 CellArray::new(
643 values.iter().cloned().map(Value::String).collect(),
644 values.len(),
645 1,
646 )
647 .map(Value::Cell)
648 .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
649}
650
#[cfg(test)]
mod tests {
    use super::*;
    use futures::executor::block_on;
    use runmat_time::unix_timestamp_ms;
    use std::fs;
    use std::sync::atomic::{AtomicU64, Ordering};

    /// Per-process counter that keeps fixture names distinct within one run.
    static NEXT_ID: AtomicU64 = AtomicU64::new(0);

    /// Returns a fresh path in the temp directory with the given extension,
    /// built from the process id, a millisecond timestamp and a counter.
    fn temp_path(ext: &str) -> PathBuf {
        let millis = unix_timestamp_ms();
        let unique = NEXT_ID.fetch_add(1, Ordering::Relaxed);
        let mut path = std::env::temp_dir();
        path.push(format!(
            "runmat_importdata_{}_{}_{}.{}",
            std::process::id(),
            millis,
            unique,
            ext
        ));
        path
    }

    /// Temporary input file that is deleted when the guard is dropped, so the
    /// file is cleaned up even when an assertion in the test panics.
    struct Fixture {
        path: PathBuf,
    }

    impl Fixture {
        /// Writes `contents` to a fresh temp file with extension `ext`.
        fn new(ext: &str, contents: &str) -> Self {
            let path = temp_path(ext);
            fs::write(&path, contents).expect("write fixture");
            Self { path }
        }

        /// Runs the builtin on this fixture with the given trailing arguments.
        fn import(&self, rest: Vec<Value>) -> BuiltinResult<Value> {
            block_on(importdata_builtin(
                Value::from(self.path.to_string_lossy().into_owned()),
                rest,
            ))
        }
    }

    impl Drop for Fixture {
        fn drop(&mut self) {
            // Best-effort cleanup; a missing file is not a test failure.
            let _ = fs::remove_file(&self.path);
        }
    }

    /// Returns field `name` of a struct value, panicking on any other shape.
    fn struct_field<'a>(value: &'a Value, name: &str) -> &'a Value {
        let Value::Struct(st) = value else {
            panic!("expected struct");
        };
        st.fields
            .get(name)
            .unwrap_or_else(|| panic!("missing {name}"))
    }

    /// Returns the data and shape of a tensor value.
    fn tensor_data(value: &Value) -> (&[f64], &[usize]) {
        let Value::Tensor(tensor) = value else {
            panic!("expected tensor");
        };
        (&tensor.data, &tensor.shape)
    }

    /// Returns the string stored at `(row, col)` of a cell value.
    fn cell_text(value: &Value, row: usize, col: usize) -> String {
        let Value::Cell(cell) = value else {
            panic!("expected cell");
        };
        let Value::String(text) = cell.get(row, col).expect("cell value") else {
            panic!("expected string cell");
        };
        text
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_descriptor_covers_core_forms() {
        let labels: Vec<&str> = IMPORTDATA_DESCRIPTOR
            .signatures
            .iter()
            .map(|sig| sig.label)
            .collect();
        assert!(labels.contains(&"A = importdata(filename)"));
        assert!(labels.contains(&"A = importdata(filename, delimiterIn)"));
        assert!(labels.contains(&"A = importdata(filename, delimiterIn, headerlinesIn)"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_reads_plain_numeric_matrix() {
        let fixture = Fixture::new("txt", "1 2 3\n4 5 6\n");
        let out = fixture.import(Vec::new()).expect("importdata");
        let (data, shape) = tensor_data(&out);
        assert_eq!(shape, &[2, 3]);
        assert_eq!(data, &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_headerlines_zero_numeric_input_returns_tensor() {
        let fixture = Fixture::new("txt", "1 2\n3 4\n");
        let out = fixture
            .import(vec![Value::from(" "), Value::Num(0.0)])
            .expect("importdata");
        let (data, shape) = tensor_data(&out);
        assert_eq!(shape, &[2, 2]);
        assert_eq!(data, &[1.0, 3.0, 2.0, 4.0]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_detects_csv_header_and_colheaders() {
        let fixture = Fixture::new("csv", "time,value\n0,1.5\n1,2.5\n");
        let out = fixture.import(Vec::new()).expect("importdata");
        let data = struct_field(&out, "data");
        let (values, shape) = tensor_data(data);
        assert_eq!(shape, &[2, 2]);
        assert_eq!(values, &[0.0, 1.0, 1.5, 2.5]);
        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 0), "time");
        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 1), "value");
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_honors_explicit_delimiter_and_header_lines() {
        let fixture = Fixture::new("dat", "# instrument log\nA|B\n10|20\n30|40\n");
        let out = fixture
            .import(vec![Value::from("|"), Value::Num(2.0)])
            .expect("importdata");
        let data = struct_field(&out, "data");
        let (values, shape) = tensor_data(data);
        assert_eq!(shape, &[2, 2]);
        assert_eq!(values, &[10.0, 30.0, 20.0, 40.0]);
        assert_eq!(
            cell_text(struct_field(&out, "textdata"), 0, 0),
            "# instrument log"
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_preserves_rowheaders() {
        let fixture = Fixture::new("txt", "label x y\nr1 1 2\nr2 3 4\n");
        let out = fixture.import(Vec::new()).expect("importdata");
        assert_eq!(cell_text(struct_field(&out, "rowheaders"), 0, 0), "r1");
        assert_eq!(cell_text(struct_field(&out, "rowheaders"), 1, 0), "r2");
        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 0), "x");
        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 1), "y");
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_reports_mixed_unsupported_data() {
        let fixture = Fixture::new("txt", "1 2\n3 nope\n");
        let err = fixture.import(Vec::new()).expect_err("parse error");
        assert!(err.message().contains("nonnumeric token"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn importdata_rejects_rows_with_extra_numeric_columns() {
        let fixture = Fixture::new("txt", "1 2\n3 4 5\n");
        let err = fixture.import(Vec::new()).expect_err("width mismatch");
        assert!(err.message().contains("expected 2"));
    }
}