Skip to main content

runmat_runtime/builtins/io/
importdata.rs

1//! MATLAB-compatible `importdata` builtin for legacy text imports.
2
3use std::path::{Path, PathBuf};
4
5use runmat_builtins::{
6    BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
7    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
8    CellArray, StructValue, Tensor, Value,
9};
10use runmat_filesystem as fs;
11use runmat_macros::runtime_builtin;
12
13use crate::builtins::common::fs::expand_user_path;
14use crate::builtins::common::spec::{
15    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
16    ReductionNaN, ResidencyPolicy, ShapeRequirements,
17};
18use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
19
/// Name of this builtin; attached to every error built in this module and
/// passed to `expand_user_path` when resolving the filename.
20const BUILTIN_NAME: &str = "importdata";
21
/// Single output `A` shared by every signature: typed `Any` because the result
/// is described as either a numeric matrix or an import structure.
22const IMPORTDATA_OUTPUTS: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
23    name: "A",
24    ty: BuiltinParamType::Any,
25    arity: BuiltinParamArity::Required,
26    default: None,
27    description: "Imported numeric matrix or import structure.",
28}];
/// Inputs for the one-argument form: the required `filename` only.
29const IMPORTDATA_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
30    name: "filename",
31    ty: BuiltinParamType::StringScalar,
32    arity: BuiltinParamArity::Required,
33    default: None,
34    description: "File to import.",
35}];
/// Inputs for the two-argument form: `filename` plus an optional `delimiterIn`.
36const IMPORTDATA_INPUTS_DELIMITER: [BuiltinParamDescriptor; 2] = [
37    BuiltinParamDescriptor {
38        name: "filename",
39        ty: BuiltinParamType::StringScalar,
40        arity: BuiltinParamArity::Required,
41        default: None,
42        description: "File to import.",
43    },
44    BuiltinParamDescriptor {
45        name: "delimiterIn",
46        ty: BuiltinParamType::StringScalar,
47        arity: BuiltinParamArity::Optional,
48        default: None,
49        description: "Delimiter to use for text files.",
50    },
51];
/// Inputs for the three-argument form: `filename`, optional `delimiterIn`, and
/// optional integer `headerlinesIn`.
52const IMPORTDATA_INPUTS_DELIMITER_HEADER: [BuiltinParamDescriptor; 3] = [
53    BuiltinParamDescriptor {
54        name: "filename",
55        ty: BuiltinParamType::StringScalar,
56        arity: BuiltinParamArity::Required,
57        default: None,
58        description: "File to import.",
59    },
60    BuiltinParamDescriptor {
61        name: "delimiterIn",
62        ty: BuiltinParamType::StringScalar,
63        arity: BuiltinParamArity::Optional,
64        default: None,
65        description: "Delimiter to use for text files.",
66    },
67    BuiltinParamDescriptor {
68        name: "headerlinesIn",
69        ty: BuiltinParamType::IntegerScalar,
70        arity: BuiltinParamArity::Optional,
71        default: None,
72        description: "Number of header lines to skip.",
73    },
74];
/// The three documented call forms, from fewest to most arguments. Each pairs
/// its own input table with the shared single-output table.
75const IMPORTDATA_SIGNATURES: [BuiltinSignatureDescriptor; 3] = [
76    BuiltinSignatureDescriptor {
77        label: "A = importdata(filename)",
78        inputs: &IMPORTDATA_INPUTS_FILENAME,
79        outputs: &IMPORTDATA_OUTPUTS,
80    },
81    BuiltinSignatureDescriptor {
82        label: "A = importdata(filename, delimiterIn)",
83        inputs: &IMPORTDATA_INPUTS_DELIMITER,
84        outputs: &IMPORTDATA_OUTPUTS,
85    },
86    BuiltinSignatureDescriptor {
87        label: "A = importdata(filename, delimiterIn, headerlinesIn)",
88        inputs: &IMPORTDATA_INPUTS_DELIMITER_HEADER,
89        outputs: &IMPORTDATA_OUTPUTS,
90    },
91];
92
/// Raised for malformed filename, delimiter, or header-line arguments. Its
/// `message` is the default text used when no specific message is supplied.
93const IMPORTDATA_ERROR_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
94    code: "RM.IMPORTDATA.ARGUMENT",
95    identifier: Some("RunMat:importdata:InvalidArgument"),
96    when: "Filename, delimiter, or header line arguments are malformed.",
97    message: "importdata: invalid argument",
98};
/// Raised when the input file cannot be read.
99const IMPORTDATA_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
100    code: "RM.IMPORTDATA.IO",
101    identifier: Some("RunMat:importdata:Io"),
102    when: "The input file cannot be read.",
103    message: "importdata: unable to read file",
104};
/// Raised when file content cannot be converted into numeric/header data.
105const IMPORTDATA_ERROR_PARSE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
106    code: "RM.IMPORTDATA.PARSE",
107    identifier: Some("RunMat:importdata:Parse"),
108    when: "Text content cannot be imported as supported numeric/header data.",
109    message: "importdata: unable to parse text data",
110};
/// All error descriptors, in the order they are published through the
/// builtin descriptor's `errors` field.
111const IMPORTDATA_ERRORS: [BuiltinErrorDescriptor; 3] = [
112    IMPORTDATA_ERROR_ARGUMENT,
113    IMPORTDATA_ERROR_IO,
114    IMPORTDATA_ERROR_PARSE,
115];
116
/// Public descriptor for `importdata`: bundles the signature table and error
/// table above with a fixed output mode and public completion policy. It is
/// referenced by path from the `descriptor(...)` argument of the
/// `runtime_builtin` attribute on the entry point.
117pub const IMPORTDATA_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
118    signatures: &IMPORTDATA_SIGNATURES,
119    output_mode: BuiltinOutputMode::Fixed,
120    completion_policy: BuiltinCompletionPolicy::Public,
121    errors: &IMPORTDATA_ERRORS,
122};
123
// GPU spec registered for `importdata`. It lists no supported precisions and
// no provider hooks, and requests `GatherImmediately` residency; per `notes`,
// the builtin runs on the host.
// NOTE(review): the exact meaning of each policy value is defined by
// `BuiltinGpuSpec` elsewhere in the crate — confirm there before changing.
124#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::io::importdata")]
125pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
126    name: "importdata",
127    op_kind: GpuOpKind::Custom("io-importdata"),
128    supported_precisions: &[],
129    broadcast: BroadcastSemantics::None,
130    provider_hooks: &[],
131    constant_strategy: ConstantStrategy::InlineLiteral,
132    residency: ResidencyPolicy::GatherImmediately,
133    nan_mode: ReductionNaN::Include,
134    two_pass_threshold: None,
135    workgroup_size: None,
136    accepts_nan_mode: false,
137    notes: "Runs on the host; file import is not an acceleration operation.",
138};
139
// Fusion spec registered for `importdata`. It provides neither an elementwise
// nor a reduction template; per `notes`, the builtin is not eligible for
// fusion because it performs host-side file I/O.
140#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::io::importdata")]
141pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
142    name: "importdata",
143    shape: ShapeRequirements::Any,
144    constant_strategy: ConstantStrategy::InlineLiteral,
145    elementwise: None,
146    reduction: None,
147    emits_nan: false,
148    notes: "Not eligible for fusion; performs host-side file I/O.",
149};
150
151fn importdata_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
152    importdata_error_with(error, error.message)
153}
154
155fn importdata_error_with(
156    error: &'static BuiltinErrorDescriptor,
157    message: impl Into<String>,
158) -> RuntimeError {
159    let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
160    if let Some(identifier) = error.identifier {
161        builder = builder.with_identifier(identifier);
162    }
163    builder.build()
164}
165
166fn importdata_error_with_source<E>(
167    error: &'static BuiltinErrorDescriptor,
168    message: impl Into<String>,
169    source: E,
170) -> RuntimeError
171where
172    E: std::error::Error + Send + Sync + 'static,
173{
174    let mut builder = build_runtime_error(message)
175        .with_builtin(BUILTIN_NAME)
176        .with_source(source);
177    if let Some(identifier) = error.identifier {
178        builder = builder.with_identifier(identifier);
179    }
180    builder.build()
181}
182
183fn map_control_flow(err: RuntimeError) -> RuntimeError {
184    let identifier = err.identifier().map(|value| value.to_string());
185    let message = err.message().to_string();
186    let mut builder = build_runtime_error(message)
187        .with_builtin(BUILTIN_NAME)
188        .with_source(err);
189    if let Some(identifier) = identifier {
190        builder = builder.with_identifier(identifier);
191    }
192    builder.build()
193}
194
195#[runtime_builtin(
196    name = "importdata",
197    category = "io/import",
198    summary = "Import numeric text data with optional headers.",
199    keywords = "importdata,text,csv,delimited,header,numeric import",
200    accel = "cpu",
201    type_resolver(crate::builtins::io::type_resolvers::importdata_type),
202    descriptor(crate::builtins::io::importdata::IMPORTDATA_DESCRIPTOR),
203    builtin_path = "crate::builtins::io::importdata"
204)]
205async fn importdata_builtin(filename: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
206    if rest.len() > 2 {
207        return Err(importdata_error(&IMPORTDATA_ERROR_ARGUMENT));
208    }
209    let filename = gather_if_needed_async(&filename)
210        .await
211        .map_err(map_control_flow)?;
212    let path = resolve_path(&filename)?;
213
214    let delimiter = if let Some(value) = rest.first() {
215        let gathered = gather_if_needed_async(value)
216            .await
217            .map_err(map_control_flow)?;
218        Some(parse_delimiter_arg(&gathered)?)
219    } else {
220        None
221    };
222    let header_lines = if let Some(value) = rest.get(1) {
223        let gathered = gather_if_needed_async(value)
224            .await
225            .map_err(map_control_flow)?;
226        Some(parse_header_lines(&gathered)?)
227    } else {
228        None
229    };
230
231    let text = fs::read_to_string_async(&path).await.map_err(|err| {
232        importdata_error_with_source(
233            &IMPORTDATA_ERROR_IO,
234            format!("importdata: unable to read \"{}\" ({err})", path.display()),
235            err,
236        )
237    })?;
238    import_text_data(&text, delimiter.as_deref(), header_lines)
239}
240
/// Intermediate result of parsing a text file, before conversion to runtime
/// values.
241#[derive(Debug, Clone)]
242struct ImportedText {
    // Parsed numeric rows, one inner vector per data line (row-major).
243    data: Vec<Vec<f64>>,
    // Header records, each already split into fields.
244    textdata: Vec<Vec<String>>,
    // Column labels taken from the last header record; empty when none apply.
245    colheaders: Vec<String>,
    // One label per data row when a leading text column was detected.
246    rowheaders: Vec<String>,
247}
248
249fn import_text_data(
250    text: &str,
251    delimiter: Option<&str>,
252    header_lines: Option<usize>,
253) -> BuiltinResult<Value> {
254    let lines: Vec<&str> = text.lines().collect();
255    let nonempty: Vec<(usize, &str)> = lines
256        .iter()
257        .copied()
258        .enumerate()
259        .filter(|(_, line)| !line.trim().is_empty())
260        .collect();
261    if nonempty.is_empty() {
262        return Ok(Value::Tensor(Tensor::new(Vec::new(), vec![0, 0]).map_err(
263            |err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")),
264        )?));
265    }
266
267    let delimiter = delimiter
268        .map(Delimiter::Explicit)
269        .unwrap_or_else(|| detect_delimiter(nonempty.iter().map(|(_, line)| *line)));
270    let records: Vec<(usize, Vec<String>)> = nonempty
271        .iter()
272        .map(|(idx, line)| (*idx, split_record(line, &delimiter)))
273        .collect();
274
275    let data_start = header_lines.unwrap_or_else(|| infer_header_lines(&records));
276    if data_start > records.len() {
277        return Err(importdata_error_with(
278            &IMPORTDATA_ERROR_ARGUMENT,
279            "importdata: headerlinesIn exceeds number of non-empty lines",
280        ));
281    }
282
283    let header_records: Vec<Vec<String>> = records[..data_start]
284        .iter()
285        .map(|(_, record)| record.clone())
286        .collect();
287    let data_records = &records[data_start..];
288
289    let imported = parse_numeric_records(data_records, &header_records)?;
290    let tensor = rows_to_tensor(&imported.data)?;
291    if imported.textdata.is_empty()
292        && imported.colheaders.is_empty()
293        && imported.rowheaders.is_empty()
294    {
295        return Ok(Value::Tensor(tensor));
296    }
297
298    let mut out = StructValue::new();
299    out.insert("data", Value::Tensor(tensor));
300    if !imported.textdata.is_empty() {
301        out.insert("textdata", cell_from_rows(&imported.textdata)?);
302    }
303    if !imported.colheaders.is_empty() {
304        out.insert("colheaders", cell_from_row(&imported.colheaders)?);
305    }
306    if !imported.rowheaders.is_empty() {
307        out.insert("rowheaders", cell_from_col(&imported.rowheaders)?);
308    }
309    Ok(Value::Struct(out))
310}
311
312fn parse_numeric_records(
313    data_records: &[(usize, Vec<String>)],
314    header_records: &[Vec<String>],
315) -> BuiltinResult<ImportedText> {
316    if data_records.is_empty() {
317        return Ok(ImportedText {
318            data: Vec::new(),
319            textdata: header_records.to_vec(),
320            colheaders: header_records.last().cloned().unwrap_or_default(),
321            rowheaders: Vec::new(),
322        });
323    }
324
325    let first = &data_records[0].1;
326    let row_header_cols = infer_row_header_cols(data_records);
327    let numeric_cols = first.len().saturating_sub(row_header_cols);
328    if numeric_cols == 0 {
329        return Err(importdata_error_with(
330            &IMPORTDATA_ERROR_PARSE,
331            "importdata: no numeric columns found",
332        ));
333    }
334
335    let mut rows = Vec::with_capacity(data_records.len());
336    let mut rowheaders = Vec::new();
337    for (line_idx, record) in data_records {
338        let expected_cols = row_header_cols + numeric_cols;
339        if record.len() != expected_cols {
340            return Err(importdata_error_with(
341                &IMPORTDATA_ERROR_PARSE,
342                format!(
343                    "importdata: row {} has {} columns, expected {}",
344                    line_idx + 1,
345                    record.len(),
346                    expected_cols
347                ),
348            ));
349        }
350        if row_header_cols > 0 {
351            rowheaders.push(record[..row_header_cols].join(" "));
352        }
353        let mut row = Vec::with_capacity(numeric_cols);
354        for (col, token) in record[row_header_cols..row_header_cols + numeric_cols]
355            .iter()
356            .enumerate()
357        {
358            row.push(parse_numeric_token(token).ok_or_else(|| {
359                importdata_error_with(
360                    &IMPORTDATA_ERROR_PARSE,
361                    format!(
362                        "importdata: nonnumeric token '{}' at row {}, column {}",
363                        token,
364                        line_idx + 1,
365                        row_header_cols + col + 1
366                    ),
367                )
368            })?);
369        }
370        rows.push(row);
371    }
372
373    let mut colheaders = Vec::new();
374    if let Some(last_header) = header_records.last() {
375        if last_header.len() >= row_header_cols + numeric_cols {
376            colheaders = last_header[row_header_cols..row_header_cols + numeric_cols].to_vec();
377        } else if last_header.len() == numeric_cols {
378            colheaders = last_header.clone();
379        }
380    }
381
382    Ok(ImportedText {
383        data: rows,
384        textdata: header_records.to_vec(),
385        colheaders,
386        rowheaders,
387    })
388}
389
390fn infer_row_header_cols(records: &[(usize, Vec<String>)]) -> usize {
391    let Some(first) = records.first() else {
392        return 0;
393    };
394    if first.1.len() < 2 || parse_numeric_token(&first.1[0]).is_some() {
395        return 0;
396    }
397    if records.iter().all(|(_, row)| {
398        row.len() == first.1.len()
399            && parse_numeric_token(&row[0]).is_none()
400            && row[1..]
401                .iter()
402                .all(|token| parse_numeric_token(token).is_some())
403    }) {
404        1
405    } else {
406        0
407    }
408}
409
410fn infer_header_lines(records: &[(usize, Vec<String>)]) -> usize {
411    records
412        .iter()
413        .position(|(_, row)| is_numeric_data_row(row))
414        .unwrap_or(records.len())
415}
416
417fn is_numeric_data_row(row: &[String]) -> bool {
418    if row.is_empty() {
419        return false;
420    }
421    if row.iter().all(|token| parse_numeric_token(token).is_some()) {
422        return true;
423    }
424    row.len() > 1
425        && parse_numeric_token(&row[0]).is_none()
426        && row[1..]
427            .iter()
428            .all(|token| parse_numeric_token(token).is_some())
429}
430
431fn rows_to_tensor(rows: &[Vec<f64>]) -> BuiltinResult<Tensor> {
432    let row_count = rows.len();
433    let col_count = rows.first().map(|row| row.len()).unwrap_or(0);
434    if rows.iter().any(|row| row.len() != col_count) {
435        return Err(importdata_error_with(
436            &IMPORTDATA_ERROR_PARSE,
437            "importdata: numeric rows have inconsistent column counts",
438        ));
439    }
440    let mut data = Vec::with_capacity(row_count * col_count);
441    for col in 0..col_count {
442        for row in rows {
443            data.push(row[col]);
444        }
445    }
446    Tensor::new(data, vec![row_count, col_count])
447        .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
448}
449
/// How a line is split into fields.
450#[derive(Debug, Clone, PartialEq, Eq)]
451enum Delimiter<'a> {
    // Split on runs of whitespace; used when no candidate delimiter is detected.
452    Whitespace,
    // Split on this exact string (caller-supplied or auto-detected).
453    Explicit(&'a str),
454}
455
456fn detect_delimiter<'a>(lines: impl Iterator<Item = &'a str>) -> Delimiter<'static> {
457    let candidates = [",", "\t", ";", "|"];
458    let sample: Vec<&str> = lines.take(12).collect();
459    let mut best: Option<(&str, usize, usize)> = None;
460    for candidate in candidates {
461        let counts: Vec<usize> = sample
462            .iter()
463            .map(|line| split_record(line, &Delimiter::Explicit(candidate)).len())
464            .filter(|count| *count > 1)
465            .collect();
466        if counts.is_empty() {
467            continue;
468        }
469        let consistent = counts.iter().filter(|count| **count == counts[0]).count();
470        let score = (consistent, counts[0]);
471        if best
472            .map(|(_, best_consistent, best_cols)| score > (best_consistent, best_cols))
473            .unwrap_or(true)
474        {
475            best = Some((candidate, consistent, counts[0]));
476        }
477    }
478    best.map(|(candidate, _, _)| Delimiter::Explicit(candidate))
479        .unwrap_or(Delimiter::Whitespace)
480}
481
482fn split_record(line: &str, delimiter: &Delimiter<'_>) -> Vec<String> {
483    match delimiter {
484        Delimiter::Whitespace => line
485            .split_whitespace()
486            .map(|token| unquote(token.trim()))
487            .filter(|token| !token.is_empty())
488            .collect(),
489        Delimiter::Explicit(delimiter) => split_explicit(line, delimiter),
490    }
491}
492
/// Splits `line` on every occurrence of `delimiter` that lies outside
/// double-quoted sections.
///
/// Quote handling follows the usual CSV convention: a `"` toggles quoted
/// mode and is not part of the field, while `""` inside a quoted section
/// yields one literal `"`. Each field is trimmed of surrounding whitespace.
/// An empty `delimiter` returns the whole trimmed line as a single field.
///
/// Because the enclosing quotes are consumed during the scan, fields are
/// NOT unquoted a second time afterwards. Doing so would strip literal
/// quotes produced by `""` escapes when they sit at both ends of a field
/// (for example `"""a"""`, which denotes `"a"`).
fn split_explicit(line: &str, delimiter: &str) -> Vec<String> {
    if delimiter.is_empty() {
        return vec![line.trim().to_string()];
    }

    let mut fields = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut rest = line;

    while let Some(ch) = rest.chars().next() {
        if ch == '"' {
            let after_quote = &rest[ch.len_utf8()..];
            if in_quotes {
                // `""` inside quotes is an escaped literal quote.
                if let Some(tail) = after_quote.strip_prefix('"') {
                    current.push('"');
                    rest = tail;
                    continue;
                }
            }
            in_quotes = !in_quotes;
            rest = after_quote;
            continue;
        }
        if !in_quotes {
            if let Some(tail) = rest.strip_prefix(delimiter) {
                fields.push(current.trim().to_string());
                current.clear();
                rest = tail;
                continue;
            }
        }
        current.push(ch);
        rest = &rest[ch.len_utf8()..];
    }

    fields.push(current.trim().to_string());
    fields
}
527
/// Trims `token` and, when the result is wrapped in a pair of double quotes,
/// removes that pair and collapses every `""` inside to a single `"`.
/// Tokens that are not fully quoted are returned trimmed but otherwise
/// unchanged.
fn unquote(token: &str) -> String {
    let trimmed = token.trim();
    let quoted_body = trimmed
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    match quoted_body {
        Some(inner) => inner.replace("\"\"", "\""),
        None => trimmed.to_string(),
    }
}
536
/// Parses one field as `f64`.
///
/// Empty (or all-whitespace) fields become NaN. The spellings `nan`, `inf`,
/// `+inf`, `infinity`, `+infinity`, `-inf`, and `-infinity` are recognised
/// case-insensitively; anything else is handed to `str::parse::<f64>`.
/// Returns `None` when the token is not numeric.
fn parse_numeric_token(token: &str) -> Option<f64> {
    let text = token.trim();
    if text.is_empty() {
        return Some(f64::NAN);
    }

    let is_one_of = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|spelling| text.eq_ignore_ascii_case(spelling))
    };
    if is_one_of(&["nan"]) {
        Some(f64::NAN)
    } else if is_one_of(&["inf", "+inf", "infinity", "+infinity"]) {
        Some(f64::INFINITY)
    } else if is_one_of(&["-inf", "-infinity"]) {
        Some(f64::NEG_INFINITY)
    } else {
        text.parse::<f64>().ok()
    }
}
549
550fn parse_delimiter_arg(value: &Value) -> BuiltinResult<String> {
551    let text = string_scalar(value, "delimiterIn")?;
552    match text.as_str() {
553        "\\t" => Ok("\t".to_string()),
554        "\\n" => Ok("\n".to_string()),
555        "\\r" => Ok("\r".to_string()),
556        _ => Ok(text),
557    }
558}
559
560fn parse_header_lines(value: &Value) -> BuiltinResult<usize> {
561    let raw = match value {
562        Value::Num(n) => *n,
563        Value::Int(i) => i.to_i64() as f64,
564        Value::Tensor(t) if t.data.len() == 1 => t.data[0],
565        _ => {
566            return Err(importdata_error_with(
567                &IMPORTDATA_ERROR_ARGUMENT,
568                "importdata: headerlinesIn must be a nonnegative integer scalar",
569            ));
570        }
571    };
572    if !raw.is_finite() || raw < 0.0 || raw.fract() != 0.0 {
573        return Err(importdata_error_with(
574            &IMPORTDATA_ERROR_ARGUMENT,
575            "importdata: headerlinesIn must be a nonnegative integer scalar",
576        ));
577    }
578    Ok(raw as usize)
579}
580
581fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
582    match value {
583        Value::String(s) => normalize_path(s),
584        Value::CharArray(ca) if ca.rows == 1 => {
585            let text: String = ca.data.iter().collect();
586            normalize_path(&text)
587        }
588        Value::StringArray(sa) if sa.data.len() == 1 => normalize_path(&sa.data[0]),
589        _ => Err(importdata_error(&IMPORTDATA_ERROR_ARGUMENT)),
590    }
591}
592
593fn normalize_path(raw: &str) -> BuiltinResult<PathBuf> {
594    if raw.trim().is_empty() {
595        return Err(importdata_error_with(
596            &IMPORTDATA_ERROR_ARGUMENT,
597            "importdata: filename must not be empty",
598        ));
599    }
600    let expanded = expand_user_path(raw, BUILTIN_NAME)
601        .map_err(|msg| importdata_error_with(&IMPORTDATA_ERROR_ARGUMENT, msg))?;
602    Ok(Path::new(&expanded).to_path_buf())
603}
604
605fn string_scalar(value: &Value, context: &str) -> BuiltinResult<String> {
606    match value {
607        Value::String(s) => Ok(s.clone()),
608        Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
609        Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
610        _ => Err(importdata_error_with(
611            &IMPORTDATA_ERROR_ARGUMENT,
612            format!("importdata: expected {context} as a string scalar or character vector"),
613        )),
614    }
615}
616
617fn cell_from_rows(rows: &[Vec<String>]) -> BuiltinResult<Value> {
618    let row_count = rows.len();
619    let col_count = rows.iter().map(|row| row.len()).max().unwrap_or(0);
620    let mut values = Vec::with_capacity(row_count * col_count);
621    for row in rows {
622        for col in 0..col_count {
623            values.push(Value::String(row.get(col).cloned().unwrap_or_default()));
624        }
625    }
626    CellArray::new(values, row_count, col_count)
627        .map(Value::Cell)
628        .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
629}
630
631fn cell_from_row(values: &[String]) -> BuiltinResult<Value> {
632    CellArray::new(
633        values.iter().cloned().map(Value::String).collect(),
634        1,
635        values.len(),
636    )
637    .map(Value::Cell)
638    .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
639}
640
641fn cell_from_col(values: &[String]) -> BuiltinResult<Value> {
642    CellArray::new(
643        values.iter().cloned().map(Value::String).collect(),
644        values.len(),
645        1,
646    )
647    .map(Value::Cell)
648    .map_err(|err| importdata_error_with(&IMPORTDATA_ERROR_PARSE, format!("importdata: {err}")))
649}
650
// Unit tests for `importdata`. Each test writes a small fixture into the
// system temp directory, runs the builtin on it, and removes the file as its
// last step (so the file is left behind if an assertion fails first).
651#[cfg(test)]
652mod tests {
653    use super::*;
654    use futures::executor::block_on;
655    use runmat_time::unix_timestamp_ms;
656    use std::fs;
657    use std::sync::atomic::{AtomicU64, Ordering};
658
    // Per-process counter that keeps fixture names distinct.
659    static NEXT_ID: AtomicU64 = AtomicU64::new(0);
660
    // Builds a temp-dir path from the process id, a timestamp, the counter
    // above, and the requested extension. Nothing is created on disk.
661    fn temp_path(ext: &str) -> PathBuf {
662        let millis = unix_timestamp_ms();
663        let unique = NEXT_ID.fetch_add(1, Ordering::Relaxed);
664        let mut path = std::env::temp_dir();
665        path.push(format!(
666            "runmat_importdata_{}_{}_{}.{}",
667            std::process::id(),
668            millis,
669            unique,
670            ext
671        ));
672        path
673    }
674
    // Writes `contents` to a fresh temp file and returns its path.
675    fn write_fixture(ext: &str, contents: &str) -> PathBuf {
676        let path = temp_path(ext);
677        fs::write(&path, contents).expect("write fixture");
678        path
679    }
680
    // Returns field `name` of a struct value; panics if `value` is not a
    // struct or the field is missing.
681    fn struct_field<'a>(value: &'a Value, name: &str) -> &'a Value {
682        let Value::Struct(st) = value else {
683            panic!("expected struct");
684        };
685        st.fields
686            .get(name)
687            .unwrap_or_else(|| panic!("missing {name}"))
688    }
689
    // Returns the raw data and shape of a tensor value; panics otherwise.
690    fn tensor_data(value: &Value) -> (&[f64], &[usize]) {
691        let Value::Tensor(tensor) = value else {
692            panic!("expected tensor");
693        };
694        (&tensor.data, &tensor.shape)
695    }
696
    // Returns the string stored at (`row`, `col`) of a cell value; panics if
    // the value is not a cell or the element is not a string.
697    fn cell_text(value: &Value, row: usize, col: usize) -> String {
698        let Value::Cell(cell) = value else {
699            panic!("expected cell");
700        };
701        let Value::String(text) = cell.get(row, col).expect("cell value") else {
702            panic!("expected string cell");
703        };
704        text
705    }
706
    // The descriptor advertises all three call forms.
707    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
708    #[test]
709    fn importdata_descriptor_covers_core_forms() {
710        let labels: Vec<&str> = IMPORTDATA_DESCRIPTOR
711            .signatures
712            .iter()
713            .map(|sig| sig.label)
714            .collect();
715        assert!(labels.contains(&"A = importdata(filename)"));
716        assert!(labels.contains(&"A = importdata(filename, delimiterIn)"));
717        assert!(labels.contains(&"A = importdata(filename, delimiterIn, headerlinesIn)"));
718    }
719
    // Whitespace-separated numbers come back as a bare tensor whose data is
    // laid out column by column.
720    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
721    #[test]
722    fn importdata_reads_plain_numeric_matrix() {
723        let path = write_fixture("txt", "1 2 3\n4 5 6\n");
724        let out = block_on(importdata_builtin(
725            Value::from(path.to_string_lossy().into_owned()),
726            Vec::new(),
727        ))
728        .expect("importdata");
729        let (data, shape) = tensor_data(&out);
730        assert_eq!(shape, &[2, 3]);
731        assert_eq!(data, &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
732        let _ = fs::remove_file(path);
733    }
734
    // An explicit space delimiter with zero header lines still yields a bare
    // tensor rather than a struct.
735    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
736    #[test]
737    fn importdata_headerlines_zero_numeric_input_returns_tensor() {
738        let path = write_fixture("txt", "1 2\n3 4\n");
739        let out = block_on(importdata_builtin(
740            Value::from(path.to_string_lossy().into_owned()),
741            vec![Value::from(" "), Value::Num(0.0)],
742        ))
743        .expect("importdata");
744        let (data, shape) = tensor_data(&out);
745        assert_eq!(shape, &[2, 2]);
746        assert_eq!(data, &[1.0, 3.0, 2.0, 4.0]);
747        let _ = fs::remove_file(path);
748    }
749
    // A CSV with one text header line: delimiter and header are detected, and
    // the header fields become `colheaders`.
750    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
751    #[test]
752    fn importdata_detects_csv_header_and_colheaders() {
753        let path = write_fixture("csv", "time,value\n0,1.5\n1,2.5\n");
754        let out = block_on(importdata_builtin(
755            Value::from(path.to_string_lossy().into_owned()),
756            Vec::new(),
757        ))
758        .expect("importdata");
759        let data = struct_field(&out, "data");
760        let (values, shape) = tensor_data(data);
761        assert_eq!(shape, &[2, 2]);
762        assert_eq!(values, &[0.0, 1.0, 1.5, 2.5]);
763        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 0), "time");
764        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 1), "value");
765        let _ = fs::remove_file(path);
766    }
767
    // Caller-supplied `|` delimiter and two header lines; the first header
    // line is preserved verbatim in `textdata`.
768    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
769    #[test]
770    fn importdata_honors_explicit_delimiter_and_header_lines() {
771        let path = write_fixture("dat", "# instrument log\nA|B\n10|20\n30|40\n");
772        let out = block_on(importdata_builtin(
773            Value::from(path.to_string_lossy().into_owned()),
774            vec![Value::from("|"), Value::Num(2.0)],
775        ))
776        .expect("importdata");
777        let data = struct_field(&out, "data");
778        let (values, shape) = tensor_data(data);
779        assert_eq!(shape, &[2, 2]);
780        assert_eq!(values, &[10.0, 30.0, 20.0, 40.0]);
781        assert_eq!(
782            cell_text(struct_field(&out, "textdata"), 0, 0),
783            "# instrument log"
784        );
785        let _ = fs::remove_file(path);
786    }
787
    // A leading text column becomes `rowheaders`; the header line's remaining
    // fields become `colheaders`.
788    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
789    #[test]
790    fn importdata_preserves_rowheaders() {
791        let path = write_fixture("txt", "label x y\nr1 1 2\nr2 3 4\n");
792        let out = block_on(importdata_builtin(
793            Value::from(path.to_string_lossy().into_owned()),
794            Vec::new(),
795        ))
796        .expect("importdata");
797        assert_eq!(cell_text(struct_field(&out, "rowheaders"), 0, 0), "r1");
798        assert_eq!(cell_text(struct_field(&out, "rowheaders"), 1, 0), "r2");
799        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 0), "x");
800        assert_eq!(cell_text(struct_field(&out, "colheaders"), 0, 1), "y");
801        let _ = fs::remove_file(path);
802    }
803
    // A non-numeric token inside the data region is reported as a parse error.
804    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
805    #[test]
806    fn importdata_reports_mixed_unsupported_data() {
807        let path = write_fixture("txt", "1 2\n3 nope\n");
808        let err = block_on(importdata_builtin(
809            Value::from(path.to_string_lossy().into_owned()),
810            Vec::new(),
811        ))
812        .expect_err("parse error");
813        assert!(err.message().contains("nonnumeric token"));
814        let _ = fs::remove_file(path);
815    }
816
    // A data row wider than the first data row is rejected.
817    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
818    #[test]
819    fn importdata_rejects_rows_with_extra_numeric_columns() {
820        let path = write_fixture("txt", "1 2\n3 4 5\n");
821        let err = block_on(importdata_builtin(
822            Value::from(path.to_string_lossy().into_owned()),
823            Vec::new(),
824        ))
825        .expect_err("width mismatch");
826        assert!(err.message().contains("expected 2"));
827        let _ = fs::remove_file(path);
828    }
829}