Skip to main content

runmat_runtime/builtins/table/
mod.rs

1//! MATLAB table datatype support and tabular workflow builtins.
2
3use std::cell::Cell;
4use std::cmp::Ordering;
5use std::collections::{BTreeMap, HashMap, HashSet};
6use std::io::{Cursor, Read};
7use std::path::{Path, PathBuf};
8
9use calamine::{open_workbook_auto_from_rs, Data as SpreadsheetData, Reader as SpreadsheetReader};
10use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11use encoding_rs::{Encoding, UTF_8};
12use runmat_builtins::{
13    Access, BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15    CellArray, CharArray, ClassDef, ComplexTensor, LogicalArray, MethodDef, NumericDType,
16    ObjectInstance, PropertyDef, StringArray, StructValue, Tensor, Value,
17};
18use runmat_filesystem::File;
19use runmat_macros::runtime_builtin;
20
21use crate::builtins::common::fs::expand_user_path;
22use crate::builtins::common::spec::{
23    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
24    ReductionNaN, ResidencyPolicy, ShapeRequirements,
25};
26use crate::{
27    build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError, OBJECT_INDEX_BRACE,
28    OBJECT_INDEX_MEMBER, OBJECT_INDEX_PAREN, OBJECT_SUBSASGN_METHOD, OBJECT_SUBSREF_METHOD,
29};
30
/// Class name under which table objects are registered; also used as the
/// builtin tag on every error built by this module's error helpers.
31pub const TABLE_CLASS: &str = "table";
// Hidden field keys — presumably where a table object stores its column data
// and its `Properties` struct; their consumers are outside this view (TODO confirm).
32const TABLE_VARIABLES_FIELD: &str = "__table_variables";
33const TABLE_PROPERTIES_FIELD: &str = "__table_properties";
// Name of the single public property registered on the table class.
34const PROPERTIES_MEMBER: &str = "Properties";
// These look like the keys of the `Properties` struct; their uses are outside
// this view (TODO confirm against `default_properties`).
35const VARIABLE_NAMES: &str = "VariableNames";
36const ROW_NAMES: &str = "RowNames";
37const DIMENSION_NAMES: &str = "DimensionNames";
38const VARIABLE_UNITS: &str = "VariableUnits";
39const VARIABLE_DESCRIPTIONS: &str = "VariableDescriptions";
40const DESCRIPTION: &str = "Description";
41const USER_DATA: &str = "UserData";
// Presumably the default `DimensionNames` entries — verify where they are consumed.
42const DEFAULT_ROW_DIM_NAME: &str = "Rows";
43const DEFAULT_VARIABLE_DIM_NAME: &str = "Variables";
44
// Per-thread flag read and set by `ensure_table_class_registered` so the class
// definition is registered at most once on each thread.
45thread_local! {
46    static TABLE_CLASS_REGISTERED: Cell<bool> = const { Cell::new(false) };
47}
48
// Single untyped output (`out`), shared by every signature below that returns
// an arbitrary value.
49const ANY_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
50    name: "out",
51    ty: BuiltinParamType::Any,
52    arity: BuiltinParamArity::Required,
53    default: None,
54    description: "Result value.",
55}];
// Single integer-scalar output (`n`), used by the `height` and `width` signatures.
56const NUM_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
57    name: "n",
58    ty: BuiltinParamType::IntegerScalar,
59    arity: BuiltinParamArity::Required,
60    default: None,
61    description: "Count.",
62}];
// Single required input (`T`), used by the `height` and `width` signatures.
// Typed `Any`, so the descriptor itself does not restrict the input to tables.
63const TABLE_INPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
64    name: "T",
65    ty: BuiltinParamType::Any,
66    arity: BuiltinParamArity::Required,
67    default: None,
68    description: "Table input.",
69}];
// Inputs for `readtable(filename)`.
70const READTABLE_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
71    name: "filename",
72    ty: BuiltinParamType::StringScalar,
73    arity: BuiltinParamArity::Required,
74    default: None,
75    description: "Text or spreadsheet file path.",
76}];
// Inputs for `readtable(filename, nameValuePairs...)`: the same filename
// parameter followed by a variadic tail of import options.
77const READTABLE_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
78    BuiltinParamDescriptor {
79        name: "filename",
80        ty: BuiltinParamType::StringScalar,
81        arity: BuiltinParamArity::Required,
82        default: None,
83        description: "Text or spreadsheet file path.",
84    },
85    BuiltinParamDescriptor {
86        name: "nameValuePairs",
87        ty: BuiltinParamType::Any,
88        arity: BuiltinParamArity::Variadic,
89        default: None,
90        description: "Name-value import options.",
91    },
92];
// Output of `spreadsheetImportOptions`: a single `opts` value.
93const SPREADSHEET_IMPORT_OPTIONS_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
94    name: "opts",
95    ty: BuiltinParamType::Any,
96    arity: BuiltinParamArity::Required,
97    default: None,
98    description: "Spreadsheet import options struct.",
99}];
// Inputs for `spreadsheetImportOptions(nameValuePairs...)`; the zero-argument
// form uses an empty input list in the signature table instead.
100const SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 1] =
101    [BuiltinParamDescriptor {
102        name: "nameValuePairs",
103        ty: BuiltinParamType::Any,
104        arity: BuiltinParamArity::Variadic,
105        default: None,
106        description: "Name-value option pairs.",
107    }];
// Output of `detectImportOptions`: a single `opts` value.
108const DETECT_IMPORT_OPTIONS_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
109    name: "opts",
110    ty: BuiltinParamType::Any,
111    arity: BuiltinParamArity::Required,
112    default: None,
113    description: "Detected import options struct accepted by readtable/readmatrix.",
114}];
// Inputs for `detectImportOptions(filename)`.
115const DETECT_IMPORT_OPTIONS_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] =
116    [BuiltinParamDescriptor {
117        name: "filename",
118        ty: BuiltinParamType::StringScalar,
119        arity: BuiltinParamArity::Required,
120        default: None,
121        description: "Text or spreadsheet file path to inspect.",
122    }];
// Inputs for `detectImportOptions(filename, nameValuePairs...)`.
123const DETECT_IMPORT_OPTIONS_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
124    BuiltinParamDescriptor {
125        name: "filename",
126        ty: BuiltinParamType::StringScalar,
127        arity: BuiltinParamArity::Required,
128        default: None,
129        description: "Text or spreadsheet file path to inspect.",
130    },
131    BuiltinParamDescriptor {
132        name: "nameValuePairs",
133        ty: BuiltinParamType::Any,
134        arity: BuiltinParamArity::Variadic,
135        default: None,
136        description: "Detection overrides such as Delimiter, Range, Sheet, Encoding, or TextType.",
137    },
138];
// Inputs for `table(variables...)`: one variadic parameter. The constructor's
// name-value options are not described separately here; they travel in the
// same argument list and are separated out by `split_table_constructor_args`.
139const TABLE_INPUTS_VALUES: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
140    name: "variables",
141    ty: BuiltinParamType::Any,
142    arity: BuiltinParamArity::Variadic,
143    default: None,
144    description: "Variables to assemble as table columns.",
145}];
// Inputs for `groupsummary(T, groupvars, method, datavars)`; only `datavars`
// is optional.
146const GROUPSUMMARY_INPUTS: [BuiltinParamDescriptor; 4] = [
147    BuiltinParamDescriptor {
148        name: "T",
149        ty: BuiltinParamType::Any,
150        arity: BuiltinParamArity::Required,
151        default: None,
152        description: "Input table.",
153    },
154    BuiltinParamDescriptor {
155        name: "groupvars",
156        ty: BuiltinParamType::Any,
157        arity: BuiltinParamArity::Required,
158        default: None,
159        description: "Grouping variable name or names.",
160    },
161    BuiltinParamDescriptor {
162        name: "method",
163        ty: BuiltinParamType::Any,
164        arity: BuiltinParamArity::Required,
165        default: None,
166        description: "Summary method name or names.",
167    },
168    BuiltinParamDescriptor {
169        name: "datavars",
170        ty: BuiltinParamType::Any,
171        arity: BuiltinParamArity::Optional,
172        default: None,
173        description: "Data variable name or names.",
174    },
175];
// Inputs for `table.subsref(obj, kind, payload)`. `kind` is the index-kind
// token that `table_subsref` compares against the OBJECT_INDEX_* constants.
176const OBJECT_INDEX_INPUTS: [BuiltinParamDescriptor; 3] = [
177    BuiltinParamDescriptor {
178        name: "obj",
179        ty: BuiltinParamType::Any,
180        arity: BuiltinParamArity::Required,
181        default: None,
182        description: "Table object receiver.",
183    },
184    BuiltinParamDescriptor {
185        name: "kind",
186        ty: BuiltinParamType::StringScalar,
187        arity: BuiltinParamArity::Required,
188        default: None,
189        description: "Index kind token.",
190    },
191    BuiltinParamDescriptor {
192        name: "payload",
193        ty: BuiltinParamType::Any,
194        arity: BuiltinParamArity::Required,
195        default: None,
196        description: "Index payload.",
197    },
198];
// Inputs for `table.subsasgn(obj, kind, payload, rhs)`: the three indexing
// parameters above plus the value being assigned.
199const OBJECT_ASSIGN_INPUTS: [BuiltinParamDescriptor; 4] = [
200    BuiltinParamDescriptor {
201        name: "obj",
202        ty: BuiltinParamType::Any,
203        arity: BuiltinParamArity::Required,
204        default: None,
205        description: "Table object receiver.",
206    },
207    BuiltinParamDescriptor {
208        name: "kind",
209        ty: BuiltinParamType::StringScalar,
210        arity: BuiltinParamArity::Required,
211        default: None,
212        description: "Index kind token.",
213    },
214    BuiltinParamDescriptor {
215        name: "payload",
216        ty: BuiltinParamType::Any,
217        arity: BuiltinParamArity::Required,
218        default: None,
219        description: "Index payload.",
220    },
221    BuiltinParamDescriptor {
222        name: "rhs",
223        ty: BuiltinParamType::Any,
224        arity: BuiltinParamArity::Required,
225        default: None,
226        description: "Assigned value.",
227    },
228];
229
// Signature tables: each entry pairs a human-readable call form (`label`) with
// the parameter descriptor arrays declared above.

// `readtable`: filename-only form and filename plus name-value options.
230const READTABLE_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
231    BuiltinSignatureDescriptor {
232        label: "T = readtable(filename)",
233        inputs: &READTABLE_INPUTS_FILENAME,
234        outputs: &ANY_OUTPUT,
235    },
236    BuiltinSignatureDescriptor {
237        label: "T = readtable(filename, nameValuePairs...)",
238        inputs: &READTABLE_INPUTS_NAME_VALUE,
239        outputs: &ANY_OUTPUT,
240    },
241];
// `spreadsheetImportOptions`: zero-argument form (empty input list) and the
// name-value form.
242const SPREADSHEET_IMPORT_OPTIONS_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
243    BuiltinSignatureDescriptor {
244        label: "opts = spreadsheetImportOptions()",
245        inputs: &[],
246        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
247    },
248    BuiltinSignatureDescriptor {
249        label: "opts = spreadsheetImportOptions(nameValuePairs...)",
250        inputs: &SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE,
251        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
252    },
253];
// `detectImportOptions`: filename-only form and filename plus overrides.
254const DETECT_IMPORT_OPTIONS_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
255    BuiltinSignatureDescriptor {
256        label: "opts = detectImportOptions(filename)",
257        inputs: &DETECT_IMPORT_OPTIONS_INPUTS_FILENAME,
258        outputs: &DETECT_IMPORT_OPTIONS_OUTPUT,
259    },
260    BuiltinSignatureDescriptor {
261        label: "opts = detectImportOptions(filename, nameValuePairs...)",
262        inputs: &DETECT_IMPORT_OPTIONS_INPUTS_NAME_VALUE,
263        outputs: &DETECT_IMPORT_OPTIONS_OUTPUT,
264    },
265];
// `table` constructor.
266const TABLE_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
267    label: "T = table(variables...)",
268    inputs: &TABLE_INPUTS_VALUES,
269    outputs: &ANY_OUTPUT,
270}];
// `groupsummary`.
271const GROUPSUMMARY_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
272    label: "G = groupsummary(T, groupvars, method, datavars)",
273    inputs: &GROUPSUMMARY_INPUTS,
274    outputs: &ANY_OUTPUT,
275}];
// `height` and `width` share the same input and output descriptors.
276const HEIGHT_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
277    label: "n = height(T)",
278    inputs: &TABLE_INPUT,
279    outputs: &NUM_OUTPUT,
280}];
281const WIDTH_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
282    label: "n = width(T)",
283    inputs: &TABLE_INPUT,
284    outputs: &NUM_OUTPUT,
285}];
// Indexing methods registered on the table class.
286const OBJECT_SUBSREF_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
287    label: "out = table.subsref(obj, kind, payload)",
288    inputs: &OBJECT_INDEX_INPUTS,
289    outputs: &ANY_OUTPUT,
290}];
291const OBJECT_SUBSASGN_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
292    label: "obj = table.subsasgn(obj, kind, payload, rhs)",
293    inputs: &OBJECT_ASSIGN_INPUTS,
294    outputs: &ANY_OUTPUT,
295}];
296
// Error descriptors. `table_error` and `table_error_with_source` read only the
// `identifier` field when building a `RuntimeError`; the remaining fields are
// carried as metadata in the `TABLE_ERRORS` list.

// Wrapped by the `invalid_argument` helper.
297const TABLE_ERROR_INVALID_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
298    code: "RM.TABLE.INVALID_ARGUMENT",
299    identifier: Some("RunMat:table:InvalidArgument"),
300    when: "Arguments or table metadata are invalid.",
301    message: "table: invalid argument",
302};
// Wrapped by the `invalid_index` helper.
303const TABLE_ERROR_INVALID_INDEX: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
304    code: "RM.TABLE.INVALID_INDEX",
305    identifier: Some("RunMat:table:InvalidIndex"),
306    when: "Table indexing is invalid.",
307    message: "table: invalid index",
308};
// Wrapped by the `invalid_variable` helper.
309const TABLE_ERROR_INVALID_VARIABLE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
310    code: "RM.TABLE.INVALID_VARIABLE",
311    identifier: Some("RunMat:table:InvalidVariable"),
312    when: "A table variable name or value is invalid.",
313    message: "table: invalid variable",
314};
// The two descriptors below use the READTABLE code/identifier namespace rather
// than TABLE.
315const TABLE_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
316    code: "RM.READTABLE.IO",
317    identifier: Some("RunMat:readtable:IOError"),
318    when: "readtable cannot open or read the requested file.",
319    message: "readtable: file read failed",
320};
321const TABLE_ERROR_UNSUPPORTED_FILE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
322    code: "RM.READTABLE.UNSUPPORTED_FILE",
323    identifier: Some("RunMat:readtable:UnsupportedFileType"),
324    when: "readtable receives a file type outside the text or spreadsheet import backends.",
325    message: "readtable: unsupported file type",
326};
// Complete error list; every builtin descriptor in this file points at it.
327const TABLE_ERRORS: [BuiltinErrorDescriptor; 5] = [
328    TABLE_ERROR_INVALID_ARGUMENT,
329    TABLE_ERROR_INVALID_INDEX,
330    TABLE_ERROR_INVALID_VARIABLE,
331    TABLE_ERROR_IO,
332    TABLE_ERROR_UNSUPPORTED_FILE,
333];
334
// Public builtin descriptors, referenced by path from the `#[runtime_builtin]`
// attributes further down. All use a fixed output mode and the shared
// `TABLE_ERRORS` list; they differ in signature table and completion policy.

/// Descriptor for the `readtable` builtin.
335pub const READTABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
336    signatures: &READTABLE_SIGNATURES,
337    output_mode: BuiltinOutputMode::Fixed,
338    completion_policy: BuiltinCompletionPolicy::Public,
339    errors: &TABLE_ERRORS,
340};
/// Descriptor for the `spreadsheetImportOptions` builtin.
341pub const SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
342    signatures: &SPREADSHEET_IMPORT_OPTIONS_SIGNATURES,
343    output_mode: BuiltinOutputMode::Fixed,
344    completion_policy: BuiltinCompletionPolicy::Public,
345    errors: &TABLE_ERRORS,
346};
/// Descriptor for the `detectImportOptions` builtin.
347pub const DETECT_IMPORT_OPTIONS_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
348    signatures: &DETECT_IMPORT_OPTIONS_SIGNATURES,
349    output_mode: BuiltinOutputMode::Fixed,
350    completion_policy: BuiltinCompletionPolicy::Public,
351    errors: &TABLE_ERRORS,
352};
/// Descriptor for the `table` constructor builtin.
353pub const TABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
354    signatures: &TABLE_SIGNATURES,
355    output_mode: BuiltinOutputMode::Fixed,
356    completion_policy: BuiltinCompletionPolicy::Public,
357    errors: &TABLE_ERRORS,
358};
/// Descriptor for the `groupsummary` builtin.
359pub const GROUPSUMMARY_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
360    signatures: &GROUPSUMMARY_SIGNATURES,
361    output_mode: BuiltinOutputMode::Fixed,
362    completion_policy: BuiltinCompletionPolicy::Public,
363    errors: &TABLE_ERRORS,
364};
/// Descriptor for the `height` builtin.
365pub const HEIGHT_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
366    signatures: &HEIGHT_SIGNATURES,
367    output_mode: BuiltinOutputMode::Fixed,
368    completion_policy: BuiltinCompletionPolicy::Public,
369    errors: &TABLE_ERRORS,
370};
/// Descriptor for the `width` builtin.
371pub const WIDTH_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
372    signatures: &WIDTH_SIGNATURES,
373    output_mode: BuiltinOutputMode::Fixed,
374    completion_policy: BuiltinCompletionPolicy::Public,
375    errors: &TABLE_ERRORS,
376};
/// Descriptor for `table.subsref`; uses the `MethodOnly` completion policy
/// instead of `Public`.
377pub const TABLE_SUBSREF_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
378    signatures: &OBJECT_SUBSREF_SIGNATURES,
379    output_mode: BuiltinOutputMode::Fixed,
380    completion_policy: BuiltinCompletionPolicy::MethodOnly,
381    errors: &TABLE_ERRORS,
382};
/// Descriptor for `table.subsasgn`; uses the `MethodOnly` completion policy
/// instead of `Public`.
383pub const TABLE_SUBSASGN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
384    signatures: &OBJECT_SUBSASGN_SIGNATURES,
385    output_mode: BuiltinOutputMode::Fixed,
386    completion_policy: BuiltinCompletionPolicy::MethodOnly,
387    errors: &TABLE_ERRORS,
388};
389
/// GPU spec registered for the table builtins. It declares no supported
/// precisions and no provider hooks, and uses the `GatherImmediately`
/// residency policy; per the `notes` field, tables are host containers.
390#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::table")]
391pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
392    name: "table",
393    op_kind: GpuOpKind::Custom("table"),
394    supported_precisions: &[],
395    broadcast: BroadcastSemantics::None,
396    provider_hooks: &[],
397    constant_strategy: ConstantStrategy::InlineLiteral,
398    residency: ResidencyPolicy::GatherImmediately,
399    nan_mode: ReductionNaN::Include,
400    two_pass_threshold: None,
401    workgroup_size: None,
402    accepts_nan_mode: false,
403    notes: "Tables are host containers. GPU variables are gathered when tabular algorithms need row-wise access.",
404};
405
/// Fusion spec registered for the table builtins. It supplies neither an
/// elementwise nor a reduction entry; per the `notes` field, tables are not
/// fusion operands.
406#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::table")]
407pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
408    name: "table",
409    shape: ShapeRequirements::Any,
410    constant_strategy: ConstantStrategy::InlineLiteral,
411    elementwise: None,
412    reduction: None,
413    emits_nan: false,
414    notes: "Tables are structured host containers and are not fusion operands.",
415};
416
417fn table_error(error: &'static BuiltinErrorDescriptor, message: impl Into<String>) -> RuntimeError {
418    let mut builder = build_runtime_error(message).with_builtin(TABLE_CLASS);
419    if let Some(identifier) = error.identifier {
420        builder = builder.with_identifier(identifier);
421    }
422    builder.build()
423}
424
425fn table_error_with_source<E>(
426    error: &'static BuiltinErrorDescriptor,
427    message: impl Into<String>,
428    source: E,
429) -> RuntimeError
430where
431    E: std::error::Error + Send + Sync + 'static,
432{
433    let mut builder = build_runtime_error(message)
434        .with_builtin(TABLE_CLASS)
435        .with_source(source);
436    if let Some(identifier) = error.identifier {
437        builder = builder.with_identifier(identifier);
438    }
439    builder.build()
440}
441
442fn invalid_argument(message: impl Into<String>) -> RuntimeError {
443    table_error(&TABLE_ERROR_INVALID_ARGUMENT, message)
444}
445
446fn invalid_index(message: impl Into<String>) -> RuntimeError {
447    table_error(&TABLE_ERROR_INVALID_INDEX, message)
448}
449
450fn invalid_variable(message: impl Into<String>) -> RuntimeError {
451    table_error(&TABLE_ERROR_INVALID_VARIABLE, message)
452}
453
454fn map_control_flow(err: RuntimeError) -> RuntimeError {
455    let identifier = err.identifier().map(ToString::to_string);
456    let message = err.message().to_string();
457    let mut builder = build_runtime_error(message)
458        .with_builtin(TABLE_CLASS)
459        .with_source(err);
460    if let Some(identifier) = identifier {
461        builder = builder.with_identifier(identifier);
462    }
463    builder.build()
464}
465
466pub fn ensure_table_class_registered() {
467    TABLE_CLASS_REGISTERED.with(|registered| {
468        if registered.get() {
469            return;
470        }
471        let mut properties = HashMap::new();
472        properties.insert(
473            PROPERTIES_MEMBER.to_string(),
474            PropertyDef {
475                name: PROPERTIES_MEMBER.to_string(),
476                is_static: false,
477                is_constant: false,
478                is_dependent: false,
479                get_access: Access::Public,
480                set_access: Access::Public,
481                default_value: Some(Value::Struct(default_properties(Vec::new(), None))),
482            },
483        );
484
485        let mut methods = HashMap::new();
486        for name in [OBJECT_SUBSREF_METHOD, OBJECT_SUBSASGN_METHOD] {
487            methods.insert(
488                name.to_string(),
489                MethodDef {
490                    name: name.to_string(),
491                    is_static: false,
492                    is_abstract: false,
493                    is_sealed: false,
494                    access: Access::Public,
495                    function_name: format!("{TABLE_CLASS}.{name}"),
496                    implicit_class_argument: None,
497                },
498            );
499        }
500
501        runmat_builtins::register_class(ClassDef {
502            name: TABLE_CLASS.to_string(),
503            parent: None,
504            properties,
505            methods,
506        });
507        registered.set(true);
508    });
509}
510
511#[runtime_builtin(
512    name = "table",
513    category = "table",
514    summary = "Create a table from named column variables.",
515    keywords = "table,VariableNames,RowNames,Properties",
516    accel = "cpu",
517    type_resolver(crate::builtins::io::type_resolvers::struct_type),
518    descriptor(crate::builtins::table::TABLE_DESCRIPTOR),
519    builtin_path = "crate::builtins::table"
520)]
521async fn table_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
522    ensure_table_class_registered();
523    let gathered = gather_values(&args).await?;
524    let (variables, options) = split_table_constructor_args(gathered)?;
525    let names = if let Some(names) = options.variable_names {
526        names
527    } else {
528        generated_variable_names(variables.len())
529    };
530    table_from_columns_with_properties(names, variables, options.row_names)
531}
532
533#[runtime_builtin(
534    name = "readtable",
535    category = "io/tabular",
536    summary = "Import tabular text or spreadsheet data into a table.",
537    keywords = "readtable,table,csv,tsv,xlsx,xls,ods,spreadsheet,VariableNames,RowNames,Sheet,Range",
538    accel = "cpu",
539    type_resolver(crate::builtins::io::type_resolvers::struct_type),
540    descriptor(crate::builtins::table::READTABLE_DESCRIPTOR),
541    builtin_path = "crate::builtins::table"
542)]
543async fn readtable_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
544    ensure_table_class_registered();
545    let path_value = gather_if_needed_async(&path)
546        .await
547        .map_err(map_control_flow)?;
548    let args = gather_values(&rest).await?;
549    let options = ReadTableOptions::parse(&args)?;
550    let resolved = resolve_path(&path_value)?;
551    read_table_from_file(&resolved, &options).await
552}
553
554#[runtime_builtin(
555    name = "spreadsheetImportOptions",
556    category = "io/tabular",
557    summary = "Create spreadsheet import options for readtable.",
558    keywords = "spreadsheetImportOptions,readtable,spreadsheet,xlsx,xls,DataRange,VariableTypes,VariableNames,NumVariables",
559    accel = "cpu",
560    type_resolver(crate::builtins::io::type_resolvers::struct_type),
561    descriptor(crate::builtins::table::SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR),
562    builtin_path = "crate::builtins::table"
563)]
564async fn spreadsheet_import_options_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
565    let gathered = gather_values(&args).await?;
566    spreadsheet_import_options(gathered)
567}
568
569#[runtime_builtin(
570    name = "detectImportOptions",
571    category = "io/tabular",
572    summary = "Inspect a text or spreadsheet file and create import options.",
573    keywords = "detectImportOptions,readtable,readmatrix,csv,tsv,xlsx,Delimiter,VariableTypes,VariableNames",
574    accel = "cpu",
575    type_resolver(crate::builtins::io::type_resolvers::struct_type),
576    descriptor(crate::builtins::table::DETECT_IMPORT_OPTIONS_DESCRIPTOR),
577    builtin_path = "crate::builtins::table"
578)]
579async fn detect_import_options_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
580    let path_value = gather_if_needed_async(&path)
581        .await
582        .map_err(map_control_flow)?;
583    let args = gather_values(&rest).await?;
584    let options = ReadTableOptions::parse(&args)?;
585    let resolved = resolve_path(&path_value)?;
586    detect_import_options_from_file(&resolved, &options).await
587}
588
589#[runtime_builtin(
590    name = "height",
591    category = "table",
592    summary = "Return the number of rows in a table.",
593    keywords = "height,table,rows",
594    descriptor(crate::builtins::table::HEIGHT_DESCRIPTOR),
595    builtin_path = "crate::builtins::table"
596)]
597async fn height_builtin(value: Value) -> BuiltinResult<Value> {
598    let host = gather_if_needed_async(&value)
599        .await
600        .map_err(map_control_flow)?;
601    if let Some(object) = table_object(&host) {
602        return Ok(Value::Num(table_height(object)? as f64));
603    }
604    value_row_count(&host).map(|n| Value::Num(n as f64))
605}
606
607#[runtime_builtin(
608    name = "width",
609    category = "table",
610    summary = "Return the number of variables in a table.",
611    keywords = "width,table,variables",
612    descriptor(crate::builtins::table::WIDTH_DESCRIPTOR),
613    builtin_path = "crate::builtins::table"
614)]
615async fn width_builtin(value: Value) -> BuiltinResult<Value> {
616    let host = gather_if_needed_async(&value)
617        .await
618        .map_err(map_control_flow)?;
619    if let Some(object) = table_object(&host) {
620        return Ok(Value::Num(table_width(object)? as f64));
621    }
622    match host {
623        Value::Tensor(t) => Ok(Value::Num(t.cols() as f64)),
624        Value::ComplexTensor(t) => Ok(Value::Num(t.cols as f64)),
625        Value::StringArray(sa) => Ok(Value::Num(sa.cols() as f64)),
626        Value::LogicalArray(la) => Ok(Value::Num(la.shape.get(1).copied().unwrap_or(1) as f64)),
627        Value::Cell(ca) => Ok(Value::Num(ca.cols as f64)),
628        Value::CharArray(ca) => Ok(Value::Num(ca.cols as f64)),
629        _ => Ok(Value::Num(1.0)),
630    }
631}
632
633#[runtime_builtin(
634    name = "groupsummary",
635    category = "table",
636    summary = "Group table rows and compute summary statistics for data variables.",
637    keywords = "groupsummary,group,table,mean,sum,count,median,min,max",
638    accel = "cpu",
639    descriptor(crate::builtins::table::GROUPSUMMARY_DESCRIPTOR),
640    builtin_path = "crate::builtins::table"
641)]
642async fn groupsummary_builtin(
643    table: Value,
644    groupvars: Value,
645    method: Value,
646    rest: Vec<Value>,
647) -> BuiltinResult<Value> {
648    let table = gather_if_needed_async(&table)
649        .await
650        .map_err(map_control_flow)?;
651    let groupvars = gather_if_needed_async(&groupvars)
652        .await
653        .map_err(map_control_flow)?;
654    let method = gather_if_needed_async(&method)
655        .await
656        .map_err(map_control_flow)?;
657    let rest = gather_values(&rest).await?;
658    groupsummary_impl(table, groupvars, method, rest)
659}
660
661#[runtime_builtin(
662    name = "table.subsref",
663    descriptor(crate::builtins::table::TABLE_SUBSREF_DESCRIPTOR),
664    builtin_path = "crate::builtins::table"
665)]
666async fn table_subsref(obj: Value, kind: String, payload: Value) -> BuiltinResult<Value> {
667    let object = into_table_object(obj, "table.subsref")?;
668    match kind.as_str() {
669        OBJECT_INDEX_MEMBER => table_member_get(&object, &payload),
670        OBJECT_INDEX_PAREN => table_paren_get(&object, &payload),
671        OBJECT_INDEX_BRACE => table_brace_get(&object, &payload),
672        other => Err(invalid_index(format!(
673            "table.subsref: unsupported indexing kind '{other}'"
674        ))),
675    }
676}
677
678#[runtime_builtin(
679    name = "table.subsasgn",
680    descriptor(crate::builtins::table::TABLE_SUBSASGN_DESCRIPTOR),
681    builtin_path = "crate::builtins::table"
682)]
683async fn table_subsasgn(
684    obj: Value,
685    kind: String,
686    payload: Value,
687    rhs: Value,
688) -> BuiltinResult<Value> {
689    let mut object = into_table_object(obj, "table.subsasgn")?;
690    match kind.as_str() {
691        OBJECT_INDEX_MEMBER => {
692            let field = scalar_text(&payload, "table member")?;
693            table_member_set(&mut object, &field, rhs)?;
694            Ok(Value::Object(object))
695        }
696        OBJECT_INDEX_PAREN => table_paren_assign(object, &payload, rhs),
697        OBJECT_INDEX_BRACE => table_brace_assign(object, &payload, rhs),
698        other => Err(invalid_index(format!(
699            "table.subsasgn: unsupported indexing kind '{other}'"
700        ))),
701    }
702}
703
704async fn gather_values(values: &[Value]) -> BuiltinResult<Vec<Value>> {
705    let mut out = Vec::with_capacity(values.len());
706    for value in values {
707        out.push(
708            gather_if_needed_async(value)
709                .await
710                .map_err(map_control_flow)?,
711        );
712    }
713    Ok(out)
714}
715
/// Name-value options recognised by the `table(...)` constructor, as collected
/// by `split_table_constructor_args`. Both fields stay `None` when the
/// corresponding option is absent.
716#[derive(Default)]
717struct TableConstructorOptions {
    // Value of the `VariableNames` option, converted by `variable_name_list`.
718    variable_names: Option<Vec<String>>,
    // Value of the `RowNames` option, converted by `string_list`.
719    row_names: Option<Vec<String>>,
720}
721
722fn split_table_constructor_args(
723    args: Vec<Value>,
724) -> BuiltinResult<(Vec<Value>, TableConstructorOptions)> {
725    let mut variables = Vec::new();
726    let mut options = TableConstructorOptions::default();
727    let mut idx = 0usize;
728    while idx < args.len() {
729        if let Ok(name) = scalar_text(&args[idx], "table option") {
730            if idx + 1 < args.len() && is_table_constructor_option(&name) {
731                let value = &args[idx + 1];
732                if name.eq_ignore_ascii_case("VariableNames") {
733                    options.variable_names = Some(variable_name_list(value)?);
734                } else if name.eq_ignore_ascii_case("RowNames") {
735                    options.row_names = Some(string_list(value)?);
736                }
737                idx += 2;
738                continue;
739            }
740        }
741        variables.push(args[idx].clone());
742        idx += 1;
743    }
744    Ok((variables, options))
745}
746
/// Returns `true` when `name` is one of the option names accepted by the
/// `table(...)` constructor, compared ASCII case-insensitively.
fn is_table_constructor_option(name: &str) -> bool {
    ["VariableNames", "RowNames"]
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
750
/// Import options used by both `readtable` and `detectImportOptions`. Built by
/// `ReadTableOptions::parse`, which starts from the defaults and applies each
/// supplied option through `apply`.
751#[derive(Clone)]
752struct ReadTableOptions {
    // "FileType" option; defaults to `ImportFileType::Auto`.
753    file_type: ImportFileType,
    // "Delimiter" option; `None` until one is supplied.
754    delimiter: Option<Delimiter>,
    // "ReadVariableNames" option; `None` means the caller did not specify it.
755    read_variable_names: Option<bool>,
    // "ReadRowNames" option.
756    read_row_names: bool,
    // "NumVariables" option; a supplied value of 0 is stored as `None`.
757    num_variables: Option<usize>,
    // "VariableNames" option.
758    variable_names: Option<Vec<String>>,
    // "VariableTypes" option.
759    variable_types: Option<Vec<ImportVariableType>>,
    // "RowNames" option.
760    row_names: Option<Vec<String>>,
    // "NumHeaderLines" option.
761    num_header_lines: usize,
    // Written by both the "Range" and "DataRange" options.
762    range: Option<RangeSpec>,
    // "Sheet" option.
763    sheet: Option<SheetSelector>,
    // "PreserveVariableNames" option.
764    preserve_variable_names: bool,
    // "TreatAsMissing" tokens, trimmed and ASCII-lowercased when inserted.
765    treat_as_missing: HashSet<String>,
    // The options that set the remaining fields are outside this view; only
    // their defaults (see the `Default` impl) are visible here.
766    empty_line_rule: EmptyLineRule,
767    text_type: TextImportType,
768    encoding: String,
769    datetime_type: DatetimeImportType,
770}
771
/// Baseline options used before any caller-supplied option is applied.
772impl Default for ReadTableOptions {
    /// Returns options with every optional override unset, no header lines,
    /// an empty missing-token set, and the enum/encoding defaults listed below.
773    fn default() -> Self {
774        Self {
775            file_type: ImportFileType::Auto,
776            delimiter: None,
            // `None` rather than a bool: the caller has not expressed a preference.
777            read_variable_names: None,
778            read_row_names: false,
779            num_variables: None,
780            variable_names: None,
781            variable_types: None,
782            row_names: None,
783            num_header_lines: 0,
784            range: None,
785            sheet: None,
786            preserve_variable_names: false,
787            treat_as_missing: HashSet::new(),
788            empty_line_rule: EmptyLineRule::Skip,
789            text_type: TextImportType::String,
790            encoding: "utf-8".to_string(),
791            datetime_type: DatetimeImportType::Datetime,
792        }
793    }
794}
795
796impl ReadTableOptions {
797    fn parse(args: &[Value]) -> BuiltinResult<Self> {
798        let mut options = Self::default();
799        let mut idx = 0usize;
800        if let Some(Value::Struct(st)) = args.first() {
801            for (name, value) in &st.fields {
802                options.apply(name, value)?;
803            }
804            idx = 1;
805        }
806        while idx < args.len() {
807            if idx + 1 >= args.len() {
808                return Err(invalid_argument(
809                    "readtable: name-value options must be provided in pairs",
810                ));
811            }
812            let name = scalar_text(&args[idx], "readtable option")?;
813            options.apply(&name, &args[idx + 1])?;
814            idx += 2;
815        }
816        Ok(options)
817    }
818
819    fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
820        if name.eq_ignore_ascii_case("FileType") {
821            self.file_type = ImportFileType::parse(value)?;
822        } else if name.eq_ignore_ascii_case("Delimiter") {
823            self.delimiter = Some(Delimiter::parse(value)?);
824        } else if name.eq_ignore_ascii_case("ReadVariableNames") {
825            self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
826        } else if name.eq_ignore_ascii_case("ReadRowNames") {
827            self.read_row_names = bool_scalar(value, "ReadRowNames")?;
828        } else if name.eq_ignore_ascii_case("NumVariables") {
829            let count = nonnegative_usize(value, "NumVariables")?;
830            self.num_variables = (count > 0).then_some(count);
831        } else if name.eq_ignore_ascii_case("VariableNames") {
832            self.variable_names = optional_raw_variable_name_list(value)?;
833        } else if name.eq_ignore_ascii_case("VariableTypes") {
834            self.variable_types = optional_variable_type_list(value)?;
835        } else if name.eq_ignore_ascii_case("RowNames") {
836            self.row_names = Some(string_list(value)?);
837        } else if name.eq_ignore_ascii_case("NumHeaderLines") {
838            self.num_header_lines = nonnegative_usize(value, "NumHeaderLines")?;
839        } else if name.eq_ignore_ascii_case("Range") {
840            self.range = Some(RangeSpec::parse(value)?);
841        } else if name.eq_ignore_ascii_case("DataRange") {
842            self.range = optional_range_spec(value)?;
843        } else if name.eq_ignore_ascii_case("Sheet") {
844            self.sheet = optional_sheet_selector(value)?;
845        } else if name.eq_ignore_ascii_case("TreatAsMissing") {
846            for token in string_list(value)? {
847                self.treat_as_missing
848                    .insert(token.trim().to_ascii_lowercase());
849            }
850        } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
851            self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
852        } else if name.eq_ignore_ascii_case("VariableNamingRule") {
853            let rule = scalar_text(value, "VariableNamingRule")?;
854            if rule.eq_ignore_ascii_case("preserve") {
855                self.preserve_variable_names = true;
856            } else if rule.eq_ignore_ascii_case("modify") {
857                self.preserve_variable_names = false;
858            } else {
859                return Err(invalid_argument(format!(
860                    "readtable: unsupported VariableNamingRule '{rule}'"
861                )));
862            }
863        } else if name.eq_ignore_ascii_case("EmptyLineRule") {
864            let rule = scalar_text(value, "EmptyLineRule")?;
865            self.empty_line_rule = if rule.eq_ignore_ascii_case("read") {
866                EmptyLineRule::Read
867            } else if rule.eq_ignore_ascii_case("skip") {
868                EmptyLineRule::Skip
869            } else {
870                return Err(invalid_argument(format!(
871                    "readtable: unsupported EmptyLineRule '{rule}'"
872                )));
873            };
874        } else if name.eq_ignore_ascii_case("Encoding") {
875            let encoding = scalar_text(value, "Encoding")?;
876            validate_encoding_label(&encoding)?;
877            self.encoding = encoding;
878        } else if name.eq_ignore_ascii_case("TextType") {
879            self.text_type = TextImportType::parse(value, "readtable")?;
880        } else if name.eq_ignore_ascii_case("DatetimeType") {
881            self.datetime_type = DatetimeImportType::parse(value)?;
882        } else {
883            return Err(invalid_argument(format!(
884                "readtable: unsupported option '{name}'"
885            )));
886        }
887        Ok(())
888    }
889
890    fn is_missing(&self, token: &str) -> bool {
891        let trimmed = token.trim();
892        trimmed.is_empty()
893            || self
894                .treat_as_missing
895                .contains(&trimmed.to_ascii_lowercase())
896    }
897}
898
899fn spreadsheet_import_options(args: Vec<Value>) -> BuiltinResult<Value> {
900    if !args.len().is_multiple_of(2) {
901        return Err(invalid_argument(
902            "spreadsheetImportOptions: name-value options must be provided in pairs",
903        ));
904    }
905    let mut options = SpreadsheetImportOptions::default();
906    let mut idx = 0usize;
907    while idx < args.len() {
908        let name = scalar_text(&args[idx], "spreadsheetImportOptions option")?;
909        options.apply(&name, &args[idx + 1])?;
910        idx += 2;
911    }
912    Ok(Value::Struct(options.into_struct()?))
913}
914
915async fn detect_import_options_from_file(
916    path: &Path,
917    options: &ReadTableOptions,
918) -> BuiltinResult<Value> {
919    match options.file_type {
920        ImportFileType::Spreadsheet => detect_spreadsheet_import_options(path, options).await,
921        ImportFileType::Text => detect_text_import_options(path, options).await,
922        ImportFileType::Auto if is_spreadsheet_path(path) => {
923            detect_spreadsheet_import_options(path, options).await
924        }
925        ImportFileType::Auto => detect_text_import_options(path, options).await,
926    }
927}
928
929async fn detect_text_import_options(
930    path: &Path,
931    options: &ReadTableOptions,
932) -> BuiltinResult<Value> {
933    if options.sheet.is_some() {
934        return Err(invalid_argument(
935            "detectImportOptions: Sheet is only valid for spreadsheet files",
936        ));
937    }
938    let bytes = read_file_bytes(path).await?;
939    let text = strip_utf8_bom(decode_text_bytes(&bytes, &options.encoding)?);
940    let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
941    if let Some(first) = raw_lines.first_mut() {
942        if first.starts_with('\u{FEFF}') {
943            *first = first.trim_start_matches('\u{FEFF}').to_string();
944        }
945    }
946    let delimiter = options
947        .delimiter
948        .clone()
949        .or_else(|| detect_delimiter(&raw_lines))
950        .unwrap_or(Delimiter::Whitespace);
951    let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
952    if options.num_header_lines > 0 {
953        rows = rows.into_iter().skip(options.num_header_lines).collect();
954    }
955    if let Some(range) = options.range {
956        rows = apply_import_range(rows, range);
957    }
958    detected_options_from_rows(
959        ImportFileType::Text,
960        rows,
961        options,
962        Some(delimiter),
963        options.sheet.as_ref(),
964    )
965}
966
967async fn detect_spreadsheet_import_options(
968    path: &Path,
969    options: &ReadTableOptions,
970) -> BuiltinResult<Value> {
971    if options.delimiter.is_some() {
972        return Err(invalid_argument(
973            "detectImportOptions: Delimiter is only valid for text files",
974        ));
975    }
976    let bytes = read_file_bytes(path).await?;
977    let cursor = Cursor::new(bytes);
978    let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
979        table_error(
980            &TABLE_ERROR_UNSUPPORTED_FILE,
981            format!(
982                "detectImportOptions: unable to open spreadsheet '{}': {err}",
983                path.display()
984            ),
985        )
986    })?;
987    let range = match &options.sheet {
988        Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
989            invalid_argument(format!(
990                "detectImportOptions: unable to read sheet '{name}': {err:?}"
991            ))
992        })?,
993        Some(SheetSelector::Index(index)) => workbook
994            .worksheet_range_at(*index)
995            .ok_or_else(|| {
996                invalid_argument(format!(
997                    "detectImportOptions: sheet index {} exceeds bounds",
998                    index + 1
999                ))
1000            })?
1001            .map_err(|err| {
1002                invalid_argument(format!(
1003                    "detectImportOptions: unable to read sheet {}: {err:?}",
1004                    index + 1
1005                ))
1006            })?,
1007        None => workbook
1008            .worksheet_range_at(0)
1009            .ok_or_else(|| {
1010                invalid_argument("detectImportOptions: spreadsheet contains no worksheets")
1011            })?
1012            .map_err(|err| {
1013                invalid_argument(format!(
1014                    "detectImportOptions: unable to read first sheet: {err:?}"
1015                ))
1016            })?,
1017    };
1018    let rows = spreadsheet_range_to_rows(&range, options)?;
1019    detected_options_from_rows(
1020        ImportFileType::Spreadsheet,
1021        rows,
1022        options,
1023        None,
1024        options.sheet.as_ref(),
1025    )
1026}
1027
1028fn detected_options_from_rows(
1029    file_type: ImportFileType,
1030    mut rows: Vec<Vec<ImportCell>>,
1031    options: &ReadTableOptions,
1032    delimiter: Option<Delimiter>,
1033    sheet: Option<&SheetSelector>,
1034) -> BuiltinResult<Value> {
1035    let mut variable_names = options.variable_names.clone();
1036    let read_variable_names = options
1037        .read_variable_names
1038        .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
1039    let header_rows_consumed = usize::from(read_variable_names && variable_names.is_none());
1040    if header_rows_consumed > 0 && !rows.is_empty() {
1041        variable_names = Some(
1042            rows.remove(0)
1043                .into_iter()
1044                .map(|cell| cell.display_text())
1045                .collect(),
1046        );
1047    }
1048
1049    let mut data_rows = rows;
1050    let mut data_variable_names = variable_names.clone();
1051    let row_name_header = if options.read_row_names {
1052        for row in &mut data_rows {
1053            if !row.is_empty() {
1054                row.remove(0);
1055            }
1056        }
1057        let mut header = None;
1058        if let Some(names) = data_variable_names.as_mut() {
1059            if !names.is_empty() {
1060                header = Some(names.remove(0));
1061            }
1062        }
1063        Some(
1064            header
1065                .filter(|name| !name.is_empty())
1066                .unwrap_or_else(|| "Row".to_string()),
1067        )
1068    } else {
1069        None
1070    };
1071
1072    let column_count = import_column_count(&data_rows, &data_variable_names, options)?;
1073    let data_names = import_variable_names(data_variable_names, column_count, options);
1074    let names = if let Some(row_name_header) = row_name_header {
1075        let mut names = Vec::with_capacity(data_names.len() + 1);
1076        names.push(row_name_header);
1077        names.extend(data_names);
1078        names
1079    } else {
1080        data_names
1081    };
1082    let types = detected_variable_type_labels(&data_rows, options, column_count)?;
1083    let output_num_header_lines = detected_output_header_lines(options, header_rows_consumed);
1084    let output_range = detected_output_range(options.range, header_rows_consumed);
1085
1086    let mut out = StructValue::new();
1087    out.insert("FileType", Value::String(import_file_type_label(file_type)));
1088    if let Some(delimiter) = delimiter {
1089        out.insert("Delimiter", Value::String(delimiter_label(&delimiter)));
1090    }
1091    out.insert("NumHeaderLines", Value::Num(output_num_header_lines as f64));
1092    out.insert("ReadVariableNames", Value::Bool(false));
1093    out.insert("ReadRowNames", Value::Bool(options.read_row_names));
1094    out.insert("NumVariables", Value::Num(column_count as f64));
1095    out.insert(
1096        "VariableNames",
1097        string_array_value(names, "detectImportOptions")?,
1098    );
1099    out.insert(
1100        "VariableTypes",
1101        string_array_value(types, "detectImportOptions")?,
1102    );
1103    if let Some(range) = output_range {
1104        out.insert("Range", range_spec_value(range)?);
1105        out.insert("DataRange", range_spec_value(range)?);
1106    }
1107    if let Some(sheet) = sheet {
1108        out.insert("Sheet", sheet_value(sheet));
1109    }
1110    let mut treat_as_missing = options.treat_as_missing.iter().cloned().collect::<Vec<_>>();
1111    treat_as_missing.sort();
1112    out.insert(
1113        "TreatAsMissing",
1114        string_array_value(treat_as_missing, "detectImportOptions")?,
1115    );
1116    out.insert(
1117        "PreserveVariableNames",
1118        Value::Bool(options.preserve_variable_names),
1119    );
1120    out.insert(
1121        "VariableNamingRule",
1122        Value::String(if options.preserve_variable_names {
1123            "preserve".to_string()
1124        } else {
1125            "modify".to_string()
1126        }),
1127    );
1128    out.insert(
1129        "EmptyLineRule",
1130        Value::String(
1131            match options.empty_line_rule {
1132                EmptyLineRule::Skip => "skip",
1133                EmptyLineRule::Read => "read",
1134            }
1135            .to_string(),
1136        ),
1137    );
1138    out.insert(
1139        "TextType",
1140        Value::String(
1141            match options.text_type {
1142                TextImportType::String => "string",
1143                TextImportType::Char => "char",
1144            }
1145            .to_string(),
1146        ),
1147    );
1148    out.insert(
1149        "DatetimeType",
1150        Value::String(
1151            match options.datetime_type {
1152                DatetimeImportType::Datetime => "datetime",
1153                DatetimeImportType::Text => "text",
1154                DatetimeImportType::ExcelDatenum => "exceldatenum",
1155            }
1156            .to_string(),
1157        ),
1158    );
1159    out.insert("Encoding", Value::String(options.encoding.clone()));
1160    Ok(Value::Struct(out))
1161}
1162
1163fn detected_variable_type_labels(
1164    rows: &[Vec<ImportCell>],
1165    options: &ReadTableOptions,
1166    column_count: usize,
1167) -> BuiltinResult<Vec<String>> {
1168    if let Some(requested) = &options.variable_types {
1169        let mut labels = requested
1170            .iter()
1171            .map(import_variable_type_label)
1172            .collect::<Vec<_>>();
1173        while labels.len() < column_count {
1174            labels.push("auto".to_string());
1175        }
1176        labels.truncate(column_count);
1177        return Ok(labels);
1178    }
1179    Ok((0..column_count)
1180        .map(|col| {
1181            let values = rows
1182                .iter()
1183                .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1184                .collect::<Vec<_>>();
1185            infer_import_type_label(&values, options)
1186        })
1187        .collect())
1188}
1189
1190fn infer_import_type_label(values: &[ImportCell], options: &ReadTableOptions) -> String {
1191    if values
1192        .iter()
1193        .all(|value| is_detected_numeric(value, options))
1194    {
1195        return "double".to_string();
1196    }
1197    if values
1198        .iter()
1199        .all(|value| is_detected_logical(value, options))
1200    {
1201        return "logical".to_string();
1202    }
1203    if !matches!(options.datetime_type, DatetimeImportType::Text)
1204        && values
1205            .iter()
1206            .all(|value| is_detected_datetime(value, options))
1207    {
1208        return "datetime".to_string();
1209    }
1210    match options.text_type {
1211        TextImportType::String => "string".to_string(),
1212        TextImportType::Char => "char".to_string(),
1213    }
1214}
1215
1216fn is_detected_numeric(value: &ImportCell, options: &ReadTableOptions) -> bool {
1217    match value {
1218        ImportCell::Empty | ImportCell::Number(_) => true,
1219        ImportCell::Text(text) => {
1220            let token = unquote(text.trim()).trim();
1221            options.is_missing(token) || parse_numeric(token).is_some()
1222        }
1223        _ => false,
1224    }
1225}
1226
1227fn is_detected_logical(value: &ImportCell, options: &ReadTableOptions) -> bool {
1228    match value {
1229        ImportCell::Empty | ImportCell::Logical(_) => true,
1230        ImportCell::Text(text) => {
1231            let token = unquote(text.trim()).trim();
1232            options.is_missing(token) || parse_logical(token).is_some()
1233        }
1234        _ => false,
1235    }
1236}
1237
1238fn is_detected_datetime(value: &ImportCell, options: &ReadTableOptions) -> bool {
1239    match value {
1240        ImportCell::Empty | ImportCell::DateTime(_) => true,
1241        ImportCell::Text(text) => {
1242            let token = unquote(text.trim()).trim();
1243            options.is_missing(token) || parse_iso_datetime_to_datenum(token).is_some()
1244        }
1245        _ => false,
1246    }
1247}
1248
1249fn import_variable_type_label(kind: &ImportVariableType) -> String {
1250    match kind {
1251        ImportVariableType::Auto => "auto",
1252        ImportVariableType::Numeric(NumericDType::F64) => "double",
1253        ImportVariableType::Numeric(NumericDType::F32) => "single",
1254        ImportVariableType::Numeric(NumericDType::U8) => "uint8",
1255        ImportVariableType::Numeric(NumericDType::U16) => "uint16",
1256        ImportVariableType::Logical => "logical",
1257        ImportVariableType::Text(TextImportType::String) => "string",
1258        ImportVariableType::Text(TextImportType::Char) => "char",
1259        ImportVariableType::CellStr => "cellstr",
1260        ImportVariableType::Datetime => "datetime",
1261        ImportVariableType::Duration => "duration",
1262    }
1263    .to_string()
1264}
1265
1266fn detected_output_header_lines(options: &ReadTableOptions, header_rows_consumed: usize) -> usize {
1267    if options.range.is_some() {
1268        options.num_header_lines
1269    } else {
1270        options.num_header_lines + header_rows_consumed
1271    }
1272}
1273
1274fn detected_output_range(
1275    range: Option<RangeSpec>,
1276    header_rows_consumed: usize,
1277) -> Option<RangeSpec> {
1278    range.map(|mut range| {
1279        range.start_row = range.start_row.saturating_add(header_rows_consumed);
1280        range
1281    })
1282}
1283
1284fn import_file_type_label(file_type: ImportFileType) -> String {
1285    match file_type {
1286        ImportFileType::Text | ImportFileType::Auto => "text",
1287        ImportFileType::Spreadsheet => "spreadsheet",
1288    }
1289    .to_string()
1290}
1291
1292fn delimiter_label(delimiter: &Delimiter) -> String {
1293    match delimiter {
1294        Delimiter::Char('\t') => "\t".to_string(),
1295        Delimiter::Char(ch) => ch.to_string(),
1296        Delimiter::String(text) => text.clone(),
1297        Delimiter::Whitespace => "whitespace".to_string(),
1298    }
1299}
1300
1301fn sheet_value(sheet: &SheetSelector) -> Value {
1302    match sheet {
1303        SheetSelector::Name(name) => Value::String(name.clone()),
1304        SheetSelector::Index(index) => Value::Num((*index + 1) as f64),
1305    }
1306}
1307
1308fn range_spec_value(range: RangeSpec) -> BuiltinResult<Value> {
1309    Ok(Value::String(range_spec_text(range)))
1310}
1311
1312fn range_spec_text(range: RangeSpec) -> String {
1313    let has_end = range.end_row.is_some() || range.end_col.is_some();
1314    let include_start_col = range.start_col > 0 || range.end_col.is_some() || !has_end;
1315    let include_start_row = range.start_row > 0 || range.end_row.is_some() || !has_end;
1316    let start = range_ref_text(
1317        range.start_row,
1318        range.start_col,
1319        include_start_row,
1320        include_start_col,
1321    );
1322    if !has_end {
1323        return start;
1324    }
1325
1326    let end = range_ref_text(
1327        range.end_row.unwrap_or(0),
1328        range.end_col.unwrap_or(0),
1329        range.end_row.is_some(),
1330        range.end_col.is_some(),
1331    );
1332    format!("{start}:{end}")
1333}
1334
1335fn range_ref_text(row: usize, col: usize, include_row: bool, include_col: bool) -> String {
1336    let mut out = String::new();
1337    if include_col {
1338        out.push_str(&spreadsheet_column_label(col));
1339    }
1340    if include_row {
1341        out.push_str(&(row + 1).to_string());
1342    }
1343    out
1344}
1345
/// Converts a zero-based column index into spreadsheet column letters
/// (`0 -> "A"`, `25 -> "Z"`, `26 -> "AA"`, ...).
fn spreadsheet_column_label(mut col: usize) -> String {
    let mut label = String::new();
    loop {
        // Letters are produced least-significant first, so each new one is
        // placed in front of those already collected.
        let letter = char::from(b'A' + (col % 26) as u8);
        label.insert(0, letter);
        if col < 26 {
            return label;
        }
        // There is no zero digit in this numbering, hence the `- 1`.
        col = col / 26 - 1;
    }
}
1358
1359fn string_array_value(values: Vec<String>, context: &str) -> BuiltinResult<Value> {
1360    let len = values.len();
1361    StringArray::new(values, vec![1, len])
1362        .map(Value::StringArray)
1363        .map_err(|err| invalid_variable(format!("{context}: {err}")))
1364}
1365
1366#[derive(Clone)]
1367struct SpreadsheetImportOptions {
1368    num_variables: usize,
1369    read_variable_names: Option<bool>,
1370    read_row_names: bool,
1371    variable_names: Vec<String>,
1372    variable_types: Vec<String>,
1373    data_range: Option<Value>,
1374    sheet: Option<Value>,
1375    treat_as_missing: Vec<String>,
1376    preserve_variable_names: bool,
1377    empty_line_rule: String,
1378    text_type: String,
1379    datetime_type: String,
1380}
1381
1382impl Default for SpreadsheetImportOptions {
1383    fn default() -> Self {
1384        let num_variables = 0;
1385        Self {
1386            num_variables,
1387            read_variable_names: None,
1388            read_row_names: false,
1389            variable_names: Vec::new(),
1390            variable_types: Vec::new(),
1391            data_range: None,
1392            sheet: None,
1393            treat_as_missing: Vec::new(),
1394            preserve_variable_names: false,
1395            empty_line_rule: "skip".to_string(),
1396            text_type: "string".to_string(),
1397            datetime_type: "datetime".to_string(),
1398        }
1399    }
1400}
1401
1402impl SpreadsheetImportOptions {
1403    fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
1404        if name.eq_ignore_ascii_case("NumVariables") {
1405            self.resize_variables(positive_usize(value, "NumVariables")?);
1406        } else if name.eq_ignore_ascii_case("VariableNames") {
1407            self.variable_names = raw_variable_name_list(value)?;
1408            self.align_variable_metadata_count(self.variable_names.len(), "VariableNames")?;
1409            self.ensure_variable_metadata_len();
1410        } else if name.eq_ignore_ascii_case("VariableTypes") {
1411            let types = variable_type_names(value)?;
1412            self.variable_types = types;
1413            self.align_variable_metadata_count(self.variable_types.len(), "VariableTypes")?;
1414            self.ensure_variable_metadata_len();
1415        } else if name.eq_ignore_ascii_case("DataRange") || name.eq_ignore_ascii_case("Range") {
1416            self.data_range = if option_value_is_empty(value) {
1417                None
1418            } else {
1419                RangeSpec::parse(value)?;
1420                Some(value.clone())
1421            };
1422        } else if name.eq_ignore_ascii_case("Sheet") {
1423            self.sheet = if option_value_is_empty(value) {
1424                None
1425            } else {
1426                SheetSelector::parse(value)?;
1427                Some(value.clone())
1428            };
1429        } else if name.eq_ignore_ascii_case("ReadVariableNames") {
1430            self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
1431        } else if name.eq_ignore_ascii_case("ReadRowNames") {
1432            self.read_row_names = bool_scalar(value, "ReadRowNames")?;
1433        } else if name.eq_ignore_ascii_case("TreatAsMissing") {
1434            self.treat_as_missing = string_list(value)?;
1435        } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
1436            self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
1437        } else if name.eq_ignore_ascii_case("VariableNamingRule") {
1438            let rule = scalar_text(value, "VariableNamingRule")?;
1439            if rule.eq_ignore_ascii_case("preserve") {
1440                self.preserve_variable_names = true;
1441            } else if rule.eq_ignore_ascii_case("modify") {
1442                self.preserve_variable_names = false;
1443            } else {
1444                return Err(invalid_argument(format!(
1445                    "spreadsheetImportOptions: unsupported VariableNamingRule '{rule}'"
1446                )));
1447            }
1448        } else if name.eq_ignore_ascii_case("EmptyLineRule") {
1449            let rule = scalar_text(value, "EmptyLineRule")?;
1450            if !(rule.eq_ignore_ascii_case("read") || rule.eq_ignore_ascii_case("skip")) {
1451                return Err(invalid_argument(format!(
1452                    "spreadsheetImportOptions: unsupported EmptyLineRule '{rule}'"
1453                )));
1454            }
1455            self.empty_line_rule = rule.to_ascii_lowercase();
1456        } else if name.eq_ignore_ascii_case("TextType") {
1457            let text_type = scalar_text(value, "TextType")?;
1458            if !(text_type.eq_ignore_ascii_case("string") || text_type.eq_ignore_ascii_case("char"))
1459            {
1460                return Err(invalid_argument(format!(
1461                    "spreadsheetImportOptions: unsupported TextType '{text_type}'"
1462                )));
1463            }
1464            self.text_type = text_type.to_ascii_lowercase();
1465        } else if name.eq_ignore_ascii_case("DatetimeType") {
1466            let datetime_type = scalar_text(value, "DatetimeType")?;
1467            if !(datetime_type.eq_ignore_ascii_case("datetime")
1468                || datetime_type.eq_ignore_ascii_case("text")
1469                || datetime_type.eq_ignore_ascii_case("exceldatenum"))
1470            {
1471                return Err(invalid_argument(format!(
1472                    "spreadsheetImportOptions: unsupported DatetimeType '{datetime_type}'"
1473                )));
1474            }
1475            self.datetime_type = datetime_type.to_ascii_lowercase();
1476        } else {
1477            return Err(invalid_argument(format!(
1478                "spreadsheetImportOptions: unsupported option '{name}'"
1479            )));
1480        }
1481        Ok(())
1482    }
1483
1484    fn resize_variables(&mut self, num_variables: usize) {
1485        self.num_variables = num_variables;
1486        if self.variable_names.len() > num_variables {
1487            self.variable_names.truncate(num_variables);
1488        }
1489        if self.variable_types.len() > num_variables {
1490            self.variable_types.truncate(num_variables);
1491        }
1492        self.ensure_variable_metadata_len();
1493    }
1494
1495    fn align_variable_metadata_count(&mut self, len: usize, field: &str) -> BuiltinResult<()> {
1496        if self.num_variables == 0 {
1497            self.num_variables = len;
1498            return Ok(());
1499        }
1500        if len > self.num_variables {
1501            return Err(invalid_argument(format!(
1502                "spreadsheetImportOptions: {field} length exceeds NumVariables"
1503            )));
1504        }
1505        Ok(())
1506    }
1507
1508    fn ensure_variable_metadata_len(&mut self) {
1509        if self.num_variables == 0 {
1510            return;
1511        }
1512        while self.variable_names.len() < self.num_variables {
1513            self.variable_names
1514                .push(format!("Var{}", self.variable_names.len() + 1));
1515        }
1516        self.variable_names.truncate(self.num_variables);
1517        while self.variable_types.len() < self.num_variables {
1518            self.variable_types.push("auto".to_string());
1519        }
1520        self.variable_types.truncate(self.num_variables);
1521    }
1522
1523    fn into_struct(mut self) -> BuiltinResult<StructValue> {
1524        self.ensure_variable_metadata_len();
1525        let mut out = StructValue::new();
1526        out.insert("FileType", Value::String("spreadsheet".to_string()));
1527        out.insert("NumVariables", Value::Num(self.num_variables as f64));
1528        if let Some(read_variable_names) = self.read_variable_names {
1529            out.insert("ReadVariableNames", Value::Bool(read_variable_names));
1530        }
1531        out.insert("ReadRowNames", Value::Bool(self.read_row_names));
1532        out.insert(
1533            "VariableNames",
1534            Value::StringArray(
1535                StringArray::new(
1536                    self.variable_names.clone(),
1537                    vec![1, self.variable_names.len()],
1538                )
1539                .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1540            ),
1541        );
1542        out.insert(
1543            "VariableTypes",
1544            Value::StringArray(
1545                StringArray::new(
1546                    self.variable_types.clone(),
1547                    vec![1, self.variable_types.len()],
1548                )
1549                .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1550            ),
1551        );
1552        out.insert(
1553            "DataRange",
1554            self.data_range
1555                .unwrap_or_else(|| Value::String(String::new())),
1556        );
1557        out.insert(
1558            "Sheet",
1559            self.sheet.unwrap_or_else(|| Value::String(String::new())),
1560        );
1561        out.insert(
1562            "TreatAsMissing",
1563            Value::StringArray(
1564                StringArray::new(
1565                    self.treat_as_missing.clone(),
1566                    vec![1, self.treat_as_missing.len()],
1567                )
1568                .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1569            ),
1570        );
1571        out.insert(
1572            "PreserveVariableNames",
1573            Value::Bool(self.preserve_variable_names),
1574        );
1575        out.insert(
1576            "VariableNamingRule",
1577            Value::String(if self.preserve_variable_names {
1578                "preserve".to_string()
1579            } else {
1580                "modify".to_string()
1581            }),
1582        );
1583        out.insert("EmptyLineRule", Value::String(self.empty_line_rule));
1584        out.insert("TextType", Value::String(self.text_type));
1585        out.insert("DatetimeType", Value::String(self.datetime_type));
1586        Ok(out)
1587    }
1588}
1589
1590#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1591enum ImportVariableType {
1592    Auto,
1593    Numeric(NumericDType),
1594    Logical,
1595    Text(TextImportType),
1596    CellStr,
1597    Datetime,
1598    Duration,
1599}
1600
1601impl ImportVariableType {
1602    fn parse(raw: &str) -> BuiltinResult<Self> {
1603        match raw.trim().to_ascii_lowercase().as_str() {
1604            "" | "auto" => Ok(Self::Auto),
1605            "double" => Ok(Self::Numeric(NumericDType::F64)),
1606            "single" => Ok(Self::Numeric(NumericDType::F32)),
1607            "uint8" => Ok(Self::Numeric(NumericDType::U8)),
1608            "uint16" => Ok(Self::Numeric(NumericDType::U16)),
1609            "logical" | "bool" | "boolean" => Ok(Self::Logical),
1610            "string" => Ok(Self::Text(TextImportType::String)),
1611            "char" => Ok(Self::Text(TextImportType::Char)),
1612            "cellstr" => Ok(Self::CellStr),
1613            "int8" | "int16" | "int32" | "int64" | "uint32" | "uint64" => {
1614                Err(invalid_argument(format!(
1615                    "readtable: unsupported VariableTypes entry '{}'; RunMat table imports currently support double, single, uint8, and uint16 numeric arrays",
1616                    raw.trim()
1617                )))
1618            }
1619            "categorical" => Err(invalid_argument(
1620                "readtable: unsupported VariableTypes entry 'categorical'; categorical arrays are not implemented in RunMat yet",
1621            )),
1622            "datetime" => Ok(Self::Datetime),
1623            "duration" => Ok(Self::Duration),
1624            other => Err(invalid_argument(format!(
1625                "readtable: unsupported VariableTypes entry '{other}'"
1626            ))),
1627        }
1628    }
1629
1630    fn canonical_label(raw: &str) -> BuiltinResult<String> {
1631        Self::parse(raw)?;
1632        let label = raw.trim().to_ascii_lowercase();
1633        Ok(if label.is_empty() {
1634            "auto".to_string()
1635        } else {
1636            label
1637        })
1638    }
1639}
1640
/// Representation chosen for imported text (the `TextType` option).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TextImportType {
    /// Selected by the `string` spelling.
    String,
    /// Selected by the `char` spelling.
    Char,
}
1646
1647impl TextImportType {
1648    fn parse(value: &Value, context: &str) -> BuiltinResult<Self> {
1649        let text_type = scalar_text(value, "TextType")?;
1650        match text_type.trim().to_ascii_lowercase().as_str() {
1651            "string" => Ok(Self::String),
1652            "char" => Ok(Self::Char),
1653            other => Err(invalid_argument(format!(
1654                "{context}: unsupported TextType '{other}'"
1655            ))),
1656        }
1657    }
1658}
1659
/// How records whose cells are all empty are treated during import.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum EmptyLineRule {
    /// Drop records in which every cell is empty.
    Skip,
    /// Keep empty records as rows.
    Read,
}
1665
/// Parsed value of the `DatetimeType` import option.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DatetimeImportType {
    /// Selected by the `datetime` spelling.
    Datetime,
    /// Selected by the `text` spelling.
    Text,
    /// Selected by the `exceldatenum` spelling.
    ExcelDatenum,
}
1672
1673impl DatetimeImportType {
1674    fn parse(value: &Value) -> BuiltinResult<Self> {
1675        let text = scalar_text(value, "DatetimeType")?;
1676        match text.trim().to_ascii_lowercase().as_str() {
1677            "datetime" => Ok(Self::Datetime),
1678            "text" => Ok(Self::Text),
1679            "exceldatenum" => Ok(Self::ExcelDatenum),
1680            other => Err(invalid_argument(format!(
1681                "readtable: unsupported DatetimeType '{other}'"
1682            ))),
1683        }
1684    }
1685}
1686
/// File format used by `readtable` (the `FileType` option).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ImportFileType {
    /// Choose between text and spreadsheet from the file extension.
    Auto,
    /// Delimited or whitespace-separated text.
    Text,
    /// Spreadsheet workbook.
    Spreadsheet,
}
1693
1694impl ImportFileType {
1695    fn parse(value: &Value) -> BuiltinResult<Self> {
1696        let text = scalar_text(value, "FileType")?;
1697        match text.trim().to_ascii_lowercase().as_str() {
1698            "auto" => Ok(Self::Auto),
1699            "text" | "delimitedtext" | "delimited" => Ok(Self::Text),
1700            "spreadsheet" | "excel" => Ok(Self::Spreadsheet),
1701            other => Err(invalid_argument(format!(
1702                "readtable: unsupported FileType '{other}'"
1703            ))),
1704        }
1705    }
1706}
1707
/// Worksheet chosen through the `Sheet` option.
#[derive(Clone, Debug, PartialEq, Eq)]
enum SheetSelector {
    /// Worksheet looked up by name.
    Name(String),
    /// Zero-based worksheet position.
    Index(usize),
}
1713
1714impl SheetSelector {
1715    fn parse(value: &Value) -> BuiltinResult<Self> {
1716        match value {
1717            Value::Int(i) if i.to_i64() >= 1 => Ok(Self::Index(i.to_i64() as usize - 1)),
1718            Value::Num(n)
1719                if n.is_finite() && *n >= 1.0 && (n.round() - n).abs() <= f64::EPSILON =>
1720            {
1721                Ok(Self::Index(n.round() as usize - 1))
1722            }
1723            _ => {
1724                let text = scalar_text(value, "Sheet")?;
1725                if text.trim().is_empty() {
1726                    return Err(invalid_argument("readtable: Sheet must not be empty"));
1727                }
1728                Ok(Self::Name(text))
1729            }
1730        }
1731    }
1732}
1733
/// Field separator used when splitting text records.
#[derive(Clone, Debug, PartialEq, Eq)]
enum Delimiter {
    /// A single separator character.
    Char(char),
    /// A multi-character separator.
    String(String),
    /// Runs of whitespace separate fields.
    Whitespace,
}
1740
1741impl Delimiter {
1742    fn parse(value: &Value) -> BuiltinResult<Self> {
1743        let text = scalar_text(value, "Delimiter")?;
1744        if text.is_empty() {
1745            return Err(invalid_argument("readtable: Delimiter must not be empty"));
1746        }
1747        match text.trim().to_ascii_lowercase().as_str() {
1748            "tab" => Ok(Self::Char('\t')),
1749            "space" | "whitespace" => Ok(Self::Whitespace),
1750            "comma" => Ok(Self::Char(',')),
1751            "semicolon" => Ok(Self::Char(';')),
1752            "bar" | "pipe" => Ok(Self::Char('|')),
1753            _ if text.chars().count() == 1 => Ok(Self::Char(text.chars().next().unwrap())),
1754            _ => Ok(Self::String(text)),
1755        }
1756    }
1757}
1758
/// Import range with zero-based coordinates.
///
/// A missing end coordinate leaves that side open; the readers then
/// substitute the extent of the available data.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct RangeSpec {
    /// First row to read.
    start_row: usize,
    /// First column to read.
    start_col: usize,
    /// Last row to read (inclusive), if bounded.
    end_row: Option<usize>,
    /// Last column to read (inclusive), if bounded.
    end_col: Option<usize>,
}
1766
1767impl RangeSpec {
1768    fn parse(value: &Value) -> BuiltinResult<Self> {
1769        match value {
1770            Value::String(text) => Self::parse_text(text),
1771            Value::CharArray(ca) if ca.rows == 1 => {
1772                let text: String = ca.data.iter().collect();
1773                Self::parse_text(&text)
1774            }
1775            Value::StringArray(sa) if sa.data.len() == 1 => Self::parse_text(&sa.data[0]),
1776            Value::Tensor(t) if t.data.len() == 2 || t.data.len() == 4 => {
1777                let mut indices = Vec::with_capacity(t.data.len());
1778                for value in &t.data {
1779                    indices.push(one_based_to_zero(*value, usize::MAX, "Range")?);
1780                }
1781                Ok(Self {
1782                    start_row: indices[0],
1783                    start_col: indices[1],
1784                    end_row: indices.get(2).copied(),
1785                    end_col: indices.get(3).copied(),
1786                })
1787            }
1788            _ => Err(invalid_argument(
1789                "readtable: Range must be a cell reference string or numeric vector",
1790            )),
1791        }
1792    }
1793
1794    fn parse_text(text: &str) -> BuiltinResult<Self> {
1795        let trimmed = text.trim();
1796        if trimmed.is_empty() {
1797            return Err(invalid_argument("readtable: Range must not be empty"));
1798        }
1799        let parts: Vec<&str> = trimmed.split(':').collect();
1800        if parts.len() > 2 {
1801            return Err(invalid_argument(format!(
1802                "readtable: invalid Range specification '{trimmed}'"
1803            )));
1804        }
1805        let start = parse_cell_ref(parts[0])?;
1806        let end = if parts.len() == 2 {
1807            Some(parse_cell_ref(parts[1])?)
1808        } else {
1809            None
1810        };
1811        Ok(Self {
1812            start_row: start.0.unwrap_or(0),
1813            start_col: start.1.unwrap_or(0),
1814            end_row: end.and_then(|item| item.0),
1815            end_col: end.and_then(|item| item.1),
1816        })
1817    }
1818}
1819
1820fn parse_cell_ref(token: &str) -> BuiltinResult<(Option<usize>, Option<usize>)> {
1821    let mut letters = String::new();
1822    let mut digits = String::new();
1823    for ch in token.trim().chars() {
1824        if ch == '$' {
1825            continue;
1826        }
1827        if ch.is_ascii_alphabetic() {
1828            letters.push(ch.to_ascii_uppercase());
1829        } else if ch.is_ascii_digit() {
1830            digits.push(ch);
1831        } else {
1832            return Err(invalid_argument(format!(
1833                "readtable: invalid Range component '{token}'"
1834            )));
1835        }
1836    }
1837    let col = if letters.is_empty() {
1838        None
1839    } else {
1840        let mut value = 0usize;
1841        for ch in letters.chars() {
1842            value = value
1843                .checked_mul(26)
1844                .and_then(|v| v.checked_add((ch as u8 - b'A' + 1) as usize))
1845                .ok_or_else(|| invalid_argument("readtable: Range column overflow"))?;
1846        }
1847        Some(value - 1)
1848    };
1849    let row = if digits.is_empty() {
1850        None
1851    } else {
1852        let parsed = digits
1853            .parse::<usize>()
1854            .map_err(|_| invalid_argument("readtable: invalid Range row"))?;
1855        if parsed == 0 {
1856            return Err(invalid_argument("readtable: Range rows are one-based"));
1857        }
1858        Some(parsed - 1)
1859    };
1860    Ok((row, col))
1861}
1862
1863fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
1864    let text = scalar_text(value, "filename").map_err(|_| {
1865        table_error(
1866            &TABLE_ERROR_INVALID_ARGUMENT,
1867            "readtable: filename must be a string scalar or character vector",
1868        )
1869    })?;
1870    if text.trim().is_empty() {
1871        return Err(invalid_argument("readtable: filename must not be empty"));
1872    }
1873    let expanded =
1874        expand_user_path(&text, "readtable").map_err(|msg| invalid_argument(msg.to_string()))?;
1875    Ok(Path::new(&expanded).to_path_buf())
1876}
1877
1878async fn read_table_from_file(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1879    match options.file_type {
1880        ImportFileType::Spreadsheet => read_spreadsheet_table(path, options).await,
1881        ImportFileType::Text => read_text_table(path, options).await,
1882        ImportFileType::Auto if is_spreadsheet_path(path) => {
1883            read_spreadsheet_table(path, options).await
1884        }
1885        ImportFileType::Auto => read_text_table(path, options).await,
1886    }
1887}
1888
1889async fn read_text_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1890    if options.sheet.is_some() {
1891        return Err(invalid_argument(
1892            "readtable: Sheet is only valid for spreadsheet files",
1893        ));
1894    }
1895    let bytes = read_file_bytes(path).await?;
1896    let text = strip_utf8_bom(decode_text_bytes(&bytes, &options.encoding)?);
1897    let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
1898    if let Some(first) = raw_lines.first_mut() {
1899        if first.starts_with('\u{FEFF}') {
1900            *first = first.trim_start_matches('\u{FEFF}').to_string();
1901        }
1902    }
1903    let delimiter = options
1904        .delimiter
1905        .clone()
1906        .or_else(|| detect_delimiter(&raw_lines))
1907        .unwrap_or(Delimiter::Whitespace);
1908    let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
1909    if options.num_header_lines > 0 {
1910        rows = rows.into_iter().skip(options.num_header_lines).collect();
1911    }
1912    if let Some(range) = options.range {
1913        rows = apply_import_range(rows, range);
1914    }
1915    import_rows_to_table(rows, options)
1916}
1917
1918async fn read_spreadsheet_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1919    if options.delimiter.is_some() {
1920        return Err(invalid_argument(
1921            "readtable: Delimiter is only valid for text files",
1922        ));
1923    }
1924    let bytes = read_file_bytes(path).await?;
1925    let cursor = Cursor::new(bytes);
1926    let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
1927        table_error(
1928            &TABLE_ERROR_UNSUPPORTED_FILE,
1929            format!(
1930                "readtable: unable to open spreadsheet '{}': {err}",
1931                path.display()
1932            ),
1933        )
1934    })?;
1935    let range = match &options.sheet {
1936        Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
1937            invalid_argument(format!("readtable: unable to read sheet '{name}': {err:?}"))
1938        })?,
1939        Some(SheetSelector::Index(index)) => workbook
1940            .worksheet_range_at(*index)
1941            .ok_or_else(|| {
1942                invalid_argument(format!(
1943                    "readtable: sheet index {} exceeds bounds",
1944                    index + 1
1945                ))
1946            })?
1947            .map_err(|err| {
1948                invalid_argument(format!(
1949                    "readtable: unable to read sheet {}: {err:?}",
1950                    index + 1
1951                ))
1952            })?,
1953        None => workbook
1954            .worksheet_range_at(0)
1955            .ok_or_else(|| invalid_argument("readtable: spreadsheet contains no worksheets"))?
1956            .map_err(|err| {
1957                invalid_argument(format!("readtable: unable to read first sheet: {err:?}"))
1958            })?,
1959    };
1960    let rows = spreadsheet_range_to_rows(&range, options)?;
1961    import_rows_to_table(rows, options)
1962}
1963
1964async fn read_file_bytes(path: &Path) -> BuiltinResult<Vec<u8>> {
1965    let mut file = File::open_async(path).await.map_err(|err| {
1966        table_error_with_source(
1967            &TABLE_ERROR_IO,
1968            format!("readtable: unable to open '{}': {err}", path.display()),
1969            err,
1970        )
1971    })?;
1972    let mut bytes = Vec::new();
1973    file.read_to_end(&mut bytes).map_err(|err| {
1974        table_error_with_source(
1975            &TABLE_ERROR_IO,
1976            format!("readtable: unable to read '{}': {err}", path.display()),
1977            err,
1978        )
1979    })?;
1980    Ok(bytes)
1981}
1982
/// Returns `true` when the extension of `path` is a known spreadsheet format
/// (`xls`, `xlsx`, `xlsm`, `xlsb`, `ods`), compared ASCII case-insensitively.
///
/// Paths without an extension, or with a non-UTF-8 extension, yield `false`.
fn is_spreadsheet_path(path: &Path) -> bool {
    const SPREADSHEET_EXTENSIONS: [&str; 5] = ["xls", "xlsx", "xlsm", "xlsb", "ods"];
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => SPREADSHEET_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
1992
1993fn validate_encoding_label(label: &str) -> BuiltinResult<()> {
1994    encoding_for_label(label)
1995        .map(|_| ())
1996        .ok_or_else(|| invalid_argument(format!("readtable: unsupported Encoding '{label}'")))
1997}
1998
1999fn encoding_for_label(label: &str) -> Option<&'static Encoding> {
2000    let label = label.trim();
2001    if label.is_empty()
2002        || label.eq_ignore_ascii_case("auto")
2003        || label.eq_ignore_ascii_case("default")
2004        || label.eq_ignore_ascii_case("system")
2005        || label.eq_ignore_ascii_case("native")
2006        || label.eq_ignore_ascii_case("utf-8")
2007        || label.eq_ignore_ascii_case("utf8")
2008        || label.eq_ignore_ascii_case("unicode")
2009    {
2010        return Some(UTF_8);
2011    }
2012    Encoding::for_label(label.as_bytes())
2013}
2014
2015fn decode_text_bytes(bytes: &[u8], encoding: &str) -> BuiltinResult<String> {
2016    let (encoding, offset) = if encoding.trim().eq_ignore_ascii_case("auto") {
2017        Encoding::for_bom(bytes).unwrap_or((UTF_8, 0))
2018    } else {
2019        (
2020            encoding_for_label(encoding).ok_or_else(|| {
2021                invalid_argument(format!("readtable: unsupported Encoding '{encoding}'"))
2022            })?,
2023            0,
2024        )
2025    };
2026    let (decoded, _, had_errors) = encoding.decode(&bytes[offset..]);
2027    if had_errors {
2028        return Err(table_error(
2029            &TABLE_ERROR_IO,
2030            format!(
2031                "readtable: unable to decode file contents using encoding '{}'",
2032                encoding.name()
2033            ),
2034        ));
2035    }
2036    Ok(decoded.into_owned())
2037}
2038
/// Removes one leading U+FEFF (byte-order mark) from `text`, if present.
fn strip_utf8_bom(text: String) -> String {
    const BOM: char = '\u{FEFF}';
    let mut text = text;
    if text.starts_with(BOM) {
        // Only the first mark is removed; any further marks stay in the text.
        text.drain(..BOM.len_utf8());
    }
    text
}
2044
/// One raw cell produced by the text or spreadsheet readers, before column
/// typing.
#[derive(Clone, Debug, PartialEq)]
enum ImportCell {
    /// No content.
    Empty,
    /// Uninterpreted text.
    Text(String),
    /// Numeric value.
    Number(f64),
    /// Boolean value.
    Logical(bool),
    /// Date/time carried as a datenum-style serial number.
    DateTime(f64),
    /// Spreadsheet error cell, carried as its display text.
    Error(String),
}
2054
2055impl ImportCell {
2056    fn from_text(text: String) -> Self {
2057        if text.trim().is_empty() {
2058            Self::Empty
2059        } else {
2060            Self::Text(text)
2061        }
2062    }
2063
2064    fn display_text(&self) -> String {
2065        match self {
2066            Self::Empty => String::new(),
2067            Self::Text(text) => text.clone(),
2068            Self::Number(value) => format_key_number(*value),
2069            Self::Logical(value) => value.to_string(),
2070            Self::DateTime(serial) => format_key_number(*serial),
2071            Self::Error(text) => text.clone(),
2072        }
2073    }
2074
2075    fn is_missing(&self, options: &ReadTableOptions) -> bool {
2076        match self {
2077            Self::Empty => true,
2078            Self::Text(text) => options.is_missing(text),
2079            _ => false,
2080        }
2081    }
2082
2083    fn is_likely_data_token(&self, options: &ReadTableOptions) -> bool {
2084        match self {
2085            Self::Number(_) | Self::Logical(_) | Self::DateTime(_) => true,
2086            Self::Empty => false,
2087            Self::Text(text) => {
2088                let token = unquote(text.trim()).trim();
2089                options.is_missing(token)
2090                    || parse_numeric(token).is_some()
2091                    || parse_logical(token).is_some()
2092                    || parse_iso_datetime_to_datenum(token).is_some()
2093            }
2094            Self::Error(_) => true,
2095        }
2096    }
2097}
2098
2099fn spreadsheet_cell_to_import(cell: &SpreadsheetData) -> ImportCell {
2100    match cell {
2101        SpreadsheetData::Empty => ImportCell::Empty,
2102        SpreadsheetData::Int(value) => ImportCell::Number(*value as f64),
2103        SpreadsheetData::Float(value) => ImportCell::Number(*value),
2104        SpreadsheetData::String(text) => ImportCell::Text(text.clone()),
2105        SpreadsheetData::Bool(value) => ImportCell::Logical(*value),
2106        SpreadsheetData::DateTime(value) => value
2107            .as_datetime()
2108            .map(crate::builtins::datetime::datenum_from_naive)
2109            .map(ImportCell::DateTime)
2110            .unwrap_or_else(|| ImportCell::Number(value.as_f64())),
2111        SpreadsheetData::DateTimeIso(text) => parse_iso_datetime_to_datenum(text)
2112            .map(ImportCell::DateTime)
2113            .unwrap_or_else(|| ImportCell::Text(text.clone())),
2114        SpreadsheetData::DurationIso(text) => ImportCell::Text(text.clone()),
2115        SpreadsheetData::Error(err) => ImportCell::Error(err.to_string()),
2116    }
2117}
2118
2119fn spreadsheet_range_to_rows(
2120    range: &calamine::Range<SpreadsheetData>,
2121    options: &ReadTableOptions,
2122) -> BuiltinResult<Vec<Vec<ImportCell>>> {
2123    if range.is_empty() {
2124        return Ok(Vec::new());
2125    }
2126    let Some((range_start_row, range_start_col)) = range.start() else {
2127        return Ok(Vec::new());
2128    };
2129    let Some((range_end_row, range_end_col)) = range.end() else {
2130        return Ok(Vec::new());
2131    };
2132    let start_row = options
2133        .range
2134        .map(|spec| checked_u32(spec.start_row, "Range row"))
2135        .transpose()?
2136        .unwrap_or(range_start_row);
2137    let start_col = options
2138        .range
2139        .map(|spec| checked_u32(spec.start_col, "Range column"))
2140        .transpose()?
2141        .unwrap_or(range_start_col);
2142    let end_row = options
2143        .range
2144        .and_then(|spec| spec.end_row)
2145        .map(|row| checked_u32(row, "Range row"))
2146        .transpose()?
2147        .unwrap_or(range_end_row);
2148    let end_col = options
2149        .range
2150        .and_then(|spec| spec.end_col)
2151        .map(|col| checked_u32(col, "Range column"))
2152        .transpose()?
2153        .unwrap_or(range_end_col);
2154    if start_row > end_row || start_col > end_col {
2155        return Ok(Vec::new());
2156    }
2157    let mut rows = Vec::new();
2158    for row_idx in start_row..=end_row {
2159        let mut row = Vec::new();
2160        for col_idx in start_col..=end_col {
2161            row.push(
2162                range
2163                    .get_value((row_idx, col_idx))
2164                    .map(spreadsheet_cell_to_import)
2165                    .unwrap_or(ImportCell::Empty),
2166            );
2167        }
2168        if matches!(options.empty_line_rule, EmptyLineRule::Skip)
2169            && row.iter().all(|cell| cell.is_missing(options))
2170        {
2171            continue;
2172        }
2173        rows.push(row);
2174    }
2175    if options.num_header_lines > 0 {
2176        Ok(rows.into_iter().skip(options.num_header_lines).collect())
2177    } else {
2178        Ok(rows)
2179    }
2180}
2181
2182fn checked_u32(value: usize, context: &str) -> BuiltinResult<u32> {
2183    u32::try_from(value).map_err(|_| invalid_argument(format!("readtable: {context} overflow")))
2184}
2185
2186fn detect_delimiter(lines: &[String]) -> Option<Delimiter> {
2187    let candidates = [',', '\t', ';', '|'];
2188    let mut best: Option<(f64, Delimiter)> = None;
2189    for candidate in candidates {
2190        let counts = lines
2191            .iter()
2192            .take(32)
2193            .filter(|line| line.contains(candidate))
2194            .map(|line| split_with_char_delim(line, candidate).len())
2195            .filter(|count| *count >= 2)
2196            .collect::<Vec<_>>();
2197        if counts.is_empty() {
2198            continue;
2199        }
2200        let avg = counts.iter().copied().sum::<usize>() as f64 / counts.len() as f64;
2201        if avg >= 2.0
2202            && best
2203                .as_ref()
2204                .map(|(best_avg, _)| avg > *best_avg)
2205                .unwrap_or(true)
2206        {
2207            best = Some((avg, Delimiter::Char(candidate)));
2208        }
2209    }
2210    best.map(|(_, delimiter)| delimiter).or_else(|| {
2211        lines
2212            .iter()
2213            .take(32)
2214            .any(|line| line.split_whitespace().count() > 1)
2215            .then_some(Delimiter::Whitespace)
2216    })
2217}
2218
/// Splits one line on `delimiter`, honouring double-quoted sections.
///
/// Quote characters are removed from the output, a doubled quote inside a
/// quoted section yields one literal `"`, and delimiters inside quotes do not
/// split. The result always contains at least one (possibly empty) field.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut rest = line.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            // Escaped quote: consume the second `"` and keep one.
            '"' if quoted && rest.peek() == Some(&'"') => {
                rest.next();
                field.push('"');
            }
            '"' => quoted = !quoted,
            _ if ch == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            _ => field.push(ch),
        }
    }
    fields.push(field);
    fields
}
2244
2245fn parse_text_records(
2246    text: &str,
2247    delimiter: &Delimiter,
2248    empty_line_rule: EmptyLineRule,
2249) -> Vec<Vec<ImportCell>> {
2250    match delimiter {
2251        Delimiter::Whitespace => parse_whitespace_records(text, empty_line_rule),
2252        Delimiter::Char(ch) => parse_delimited_records(text, &ch.to_string(), empty_line_rule),
2253        Delimiter::String(pattern) => parse_delimited_records(text, pattern, empty_line_rule),
2254    }
2255}
2256
2257fn parse_delimited_records(
2258    text: &str,
2259    delimiter: &str,
2260    empty_line_rule: EmptyLineRule,
2261) -> Vec<Vec<ImportCell>> {
2262    let mut records = Vec::new();
2263    let mut row = Vec::new();
2264    let mut current = String::new();
2265    let mut in_quotes = false;
2266    let mut idx = 0usize;
2267    while idx < text.len() {
2268        let ch = text[idx..].chars().next().expect("valid char boundary");
2269        if ch == '"' {
2270            if in_quotes && text[idx + ch.len_utf8()..].starts_with('"') {
2271                current.push('"');
2272                idx += ch.len_utf8() + 1;
2273                continue;
2274            }
2275            in_quotes = !in_quotes;
2276            idx += ch.len_utf8();
2277            continue;
2278        }
2279        if !in_quotes && !delimiter.is_empty() && text[idx..].starts_with(delimiter) {
2280            row.push(ImportCell::from_text(std::mem::take(&mut current)));
2281            idx += delimiter.len();
2282            continue;
2283        }
2284        if !in_quotes && (ch == '\n' || ch == '\r') {
2285            row.push(ImportCell::from_text(std::mem::take(&mut current)));
2286            push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
2287            idx += ch.len_utf8();
2288            if ch == '\r' && text[idx..].starts_with('\n') {
2289                idx += 1;
2290            }
2291            continue;
2292        }
2293        current.push(ch);
2294        idx += ch.len_utf8();
2295    }
2296    if !current.is_empty() || !row.is_empty() || text.ends_with(delimiter) {
2297        row.push(ImportCell::from_text(current));
2298        push_import_record(&mut records, row, empty_line_rule);
2299    }
2300    records
2301}
2302
2303fn parse_whitespace_records(text: &str, empty_line_rule: EmptyLineRule) -> Vec<Vec<ImportCell>> {
2304    let mut records = Vec::new();
2305    let mut row = Vec::new();
2306    let mut current = String::new();
2307    let mut in_quotes = false;
2308    let mut field_open = false;
2309    let mut chars = text.chars().peekable();
2310    while let Some(ch) = chars.next() {
2311        if ch == '"' {
2312            if in_quotes && chars.peek() == Some(&'"') {
2313                current.push('"');
2314                chars.next();
2315            } else {
2316                in_quotes = !in_quotes;
2317            }
2318            field_open = true;
2319            continue;
2320        }
2321        if !in_quotes && (ch == '\n' || ch == '\r') {
2322            if field_open || !current.is_empty() {
2323                row.push(ImportCell::from_text(std::mem::take(&mut current)));
2324            }
2325            field_open = false;
2326            push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
2327            if ch == '\r' && chars.peek() == Some(&'\n') {
2328                chars.next();
2329            }
2330            continue;
2331        }
2332        if !in_quotes && ch.is_whitespace() {
2333            if field_open || !current.is_empty() {
2334                row.push(ImportCell::from_text(std::mem::take(&mut current)));
2335                field_open = false;
2336            }
2337            continue;
2338        }
2339        current.push(ch);
2340        field_open = true;
2341    }
2342    if field_open || !current.is_empty() {
2343        row.push(ImportCell::from_text(current));
2344    }
2345    if !row.is_empty() {
2346        push_import_record(&mut records, row, empty_line_rule);
2347    }
2348    records
2349}
2350
2351fn push_import_record(
2352    records: &mut Vec<Vec<ImportCell>>,
2353    row: Vec<ImportCell>,
2354    empty_line_rule: EmptyLineRule,
2355) {
2356    if matches!(empty_line_rule, EmptyLineRule::Skip)
2357        && row.iter().all(|cell| matches!(cell, ImportCell::Empty))
2358    {
2359        return;
2360    }
2361    records.push(row);
2362}
2363
2364fn apply_import_range(rows: Vec<Vec<ImportCell>>, range: RangeSpec) -> Vec<Vec<ImportCell>> {
2365    if rows.is_empty() {
2366        return rows;
2367    }
2368    let end_row = range
2369        .end_row
2370        .unwrap_or_else(|| rows.len().saturating_sub(1));
2371    let max_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
2372    let end_col = range.end_col.unwrap_or_else(|| max_cols.saturating_sub(1));
2373    rows.into_iter()
2374        .enumerate()
2375        .filter_map(|(idx, row)| {
2376            if idx < range.start_row || idx > end_row {
2377                return None;
2378            }
2379            let selected = (range.start_col..=end_col)
2380                .map(|col| row.get(col).cloned().unwrap_or(ImportCell::Empty))
2381                .collect::<Vec<_>>();
2382            Some(selected)
2383        })
2384        .collect()
2385}
2386
2387fn import_rows_to_table(
2388    mut rows: Vec<Vec<ImportCell>>,
2389    options: &ReadTableOptions,
2390) -> BuiltinResult<Value> {
2391    let mut variable_names = options.variable_names.clone();
2392    let read_variable_names = options
2393        .read_variable_names
2394        .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
2395    if variable_names.is_none() && read_variable_names && !rows.is_empty() {
2396        variable_names = Some(
2397            rows.remove(0)
2398                .into_iter()
2399                .map(|cell| cell.display_text())
2400                .collect(),
2401        );
2402    }
2403
2404    let mut row_names = options.row_names.clone();
2405    if options.read_row_names && !rows.is_empty() {
2406        row_names = Some(
2407            rows.iter_mut()
2408                .map(|row| {
2409                    if row.is_empty() {
2410                        String::new()
2411                    } else {
2412                        row.remove(0).display_text()
2413                    }
2414                })
2415                .collect(),
2416        );
2417        if let Some(names) = variable_names.as_mut() {
2418            if !names.is_empty() {
2419                names.remove(0);
2420            }
2421        }
2422    }
2423
2424    let column_count = import_column_count(&rows, &variable_names, options)?;
2425    let names = import_variable_names(variable_names, column_count, options);
2426
2427    let mut columns = Vec::with_capacity(names.len());
2428    for col in 0..names.len() {
2429        let values = rows
2430            .iter()
2431            .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
2432            .collect::<Vec<_>>();
2433        let requested_type = options
2434            .variable_types
2435            .as_ref()
2436            .and_then(|types| types.get(col))
2437            .copied();
2438        columns.push(import_column(values, options, requested_type)?);
2439    }
2440    table_from_columns_with_properties(names, columns, row_names)
2441}
2442
2443fn import_column_count(
2444    rows: &[Vec<ImportCell>],
2445    variable_names: &Option<Vec<String>>,
2446    options: &ReadTableOptions,
2447) -> BuiltinResult<usize> {
2448    let data_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
2449    let name_cols = variable_names.as_ref().map(Vec::len).unwrap_or(0);
2450    let type_cols = options.variable_types.as_ref().map(Vec::len).unwrap_or(0);
2451    if let Some(count) = options.num_variables {
2452        if name_cols > count {
2453            return Err(invalid_argument(
2454                "readtable: VariableNames length exceeds NumVariables",
2455            ));
2456        }
2457        if type_cols > count {
2458            return Err(invalid_argument(
2459                "readtable: VariableTypes length exceeds NumVariables",
2460            ));
2461        }
2462        return Ok(count);
2463    }
2464    Ok(data_cols.max(name_cols).max(type_cols))
2465}
2466
2467fn import_variable_names(
2468    variable_names: Option<Vec<String>>,
2469    column_count: usize,
2470    options: &ReadTableOptions,
2471) -> Vec<String> {
2472    match variable_names {
2473        Some(mut names) => {
2474            while names.len() < column_count {
2475                names.push(format!("Var{}", names.len() + 1));
2476            }
2477            names.truncate(column_count);
2478            if options.preserve_variable_names {
2479                make_unique_names(names)
2480            } else {
2481                make_unique_variable_names(names)
2482            }
2483        }
2484        None => generated_variable_names(column_count),
2485    }
2486}
2487
2488fn should_read_variable_names(rows: &[Vec<ImportCell>], options: &ReadTableOptions) -> bool {
2489    let Some(first) = rows.first() else {
2490        return false;
2491    };
2492    if first.is_empty() {
2493        return false;
2494    }
2495    let names = first
2496        .iter()
2497        .map(ImportCell::display_text)
2498        .map(|text| text.trim().to_string())
2499        .collect::<Vec<_>>();
2500    if names.iter().any(|name| name.is_empty()) {
2501        return false;
2502    }
2503    if first.iter().all(|cell| cell.is_likely_data_token(options)) {
2504        return false;
2505    }
2506    true
2507}
2508
2509fn import_column(
2510    values: Vec<ImportCell>,
2511    options: &ReadTableOptions,
2512    requested_type: Option<ImportVariableType>,
2513) -> BuiltinResult<Value> {
2514    match requested_type.unwrap_or(ImportVariableType::Auto) {
2515        ImportVariableType::Auto => infer_import_column(values, options),
2516        ImportVariableType::Numeric(dtype) => import_numeric_column(values, options, dtype),
2517        ImportVariableType::Logical => import_logical_column(values, options),
2518        ImportVariableType::Text(kind) => import_text_column(values, options, kind),
2519        ImportVariableType::CellStr => import_cellstr_column(values, options),
2520        ImportVariableType::Datetime => import_datetime_column(values, options),
2521        ImportVariableType::Duration => import_duration_column(values, options),
2522    }
2523}
2524
2525fn import_numeric_column(
2526    values: Vec<ImportCell>,
2527    options: &ReadTableOptions,
2528    dtype: NumericDType,
2529) -> BuiltinResult<Value> {
2530    let mut numeric = Vec::with_capacity(values.len());
2531    for value in &values {
2532        let parsed = numeric_from_import_cell(value, options, dtype.class_name())?;
2533        numeric.push(cast_import_numeric(parsed, dtype));
2534    }
2535    Tensor::new_with_dtype(numeric, vec![values.len(), 1], dtype)
2536        .map(Value::Tensor)
2537        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2538}
2539
2540fn numeric_from_import_cell(
2541    value: &ImportCell,
2542    options: &ReadTableOptions,
2543    context: &str,
2544) -> BuiltinResult<f64> {
2545    match value {
2546        ImportCell::Empty => Ok(f64::NAN),
2547        ImportCell::Number(value) => Ok(*value),
2548        ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2549        ImportCell::DateTime(serial) => Ok(*serial),
2550        ImportCell::Text(text) => {
2551            let token = unquote(text.trim()).trim();
2552            if options.is_missing(token) {
2553                Ok(f64::NAN)
2554            } else {
2555                parse_numeric(token).ok_or_else(|| {
2556                    invalid_variable(format!("readtable: cannot import '{token}' as {context}"))
2557                })
2558            }
2559        }
2560        ImportCell::Error(text) => Err(invalid_variable(format!(
2561            "readtable: cannot import spreadsheet error '{text}' as {context}"
2562        ))),
2563    }
2564}
2565
2566fn cast_import_numeric(value: f64, dtype: NumericDType) -> f64 {
2567    match dtype {
2568        NumericDType::F64 => value,
2569        NumericDType::F32 => (value as f32) as f64,
2570        NumericDType::U8 => {
2571            if value.is_finite() {
2572                value.round().clamp(0.0, u8::MAX as f64)
2573            } else {
2574                0.0
2575            }
2576        }
2577        NumericDType::U16 => {
2578            if value.is_finite() {
2579                value.round().clamp(0.0, u16::MAX as f64)
2580            } else {
2581                0.0
2582            }
2583        }
2584    }
2585}
2586
2587fn import_logical_column(
2588    values: Vec<ImportCell>,
2589    options: &ReadTableOptions,
2590) -> BuiltinResult<Value> {
2591    let mut logical = Vec::with_capacity(values.len());
2592    for value in &values {
2593        logical.push(logical_from_import_cell(value, options)?);
2594    }
2595    LogicalArray::new(logical, vec![values.len(), 1])
2596        .map(Value::LogicalArray)
2597        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2598}
2599
2600fn logical_from_import_cell(value: &ImportCell, options: &ReadTableOptions) -> BuiltinResult<u8> {
2601    let flag = match value {
2602        ImportCell::Empty => false,
2603        ImportCell::Logical(value) => *value,
2604        ImportCell::Number(value) => *value != 0.0,
2605        ImportCell::DateTime(serial) => *serial != 0.0,
2606        ImportCell::Text(text) => {
2607            let token = unquote(text.trim()).trim();
2608            if options.is_missing(token) {
2609                false
2610            } else if let Some(value) = parse_logical(token) {
2611                value
2612            } else if let Some(value) = parse_numeric(token) {
2613                value != 0.0
2614            } else {
2615                return Err(invalid_variable(format!(
2616                    "readtable: cannot import '{token}' as logical"
2617                )));
2618            }
2619        }
2620        ImportCell::Error(text) => {
2621            return Err(invalid_variable(format!(
2622                "readtable: cannot import spreadsheet error '{text}' as logical"
2623            )));
2624        }
2625    };
2626    Ok(u8::from(flag))
2627}
2628
2629fn import_text_column(
2630    values: Vec<ImportCell>,
2631    options: &ReadTableOptions,
2632    kind: TextImportType,
2633) -> BuiltinResult<Value> {
2634    let strings = import_text_values(values, options);
2635    match kind {
2636        TextImportType::String => StringArray::new(strings.clone(), vec![strings.len(), 1])
2637            .map(Value::StringArray)
2638            .map_err(|err| invalid_variable(format!("readtable: {err}"))),
2639        TextImportType::Char => import_char_column(strings),
2640    }
2641}
2642
2643fn import_text_values(values: Vec<ImportCell>, options: &ReadTableOptions) -> Vec<String> {
2644    values
2645        .into_iter()
2646        .map(|value| {
2647            if value.is_missing(options) {
2648                String::new()
2649            } else {
2650                unquote(value.display_text().trim()).to_string()
2651            }
2652        })
2653        .collect()
2654}
2655
2656fn import_char_column(strings: Vec<String>) -> BuiltinResult<Value> {
2657    let rows = strings.len();
2658    let cols = strings
2659        .iter()
2660        .map(|text| text.chars().count())
2661        .max()
2662        .unwrap_or(0);
2663    let mut data = vec![' '; rows * cols];
2664    for (row, text) in strings.iter().enumerate() {
2665        for (col, ch) in text.chars().enumerate() {
2666            data[row * cols + col] = ch;
2667        }
2668    }
2669    CharArray::new(data, rows, cols)
2670        .map(Value::CharArray)
2671        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2672}
2673
2674fn import_cellstr_column(
2675    values: Vec<ImportCell>,
2676    options: &ReadTableOptions,
2677) -> BuiltinResult<Value> {
2678    let strings = import_text_values(values, options);
2679    let rows = strings.len();
2680    let cells = strings
2681        .into_iter()
2682        .map(|text| Value::CharArray(CharArray::new_row(&text)))
2683        .collect::<Vec<_>>();
2684    CellArray::new(cells, rows, 1)
2685        .map(Value::Cell)
2686        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2687}
2688
2689fn import_datetime_column(
2690    values: Vec<ImportCell>,
2691    options: &ReadTableOptions,
2692) -> BuiltinResult<Value> {
2693    if matches!(options.datetime_type, DatetimeImportType::Text) {
2694        return import_text_column(values, options, options.text_type);
2695    }
2696
2697    let mut serials = Vec::with_capacity(values.len());
2698    for value in &values {
2699        serials.push(datetime_serial_from_import_cell(value, options)?);
2700    }
2701    let tensor = Tensor::new(serials, vec![values.len(), 1])
2702        .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2703    if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2704        Ok(Value::Tensor(tensor))
2705    } else {
2706        crate::builtins::datetime::datetime_object_from_serial_tensor(tensor, "yyyy-MM-dd HH:mm:ss")
2707    }
2708}
2709
2710fn datetime_serial_from_import_cell(
2711    value: &ImportCell,
2712    options: &ReadTableOptions,
2713) -> BuiltinResult<f64> {
2714    match value {
2715        ImportCell::Empty => Ok(f64::NAN),
2716        ImportCell::DateTime(serial) => Ok(*serial),
2717        ImportCell::Number(value) => Ok(*value),
2718        ImportCell::Text(text) => {
2719            let token = unquote(text.trim()).trim();
2720            if options.is_missing(token) {
2721                Ok(f64::NAN)
2722            } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2723                Ok(serial)
2724            } else if let Some(serial) = parse_numeric(token) {
2725                Ok(serial)
2726            } else {
2727                Err(invalid_variable(format!(
2728                    "readtable: cannot import '{token}' as datetime"
2729                )))
2730            }
2731        }
2732        ImportCell::Logical(_) => Err(invalid_variable(
2733            "readtable: cannot import logical value as datetime",
2734        )),
2735        ImportCell::Error(text) => Err(invalid_variable(format!(
2736            "readtable: cannot import spreadsheet error '{text}' as datetime"
2737        ))),
2738    }
2739}
2740
2741fn import_duration_column(
2742    values: Vec<ImportCell>,
2743    options: &ReadTableOptions,
2744) -> BuiltinResult<Value> {
2745    let mut days = Vec::with_capacity(values.len());
2746    for value in &values {
2747        days.push(duration_days_from_import_cell(value, options)?);
2748    }
2749    let tensor = Tensor::new(days, vec![values.len(), 1])
2750        .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2751    crate::builtins::duration::duration_object_from_days_tensor(
2752        tensor,
2753        crate::builtins::duration::DEFAULT_DURATION_FORMAT,
2754    )
2755}
2756
2757fn duration_days_from_import_cell(
2758    value: &ImportCell,
2759    options: &ReadTableOptions,
2760) -> BuiltinResult<f64> {
2761    match value {
2762        ImportCell::Empty => Ok(f64::NAN),
2763        ImportCell::Number(value) => Ok(*value),
2764        ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2765        ImportCell::Text(text) => {
2766            let token = unquote(text.trim()).trim();
2767            if options.is_missing(token) {
2768                Ok(f64::NAN)
2769            } else {
2770                parse_duration_to_days(token).ok_or_else(|| {
2771                    invalid_variable(format!("readtable: cannot import '{token}' as duration"))
2772                })
2773            }
2774        }
2775        ImportCell::DateTime(_) => Err(invalid_variable(
2776            "readtable: cannot import datetime value as duration",
2777        )),
2778        ImportCell::Error(text) => Err(invalid_variable(format!(
2779            "readtable: cannot import spreadsheet error '{text}' as duration"
2780        ))),
2781    }
2782}
2783
2784fn infer_import_column(
2785    values: Vec<ImportCell>,
2786    options: &ReadTableOptions,
2787) -> BuiltinResult<Value> {
2788    let mut numeric = Vec::with_capacity(values.len());
2789    let mut all_numeric = true;
2790    for value in &values {
2791        match value {
2792            ImportCell::Empty => numeric.push(f64::NAN),
2793            ImportCell::Number(value) => numeric.push(*value),
2794            ImportCell::Text(text) => {
2795                let token = unquote(text.trim()).trim();
2796                if options.is_missing(token) {
2797                    numeric.push(f64::NAN);
2798                } else if let Some(value) = parse_numeric(token) {
2799                    numeric.push(value);
2800                } else {
2801                    all_numeric = false;
2802                    break;
2803                }
2804            }
2805            _ => {
2806                all_numeric = false;
2807                break;
2808            }
2809        }
2810    }
2811    if all_numeric {
2812        return Tensor::new(numeric, vec![values.len(), 1])
2813            .map(Value::Tensor)
2814            .map_err(|err| invalid_variable(format!("readtable: {err}")));
2815    }
2816
2817    let mut logical = Vec::with_capacity(values.len());
2818    let mut all_logical = true;
2819    for value in &values {
2820        match value {
2821            ImportCell::Empty => logical.push(0),
2822            ImportCell::Logical(value) => logical.push(i32::from(*value) as u8),
2823            ImportCell::Text(text) => {
2824                let token = unquote(text.trim()).trim();
2825                if options.is_missing(token) {
2826                    logical.push(0);
2827                } else if let Some(value) = parse_logical(token) {
2828                    logical.push(i32::from(value) as u8);
2829                } else {
2830                    all_logical = false;
2831                    break;
2832                }
2833            }
2834            _ => {
2835                all_logical = false;
2836                break;
2837            }
2838        }
2839    }
2840    if all_logical {
2841        return LogicalArray::new(logical, vec![values.len(), 1])
2842            .map(Value::LogicalArray)
2843            .map_err(|err| invalid_variable(format!("readtable: {err}")));
2844    }
2845
2846    if !matches!(options.datetime_type, DatetimeImportType::Text) {
2847        let mut serials = Vec::with_capacity(values.len());
2848        let mut all_datetime = true;
2849        for value in &values {
2850            match value {
2851                ImportCell::Empty => serials.push(f64::NAN),
2852                ImportCell::DateTime(serial) => serials.push(*serial),
2853                ImportCell::Text(text) => {
2854                    let token = unquote(text.trim()).trim();
2855                    if options.is_missing(token) {
2856                        serials.push(f64::NAN);
2857                    } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2858                        serials.push(serial);
2859                    } else {
2860                        all_datetime = false;
2861                        break;
2862                    }
2863                }
2864                _ => {
2865                    all_datetime = false;
2866                    break;
2867                }
2868            }
2869        }
2870        if all_datetime {
2871            let tensor = Tensor::new(serials, vec![values.len(), 1])
2872                .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2873            if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2874                return Ok(Value::Tensor(tensor));
2875            }
2876            return crate::builtins::datetime::datetime_object_from_serial_tensor(
2877                tensor,
2878                "yyyy-MM-dd HH:mm:ss",
2879            );
2880        }
2881    }
2882
2883    import_text_column(values, options, options.text_type)
2884}
2885
/// Parses a numeric token, returning `None` when it is not a number.
///
/// The spellings `nan`, `inf`, `infinity` (with optional `+`/`-` sign for the
/// infinities) are matched case-insensitively; everything else is delegated
/// to the standard `f64` parser. No whitespace trimming is performed here.
fn parse_numeric(token: &str) -> Option<f64> {
    const POSITIVE_INFINITY: [&str; 4] = ["inf", "+inf", "infinity", "+infinity"];
    const NEGATIVE_INFINITY: [&str; 2] = ["-inf", "-infinity"];

    let spelled_as = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|&spelling| token.eq_ignore_ascii_case(spelling))
    };

    if token.eq_ignore_ascii_case("nan") {
        Some(f64::NAN)
    } else if spelled_as(&POSITIVE_INFINITY) {
        Some(f64::INFINITY)
    } else if spelled_as(&NEGATIVE_INFINITY) {
        Some(f64::NEG_INFINITY)
    } else {
        token.parse::<f64>().ok()
    }
}
2894
/// Parses a logical word, case-insensitively.
///
/// `true`, `t`, `yes`, `on` map to `true`; `false`, `f`, `no`, `off` map to
/// `false`; anything else (including digits) yields `None`.
fn parse_logical(token: &str) -> Option<bool> {
    const TRUE_WORDS: [&str; 4] = ["true", "t", "yes", "on"];
    const FALSE_WORDS: [&str; 4] = ["false", "f", "no", "off"];

    let is_one_of = |words: &[&str]| words.iter().any(|&word| token.eq_ignore_ascii_case(word));

    if is_one_of(&TRUE_WORDS) {
        Some(true)
    } else if is_one_of(&FALSE_WORDS) {
        Some(false)
    } else {
        None
    }
}
2902
2903fn parse_duration_to_days(token: &str) -> Option<f64> {
2904    parse_numeric(token).or_else(|| parse_clock_duration_to_days(token))
2905}
2906
/// Parses a clock-style duration (`hh:mm` or `hh:mm:ss`) into days.
///
/// One optional leading `+` or `-` applies to the whole duration. The
/// individual components must be unsigned and finite, and minutes and seconds
/// must lie in `[0, 60)`. Fractional components are accepted. Returns `None`
/// for anything else, including empty input and a different number of
/// components.
///
/// Components are parsed with the `f64` parser, which itself accepts a sign.
/// A second sign is therefore rejected explicitly; otherwise `--1:30` would
/// combine negative hours with positive minutes into a wrong value.
fn parse_clock_duration_to_days(token: &str) -> Option<f64> {
    /// Parses one unsigned, finite clock component.
    fn component(text: &str) -> Option<f64> {
        if text.starts_with(|c: char| c == '+' || c == '-') {
            return None;
        }
        text.parse::<f64>().ok().filter(|value| value.is_finite())
    }

    let trimmed = token.trim();
    let (sign, body) = match trimmed.strip_prefix('-') {
        Some(rest) => (-1.0, rest),
        None => (1.0, trimmed.strip_prefix('+').unwrap_or(trimmed)),
    };

    let mut parts = body.split(':');
    let hours = component(parts.next()?)?;
    let minutes = component(parts.next()?)?;
    let seconds = match parts.next() {
        Some(text) => component(text)?,
        None => 0.0,
    };
    if parts.next().is_some() {
        // More than three components is not a supported clock format.
        return None;
    }
    if !(0.0..60.0).contains(&minutes) || !(0.0..60.0).contains(&seconds) {
        return None;
    }
    Some(sign * (hours * 3600.0 + minutes * 60.0 + seconds) / 86_400.0)
}
2943
2944fn parse_iso_datetime_to_datenum(token: &str) -> Option<f64> {
2945    let trimmed = token.trim();
2946    if trimmed.is_empty() {
2947        return None;
2948    }
2949    for format in [
2950        "%Y-%m-%dT%H:%M:%S%.f",
2951        "%Y-%m-%d %H:%M:%S%.f",
2952        "%Y/%m/%d %H:%M:%S%.f",
2953        "%m/%d/%Y %H:%M:%S%.f",
2954    ] {
2955        if let Ok(value) = NaiveDateTime::parse_from_str(trimmed, format) {
2956            return Some(crate::builtins::datetime::datenum_from_naive(value));
2957        }
2958    }
2959    for format in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"] {
2960        if let Ok(date) = NaiveDate::parse_from_str(trimmed, format) {
2961            return Some(crate::builtins::datetime::datenum_from_naive(
2962                date.and_time(NaiveTime::MIN),
2963            ));
2964        }
2965    }
2966    None
2967}
2968
/// Strips one layer of matching surrounding quotes from `token`.
///
/// Both ends must carry the same quote character (`"` or `'`); otherwise the
/// token is returned untouched. A lone quote character is not stripped.
fn unquote(token: &str) -> &str {
    const QUOTES: [char; 2] = ['"', '\''];
    QUOTES
        .iter()
        .find_map(|&quote| token.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(token)
}
2980
2981fn default_properties(variable_names: Vec<String>, row_names: Option<Vec<String>>) -> StructValue {
2982    let mut props = StructValue::new();
2983    props.insert(
2984        VARIABLE_NAMES,
2985        Value::StringArray(
2986            StringArray::new(variable_names.clone(), vec![1, variable_names.len()])
2987                .expect("VariableNames shape is valid"),
2988        ),
2989    );
2990    props.insert(
2991        ROW_NAMES,
2992        row_names
2993            .map(|names| {
2994                Value::StringArray(
2995                    StringArray::new(names.clone(), vec![names.len(), 1])
2996                        .expect("RowNames shape is valid"),
2997                )
2998            })
2999            .unwrap_or_else(|| {
3000                Value::StringArray(StringArray::new(Vec::new(), vec![0, 1]).unwrap())
3001            }),
3002    );
3003    props.insert(
3004        DIMENSION_NAMES,
3005        Value::StringArray(
3006            StringArray::new(
3007                vec![
3008                    DEFAULT_ROW_DIM_NAME.to_string(),
3009                    DEFAULT_VARIABLE_DIM_NAME.to_string(),
3010                ],
3011                vec![1, 2],
3012            )
3013            .expect("DimensionNames shape is valid"),
3014        ),
3015    );
3016    props.insert(
3017        VARIABLE_UNITS,
3018        Value::StringArray(
3019            StringArray::new(
3020                vec![String::new(); variable_names.len()],
3021                vec![1, variable_names.len()],
3022            )
3023            .expect("VariableUnits shape is valid"),
3024        ),
3025    );
3026    props.insert(
3027        VARIABLE_DESCRIPTIONS,
3028        Value::StringArray(
3029            StringArray::new(
3030                vec![String::new(); variable_names.len()],
3031                vec![1, variable_names.len()],
3032            )
3033            .expect("VariableDescriptions shape is valid"),
3034        ),
3035    );
3036    props.insert(DESCRIPTION, Value::String(String::new()));
3037    props.insert(USER_DATA, Value::Tensor(Tensor::zeros(vec![0, 0])));
3038    props
3039}
3040
3041pub fn table_from_columns(names: Vec<String>, columns: Vec<Value>) -> BuiltinResult<Value> {
3042    table_from_columns_with_properties(names, columns, None)
3043}
3044
3045fn table_from_columns_with_properties(
3046    names: Vec<String>,
3047    columns: Vec<Value>,
3048    row_names: Option<Vec<String>>,
3049) -> BuiltinResult<Value> {
3050    ensure_table_class_registered();
3051    if names.len() != columns.len() {
3052        return Err(invalid_variable(
3053            "table: number of variable names must match number of variables",
3054        ));
3055    }
3056    let names = make_unique_names(names);
3057    let height = validate_column_heights(&names, &columns)?;
3058    if let Some(row_names) = &row_names {
3059        if row_names.len() != height {
3060            return Err(invalid_variable(
3061                "table: number of row names must match table height",
3062            ));
3063        }
3064    }
3065    let mut variables = StructValue::new();
3066    for (name, value) in names.iter().cloned().zip(columns) {
3067        variables.insert(name, value);
3068    }
3069    let props = default_properties(names, row_names);
3070    let mut object = ObjectInstance::new(TABLE_CLASS.to_string());
3071    object
3072        .properties
3073        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3074    object.properties.insert(
3075        TABLE_PROPERTIES_FIELD.to_string(),
3076        Value::Struct(props.clone()),
3077    );
3078    object
3079        .properties
3080        .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
3081    Ok(Value::Object(object))
3082}
3083
3084fn validate_column_heights(names: &[String], columns: &[Value]) -> BuiltinResult<usize> {
3085    if columns.is_empty() {
3086        return Ok(0);
3087    }
3088    let height = value_row_count(&columns[0])?;
3089    for (name, value) in names.iter().zip(columns) {
3090        let rows = value_row_count(value)?;
3091        if rows != height {
3092            return Err(invalid_variable(format!(
3093                "table: variable '{name}' has {rows} rows but expected {height}"
3094            )));
3095        }
3096    }
3097    Ok(height)
3098}
3099
3100pub fn is_table_value(value: &Value) -> bool {
3101    table_object(value).is_some()
3102}
3103
3104fn table_object(value: &Value) -> Option<&ObjectInstance> {
3105    match value {
3106        Value::Object(object) if object.is_class(TABLE_CLASS) => Some(object),
3107        _ => None,
3108    }
3109}
3110
3111fn into_table_object(value: Value, context: &str) -> BuiltinResult<ObjectInstance> {
3112    match value {
3113        Value::Object(object) if object.is_class(TABLE_CLASS) => Ok(object),
3114        other => Err(invalid_argument(format!(
3115            "{context}: expected table, got {other:?}"
3116        ))),
3117    }
3118}
3119
3120pub fn table_variables(object: &ObjectInstance) -> BuiltinResult<StructValue> {
3121    match object.properties.get(TABLE_VARIABLES_FIELD) {
3122        Some(Value::Struct(st)) => Ok(st.clone()),
3123        Some(other) => Err(invalid_variable(format!(
3124            "table: invalid internal variable storage {other:?}"
3125        ))),
3126        None => Ok(StructValue::new()),
3127    }
3128}
3129
3130pub fn table_variable_names_from_object(object: &ObjectInstance) -> BuiltinResult<Vec<String>> {
3131    let variables = table_variables(object)?;
3132    Ok(variables.fields.keys().cloned().collect())
3133}
3134
3135pub fn table_height(object: &ObjectInstance) -> BuiltinResult<usize> {
3136    let variables = table_variables(object)?;
3137    match variables.fields.values().next() {
3138        Some(value) => value_row_count(value),
3139        None => Ok(0),
3140    }
3141}
3142
3143pub fn table_width(object: &ObjectInstance) -> BuiltinResult<usize> {
3144    table_variables(object).map(|vars| vars.fields.len())
3145}
3146
3147fn table_public_properties(object: &ObjectInstance) -> BuiltinResult<StructValue> {
3148    match object
3149        .properties
3150        .get(TABLE_PROPERTIES_FIELD)
3151        .or_else(|| object.properties.get(PROPERTIES_MEMBER))
3152    {
3153        Some(Value::Struct(st)) => Ok(st.clone()),
3154        Some(other) => Err(invalid_variable(format!(
3155            "table: invalid Properties storage {other:?}"
3156        ))),
3157        None => Ok(default_properties(
3158            table_variable_names_from_object(object)?,
3159            None,
3160        )),
3161    }
3162}
3163
3164fn sync_table_properties(object: &mut ObjectInstance, props: StructValue) {
3165    object.properties.insert(
3166        TABLE_PROPERTIES_FIELD.to_string(),
3167        Value::Struct(props.clone()),
3168    );
3169    object
3170        .properties
3171        .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
3172}
3173
3174fn table_member_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
3175    let name = scalar_text(payload, "table member")?;
3176    if name == PROPERTIES_MEMBER {
3177        return Ok(Value::Struct(table_public_properties(object)?));
3178    }
3179    let variables = table_variables(object)?;
3180    variables
3181        .fields
3182        .get(&name)
3183        .cloned()
3184        .ok_or_else(|| invalid_variable(format!("table: unrecognized variable '{name}'")))
3185}
3186
3187fn table_member_set(object: &mut ObjectInstance, field: &str, rhs: Value) -> BuiltinResult<()> {
3188    if field == PROPERTIES_MEMBER {
3189        let Value::Struct(props) = rhs else {
3190            return Err(invalid_variable(
3191                "table: Properties assignment expects a scalar struct",
3192            ));
3193        };
3194        apply_properties(object, props)?;
3195        return Ok(());
3196    }
3197    let mut variables = table_variables(object)?;
3198    let mut names = table_variable_names_from_object(object)?;
3199    let height = table_height(object)?;
3200    let rhs_rows = value_row_count(&rhs)?;
3201    if !variables.fields.is_empty() && rhs_rows != height {
3202        return Err(invalid_variable(format!(
3203            "table: variable '{field}' has {rhs_rows} rows but table has {height}"
3204        )));
3205    }
3206    if !variables.fields.contains_key(field) {
3207        names.push(field.to_string());
3208    }
3209    variables.insert(field.to_string(), rhs);
3210    object
3211        .properties
3212        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3213    let mut props = table_public_properties(object)?;
3214    update_variable_metadata_names(&mut props, names)?;
3215    sync_table_properties(object, props);
3216    Ok(())
3217}
3218
3219fn apply_properties(object: &mut ObjectInstance, mut props: StructValue) -> BuiltinResult<()> {
3220    if let Some(value) = props.fields.get(VARIABLE_NAMES) {
3221        let names = variable_name_list(value)?;
3222        rename_table_variables(object, names.clone())?;
3223        update_variable_metadata_names(&mut props, names)?;
3224    }
3225    sync_table_properties(object, props);
3226    Ok(())
3227}
3228
3229fn rename_table_variables(
3230    object: &mut ObjectInstance,
3231    new_names: Vec<String>,
3232) -> BuiltinResult<()> {
3233    let old_names = table_variable_names_from_object(object)?;
3234    if old_names.len() != new_names.len() {
3235        return Err(invalid_variable(
3236            "table: VariableNames assignment must preserve variable count",
3237        ));
3238    }
3239    let new_names = make_unique_variable_names(new_names);
3240    let variables = table_variables(object)?;
3241    let mut renamed = StructValue::new();
3242    for (old, new) in old_names.iter().zip(new_names.iter()) {
3243        let value = variables
3244            .fields
3245            .get(old)
3246            .cloned()
3247            .ok_or_else(|| invalid_variable(format!("table: missing variable '{old}'")))?;
3248        renamed.insert(new.clone(), value);
3249    }
3250    object
3251        .properties
3252        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(renamed));
3253    Ok(())
3254}
3255
3256fn update_variable_metadata_names(
3257    props: &mut StructValue,
3258    names: Vec<String>,
3259) -> BuiltinResult<()> {
3260    props.insert(
3261        VARIABLE_NAMES,
3262        Value::StringArray(
3263            StringArray::new(names.clone(), vec![1, names.len()])
3264                .map_err(|err| invalid_variable(format!("table: {err}")))?,
3265        ),
3266    );
3267    for field in [VARIABLE_UNITS, VARIABLE_DESCRIPTIONS] {
3268        let existing = props.fields.get(field).cloned();
3269        let values = match existing {
3270            Some(Value::StringArray(mut array)) => {
3271                array.data.resize(names.len(), String::new());
3272                array.data.truncate(names.len());
3273                array.data
3274            }
3275            _ => vec![String::new(); names.len()],
3276        };
3277        props.insert(
3278            field,
3279            Value::StringArray(
3280                StringArray::new(values, vec![1, names.len()])
3281                    .map_err(|err| invalid_variable(format!("table: {err}")))?,
3282            ),
3283        );
3284    }
3285    Ok(())
3286}
3287
3288fn table_paren_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
3289    let selectors = selector_values(payload)?;
3290    let rows = parse_row_selector(selectors.first(), table_height(object)?)?;
3291    let variable_names = table_variable_names_from_object(object)?;
3292    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
3293    let variables = table_variables(object)?;
3294    let mut out = Vec::with_capacity(selected_names.len());
3295    for name in &selected_names {
3296        let value = variables
3297            .fields
3298            .get(name)
3299            .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
3300        out.push(select_rows(value, &rows)?);
3301    }
3302    let row_names = selected_row_names(object, &rows)?;
3303    table_from_columns_with_properties(selected_names, out, row_names)
3304}
3305
3306fn table_brace_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
3307    let subset = table_paren_get(object, payload)?;
3308    let object = into_table_object(subset, "table brace indexing")?;
3309    let variables = table_variables(&object)?;
3310    if variables.fields.len() == 1 {
3311        return variables
3312            .fields
3313            .values()
3314            .next()
3315            .cloned()
3316            .ok_or_else(|| invalid_variable("table: missing selected variable"));
3317    }
3318    let values = variables.fields.values().collect::<Vec<_>>();
3319    if values.iter().all(|value| matches!(value, Value::Tensor(_))) {
3320        return concatenate_numeric_columns(&values);
3321    }
3322    CellArray::new(
3323        values.into_iter().cloned().collect(),
3324        1,
3325        variables.fields.len(),
3326    )
3327    .map(Value::Cell)
3328    .map_err(|err| invalid_variable(format!("table: {err}")))
3329}
3330
3331fn table_paren_assign(
3332    mut object: ObjectInstance,
3333    payload: &Value,
3334    rhs: Value,
3335) -> BuiltinResult<Value> {
3336    let rhs_table = into_table_object(rhs, "table paren assignment")?;
3337    let selectors = selector_values(payload)?;
3338    let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
3339    let variable_names = table_variable_names_from_object(&object)?;
3340    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
3341    let rhs_names = table_variable_names_from_object(&rhs_table)?;
3342    if selected_names.len() != rhs_names.len() {
3343        return Err(invalid_variable(
3344            "table: assignment variable count must match selected variables",
3345        ));
3346    }
3347    let mut variables = table_variables(&object)?;
3348    let rhs_variables = table_variables(&rhs_table)?;
3349    for (target_name, rhs_name) in selected_names.iter().zip(rhs_names.iter()) {
3350        let current =
3351            variables.fields.get(target_name).cloned().ok_or_else(|| {
3352                invalid_variable(format!("table: missing variable '{target_name}'"))
3353            })?;
3354        let rhs_col =
3355            rhs_variables.fields.get(rhs_name).cloned().ok_or_else(|| {
3356                invalid_variable(format!("table: missing rhs variable '{rhs_name}'"))
3357            })?;
3358        variables.insert(target_name.clone(), assign_rows(current, &rows, rhs_col)?);
3359    }
3360    object
3361        .properties
3362        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3363    Ok(Value::Object(object))
3364}
3365
3366fn table_brace_assign(
3367    mut object: ObjectInstance,
3368    payload: &Value,
3369    rhs: Value,
3370) -> BuiltinResult<Value> {
3371    let selectors = selector_values(payload)?;
3372    let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
3373    let variable_names = table_variable_names_from_object(&object)?;
3374    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
3375    if selected_names.len() != 1 {
3376        return Err(invalid_variable(
3377            "table: brace assignment supports one variable at a time",
3378        ));
3379    }
3380    let mut variables = table_variables(&object)?;
3381    let target = selected_names[0].clone();
3382    let current = variables
3383        .fields
3384        .get(&target)
3385        .cloned()
3386        .ok_or_else(|| invalid_variable(format!("table: missing variable '{target}'")))?;
3387    variables.insert(target, assign_rows(current, &rows, rhs)?);
3388    object
3389        .properties
3390        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3391    Ok(Value::Object(object))
3392}
3393
3394fn selector_values(payload: &Value) -> BuiltinResult<Vec<Value>> {
3395    match payload {
3396        Value::Cell(cell) => {
3397            let mut out = Vec::with_capacity(cell.data.len());
3398            for handle in &cell.data {
3399                out.push(handle.clone());
3400            }
3401            Ok(out)
3402        }
3403        other => Ok(vec![other.clone()]),
3404    }
3405}
3406
3407fn parse_row_selector(selector: Option<&Value>, height: usize) -> BuiltinResult<Vec<usize>> {
3408    let Some(selector) = selector else {
3409        return Ok((0..height).collect());
3410    };
3411    if is_colon_selector(selector) {
3412        return Ok((0..height).collect());
3413    }
3414    if is_end_selector(selector) {
3415        return if height == 0 {
3416            Err(invalid_index(
3417                "table: end row index is invalid for empty table",
3418            ))
3419        } else {
3420            Ok(vec![height - 1])
3421        };
3422    }
3423    match selector {
3424        Value::Num(n) => Ok(vec![one_based_to_zero(*n, height, "row")?]),
3425        Value::Int(i) => Ok(vec![one_based_to_zero(i.to_f64(), height, "row")?]),
3426        Value::Tensor(tensor) => tensor
3427            .data
3428            .iter()
3429            .map(|value| one_based_to_zero(*value, height, "row"))
3430            .collect(),
3431        Value::LogicalArray(array) => {
3432            if array.data.len() != height {
3433                return Err(invalid_index(
3434                    "table: logical row selector length must match table height",
3435                ));
3436            }
3437            Ok(array
3438                .data
3439                .iter()
3440                .enumerate()
3441                .filter_map(|(idx, value)| (*value != 0).then_some(idx))
3442                .collect())
3443        }
3444        other => Err(invalid_index(format!(
3445            "table: unsupported row selector {other:?}"
3446        ))),
3447    }
3448}
3449
3450fn parse_variable_selector(
3451    selector: Option<&Value>,
3452    names: &[String],
3453) -> BuiltinResult<Vec<String>> {
3454    let Some(selector) = selector else {
3455        return Ok(names.to_vec());
3456    };
3457    if is_colon_selector(selector) {
3458        return Ok(names.to_vec());
3459    }
3460    match selector {
3461        Value::String(_) | Value::CharArray(_) | Value::StringArray(_) | Value::Cell(_) => {
3462            let selected = string_list(selector)?;
3463            for name in &selected {
3464                if !names.contains(name) {
3465                    return Err(invalid_variable(format!(
3466                        "table: unrecognized variable '{name}'"
3467                    )));
3468                }
3469            }
3470            Ok(selected)
3471        }
3472        Value::Num(n) => Ok(vec![name_at_index(names, *n)?]),
3473        Value::Int(i) => Ok(vec![name_at_index(names, i.to_f64())?]),
3474        Value::Tensor(tensor) => tensor
3475            .data
3476            .iter()
3477            .map(|value| name_at_index(names, *value))
3478            .collect(),
3479        Value::LogicalArray(array) => {
3480            if array.data.len() != names.len() {
3481                return Err(invalid_index(
3482                    "table: logical variable selector length must match table width",
3483                ));
3484            }
3485            Ok(array
3486                .data
3487                .iter()
3488                .zip(names.iter())
3489                .filter_map(|(flag, name)| (*flag != 0).then_some(name.clone()))
3490                .collect())
3491        }
3492        other => Err(invalid_index(format!(
3493            "table: unsupported variable selector {other:?}"
3494        ))),
3495    }
3496}
3497
3498fn is_colon_selector(value: &Value) -> bool {
3499    scalar_text(value, "selector")
3500        .map(|text| text == ":")
3501        .unwrap_or(false)
3502}
3503
3504fn is_end_selector(value: &Value) -> bool {
3505    scalar_text(value, "selector")
3506        .map(|text| text == "end")
3507        .unwrap_or(false)
3508}
3509
3510fn name_at_index(names: &[String], value: f64) -> BuiltinResult<String> {
3511    let idx = one_based_to_zero(value, names.len(), "variable")?;
3512    Ok(names[idx].clone())
3513}
3514
3515fn one_based_to_zero(value: f64, len: usize, context: &str) -> BuiltinResult<usize> {
3516    if !value.is_finite() || value < 1.0 || (value.round() - value).abs() > f64::EPSILON {
3517        return Err(invalid_index(format!(
3518            "table: {context} indices must be positive finite integers"
3519        )));
3520    }
3521    let idx = value.round() as usize - 1;
3522    if idx >= len {
3523        return Err(invalid_index(format!(
3524            "table: {context} index exceeds bounds"
3525        )));
3526    }
3527    Ok(idx)
3528}
3529
3530fn selected_row_names(
3531    object: &ObjectInstance,
3532    rows: &[usize],
3533) -> BuiltinResult<Option<Vec<String>>> {
3534    let props = table_public_properties(object)?;
3535    let Some(value) = props.fields.get(ROW_NAMES) else {
3536        return Ok(None);
3537    };
3538    let names = string_list(value)?;
3539    if names.is_empty() {
3540        return Ok(None);
3541    }
3542    Ok(Some(
3543        rows.iter()
3544            .filter_map(|row| names.get(*row).cloned())
3545            .collect(),
3546    ))
3547}
3548
3549fn value_row_count(value: &Value) -> BuiltinResult<usize> {
3550    match value {
3551        Value::Tensor(tensor) => Ok(tensor.rows()),
3552        Value::ComplexTensor(tensor) => Ok(tensor.rows),
3553        Value::StringArray(array) => Ok(array.rows()),
3554        Value::LogicalArray(array) => Ok(array.shape.first().copied().unwrap_or(array.data.len())),
3555        Value::Cell(cell) => Ok(cell.rows),
3556        Value::CharArray(array) => Ok(array.rows),
3557        Value::Object(obj) if obj.is_class("datetime") => {
3558            crate::builtins::datetime::serials_from_datetime_value(value)
3559                .map(|tensor| tensor.rows())
3560        }
3561        Value::Object(obj) if obj.is_class("duration") => {
3562            crate::builtins::duration::duration_tensor_from_duration_value(value)
3563                .map(|tensor| tensor.rows())
3564        }
3565        Value::Object(obj) if obj.is_class(TABLE_CLASS) => table_height(obj),
3566        _ => Ok(1),
3567    }
3568}
3569
3570fn select_rows(value: &Value, rows: &[usize]) -> BuiltinResult<Value> {
3571    match value {
3572        Value::Tensor(tensor) => {
3573            let cols = tensor.cols();
3574            let mut data = Vec::with_capacity(rows.len() * cols);
3575            for col in 0..cols {
3576                for &row in rows {
3577                    data.push(tensor.get2(row, col).map_err(invalid_index)?);
3578                }
3579            }
3580            Tensor::new_with_dtype(data, vec![rows.len(), cols], tensor.dtype)
3581                .map(Value::Tensor)
3582                .map_err(invalid_variable)
3583        }
3584        Value::ComplexTensor(tensor) => {
3585            let mut data = Vec::with_capacity(rows.len() * tensor.cols);
3586            for col in 0..tensor.cols {
3587                for &row in rows {
3588                    let idx = row + col * tensor.rows;
3589                    data.push(*tensor.data.get(idx).ok_or_else(|| {
3590                        invalid_index("table: complex variable row index out of bounds")
3591                    })?);
3592                }
3593            }
3594            ComplexTensor::new(data, vec![rows.len(), tensor.cols])
3595                .map(Value::ComplexTensor)
3596                .map_err(invalid_variable)
3597        }
3598        Value::StringArray(array) => {
3599            let cols = array.cols();
3600            let mut data = Vec::with_capacity(rows.len() * cols);
3601            for col in 0..cols {
3602                for &row in rows {
3603                    let idx = row + col * array.rows();
3604                    data.push(array.data.get(idx).cloned().ok_or_else(|| {
3605                        invalid_index("table: string variable row index out of bounds")
3606                    })?);
3607                }
3608            }
3609            StringArray::new(data, vec![rows.len(), cols])
3610                .map(Value::StringArray)
3611                .map_err(invalid_variable)
3612        }
3613        Value::CharArray(array) => {
3614            let mut data = Vec::with_capacity(rows.len() * array.cols);
3615            for &row in rows {
3616                if row >= array.rows {
3617                    return Err(invalid_index(
3618                        "table: char variable row index out of bounds",
3619                    ));
3620                }
3621                let start = row * array.cols;
3622                data.extend_from_slice(&array.data[start..start + array.cols]);
3623            }
3624            CharArray::new(data, rows.len(), array.cols)
3625                .map(Value::CharArray)
3626                .map_err(invalid_variable)
3627        }
3628        Value::LogicalArray(array) => {
3629            let source_rows = array.shape.first().copied().unwrap_or(array.data.len());
3630            let cols = array.shape.get(1).copied().unwrap_or(1);
3631            let mut data = Vec::with_capacity(rows.len() * cols);
3632            for col in 0..cols {
3633                for &row in rows {
3634                    let idx = row + col * source_rows;
3635                    data.push(*array.data.get(idx).ok_or_else(|| {
3636                        invalid_index("table: logical variable row index out of bounds")
3637                    })?);
3638                }
3639            }
3640            LogicalArray::new(data, vec![rows.len(), cols])
3641                .map(Value::LogicalArray)
3642                .map_err(invalid_variable)
3643        }
3644        Value::Cell(cell) => {
3645            let mut data = Vec::with_capacity(rows.len() * cell.cols);
3646            for col in 0..cell.cols {
3647                for &row in rows {
3648                    data.push(cell.get(row, col).map_err(invalid_index)?);
3649                }
3650            }
3651            CellArray::new(data, rows.len(), cell.cols)
3652                .map(Value::Cell)
3653                .map_err(invalid_variable)
3654        }
3655        Value::Object(obj) if obj.is_class("datetime") => {
3656            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3657            let selected = select_rows(&Value::Tensor(tensor), rows)?;
3658            match selected {
3659                Value::Tensor(tensor) => {
3660                    crate::builtins::datetime::datetime_object_from_serial_tensor(
3661                        tensor,
3662                        crate::builtins::datetime::datetime_format_from_value(value),
3663                    )
3664                }
3665                _ => unreachable!("select_rows tensor branch returns tensor"),
3666            }
3667        }
3668        Value::Object(obj) if obj.is_class("duration") => {
3669            let tensor = crate::builtins::duration::duration_tensor_from_duration_value(value)?;
3670            let selected = select_rows(&Value::Tensor(tensor), rows)?;
3671            match selected {
3672                Value::Tensor(tensor) => {
3673                    crate::builtins::duration::duration_object_from_days_tensor(
3674                        tensor,
3675                        crate::builtins::duration::duration_format_from_value(value),
3676                    )
3677                }
3678                _ => unreachable!("select_rows tensor branch returns tensor"),
3679            }
3680        }
3681        _ if rows.len() == 1 && rows[0] == 0 => Ok(value.clone()),
3682        other => Err(invalid_variable(format!(
3683            "table: row selection unsupported for variable {other:?}"
3684        ))),
3685    }
3686}
3687
3688fn assign_rows(mut current: Value, rows: &[usize], rhs: Value) -> BuiltinResult<Value> {
3689    if value_row_count(&rhs)? != rows.len() {
3690        return Err(invalid_variable(
3691            "table: assignment row count must match selected row count",
3692        ));
3693    }
3694    let replacing_all_rows = rows.len() == value_row_count(&current)?;
3695    match (&mut current, rhs) {
3696        (Value::Tensor(target), Value::Tensor(source)) => {
3697            if target.cols() != source.cols() {
3698                return Err(invalid_variable(
3699                    "table: tensor assignment column count mismatch",
3700                ));
3701            }
3702            for col in 0..target.cols() {
3703                for (src_row, &dst_row) in rows.iter().enumerate() {
3704                    let value = source.get2(src_row, col).map_err(invalid_index)?;
3705                    target.set2(dst_row, col, value).map_err(invalid_index)?;
3706                }
3707            }
3708            Ok(current)
3709        }
3710        (_, source) if replacing_all_rows => Ok(source),
3711        _ => Err(invalid_variable(
3712            "table: assignment for this variable type requires replacing all rows",
3713        )),
3714    }
3715}
3716
3717fn concatenate_numeric_columns(values: &[&Value]) -> BuiltinResult<Value> {
3718    let rows = values
3719        .first()
3720        .and_then(|value| match value {
3721            Value::Tensor(t) => Some(t.rows()),
3722            _ => None,
3723        })
3724        .unwrap_or(0);
3725    let cols = values
3726        .iter()
3727        .map(|value| match value {
3728            Value::Tensor(t) => Ok(t.cols()),
3729            _ => Err(invalid_variable("table: expected numeric variable")),
3730        })
3731        .collect::<BuiltinResult<Vec<_>>>()?;
3732    let total_cols: usize = cols.iter().sum();
3733    let mut data = Vec::with_capacity(rows * total_cols);
3734    for value in values {
3735        let Value::Tensor(tensor) = value else {
3736            return Err(invalid_variable("table: expected numeric variable"));
3737        };
3738        for col in 0..tensor.cols() {
3739            for row in 0..rows {
3740                data.push(tensor.get2(row, col).map_err(invalid_index)?);
3741            }
3742        }
3743    }
3744    Tensor::new(data, vec![rows, total_cols])
3745        .map(Value::Tensor)
3746        .map_err(invalid_variable)
3747}
3748
3749pub fn sortrows_table(value: Value, rest: &[Value]) -> BuiltinResult<(Value, Tensor)> {
3750    let object = into_table_object(value, "sortrows")?;
3751    let names = table_variable_names_from_object(&object)?;
3752    let sort_spec = SortSpec::parse(rest, &names)?;
3753    let height = table_height(&object)?;
3754    let variables = table_variables(&object)?;
3755    let mut indices: Vec<usize> = (0..height).collect();
3756    indices.sort_by(|&a, &b| {
3757        for key in &sort_spec.keys {
3758            let Some(value) = variables.fields.get(&key.name) else {
3759                continue;
3760            };
3761            let ord = compare_table_cells(value, a, b).unwrap_or(Ordering::Equal);
3762            let ord = if key.descending { ord.reverse() } else { ord };
3763            if ord != Ordering::Equal {
3764                return ord;
3765            }
3766        }
3767        a.cmp(&b)
3768    });
3769    let mut sorted_columns = Vec::with_capacity(names.len());
3770    for name in &names {
3771        let value = variables
3772            .fields
3773            .get(name)
3774            .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
3775        sorted_columns.push(select_rows(value, &indices)?);
3776    }
3777    let row_names = selected_row_names(&object, &indices)?;
3778    let sorted = table_from_columns_with_properties(names, sorted_columns, row_names)?;
3779    let indices_tensor = Tensor::new(
3780        indices.iter().map(|idx| *idx as f64 + 1.0).collect(),
3781        vec![indices.len(), 1],
3782    )
3783    .map_err(invalid_variable)?;
3784    Ok((sorted, indices_tensor))
3785}
3786
3787struct SortSpec {
3788    keys: Vec<SortKey>,
3789}
3790
3791struct SortKey {
3792    name: String,
3793    descending: bool,
3794}
3795
3796impl SortSpec {
3797    fn parse(rest: &[Value], names: &[String]) -> BuiltinResult<Self> {
3798        let mut keys = if rest.is_empty() {
3799            names
3800                .iter()
3801                .map(|name| SortKey {
3802                    name: name.clone(),
3803                    descending: false,
3804                })
3805                .collect::<Vec<_>>()
3806        } else {
3807            parse_variable_selector(rest.first(), names)?
3808                .into_iter()
3809                .map(|name| SortKey {
3810                    name,
3811                    descending: false,
3812                })
3813                .collect()
3814        };
3815        if let Some(direction) = rest.get(1) {
3816            let directions = string_list(direction)?;
3817            if directions.len() == 1 {
3818                let descending = directions[0].eq_ignore_ascii_case("descend")
3819                    || directions[0].eq_ignore_ascii_case("desc");
3820                for key in &mut keys {
3821                    key.descending = descending;
3822                }
3823            } else {
3824                for (key, direction) in keys.iter_mut().zip(directions.iter()) {
3825                    key.descending = direction.eq_ignore_ascii_case("descend")
3826                        || direction.eq_ignore_ascii_case("desc");
3827                }
3828            }
3829        }
3830        Ok(Self { keys })
3831    }
3832}
3833
3834fn compare_table_cells(value: &Value, a: usize, b: usize) -> BuiltinResult<Ordering> {
3835    match value {
3836        Value::Tensor(tensor) => Ok(tensor
3837            .get2(a, 0)
3838            .map_err(invalid_index)?
3839            .partial_cmp(&tensor.get2(b, 0).map_err(invalid_index)?)
3840            .unwrap_or(Ordering::Greater)),
3841        Value::StringArray(array) => {
3842            let av = array.data.get(a).cloned().unwrap_or_default();
3843            let bv = array.data.get(b).cloned().unwrap_or_default();
3844            Ok(av.cmp(&bv))
3845        }
3846        Value::LogicalArray(array) => {
3847            let av = *array.data.get(a).unwrap_or(&0);
3848            let bv = *array.data.get(b).unwrap_or(&0);
3849            Ok(av.cmp(&bv))
3850        }
3851        Value::Object(obj) if obj.is_class("datetime") => {
3852            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3853            Ok(tensor
3854                .data
3855                .get(a)
3856                .copied()
3857                .unwrap_or(f64::NAN)
3858                .partial_cmp(&tensor.data.get(b).copied().unwrap_or(f64::NAN))
3859                .unwrap_or(Ordering::Greater))
3860        }
3861        other => Ok(cell_key_string(other, a).cmp(&cell_key_string(other, b))),
3862    }
3863}
3864
/// One component of a grouping key: the value of a grouping variable in a
/// single row, reduced to a form with a total order so it can key a
/// `BTreeMap`.
#[derive(Clone, Debug)]
enum GroupAtom {
    Number(f64),
    Text(String),
    Logical(bool),
    Missing,
}

impl GroupAtom {
    /// Ordering rank between variants: missing, then logical, then numbers,
    /// then text.
    fn rank(&self) -> u8 {
        match self {
            Self::Missing => 0,
            Self::Logical(_) => 1,
            Self::Number(_) => 2,
            Self::Text(_) => 3,
        }
    }
}

// Equality is defined through `Ord` so that `Eq` and `Ord` always agree.
impl PartialEq for GroupAtom {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for GroupAtom {}

impl PartialOrd for GroupAtom {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for GroupAtom {
    /// Orders by variant rank first, then by value within a variant.
    ///
    /// Numbers compare numerically with `-0.0` equal to `0.0`; every NaN is
    /// equal to every other NaN and sorts after all other numbers. A bitwise
    /// total order would instead split signed zeros and differently-signed
    /// NaNs into separate groups.
    fn cmp(&self, other: &Self) -> Ordering {
        let rank = self.rank().cmp(&other.rank());
        if rank != Ordering::Equal {
            return rank;
        }
        match (self, other) {
            (Self::Missing, Self::Missing) => Ordering::Equal,
            (Self::Logical(a), Self::Logical(b)) => a.cmp(b),
            (Self::Number(a), Self::Number(b)) => match (a.is_nan(), b.is_nan()) {
                (true, true) => Ordering::Equal,
                (true, false) => Ordering::Greater,
                (false, true) => Ordering::Less,
                // Neither side is NaN, so `partial_cmp` always succeeds.
                (false, false) => a.partial_cmp(b).unwrap_or(Ordering::Equal),
            },
            (Self::Text(a), Self::Text(b)) => a.cmp(b),
            // Equal ranks imply equal variants, so this arm is not reached.
            _ => Ordering::Equal,
        }
    }
}
3913
3914fn cell_group_atom(value: &Value, row: usize) -> GroupAtom {
3915    match value {
3916        Value::Tensor(tensor) => tensor
3917            .get2(row, 0)
3918            .map(GroupAtom::Number)
3919            .unwrap_or(GroupAtom::Missing),
3920        Value::StringArray(array) => array
3921            .data
3922            .get(row)
3923            .cloned()
3924            .map(GroupAtom::Text)
3925            .unwrap_or(GroupAtom::Missing),
3926        Value::LogicalArray(array) => array
3927            .data
3928            .get(row)
3929            .map(|value| GroupAtom::Logical(*value != 0))
3930            .unwrap_or(GroupAtom::Missing),
3931        Value::Object(obj) if obj.is_class("datetime") => {
3932            crate::builtins::datetime::serials_from_datetime_value(value)
3933                .ok()
3934                .and_then(|tensor| tensor.data.get(row).copied())
3935                .map(GroupAtom::Number)
3936                .unwrap_or(GroupAtom::Missing)
3937        }
3938        other => GroupAtom::Text(cell_key_string(other, row)),
3939    }
3940}
3941
3942fn groupsummary_impl(
3943    table: Value,
3944    groupvars: Value,
3945    method: Value,
3946    rest: Vec<Value>,
3947) -> BuiltinResult<Value> {
3948    let object = into_table_object(table, "groupsummary")?;
3949    let names = table_variable_names_from_object(&object)?;
3950    let group_names = parse_variable_selector(Some(&groupvars), &names)?;
3951    let methods = string_list(&method)?;
3952    if methods.is_empty() {
3953        return Err(invalid_argument(
3954            "groupsummary: method list must not be empty",
3955        ));
3956    }
3957    let data_names = if let Some(value) = rest.first() {
3958        parse_variable_selector(Some(value), &names)?
3959    } else {
3960        names
3961            .iter()
3962            .filter(|name| !group_names.contains(name))
3963            .filter(|name| {
3964                table_variables(&object)
3965                    .ok()
3966                    .and_then(|vars| vars.fields.get(*name).cloned())
3967                    .map(|value| matches!(value, Value::Tensor(_)))
3968                    .unwrap_or(false)
3969            })
3970            .cloned()
3971            .collect()
3972    };
3973    let variables = table_variables(&object)?;
3974    let height = table_height(&object)?;
3975    let mut groups: BTreeMap<Vec<GroupAtom>, Vec<usize>> = BTreeMap::new();
3976    for row in 0..height {
3977        let key = group_names
3978            .iter()
3979            .map(|name| {
3980                variables
3981                    .fields
3982                    .get(name)
3983                    .map(|value| cell_group_atom(value, row))
3984                    .unwrap_or(GroupAtom::Missing)
3985            })
3986            .collect::<Vec<_>>();
3987        groups.entry(key).or_default().push(row);
3988    }
3989    let group_rows = groups
3990        .values()
3991        .filter_map(|rows| rows.first().copied())
3992        .collect::<Vec<_>>();
3993    let mut out_names = Vec::new();
3994    let mut out_columns = Vec::new();
3995    for name in &group_names {
3996        let value = variables.fields.get(name).ok_or_else(|| {
3997            invalid_variable(format!("groupsummary: missing group variable '{name}'"))
3998        })?;
3999        out_names.push(name.clone());
4000        out_columns.push(select_rows(value, &group_rows)?);
4001    }
4002    out_names.push("GroupCount".to_string());
4003    out_columns.push(Value::Tensor(
4004        Tensor::new(
4005            groups.values().map(|rows| rows.len() as f64).collect(),
4006            vec![groups.len(), 1],
4007        )
4008        .map_err(invalid_variable)?,
4009    ));
4010    for method in &methods {
4011        for name in &data_names {
4012            let value = variables.fields.get(name).ok_or_else(|| {
4013                invalid_variable(format!("groupsummary: missing data variable '{name}'"))
4014            })?;
4015            let values = summarize_groups(value, groups.values(), method)?;
4016            out_names.push(format!("{}_{}", method.to_ascii_lowercase(), name));
4017            out_columns.push(Value::Tensor(
4018                Tensor::new(values, vec![groups.len(), 1]).map_err(invalid_variable)?,
4019            ));
4020        }
4021    }
4022    table_from_columns(out_names, out_columns)
4023}
4024
4025fn summarize_groups<'a>(
4026    value: &Value,
4027    groups: impl Iterator<Item = &'a Vec<usize>>,
4028    method: &str,
4029) -> BuiltinResult<Vec<f64>> {
4030    let tensor = match value {
4031        Value::Tensor(tensor) if tensor.cols() == 1 => tensor,
4032        _ => {
4033            return Err(invalid_variable(
4034                "groupsummary: summary data variables must be numeric column vectors",
4035            ))
4036        }
4037    };
4038    groups
4039        .map(|rows| {
4040            let mut values = rows
4041                .iter()
4042                .map(|row| tensor.get2(*row, 0).map_err(invalid_index))
4043                .collect::<BuiltinResult<Vec<_>>>()?;
4044            values.retain(|value| !value.is_nan());
4045            let result = match method.to_ascii_lowercase().as_str() {
4046                "mean" => {
4047                    if values.is_empty() {
4048                        f64::NAN
4049                    } else {
4050                        values.iter().sum::<f64>() / values.len() as f64
4051                    }
4052                }
4053                "sum" => values.iter().sum(),
4054                "min" => values.into_iter().fold(f64::INFINITY, f64::min),
4055                "max" => values.into_iter().fold(f64::NEG_INFINITY, f64::max),
4056                "median" => {
4057                    if values.is_empty() {
4058                        f64::NAN
4059                    } else {
4060                        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
4061                        let mid = values.len() / 2;
4062                        if values.len() % 2 == 0 {
4063                            (values[mid - 1] + values[mid]) / 2.0
4064                        } else {
4065                            values[mid]
4066                        }
4067                    }
4068                }
4069                "count" | "numel" => values.len() as f64,
4070                other => {
4071                    return Err(invalid_argument(format!(
4072                        "groupsummary: unsupported method '{other}'"
4073                    )))
4074                }
4075            };
4076            Ok(result)
4077        })
4078        .collect()
4079}
4080
4081fn cell_key_string(value: &Value, row: usize) -> String {
4082    match value {
4083        Value::Tensor(tensor) => tensor
4084            .get2(row, 0)
4085            .map(format_key_number)
4086            .unwrap_or_default(),
4087        Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
4088        Value::LogicalArray(array) => array
4089            .data
4090            .get(row)
4091            .map(|value| value.to_string())
4092            .unwrap_or_default(),
4093        Value::Object(obj) if obj.is_class("datetime") => {
4094            crate::builtins::datetime::serials_from_datetime_value(value)
4095                .ok()
4096                .and_then(|tensor| tensor.data.get(row).copied())
4097                .map(format_key_number)
4098                .unwrap_or_default()
4099        }
4100        other => format!("{other}"),
4101    }
4102}
4103
4104pub fn table_display_text(value: &Value) -> BuiltinResult<String> {
4105    let object = match value {
4106        Value::Object(object) if object.is_class(TABLE_CLASS) => object,
4107        _ => return Err(invalid_argument("table display expects table object")),
4108    };
4109    let names = table_variable_names_from_object(object)?;
4110    let variables = table_variables(object)?;
4111    let rows = table_height(object)?;
4112    let preview = rows.min(12);
4113    let mut widths = names.iter().map(|name| name.len()).collect::<Vec<_>>();
4114    let rendered_cols = names
4115        .iter()
4116        .enumerate()
4117        .map(|(col, name)| {
4118            let value = variables
4119                .fields
4120                .get(name)
4121                .cloned()
4122                .unwrap_or_else(|| Value::String(String::new()));
4123            let cells = (0..preview)
4124                .map(|row| render_table_cell(&value, row))
4125                .collect::<Vec<_>>();
4126            for cell in &cells {
4127                widths[col] = widths[col].max(cell.len());
4128            }
4129            cells
4130        })
4131        .collect::<Vec<_>>();
4132
4133    let mut lines = Vec::new();
4134    lines.push(format!("{rows}x{} table", names.len()));
4135    if names.is_empty() {
4136        return Ok(lines.join("\n"));
4137    }
4138    let header = names
4139        .iter()
4140        .enumerate()
4141        .map(|(idx, name)| format!("{name:<width$}", width = widths[idx]))
4142        .collect::<Vec<_>>()
4143        .join("  ");
4144    lines.push(header);
4145    for row in 0..preview {
4146        lines.push(
4147            rendered_cols
4148                .iter()
4149                .enumerate()
4150                .map(|(col, cells)| format!("{:<width$}", cells[row], width = widths[col]))
4151                .collect::<Vec<_>>()
4152                .join("  "),
4153        );
4154    }
4155    if preview < rows {
4156        lines.push(format!("... {} more rows", rows - preview));
4157    }
4158    Ok(lines.join("\n"))
4159}
4160
4161pub fn table_summary_text(value: &Value) -> BuiltinResult<String> {
4162    let object = match value {
4163        Value::Object(object) if object.is_class(TABLE_CLASS) => object,
4164        _ => return Err(invalid_argument("table display expects table object")),
4165    };
4166    Ok(format!(
4167        "{}x{} table",
4168        table_height(object)?,
4169        table_width(object)?
4170    ))
4171}
4172
4173fn render_table_cell(value: &Value, row: usize) -> String {
4174    match value {
4175        Value::Tensor(tensor) => tensor
4176            .get2(row, 0)
4177            .map(format_table_number)
4178            .unwrap_or_default(),
4179        Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
4180        Value::LogicalArray(array) => array
4181            .data
4182            .get(row)
4183            .map(|value| if *value != 0 { "true" } else { "false" }.to_string())
4184            .unwrap_or_default(),
4185        Value::Object(obj) if obj.is_class("datetime") => {
4186            crate::builtins::datetime::datetime_string_array(value)
4187                .ok()
4188                .flatten()
4189                .and_then(|array| array.data.get(row).cloned())
4190                .unwrap_or_else(|| value.to_string())
4191        }
4192        other => other.to_string(),
4193    }
4194}
4195
4196fn format_table_number(value: f64) -> String {
4197    if value.is_nan() {
4198        "NaN".to_string()
4199    } else if value.fract() == 0.0 && value.abs() < 1e15 {
4200        format!("{}", value as i64)
4201    } else {
4202        trim_float(format!("{value:.6}"))
4203    }
4204}
4205
4206fn format_key_number(value: f64) -> String {
4207    if value.is_nan() {
4208        "NaN".to_string()
4209    } else if value.is_infinite() {
4210        value.to_string()
4211    } else {
4212        trim_float(format!("{value:.17}"))
4213    }
4214}
4215
/// Strips trailing zeros after the first `.` in `text`, and the `.` itself when no
/// fractional digits remain. Text without a `.` is returned unchanged.
fn trim_float(mut text: String) -> String {
    let dot = match text.find('.') {
        Some(position) => position,
        None => return text,
    };
    // Length of the fractional part once its trailing zeros are dropped.
    let kept_fraction = text[dot + 1..].trim_end_matches('0').len();
    let new_len = if kept_fraction == 0 {
        dot
    } else {
        dot + 1 + kept_fraction
    };
    text.truncate(new_len);
    text
}
4229
4230fn scalar_text(value: &Value, context: &str) -> BuiltinResult<String> {
4231    match value {
4232        Value::String(text) => Ok(text.clone()),
4233        Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
4234        Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
4235        _ => Err(invalid_argument(format!(
4236            "table: {context} must be a string scalar or character vector"
4237        ))),
4238    }
4239}
4240
4241fn bool_scalar(value: &Value, context: &str) -> BuiltinResult<bool> {
4242    match value {
4243        Value::Bool(flag) => Ok(*flag),
4244        Value::Int(value) => Ok(value.to_i64() != 0),
4245        Value::Num(value) if value.is_finite() => Ok(*value != 0.0),
4246        Value::String(_) | Value::CharArray(_) | Value::StringArray(_) => {
4247            let text = scalar_text(value, context)?;
4248            match text.to_ascii_lowercase().as_str() {
4249                "true" | "on" | "yes" => Ok(true),
4250                "false" | "off" | "no" => Ok(false),
4251                _ => Err(invalid_argument(format!(
4252                    "table: {context} must be logical"
4253                ))),
4254            }
4255        }
4256        _ => Err(invalid_argument(format!(
4257            "table: {context} must be logical"
4258        ))),
4259    }
4260}
4261
4262fn nonnegative_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
4263    match value {
4264        Value::Int(value) if value.to_i64() >= 0 => Ok(value.to_i64() as usize),
4265        Value::Num(value)
4266            if value.is_finite()
4267                && *value >= 0.0
4268                && (value.round() - value).abs() <= f64::EPSILON =>
4269        {
4270            Ok(value.round() as usize)
4271        }
4272        _ => Err(invalid_argument(format!(
4273            "table: {context} must be a non-negative integer"
4274        ))),
4275    }
4276}
4277
4278fn positive_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
4279    let value = nonnegative_usize(value, context)?;
4280    if value == 0 {
4281        return Err(invalid_argument(format!(
4282            "table: {context} must be a positive integer"
4283        )));
4284    }
4285    Ok(value)
4286}
4287
4288fn option_value_is_empty(value: &Value) -> bool {
4289    match value {
4290        Value::String(text) => text.trim().is_empty(),
4291        Value::CharArray(array) => {
4292            array.data.is_empty()
4293                || (array.rows == 1 && array.data.iter().all(|ch| ch.is_whitespace()))
4294        }
4295        Value::StringArray(array) => {
4296            array.data.is_empty() || (array.data.len() == 1 && array.data[0].trim().is_empty())
4297        }
4298        Value::Cell(cell) => {
4299            cell.data.is_empty() || cell.data.iter().all(|handle| option_value_is_empty(handle))
4300        }
4301        _ => false,
4302    }
4303}
4304
4305fn string_list(value: &Value) -> BuiltinResult<Vec<String>> {
4306    match value {
4307        Value::String(text) => Ok(vec![text.clone()]),
4308        Value::CharArray(ca) if ca.rows == 1 => Ok(vec![ca.data.iter().collect()]),
4309        Value::StringArray(array) => Ok(array.data.clone()),
4310        Value::Cell(cell) => {
4311            let mut out = Vec::with_capacity(cell.data.len());
4312            for handle in &cell.data {
4313                let value = handle;
4314                out.extend(string_list(value)?);
4315            }
4316            Ok(out)
4317        }
4318        _ => Err(invalid_argument(
4319            "table: expected string, string array, character vector, or cellstr",
4320        )),
4321    }
4322}
4323
4324fn optional_raw_variable_name_list(value: &Value) -> BuiltinResult<Option<Vec<String>>> {
4325    if option_value_is_empty(value) {
4326        Ok(None)
4327    } else {
4328        raw_variable_name_list(value).map(Some)
4329    }
4330}
4331
4332fn raw_variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
4333    let names = string_list(value)?;
4334    if names.is_empty() {
4335        return Err(invalid_variable("table: variable names must not be empty"));
4336    }
4337    Ok(names)
4338}
4339
4340fn variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
4341    raw_variable_name_list(value).map(make_unique_variable_names)
4342}
4343
4344fn optional_variable_type_list(value: &Value) -> BuiltinResult<Option<Vec<ImportVariableType>>> {
4345    if option_value_is_empty(value) {
4346        Ok(None)
4347    } else {
4348        variable_type_list(value).map(Some)
4349    }
4350}
4351
4352fn variable_type_list(value: &Value) -> BuiltinResult<Vec<ImportVariableType>> {
4353    string_list(value)?
4354        .iter()
4355        .map(|raw| ImportVariableType::parse(raw))
4356        .collect()
4357}
4358
4359fn variable_type_names(value: &Value) -> BuiltinResult<Vec<String>> {
4360    string_list(value)?
4361        .iter()
4362        .map(|raw| ImportVariableType::canonical_label(raw))
4363        .collect()
4364}
4365
4366fn optional_range_spec(value: &Value) -> BuiltinResult<Option<RangeSpec>> {
4367    if option_value_is_empty(value) {
4368        Ok(None)
4369    } else {
4370        RangeSpec::parse(value).map(Some)
4371    }
4372}
4373
4374fn optional_sheet_selector(value: &Value) -> BuiltinResult<Option<SheetSelector>> {
4375    if option_value_is_empty(value) {
4376        Ok(None)
4377    } else {
4378        SheetSelector::parse(value).map(Some)
4379    }
4380}
4381
/// Produces the default names `Var1`, `Var2`, ..., `Var{count}`.
fn generated_variable_names(count: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(count);
    for position in 0..count {
        names.push(format!("Var{}", position + 1));
    }
    names
}
4385
4386fn make_unique_variable_names(names: Vec<String>) -> Vec<String> {
4387    make_unique_names(
4388        names
4389            .into_iter()
4390            .enumerate()
4391            .map(|(idx, name)| make_valid_variable_name(&name, idx + 1))
4392            .collect(),
4393    )
4394}
4395
/// Makes a list of names unique, comparing ASCII case-insensitively.
///
/// Each name is trimmed; a blank name becomes `Var{position}` (1-based). When a
/// name collides with one already emitted, `_2`, `_3`, ... is appended to the
/// trimmed name until the candidate is unused.
fn make_unique_names(names: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut unique = Vec::with_capacity(names.len());
    for (position, name) in names.into_iter().enumerate() {
        let trimmed = name.trim();
        let base = if trimmed.is_empty() {
            format!("Var{}", position + 1)
        } else {
            trimmed.to_string()
        };
        let mut candidate = base.clone();
        let mut suffix = 1usize;
        // `insert` returns false while the lowercase candidate is already taken.
        while !seen.insert(candidate.to_ascii_lowercase()) {
            suffix += 1;
            candidate = format!("{base}_{suffix}");
        }
        unique.push(candidate);
    }
    unique
}
4416
/// Turns arbitrary text into an identifier-like variable name.
///
/// The trimmed input is scanned character by character: ASCII letters and `_` are
/// kept anywhere, ASCII digits are kept after the first position, and every other
/// character becomes a single `_` (never doubling an existing trailing `_`).
/// Trailing underscores are then removed. If the result is empty or does not
/// start with an ASCII letter, `Var{fallback_index}` is returned instead.
fn make_valid_variable_name(raw: &str, fallback_index: usize) -> String {
    let mut sanitized = String::new();
    for (position, ch) in raw.trim().chars().enumerate() {
        let allowed = ch == '_'
            || if position == 0 {
                ch.is_ascii_alphabetic()
            } else {
                ch.is_ascii_alphanumeric()
            };
        if allowed {
            sanitized.push(ch);
        } else if !sanitized.ends_with('_') {
            sanitized.push('_');
        }
    }
    let kept = sanitized.trim_end_matches('_').len();
    sanitized.truncate(kept);
    match sanitized.chars().next() {
        Some(first) if first.is_ascii_alphabetic() => sanitized,
        _ => format!("Var{fallback_index}"),
    }
}
4437
4438#[cfg(test)]
4439mod tests {
4440    use super::*;
4441    #[cfg(not(target_arch = "wasm32"))]
4442    use async_trait::async_trait;
4443    use futures::executor::block_on;
4444    #[cfg(not(target_arch = "wasm32"))]
4445    use runmat_filesystem::{
4446        DirEntry, FileHandle, FsMetadata, FsProvider, NativeFsProvider, OpenFlags,
4447        SandboxFsProvider,
4448    };
4449    use runmat_time::unix_timestamp_ms;
4450    use std::fs;
4451    #[cfg(not(target_arch = "wasm32"))]
4452    use std::io;
4453    use std::io::Write;
4454
    /// Test-only filesystem provider that pairs a sandbox provider with a native
    /// provider and uses a path prefix to tell the two apart.
    #[cfg(not(target_arch = "wasm32"))]
    struct PrefixSandboxProvider {
        /// Paths whose lossy string form starts with this prefix count as virtual.
        prefix: &'static str,
        /// Provider that receives operations on virtual paths.
        sandbox: SandboxFsProvider,
        /// Provider that receives operations on all other paths.
        native: NativeFsProvider,
    }
4461
4462    #[cfg(not(target_arch = "wasm32"))]
4463    impl PrefixSandboxProvider {
4464        fn is_virtual(&self, path: &Path) -> bool {
4465            path.to_string_lossy().starts_with(self.prefix)
4466        }
4467    }
4468
    // Routes every filesystem operation by path: virtual paths (per `is_virtual`)
    // go to the sandbox provider, all others go to the native provider.
    #[cfg(not(target_arch = "wasm32"))]
    #[async_trait(?Send)]
    impl FsProvider for PrefixSandboxProvider {
        /// Opens `path` on whichever provider owns it.
        fn open(&self, path: &Path, flags: &OpenFlags) -> io::Result<Box<dyn FileHandle>> {
            if self.is_virtual(path) {
                self.sandbox.open(path, flags)
            } else {
                self.native.open(path, flags)
            }
        }

        /// Reads `path` from whichever provider owns it.
        async fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
            if self.is_virtual(path) {
                self.sandbox.read(path).await
            } else {
                self.native.read(path).await
            }
        }

        /// Writes `data` to `path` on whichever provider owns it.
        async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.write(path, data).await
            } else {
                self.native.write(path, data).await
            }
        }

        /// Forwards `remove_file` to whichever provider owns `path`.
        async fn remove_file(&self, path: &Path) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.remove_file(path).await
            } else {
                self.native.remove_file(path).await
            }
        }

        /// Forwards `metadata` to whichever provider owns `path`.
        async fn metadata(&self, path: &Path) -> io::Result<FsMetadata> {
            if self.is_virtual(path) {
                self.sandbox.metadata(path).await
            } else {
                self.native.metadata(path).await
            }
        }

        /// Forwards `symlink_metadata` to whichever provider owns `path`.
        async fn symlink_metadata(&self, path: &Path) -> io::Result<FsMetadata> {
            if self.is_virtual(path) {
                self.sandbox.symlink_metadata(path).await
            } else {
                self.native.symlink_metadata(path).await
            }
        }

        /// Forwards `read_dir` to whichever provider owns `path`.
        async fn read_dir(&self, path: &Path) -> io::Result<Vec<DirEntry>> {
            if self.is_virtual(path) {
                self.sandbox.read_dir(path).await
            } else {
                self.native.read_dir(path).await
            }
        }

        /// Forwards `canonicalize` to whichever provider owns `path`.
        async fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
            if self.is_virtual(path) {
                self.sandbox.canonicalize(path).await
            } else {
                self.native.canonicalize(path).await
            }
        }

        /// Forwards `create_dir` to whichever provider owns `path`.
        async fn create_dir(&self, path: &Path) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.create_dir(path).await
            } else {
                self.native.create_dir(path).await
            }
        }

        /// Forwards `create_dir_all` to whichever provider owns `path`.
        async fn create_dir_all(&self, path: &Path) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.create_dir_all(path).await
            } else {
                self.native.create_dir_all(path).await
            }
        }

        /// Forwards `remove_dir` to whichever provider owns `path`.
        async fn remove_dir(&self, path: &Path) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.remove_dir(path).await
            } else {
                self.native.remove_dir(path).await
            }
        }

        /// Forwards `remove_dir_all` to whichever provider owns `path`.
        async fn remove_dir_all(&self, path: &Path) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.remove_dir_all(path).await
            } else {
                self.native.remove_dir_all(path).await
            }
        }

        /// Renames within a single provider. A rename whose endpoints fall on
        /// different providers fails with `io::ErrorKind::Unsupported`.
        async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
            match (self.is_virtual(from), self.is_virtual(to)) {
                (true, true) => self.sandbox.rename(from, to).await,
                (false, false) => self.native.rename(from, to).await,
                _ => Err(io::Error::new(
                    io::ErrorKind::Unsupported,
                    "cross-provider rename is unsupported in test provider",
                )),
            }
        }

        /// Forwards `set_readonly` to whichever provider owns `path`.
        async fn set_readonly(&self, path: &Path, readonly: bool) -> io::Result<()> {
            if self.is_virtual(path) {
                self.sandbox.set_readonly(path, readonly).await
            } else {
                self.native.set_readonly(path, readonly).await
            }
        }
    }
4587
4588    fn unique_path(prefix: &str) -> PathBuf {
4589        let mut path = std::env::temp_dir();
4590        path.push(format!(
4591            "runmat_{prefix}_{}_{}",
4592            std::process::id(),
4593            unix_timestamp_ms()
4594        ));
4595        path
4596    }
4597
4598    fn read_table(path: &Path, args: Vec<Value>) -> Value {
4599        block_on(readtable_builtin(
4600            Value::from(path.to_string_lossy().to_string()),
4601            args,
4602        ))
4603        .expect("readtable")
4604    }
4605
4606    fn read_table_err(path: &Path, args: Vec<Value>) -> RuntimeError {
4607        block_on(readtable_builtin(
4608            Value::from(path.to_string_lossy().to_string()),
4609            args,
4610        ))
4611        .expect_err("expected readtable failure")
4612    }
4613
4614    fn spreadsheet_options(args: Vec<Value>) -> StructValue {
4615        match block_on(spreadsheet_import_options_builtin(args)).expect("spreadsheetImportOptions")
4616        {
4617            Value::Struct(options) => options,
4618            other => panic!("expected struct options, got {other:?}"),
4619        }
4620    }
4621
4622    fn detect_options(path: &Path, args: Vec<Value>) -> StructValue {
4623        match block_on(detect_import_options_builtin(
4624            Value::from(path.to_string_lossy().to_string()),
4625            args,
4626        ))
4627        .expect("detectImportOptions")
4628        {
4629            Value::Struct(options) => options,
4630            other => panic!("expected struct options, got {other:?}"),
4631        }
4632    }
4633
4634    fn char_row(array: &CharArray, row: usize) -> String {
4635        let start = row * array.cols;
4636        array.data[start..start + array.cols].iter().collect()
4637    }
4638
4639    fn object(value: Value) -> ObjectInstance {
4640        match value {
4641            Value::Object(object) => object,
4642            other => panic!("expected table object, got {other:?}"),
4643        }
4644    }
4645
4646    #[test]
4647    fn readtable_imports_headered_numeric_and_text_columns() {
4648        let path = unique_path("readtable_basic");
4649        fs::write(&path, "Name,Score\nAda,10\nGrace,12\n").expect("write sample");
4650        let table = object(read_table(&path, Vec::new()));
4651        assert_eq!(
4652            table_variable_names_from_object(&table).unwrap(),
4653            vec!["Name".to_string(), "Score".to_string()]
4654        );
4655        match table_member_get(&table, &Value::from("Score")).unwrap() {
4656            Value::Tensor(tensor) => {
4657                assert_eq!(tensor.shape, vec![2, 1]);
4658                assert_eq!(tensor.data, vec![10.0, 12.0]);
4659            }
4660            other => panic!("expected tensor, got {other:?}"),
4661        }
4662        match table_member_get(&table, &Value::from("Name")).unwrap() {
4663            Value::StringArray(array) => {
4664                assert_eq!(array.data, vec!["Ada".to_string(), "Grace".to_string()]);
4665            }
4666            other => panic!("expected string array, got {other:?}"),
4667        }
4668        let _ = fs::remove_file(&path);
4669    }
4670
4671    #[test]
4672    fn readtable_auto_does_not_consume_headerless_numeric_rows() {
4673        let path = unique_path("readtable_headerless_numeric");
4674        fs::write(&path, "1,2\n3,4\n").expect("write sample");
4675        let table = object(read_table(&path, Vec::new()));
4676        assert_eq!(
4677            table_variable_names_from_object(&table).unwrap(),
4678            vec!["Var1".to_string(), "Var2".to_string()]
4679        );
4680        match table_member_get(&table, &Value::from("Var1")).unwrap() {
4681            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![1.0, 3.0]),
4682            other => panic!("expected tensor, got {other:?}"),
4683        }
4684        match table_member_get(&table, &Value::from("Var2")).unwrap() {
4685            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 4.0]),
4686            other => panic!("expected tensor, got {other:?}"),
4687        }
4688        let _ = fs::remove_file(&path);
4689    }
4690
4691    #[test]
4692    fn readtable_rejects_unknown_and_invalid_options() {
4693        let path = unique_path("readtable_invalid_options");
4694        fs::write(&path, "A\n1\n").expect("write sample");
4695        let err = read_table_err(
4696            &path,
4697            vec![Value::from("DefinitelyNotAnOption"), Value::from(1.0)],
4698        );
4699        assert!(err.message().contains("unsupported option"));
4700        let err = read_table_err(
4701            &path,
4702            vec![Value::from("VariableNamingRule"), Value::from("mangle")],
4703        );
4704        assert!(err.message().contains("unsupported VariableNamingRule"));
4705        let _ = fs::remove_file(&path);
4706    }
4707
4708    #[test]
4709    fn readtable_handles_quoted_delimiters_and_newlines() {
4710        let path = unique_path("readtable_quoted_newlines");
4711        fs::write(
4712            &path,
4713            "Name,Note\nAda,\"hello, world\"\nGrace,\"line one\nline two\"\n",
4714        )
4715        .expect("write sample");
4716        let table = object(read_table(&path, Vec::new()));
4717        match table_member_get(&table, &Value::from("Note")).unwrap() {
4718            Value::StringArray(array) => assert_eq!(
4719                array.data,
4720                vec!["hello, world".to_string(), "line one\nline two".to_string()]
4721            ),
4722            other => panic!("expected string array, got {other:?}"),
4723        }
4724        let _ = fs::remove_file(&path);
4725    }
4726
4727    #[test]
4728    fn readtable_supports_explicit_names_and_missing_tokens() {
4729        let path = unique_path("readtable_options");
4730        fs::write(&path, "1,NA\n2,4\n").expect("write sample");
4731        let names =
4732            StringArray::new(vec!["A".to_string(), "B".to_string()], vec![1, 2]).expect("names");
4733        let table = object(read_table(
4734            &path,
4735            vec![
4736                Value::from("ReadVariableNames"),
4737                Value::Bool(false),
4738                Value::from("VariableNames"),
4739                Value::StringArray(names),
4740                Value::from("TreatAsMissing"),
4741                Value::from("NA"),
4742            ],
4743        ));
4744        match table_member_get(&table, &Value::from("B")).unwrap() {
4745            Value::Tensor(tensor) => {
4746                assert!(tensor.data[0].is_nan());
4747                assert_eq!(tensor.data[1], 4.0);
4748            }
4749            other => panic!("expected tensor, got {other:?}"),
4750        }
4751        let _ = fs::remove_file(&path);
4752    }
4753
4754    #[test]
4755    fn readtable_preserves_variable_names_when_requested() {
4756        let path = unique_path("readtable_preserve_names");
4757        fs::write(&path, "daily revenue,total orders\n100,10\n").expect("write sample");
4758        let table = object(read_table(
4759            &path,
4760            vec![Value::from("VariableNamingRule"), Value::from("preserve")],
4761        ));
4762        assert_eq!(
4763            table_variable_names_from_object(&table).unwrap(),
4764            vec!["daily revenue".to_string(), "total orders".to_string()]
4765        );
4766        let _ = fs::remove_file(&path);
4767    }
4768
4769    fn write_zip_file(zip: &mut zip::ZipWriter<std::fs::File>, name: &str, contents: &str) {
4770        let options = zip::write::SimpleFileOptions::default()
4771            .compression_method(zip::CompressionMethod::Stored);
4772        zip.start_file(name, options).expect("start xlsx part");
4773        zip.write_all(contents.as_bytes()).expect("write xlsx part");
4774    }
4775
    /// Writes a small hand-built `.xlsx` archive to `path` for spreadsheet import tests.
    ///
    /// The workbook has one sheet named `Data` with a header row
    /// (`Date`, `Orders`, `Revenue`) followed by two data rows. Panics if the file
    /// cannot be created or any part cannot be written.
    fn write_minimal_xlsx(path: &Path) {
        let file = std::fs::File::create(path).expect("create xlsx");
        let mut zip = zip::ZipWriter::new(file);
        // Content-type declarations for the workbook, worksheet, and styles parts.
        write_zip_file(
            &mut zip,
            "[Content_Types].xml",
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
  <Default Extension="xml" ContentType="application/xml"/>
  <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
  <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
  <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
</Types>"#,
        );
        // Package-level relationship pointing at the workbook part.
        write_zip_file(
            &mut zip,
            "_rels/.rels",
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
</Relationships>"#,
        );
        // Workbook listing the single sheet named "Data".
        write_zip_file(
            &mut zip,
            "xl/workbook.xml",
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
  <sheets>
    <sheet name="Data" sheetId="1" r:id="rId1"/>
  </sheets>
</workbook>"#,
        );
        // Workbook relationships to the worksheet and styles parts.
        write_zip_file(
            &mut zip,
            "xl/_rels/workbook.xml.rels",
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
</Relationships>"#,
        );
        // Stylesheet with a single font, fill, border, and cell format.
        write_zip_file(
            &mut zip,
            "xl/styles.xml",
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
  <fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
  <fills count="1"><fill><patternFill patternType="none"/></fill></fills>
  <borders count="1"><border/></borders>
  <cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>
  <cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellXfs>
</styleSheet>"#,
        );
        // Sheet data: inline-string header and dates, numeric Orders and Revenue cells.
        write_zip_file(
            &mut zip,
            "xl/worksheets/sheet1.xml",
            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
  <sheetData>
    <row r="1">
      <c r="A1" t="inlineStr"><is><t>Date</t></is></c>
      <c r="B1" t="inlineStr"><is><t>Orders</t></is></c>
      <c r="C1" t="inlineStr"><is><t>Revenue</t></is></c>
    </row>
    <row r="2">
      <c r="A2" t="inlineStr"><is><t>2026-06-01</t></is></c>
      <c r="B2"><v>10</v></c>
      <c r="C2"><v>200</v></c>
    </row>
    <row r="3">
      <c r="A3" t="inlineStr"><is><t>2026-06-02</t></is></c>
      <c r="B3"><v>4</v></c>
      <c r="C3"><v>90</v></c>
    </row>
  </sheetData>
</worksheet>"#,
        );
        zip.finish().expect("finish xlsx");
    }
4856
4857    #[test]
4858    fn readtable_imports_xlsx_sheet_and_range() {
4859        let path = unique_path("readtable_spreadsheet");
4860        let path = path.with_extension("xlsx");
4861        write_minimal_xlsx(&path);
4862        let table = object(read_table(
4863            &path,
4864            vec![
4865                Value::from("Sheet"),
4866                Value::from("Data"),
4867                Value::from("Range"),
4868                Value::from("A1:C3"),
4869            ],
4870        ));
4871        assert_eq!(
4872            table_variable_names_from_object(&table).unwrap(),
4873            vec![
4874                "Date".to_string(),
4875                "Orders".to_string(),
4876                "Revenue".to_string()
4877            ]
4878        );
4879        match table_member_get(&table, &Value::from("Revenue")).unwrap() {
4880            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![200.0, 90.0]),
4881            other => panic!("expected tensor, got {other:?}"),
4882        }
4883        let _ = fs::remove_file(&path);
4884    }
4885
4886    #[test]
4887    fn spreadsheet_import_options_registers_public_descriptor() {
4888        assert!(runmat_builtins::builtin_function_by_name("spreadsheetImportOptions").is_some());
4889        let labels = SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR
4890            .signatures
4891            .iter()
4892            .map(|signature| signature.label)
4893            .collect::<Vec<_>>();
4894        assert!(labels.contains(&"opts = spreadsheetImportOptions()"));
4895        assert!(labels.contains(&"opts = spreadsheetImportOptions(nameValuePairs...)"));
4896    }
4897
4898    #[test]
4899    fn detect_import_options_registers_public_descriptor() {
4900        assert!(runmat_builtins::builtin_function_by_name("detectImportOptions").is_some());
4901        let labels = DETECT_IMPORT_OPTIONS_DESCRIPTOR
4902            .signatures
4903            .iter()
4904            .map(|signature| signature.label)
4905            .collect::<Vec<_>>();
4906        assert!(labels.contains(&"opts = detectImportOptions(filename)"));
4907        assert!(labels.contains(&"opts = detectImportOptions(filename, nameValuePairs...)"));
4908    }
4909
/// Detects options for a semicolon-delimited text file and replays them through `read_table`.
///
/// Asserts the detected `FileType`, `Delimiter`, `NumHeaderLines`, `ReadVariableNames`,
/// `VariableNames` and `VariableTypes` fields, then checks the variable names and the
/// `Score` column of the table read back with those options.
#[test]
fn detect_import_options_infers_text_delimiter_names_and_types() {
    let path = unique_path("detect_import_options_text");
    let sample = "Name;Score;Flag;When\nAda;10;true;2026-06-01\nGrace;12;false;2026-06-02\n";
    fs::write(&path, sample).expect("write sample");

    let expected_names: Vec<String> = ["Name", "Score", "Flag", "When"]
        .iter()
        .copied()
        .map(String::from)
        .collect();
    let expected_types: Vec<String> = ["string", "double", "logical", "datetime"]
        .iter()
        .copied()
        .map(String::from)
        .collect();

    let options = detect_options(&path, Vec::new());
    let fields = &options.fields;

    // Scalar fields reported by detection.
    assert_eq!(fields.get("FileType"), Some(&Value::from("text")));
    assert_eq!(fields.get("Delimiter"), Some(&Value::from(";")));
    assert_eq!(fields.get("NumHeaderLines"), Some(&Value::Num(1.0)));
    assert_eq!(fields.get("ReadVariableNames"), Some(&Value::Bool(false)));

    // Names taken from the header row.
    let names_value = fields.get("VariableNames").unwrap();
    let Value::StringArray(names) = names_value else {
        panic!("expected string array, got {names_value:?}");
    };
    assert_eq!(names.data, expected_names);

    // One inferred type per column.
    let types_value = fields.get("VariableTypes").unwrap();
    let Value::StringArray(types) = types_value else {
        panic!("expected string array, got {types_value:?}");
    };
    assert_eq!(types.data, expected_types);

    // Replay: the same struct is handed to `read_table` as its only extra argument.
    let table = object(read_table(&path, vec![Value::Struct(options)]));
    assert_eq!(table_variable_names_from_object(&table).unwrap(), expected_names);

    let score = table_member_get(&table, &Value::from("Score")).unwrap();
    let Value::Tensor(tensor) = &score else {
        panic!("expected tensor, got {score:?}");
    };
    assert_eq!(tensor.data, vec![10.0, 12.0]);

    let _ = fs::remove_file(&path);
}
4966
/// Confirms that a detected options struct can be passed straight to `readmatrix`.
///
/// The CSV has a text header row and a 2x2 numeric body; the resulting tensor is expected to
/// have shape `[2, 2]` with its data listed column by column (`[1, 3, 2, 4]`).
#[test]
fn detect_import_options_struct_can_drive_readmatrix() {
    let path = unique_path("detect_import_options_readmatrix");
    fs::write(&path, "A,B\n1,2\n3,4\n").expect("write sample");

    let options = detect_options(&path, Vec::new());
    let filename = Value::from(path.to_string_lossy().into_owned());
    let pending = crate::builtins::io::tabular::readmatrix::readmatrix_builtin(
        filename,
        vec![Value::Struct(options)],
    );
    let matrix = block_on(pending).expect("readmatrix");

    let Value::Tensor(tensor) = &matrix else {
        panic!("expected tensor, got {matrix:?}");
    };
    assert_eq!(tensor.shape, vec![2, 2]);
    assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);

    let _ = fs::remove_file(&path);
}
4988
/// Writes a CSV that starts with U+FEFF and checks that neither the detected
/// `VariableNames` nor the names of the table read back with those options contain it.
#[test]
fn detect_import_options_strips_bom_from_detected_names() {
    let path = unique_path("detect_import_options_bom");
    fs::write(&path, "\u{FEFF}A,B\n1,2\n3,4\n").expect("write sample");
    let expected_names = vec![String::from("A"), String::from("B")];

    let options = detect_options(&path, Vec::new());
    let detected = options.fields.get("VariableNames").unwrap();
    let Value::StringArray(names) = detected else {
        panic!("expected string array, got {detected:?}");
    };
    assert_eq!(names.data, expected_names);

    let table = object(read_table(&path, vec![Value::Struct(options)]));
    assert_eq!(table_variable_names_from_object(&table).unwrap(), expected_names);

    // The first column must be reachable under the plain name `A`.
    let column_a = table_member_get(&table, &Value::from("A")).unwrap();
    let Value::Tensor(tensor) = &column_a else {
        panic!("expected tensor, got {column_a:?}");
    };
    assert_eq!(tensor.data, vec![1.0, 3.0]);

    let _ = fs::remove_file(&path);
}
5011
/// Exercises column-only and row-only `Range` arguments and replays each through `read_table`.
///
/// Part one passes `"C:D"` for a file with a header row and expects the stored range to be
/// `"C2:D"`. Part two rewrites the same path with a headerless numeric file, passes `"2:3"`
/// and expects that range to be stored unchanged.
#[test]
fn detect_import_options_preserves_partial_ranges_for_replay() {
    let path = unique_path("detect_import_options_partial_range");

    // Column-only range.
    fs::write(&path, "ID,A,B,C\nr1,1,2,3\nr2,4,5,6\nr3,7,8,9\n").expect("write sample");
    let column_args = vec![Value::from("Range"), Value::from("C:D")];
    let column_options = detect_options(&path, column_args);
    assert_eq!(column_options.fields.get("Range"), Some(&Value::from("C2:D")));

    let column_table = object(read_table(&path, vec![Value::Struct(column_options)]));
    let selected_names = vec![String::from("B"), String::from("C")];
    assert_eq!(
        table_variable_names_from_object(&column_table).unwrap(),
        selected_names
    );
    let column_b = table_member_get(&column_table, &Value::from("B")).unwrap();
    let Value::Tensor(b_tensor) = &column_b else {
        panic!("expected tensor, got {column_b:?}");
    };
    assert_eq!(b_tensor.data, vec![2.0, 5.0, 8.0]);

    // Row-only range; the second column is looked up as `Var2`.
    fs::write(&path, "11,12\n21,22\n31,32\n41,42\n").expect("write numeric sample");
    let row_args = vec![Value::from("Range"), Value::from("2:3")];
    let row_options = detect_options(&path, row_args);
    assert_eq!(row_options.fields.get("Range"), Some(&Value::from("2:3")));

    let row_table = object(read_table(&path, vec![Value::Struct(row_options)]));
    let second_column = table_member_get(&row_table, &Value::from("Var2")).unwrap();
    let Value::Tensor(var2_tensor) = &second_column else {
        panic!("expected tensor, got {second_column:?}");
    };
    assert_eq!(var2_tensor.data, vec![22.0, 32.0]);

    let _ = fs::remove_file(&path);
}
5042
/// Detects options with `ReadRowNames` set to true and replays them through `read_table`.
///
/// The options are expected to report `NumVariables` of 2 while `VariableNames` still lists
/// all three header cells; the replayed table exposes `Name` and `Score` as variables and
/// `r1`/`r2` as row names.
#[test]
fn detect_import_options_read_row_names_replays_through_readtable() {
    let path = unique_path("detect_import_options_row_names");
    fs::write(&path, "Row,Name,Score\nr1,Ada,10\nr2,Grace,12\n").expect("write sample");

    let request = vec![Value::from("ReadRowNames"), Value::Bool(true)];
    let options = detect_options(&path, request);
    assert_eq!(options.fields.get("NumVariables"), Some(&Value::Num(2.0)));

    let header_value = options.fields.get("VariableNames").unwrap();
    let Value::StringArray(header) = header_value else {
        panic!("expected string array, got {header_value:?}");
    };
    let expected_header = vec![
        String::from("Row"),
        String::from("Name"),
        String::from("Score"),
    ];
    assert_eq!(header.data, expected_header);

    let table = object(read_table(&path, vec![Value::Struct(options)]));
    let expected_variables = vec![String::from("Name"), String::from("Score")];
    assert_eq!(
        table_variable_names_from_object(&table).unwrap(),
        expected_variables
    );

    // The first file column ends up in the table's row-name property.
    let props = table_public_properties(&table).unwrap();
    let row_names_value = props.fields.get(ROW_NAMES).unwrap();
    let Value::StringArray(row_names) = row_names_value else {
        panic!("expected row names, got {row_names_value:?}");
    };
    assert_eq!(row_names.data, vec![String::from("r1"), String::from("r2")]);

    let score = table_member_get(&table, &Value::from("Score")).unwrap();
    let Value::Tensor(tensor) = &score else {
        panic!("expected tensor, got {score:?}");
    };
    assert_eq!(tensor.data, vec![10.0, 12.0]);

    let _ = fs::remove_file(&path);
}
5074
/// Detects options with an explicit `Encoding` of `windows-1252` for a file whose header
/// contains the byte `0xE9`, then replays the options through `readmatrix` and checks the
/// 2x2 numeric result.
#[test]
fn detect_import_options_encoding_replays_through_readmatrix() {
    let path = unique_path("detect_import_options_encoding_readmatrix");
    fs::write(&path, b"Caf\xe9,Score\n1,2\n3,4\n").expect("write sample");

    let encoding_args = vec![Value::from("Encoding"), Value::from("windows-1252")];
    let options = detect_options(&path, encoding_args);

    let filename = Value::from(path.to_string_lossy().into_owned());
    let pending = crate::builtins::io::tabular::readmatrix::readmatrix_builtin(
        filename,
        vec![Value::Struct(options)],
    );
    let matrix = block_on(pending).expect("readmatrix");

    let Value::Tensor(tensor) = &matrix else {
        panic!("expected tensor, got {matrix:?}");
    };
    assert_eq!(tensor.shape, vec![2, 2]);
    assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);

    let _ = fs::remove_file(&path);
}
5099
/// Replays detected import options while a replacement filesystem provider is installed.
///
/// Inside the inner scope the test takes the provider override lock, installs a
/// `PrefixSandboxProvider` built over a sandbox rooted at `root`, writes two CSV files via
/// `runmat_filesystem::write_async`, and then checks that options detected for those virtual
/// paths drive both `read_table` and `readmatrix`.
///
/// NOTE(review): how `PrefixSandboxProvider` routes `/provider…` paths, and what the guard
/// returned by `replace_provider` does on drop, are defined outside this block — confirm
/// there before relying on either.
#[cfg(not(target_arch = "wasm32"))]
#[test]
fn detect_import_options_replays_through_filesystem_provider() {
    let root = unique_path("detect_import_options_provider_root");
    {
        // Everything bound in this scope (guard, then lock) is dropped before `root` is removed.
        let _provider_lock = runmat_filesystem::provider_override_lock();
        let provider = PrefixSandboxProvider {
            prefix: "/provider",
            sandbox: SandboxFsProvider::new(root.clone()).expect("sandbox provider"),
            native: NativeFsProvider,
        };
        let _provider_guard =
            runmat_filesystem::replace_provider(std::sync::Arc::new(provider));

        // Mixed text/numeric file: detect, then replay through `read_table`.
        block_on(runmat_filesystem::write_async(
            "/provider.csv",
            b"Name,Score\nAda,10\nGrace,12\n",
        ))
        .expect("write provider sample");

        let virtual_path = Path::new("/provider.csv");
        let options = detect_options(virtual_path, Vec::new());
        let table = object(read_table(virtual_path, vec![Value::Struct(options)]));
        assert_eq!(
            table_variable_names_from_object(&table).unwrap(),
            vec!["Name".to_string(), "Score".to_string()]
        );
        match table_member_get(&table, &Value::from("Score")).unwrap() {
            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![10.0, 12.0]),
            other => panic!("expected tensor, got {other:?}"),
        }

        // Numeric file: detect, then replay through `readmatrix`.
        block_on(runmat_filesystem::write_async(
            "/provider_numeric.csv",
            b"A,B\n1,2\n3,4\n",
        ))
        .expect("write provider numeric sample");
        let matrix_options = detect_options(Path::new("/provider_numeric.csv"), Vec::new());
        let matrix = block_on(
            crate::builtins::io::tabular::readmatrix::readmatrix_builtin(
                Value::from("/provider_numeric.csv"),
                vec![Value::Struct(matrix_options)],
            ),
        )
        .expect("readmatrix");
        match matrix {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 2]);
                assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
    }
    // Best-effort cleanup of the sandbox directory.
    let _ = fs::remove_dir_all(&root);
}
5157
/// Runs detection with explicit `Delimiter`, `NumHeaderLines`, `VariableNamingRule` and
/// `TextType` arguments on a pipe-delimited file whose first line is not tabular data.
///
/// NOTE(review): despite the test name, no `Range` argument is passed here.
#[test]
fn detect_import_options_honors_overrides_and_range() {
    let path = unique_path("detect_import_options_overrides");
    fs::write(&path, "ignore me\nRaw A|Raw B\n5|yes\n6|no\n").expect("write sample");

    let overrides = vec![
        Value::from("Delimiter"),
        Value::from("|"),
        Value::from("NumHeaderLines"),
        Value::Num(1.0),
        Value::from("VariableNamingRule"),
        Value::from("preserve"),
        Value::from("TextType"),
        Value::from("char"),
    ];
    let options = detect_options(&path, overrides);
    let fields = &options.fields;

    assert_eq!(fields.get("Delimiter"), Some(&Value::from("|")));
    // One header line was requested; the returned options report two.
    assert_eq!(fields.get("NumHeaderLines"), Some(&Value::Num(2.0)));
    assert_eq!(
        fields.get("VariableNamingRule"),
        Some(&Value::from("preserve"))
    );

    // With the `preserve` rule the names keep their embedded spaces.
    let names_value = fields.get("VariableNames").unwrap();
    let Value::StringArray(names) = names_value else {
        panic!("expected string array, got {names_value:?}");
    };
    assert_eq!(names.data, vec![String::from("Raw A"), String::from("Raw B")]);

    let types_value = fields.get("VariableTypes").unwrap();
    let Value::StringArray(types) = types_value else {
        panic!("expected string array, got {types_value:?}");
    };
    assert_eq!(
        types.data,
        vec![String::from("double"), String::from("logical")]
    );

    let _ = fs::remove_file(&path);
}
5198
/// Builds spreadsheet import options from `NumVariables`, `VariableTypes` and `DataRange`
/// arguments and checks the resulting struct fields, including the `Var1`/`Var2` names
/// expected when no names are supplied.
#[test]
fn spreadsheet_import_options_builds_editable_options_struct() {
    let requested_types =
        StringArray::new(vec!["double".into(), "string".into()], vec![1, 2]).unwrap();
    let options = spreadsheet_options(vec![
        Value::from("NumVariables"),
        Value::Num(2.0),
        Value::from("VariableTypes"),
        Value::StringArray(requested_types),
        Value::from("DataRange"),
        Value::from("A2:B5"),
    ]);
    let fields = &options.fields;

    assert_eq!(fields.get("FileType"), Some(&Value::from("spreadsheet")));
    assert_eq!(fields.get("NumVariables"), Some(&Value::Num(2.0)));
    assert_eq!(fields.get("DataRange"), Some(&Value::from("A2:B5")));

    // Both string-array fields are expected as 1x2 rows.
    let names_value = fields.get("VariableNames").unwrap();
    let Value::StringArray(names) = names_value else {
        panic!("expected string array, got {names_value:?}");
    };
    assert_eq!(names.data, vec![String::from("Var1"), String::from("Var2")]);
    assert_eq!(names.shape, vec![1, 2]);

    let types_value = fields.get("VariableTypes").unwrap();
    let Value::StringArray(types) = types_value else {
        panic!("expected string array, got {types_value:?}");
    };
    assert_eq!(
        types.data,
        vec![String::from("double"), String::from("string")]
    );
    assert_eq!(types.shape, vec![1, 2]);
}
5232
/// Edits a spreadsheet options struct (sheet, data range, single variable name and type) and
/// checks that `read_table` honors it for an `.xlsx` file.
///
/// NOTE(review): the expected values `[200, 90]` presumably come from cells `C2:C3` of the
/// `Data` sheet produced by `write_minimal_xlsx` — confirm against that helper.
#[test]
fn readtable_consumes_spreadsheet_import_options_struct() {
    let path = unique_path("readtable_spreadsheet_options").with_extension("xlsx");
    write_minimal_xlsx(&path);

    // Wraps one string in a 1x1 string array value.
    let single_string = |text: &str| {
        Value::StringArray(StringArray::new(vec![text.to_string()], vec![1, 1]).unwrap())
    };

    let mut options = spreadsheet_options(vec![Value::from("NumVariables"), Value::Num(1.0)]);
    options.insert("Sheet", Value::from("Data"));
    options.insert("DataRange", Value::from("C2:C3"));
    options.insert("VariableNames", single_string("Amount"));
    options.insert("VariableTypes", single_string("double"));

    let table = object(read_table(&path, vec![Value::Struct(options)]));
    assert_eq!(
        table_variable_names_from_object(&table).unwrap(),
        vec![String::from("Amount")]
    );

    let amount = table_member_get(&table, &Value::from("Amount")).unwrap();
    let Value::Tensor(tensor) = &amount else {
        panic!("expected tensor, got {amount:?}");
    };
    assert_eq!(tensor.shape, vec![2, 1]);
    assert_eq!(tensor.data, vec![200.0, 90.0]);
    assert_eq!(tensor.dtype, NumericDType::F64);

    let _ = fs::remove_file(&path);
}
5264
/// Passes an unmodified default spreadsheet options struct to `read_table` and checks that
/// the table's variable names are `Date`, `Orders` and `Revenue`.
///
/// NOTE(review): those names presumably come from the header row written by
/// `write_minimal_xlsx` — confirm against that helper.
#[test]
fn readtable_default_spreadsheet_options_still_infers_headers() {
    let path = unique_path("readtable_default_spreadsheet_options").with_extension("xlsx");
    write_minimal_xlsx(&path);

    let defaults = spreadsheet_options(Vec::new());
    let table = object(read_table(&path, vec![Value::Struct(defaults)]));

    let expected_names: Vec<String> = ["Date", "Orders", "Revenue"]
        .iter()
        .copied()
        .map(String::from)
        .collect();
    assert_eq!(table_variable_names_from_object(&table).unwrap(), expected_names);

    let _ = fs::remove_file(&path);
}
5282
/// Reads a four-column CSV with an explicit `VariableTypes` list and checks each column's
/// resulting representation: an `F32` tensor, a logical array, a `datetime` object and a
/// `duration` object.
#[test]
fn readtable_variable_types_coerce_imported_columns() {
    let path = unique_path("readtable_variable_types");
    fs::write(
        &path,
        "Value,Flag,When,Elapsed\n1.5,true,2026-06-01,01:30:00\n2.25,false,2026-06-02,02:00:00\n",
    )
    .expect("write sample");

    let type_names: Vec<String> = ["single", "logical", "datetime", "duration"]
        .iter()
        .copied()
        .map(String::from)
        .collect();
    let types = StringArray::new(type_names, vec![1, 4]).unwrap();
    let arguments = vec![Value::from("VariableTypes"), Value::StringArray(types)];
    let table = object(read_table(&path, arguments));

    // `single` column.
    let value_column = table_member_get(&table, &Value::from("Value")).unwrap();
    let Value::Tensor(tensor) = &value_column else {
        panic!("expected tensor, got {value_column:?}");
    };
    assert_eq!(tensor.dtype, NumericDType::F32);
    assert_eq!(tensor.data, vec![1.5, 2.25]);

    // `logical` column.
    let flag_column = table_member_get(&table, &Value::from("Flag")).unwrap();
    let Value::LogicalArray(flags) = &flag_column else {
        panic!("expected logical array, got {flag_column:?}");
    };
    assert_eq!(flags.data, vec![1, 0]);

    // `datetime` column.
    let when_column = table_member_get(&table, &Value::from("When")).unwrap();
    let Value::Object(when) = &when_column else {
        panic!("expected datetime object, got {when_column:?}");
    };
    assert!(when.is_class("datetime"));

    // `duration` column.
    let elapsed_column = table_member_get(&table, &Value::from("Elapsed")).unwrap();
    let Value::Object(elapsed) = &elapsed_column else {
        panic!("expected duration object, got {elapsed_column:?}");
    };
    assert!(elapsed.is_class("duration"));

    let _ = fs::remove_file(&path);
}
5326
/// Reads a headerless CSV with caller-supplied names containing spaces and
/// `VariableNamingRule` set to `preserve`, and checks the names come back unchanged.
#[test]
fn readtable_preserves_explicit_import_variable_names_when_requested() {
    let path = unique_path("readtable_preserve_explicit_names");
    fs::write(&path, "100,10\n125,12\n").expect("write sample");

    let requested: Vec<String> = ["daily revenue", "total orders"]
        .iter()
        .copied()
        .map(String::from)
        .collect();
    let names = StringArray::new(requested.clone(), vec![1, 2]).unwrap();

    let arguments = vec![
        Value::from("ReadVariableNames"),
        Value::Bool(false),
        Value::from("VariableNames"),
        Value::StringArray(names),
        Value::from("VariableNamingRule"),
        Value::from("preserve"),
    ];
    let table = object(read_table(&path, arguments));

    assert_eq!(table_variable_names_from_object(&table).unwrap(), requested);

    let _ = fs::remove_file(&path);
}
5353
/// Reads a one-column text CSV with `TextType` set to `char` and checks the column is a
/// 2x5 char array whose shorter row is padded with trailing spaces.
#[test]
fn readtable_text_type_char_imports_text_columns_as_char_matrix() {
    let path = unique_path("readtable_text_type_char");
    fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");

    let arguments = vec![Value::from("TextType"), Value::from("char")];
    let table = object(read_table(&path, arguments));

    let name_column = table_member_get(&table, &Value::from("Name")).unwrap();
    let Value::CharArray(chars) = &name_column else {
        panic!("expected char array, got {name_column:?}");
    };
    assert_eq!(chars.rows, 2);
    assert_eq!(chars.cols, 5);
    assert_eq!(char_row(chars, 0), "Ada  ");
    assert_eq!(char_row(chars, 1), "Grace");

    let _ = fs::remove_file(&path);
}
5373
/// Reads a one-column text CSV with `VariableTypes` of `cellstr` and checks the column is a
/// 2x1 cell array whose entries are char-row values.
#[test]
fn readtable_variable_types_cellstr_imports_cell_column() {
    let path = unique_path("readtable_variable_types_cellstr");
    fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");

    let types = StringArray::new(vec![String::from("cellstr")], vec![1, 1]).unwrap();
    let arguments = vec![Value::from("VariableTypes"), Value::StringArray(types)];
    let table = object(read_table(&path, arguments));

    let name_column = table_member_get(&table, &Value::from("Name")).unwrap();
    let Value::Cell(cell) = &name_column else {
        panic!("expected cell array, got {name_column:?}");
    };
    assert_eq!(cell.rows, 2);
    assert_eq!(cell.cols, 1);

    let first_expected = Value::CharArray(CharArray::new_row("Ada"));
    assert_eq!(cell.get(0, 0).unwrap(), first_expected);
    let second_expected = Value::CharArray(CharArray::new_row("Grace"));
    assert_eq!(cell.get(1, 0).unwrap(), second_expected);

    let _ = fs::remove_file(&path);
}
5400
/// Checks that `int8` and `categorical` entries in `VariableTypes` are rejected, each with
/// an error message naming the offending entry.
#[test]
fn readtable_rejects_unrepresented_import_variable_types() {
    let path = unique_path("readtable_unsupported_variable_types");
    fs::write(&path, "A\n1\n").expect("write sample");

    // (requested type, fragment the error message must contain)
    let cases = [
        ("int8", "unsupported VariableTypes entry 'int8'"),
        (
            "categorical",
            "unsupported VariableTypes entry 'categorical'",
        ),
    ];
    for (type_name, expected_fragment) in cases {
        let requested = StringArray::new(vec![type_name.to_string()], vec![1, 1]).unwrap();
        let err = read_table_err(
            &path,
            vec![Value::from("VariableTypes"), Value::StringArray(requested)],
        );
        assert!(err.message().contains(expected_fragment));
    }

    let _ = fs::remove_file(&path);
}
5429
/// Replaces `VariableNames` inside a table's public properties struct, writes the struct
/// back through the `Properties` member, and checks the table's variables are renamed.
#[test]
fn table_properties_variable_names_rename_columns() {
    let column = |values: Vec<f64>| Value::Tensor(Tensor::new(values, vec![2, 1]).unwrap());
    let columns = vec![column(vec![1.0, 2.0]), column(vec![3.0, 4.0])];
    let mut table = object(table_from_columns(vec!["A".into(), "B".into()], columns).unwrap());

    let renamed = StringArray::new(vec!["X".into(), "Y".into()], vec![1, 2]).unwrap();
    let mut props = table_public_properties(&table).unwrap();
    props.insert(VARIABLE_NAMES, Value::StringArray(renamed));
    table_member_set(&mut table, PROPERTIES_MEMBER, Value::Struct(props)).unwrap();

    assert_eq!(
        table_variable_names_from_object(&table).unwrap(),
        vec![String::from("X"), String::from("Y")]
    );
}
5447
/// Indexes a 3-row table with a `{rows, variables}` selector cell: rows `[3, 1]` and the
/// variable `B`. The result is expected to contain only `B`, with values in the requested
/// row order (`[6, 4]`).
#[test]
fn table_paren_selects_rows_and_named_variables() {
    let column_a = Value::Tensor(Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap());
    let column_b = Value::Tensor(Tensor::new(vec![4.0, 5.0, 6.0], vec![3, 1]).unwrap());
    let table = object(
        table_from_columns(vec!["A".into(), "B".into()], vec![column_a, column_b]).unwrap(),
    );

    let row_selector = Value::Tensor(Tensor::new(vec![3.0, 1.0], vec![1, 2]).unwrap());
    let variable_selector = Value::Cell(CellArray::new(vec![Value::from("B")], 1, 1).unwrap());
    let selector =
        Value::Cell(CellArray::new(vec![row_selector, variable_selector], 1, 2).unwrap());

    let subset = object(table_paren_get(&table, &selector).unwrap());
    assert_eq!(
        table_variable_names_from_object(&subset).unwrap(),
        vec![String::from("B")]
    );

    let selected = table_member_get(&subset, &Value::from("B")).unwrap();
    let Value::Tensor(tensor) = &selected else {
        panic!("expected tensor, got {selected:?}");
    };
    assert_eq!(tensor.data, vec![6.0, 4.0]);
}
5472
/// Sorts a two-row table with row names by its only variable and checks that the row names
/// are reordered together with the data.
#[test]
fn sortrows_preserves_row_names() {
    let values = Value::Tensor(Tensor::new(vec![2.0, 1.0], vec![2, 1]).unwrap());
    let row_names = Some(vec!["second".into(), "first".into()]);
    let table =
        table_from_columns_with_properties(vec!["X".into()], vec![values], row_names).unwrap();

    // Only the first element of the returned tuple (the sorted table) is inspected.
    let sort_keys = [Value::from("X")];
    let sorted = object(sortrows_table(table, &sort_keys).unwrap().0);

    let props = table_public_properties(&sorted).unwrap();
    let names_value = props.fields.get(ROW_NAMES).unwrap();
    let Value::StringArray(names) = names_value else {
        panic!("expected row names, got {names_value:?}");
    };
    assert_eq!(
        names.data,
        vec![String::from("first"), String::from("second")]
    );
}
5492
/// Groups a three-row table by the string variable `G`, requests the `mean` of `X`, and
/// checks the output variable names (`G`, `GroupCount`, `mean_X`) and the per-group means.
#[test]
fn groupsummary_mean_counts_groups() {
    let group_labels =
        StringArray::new(vec!["a".into(), "b".into(), "a".into()], vec![3, 1]).unwrap();
    let group = Value::StringArray(group_labels);
    let value = Value::Tensor(Tensor::new(vec![2.0, 5.0, 4.0], vec![3, 1]).unwrap());
    let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();

    let data_variables = vec![Value::from("X")];
    let summary = object(
        groupsummary_impl(table, Value::from("G"), Value::from("mean"), data_variables).unwrap(),
    );

    let expected_names: Vec<String> = ["G", "GroupCount", "mean_X"]
        .iter()
        .copied()
        .map(String::from)
        .collect();
    assert_eq!(
        table_variable_names_from_object(&summary).unwrap(),
        expected_names
    );

    // Group "a" averages 2 and 4; group "b" holds the single value 5.
    let means = table_member_get(&summary, &Value::from("mean_X")).unwrap();
    let Value::Tensor(tensor) = &means else {
        panic!("expected tensor, got {means:?}");
    };
    assert_eq!(tensor.data, vec![3.0, 5.0]);
}
5521
/// Groups by a numeric variable with keys `10` and `2` and checks that the summary lists
/// group `2` before group `10`, with the matching `sum_X` values.
#[test]
fn groupsummary_orders_numeric_groups_numerically() {
    let group = Value::Tensor(Tensor::new(vec![10.0, 2.0, 10.0], vec![3, 1]).unwrap());
    let value = Value::Tensor(Tensor::new(vec![1.0, 5.0, 3.0], vec![3, 1]).unwrap());
    let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();

    // No data variables are named explicitly.
    let raw_summary =
        groupsummary_impl(table, Value::from("G"), Value::from("sum"), vec![]).unwrap();
    let summary = object(raw_summary);

    let group_keys = table_member_get(&summary, &Value::from("G")).unwrap();
    let Value::Tensor(keys) = &group_keys else {
        panic!("expected tensor, got {group_keys:?}");
    };
    assert_eq!(keys.data, vec![2.0, 10.0]);

    // Group 2 sums the single value 5; group 10 sums 1 and 3.
    let group_sums = table_member_get(&summary, &Value::from("sum_X")).unwrap();
    let Value::Tensor(sums) = &group_sums else {
        panic!("expected tensor, got {group_sums:?}");
    };
    assert_eq!(sums.data, vec![5.0, 4.0]);
}
5538}