Skip to main content

runmat_runtime/builtins/table/
mod.rs

//! MATLAB table datatype support and tabular workflow builtins.
2
3use std::cmp::Ordering;
4use std::collections::{BTreeMap, HashMap, HashSet};
5use std::io::{Cursor, Read};
6use std::path::{Path, PathBuf};
7use std::sync::OnceLock;
8
9use calamine::{open_workbook_auto_from_rs, Data as SpreadsheetData, Reader as SpreadsheetReader};
10use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11use encoding_rs::{Encoding, UTF_8};
12use runmat_builtins::{
13    Access, BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15    CellArray, CharArray, ClassDef, ComplexTensor, LogicalArray, MethodDef, NumericDType,
16    ObjectInstance, PropertyDef, StringArray, StructValue, Tensor, Value,
17};
18use runmat_filesystem::File;
19use runmat_macros::runtime_builtin;
20
21use crate::builtins::common::fs::expand_user_path;
22use crate::builtins::common::spec::{
23    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
24    ReductionNaN, ResidencyPolicy, ShapeRequirements,
25};
26use crate::{
27    build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError, OBJECT_INDEX_BRACE,
28    OBJECT_INDEX_MEMBER, OBJECT_INDEX_PAREN, OBJECT_SUBSASGN_METHOD, OBJECT_SUBSREF_METHOD,
29};
30
/// Class name used when registering table objects; also passed as the builtin
/// name on the errors constructed in this module.
pub const TABLE_CLASS: &str = "table";

// Reserved field names. Presumably the hidden object fields that hold a
// table's column data and its properties struct — confirm against their uses.
const TABLE_VARIABLES_FIELD: &str = "__table_variables";
const TABLE_PROPERTIES_FIELD: &str = "__table_properties";

/// Name of the public member registered on the table class.
const PROPERTIES_MEMBER: &str = "Properties";

// Property names. Presumably the keys of the `Properties` struct — confirm
// against `default_properties`.
const VARIABLE_NAMES: &str = "VariableNames";
const ROW_NAMES: &str = "RowNames";
const DIMENSION_NAMES: &str = "DimensionNames";
const VARIABLE_UNITS: &str = "VariableUnits";
const VARIABLE_DESCRIPTIONS: &str = "VariableDescriptions";
const DESCRIPTION: &str = "Description";
const USER_DATA: &str = "UserData";

// Default dimension names (presumably the initial `DimensionNames` value).
const DEFAULT_ROW_DIM_NAME: &str = "Rows";
const DEFAULT_VARIABLE_DIM_NAME: &str = "Variables";

/// One-shot cell that lets `ensure_table_class_registered` run its
/// registration closure at most once.
static TABLE_CLASS_REGISTERED: OnceLock<()> = OnceLock::new();
46
47const ANY_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
48    name: "out",
49    ty: BuiltinParamType::Any,
50    arity: BuiltinParamArity::Required,
51    default: None,
52    description: "Result value.",
53}];
54const NUM_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
55    name: "n",
56    ty: BuiltinParamType::IntegerScalar,
57    arity: BuiltinParamArity::Required,
58    default: None,
59    description: "Count.",
60}];
61const TABLE_INPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
62    name: "T",
63    ty: BuiltinParamType::Any,
64    arity: BuiltinParamArity::Required,
65    default: None,
66    description: "Table input.",
67}];
68const READTABLE_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
69    name: "filename",
70    ty: BuiltinParamType::StringScalar,
71    arity: BuiltinParamArity::Required,
72    default: None,
73    description: "Text or spreadsheet file path.",
74}];
75const READTABLE_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
76    BuiltinParamDescriptor {
77        name: "filename",
78        ty: BuiltinParamType::StringScalar,
79        arity: BuiltinParamArity::Required,
80        default: None,
81        description: "Text or spreadsheet file path.",
82    },
83    BuiltinParamDescriptor {
84        name: "nameValuePairs",
85        ty: BuiltinParamType::Any,
86        arity: BuiltinParamArity::Variadic,
87        default: None,
88        description: "Name-value import options.",
89    },
90];
91const SPREADSHEET_IMPORT_OPTIONS_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
92    name: "opts",
93    ty: BuiltinParamType::Any,
94    arity: BuiltinParamArity::Required,
95    default: None,
96    description: "Spreadsheet import options struct.",
97}];
98const SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 1] =
99    [BuiltinParamDescriptor {
100        name: "nameValuePairs",
101        ty: BuiltinParamType::Any,
102        arity: BuiltinParamArity::Variadic,
103        default: None,
104        description: "Name-value option pairs.",
105    }];
106const TABLE_INPUTS_VALUES: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
107    name: "variables",
108    ty: BuiltinParamType::Any,
109    arity: BuiltinParamArity::Variadic,
110    default: None,
111    description: "Variables to assemble as table columns.",
112}];
113const GROUPSUMMARY_INPUTS: [BuiltinParamDescriptor; 4] = [
114    BuiltinParamDescriptor {
115        name: "T",
116        ty: BuiltinParamType::Any,
117        arity: BuiltinParamArity::Required,
118        default: None,
119        description: "Input table.",
120    },
121    BuiltinParamDescriptor {
122        name: "groupvars",
123        ty: BuiltinParamType::Any,
124        arity: BuiltinParamArity::Required,
125        default: None,
126        description: "Grouping variable name or names.",
127    },
128    BuiltinParamDescriptor {
129        name: "method",
130        ty: BuiltinParamType::Any,
131        arity: BuiltinParamArity::Required,
132        default: None,
133        description: "Summary method name or names.",
134    },
135    BuiltinParamDescriptor {
136        name: "datavars",
137        ty: BuiltinParamType::Any,
138        arity: BuiltinParamArity::Optional,
139        default: None,
140        description: "Data variable name or names.",
141    },
142];
143const OBJECT_INDEX_INPUTS: [BuiltinParamDescriptor; 3] = [
144    BuiltinParamDescriptor {
145        name: "obj",
146        ty: BuiltinParamType::Any,
147        arity: BuiltinParamArity::Required,
148        default: None,
149        description: "Table object receiver.",
150    },
151    BuiltinParamDescriptor {
152        name: "kind",
153        ty: BuiltinParamType::StringScalar,
154        arity: BuiltinParamArity::Required,
155        default: None,
156        description: "Index kind token.",
157    },
158    BuiltinParamDescriptor {
159        name: "payload",
160        ty: BuiltinParamType::Any,
161        arity: BuiltinParamArity::Required,
162        default: None,
163        description: "Index payload.",
164    },
165];
166const OBJECT_ASSIGN_INPUTS: [BuiltinParamDescriptor; 4] = [
167    BuiltinParamDescriptor {
168        name: "obj",
169        ty: BuiltinParamType::Any,
170        arity: BuiltinParamArity::Required,
171        default: None,
172        description: "Table object receiver.",
173    },
174    BuiltinParamDescriptor {
175        name: "kind",
176        ty: BuiltinParamType::StringScalar,
177        arity: BuiltinParamArity::Required,
178        default: None,
179        description: "Index kind token.",
180    },
181    BuiltinParamDescriptor {
182        name: "payload",
183        ty: BuiltinParamType::Any,
184        arity: BuiltinParamArity::Required,
185        default: None,
186        description: "Index payload.",
187    },
188    BuiltinParamDescriptor {
189        name: "rhs",
190        ty: BuiltinParamType::Any,
191        arity: BuiltinParamArity::Required,
192        default: None,
193        description: "Assigned value.",
194    },
195];
196
197const READTABLE_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
198    BuiltinSignatureDescriptor {
199        label: "T = readtable(filename)",
200        inputs: &READTABLE_INPUTS_FILENAME,
201        outputs: &ANY_OUTPUT,
202    },
203    BuiltinSignatureDescriptor {
204        label: "T = readtable(filename, nameValuePairs...)",
205        inputs: &READTABLE_INPUTS_NAME_VALUE,
206        outputs: &ANY_OUTPUT,
207    },
208];
209const SPREADSHEET_IMPORT_OPTIONS_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
210    BuiltinSignatureDescriptor {
211        label: "opts = spreadsheetImportOptions()",
212        inputs: &[],
213        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
214    },
215    BuiltinSignatureDescriptor {
216        label: "opts = spreadsheetImportOptions(nameValuePairs...)",
217        inputs: &SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE,
218        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
219    },
220];
221const TABLE_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
222    label: "T = table(variables...)",
223    inputs: &TABLE_INPUTS_VALUES,
224    outputs: &ANY_OUTPUT,
225}];
226const GROUPSUMMARY_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
227    label: "G = groupsummary(T, groupvars, method, datavars)",
228    inputs: &GROUPSUMMARY_INPUTS,
229    outputs: &ANY_OUTPUT,
230}];
231const HEIGHT_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
232    label: "n = height(T)",
233    inputs: &TABLE_INPUT,
234    outputs: &NUM_OUTPUT,
235}];
236const WIDTH_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
237    label: "n = width(T)",
238    inputs: &TABLE_INPUT,
239    outputs: &NUM_OUTPUT,
240}];
241const OBJECT_SUBSREF_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
242    label: "out = table.subsref(obj, kind, payload)",
243    inputs: &OBJECT_INDEX_INPUTS,
244    outputs: &ANY_OUTPUT,
245}];
246const OBJECT_SUBSASGN_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
247    label: "obj = table.subsasgn(obj, kind, payload, rhs)",
248    inputs: &OBJECT_ASSIGN_INPUTS,
249    outputs: &ANY_OUTPUT,
250}];
251
252const TABLE_ERROR_INVALID_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
253    code: "RM.TABLE.INVALID_ARGUMENT",
254    identifier: Some("RunMat:table:InvalidArgument"),
255    when: "Arguments or table metadata are invalid.",
256    message: "table: invalid argument",
257};
258const TABLE_ERROR_INVALID_INDEX: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
259    code: "RM.TABLE.INVALID_INDEX",
260    identifier: Some("RunMat:table:InvalidIndex"),
261    when: "Table indexing is invalid.",
262    message: "table: invalid index",
263};
264const TABLE_ERROR_INVALID_VARIABLE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
265    code: "RM.TABLE.INVALID_VARIABLE",
266    identifier: Some("RunMat:table:InvalidVariable"),
267    when: "A table variable name or value is invalid.",
268    message: "table: invalid variable",
269};
270const TABLE_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
271    code: "RM.READTABLE.IO",
272    identifier: Some("RunMat:readtable:IOError"),
273    when: "readtable cannot open or read the requested file.",
274    message: "readtable: file read failed",
275};
276const TABLE_ERROR_UNSUPPORTED_FILE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
277    code: "RM.READTABLE.UNSUPPORTED_FILE",
278    identifier: Some("RunMat:readtable:UnsupportedFileType"),
279    when: "readtable receives a file type outside the text or spreadsheet import backends.",
280    message: "readtable: unsupported file type",
281};
282const TABLE_ERRORS: [BuiltinErrorDescriptor; 5] = [
283    TABLE_ERROR_INVALID_ARGUMENT,
284    TABLE_ERROR_INVALID_INDEX,
285    TABLE_ERROR_INVALID_VARIABLE,
286    TABLE_ERROR_IO,
287    TABLE_ERROR_UNSUPPORTED_FILE,
288];
289
290pub const READTABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
291    signatures: &READTABLE_SIGNATURES,
292    output_mode: BuiltinOutputMode::Fixed,
293    completion_policy: BuiltinCompletionPolicy::Public,
294    errors: &TABLE_ERRORS,
295};
296pub const SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
297    signatures: &SPREADSHEET_IMPORT_OPTIONS_SIGNATURES,
298    output_mode: BuiltinOutputMode::Fixed,
299    completion_policy: BuiltinCompletionPolicy::Public,
300    errors: &TABLE_ERRORS,
301};
302pub const TABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
303    signatures: &TABLE_SIGNATURES,
304    output_mode: BuiltinOutputMode::Fixed,
305    completion_policy: BuiltinCompletionPolicy::Public,
306    errors: &TABLE_ERRORS,
307};
308pub const GROUPSUMMARY_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
309    signatures: &GROUPSUMMARY_SIGNATURES,
310    output_mode: BuiltinOutputMode::Fixed,
311    completion_policy: BuiltinCompletionPolicy::Public,
312    errors: &TABLE_ERRORS,
313};
314pub const HEIGHT_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
315    signatures: &HEIGHT_SIGNATURES,
316    output_mode: BuiltinOutputMode::Fixed,
317    completion_policy: BuiltinCompletionPolicy::Public,
318    errors: &TABLE_ERRORS,
319};
320pub const WIDTH_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
321    signatures: &WIDTH_SIGNATURES,
322    output_mode: BuiltinOutputMode::Fixed,
323    completion_policy: BuiltinCompletionPolicy::Public,
324    errors: &TABLE_ERRORS,
325};
326pub const TABLE_SUBSREF_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
327    signatures: &OBJECT_SUBSREF_SIGNATURES,
328    output_mode: BuiltinOutputMode::Fixed,
329    completion_policy: BuiltinCompletionPolicy::MethodOnly,
330    errors: &TABLE_ERRORS,
331};
332pub const TABLE_SUBSASGN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
333    signatures: &OBJECT_SUBSASGN_SIGNATURES,
334    output_mode: BuiltinOutputMode::Fixed,
335    completion_policy: BuiltinCompletionPolicy::MethodOnly,
336    errors: &TABLE_ERRORS,
337};
338
339#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::table")]
340pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
341    name: "table",
342    op_kind: GpuOpKind::Custom("table"),
343    supported_precisions: &[],
344    broadcast: BroadcastSemantics::None,
345    provider_hooks: &[],
346    constant_strategy: ConstantStrategy::InlineLiteral,
347    residency: ResidencyPolicy::GatherImmediately,
348    nan_mode: ReductionNaN::Include,
349    two_pass_threshold: None,
350    workgroup_size: None,
351    accepts_nan_mode: false,
352    notes: "Tables are host containers. GPU variables are gathered when tabular algorithms need row-wise access.",
353};
354
355#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::table")]
356pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
357    name: "table",
358    shape: ShapeRequirements::Any,
359    constant_strategy: ConstantStrategy::InlineLiteral,
360    elementwise: None,
361    reduction: None,
362    emits_nan: false,
363    notes: "Tables are structured host containers and are not fusion operands.",
364};
365
366fn table_error(error: &'static BuiltinErrorDescriptor, message: impl Into<String>) -> RuntimeError {
367    let mut builder = build_runtime_error(message).with_builtin(TABLE_CLASS);
368    if let Some(identifier) = error.identifier {
369        builder = builder.with_identifier(identifier);
370    }
371    builder.build()
372}
373
374fn table_error_with_source<E>(
375    error: &'static BuiltinErrorDescriptor,
376    message: impl Into<String>,
377    source: E,
378) -> RuntimeError
379where
380    E: std::error::Error + Send + Sync + 'static,
381{
382    let mut builder = build_runtime_error(message)
383        .with_builtin(TABLE_CLASS)
384        .with_source(source);
385    if let Some(identifier) = error.identifier {
386        builder = builder.with_identifier(identifier);
387    }
388    builder.build()
389}
390
391fn invalid_argument(message: impl Into<String>) -> RuntimeError {
392    table_error(&TABLE_ERROR_INVALID_ARGUMENT, message)
393}
394
395fn invalid_index(message: impl Into<String>) -> RuntimeError {
396    table_error(&TABLE_ERROR_INVALID_INDEX, message)
397}
398
399fn invalid_variable(message: impl Into<String>) -> RuntimeError {
400    table_error(&TABLE_ERROR_INVALID_VARIABLE, message)
401}
402
403fn map_control_flow(err: RuntimeError) -> RuntimeError {
404    let identifier = err.identifier().map(ToString::to_string);
405    let message = err.message().to_string();
406    let mut builder = build_runtime_error(message)
407        .with_builtin(TABLE_CLASS)
408        .with_source(err);
409    if let Some(identifier) = identifier {
410        builder = builder.with_identifier(identifier);
411    }
412    builder.build()
413}
414
415pub fn ensure_table_class_registered() {
416    TABLE_CLASS_REGISTERED.get_or_init(|| {
417        let mut properties = HashMap::new();
418        properties.insert(
419            PROPERTIES_MEMBER.to_string(),
420            PropertyDef {
421                name: PROPERTIES_MEMBER.to_string(),
422                is_static: false,
423                is_constant: false,
424                is_dependent: false,
425                get_access: Access::Public,
426                set_access: Access::Public,
427                default_value: Some(Value::Struct(default_properties(Vec::new(), None))),
428            },
429        );
430
431        let mut methods = HashMap::new();
432        for name in [OBJECT_SUBSREF_METHOD, OBJECT_SUBSASGN_METHOD] {
433            methods.insert(
434                name.to_string(),
435                MethodDef {
436                    name: name.to_string(),
437                    is_static: false,
438                    is_abstract: false,
439                    is_sealed: false,
440                    access: Access::Public,
441                    function_name: format!("{TABLE_CLASS}.{name}"),
442                    implicit_class_argument: None,
443                },
444            );
445        }
446
447        runmat_builtins::register_class(ClassDef {
448            name: TABLE_CLASS.to_string(),
449            parent: None,
450            properties,
451            methods,
452        });
453    });
454}
455
456#[runtime_builtin(
457    name = "table",
458    category = "table",
459    summary = "Create a table from named column variables.",
460    keywords = "table,VariableNames,RowNames,Properties",
461    accel = "cpu",
462    type_resolver(crate::builtins::io::type_resolvers::struct_type),
463    descriptor(crate::builtins::table::TABLE_DESCRIPTOR),
464    builtin_path = "crate::builtins::table"
465)]
466async fn table_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
467    ensure_table_class_registered();
468    let gathered = gather_values(&args).await?;
469    let (variables, options) = split_table_constructor_args(gathered)?;
470    let names = if let Some(names) = options.variable_names {
471        names
472    } else {
473        generated_variable_names(variables.len())
474    };
475    table_from_columns_with_properties(names, variables, options.row_names)
476}
477
478#[runtime_builtin(
479    name = "readtable",
480    category = "io/tabular",
481    summary = "Import tabular text or spreadsheet data into a table.",
482    keywords = "readtable,table,csv,tsv,xlsx,xls,ods,spreadsheet,VariableNames,RowNames,Sheet,Range",
483    accel = "cpu",
484    type_resolver(crate::builtins::io::type_resolvers::struct_type),
485    descriptor(crate::builtins::table::READTABLE_DESCRIPTOR),
486    builtin_path = "crate::builtins::table"
487)]
488async fn readtable_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
489    ensure_table_class_registered();
490    let path_value = gather_if_needed_async(&path)
491        .await
492        .map_err(map_control_flow)?;
493    let args = gather_values(&rest).await?;
494    let options = ReadTableOptions::parse(&args)?;
495    let resolved = resolve_path(&path_value)?;
496    read_table_from_file(&resolved, &options).await
497}
498
499#[runtime_builtin(
500    name = "spreadsheetImportOptions",
501    category = "io/tabular",
502    summary = "Create spreadsheet import options for readtable.",
503    keywords = "spreadsheetImportOptions,readtable,spreadsheet,xlsx,xls,DataRange,VariableTypes,VariableNames,NumVariables",
504    accel = "cpu",
505    type_resolver(crate::builtins::io::type_resolvers::struct_type),
506    descriptor(crate::builtins::table::SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR),
507    builtin_path = "crate::builtins::table"
508)]
509async fn spreadsheet_import_options_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
510    let gathered = gather_values(&args).await?;
511    spreadsheet_import_options(gathered)
512}
513
514#[runtime_builtin(
515    name = "height",
516    category = "table",
517    summary = "Return the number of rows in a table.",
518    keywords = "height,table,rows",
519    descriptor(crate::builtins::table::HEIGHT_DESCRIPTOR),
520    builtin_path = "crate::builtins::table"
521)]
522async fn height_builtin(value: Value) -> BuiltinResult<Value> {
523    let host = gather_if_needed_async(&value)
524        .await
525        .map_err(map_control_flow)?;
526    if let Some(object) = table_object(&host) {
527        return Ok(Value::Num(table_height(object)? as f64));
528    }
529    value_row_count(&host).map(|n| Value::Num(n as f64))
530}
531
532#[runtime_builtin(
533    name = "width",
534    category = "table",
535    summary = "Return the number of variables in a table.",
536    keywords = "width,table,variables",
537    descriptor(crate::builtins::table::WIDTH_DESCRIPTOR),
538    builtin_path = "crate::builtins::table"
539)]
540async fn width_builtin(value: Value) -> BuiltinResult<Value> {
541    let host = gather_if_needed_async(&value)
542        .await
543        .map_err(map_control_flow)?;
544    if let Some(object) = table_object(&host) {
545        return Ok(Value::Num(table_width(object)? as f64));
546    }
547    match host {
548        Value::Tensor(t) => Ok(Value::Num(t.cols() as f64)),
549        Value::ComplexTensor(t) => Ok(Value::Num(t.cols as f64)),
550        Value::StringArray(sa) => Ok(Value::Num(sa.cols() as f64)),
551        Value::LogicalArray(la) => Ok(Value::Num(la.shape.get(1).copied().unwrap_or(1) as f64)),
552        Value::Cell(ca) => Ok(Value::Num(ca.cols as f64)),
553        Value::CharArray(ca) => Ok(Value::Num(ca.cols as f64)),
554        _ => Ok(Value::Num(1.0)),
555    }
556}
557
558#[runtime_builtin(
559    name = "groupsummary",
560    category = "table",
561    summary = "Group table rows and compute summary statistics for data variables.",
562    keywords = "groupsummary,group,table,mean,sum,count,median,min,max",
563    accel = "cpu",
564    descriptor(crate::builtins::table::GROUPSUMMARY_DESCRIPTOR),
565    builtin_path = "crate::builtins::table"
566)]
567async fn groupsummary_builtin(
568    table: Value,
569    groupvars: Value,
570    method: Value,
571    rest: Vec<Value>,
572) -> BuiltinResult<Value> {
573    let table = gather_if_needed_async(&table)
574        .await
575        .map_err(map_control_flow)?;
576    let groupvars = gather_if_needed_async(&groupvars)
577        .await
578        .map_err(map_control_flow)?;
579    let method = gather_if_needed_async(&method)
580        .await
581        .map_err(map_control_flow)?;
582    let rest = gather_values(&rest).await?;
583    groupsummary_impl(table, groupvars, method, rest)
584}
585
586#[runtime_builtin(
587    name = "table.subsref",
588    descriptor(crate::builtins::table::TABLE_SUBSREF_DESCRIPTOR),
589    builtin_path = "crate::builtins::table"
590)]
591async fn table_subsref(obj: Value, kind: String, payload: Value) -> BuiltinResult<Value> {
592    let object = into_table_object(obj, "table.subsref")?;
593    match kind.as_str() {
594        OBJECT_INDEX_MEMBER => table_member_get(&object, &payload),
595        OBJECT_INDEX_PAREN => table_paren_get(&object, &payload),
596        OBJECT_INDEX_BRACE => table_brace_get(&object, &payload),
597        other => Err(invalid_index(format!(
598            "table.subsref: unsupported indexing kind '{other}'"
599        ))),
600    }
601}
602
603#[runtime_builtin(
604    name = "table.subsasgn",
605    descriptor(crate::builtins::table::TABLE_SUBSASGN_DESCRIPTOR),
606    builtin_path = "crate::builtins::table"
607)]
608async fn table_subsasgn(
609    obj: Value,
610    kind: String,
611    payload: Value,
612    rhs: Value,
613) -> BuiltinResult<Value> {
614    let mut object = into_table_object(obj, "table.subsasgn")?;
615    match kind.as_str() {
616        OBJECT_INDEX_MEMBER => {
617            let field = scalar_text(&payload, "table member")?;
618            table_member_set(&mut object, &field, rhs)?;
619            Ok(Value::Object(object))
620        }
621        OBJECT_INDEX_PAREN => table_paren_assign(object, &payload, rhs),
622        OBJECT_INDEX_BRACE => table_brace_assign(object, &payload, rhs),
623        other => Err(invalid_index(format!(
624            "table.subsasgn: unsupported indexing kind '{other}'"
625        ))),
626    }
627}
628
629async fn gather_values(values: &[Value]) -> BuiltinResult<Vec<Value>> {
630    let mut out = Vec::with_capacity(values.len());
631    for value in values {
632        out.push(
633            gather_if_needed_async(value)
634                .await
635                .map_err(map_control_flow)?,
636        );
637    }
638    Ok(out)
639}
640
/// Name-value options recognised by the `table(...)` constructor.
///
/// Both fields stay `None` unless the corresponding option was supplied.
#[derive(Default)]
struct TableConstructorOptions {
    /// Value of the `VariableNames` option.
    variable_names: Option<Vec<String>>,
    /// Value of the `RowNames` option.
    row_names: Option<Vec<String>>,
}
646
647fn split_table_constructor_args(
648    args: Vec<Value>,
649) -> BuiltinResult<(Vec<Value>, TableConstructorOptions)> {
650    let mut variables = Vec::new();
651    let mut options = TableConstructorOptions::default();
652    let mut idx = 0usize;
653    while idx < args.len() {
654        if let Ok(name) = scalar_text(&args[idx], "table option") {
655            if idx + 1 < args.len() && is_table_constructor_option(&name) {
656                let value = &args[idx + 1];
657                if name.eq_ignore_ascii_case("VariableNames") {
658                    options.variable_names = Some(variable_name_list(value)?);
659                } else if name.eq_ignore_ascii_case("RowNames") {
660                    options.row_names = Some(string_list(value)?);
661                }
662                idx += 2;
663                continue;
664            }
665        }
666        variables.push(args[idx].clone());
667        idx += 1;
668    }
669    Ok((variables, options))
670}
671
/// Returns `true` when `name` is `VariableNames` or `RowNames`, ignoring ASCII case.
fn is_table_constructor_option(name: &str) -> bool {
    ["VariableNames", "RowNames"]
        .iter()
        .any(|option| name.eq_ignore_ascii_case(option))
}
675
676#[derive(Clone)]
677struct ReadTableOptions {
678    file_type: ImportFileType,
679    delimiter: Option<Delimiter>,
680    read_variable_names: Option<bool>,
681    read_row_names: bool,
682    num_variables: Option<usize>,
683    variable_names: Option<Vec<String>>,
684    variable_types: Option<Vec<ImportVariableType>>,
685    row_names: Option<Vec<String>>,
686    num_header_lines: usize,
687    range: Option<RangeSpec>,
688    sheet: Option<SheetSelector>,
689    preserve_variable_names: bool,
690    treat_as_missing: HashSet<String>,
691    empty_line_rule: EmptyLineRule,
692    text_type: TextImportType,
693    encoding: String,
694    datetime_type: DatetimeImportType,
695}
696
697impl Default for ReadTableOptions {
698    fn default() -> Self {
699        Self {
700            file_type: ImportFileType::Auto,
701            delimiter: None,
702            read_variable_names: None,
703            read_row_names: false,
704            num_variables: None,
705            variable_names: None,
706            variable_types: None,
707            row_names: None,
708            num_header_lines: 0,
709            range: None,
710            sheet: None,
711            preserve_variable_names: false,
712            treat_as_missing: HashSet::new(),
713            empty_line_rule: EmptyLineRule::Skip,
714            text_type: TextImportType::String,
715            encoding: "utf-8".to_string(),
716            datetime_type: DatetimeImportType::Datetime,
717        }
718    }
719}
720
721impl ReadTableOptions {
722    fn parse(args: &[Value]) -> BuiltinResult<Self> {
723        let mut options = Self::default();
724        let mut idx = 0usize;
725        if let Some(Value::Struct(st)) = args.first() {
726            for (name, value) in &st.fields {
727                options.apply(name, value)?;
728            }
729            idx = 1;
730        }
731        while idx < args.len() {
732            if idx + 1 >= args.len() {
733                return Err(invalid_argument(
734                    "readtable: name-value options must be provided in pairs",
735                ));
736            }
737            let name = scalar_text(&args[idx], "readtable option")?;
738            options.apply(&name, &args[idx + 1])?;
739            idx += 2;
740        }
741        Ok(options)
742    }
743
744    fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
745        if name.eq_ignore_ascii_case("FileType") {
746            self.file_type = ImportFileType::parse(value)?;
747        } else if name.eq_ignore_ascii_case("Delimiter") {
748            self.delimiter = Some(Delimiter::parse(value)?);
749        } else if name.eq_ignore_ascii_case("ReadVariableNames") {
750            self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
751        } else if name.eq_ignore_ascii_case("ReadRowNames") {
752            self.read_row_names = bool_scalar(value, "ReadRowNames")?;
753        } else if name.eq_ignore_ascii_case("NumVariables") {
754            let count = nonnegative_usize(value, "NumVariables")?;
755            self.num_variables = (count > 0).then_some(count);
756        } else if name.eq_ignore_ascii_case("VariableNames") {
757            self.variable_names = optional_raw_variable_name_list(value)?;
758        } else if name.eq_ignore_ascii_case("VariableTypes") {
759            self.variable_types = optional_variable_type_list(value)?;
760        } else if name.eq_ignore_ascii_case("RowNames") {
761            self.row_names = Some(string_list(value)?);
762        } else if name.eq_ignore_ascii_case("NumHeaderLines") {
763            self.num_header_lines = nonnegative_usize(value, "NumHeaderLines")?;
764        } else if name.eq_ignore_ascii_case("Range") {
765            self.range = Some(RangeSpec::parse(value)?);
766        } else if name.eq_ignore_ascii_case("DataRange") {
767            self.range = optional_range_spec(value)?;
768        } else if name.eq_ignore_ascii_case("Sheet") {
769            self.sheet = optional_sheet_selector(value)?;
770        } else if name.eq_ignore_ascii_case("TreatAsMissing") {
771            for token in string_list(value)? {
772                self.treat_as_missing
773                    .insert(token.trim().to_ascii_lowercase());
774            }
775        } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
776            self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
777        } else if name.eq_ignore_ascii_case("VariableNamingRule") {
778            let rule = scalar_text(value, "VariableNamingRule")?;
779            if rule.eq_ignore_ascii_case("preserve") {
780                self.preserve_variable_names = true;
781            } else if rule.eq_ignore_ascii_case("modify") {
782                self.preserve_variable_names = false;
783            } else {
784                return Err(invalid_argument(format!(
785                    "readtable: unsupported VariableNamingRule '{rule}'"
786                )));
787            }
788        } else if name.eq_ignore_ascii_case("EmptyLineRule") {
789            let rule = scalar_text(value, "EmptyLineRule")?;
790            self.empty_line_rule = if rule.eq_ignore_ascii_case("read") {
791                EmptyLineRule::Read
792            } else if rule.eq_ignore_ascii_case("skip") {
793                EmptyLineRule::Skip
794            } else {
795                return Err(invalid_argument(format!(
796                    "readtable: unsupported EmptyLineRule '{rule}'"
797                )));
798            };
799        } else if name.eq_ignore_ascii_case("Encoding") {
800            let encoding = scalar_text(value, "Encoding")?;
801            validate_encoding_label(&encoding)?;
802            self.encoding = encoding;
803        } else if name.eq_ignore_ascii_case("TextType") {
804            self.text_type = TextImportType::parse(value, "readtable")?;
805        } else if name.eq_ignore_ascii_case("DatetimeType") {
806            self.datetime_type = DatetimeImportType::parse(value)?;
807        } else {
808            return Err(invalid_argument(format!(
809                "readtable: unsupported option '{name}'"
810            )));
811        }
812        Ok(())
813    }
814
815    fn is_missing(&self, token: &str) -> bool {
816        let trimmed = token.trim();
817        trimmed.is_empty()
818            || self
819                .treat_as_missing
820                .contains(&trimmed.to_ascii_lowercase())
821    }
822}
823
824fn spreadsheet_import_options(args: Vec<Value>) -> BuiltinResult<Value> {
825    if !args.len().is_multiple_of(2) {
826        return Err(invalid_argument(
827            "spreadsheetImportOptions: name-value options must be provided in pairs",
828        ));
829    }
830    let mut options = SpreadsheetImportOptions::default();
831    let mut idx = 0usize;
832    while idx < args.len() {
833        let name = scalar_text(&args[idx], "spreadsheetImportOptions option")?;
834        options.apply(&name, &args[idx + 1])?;
835        idx += 2;
836    }
837    Ok(Value::Struct(options.into_struct()?))
838}
839
/// Settings collected by the `spreadsheetImportOptions` builtin before they
/// are converted into a struct value by `into_struct`.
#[derive(Clone)]
struct SpreadsheetImportOptions {
    /// Number of variables. `0` means the count has not been fixed yet; the
    /// first `VariableNames`/`VariableTypes` list then determines it.
    num_variables: usize,
    /// Explicit `ReadVariableNames` setting; `None` when the option was never
    /// supplied (the field is then omitted from the output struct).
    read_variable_names: Option<bool>,
    /// `ReadRowNames` setting.
    read_row_names: bool,
    /// Variable names, padded with `Var<N>` placeholders up to `num_variables`.
    variable_names: Vec<String>,
    /// Variable type labels, padded with `"auto"` up to `num_variables`.
    variable_types: Vec<String>,
    /// Raw `DataRange`/`Range` value as supplied (validated with
    /// `RangeSpec::parse` before being stored); `None` when empty or unset.
    data_range: Option<Value>,
    /// Raw `Sheet` value as supplied (validated with `SheetSelector::parse`
    /// before being stored); `None` when empty or unset.
    sheet: Option<Value>,
    /// `TreatAsMissing` tokens, stored as returned by `string_list`.
    treat_as_missing: Vec<String>,
    /// `true` for `PreserveVariableNames` or `VariableNamingRule = "preserve"`.
    preserve_variable_names: bool,
    /// Lowercased `EmptyLineRule` label: `"read"` or `"skip"`.
    empty_line_rule: String,
    /// Lowercased `TextType` label: `"string"` or `"char"`.
    text_type: String,
    /// Lowercased `DatetimeType` label: `"datetime"`, `"text"`, or
    /// `"exceldatenum"`.
    datetime_type: String,
}
855
856impl Default for SpreadsheetImportOptions {
857    fn default() -> Self {
858        let num_variables = 0;
859        Self {
860            num_variables,
861            read_variable_names: None,
862            read_row_names: false,
863            variable_names: Vec::new(),
864            variable_types: Vec::new(),
865            data_range: None,
866            sheet: None,
867            treat_as_missing: Vec::new(),
868            preserve_variable_names: false,
869            empty_line_rule: "skip".to_string(),
870            text_type: "string".to_string(),
871            datetime_type: "datetime".to_string(),
872        }
873    }
874}
875
876impl SpreadsheetImportOptions {
877    fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
878        if name.eq_ignore_ascii_case("NumVariables") {
879            self.resize_variables(positive_usize(value, "NumVariables")?);
880        } else if name.eq_ignore_ascii_case("VariableNames") {
881            self.variable_names = raw_variable_name_list(value)?;
882            self.align_variable_metadata_count(self.variable_names.len(), "VariableNames")?;
883            self.ensure_variable_metadata_len();
884        } else if name.eq_ignore_ascii_case("VariableTypes") {
885            let types = variable_type_names(value)?;
886            self.variable_types = types;
887            self.align_variable_metadata_count(self.variable_types.len(), "VariableTypes")?;
888            self.ensure_variable_metadata_len();
889        } else if name.eq_ignore_ascii_case("DataRange") || name.eq_ignore_ascii_case("Range") {
890            self.data_range = if option_value_is_empty(value) {
891                None
892            } else {
893                RangeSpec::parse(value)?;
894                Some(value.clone())
895            };
896        } else if name.eq_ignore_ascii_case("Sheet") {
897            self.sheet = if option_value_is_empty(value) {
898                None
899            } else {
900                SheetSelector::parse(value)?;
901                Some(value.clone())
902            };
903        } else if name.eq_ignore_ascii_case("ReadVariableNames") {
904            self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
905        } else if name.eq_ignore_ascii_case("ReadRowNames") {
906            self.read_row_names = bool_scalar(value, "ReadRowNames")?;
907        } else if name.eq_ignore_ascii_case("TreatAsMissing") {
908            self.treat_as_missing = string_list(value)?;
909        } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
910            self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
911        } else if name.eq_ignore_ascii_case("VariableNamingRule") {
912            let rule = scalar_text(value, "VariableNamingRule")?;
913            if rule.eq_ignore_ascii_case("preserve") {
914                self.preserve_variable_names = true;
915            } else if rule.eq_ignore_ascii_case("modify") {
916                self.preserve_variable_names = false;
917            } else {
918                return Err(invalid_argument(format!(
919                    "spreadsheetImportOptions: unsupported VariableNamingRule '{rule}'"
920                )));
921            }
922        } else if name.eq_ignore_ascii_case("EmptyLineRule") {
923            let rule = scalar_text(value, "EmptyLineRule")?;
924            if !(rule.eq_ignore_ascii_case("read") || rule.eq_ignore_ascii_case("skip")) {
925                return Err(invalid_argument(format!(
926                    "spreadsheetImportOptions: unsupported EmptyLineRule '{rule}'"
927                )));
928            }
929            self.empty_line_rule = rule.to_ascii_lowercase();
930        } else if name.eq_ignore_ascii_case("TextType") {
931            let text_type = scalar_text(value, "TextType")?;
932            if !(text_type.eq_ignore_ascii_case("string") || text_type.eq_ignore_ascii_case("char"))
933            {
934                return Err(invalid_argument(format!(
935                    "spreadsheetImportOptions: unsupported TextType '{text_type}'"
936                )));
937            }
938            self.text_type = text_type.to_ascii_lowercase();
939        } else if name.eq_ignore_ascii_case("DatetimeType") {
940            let datetime_type = scalar_text(value, "DatetimeType")?;
941            if !(datetime_type.eq_ignore_ascii_case("datetime")
942                || datetime_type.eq_ignore_ascii_case("text")
943                || datetime_type.eq_ignore_ascii_case("exceldatenum"))
944            {
945                return Err(invalid_argument(format!(
946                    "spreadsheetImportOptions: unsupported DatetimeType '{datetime_type}'"
947                )));
948            }
949            self.datetime_type = datetime_type.to_ascii_lowercase();
950        } else {
951            return Err(invalid_argument(format!(
952                "spreadsheetImportOptions: unsupported option '{name}'"
953            )));
954        }
955        Ok(())
956    }
957
958    fn resize_variables(&mut self, num_variables: usize) {
959        self.num_variables = num_variables;
960        if self.variable_names.len() > num_variables {
961            self.variable_names.truncate(num_variables);
962        }
963        if self.variable_types.len() > num_variables {
964            self.variable_types.truncate(num_variables);
965        }
966        self.ensure_variable_metadata_len();
967    }
968
969    fn align_variable_metadata_count(&mut self, len: usize, field: &str) -> BuiltinResult<()> {
970        if self.num_variables == 0 {
971            self.num_variables = len;
972            return Ok(());
973        }
974        if len > self.num_variables {
975            return Err(invalid_argument(format!(
976                "spreadsheetImportOptions: {field} length exceeds NumVariables"
977            )));
978        }
979        Ok(())
980    }
981
982    fn ensure_variable_metadata_len(&mut self) {
983        if self.num_variables == 0 {
984            return;
985        }
986        while self.variable_names.len() < self.num_variables {
987            self.variable_names
988                .push(format!("Var{}", self.variable_names.len() + 1));
989        }
990        self.variable_names.truncate(self.num_variables);
991        while self.variable_types.len() < self.num_variables {
992            self.variable_types.push("auto".to_string());
993        }
994        self.variable_types.truncate(self.num_variables);
995    }
996
997    fn into_struct(mut self) -> BuiltinResult<StructValue> {
998        self.ensure_variable_metadata_len();
999        let mut out = StructValue::new();
1000        out.insert("FileType", Value::String("spreadsheet".to_string()));
1001        out.insert("NumVariables", Value::Num(self.num_variables as f64));
1002        if let Some(read_variable_names) = self.read_variable_names {
1003            out.insert("ReadVariableNames", Value::Bool(read_variable_names));
1004        }
1005        out.insert("ReadRowNames", Value::Bool(self.read_row_names));
1006        out.insert(
1007            "VariableNames",
1008            Value::StringArray(
1009                StringArray::new(
1010                    self.variable_names.clone(),
1011                    vec![1, self.variable_names.len()],
1012                )
1013                .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1014            ),
1015        );
1016        out.insert(
1017            "VariableTypes",
1018            Value::StringArray(
1019                StringArray::new(
1020                    self.variable_types.clone(),
1021                    vec![1, self.variable_types.len()],
1022                )
1023                .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1024            ),
1025        );
1026        out.insert(
1027            "DataRange",
1028            self.data_range
1029                .unwrap_or_else(|| Value::String(String::new())),
1030        );
1031        out.insert(
1032            "Sheet",
1033            self.sheet.unwrap_or_else(|| Value::String(String::new())),
1034        );
1035        out.insert(
1036            "TreatAsMissing",
1037            Value::StringArray(
1038                StringArray::new(
1039                    self.treat_as_missing.clone(),
1040                    vec![1, self.treat_as_missing.len()],
1041                )
1042                .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1043            ),
1044        );
1045        out.insert(
1046            "PreserveVariableNames",
1047            Value::Bool(self.preserve_variable_names),
1048        );
1049        out.insert(
1050            "VariableNamingRule",
1051            Value::String(if self.preserve_variable_names {
1052                "preserve".to_string()
1053            } else {
1054                "modify".to_string()
1055            }),
1056        );
1057        out.insert("EmptyLineRule", Value::String(self.empty_line_rule));
1058        out.insert("TextType", Value::String(self.text_type));
1059        out.insert("DatetimeType", Value::String(self.datetime_type));
1060        Ok(out)
1061    }
1062}
1063
/// Target type requested for an imported variable through a `VariableTypes`
/// entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ImportVariableType {
    /// No explicit type requested (an empty label or `"auto"`).
    Auto,
    /// A numeric array of the given dtype (`double`, `single`, `uint8`, `uint16`).
    Numeric(NumericDType),
    /// Logical values (`logical`, `bool`, or `boolean`).
    Logical,
    /// Text stored as `string` or `char`, per the wrapped [`TextImportType`].
    Text(TextImportType),
    /// Text requested with the `cellstr` label.
    CellStr,
    /// Values requested with the `datetime` label.
    Datetime,
    /// Values requested with the `duration` label.
    Duration,
}
1074
1075impl ImportVariableType {
1076    fn parse(raw: &str) -> BuiltinResult<Self> {
1077        match raw.trim().to_ascii_lowercase().as_str() {
1078            "" | "auto" => Ok(Self::Auto),
1079            "double" => Ok(Self::Numeric(NumericDType::F64)),
1080            "single" => Ok(Self::Numeric(NumericDType::F32)),
1081            "uint8" => Ok(Self::Numeric(NumericDType::U8)),
1082            "uint16" => Ok(Self::Numeric(NumericDType::U16)),
1083            "logical" | "bool" | "boolean" => Ok(Self::Logical),
1084            "string" => Ok(Self::Text(TextImportType::String)),
1085            "char" => Ok(Self::Text(TextImportType::Char)),
1086            "cellstr" => Ok(Self::CellStr),
1087            "int8" | "int16" | "int32" | "int64" | "uint32" | "uint64" => {
1088                Err(invalid_argument(format!(
1089                    "readtable: unsupported VariableTypes entry '{}'; RunMat table imports currently support double, single, uint8, and uint16 numeric arrays",
1090                    raw.trim()
1091                )))
1092            }
1093            "categorical" => Err(invalid_argument(
1094                "readtable: unsupported VariableTypes entry 'categorical'; categorical arrays are not implemented in RunMat yet",
1095            )),
1096            "datetime" => Ok(Self::Datetime),
1097            "duration" => Ok(Self::Duration),
1098            other => Err(invalid_argument(format!(
1099                "readtable: unsupported VariableTypes entry '{other}'"
1100            ))),
1101        }
1102    }
1103
1104    fn canonical_label(raw: &str) -> BuiltinResult<String> {
1105        Self::parse(raw)?;
1106        let label = raw.trim().to_ascii_lowercase();
1107        Ok(if label.is_empty() {
1108            "auto".to_string()
1109        } else {
1110            label
1111        })
1112    }
1113}
1114
/// How imported text is represented, as selected by the `TextType` option or
/// a text `VariableTypes` entry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TextImportType {
    /// Selected by the `"string"` label.
    String,
    /// Selected by the `"char"` label.
    Char,
}
1120
1121impl TextImportType {
1122    fn parse(value: &Value, context: &str) -> BuiltinResult<Self> {
1123        let text_type = scalar_text(value, "TextType")?;
1124        match text_type.trim().to_ascii_lowercase().as_str() {
1125            "string" => Ok(Self::String),
1126            "char" => Ok(Self::Char),
1127            other => Err(invalid_argument(format!(
1128                "{context}: unsupported TextType '{other}'"
1129            ))),
1130        }
1131    }
1132}
1133
/// Handling of empty lines during import, selected by the `EmptyLineRule`
/// option.
#[derive(Clone, Copy)]
enum EmptyLineRule {
    /// Selected by `"skip"`; spreadsheet import drops rows whose cells are all
    /// missing under this rule.
    Skip,
    /// Selected by `"read"`; empty rows are kept.
    Read,
}
1139
/// Representation requested for imported date/time values through the
/// `DatetimeType` option.
#[derive(Clone, Copy)]
enum DatetimeImportType {
    /// Selected by `"datetime"`.
    Datetime,
    /// Selected by `"text"`.
    Text,
    /// Selected by `"exceldatenum"`.
    ExcelDatenum,
}
1146
1147impl DatetimeImportType {
1148    fn parse(value: &Value) -> BuiltinResult<Self> {
1149        let text = scalar_text(value, "DatetimeType")?;
1150        match text.trim().to_ascii_lowercase().as_str() {
1151            "datetime" => Ok(Self::Datetime),
1152            "text" => Ok(Self::Text),
1153            "exceldatenum" => Ok(Self::ExcelDatenum),
1154            other => Err(invalid_argument(format!(
1155                "readtable: unsupported DatetimeType '{other}'"
1156            ))),
1157        }
1158    }
1159}
1160
/// Kind of file to import, selected by the `FileType` option.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ImportFileType {
    /// Decide from the file path: spreadsheet extensions use the spreadsheet
    /// reader, everything else the text reader.
    Auto,
    /// Always use the delimited-text reader.
    Text,
    /// Always use the spreadsheet reader.
    Spreadsheet,
}
1167
1168impl ImportFileType {
1169    fn parse(value: &Value) -> BuiltinResult<Self> {
1170        let text = scalar_text(value, "FileType")?;
1171        match text.trim().to_ascii_lowercase().as_str() {
1172            "auto" => Ok(Self::Auto),
1173            "text" | "delimitedtext" | "delimited" => Ok(Self::Text),
1174            "spreadsheet" | "excel" => Ok(Self::Spreadsheet),
1175            other => Err(invalid_argument(format!(
1176                "readtable: unsupported FileType '{other}'"
1177            ))),
1178        }
1179    }
1180}
1181
/// Identifies the worksheet to read from a spreadsheet file.
#[derive(Clone)]
enum SheetSelector {
    /// Worksheet selected by name.
    Name(String),
    /// Worksheet selected by zero-based position (the one-based user value
    /// minus one).
    Index(usize),
}
1187
1188impl SheetSelector {
1189    fn parse(value: &Value) -> BuiltinResult<Self> {
1190        match value {
1191            Value::Int(i) if i.to_i64() >= 1 => Ok(Self::Index(i.to_i64() as usize - 1)),
1192            Value::Num(n)
1193                if n.is_finite() && *n >= 1.0 && (n.round() - n).abs() <= f64::EPSILON =>
1194            {
1195                Ok(Self::Index(n.round() as usize - 1))
1196            }
1197            _ => {
1198                let text = scalar_text(value, "Sheet")?;
1199                if text.trim().is_empty() {
1200                    return Err(invalid_argument("readtable: Sheet must not be empty"));
1201                }
1202                Ok(Self::Name(text))
1203            }
1204        }
1205    }
1206}
1207
/// Field delimiter used when splitting delimited text into columns.
#[derive(Clone)]
enum Delimiter {
    /// A single delimiter character.
    Char(char),
    /// A multi-character delimiter string.
    String(String),
    /// Whitespace-delimited fields; also the fallback when no delimiter is
    /// given or detected.
    Whitespace,
}
1214
1215impl Delimiter {
1216    fn parse(value: &Value) -> BuiltinResult<Self> {
1217        let text = scalar_text(value, "Delimiter")?;
1218        if text.is_empty() {
1219            return Err(invalid_argument("readtable: Delimiter must not be empty"));
1220        }
1221        match text.trim().to_ascii_lowercase().as_str() {
1222            "tab" => Ok(Self::Char('\t')),
1223            "space" | "whitespace" => Ok(Self::Whitespace),
1224            "comma" => Ok(Self::Char(',')),
1225            "semicolon" => Ok(Self::Char(';')),
1226            "bar" | "pipe" => Ok(Self::Char('|')),
1227            _ if text.chars().count() == 1 => Ok(Self::Char(text.chars().next().unwrap())),
1228            _ => Ok(Self::String(text)),
1229        }
1230    }
1231}
1232
/// Rectangular portion of the input to import, with zero-based coordinates.
///
/// Built either from a cell-reference string or from a 2- or 4-element
/// numeric vector ordered `[start_row, start_col, end_row, end_col]`.
#[derive(Clone, Copy)]
struct RangeSpec {
    /// Zero-based first row; `0` when a text reference gives no row.
    start_row: usize,
    /// Zero-based first column; `0` when a text reference gives no column.
    start_col: usize,
    /// Zero-based last row, or `None` when the range has no explicit end row.
    end_row: Option<usize>,
    /// Zero-based last column, or `None` when the range has no explicit end
    /// column.
    end_col: Option<usize>,
}
1240
1241impl RangeSpec {
1242    fn parse(value: &Value) -> BuiltinResult<Self> {
1243        match value {
1244            Value::String(text) => Self::parse_text(text),
1245            Value::CharArray(ca) if ca.rows == 1 => {
1246                let text: String = ca.data.iter().collect();
1247                Self::parse_text(&text)
1248            }
1249            Value::StringArray(sa) if sa.data.len() == 1 => Self::parse_text(&sa.data[0]),
1250            Value::Tensor(t) if t.data.len() == 2 || t.data.len() == 4 => {
1251                let mut indices = Vec::with_capacity(t.data.len());
1252                for value in &t.data {
1253                    indices.push(one_based_to_zero(*value, usize::MAX, "Range")?);
1254                }
1255                Ok(Self {
1256                    start_row: indices[0],
1257                    start_col: indices[1],
1258                    end_row: indices.get(2).copied(),
1259                    end_col: indices.get(3).copied(),
1260                })
1261            }
1262            _ => Err(invalid_argument(
1263                "readtable: Range must be a cell reference string or numeric vector",
1264            )),
1265        }
1266    }
1267
1268    fn parse_text(text: &str) -> BuiltinResult<Self> {
1269        let trimmed = text.trim();
1270        if trimmed.is_empty() {
1271            return Err(invalid_argument("readtable: Range must not be empty"));
1272        }
1273        let parts: Vec<&str> = trimmed.split(':').collect();
1274        if parts.len() > 2 {
1275            return Err(invalid_argument(format!(
1276                "readtable: invalid Range specification '{trimmed}'"
1277            )));
1278        }
1279        let start = parse_cell_ref(parts[0])?;
1280        let end = if parts.len() == 2 {
1281            Some(parse_cell_ref(parts[1])?)
1282        } else {
1283            None
1284        };
1285        Ok(Self {
1286            start_row: start.0.unwrap_or(0),
1287            start_col: start.1.unwrap_or(0),
1288            end_row: end.and_then(|item| item.0),
1289            end_col: end.and_then(|item| item.1),
1290        })
1291    }
1292}
1293
1294fn parse_cell_ref(token: &str) -> BuiltinResult<(Option<usize>, Option<usize>)> {
1295    let mut letters = String::new();
1296    let mut digits = String::new();
1297    for ch in token.trim().chars() {
1298        if ch == '$' {
1299            continue;
1300        }
1301        if ch.is_ascii_alphabetic() {
1302            letters.push(ch.to_ascii_uppercase());
1303        } else if ch.is_ascii_digit() {
1304            digits.push(ch);
1305        } else {
1306            return Err(invalid_argument(format!(
1307                "readtable: invalid Range component '{token}'"
1308            )));
1309        }
1310    }
1311    let col = if letters.is_empty() {
1312        None
1313    } else {
1314        let mut value = 0usize;
1315        for ch in letters.chars() {
1316            value = value
1317                .checked_mul(26)
1318                .and_then(|v| v.checked_add((ch as u8 - b'A' + 1) as usize))
1319                .ok_or_else(|| invalid_argument("readtable: Range column overflow"))?;
1320        }
1321        Some(value - 1)
1322    };
1323    let row = if digits.is_empty() {
1324        None
1325    } else {
1326        let parsed = digits
1327            .parse::<usize>()
1328            .map_err(|_| invalid_argument("readtable: invalid Range row"))?;
1329        if parsed == 0 {
1330            return Err(invalid_argument("readtable: Range rows are one-based"));
1331        }
1332        Some(parsed - 1)
1333    };
1334    Ok((row, col))
1335}
1336
1337fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
1338    let text = scalar_text(value, "filename").map_err(|_| {
1339        table_error(
1340            &TABLE_ERROR_INVALID_ARGUMENT,
1341            "readtable: filename must be a string scalar or character vector",
1342        )
1343    })?;
1344    if text.trim().is_empty() {
1345        return Err(invalid_argument("readtable: filename must not be empty"));
1346    }
1347    let expanded =
1348        expand_user_path(&text, "readtable").map_err(|msg| invalid_argument(msg.to_string()))?;
1349    Ok(Path::new(&expanded).to_path_buf())
1350}
1351
1352async fn read_table_from_file(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1353    match options.file_type {
1354        ImportFileType::Spreadsheet => read_spreadsheet_table(path, options).await,
1355        ImportFileType::Text => read_text_table(path, options).await,
1356        ImportFileType::Auto if is_spreadsheet_path(path) => {
1357            read_spreadsheet_table(path, options).await
1358        }
1359        ImportFileType::Auto => read_text_table(path, options).await,
1360    }
1361}
1362
1363async fn read_text_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1364    if options.sheet.is_some() {
1365        return Err(invalid_argument(
1366            "readtable: Sheet is only valid for spreadsheet files",
1367        ));
1368    }
1369    let bytes = read_file_bytes(path).await?;
1370    let text = decode_text_bytes(&bytes, &options.encoding)?;
1371    let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
1372    if let Some(first) = raw_lines.first_mut() {
1373        if first.starts_with('\u{FEFF}') {
1374            *first = first.trim_start_matches('\u{FEFF}').to_string();
1375        }
1376    }
1377    let delimiter = options
1378        .delimiter
1379        .clone()
1380        .or_else(|| detect_delimiter(&raw_lines))
1381        .unwrap_or(Delimiter::Whitespace);
1382    let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
1383    if options.num_header_lines > 0 {
1384        rows = rows.into_iter().skip(options.num_header_lines).collect();
1385    }
1386    if let Some(range) = options.range {
1387        rows = apply_import_range(rows, range);
1388    }
1389    import_rows_to_table(rows, options)
1390}
1391
1392async fn read_spreadsheet_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1393    if options.delimiter.is_some() {
1394        return Err(invalid_argument(
1395            "readtable: Delimiter is only valid for text files",
1396        ));
1397    }
1398    let bytes = read_file_bytes(path).await?;
1399    let cursor = Cursor::new(bytes);
1400    let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
1401        table_error(
1402            &TABLE_ERROR_UNSUPPORTED_FILE,
1403            format!(
1404                "readtable: unable to open spreadsheet '{}': {err}",
1405                path.display()
1406            ),
1407        )
1408    })?;
1409    let range = match &options.sheet {
1410        Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
1411            invalid_argument(format!("readtable: unable to read sheet '{name}': {err:?}"))
1412        })?,
1413        Some(SheetSelector::Index(index)) => workbook
1414            .worksheet_range_at(*index)
1415            .ok_or_else(|| {
1416                invalid_argument(format!(
1417                    "readtable: sheet index {} exceeds bounds",
1418                    index + 1
1419                ))
1420            })?
1421            .map_err(|err| {
1422                invalid_argument(format!(
1423                    "readtable: unable to read sheet {}: {err:?}",
1424                    index + 1
1425                ))
1426            })?,
1427        None => workbook
1428            .worksheet_range_at(0)
1429            .ok_or_else(|| invalid_argument("readtable: spreadsheet contains no worksheets"))?
1430            .map_err(|err| {
1431                invalid_argument(format!("readtable: unable to read first sheet: {err:?}"))
1432            })?,
1433    };
1434    let rows = spreadsheet_range_to_rows(&range, options)?;
1435    import_rows_to_table(rows, options)
1436}
1437
1438async fn read_file_bytes(path: &Path) -> BuiltinResult<Vec<u8>> {
1439    let mut file = File::open_async(path).await.map_err(|err| {
1440        table_error_with_source(
1441            &TABLE_ERROR_IO,
1442            format!("readtable: unable to open '{}': {err}", path.display()),
1443            err,
1444        )
1445    })?;
1446    let mut bytes = Vec::new();
1447    file.read_to_end(&mut bytes).map_err(|err| {
1448        table_error_with_source(
1449            &TABLE_ERROR_IO,
1450            format!("readtable: unable to read '{}': {err}", path.display()),
1451            err,
1452        )
1453    })?;
1454    Ok(bytes)
1455}
1456
/// Reports whether `path` has a spreadsheet file extension.
///
/// The extension is compared ASCII case-insensitively against `xls`, `xlsx`,
/// `xlsm`, `xlsb`, and `ods`. Paths without an extension, or whose extension
/// is not valid UTF-8, are not spreadsheets.
fn is_spreadsheet_path(path: &Path) -> bool {
    const SPREADSHEET_EXTENSIONS: [&str; 5] = ["xls", "xlsx", "xlsm", "xlsb", "ods"];
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    SPREADSHEET_EXTENSIONS
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
}
1466
1467fn validate_encoding_label(label: &str) -> BuiltinResult<()> {
1468    encoding_for_label(label)
1469        .map(|_| ())
1470        .ok_or_else(|| invalid_argument(format!("readtable: unsupported Encoding '{label}'")))
1471}
1472
1473fn encoding_for_label(label: &str) -> Option<&'static Encoding> {
1474    let label = label.trim();
1475    if label.is_empty()
1476        || label.eq_ignore_ascii_case("auto")
1477        || label.eq_ignore_ascii_case("default")
1478        || label.eq_ignore_ascii_case("system")
1479        || label.eq_ignore_ascii_case("native")
1480        || label.eq_ignore_ascii_case("utf-8")
1481        || label.eq_ignore_ascii_case("utf8")
1482        || label.eq_ignore_ascii_case("unicode")
1483    {
1484        return Some(UTF_8);
1485    }
1486    Encoding::for_label(label.as_bytes())
1487}
1488
1489fn decode_text_bytes(bytes: &[u8], encoding: &str) -> BuiltinResult<String> {
1490    let (encoding, offset) = if encoding.trim().eq_ignore_ascii_case("auto") {
1491        Encoding::for_bom(bytes).unwrap_or((UTF_8, 0))
1492    } else {
1493        (
1494            encoding_for_label(encoding).ok_or_else(|| {
1495                invalid_argument(format!("readtable: unsupported Encoding '{encoding}'"))
1496            })?,
1497            0,
1498        )
1499    };
1500    let (decoded, _, had_errors) = encoding.decode(&bytes[offset..]);
1501    if had_errors {
1502        return Err(table_error(
1503            &TABLE_ERROR_IO,
1504            format!(
1505                "readtable: unable to decode file contents using encoding '{}'",
1506                encoding.name()
1507            ),
1508        ));
1509    }
1510    Ok(decoded.into_owned())
1511}
1512
/// One raw cell produced by the text or spreadsheet readers, before it is
/// converted into a table variable.
#[derive(Clone, Debug)]
enum ImportCell {
    /// No content (an empty spreadsheet cell or blank text).
    Empty,
    /// Text content.
    Text(String),
    /// Numeric content.
    Number(f64),
    /// Boolean content.
    Logical(bool),
    /// Date/time content stored as a datenum-style serial number.
    DateTime(f64),
    /// A spreadsheet error cell, stored as its text form.
    Error(String),
}
1522
1523impl ImportCell {
1524    fn from_text(text: String) -> Self {
1525        if text.trim().is_empty() {
1526            Self::Empty
1527        } else {
1528            Self::Text(text)
1529        }
1530    }
1531
1532    fn display_text(&self) -> String {
1533        match self {
1534            Self::Empty => String::new(),
1535            Self::Text(text) => text.clone(),
1536            Self::Number(value) => format_key_number(*value),
1537            Self::Logical(value) => value.to_string(),
1538            Self::DateTime(serial) => format_key_number(*serial),
1539            Self::Error(text) => text.clone(),
1540        }
1541    }
1542
1543    fn is_missing(&self, options: &ReadTableOptions) -> bool {
1544        match self {
1545            Self::Empty => true,
1546            Self::Text(text) => options.is_missing(text),
1547            _ => false,
1548        }
1549    }
1550
1551    fn is_likely_data_token(&self, options: &ReadTableOptions) -> bool {
1552        match self {
1553            Self::Number(_) | Self::Logical(_) | Self::DateTime(_) => true,
1554            Self::Empty => false,
1555            Self::Text(text) => {
1556                let token = unquote(text.trim()).trim();
1557                options.is_missing(token)
1558                    || parse_numeric(token).is_some()
1559                    || parse_logical(token).is_some()
1560                    || parse_iso_datetime_to_datenum(token).is_some()
1561            }
1562            Self::Error(_) => true,
1563        }
1564    }
1565}
1566
1567fn spreadsheet_cell_to_import(cell: &SpreadsheetData) -> ImportCell {
1568    match cell {
1569        SpreadsheetData::Empty => ImportCell::Empty,
1570        SpreadsheetData::Int(value) => ImportCell::Number(*value as f64),
1571        SpreadsheetData::Float(value) => ImportCell::Number(*value),
1572        SpreadsheetData::String(text) => ImportCell::Text(text.clone()),
1573        SpreadsheetData::Bool(value) => ImportCell::Logical(*value),
1574        SpreadsheetData::DateTime(value) => value
1575            .as_datetime()
1576            .map(crate::builtins::datetime::datenum_from_naive)
1577            .map(ImportCell::DateTime)
1578            .unwrap_or_else(|| ImportCell::Number(value.as_f64())),
1579        SpreadsheetData::DateTimeIso(text) => parse_iso_datetime_to_datenum(text)
1580            .map(ImportCell::DateTime)
1581            .unwrap_or_else(|| ImportCell::Text(text.clone())),
1582        SpreadsheetData::DurationIso(text) => ImportCell::Text(text.clone()),
1583        SpreadsheetData::Error(err) => ImportCell::Error(err.to_string()),
1584    }
1585}
1586
1587fn spreadsheet_range_to_rows(
1588    range: &calamine::Range<SpreadsheetData>,
1589    options: &ReadTableOptions,
1590) -> BuiltinResult<Vec<Vec<ImportCell>>> {
1591    if range.is_empty() {
1592        return Ok(Vec::new());
1593    }
1594    let Some((range_start_row, range_start_col)) = range.start() else {
1595        return Ok(Vec::new());
1596    };
1597    let Some((range_end_row, range_end_col)) = range.end() else {
1598        return Ok(Vec::new());
1599    };
1600    let start_row = options
1601        .range
1602        .map(|spec| checked_u32(spec.start_row, "Range row"))
1603        .transpose()?
1604        .unwrap_or(range_start_row);
1605    let start_col = options
1606        .range
1607        .map(|spec| checked_u32(spec.start_col, "Range column"))
1608        .transpose()?
1609        .unwrap_or(range_start_col);
1610    let end_row = options
1611        .range
1612        .and_then(|spec| spec.end_row)
1613        .map(|row| checked_u32(row, "Range row"))
1614        .transpose()?
1615        .unwrap_or(range_end_row);
1616    let end_col = options
1617        .range
1618        .and_then(|spec| spec.end_col)
1619        .map(|col| checked_u32(col, "Range column"))
1620        .transpose()?
1621        .unwrap_or(range_end_col);
1622    if start_row > end_row || start_col > end_col {
1623        return Ok(Vec::new());
1624    }
1625    let mut rows = Vec::new();
1626    for row_idx in start_row..=end_row {
1627        let mut row = Vec::new();
1628        for col_idx in start_col..=end_col {
1629            row.push(
1630                range
1631                    .get_value((row_idx, col_idx))
1632                    .map(spreadsheet_cell_to_import)
1633                    .unwrap_or(ImportCell::Empty),
1634            );
1635        }
1636        if matches!(options.empty_line_rule, EmptyLineRule::Skip)
1637            && row.iter().all(|cell| cell.is_missing(options))
1638        {
1639            continue;
1640        }
1641        rows.push(row);
1642    }
1643    if options.num_header_lines > 0 {
1644        Ok(rows.into_iter().skip(options.num_header_lines).collect())
1645    } else {
1646        Ok(rows)
1647    }
1648}
1649
1650fn checked_u32(value: usize, context: &str) -> BuiltinResult<u32> {
1651    u32::try_from(value).map_err(|_| invalid_argument(format!("readtable: {context} overflow")))
1652}
1653
1654fn detect_delimiter(lines: &[String]) -> Option<Delimiter> {
1655    let candidates = [',', '\t', ';', '|'];
1656    let mut best: Option<(f64, Delimiter)> = None;
1657    for candidate in candidates {
1658        let counts = lines
1659            .iter()
1660            .take(32)
1661            .filter(|line| line.contains(candidate))
1662            .map(|line| split_with_char_delim(line, candidate).len())
1663            .filter(|count| *count >= 2)
1664            .collect::<Vec<_>>();
1665        if counts.is_empty() {
1666            continue;
1667        }
1668        let avg = counts.iter().copied().sum::<usize>() as f64 / counts.len() as f64;
1669        if avg >= 2.0
1670            && best
1671                .as_ref()
1672                .map(|(best_avg, _)| avg > *best_avg)
1673                .unwrap_or(true)
1674        {
1675            best = Some((avg, Delimiter::Char(candidate)));
1676        }
1677    }
1678    best.map(|(_, delimiter)| delimiter).or_else(|| {
1679        lines
1680            .iter()
1681            .take(32)
1682            .any(|line| line.split_whitespace().count() > 1)
1683            .then_some(Delimiter::Whitespace)
1684    })
1685}
1686
/// Splits one line on `delimiter`, honouring double-quoted sections.
///
/// Quote characters are removed from the output; a doubled quote inside a
/// quoted section yields one literal `"`. Delimiters inside quotes are kept
/// as field content. The result always contains at least one field.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut rest = line.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            // Escaped quote ("") inside a quoted section.
            '"' if quoted && rest.peek() == Some(&'"') => {
                field.push('"');
                rest.next();
            }
            '"' => quoted = !quoted,
            _ if ch == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            _ => field.push(ch),
        }
    }
    fields.push(field);
    fields
}
1712
1713fn parse_text_records(
1714    text: &str,
1715    delimiter: &Delimiter,
1716    empty_line_rule: EmptyLineRule,
1717) -> Vec<Vec<ImportCell>> {
1718    match delimiter {
1719        Delimiter::Whitespace => parse_whitespace_records(text, empty_line_rule),
1720        Delimiter::Char(ch) => parse_delimited_records(text, &ch.to_string(), empty_line_rule),
1721        Delimiter::String(pattern) => parse_delimited_records(text, pattern, empty_line_rule),
1722    }
1723}
1724
1725fn parse_delimited_records(
1726    text: &str,
1727    delimiter: &str,
1728    empty_line_rule: EmptyLineRule,
1729) -> Vec<Vec<ImportCell>> {
1730    let mut records = Vec::new();
1731    let mut row = Vec::new();
1732    let mut current = String::new();
1733    let mut in_quotes = false;
1734    let mut idx = 0usize;
1735    while idx < text.len() {
1736        let ch = text[idx..].chars().next().expect("valid char boundary");
1737        if ch == '"' {
1738            if in_quotes && text[idx + ch.len_utf8()..].starts_with('"') {
1739                current.push('"');
1740                idx += ch.len_utf8() + 1;
1741                continue;
1742            }
1743            in_quotes = !in_quotes;
1744            idx += ch.len_utf8();
1745            continue;
1746        }
1747        if !in_quotes && !delimiter.is_empty() && text[idx..].starts_with(delimiter) {
1748            row.push(ImportCell::from_text(std::mem::take(&mut current)));
1749            idx += delimiter.len();
1750            continue;
1751        }
1752        if !in_quotes && (ch == '\n' || ch == '\r') {
1753            row.push(ImportCell::from_text(std::mem::take(&mut current)));
1754            push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1755            idx += ch.len_utf8();
1756            if ch == '\r' && text[idx..].starts_with('\n') {
1757                idx += 1;
1758            }
1759            continue;
1760        }
1761        current.push(ch);
1762        idx += ch.len_utf8();
1763    }
1764    if !current.is_empty() || !row.is_empty() || text.ends_with(delimiter) {
1765        row.push(ImportCell::from_text(current));
1766        push_import_record(&mut records, row, empty_line_rule);
1767    }
1768    records
1769}
1770
1771fn parse_whitespace_records(text: &str, empty_line_rule: EmptyLineRule) -> Vec<Vec<ImportCell>> {
1772    let mut records = Vec::new();
1773    let mut row = Vec::new();
1774    let mut current = String::new();
1775    let mut in_quotes = false;
1776    let mut field_open = false;
1777    let mut chars = text.chars().peekable();
1778    while let Some(ch) = chars.next() {
1779        if ch == '"' {
1780            if in_quotes && chars.peek() == Some(&'"') {
1781                current.push('"');
1782                chars.next();
1783            } else {
1784                in_quotes = !in_quotes;
1785            }
1786            field_open = true;
1787            continue;
1788        }
1789        if !in_quotes && (ch == '\n' || ch == '\r') {
1790            if field_open || !current.is_empty() {
1791                row.push(ImportCell::from_text(std::mem::take(&mut current)));
1792            }
1793            field_open = false;
1794            push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1795            if ch == '\r' && chars.peek() == Some(&'\n') {
1796                chars.next();
1797            }
1798            continue;
1799        }
1800        if !in_quotes && ch.is_whitespace() {
1801            if field_open || !current.is_empty() {
1802                row.push(ImportCell::from_text(std::mem::take(&mut current)));
1803                field_open = false;
1804            }
1805            continue;
1806        }
1807        current.push(ch);
1808        field_open = true;
1809    }
1810    if field_open || !current.is_empty() {
1811        row.push(ImportCell::from_text(current));
1812    }
1813    if !row.is_empty() {
1814        push_import_record(&mut records, row, empty_line_rule);
1815    }
1816    records
1817}
1818
1819fn push_import_record(
1820    records: &mut Vec<Vec<ImportCell>>,
1821    row: Vec<ImportCell>,
1822    empty_line_rule: EmptyLineRule,
1823) {
1824    if matches!(empty_line_rule, EmptyLineRule::Skip)
1825        && row.iter().all(|cell| matches!(cell, ImportCell::Empty))
1826    {
1827        return;
1828    }
1829    records.push(row);
1830}
1831
1832fn apply_import_range(rows: Vec<Vec<ImportCell>>, range: RangeSpec) -> Vec<Vec<ImportCell>> {
1833    if rows.is_empty() {
1834        return rows;
1835    }
1836    let end_row = range
1837        .end_row
1838        .unwrap_or_else(|| rows.len().saturating_sub(1));
1839    let max_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
1840    let end_col = range.end_col.unwrap_or_else(|| max_cols.saturating_sub(1));
1841    rows.into_iter()
1842        .enumerate()
1843        .filter_map(|(idx, row)| {
1844            if idx < range.start_row || idx > end_row {
1845                return None;
1846            }
1847            let selected = (range.start_col..=end_col)
1848                .map(|col| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1849                .collect::<Vec<_>>();
1850            Some(selected)
1851        })
1852        .collect()
1853}
1854
1855fn import_rows_to_table(
1856    mut rows: Vec<Vec<ImportCell>>,
1857    options: &ReadTableOptions,
1858) -> BuiltinResult<Value> {
1859    let mut variable_names = options.variable_names.clone();
1860    let read_variable_names = options
1861        .read_variable_names
1862        .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
1863    if variable_names.is_none() && read_variable_names && !rows.is_empty() {
1864        variable_names = Some(
1865            rows.remove(0)
1866                .into_iter()
1867                .map(|cell| cell.display_text())
1868                .collect(),
1869        );
1870    }
1871
1872    let mut row_names = options.row_names.clone();
1873    if options.read_row_names && !rows.is_empty() {
1874        row_names = Some(
1875            rows.iter_mut()
1876                .map(|row| {
1877                    if row.is_empty() {
1878                        String::new()
1879                    } else {
1880                        row.remove(0).display_text()
1881                    }
1882                })
1883                .collect(),
1884        );
1885        if let Some(names) = variable_names.as_mut() {
1886            if !names.is_empty() {
1887                names.remove(0);
1888            }
1889        }
1890    }
1891
1892    let column_count = import_column_count(&rows, &variable_names, options)?;
1893    let names = import_variable_names(variable_names, column_count, options);
1894
1895    let mut columns = Vec::with_capacity(names.len());
1896    for col in 0..names.len() {
1897        let values = rows
1898            .iter()
1899            .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1900            .collect::<Vec<_>>();
1901        let requested_type = options
1902            .variable_types
1903            .as_ref()
1904            .and_then(|types| types.get(col))
1905            .copied();
1906        columns.push(import_column(values, options, requested_type)?);
1907    }
1908    table_from_columns_with_properties(names, columns, row_names)
1909}
1910
1911fn import_column_count(
1912    rows: &[Vec<ImportCell>],
1913    variable_names: &Option<Vec<String>>,
1914    options: &ReadTableOptions,
1915) -> BuiltinResult<usize> {
1916    let data_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
1917    let name_cols = variable_names.as_ref().map(Vec::len).unwrap_or(0);
1918    let type_cols = options.variable_types.as_ref().map(Vec::len).unwrap_or(0);
1919    if let Some(count) = options.num_variables {
1920        if name_cols > count {
1921            return Err(invalid_argument(
1922                "readtable: VariableNames length exceeds NumVariables",
1923            ));
1924        }
1925        if type_cols > count {
1926            return Err(invalid_argument(
1927                "readtable: VariableTypes length exceeds NumVariables",
1928            ));
1929        }
1930        return Ok(count);
1931    }
1932    Ok(data_cols.max(name_cols).max(type_cols))
1933}
1934
1935fn import_variable_names(
1936    variable_names: Option<Vec<String>>,
1937    column_count: usize,
1938    options: &ReadTableOptions,
1939) -> Vec<String> {
1940    match variable_names {
1941        Some(mut names) => {
1942            while names.len() < column_count {
1943                names.push(format!("Var{}", names.len() + 1));
1944            }
1945            names.truncate(column_count);
1946            if options.preserve_variable_names {
1947                make_unique_names(names)
1948            } else {
1949                make_unique_variable_names(names)
1950            }
1951        }
1952        None => generated_variable_names(column_count),
1953    }
1954}
1955
1956fn should_read_variable_names(rows: &[Vec<ImportCell>], options: &ReadTableOptions) -> bool {
1957    let Some(first) = rows.first() else {
1958        return false;
1959    };
1960    if first.is_empty() {
1961        return false;
1962    }
1963    let names = first
1964        .iter()
1965        .map(ImportCell::display_text)
1966        .map(|text| text.trim().to_string())
1967        .collect::<Vec<_>>();
1968    if names.iter().any(|name| name.is_empty()) {
1969        return false;
1970    }
1971    if first.iter().all(|cell| cell.is_likely_data_token(options)) {
1972        return false;
1973    }
1974    true
1975}
1976
1977fn import_column(
1978    values: Vec<ImportCell>,
1979    options: &ReadTableOptions,
1980    requested_type: Option<ImportVariableType>,
1981) -> BuiltinResult<Value> {
1982    match requested_type.unwrap_or(ImportVariableType::Auto) {
1983        ImportVariableType::Auto => infer_import_column(values, options),
1984        ImportVariableType::Numeric(dtype) => import_numeric_column(values, options, dtype),
1985        ImportVariableType::Logical => import_logical_column(values, options),
1986        ImportVariableType::Text(kind) => import_text_column(values, options, kind),
1987        ImportVariableType::CellStr => import_cellstr_column(values, options),
1988        ImportVariableType::Datetime => import_datetime_column(values, options),
1989        ImportVariableType::Duration => import_duration_column(values, options),
1990    }
1991}
1992
1993fn import_numeric_column(
1994    values: Vec<ImportCell>,
1995    options: &ReadTableOptions,
1996    dtype: NumericDType,
1997) -> BuiltinResult<Value> {
1998    let mut numeric = Vec::with_capacity(values.len());
1999    for value in &values {
2000        let parsed = numeric_from_import_cell(value, options, dtype.class_name())?;
2001        numeric.push(cast_import_numeric(parsed, dtype));
2002    }
2003    Tensor::new_with_dtype(numeric, vec![values.len(), 1], dtype)
2004        .map(Value::Tensor)
2005        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2006}
2007
2008fn numeric_from_import_cell(
2009    value: &ImportCell,
2010    options: &ReadTableOptions,
2011    context: &str,
2012) -> BuiltinResult<f64> {
2013    match value {
2014        ImportCell::Empty => Ok(f64::NAN),
2015        ImportCell::Number(value) => Ok(*value),
2016        ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2017        ImportCell::DateTime(serial) => Ok(*serial),
2018        ImportCell::Text(text) => {
2019            let token = unquote(text.trim()).trim();
2020            if options.is_missing(token) {
2021                Ok(f64::NAN)
2022            } else {
2023                parse_numeric(token).ok_or_else(|| {
2024                    invalid_variable(format!("readtable: cannot import '{token}' as {context}"))
2025                })
2026            }
2027        }
2028        ImportCell::Error(text) => Err(invalid_variable(format!(
2029            "readtable: cannot import spreadsheet error '{text}' as {context}"
2030        ))),
2031    }
2032}
2033
2034fn cast_import_numeric(value: f64, dtype: NumericDType) -> f64 {
2035    match dtype {
2036        NumericDType::F64 => value,
2037        NumericDType::F32 => (value as f32) as f64,
2038        NumericDType::U8 => {
2039            if value.is_finite() {
2040                value.round().clamp(0.0, u8::MAX as f64)
2041            } else {
2042                0.0
2043            }
2044        }
2045        NumericDType::U16 => {
2046            if value.is_finite() {
2047                value.round().clamp(0.0, u16::MAX as f64)
2048            } else {
2049                0.0
2050            }
2051        }
2052    }
2053}
2054
2055fn import_logical_column(
2056    values: Vec<ImportCell>,
2057    options: &ReadTableOptions,
2058) -> BuiltinResult<Value> {
2059    let mut logical = Vec::with_capacity(values.len());
2060    for value in &values {
2061        logical.push(logical_from_import_cell(value, options)?);
2062    }
2063    LogicalArray::new(logical, vec![values.len(), 1])
2064        .map(Value::LogicalArray)
2065        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2066}
2067
2068fn logical_from_import_cell(value: &ImportCell, options: &ReadTableOptions) -> BuiltinResult<u8> {
2069    let flag = match value {
2070        ImportCell::Empty => false,
2071        ImportCell::Logical(value) => *value,
2072        ImportCell::Number(value) => *value != 0.0,
2073        ImportCell::DateTime(serial) => *serial != 0.0,
2074        ImportCell::Text(text) => {
2075            let token = unquote(text.trim()).trim();
2076            if options.is_missing(token) {
2077                false
2078            } else if let Some(value) = parse_logical(token) {
2079                value
2080            } else if let Some(value) = parse_numeric(token) {
2081                value != 0.0
2082            } else {
2083                return Err(invalid_variable(format!(
2084                    "readtable: cannot import '{token}' as logical"
2085                )));
2086            }
2087        }
2088        ImportCell::Error(text) => {
2089            return Err(invalid_variable(format!(
2090                "readtable: cannot import spreadsheet error '{text}' as logical"
2091            )));
2092        }
2093    };
2094    Ok(u8::from(flag))
2095}
2096
2097fn import_text_column(
2098    values: Vec<ImportCell>,
2099    options: &ReadTableOptions,
2100    kind: TextImportType,
2101) -> BuiltinResult<Value> {
2102    let strings = import_text_values(values, options);
2103    match kind {
2104        TextImportType::String => StringArray::new(strings.clone(), vec![strings.len(), 1])
2105            .map(Value::StringArray)
2106            .map_err(|err| invalid_variable(format!("readtable: {err}"))),
2107        TextImportType::Char => import_char_column(strings),
2108    }
2109}
2110
2111fn import_text_values(values: Vec<ImportCell>, options: &ReadTableOptions) -> Vec<String> {
2112    values
2113        .into_iter()
2114        .map(|value| {
2115            if value.is_missing(options) {
2116                String::new()
2117            } else {
2118                unquote(value.display_text().trim()).to_string()
2119            }
2120        })
2121        .collect()
2122}
2123
2124fn import_char_column(strings: Vec<String>) -> BuiltinResult<Value> {
2125    let rows = strings.len();
2126    let cols = strings
2127        .iter()
2128        .map(|text| text.chars().count())
2129        .max()
2130        .unwrap_or(0);
2131    let mut data = vec![' '; rows * cols];
2132    for (row, text) in strings.iter().enumerate() {
2133        for (col, ch) in text.chars().enumerate() {
2134            data[row * cols + col] = ch;
2135        }
2136    }
2137    CharArray::new(data, rows, cols)
2138        .map(Value::CharArray)
2139        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2140}
2141
2142fn import_cellstr_column(
2143    values: Vec<ImportCell>,
2144    options: &ReadTableOptions,
2145) -> BuiltinResult<Value> {
2146    let strings = import_text_values(values, options);
2147    let rows = strings.len();
2148    let cells = strings
2149        .into_iter()
2150        .map(|text| Value::CharArray(CharArray::new_row(&text)))
2151        .collect::<Vec<_>>();
2152    CellArray::new(cells, rows, 1)
2153        .map(Value::Cell)
2154        .map_err(|err| invalid_variable(format!("readtable: {err}")))
2155}
2156
2157fn import_datetime_column(
2158    values: Vec<ImportCell>,
2159    options: &ReadTableOptions,
2160) -> BuiltinResult<Value> {
2161    if matches!(options.datetime_type, DatetimeImportType::Text) {
2162        return import_text_column(values, options, options.text_type);
2163    }
2164
2165    let mut serials = Vec::with_capacity(values.len());
2166    for value in &values {
2167        serials.push(datetime_serial_from_import_cell(value, options)?);
2168    }
2169    let tensor = Tensor::new(serials, vec![values.len(), 1])
2170        .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2171    if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2172        Ok(Value::Tensor(tensor))
2173    } else {
2174        crate::builtins::datetime::datetime_object_from_serial_tensor(tensor, "yyyy-MM-dd HH:mm:ss")
2175    }
2176}
2177
2178fn datetime_serial_from_import_cell(
2179    value: &ImportCell,
2180    options: &ReadTableOptions,
2181) -> BuiltinResult<f64> {
2182    match value {
2183        ImportCell::Empty => Ok(f64::NAN),
2184        ImportCell::DateTime(serial) => Ok(*serial),
2185        ImportCell::Number(value) => Ok(*value),
2186        ImportCell::Text(text) => {
2187            let token = unquote(text.trim()).trim();
2188            if options.is_missing(token) {
2189                Ok(f64::NAN)
2190            } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2191                Ok(serial)
2192            } else if let Some(serial) = parse_numeric(token) {
2193                Ok(serial)
2194            } else {
2195                Err(invalid_variable(format!(
2196                    "readtable: cannot import '{token}' as datetime"
2197                )))
2198            }
2199        }
2200        ImportCell::Logical(_) => Err(invalid_variable(
2201            "readtable: cannot import logical value as datetime",
2202        )),
2203        ImportCell::Error(text) => Err(invalid_variable(format!(
2204            "readtable: cannot import spreadsheet error '{text}' as datetime"
2205        ))),
2206    }
2207}
2208
2209fn import_duration_column(
2210    values: Vec<ImportCell>,
2211    options: &ReadTableOptions,
2212) -> BuiltinResult<Value> {
2213    let mut days = Vec::with_capacity(values.len());
2214    for value in &values {
2215        days.push(duration_days_from_import_cell(value, options)?);
2216    }
2217    let tensor = Tensor::new(days, vec![values.len(), 1])
2218        .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2219    crate::builtins::duration::duration_object_from_days_tensor(
2220        tensor,
2221        crate::builtins::duration::DEFAULT_DURATION_FORMAT,
2222    )
2223}
2224
2225fn duration_days_from_import_cell(
2226    value: &ImportCell,
2227    options: &ReadTableOptions,
2228) -> BuiltinResult<f64> {
2229    match value {
2230        ImportCell::Empty => Ok(f64::NAN),
2231        ImportCell::Number(value) => Ok(*value),
2232        ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2233        ImportCell::Text(text) => {
2234            let token = unquote(text.trim()).trim();
2235            if options.is_missing(token) {
2236                Ok(f64::NAN)
2237            } else {
2238                parse_duration_to_days(token).ok_or_else(|| {
2239                    invalid_variable(format!("readtable: cannot import '{token}' as duration"))
2240                })
2241            }
2242        }
2243        ImportCell::DateTime(_) => Err(invalid_variable(
2244            "readtable: cannot import datetime value as duration",
2245        )),
2246        ImportCell::Error(text) => Err(invalid_variable(format!(
2247            "readtable: cannot import spreadsheet error '{text}' as duration"
2248        ))),
2249    }
2250}
2251
2252fn infer_import_column(
2253    values: Vec<ImportCell>,
2254    options: &ReadTableOptions,
2255) -> BuiltinResult<Value> {
2256    let mut numeric = Vec::with_capacity(values.len());
2257    let mut all_numeric = true;
2258    for value in &values {
2259        match value {
2260            ImportCell::Empty => numeric.push(f64::NAN),
2261            ImportCell::Number(value) => numeric.push(*value),
2262            ImportCell::Text(text) => {
2263                let token = unquote(text.trim()).trim();
2264                if options.is_missing(token) {
2265                    numeric.push(f64::NAN);
2266                } else if let Some(value) = parse_numeric(token) {
2267                    numeric.push(value);
2268                } else {
2269                    all_numeric = false;
2270                    break;
2271                }
2272            }
2273            _ => {
2274                all_numeric = false;
2275                break;
2276            }
2277        }
2278    }
2279    if all_numeric {
2280        return Tensor::new(numeric, vec![values.len(), 1])
2281            .map(Value::Tensor)
2282            .map_err(|err| invalid_variable(format!("readtable: {err}")));
2283    }
2284
2285    let mut logical = Vec::with_capacity(values.len());
2286    let mut all_logical = true;
2287    for value in &values {
2288        match value {
2289            ImportCell::Empty => logical.push(0),
2290            ImportCell::Logical(value) => logical.push(i32::from(*value) as u8),
2291            ImportCell::Text(text) => {
2292                let token = unquote(text.trim()).trim();
2293                if options.is_missing(token) {
2294                    logical.push(0);
2295                } else if let Some(value) = parse_logical(token) {
2296                    logical.push(i32::from(value) as u8);
2297                } else {
2298                    all_logical = false;
2299                    break;
2300                }
2301            }
2302            _ => {
2303                all_logical = false;
2304                break;
2305            }
2306        }
2307    }
2308    if all_logical {
2309        return LogicalArray::new(logical, vec![values.len(), 1])
2310            .map(Value::LogicalArray)
2311            .map_err(|err| invalid_variable(format!("readtable: {err}")));
2312    }
2313
2314    if !matches!(options.datetime_type, DatetimeImportType::Text) {
2315        let mut serials = Vec::with_capacity(values.len());
2316        let mut all_datetime = true;
2317        for value in &values {
2318            match value {
2319                ImportCell::Empty => serials.push(f64::NAN),
2320                ImportCell::DateTime(serial) => serials.push(*serial),
2321                ImportCell::Text(text) => {
2322                    let token = unquote(text.trim()).trim();
2323                    if options.is_missing(token) {
2324                        serials.push(f64::NAN);
2325                    } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2326                        serials.push(serial);
2327                    } else {
2328                        all_datetime = false;
2329                        break;
2330                    }
2331                }
2332                _ => {
2333                    all_datetime = false;
2334                    break;
2335                }
2336            }
2337        }
2338        if all_datetime {
2339            let tensor = Tensor::new(serials, vec![values.len(), 1])
2340                .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2341            if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2342                return Ok(Value::Tensor(tensor));
2343            }
2344            return crate::builtins::datetime::datetime_object_from_serial_tensor(
2345                tensor,
2346                "yyyy-MM-dd HH:mm:ss",
2347            );
2348        }
2349    }
2350
2351    import_text_column(values, options, options.text_type)
2352}
2353
/// Parses a numeric token, recognising the case-insensitive spellings `nan`,
/// `inf`/`infinity` (optionally prefixed with `+`) and `-inf`/`-infinity`
/// before falling back to `f64`'s own parser. Returns `None` for anything
/// else.
fn parse_numeric(token: &str) -> Option<f64> {
    const POSITIVE_INFINITY: [&str; 4] = ["inf", "+inf", "infinity", "+infinity"];
    const NEGATIVE_INFINITY: [&str; 2] = ["-inf", "-infinity"];

    if token.eq_ignore_ascii_case("nan") {
        return Some(f64::NAN);
    }
    if POSITIVE_INFINITY
        .iter()
        .any(|name| token.eq_ignore_ascii_case(name))
    {
        return Some(f64::INFINITY);
    }
    if NEGATIVE_INFINITY
        .iter()
        .any(|name| token.eq_ignore_ascii_case(name))
    {
        return Some(f64::NEG_INFINITY);
    }
    token.parse::<f64>().ok()
}
2362
/// Parses a case-insensitive logical word: `true`/`t`/`yes`/`on` give
/// `Some(true)`, `false`/`f`/`no`/`off` give `Some(false)`, anything else
/// gives `None`.
fn parse_logical(token: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["true", "t", "yes", "on"];
    const FALSY: [&str; 4] = ["false", "f", "no", "off"];

    if TRUTHY.iter().any(|word| token.eq_ignore_ascii_case(word)) {
        Some(true)
    } else if FALSY.iter().any(|word| token.eq_ignore_ascii_case(word)) {
        Some(false)
    } else {
        None
    }
}
2370
2371fn parse_duration_to_days(token: &str) -> Option<f64> {
2372    parse_numeric(token).or_else(|| parse_clock_duration_to_days(token))
2373}
2374
/// Parses a clock-style duration (`hh:mm` or `hh:mm:ss`) into days.
///
/// A single leading `+` or `-` applies to the whole duration. Minutes and
/// seconds must lie in `[0, 60)`; hours may be any finite non-negative
/// number. Surrounding whitespace is ignored.
///
/// Returns `None` for empty input, a wrong number of `:`-separated parts,
/// non-numeric or non-finite parts, out-of-range minutes/seconds, or parts
/// that carry their own sign (e.g. `--1:30`). Previously such inputs parsed
/// with negative hours and produced a duration of the wrong magnitude.
fn parse_clock_duration_to_days(token: &str) -> Option<f64> {
    let trimmed = token.trim();
    if trimmed.is_empty() {
        return None;
    }
    let (sign, body) = if let Some(rest) = trimmed.strip_prefix('-') {
        (-1.0, rest)
    } else if let Some(rest) = trimmed.strip_prefix('+') {
        (1.0, rest)
    } else {
        (1.0, trimmed)
    };

    // The sign was consumed above; individual parts must be unsigned.
    let component = |part: &str| -> Option<f64> {
        if part.starts_with(|ch: char| ch == '+' || ch == '-') {
            return None;
        }
        part.parse::<f64>().ok()
    };

    let parts = body.split(':').collect::<Vec<_>>();
    let (hours, minutes, seconds) = match parts.as_slice() {
        [hours, minutes] => (component(hours)?, component(minutes)?, 0.0),
        [hours, minutes, seconds] => (
            component(hours)?,
            component(minutes)?,
            component(seconds)?,
        ),
        _ => return None,
    };
    if !hours.is_finite()
        || !minutes.is_finite()
        || !seconds.is_finite()
        || !(0.0..60.0).contains(&minutes)
        || !(0.0..60.0).contains(&seconds)
    {
        return None;
    }
    Some(sign * (hours * 3600.0 + minutes * 60.0 + seconds) / 86_400.0)
}
2411
2412fn parse_iso_datetime_to_datenum(token: &str) -> Option<f64> {
2413    let trimmed = token.trim();
2414    if trimmed.is_empty() {
2415        return None;
2416    }
2417    for format in [
2418        "%Y-%m-%dT%H:%M:%S%.f",
2419        "%Y-%m-%d %H:%M:%S%.f",
2420        "%Y/%m/%d %H:%M:%S%.f",
2421        "%m/%d/%Y %H:%M:%S%.f",
2422    ] {
2423        if let Ok(value) = NaiveDateTime::parse_from_str(trimmed, format) {
2424            return Some(crate::builtins::datetime::datenum_from_naive(value));
2425        }
2426    }
2427    for format in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"] {
2428        if let Ok(date) = NaiveDate::parse_from_str(trimmed, format) {
2429            return Some(crate::builtins::datetime::datenum_from_naive(
2430                date.and_time(NaiveTime::MIN),
2431            ));
2432        }
2433    }
2434    None
2435}
2436
/// Strips one pair of matching surrounding quotes (`"…"` or `'…'`) from
/// `token`. Tokens without a matching pair are returned unchanged.
fn unquote(token: &str) -> &str {
    for quote in ['"', '\''] {
        let inner = token
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    token
}
2448
2449fn default_properties(variable_names: Vec<String>, row_names: Option<Vec<String>>) -> StructValue {
2450    let mut props = StructValue::new();
2451    props.insert(
2452        VARIABLE_NAMES,
2453        Value::StringArray(
2454            StringArray::new(variable_names.clone(), vec![1, variable_names.len()])
2455                .expect("VariableNames shape is valid"),
2456        ),
2457    );
2458    props.insert(
2459        ROW_NAMES,
2460        row_names
2461            .map(|names| {
2462                Value::StringArray(
2463                    StringArray::new(names.clone(), vec![names.len(), 1])
2464                        .expect("RowNames shape is valid"),
2465                )
2466            })
2467            .unwrap_or_else(|| {
2468                Value::StringArray(StringArray::new(Vec::new(), vec![0, 1]).unwrap())
2469            }),
2470    );
2471    props.insert(
2472        DIMENSION_NAMES,
2473        Value::StringArray(
2474            StringArray::new(
2475                vec![
2476                    DEFAULT_ROW_DIM_NAME.to_string(),
2477                    DEFAULT_VARIABLE_DIM_NAME.to_string(),
2478                ],
2479                vec![1, 2],
2480            )
2481            .expect("DimensionNames shape is valid"),
2482        ),
2483    );
2484    props.insert(
2485        VARIABLE_UNITS,
2486        Value::StringArray(
2487            StringArray::new(
2488                vec![String::new(); variable_names.len()],
2489                vec![1, variable_names.len()],
2490            )
2491            .expect("VariableUnits shape is valid"),
2492        ),
2493    );
2494    props.insert(
2495        VARIABLE_DESCRIPTIONS,
2496        Value::StringArray(
2497            StringArray::new(
2498                vec![String::new(); variable_names.len()],
2499                vec![1, variable_names.len()],
2500            )
2501            .expect("VariableDescriptions shape is valid"),
2502        ),
2503    );
2504    props.insert(DESCRIPTION, Value::String(String::new()));
2505    props.insert(USER_DATA, Value::Tensor(Tensor::zeros(vec![0, 0])));
2506    props
2507}
2508
2509pub fn table_from_columns(names: Vec<String>, columns: Vec<Value>) -> BuiltinResult<Value> {
2510    table_from_columns_with_properties(names, columns, None)
2511}
2512
2513fn table_from_columns_with_properties(
2514    names: Vec<String>,
2515    columns: Vec<Value>,
2516    row_names: Option<Vec<String>>,
2517) -> BuiltinResult<Value> {
2518    ensure_table_class_registered();
2519    if names.len() != columns.len() {
2520        return Err(invalid_variable(
2521            "table: number of variable names must match number of variables",
2522        ));
2523    }
2524    let names = make_unique_names(names);
2525    let height = validate_column_heights(&names, &columns)?;
2526    if let Some(row_names) = &row_names {
2527        if row_names.len() != height {
2528            return Err(invalid_variable(
2529                "table: number of row names must match table height",
2530            ));
2531        }
2532    }
2533    let mut variables = StructValue::new();
2534    for (name, value) in names.iter().cloned().zip(columns) {
2535        variables.insert(name, value);
2536    }
2537    let props = default_properties(names, row_names);
2538    let mut object = ObjectInstance::new(TABLE_CLASS.to_string());
2539    object
2540        .properties
2541        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2542    object.properties.insert(
2543        TABLE_PROPERTIES_FIELD.to_string(),
2544        Value::Struct(props.clone()),
2545    );
2546    object
2547        .properties
2548        .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
2549    Ok(Value::Object(object))
2550}
2551
2552fn validate_column_heights(names: &[String], columns: &[Value]) -> BuiltinResult<usize> {
2553    if columns.is_empty() {
2554        return Ok(0);
2555    }
2556    let height = value_row_count(&columns[0])?;
2557    for (name, value) in names.iter().zip(columns) {
2558        let rows = value_row_count(value)?;
2559        if rows != height {
2560            return Err(invalid_variable(format!(
2561                "table: variable '{name}' has {rows} rows but expected {height}"
2562            )));
2563        }
2564    }
2565    Ok(height)
2566}
2567
2568pub fn is_table_value(value: &Value) -> bool {
2569    table_object(value).is_some()
2570}
2571
2572fn table_object(value: &Value) -> Option<&ObjectInstance> {
2573    match value {
2574        Value::Object(object) if object.is_class(TABLE_CLASS) => Some(object),
2575        _ => None,
2576    }
2577}
2578
2579fn into_table_object(value: Value, context: &str) -> BuiltinResult<ObjectInstance> {
2580    match value {
2581        Value::Object(object) if object.is_class(TABLE_CLASS) => Ok(object),
2582        other => Err(invalid_argument(format!(
2583            "{context}: expected table, got {other:?}"
2584        ))),
2585    }
2586}
2587
2588pub fn table_variables(object: &ObjectInstance) -> BuiltinResult<StructValue> {
2589    match object.properties.get(TABLE_VARIABLES_FIELD) {
2590        Some(Value::Struct(st)) => Ok(st.clone()),
2591        Some(other) => Err(invalid_variable(format!(
2592            "table: invalid internal variable storage {other:?}"
2593        ))),
2594        None => Ok(StructValue::new()),
2595    }
2596}
2597
2598pub fn table_variable_names_from_object(object: &ObjectInstance) -> BuiltinResult<Vec<String>> {
2599    let variables = table_variables(object)?;
2600    Ok(variables.fields.keys().cloned().collect())
2601}
2602
2603pub fn table_height(object: &ObjectInstance) -> BuiltinResult<usize> {
2604    let variables = table_variables(object)?;
2605    match variables.fields.values().next() {
2606        Some(value) => value_row_count(value),
2607        None => Ok(0),
2608    }
2609}
2610
2611pub fn table_width(object: &ObjectInstance) -> BuiltinResult<usize> {
2612    table_variables(object).map(|vars| vars.fields.len())
2613}
2614
2615fn table_public_properties(object: &ObjectInstance) -> BuiltinResult<StructValue> {
2616    match object
2617        .properties
2618        .get(TABLE_PROPERTIES_FIELD)
2619        .or_else(|| object.properties.get(PROPERTIES_MEMBER))
2620    {
2621        Some(Value::Struct(st)) => Ok(st.clone()),
2622        Some(other) => Err(invalid_variable(format!(
2623            "table: invalid Properties storage {other:?}"
2624        ))),
2625        None => Ok(default_properties(
2626            table_variable_names_from_object(object)?,
2627            None,
2628        )),
2629    }
2630}
2631
2632fn sync_table_properties(object: &mut ObjectInstance, props: StructValue) {
2633    object.properties.insert(
2634        TABLE_PROPERTIES_FIELD.to_string(),
2635        Value::Struct(props.clone()),
2636    );
2637    object
2638        .properties
2639        .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
2640}
2641
2642fn table_member_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2643    let name = scalar_text(payload, "table member")?;
2644    if name == PROPERTIES_MEMBER {
2645        return Ok(Value::Struct(table_public_properties(object)?));
2646    }
2647    let variables = table_variables(object)?;
2648    variables
2649        .fields
2650        .get(&name)
2651        .cloned()
2652        .ok_or_else(|| invalid_variable(format!("table: unrecognized variable '{name}'")))
2653}
2654
2655fn table_member_set(object: &mut ObjectInstance, field: &str, rhs: Value) -> BuiltinResult<()> {
2656    if field == PROPERTIES_MEMBER {
2657        let Value::Struct(props) = rhs else {
2658            return Err(invalid_variable(
2659                "table: Properties assignment expects a scalar struct",
2660            ));
2661        };
2662        apply_properties(object, props)?;
2663        return Ok(());
2664    }
2665    let mut variables = table_variables(object)?;
2666    let mut names = table_variable_names_from_object(object)?;
2667    let height = table_height(object)?;
2668    let rhs_rows = value_row_count(&rhs)?;
2669    if !variables.fields.is_empty() && rhs_rows != height {
2670        return Err(invalid_variable(format!(
2671            "table: variable '{field}' has {rhs_rows} rows but table has {height}"
2672        )));
2673    }
2674    if !variables.fields.contains_key(field) {
2675        names.push(field.to_string());
2676    }
2677    variables.insert(field.to_string(), rhs);
2678    object
2679        .properties
2680        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2681    let mut props = table_public_properties(object)?;
2682    update_variable_metadata_names(&mut props, names)?;
2683    sync_table_properties(object, props);
2684    Ok(())
2685}
2686
2687fn apply_properties(object: &mut ObjectInstance, mut props: StructValue) -> BuiltinResult<()> {
2688    if let Some(value) = props.fields.get(VARIABLE_NAMES) {
2689        let names = variable_name_list(value)?;
2690        rename_table_variables(object, names.clone())?;
2691        update_variable_metadata_names(&mut props, names)?;
2692    }
2693    sync_table_properties(object, props);
2694    Ok(())
2695}
2696
2697fn rename_table_variables(
2698    object: &mut ObjectInstance,
2699    new_names: Vec<String>,
2700) -> BuiltinResult<()> {
2701    let old_names = table_variable_names_from_object(object)?;
2702    if old_names.len() != new_names.len() {
2703        return Err(invalid_variable(
2704            "table: VariableNames assignment must preserve variable count",
2705        ));
2706    }
2707    let new_names = make_unique_variable_names(new_names);
2708    let variables = table_variables(object)?;
2709    let mut renamed = StructValue::new();
2710    for (old, new) in old_names.iter().zip(new_names.iter()) {
2711        let value = variables
2712            .fields
2713            .get(old)
2714            .cloned()
2715            .ok_or_else(|| invalid_variable(format!("table: missing variable '{old}'")))?;
2716        renamed.insert(new.clone(), value);
2717    }
2718    object
2719        .properties
2720        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(renamed));
2721    Ok(())
2722}
2723
2724fn update_variable_metadata_names(
2725    props: &mut StructValue,
2726    names: Vec<String>,
2727) -> BuiltinResult<()> {
2728    props.insert(
2729        VARIABLE_NAMES,
2730        Value::StringArray(
2731            StringArray::new(names.clone(), vec![1, names.len()])
2732                .map_err(|err| invalid_variable(format!("table: {err}")))?,
2733        ),
2734    );
2735    for field in [VARIABLE_UNITS, VARIABLE_DESCRIPTIONS] {
2736        let existing = props.fields.get(field).cloned();
2737        let values = match existing {
2738            Some(Value::StringArray(mut array)) => {
2739                array.data.resize(names.len(), String::new());
2740                array.data.truncate(names.len());
2741                array.data
2742            }
2743            _ => vec![String::new(); names.len()],
2744        };
2745        props.insert(
2746            field,
2747            Value::StringArray(
2748                StringArray::new(values, vec![1, names.len()])
2749                    .map_err(|err| invalid_variable(format!("table: {err}")))?,
2750            ),
2751        );
2752    }
2753    Ok(())
2754}
2755
2756fn table_paren_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2757    let selectors = selector_values(payload)?;
2758    let rows = parse_row_selector(selectors.first(), table_height(object)?)?;
2759    let variable_names = table_variable_names_from_object(object)?;
2760    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2761    let variables = table_variables(object)?;
2762    let mut out = Vec::with_capacity(selected_names.len());
2763    for name in &selected_names {
2764        let value = variables
2765            .fields
2766            .get(name)
2767            .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
2768        out.push(select_rows(value, &rows)?);
2769    }
2770    let row_names = selected_row_names(object, &rows)?;
2771    table_from_columns_with_properties(selected_names, out, row_names)
2772}
2773
2774fn table_brace_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2775    let subset = table_paren_get(object, payload)?;
2776    let object = into_table_object(subset, "table brace indexing")?;
2777    let variables = table_variables(&object)?;
2778    if variables.fields.len() == 1 {
2779        return variables
2780            .fields
2781            .values()
2782            .next()
2783            .cloned()
2784            .ok_or_else(|| invalid_variable("table: missing selected variable"));
2785    }
2786    let values = variables.fields.values().collect::<Vec<_>>();
2787    if values.iter().all(|value| matches!(value, Value::Tensor(_))) {
2788        return concatenate_numeric_columns(&values);
2789    }
2790    CellArray::new(
2791        values.into_iter().cloned().collect(),
2792        1,
2793        variables.fields.len(),
2794    )
2795    .map(Value::Cell)
2796    .map_err(|err| invalid_variable(format!("table: {err}")))
2797}
2798
2799fn table_paren_assign(
2800    mut object: ObjectInstance,
2801    payload: &Value,
2802    rhs: Value,
2803) -> BuiltinResult<Value> {
2804    let rhs_table = into_table_object(rhs, "table paren assignment")?;
2805    let selectors = selector_values(payload)?;
2806    let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2807    let variable_names = table_variable_names_from_object(&object)?;
2808    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2809    let rhs_names = table_variable_names_from_object(&rhs_table)?;
2810    if selected_names.len() != rhs_names.len() {
2811        return Err(invalid_variable(
2812            "table: assignment variable count must match selected variables",
2813        ));
2814    }
2815    let mut variables = table_variables(&object)?;
2816    let rhs_variables = table_variables(&rhs_table)?;
2817    for (target_name, rhs_name) in selected_names.iter().zip(rhs_names.iter()) {
2818        let current =
2819            variables.fields.get(target_name).cloned().ok_or_else(|| {
2820                invalid_variable(format!("table: missing variable '{target_name}'"))
2821            })?;
2822        let rhs_col =
2823            rhs_variables.fields.get(rhs_name).cloned().ok_or_else(|| {
2824                invalid_variable(format!("table: missing rhs variable '{rhs_name}'"))
2825            })?;
2826        variables.insert(target_name.clone(), assign_rows(current, &rows, rhs_col)?);
2827    }
2828    object
2829        .properties
2830        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2831    Ok(Value::Object(object))
2832}
2833
2834fn table_brace_assign(
2835    mut object: ObjectInstance,
2836    payload: &Value,
2837    rhs: Value,
2838) -> BuiltinResult<Value> {
2839    let selectors = selector_values(payload)?;
2840    let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2841    let variable_names = table_variable_names_from_object(&object)?;
2842    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2843    if selected_names.len() != 1 {
2844        return Err(invalid_variable(
2845            "table: brace assignment supports one variable at a time",
2846        ));
2847    }
2848    let mut variables = table_variables(&object)?;
2849    let target = selected_names[0].clone();
2850    let current = variables
2851        .fields
2852        .get(&target)
2853        .cloned()
2854        .ok_or_else(|| invalid_variable(format!("table: missing variable '{target}'")))?;
2855    variables.insert(target, assign_rows(current, &rows, rhs)?);
2856    object
2857        .properties
2858        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2859    Ok(Value::Object(object))
2860}
2861
2862fn selector_values(payload: &Value) -> BuiltinResult<Vec<Value>> {
2863    match payload {
2864        Value::Cell(cell) => {
2865            let mut out = Vec::with_capacity(cell.data.len());
2866            for handle in &cell.data {
2867                out.push(unsafe { &*handle.as_raw() }.clone());
2868            }
2869            Ok(out)
2870        }
2871        other => Ok(vec![other.clone()]),
2872    }
2873}
2874
2875fn parse_row_selector(selector: Option<&Value>, height: usize) -> BuiltinResult<Vec<usize>> {
2876    let Some(selector) = selector else {
2877        return Ok((0..height).collect());
2878    };
2879    if is_colon_selector(selector) {
2880        return Ok((0..height).collect());
2881    }
2882    if is_end_selector(selector) {
2883        return if height == 0 {
2884            Err(invalid_index(
2885                "table: end row index is invalid for empty table",
2886            ))
2887        } else {
2888            Ok(vec![height - 1])
2889        };
2890    }
2891    match selector {
2892        Value::Num(n) => Ok(vec![one_based_to_zero(*n, height, "row")?]),
2893        Value::Int(i) => Ok(vec![one_based_to_zero(i.to_f64(), height, "row")?]),
2894        Value::Tensor(tensor) => tensor
2895            .data
2896            .iter()
2897            .map(|value| one_based_to_zero(*value, height, "row"))
2898            .collect(),
2899        Value::LogicalArray(array) => {
2900            if array.data.len() != height {
2901                return Err(invalid_index(
2902                    "table: logical row selector length must match table height",
2903                ));
2904            }
2905            Ok(array
2906                .data
2907                .iter()
2908                .enumerate()
2909                .filter_map(|(idx, value)| (*value != 0).then_some(idx))
2910                .collect())
2911        }
2912        other => Err(invalid_index(format!(
2913            "table: unsupported row selector {other:?}"
2914        ))),
2915    }
2916}
2917
2918fn parse_variable_selector(
2919    selector: Option<&Value>,
2920    names: &[String],
2921) -> BuiltinResult<Vec<String>> {
2922    let Some(selector) = selector else {
2923        return Ok(names.to_vec());
2924    };
2925    if is_colon_selector(selector) {
2926        return Ok(names.to_vec());
2927    }
2928    match selector {
2929        Value::String(_) | Value::CharArray(_) | Value::StringArray(_) | Value::Cell(_) => {
2930            let selected = string_list(selector)?;
2931            for name in &selected {
2932                if !names.contains(name) {
2933                    return Err(invalid_variable(format!(
2934                        "table: unrecognized variable '{name}'"
2935                    )));
2936                }
2937            }
2938            Ok(selected)
2939        }
2940        Value::Num(n) => Ok(vec![name_at_index(names, *n)?]),
2941        Value::Int(i) => Ok(vec![name_at_index(names, i.to_f64())?]),
2942        Value::Tensor(tensor) => tensor
2943            .data
2944            .iter()
2945            .map(|value| name_at_index(names, *value))
2946            .collect(),
2947        Value::LogicalArray(array) => {
2948            if array.data.len() != names.len() {
2949                return Err(invalid_index(
2950                    "table: logical variable selector length must match table width",
2951                ));
2952            }
2953            Ok(array
2954                .data
2955                .iter()
2956                .zip(names.iter())
2957                .filter_map(|(flag, name)| (*flag != 0).then_some(name.clone()))
2958                .collect())
2959        }
2960        other => Err(invalid_index(format!(
2961            "table: unsupported variable selector {other:?}"
2962        ))),
2963    }
2964}
2965
2966fn is_colon_selector(value: &Value) -> bool {
2967    scalar_text(value, "selector")
2968        .map(|text| text == ":")
2969        .unwrap_or(false)
2970}
2971
2972fn is_end_selector(value: &Value) -> bool {
2973    scalar_text(value, "selector")
2974        .map(|text| text == "end")
2975        .unwrap_or(false)
2976}
2977
2978fn name_at_index(names: &[String], value: f64) -> BuiltinResult<String> {
2979    let idx = one_based_to_zero(value, names.len(), "variable")?;
2980    Ok(names[idx].clone())
2981}
2982
2983fn one_based_to_zero(value: f64, len: usize, context: &str) -> BuiltinResult<usize> {
2984    if !value.is_finite() || value < 1.0 || (value.round() - value).abs() > f64::EPSILON {
2985        return Err(invalid_index(format!(
2986            "table: {context} indices must be positive finite integers"
2987        )));
2988    }
2989    let idx = value.round() as usize - 1;
2990    if idx >= len {
2991        return Err(invalid_index(format!(
2992            "table: {context} index exceeds bounds"
2993        )));
2994    }
2995    Ok(idx)
2996}
2997
2998fn selected_row_names(
2999    object: &ObjectInstance,
3000    rows: &[usize],
3001) -> BuiltinResult<Option<Vec<String>>> {
3002    let props = table_public_properties(object)?;
3003    let Some(value) = props.fields.get(ROW_NAMES) else {
3004        return Ok(None);
3005    };
3006    let names = string_list(value)?;
3007    if names.is_empty() {
3008        return Ok(None);
3009    }
3010    Ok(Some(
3011        rows.iter()
3012            .filter_map(|row| names.get(*row).cloned())
3013            .collect(),
3014    ))
3015}
3016
3017fn value_row_count(value: &Value) -> BuiltinResult<usize> {
3018    match value {
3019        Value::Tensor(tensor) => Ok(tensor.rows()),
3020        Value::ComplexTensor(tensor) => Ok(tensor.rows),
3021        Value::StringArray(array) => Ok(array.rows()),
3022        Value::LogicalArray(array) => Ok(array.shape.first().copied().unwrap_or(array.data.len())),
3023        Value::Cell(cell) => Ok(cell.rows),
3024        Value::CharArray(array) => Ok(array.rows),
3025        Value::Object(obj) if obj.is_class("datetime") => {
3026            crate::builtins::datetime::serials_from_datetime_value(value)
3027                .map(|tensor| tensor.rows())
3028        }
3029        Value::Object(obj) if obj.is_class("duration") => {
3030            crate::builtins::duration::duration_tensor_from_duration_value(value)
3031                .map(|tensor| tensor.rows())
3032        }
3033        Value::Object(obj) if obj.is_class(TABLE_CLASS) => table_height(obj),
3034        _ => Ok(1),
3035    }
3036}
3037
3038fn select_rows(value: &Value, rows: &[usize]) -> BuiltinResult<Value> {
3039    match value {
3040        Value::Tensor(tensor) => {
3041            let cols = tensor.cols();
3042            let mut data = Vec::with_capacity(rows.len() * cols);
3043            for col in 0..cols {
3044                for &row in rows {
3045                    data.push(tensor.get2(row, col).map_err(invalid_index)?);
3046                }
3047            }
3048            Tensor::new_with_dtype(data, vec![rows.len(), cols], tensor.dtype)
3049                .map(Value::Tensor)
3050                .map_err(invalid_variable)
3051        }
3052        Value::ComplexTensor(tensor) => {
3053            let mut data = Vec::with_capacity(rows.len() * tensor.cols);
3054            for col in 0..tensor.cols {
3055                for &row in rows {
3056                    let idx = row + col * tensor.rows;
3057                    data.push(*tensor.data.get(idx).ok_or_else(|| {
3058                        invalid_index("table: complex variable row index out of bounds")
3059                    })?);
3060                }
3061            }
3062            ComplexTensor::new(data, vec![rows.len(), tensor.cols])
3063                .map(Value::ComplexTensor)
3064                .map_err(invalid_variable)
3065        }
3066        Value::StringArray(array) => {
3067            let cols = array.cols();
3068            let mut data = Vec::with_capacity(rows.len() * cols);
3069            for col in 0..cols {
3070                for &row in rows {
3071                    let idx = row + col * array.rows();
3072                    data.push(array.data.get(idx).cloned().ok_or_else(|| {
3073                        invalid_index("table: string variable row index out of bounds")
3074                    })?);
3075                }
3076            }
3077            StringArray::new(data, vec![rows.len(), cols])
3078                .map(Value::StringArray)
3079                .map_err(invalid_variable)
3080        }
3081        Value::CharArray(array) => {
3082            let mut data = Vec::with_capacity(rows.len() * array.cols);
3083            for &row in rows {
3084                if row >= array.rows {
3085                    return Err(invalid_index(
3086                        "table: char variable row index out of bounds",
3087                    ));
3088                }
3089                let start = row * array.cols;
3090                data.extend_from_slice(&array.data[start..start + array.cols]);
3091            }
3092            CharArray::new(data, rows.len(), array.cols)
3093                .map(Value::CharArray)
3094                .map_err(invalid_variable)
3095        }
3096        Value::LogicalArray(array) => {
3097            let source_rows = array.shape.first().copied().unwrap_or(array.data.len());
3098            let cols = array.shape.get(1).copied().unwrap_or(1);
3099            let mut data = Vec::with_capacity(rows.len() * cols);
3100            for col in 0..cols {
3101                for &row in rows {
3102                    let idx = row + col * source_rows;
3103                    data.push(*array.data.get(idx).ok_or_else(|| {
3104                        invalid_index("table: logical variable row index out of bounds")
3105                    })?);
3106                }
3107            }
3108            LogicalArray::new(data, vec![rows.len(), cols])
3109                .map(Value::LogicalArray)
3110                .map_err(invalid_variable)
3111        }
3112        Value::Cell(cell) => {
3113            let mut data = Vec::with_capacity(rows.len() * cell.cols);
3114            for col in 0..cell.cols {
3115                for &row in rows {
3116                    data.push(cell.get(row, col).map_err(invalid_index)?);
3117                }
3118            }
3119            CellArray::new(data, rows.len(), cell.cols)
3120                .map(Value::Cell)
3121                .map_err(invalid_variable)
3122        }
3123        Value::Object(obj) if obj.is_class("datetime") => {
3124            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3125            let selected = select_rows(&Value::Tensor(tensor), rows)?;
3126            match selected {
3127                Value::Tensor(tensor) => {
3128                    crate::builtins::datetime::datetime_object_from_serial_tensor(
3129                        tensor,
3130                        crate::builtins::datetime::datetime_format_from_value(value),
3131                    )
3132                }
3133                _ => unreachable!("select_rows tensor branch returns tensor"),
3134            }
3135        }
3136        Value::Object(obj) if obj.is_class("duration") => {
3137            let tensor = crate::builtins::duration::duration_tensor_from_duration_value(value)?;
3138            let selected = select_rows(&Value::Tensor(tensor), rows)?;
3139            match selected {
3140                Value::Tensor(tensor) => {
3141                    crate::builtins::duration::duration_object_from_days_tensor(
3142                        tensor,
3143                        crate::builtins::duration::duration_format_from_value(value),
3144                    )
3145                }
3146                _ => unreachable!("select_rows tensor branch returns tensor"),
3147            }
3148        }
3149        _ if rows.len() == 1 && rows[0] == 0 => Ok(value.clone()),
3150        other => Err(invalid_variable(format!(
3151            "table: row selection unsupported for variable {other:?}"
3152        ))),
3153    }
3154}
3155
3156fn assign_rows(mut current: Value, rows: &[usize], rhs: Value) -> BuiltinResult<Value> {
3157    if value_row_count(&rhs)? != rows.len() {
3158        return Err(invalid_variable(
3159            "table: assignment row count must match selected row count",
3160        ));
3161    }
3162    let replacing_all_rows = rows.len() == value_row_count(&current)?;
3163    match (&mut current, rhs) {
3164        (Value::Tensor(target), Value::Tensor(source)) => {
3165            if target.cols() != source.cols() {
3166                return Err(invalid_variable(
3167                    "table: tensor assignment column count mismatch",
3168                ));
3169            }
3170            for col in 0..target.cols() {
3171                for (src_row, &dst_row) in rows.iter().enumerate() {
3172                    let value = source.get2(src_row, col).map_err(invalid_index)?;
3173                    target.set2(dst_row, col, value).map_err(invalid_index)?;
3174                }
3175            }
3176            Ok(current)
3177        }
3178        (_, source) if replacing_all_rows => Ok(source),
3179        _ => Err(invalid_variable(
3180            "table: assignment for this variable type requires replacing all rows",
3181        )),
3182    }
3183}
3184
3185fn concatenate_numeric_columns(values: &[&Value]) -> BuiltinResult<Value> {
3186    let rows = values
3187        .first()
3188        .and_then(|value| match value {
3189            Value::Tensor(t) => Some(t.rows()),
3190            _ => None,
3191        })
3192        .unwrap_or(0);
3193    let cols = values
3194        .iter()
3195        .map(|value| match value {
3196            Value::Tensor(t) => Ok(t.cols()),
3197            _ => Err(invalid_variable("table: expected numeric variable")),
3198        })
3199        .collect::<BuiltinResult<Vec<_>>>()?;
3200    let total_cols: usize = cols.iter().sum();
3201    let mut data = Vec::with_capacity(rows * total_cols);
3202    for value in values {
3203        let Value::Tensor(tensor) = value else {
3204            return Err(invalid_variable("table: expected numeric variable"));
3205        };
3206        for col in 0..tensor.cols() {
3207            for row in 0..rows {
3208                data.push(tensor.get2(row, col).map_err(invalid_index)?);
3209            }
3210        }
3211    }
3212    Tensor::new(data, vec![rows, total_cols])
3213        .map(Value::Tensor)
3214        .map_err(invalid_variable)
3215}
3216
3217pub fn sortrows_table(value: Value, rest: &[Value]) -> BuiltinResult<(Value, Tensor)> {
3218    let object = into_table_object(value, "sortrows")?;
3219    let names = table_variable_names_from_object(&object)?;
3220    let sort_spec = SortSpec::parse(rest, &names)?;
3221    let height = table_height(&object)?;
3222    let variables = table_variables(&object)?;
3223    let mut indices: Vec<usize> = (0..height).collect();
3224    indices.sort_by(|&a, &b| {
3225        for key in &sort_spec.keys {
3226            let Some(value) = variables.fields.get(&key.name) else {
3227                continue;
3228            };
3229            let ord = compare_table_cells(value, a, b).unwrap_or(Ordering::Equal);
3230            let ord = if key.descending { ord.reverse() } else { ord };
3231            if ord != Ordering::Equal {
3232                return ord;
3233            }
3234        }
3235        a.cmp(&b)
3236    });
3237    let mut sorted_columns = Vec::with_capacity(names.len());
3238    for name in &names {
3239        let value = variables
3240            .fields
3241            .get(name)
3242            .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
3243        sorted_columns.push(select_rows(value, &indices)?);
3244    }
3245    let row_names = selected_row_names(&object, &indices)?;
3246    let sorted = table_from_columns_with_properties(names, sorted_columns, row_names)?;
3247    let indices_tensor = Tensor::new(
3248        indices.iter().map(|idx| *idx as f64 + 1.0).collect(),
3249        vec![indices.len(), 1],
3250    )
3251    .map_err(invalid_variable)?;
3252    Ok((sorted, indices_tensor))
3253}
3254
3255struct SortSpec {
3256    keys: Vec<SortKey>,
3257}
3258
3259struct SortKey {
3260    name: String,
3261    descending: bool,
3262}
3263
3264impl SortSpec {
3265    fn parse(rest: &[Value], names: &[String]) -> BuiltinResult<Self> {
3266        let mut keys = if rest.is_empty() {
3267            names
3268                .iter()
3269                .map(|name| SortKey {
3270                    name: name.clone(),
3271                    descending: false,
3272                })
3273                .collect::<Vec<_>>()
3274        } else {
3275            parse_variable_selector(rest.first(), names)?
3276                .into_iter()
3277                .map(|name| SortKey {
3278                    name,
3279                    descending: false,
3280                })
3281                .collect()
3282        };
3283        if let Some(direction) = rest.get(1) {
3284            let directions = string_list(direction)?;
3285            if directions.len() == 1 {
3286                let descending = directions[0].eq_ignore_ascii_case("descend")
3287                    || directions[0].eq_ignore_ascii_case("desc");
3288                for key in &mut keys {
3289                    key.descending = descending;
3290                }
3291            } else {
3292                for (key, direction) in keys.iter_mut().zip(directions.iter()) {
3293                    key.descending = direction.eq_ignore_ascii_case("descend")
3294                        || direction.eq_ignore_ascii_case("desc");
3295                }
3296            }
3297        }
3298        Ok(Self { keys })
3299    }
3300}
3301
3302fn compare_table_cells(value: &Value, a: usize, b: usize) -> BuiltinResult<Ordering> {
3303    match value {
3304        Value::Tensor(tensor) => Ok(tensor
3305            .get2(a, 0)
3306            .map_err(invalid_index)?
3307            .partial_cmp(&tensor.get2(b, 0).map_err(invalid_index)?)
3308            .unwrap_or(Ordering::Greater)),
3309        Value::StringArray(array) => {
3310            let av = array.data.get(a).cloned().unwrap_or_default();
3311            let bv = array.data.get(b).cloned().unwrap_or_default();
3312            Ok(av.cmp(&bv))
3313        }
3314        Value::LogicalArray(array) => {
3315            let av = *array.data.get(a).unwrap_or(&0);
3316            let bv = *array.data.get(b).unwrap_or(&0);
3317            Ok(av.cmp(&bv))
3318        }
3319        Value::Object(obj) if obj.is_class("datetime") => {
3320            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3321            Ok(tensor
3322                .data
3323                .get(a)
3324                .copied()
3325                .unwrap_or(f64::NAN)
3326                .partial_cmp(&tensor.data.get(b).copied().unwrap_or(f64::NAN))
3327                .unwrap_or(Ordering::Greater))
3328        }
3329        other => Ok(cell_key_string(other, a).cmp(&cell_key_string(other, b))),
3330    }
3331}
3332
/// One component of a row's grouping key.
///
/// `Eq` and `Ord` are implemented by hand so that atoms carrying `f64` values can be used
/// as ordered-map keys. Atoms of different kinds order by [`GroupAtom::rank`]; atoms of the
/// same kind order by their payload.
#[derive(Clone, Debug)]
enum GroupAtom {
    Number(f64),
    Text(String),
    Logical(bool),
    Missing,
}

impl GroupAtom {
    /// Relative position of each kind: `Missing < Logical < Number < Text`.
    fn rank(&self) -> u8 {
        match self {
            Self::Missing => 0,
            Self::Logical(_) => 1,
            Self::Number(_) => 2,
            Self::Text(_) => 3,
        }
    }

    /// Total order over `f64` for grouping: numerically equal values (including `0.0` and
    /// `-0.0`) are equal, every NaN equals every other NaN, and NaN sorts after all numbers.
    ///
    /// `f64::total_cmp` is not used because it separates `-0.0` from `0.0` and
    /// distinguishes NaNs by sign and payload, which would split one group into several.
    fn compare_numbers(a: f64, b: f64) -> Ordering {
        match (a.is_nan(), b.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            (false, false) => a.partial_cmp(&b).unwrap_or(Ordering::Equal),
        }
    }
}

impl PartialEq for GroupAtom {
    /// Equality is defined through `cmp` so it always agrees with the ordering.
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for GroupAtom {}

impl PartialOrd for GroupAtom {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for GroupAtom {
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (Self::Missing, Self::Missing) => Ordering::Equal,
            (Self::Logical(a), Self::Logical(b)) => a.cmp(b),
            (Self::Number(a), Self::Number(b)) => Self::compare_numbers(*a, *b),
            (Self::Text(a), Self::Text(b)) => a.cmp(b),
            // Different kinds: fall back to the fixed kind ranking.
            _ => self.rank().cmp(&other.rank()),
        }
    }
}
3381
3382fn cell_group_atom(value: &Value, row: usize) -> GroupAtom {
3383    match value {
3384        Value::Tensor(tensor) => tensor
3385            .get2(row, 0)
3386            .map(GroupAtom::Number)
3387            .unwrap_or(GroupAtom::Missing),
3388        Value::StringArray(array) => array
3389            .data
3390            .get(row)
3391            .cloned()
3392            .map(GroupAtom::Text)
3393            .unwrap_or(GroupAtom::Missing),
3394        Value::LogicalArray(array) => array
3395            .data
3396            .get(row)
3397            .map(|value| GroupAtom::Logical(*value != 0))
3398            .unwrap_or(GroupAtom::Missing),
3399        Value::Object(obj) if obj.is_class("datetime") => {
3400            crate::builtins::datetime::serials_from_datetime_value(value)
3401                .ok()
3402                .and_then(|tensor| tensor.data.get(row).copied())
3403                .map(GroupAtom::Number)
3404                .unwrap_or(GroupAtom::Missing)
3405        }
3406        other => GroupAtom::Text(cell_key_string(other, row)),
3407    }
3408}
3409
3410fn groupsummary_impl(
3411    table: Value,
3412    groupvars: Value,
3413    method: Value,
3414    rest: Vec<Value>,
3415) -> BuiltinResult<Value> {
3416    let object = into_table_object(table, "groupsummary")?;
3417    let names = table_variable_names_from_object(&object)?;
3418    let group_names = parse_variable_selector(Some(&groupvars), &names)?;
3419    let methods = string_list(&method)?;
3420    if methods.is_empty() {
3421        return Err(invalid_argument(
3422            "groupsummary: method list must not be empty",
3423        ));
3424    }
3425    let data_names = if let Some(value) = rest.first() {
3426        parse_variable_selector(Some(value), &names)?
3427    } else {
3428        names
3429            .iter()
3430            .filter(|name| !group_names.contains(name))
3431            .filter(|name| {
3432                table_variables(&object)
3433                    .ok()
3434                    .and_then(|vars| vars.fields.get(*name).cloned())
3435                    .map(|value| matches!(value, Value::Tensor(_)))
3436                    .unwrap_or(false)
3437            })
3438            .cloned()
3439            .collect()
3440    };
3441    let variables = table_variables(&object)?;
3442    let height = table_height(&object)?;
3443    let mut groups: BTreeMap<Vec<GroupAtom>, Vec<usize>> = BTreeMap::new();
3444    for row in 0..height {
3445        let key = group_names
3446            .iter()
3447            .map(|name| {
3448                variables
3449                    .fields
3450                    .get(name)
3451                    .map(|value| cell_group_atom(value, row))
3452                    .unwrap_or(GroupAtom::Missing)
3453            })
3454            .collect::<Vec<_>>();
3455        groups.entry(key).or_default().push(row);
3456    }
3457    let group_rows = groups
3458        .values()
3459        .filter_map(|rows| rows.first().copied())
3460        .collect::<Vec<_>>();
3461    let mut out_names = Vec::new();
3462    let mut out_columns = Vec::new();
3463    for name in &group_names {
3464        let value = variables.fields.get(name).ok_or_else(|| {
3465            invalid_variable(format!("groupsummary: missing group variable '{name}'"))
3466        })?;
3467        out_names.push(name.clone());
3468        out_columns.push(select_rows(value, &group_rows)?);
3469    }
3470    out_names.push("GroupCount".to_string());
3471    out_columns.push(Value::Tensor(
3472        Tensor::new(
3473            groups.values().map(|rows| rows.len() as f64).collect(),
3474            vec![groups.len(), 1],
3475        )
3476        .map_err(invalid_variable)?,
3477    ));
3478    for method in &methods {
3479        for name in &data_names {
3480            let value = variables.fields.get(name).ok_or_else(|| {
3481                invalid_variable(format!("groupsummary: missing data variable '{name}'"))
3482            })?;
3483            let values = summarize_groups(value, groups.values(), method)?;
3484            out_names.push(format!("{}_{}", method.to_ascii_lowercase(), name));
3485            out_columns.push(Value::Tensor(
3486                Tensor::new(values, vec![groups.len(), 1]).map_err(invalid_variable)?,
3487            ));
3488        }
3489    }
3490    table_from_columns(out_names, out_columns)
3491}
3492
3493fn summarize_groups<'a>(
3494    value: &Value,
3495    groups: impl Iterator<Item = &'a Vec<usize>>,
3496    method: &str,
3497) -> BuiltinResult<Vec<f64>> {
3498    let tensor = match value {
3499        Value::Tensor(tensor) if tensor.cols() == 1 => tensor,
3500        _ => {
3501            return Err(invalid_variable(
3502                "groupsummary: summary data variables must be numeric column vectors",
3503            ))
3504        }
3505    };
3506    groups
3507        .map(|rows| {
3508            let mut values = rows
3509                .iter()
3510                .map(|row| tensor.get2(*row, 0).map_err(invalid_index))
3511                .collect::<BuiltinResult<Vec<_>>>()?;
3512            values.retain(|value| !value.is_nan());
3513            let result = match method.to_ascii_lowercase().as_str() {
3514                "mean" => {
3515                    if values.is_empty() {
3516                        f64::NAN
3517                    } else {
3518                        values.iter().sum::<f64>() / values.len() as f64
3519                    }
3520                }
3521                "sum" => values.iter().sum(),
3522                "min" => values.into_iter().fold(f64::INFINITY, f64::min),
3523                "max" => values.into_iter().fold(f64::NEG_INFINITY, f64::max),
3524                "median" => {
3525                    if values.is_empty() {
3526                        f64::NAN
3527                    } else {
3528                        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
3529                        let mid = values.len() / 2;
3530                        if values.len() % 2 == 0 {
3531                            (values[mid - 1] + values[mid]) / 2.0
3532                        } else {
3533                            values[mid]
3534                        }
3535                    }
3536                }
3537                "count" | "numel" => values.len() as f64,
3538                other => {
3539                    return Err(invalid_argument(format!(
3540                        "groupsummary: unsupported method '{other}'"
3541                    )))
3542                }
3543            };
3544            Ok(result)
3545        })
3546        .collect()
3547}
3548
3549fn cell_key_string(value: &Value, row: usize) -> String {
3550    match value {
3551        Value::Tensor(tensor) => tensor
3552            .get2(row, 0)
3553            .map(format_key_number)
3554            .unwrap_or_default(),
3555        Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
3556        Value::LogicalArray(array) => array
3557            .data
3558            .get(row)
3559            .map(|value| value.to_string())
3560            .unwrap_or_default(),
3561        Value::Object(obj) if obj.is_class("datetime") => {
3562            crate::builtins::datetime::serials_from_datetime_value(value)
3563                .ok()
3564                .and_then(|tensor| tensor.data.get(row).copied())
3565                .map(format_key_number)
3566                .unwrap_or_default()
3567        }
3568        other => format!("{other}"),
3569    }
3570}
3571
3572pub fn table_display_text(value: &Value) -> BuiltinResult<String> {
3573    let object = match value {
3574        Value::Object(object) if object.is_class(TABLE_CLASS) => object,
3575        _ => return Err(invalid_argument("table display expects table object")),
3576    };
3577    let names = table_variable_names_from_object(object)?;
3578    let variables = table_variables(object)?;
3579    let rows = table_height(object)?;
3580    let preview = rows.min(12);
3581    let mut widths = names.iter().map(|name| name.len()).collect::<Vec<_>>();
3582    let rendered_cols = names
3583        .iter()
3584        .enumerate()
3585        .map(|(col, name)| {
3586            let value = variables
3587                .fields
3588                .get(name)
3589                .cloned()
3590                .unwrap_or_else(|| Value::String(String::new()));
3591            let cells = (0..preview)
3592                .map(|row| render_table_cell(&value, row))
3593                .collect::<Vec<_>>();
3594            for cell in &cells {
3595                widths[col] = widths[col].max(cell.len());
3596            }
3597            cells
3598        })
3599        .collect::<Vec<_>>();
3600
3601    let mut lines = Vec::new();
3602    lines.push(format!("{rows}x{} table", names.len()));
3603    if names.is_empty() {
3604        return Ok(lines.join("\n"));
3605    }
3606    let header = names
3607        .iter()
3608        .enumerate()
3609        .map(|(idx, name)| format!("{name:<width$}", width = widths[idx]))
3610        .collect::<Vec<_>>()
3611        .join("  ");
3612    lines.push(header);
3613    for row in 0..preview {
3614        lines.push(
3615            rendered_cols
3616                .iter()
3617                .enumerate()
3618                .map(|(col, cells)| format!("{:<width$}", cells[row], width = widths[col]))
3619                .collect::<Vec<_>>()
3620                .join("  "),
3621        );
3622    }
3623    if preview < rows {
3624        lines.push(format!("... {} more rows", rows - preview));
3625    }
3626    Ok(lines.join("\n"))
3627}
3628
3629pub fn table_summary_text(value: &Value) -> BuiltinResult<String> {
3630    let object = match value {
3631        Value::Object(object) if object.is_class(TABLE_CLASS) => object,
3632        _ => return Err(invalid_argument("table display expects table object")),
3633    };
3634    Ok(format!(
3635        "{}x{} table",
3636        table_height(object)?,
3637        table_width(object)?
3638    ))
3639}
3640
3641fn render_table_cell(value: &Value, row: usize) -> String {
3642    match value {
3643        Value::Tensor(tensor) => tensor
3644            .get2(row, 0)
3645            .map(format_table_number)
3646            .unwrap_or_default(),
3647        Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
3648        Value::LogicalArray(array) => array
3649            .data
3650            .get(row)
3651            .map(|value| if *value != 0 { "true" } else { "false" }.to_string())
3652            .unwrap_or_default(),
3653        Value::Object(obj) if obj.is_class("datetime") => {
3654            crate::builtins::datetime::datetime_string_array(value)
3655                .ok()
3656                .flatten()
3657                .and_then(|array| array.data.get(row).cloned())
3658                .unwrap_or_else(|| value.to_string())
3659        }
3660        other => other.to_string(),
3661    }
3662}
3663
3664fn format_table_number(value: f64) -> String {
3665    if value.is_nan() {
3666        "NaN".to_string()
3667    } else if value.fract() == 0.0 && value.abs() < 1e15 {
3668        format!("{}", value as i64)
3669    } else {
3670        trim_float(format!("{value:.6}"))
3671    }
3672}
3673
/// Formats a number as a comparison key.
///
/// NaN maps to `"NaN"`, both zeros map to `"0"`, and every other value uses Rust's shortest
/// round-trip decimal representation, so two distinct non-zero values never share a key.
///
/// A fixed 17-decimal rendering is deliberately not used: it collapses every value with
/// magnitude below roughly `5e-18` to `"0"` and gives `-0.0` a different key from `0.0`.
fn format_key_number(value: f64) -> String {
    if value.is_nan() {
        "NaN".to_string()
    } else if value == 0.0 {
        // Matches both `0.0` and `-0.0`, which are numerically equal.
        "0".to_string()
    } else {
        // `Display` for `f64` prints `inf` / `-inf` for infinities and the shortest
        // round-trippable decimal otherwise.
        value.to_string()
    }
}
3683
/// Strips trailing zeros that follow the first `.` in `text`, and drops the `.` itself when
/// nothing but zeros followed it. Text without a `.` is returned unchanged.
fn trim_float(mut text: String) -> String {
    let Some(dot) = text.find('.') else {
        return text;
    };
    // Length of the part after the dot once its trailing zeros are removed.
    let fraction_len = text[dot + 1..].trim_end_matches('0').len();
    let keep = if fraction_len == 0 {
        dot
    } else {
        dot + 1 + fraction_len
    };
    text.truncate(keep);
    text
}
3697
3698fn scalar_text(value: &Value, context: &str) -> BuiltinResult<String> {
3699    match value {
3700        Value::String(text) => Ok(text.clone()),
3701        Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
3702        Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
3703        _ => Err(invalid_argument(format!(
3704            "table: {context} must be a string scalar or character vector"
3705        ))),
3706    }
3707}
3708
3709fn bool_scalar(value: &Value, context: &str) -> BuiltinResult<bool> {
3710    match value {
3711        Value::Bool(flag) => Ok(*flag),
3712        Value::Int(value) => Ok(value.to_i64() != 0),
3713        Value::Num(value) if value.is_finite() => Ok(*value != 0.0),
3714        Value::String(_) | Value::CharArray(_) | Value::StringArray(_) => {
3715            let text = scalar_text(value, context)?;
3716            match text.to_ascii_lowercase().as_str() {
3717                "true" | "on" | "yes" => Ok(true),
3718                "false" | "off" | "no" => Ok(false),
3719                _ => Err(invalid_argument(format!(
3720                    "table: {context} must be logical"
3721                ))),
3722            }
3723        }
3724        _ => Err(invalid_argument(format!(
3725            "table: {context} must be logical"
3726        ))),
3727    }
3728}
3729
3730fn nonnegative_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
3731    match value {
3732        Value::Int(value) if value.to_i64() >= 0 => Ok(value.to_i64() as usize),
3733        Value::Num(value)
3734            if value.is_finite()
3735                && *value >= 0.0
3736                && (value.round() - value).abs() <= f64::EPSILON =>
3737        {
3738            Ok(value.round() as usize)
3739        }
3740        _ => Err(invalid_argument(format!(
3741            "table: {context} must be a non-negative integer"
3742        ))),
3743    }
3744}
3745
3746fn positive_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
3747    let value = nonnegative_usize(value, context)?;
3748    if value == 0 {
3749        return Err(invalid_argument(format!(
3750            "table: {context} must be a positive integer"
3751        )));
3752    }
3753    Ok(value)
3754}
3755
3756fn option_value_is_empty(value: &Value) -> bool {
3757    match value {
3758        Value::String(text) => text.trim().is_empty(),
3759        Value::CharArray(array) => {
3760            array.data.is_empty()
3761                || (array.rows == 1 && array.data.iter().all(|ch| ch.is_whitespace()))
3762        }
3763        Value::StringArray(array) => {
3764            array.data.is_empty() || (array.data.len() == 1 && array.data[0].trim().is_empty())
3765        }
3766        Value::Cell(cell) => {
3767            cell.data.is_empty()
3768                || cell
3769                    .data
3770                    .iter()
3771                    .all(|handle| option_value_is_empty(unsafe { &*handle.as_raw() }))
3772        }
3773        _ => false,
3774    }
3775}
3776
3777fn string_list(value: &Value) -> BuiltinResult<Vec<String>> {
3778    match value {
3779        Value::String(text) => Ok(vec![text.clone()]),
3780        Value::CharArray(ca) if ca.rows == 1 => Ok(vec![ca.data.iter().collect()]),
3781        Value::StringArray(array) => Ok(array.data.clone()),
3782        Value::Cell(cell) => {
3783            let mut out = Vec::with_capacity(cell.data.len());
3784            for handle in &cell.data {
3785                let value = unsafe { &*handle.as_raw() };
3786                out.extend(string_list(value)?);
3787            }
3788            Ok(out)
3789        }
3790        _ => Err(invalid_argument(
3791            "table: expected string, string array, character vector, or cellstr",
3792        )),
3793    }
3794}
3795
3796fn optional_raw_variable_name_list(value: &Value) -> BuiltinResult<Option<Vec<String>>> {
3797    if option_value_is_empty(value) {
3798        Ok(None)
3799    } else {
3800        raw_variable_name_list(value).map(Some)
3801    }
3802}
3803
3804fn raw_variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
3805    let names = string_list(value)?;
3806    if names.is_empty() {
3807        return Err(invalid_variable("table: variable names must not be empty"));
3808    }
3809    Ok(names)
3810}
3811
3812fn variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
3813    raw_variable_name_list(value).map(make_unique_variable_names)
3814}
3815
3816fn optional_variable_type_list(value: &Value) -> BuiltinResult<Option<Vec<ImportVariableType>>> {
3817    if option_value_is_empty(value) {
3818        Ok(None)
3819    } else {
3820        variable_type_list(value).map(Some)
3821    }
3822}
3823
3824fn variable_type_list(value: &Value) -> BuiltinResult<Vec<ImportVariableType>> {
3825    string_list(value)?
3826        .iter()
3827        .map(|raw| ImportVariableType::parse(raw))
3828        .collect()
3829}
3830
3831fn variable_type_names(value: &Value) -> BuiltinResult<Vec<String>> {
3832    string_list(value)?
3833        .iter()
3834        .map(|raw| ImportVariableType::canonical_label(raw))
3835        .collect()
3836}
3837
3838fn optional_range_spec(value: &Value) -> BuiltinResult<Option<RangeSpec>> {
3839    if option_value_is_empty(value) {
3840        Ok(None)
3841    } else {
3842        RangeSpec::parse(value).map(Some)
3843    }
3844}
3845
3846fn optional_sheet_selector(value: &Value) -> BuiltinResult<Option<SheetSelector>> {
3847    if option_value_is_empty(value) {
3848        Ok(None)
3849    } else {
3850        SheetSelector::parse(value).map(Some)
3851    }
3852}
3853
/// Produces the default names `Var1`, `Var2`, ..., `Var<count>`.
fn generated_variable_names(count: usize) -> Vec<String> {
    let mut generated = Vec::with_capacity(count);
    for ordinal in 1..=count {
        generated.push(format!("Var{ordinal}"));
    }
    generated
}
3857
3858fn make_unique_variable_names(names: Vec<String>) -> Vec<String> {
3859    make_unique_names(
3860        names
3861            .into_iter()
3862            .enumerate()
3863            .map(|(idx, name)| make_valid_variable_name(&name, idx + 1))
3864            .collect(),
3865    )
3866}
3867
/// Makes names unique under ASCII case-insensitive comparison, preserving order.
///
/// Each name is trimmed; a blank name becomes `Var<position>` (1-based). When a name is
/// already taken, `_2`, `_3`, ... is appended to the trimmed base until a free one is found.
fn make_unique_names(names: Vec<String>) -> Vec<String> {
    let mut taken = HashSet::with_capacity(names.len());
    names
        .into_iter()
        .enumerate()
        .map(|(position, raw)| {
            let trimmed = raw.trim();
            let base = if trimmed.is_empty() {
                format!("Var{}", position + 1)
            } else {
                trimmed.to_string()
            };
            let mut unique = base.clone();
            let mut counter = 1usize;
            // `insert` returns false when the lowercased name is already present.
            while !taken.insert(unique.to_ascii_lowercase()) {
                counter += 1;
                unique = format!("{base}_{counter}");
            }
            unique
        })
        .collect()
}
3888
/// Turns arbitrary text into an identifier-like variable name.
///
/// The input is trimmed. ASCII letters and `_` are kept, as are ASCII digits after the
/// first character. Each run of other characters becomes a single `_`, and trailing `_`
/// are removed. When the result does not start with an ASCII letter (including when it is
/// empty), `Var<fallback_index>` is returned instead.
fn make_valid_variable_name(raw: &str, fallback_index: usize) -> String {
    let mut sanitized = String::new();
    for (position, ch) in raw.trim().chars().enumerate() {
        let allowed = ch == '_'
            || if position == 0 {
                ch.is_ascii_alphabetic()
            } else {
                ch.is_ascii_alphanumeric()
            };
        if allowed {
            sanitized.push(ch);
        } else if !sanitized.ends_with('_') {
            sanitized.push('_');
        }
    }
    let candidate = sanitized.trim_end_matches('_');
    // `starts_with` is false for an empty candidate, so that case also falls back.
    if candidate.starts_with(|first: char| first.is_ascii_alphabetic()) {
        candidate.to_string()
    } else {
        format!("Var{fallback_index}")
    }
}
3909
3910#[cfg(test)]
3911mod tests {
3912    use super::*;
3913    use futures::executor::block_on;
3914    use runmat_time::unix_timestamp_ms;
3915    use std::fs;
3916    use std::io::Write;
3917
3918    fn unique_path(prefix: &str) -> PathBuf {
3919        let mut path = std::env::temp_dir();
3920        path.push(format!(
3921            "runmat_{prefix}_{}_{}",
3922            std::process::id(),
3923            unix_timestamp_ms()
3924        ));
3925        path
3926    }
3927
3928    fn read_table(path: &Path, args: Vec<Value>) -> Value {
3929        block_on(readtable_builtin(
3930            Value::from(path.to_string_lossy().to_string()),
3931            args,
3932        ))
3933        .expect("readtable")
3934    }
3935
3936    fn read_table_err(path: &Path, args: Vec<Value>) -> RuntimeError {
3937        block_on(readtable_builtin(
3938            Value::from(path.to_string_lossy().to_string()),
3939            args,
3940        ))
3941        .expect_err("expected readtable failure")
3942    }
3943
3944    fn spreadsheet_options(args: Vec<Value>) -> StructValue {
3945        match block_on(spreadsheet_import_options_builtin(args)).expect("spreadsheetImportOptions")
3946        {
3947            Value::Struct(options) => options,
3948            other => panic!("expected struct options, got {other:?}"),
3949        }
3950    }
3951
3952    fn char_row(array: &CharArray, row: usize) -> String {
3953        let start = row * array.cols;
3954        array.data[start..start + array.cols].iter().collect()
3955    }
3956
3957    fn object(value: Value) -> ObjectInstance {
3958        match value {
3959            Value::Object(object) => object,
3960            other => panic!("expected table object, got {other:?}"),
3961        }
3962    }
3963
3964    #[test]
3965    fn readtable_imports_headered_numeric_and_text_columns() {
3966        let path = unique_path("readtable_basic");
3967        fs::write(&path, "Name,Score\nAda,10\nGrace,12\n").expect("write sample");
3968        let table = object(read_table(&path, Vec::new()));
3969        assert_eq!(
3970            table_variable_names_from_object(&table).unwrap(),
3971            vec!["Name".to_string(), "Score".to_string()]
3972        );
3973        match table_member_get(&table, &Value::from("Score")).unwrap() {
3974            Value::Tensor(tensor) => {
3975                assert_eq!(tensor.shape, vec![2, 1]);
3976                assert_eq!(tensor.data, vec![10.0, 12.0]);
3977            }
3978            other => panic!("expected tensor, got {other:?}"),
3979        }
3980        match table_member_get(&table, &Value::from("Name")).unwrap() {
3981            Value::StringArray(array) => {
3982                assert_eq!(array.data, vec!["Ada".to_string(), "Grace".to_string()]);
3983            }
3984            other => panic!("expected string array, got {other:?}"),
3985        }
3986        let _ = fs::remove_file(&path);
3987    }
3988
3989    #[test]
3990    fn readtable_auto_does_not_consume_headerless_numeric_rows() {
3991        let path = unique_path("readtable_headerless_numeric");
3992        fs::write(&path, "1,2\n3,4\n").expect("write sample");
3993        let table = object(read_table(&path, Vec::new()));
3994        assert_eq!(
3995            table_variable_names_from_object(&table).unwrap(),
3996            vec!["Var1".to_string(), "Var2".to_string()]
3997        );
3998        match table_member_get(&table, &Value::from("Var1")).unwrap() {
3999            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![1.0, 3.0]),
4000            other => panic!("expected tensor, got {other:?}"),
4001        }
4002        match table_member_get(&table, &Value::from("Var2")).unwrap() {
4003            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 4.0]),
4004            other => panic!("expected tensor, got {other:?}"),
4005        }
4006        let _ = fs::remove_file(&path);
4007    }
4008
4009    #[test]
4010    fn readtable_rejects_unknown_and_invalid_options() {
4011        let path = unique_path("readtable_invalid_options");
4012        fs::write(&path, "A\n1\n").expect("write sample");
4013        let err = read_table_err(
4014            &path,
4015            vec![Value::from("DefinitelyNotAnOption"), Value::from(1.0)],
4016        );
4017        assert!(err.message().contains("unsupported option"));
4018        let err = read_table_err(
4019            &path,
4020            vec![Value::from("VariableNamingRule"), Value::from("mangle")],
4021        );
4022        assert!(err.message().contains("unsupported VariableNamingRule"));
4023        let _ = fs::remove_file(&path);
4024    }
4025
4026    #[test]
4027    fn readtable_handles_quoted_delimiters_and_newlines() {
4028        let path = unique_path("readtable_quoted_newlines");
4029        fs::write(
4030            &path,
4031            "Name,Note\nAda,\"hello, world\"\nGrace,\"line one\nline two\"\n",
4032        )
4033        .expect("write sample");
4034        let table = object(read_table(&path, Vec::new()));
4035        match table_member_get(&table, &Value::from("Note")).unwrap() {
4036            Value::StringArray(array) => assert_eq!(
4037                array.data,
4038                vec!["hello, world".to_string(), "line one\nline two".to_string()]
4039            ),
4040            other => panic!("expected string array, got {other:?}"),
4041        }
4042        let _ = fs::remove_file(&path);
4043    }
4044
4045    #[test]
4046    fn readtable_supports_explicit_names_and_missing_tokens() {
4047        let path = unique_path("readtable_options");
4048        fs::write(&path, "1,NA\n2,4\n").expect("write sample");
4049        let names =
4050            StringArray::new(vec!["A".to_string(), "B".to_string()], vec![1, 2]).expect("names");
4051        let table = object(read_table(
4052            &path,
4053            vec![
4054                Value::from("ReadVariableNames"),
4055                Value::Bool(false),
4056                Value::from("VariableNames"),
4057                Value::StringArray(names),
4058                Value::from("TreatAsMissing"),
4059                Value::from("NA"),
4060            ],
4061        ));
4062        match table_member_get(&table, &Value::from("B")).unwrap() {
4063            Value::Tensor(tensor) => {
4064                assert!(tensor.data[0].is_nan());
4065                assert_eq!(tensor.data[1], 4.0);
4066            }
4067            other => panic!("expected tensor, got {other:?}"),
4068        }
4069        let _ = fs::remove_file(&path);
4070    }
4071
4072    #[test]
4073    fn readtable_preserves_variable_names_when_requested() {
4074        let path = unique_path("readtable_preserve_names");
4075        fs::write(&path, "daily revenue,total orders\n100,10\n").expect("write sample");
4076        let table = object(read_table(
4077            &path,
4078            vec![Value::from("VariableNamingRule"), Value::from("preserve")],
4079        ));
4080        assert_eq!(
4081            table_variable_names_from_object(&table).unwrap(),
4082            vec!["daily revenue".to_string(), "total orders".to_string()]
4083        );
4084        let _ = fs::remove_file(&path);
4085    }
4086
4087    fn write_zip_file(zip: &mut zip::ZipWriter<std::fs::File>, name: &str, contents: &str) {
4088        let options = zip::write::SimpleFileOptions::default()
4089            .compression_method(zip::CompressionMethod::Stored);
4090        zip.start_file(name, options).expect("start xlsx part");
4091        zip.write_all(contents.as_bytes()).expect("write xlsx part");
4092    }
4093
4094    fn write_minimal_xlsx(path: &Path) {
4095        let file = std::fs::File::create(path).expect("create xlsx");
4096        let mut zip = zip::ZipWriter::new(file);
4097        write_zip_file(
4098            &mut zip,
4099            "[Content_Types].xml",
4100            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4101<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
4102  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
4103  <Default Extension="xml" ContentType="application/xml"/>
4104  <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
4105  <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
4106  <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
4107</Types>"#,
4108        );
4109        write_zip_file(
4110            &mut zip,
4111            "_rels/.rels",
4112            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4113<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
4114  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
4115</Relationships>"#,
4116        );
4117        write_zip_file(
4118            &mut zip,
4119            "xl/workbook.xml",
4120            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4121<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
4122  <sheets>
4123    <sheet name="Data" sheetId="1" r:id="rId1"/>
4124  </sheets>
4125</workbook>"#,
4126        );
4127        write_zip_file(
4128            &mut zip,
4129            "xl/_rels/workbook.xml.rels",
4130            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4131<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
4132  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
4133  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
4134</Relationships>"#,
4135        );
4136        write_zip_file(
4137            &mut zip,
4138            "xl/styles.xml",
4139            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4140<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4141  <fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
4142  <fills count="1"><fill><patternFill patternType="none"/></fill></fills>
4143  <borders count="1"><border/></borders>
4144  <cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>
4145  <cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellXfs>
4146</styleSheet>"#,
4147        );
4148        write_zip_file(
4149            &mut zip,
4150            "xl/worksheets/sheet1.xml",
4151            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4152<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4153  <sheetData>
4154    <row r="1">
4155      <c r="A1" t="inlineStr"><is><t>Date</t></is></c>
4156      <c r="B1" t="inlineStr"><is><t>Orders</t></is></c>
4157      <c r="C1" t="inlineStr"><is><t>Revenue</t></is></c>
4158    </row>
4159    <row r="2">
4160      <c r="A2" t="inlineStr"><is><t>2026-06-01</t></is></c>
4161      <c r="B2"><v>10</v></c>
4162      <c r="C2"><v>200</v></c>
4163    </row>
4164    <row r="3">
4165      <c r="A3" t="inlineStr"><is><t>2026-06-02</t></is></c>
4166      <c r="B3"><v>4</v></c>
4167      <c r="C3"><v>90</v></c>
4168    </row>
4169  </sheetData>
4170</worksheet>"#,
4171        );
4172        zip.finish().expect("finish xlsx");
4173    }
4174
4175    #[test]
4176    fn readtable_imports_xlsx_sheet_and_range() {
4177        let path = unique_path("readtable_spreadsheet");
4178        let path = path.with_extension("xlsx");
4179        write_minimal_xlsx(&path);
4180        let table = object(read_table(
4181            &path,
4182            vec![
4183                Value::from("Sheet"),
4184                Value::from("Data"),
4185                Value::from("Range"),
4186                Value::from("A1:C3"),
4187            ],
4188        ));
4189        assert_eq!(
4190            table_variable_names_from_object(&table).unwrap(),
4191            vec![
4192                "Date".to_string(),
4193                "Orders".to_string(),
4194                "Revenue".to_string()
4195            ]
4196        );
4197        match table_member_get(&table, &Value::from("Revenue")).unwrap() {
4198            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![200.0, 90.0]),
4199            other => panic!("expected tensor, got {other:?}"),
4200        }
4201        let _ = fs::remove_file(&path);
4202    }
4203
4204    #[test]
4205    fn spreadsheet_import_options_registers_public_descriptor() {
4206        assert!(runmat_builtins::builtin_function_by_name("spreadsheetImportOptions").is_some());
4207        let labels = SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR
4208            .signatures
4209            .iter()
4210            .map(|signature| signature.label)
4211            .collect::<Vec<_>>();
4212        assert!(labels.contains(&"opts = spreadsheetImportOptions()"));
4213        assert!(labels.contains(&"opts = spreadsheetImportOptions(nameValuePairs...)"));
4214    }
4215
4216    #[test]
4217    fn spreadsheet_import_options_builds_editable_options_struct() {
4218        let options = spreadsheet_options(vec![
4219            Value::from("NumVariables"),
4220            Value::Num(2.0),
4221            Value::from("VariableTypes"),
4222            Value::StringArray(
4223                StringArray::new(vec!["double".into(), "string".into()], vec![1, 2]).unwrap(),
4224            ),
4225            Value::from("DataRange"),
4226            Value::from("A2:B5"),
4227        ]);
4228        assert_eq!(
4229            options.fields.get("FileType"),
4230            Some(&Value::from("spreadsheet"))
4231        );
4232        assert_eq!(options.fields.get("NumVariables"), Some(&Value::Num(2.0)));
4233        assert_eq!(options.fields.get("DataRange"), Some(&Value::from("A2:B5")));
4234        match options.fields.get("VariableNames").unwrap() {
4235            Value::StringArray(array) => {
4236                assert_eq!(array.data, vec!["Var1".to_string(), "Var2".to_string()]);
4237                assert_eq!(array.shape, vec![1, 2]);
4238            }
4239            other => panic!("expected string array, got {other:?}"),
4240        }
4241        match options.fields.get("VariableTypes").unwrap() {
4242            Value::StringArray(array) => {
4243                assert_eq!(array.data, vec!["double".to_string(), "string".to_string()]);
4244                assert_eq!(array.shape, vec![1, 2]);
4245            }
4246            other => panic!("expected string array, got {other:?}"),
4247        }
4248    }
4249
4250    #[test]
4251    fn readtable_consumes_spreadsheet_import_options_struct() {
4252        let path = unique_path("readtable_spreadsheet_options");
4253        let path = path.with_extension("xlsx");
4254        write_minimal_xlsx(&path);
4255        let mut options = spreadsheet_options(vec![Value::from("NumVariables"), Value::Num(1.0)]);
4256        options.insert("Sheet", Value::from("Data"));
4257        options.insert("DataRange", Value::from("C2:C3"));
4258        options.insert(
4259            "VariableNames",
4260            Value::StringArray(StringArray::new(vec!["Amount".into()], vec![1, 1]).unwrap()),
4261        );
4262        options.insert(
4263            "VariableTypes",
4264            Value::StringArray(StringArray::new(vec!["double".into()], vec![1, 1]).unwrap()),
4265        );
4266        let table = object(read_table(&path, vec![Value::Struct(options)]));
4267        assert_eq!(
4268            table_variable_names_from_object(&table).unwrap(),
4269            vec!["Amount".to_string()]
4270        );
4271        match table_member_get(&table, &Value::from("Amount")).unwrap() {
4272            Value::Tensor(tensor) => {
4273                assert_eq!(tensor.shape, vec![2, 1]);
4274                assert_eq!(tensor.data, vec![200.0, 90.0]);
4275                assert_eq!(tensor.dtype, NumericDType::F64);
4276            }
4277            other => panic!("expected tensor, got {other:?}"),
4278        }
4279        let _ = fs::remove_file(&path);
4280    }
4281
4282    #[test]
4283    fn readtable_default_spreadsheet_options_still_infers_headers() {
4284        let path = unique_path("readtable_default_spreadsheet_options");
4285        let path = path.with_extension("xlsx");
4286        write_minimal_xlsx(&path);
4287        let options = spreadsheet_options(Vec::new());
4288        let table = object(read_table(&path, vec![Value::Struct(options)]));
4289        assert_eq!(
4290            table_variable_names_from_object(&table).unwrap(),
4291            vec![
4292                "Date".to_string(),
4293                "Orders".to_string(),
4294                "Revenue".to_string()
4295            ]
4296        );
4297        let _ = fs::remove_file(&path);
4298    }
4299
4300    #[test]
4301    fn readtable_variable_types_coerce_imported_columns() {
4302        let path = unique_path("readtable_variable_types");
4303        fs::write(
4304            &path,
4305            "Value,Flag,When,Elapsed\n1.5,true,2026-06-01,01:30:00\n2.25,false,2026-06-02,02:00:00\n",
4306        )
4307        .expect("write sample");
4308        let types = StringArray::new(
4309            vec![
4310                "single".to_string(),
4311                "logical".to_string(),
4312                "datetime".to_string(),
4313                "duration".to_string(),
4314            ],
4315            vec![1, 4],
4316        )
4317        .unwrap();
4318        let table = object(read_table(
4319            &path,
4320            vec![Value::from("VariableTypes"), Value::StringArray(types)],
4321        ));
4322        match table_member_get(&table, &Value::from("Value")).unwrap() {
4323            Value::Tensor(tensor) => {
4324                assert_eq!(tensor.dtype, NumericDType::F32);
4325                assert_eq!(tensor.data, vec![1.5, 2.25]);
4326            }
4327            other => panic!("expected tensor, got {other:?}"),
4328        }
4329        match table_member_get(&table, &Value::from("Flag")).unwrap() {
4330            Value::LogicalArray(array) => assert_eq!(array.data, vec![1, 0]),
4331            other => panic!("expected logical array, got {other:?}"),
4332        }
4333        match table_member_get(&table, &Value::from("When")).unwrap() {
4334            Value::Object(object) => assert!(object.is_class("datetime")),
4335            other => panic!("expected datetime object, got {other:?}"),
4336        }
4337        match table_member_get(&table, &Value::from("Elapsed")).unwrap() {
4338            Value::Object(object) => assert!(object.is_class("duration")),
4339            other => panic!("expected duration object, got {other:?}"),
4340        }
4341        let _ = fs::remove_file(&path);
4342    }
4343
4344    #[test]
4345    fn readtable_preserves_explicit_import_variable_names_when_requested() {
4346        let path = unique_path("readtable_preserve_explicit_names");
4347        fs::write(&path, "100,10\n125,12\n").expect("write sample");
4348        let names = StringArray::new(
4349            vec!["daily revenue".to_string(), "total orders".to_string()],
4350            vec![1, 2],
4351        )
4352        .unwrap();
4353        let table = object(read_table(
4354            &path,
4355            vec![
4356                Value::from("ReadVariableNames"),
4357                Value::Bool(false),
4358                Value::from("VariableNames"),
4359                Value::StringArray(names),
4360                Value::from("VariableNamingRule"),
4361                Value::from("preserve"),
4362            ],
4363        ));
4364        assert_eq!(
4365            table_variable_names_from_object(&table).unwrap(),
4366            vec!["daily revenue".to_string(), "total orders".to_string()]
4367        );
4368        let _ = fs::remove_file(&path);
4369    }
4370
4371    #[test]
4372    fn readtable_text_type_char_imports_text_columns_as_char_matrix() {
4373        let path = unique_path("readtable_text_type_char");
4374        fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");
4375        let table = object(read_table(
4376            &path,
4377            vec![Value::from("TextType"), Value::from("char")],
4378        ));
4379        match table_member_get(&table, &Value::from("Name")).unwrap() {
4380            Value::CharArray(array) => {
4381                assert_eq!(array.rows, 2);
4382                assert_eq!(array.cols, 5);
4383                assert_eq!(char_row(&array, 0), "Ada  ");
4384                assert_eq!(char_row(&array, 1), "Grace");
4385            }
4386            other => panic!("expected char array, got {other:?}"),
4387        }
4388        let _ = fs::remove_file(&path);
4389    }
4390
4391    #[test]
4392    fn readtable_variable_types_cellstr_imports_cell_column() {
4393        let path = unique_path("readtable_variable_types_cellstr");
4394        fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");
4395        let types = StringArray::new(vec!["cellstr".to_string()], vec![1, 1]).unwrap();
4396        let table = object(read_table(
4397            &path,
4398            vec![Value::from("VariableTypes"), Value::StringArray(types)],
4399        ));
4400        match table_member_get(&table, &Value::from("Name")).unwrap() {
4401            Value::Cell(cell) => {
4402                assert_eq!(cell.rows, 2);
4403                assert_eq!(cell.cols, 1);
4404                assert_eq!(
4405                    cell.get(0, 0).unwrap(),
4406                    Value::CharArray(CharArray::new_row("Ada"))
4407                );
4408                assert_eq!(
4409                    cell.get(1, 0).unwrap(),
4410                    Value::CharArray(CharArray::new_row("Grace"))
4411                );
4412            }
4413            other => panic!("expected cell array, got {other:?}"),
4414        }
4415        let _ = fs::remove_file(&path);
4416    }
4417
4418    #[test]
4419    fn readtable_rejects_unrepresented_import_variable_types() {
4420        let path = unique_path("readtable_unsupported_variable_types");
4421        fs::write(&path, "A\n1\n").expect("write sample");
4422        let unsupported_integer = StringArray::new(vec!["int8".to_string()], vec![1, 1]).unwrap();
4423        let err = read_table_err(
4424            &path,
4425            vec![
4426                Value::from("VariableTypes"),
4427                Value::StringArray(unsupported_integer),
4428            ],
4429        );
4430        assert!(err
4431            .message()
4432            .contains("unsupported VariableTypes entry 'int8'"));
4433        let categorical = StringArray::new(vec!["categorical".to_string()], vec![1, 1]).unwrap();
4434        let err = read_table_err(
4435            &path,
4436            vec![
4437                Value::from("VariableTypes"),
4438                Value::StringArray(categorical),
4439            ],
4440        );
4441        assert!(err
4442            .message()
4443            .contains("unsupported VariableTypes entry 'categorical'"));
4444        let _ = fs::remove_file(&path);
4445    }
4446
4447    #[test]
4448    fn table_properties_variable_names_rename_columns() {
4449        let a = Value::Tensor(Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap());
4450        let b = Value::Tensor(Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap());
4451        let mut table =
4452            object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
4453        let mut props = table_public_properties(&table).unwrap();
4454        props.insert(
4455            VARIABLE_NAMES,
4456            Value::StringArray(StringArray::new(vec!["X".into(), "Y".into()], vec![1, 2]).unwrap()),
4457        );
4458        table_member_set(&mut table, PROPERTIES_MEMBER, Value::Struct(props)).unwrap();
4459        assert_eq!(
4460            table_variable_names_from_object(&table).unwrap(),
4461            vec!["X".to_string(), "Y".to_string()]
4462        );
4463    }
4464
4465    #[test]
4466    fn table_paren_selects_rows_and_named_variables() {
4467        let a = Value::Tensor(Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap());
4468        let b = Value::Tensor(Tensor::new(vec![4.0, 5.0, 6.0], vec![3, 1]).unwrap());
4469        let table = object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
4470        let selector = CellArray::new(
4471            vec![
4472                Value::Tensor(Tensor::new(vec![3.0, 1.0], vec![1, 2]).unwrap()),
4473                Value::Cell(CellArray::new(vec![Value::from("B")], 1, 1).unwrap()),
4474            ],
4475            1,
4476            2,
4477        )
4478        .unwrap();
4479        let subset = object(table_paren_get(&table, &Value::Cell(selector)).unwrap());
4480        assert_eq!(
4481            table_variable_names_from_object(&subset).unwrap(),
4482            vec!["B".to_string()]
4483        );
4484        match table_member_get(&subset, &Value::from("B")).unwrap() {
4485            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![6.0, 4.0]),
4486            other => panic!("expected tensor, got {other:?}"),
4487        }
4488    }
4489
4490    #[test]
4491    fn sortrows_preserves_row_names() {
4492        let values = Value::Tensor(Tensor::new(vec![2.0, 1.0], vec![2, 1]).unwrap());
4493        let table = table_from_columns_with_properties(
4494            vec!["X".into()],
4495            vec![values],
4496            Some(vec!["second".into(), "first".into()]),
4497        )
4498        .unwrap();
4499        let (sorted, _) = sortrows_table(table, &[Value::from("X")]).unwrap();
4500        let sorted = object(sorted);
4501        let props = table_public_properties(&sorted).unwrap();
4502        match props.fields.get(ROW_NAMES).unwrap() {
4503            Value::StringArray(array) => {
4504                assert_eq!(array.data, vec!["first".to_string(), "second".to_string()]);
4505            }
4506            other => panic!("expected row names, got {other:?}"),
4507        }
4508    }
4509
4510    #[test]
4511    fn groupsummary_mean_counts_groups() {
4512        let group = Value::StringArray(
4513            StringArray::new(vec!["a".into(), "b".into(), "a".into()], vec![3, 1]).unwrap(),
4514        );
4515        let value = Value::Tensor(Tensor::new(vec![2.0, 5.0, 4.0], vec![3, 1]).unwrap());
4516        let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
4517        let summary = groupsummary_impl(
4518            table,
4519            Value::from("G"),
4520            Value::from("mean"),
4521            vec![Value::from("X")],
4522        )
4523        .unwrap();
4524        let summary = object(summary);
4525        assert_eq!(
4526            table_variable_names_from_object(&summary).unwrap(),
4527            vec![
4528                "G".to_string(),
4529                "GroupCount".to_string(),
4530                "mean_X".to_string()
4531            ]
4532        );
4533        match table_member_get(&summary, &Value::from("mean_X")).unwrap() {
4534            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![3.0, 5.0]),
4535            other => panic!("expected tensor, got {other:?}"),
4536        }
4537    }
4538
4539    #[test]
4540    fn groupsummary_orders_numeric_groups_numerically() {
4541        let group = Value::Tensor(Tensor::new(vec![10.0, 2.0, 10.0], vec![3, 1]).unwrap());
4542        let value = Value::Tensor(Tensor::new(vec![1.0, 5.0, 3.0], vec![3, 1]).unwrap());
4543        let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
4544        let summary =
4545            object(groupsummary_impl(table, Value::from("G"), Value::from("sum"), vec![]).unwrap());
4546        match table_member_get(&summary, &Value::from("G")).unwrap() {
4547            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 10.0]),
4548            other => panic!("expected tensor, got {other:?}"),
4549        }
4550        match table_member_get(&summary, &Value::from("sum_X")).unwrap() {
4551            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![5.0, 4.0]),
4552            other => panic!("expected tensor, got {other:?}"),
4553        }
4554    }
4555}