Skip to main content

runmat_runtime/builtins/table/
mod.rs

1//! MATLAB table datatype support and tabular workflow builtins.
2
3use std::cmp::Ordering;
4use std::collections::{BTreeMap, HashMap, HashSet};
5use std::io::{Cursor, Read};
6use std::path::{Path, PathBuf};
7use std::sync::OnceLock;
8
9use calamine::{open_workbook_auto_from_rs, Data as SpreadsheetData, Reader as SpreadsheetReader};
10use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11use encoding_rs::{Encoding, UTF_8};
12use runmat_builtins::{
13    Access, BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15    CellArray, ClassDef, ComplexTensor, LogicalArray, MethodDef, ObjectInstance, PropertyDef,
16    StringArray, StructValue, Tensor, Value,
17};
18use runmat_filesystem::File;
19use runmat_macros::runtime_builtin;
20
21use crate::builtins::common::fs::expand_user_path;
22use crate::builtins::common::spec::{
23    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
24    ReductionNaN, ResidencyPolicy, ShapeRequirements,
25};
26use crate::{
27    build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError, OBJECT_INDEX_BRACE,
28    OBJECT_INDEX_MEMBER, OBJECT_INDEX_PAREN, OBJECT_SUBSASGN_METHOD, OBJECT_SUBSREF_METHOD,
29};
30
/// Class name used when registering the table class and when tagging errors raised here.
pub const TABLE_CLASS: &str = "table";
// Internal field names. Presumably the object fields that hold the column data and the
// table metadata; their uses are outside this view (TODO confirm).
const TABLE_VARIABLES_FIELD: &str = "__table_variables";
const TABLE_PROPERTIES_FIELD: &str = "__table_properties";
/// Name of the single property registered on the table class.
const PROPERTIES_MEMBER: &str = "Properties";
// Metadata key names. Presumably the fields of the `Properties` struct; verify against
// `default_properties` (not visible here).
const VARIABLE_NAMES: &str = "VariableNames";
const ROW_NAMES: &str = "RowNames";
const DIMENSION_NAMES: &str = "DimensionNames";
const VARIABLE_UNITS: &str = "VariableUnits";
const VARIABLE_DESCRIPTIONS: &str = "VariableDescriptions";
const DESCRIPTION: &str = "Description";
const USER_DATA: &str = "UserData";
// Presumably the initial `DimensionNames` entries (TODO confirm).
const DEFAULT_ROW_DIM_NAME: &str = "Rows";
const DEFAULT_VARIABLE_DIM_NAME: &str = "Variables";

/// One-time guard: `ensure_table_class_registered` runs its registration closure through
/// this cell, so the closure executes at most once.
static TABLE_CLASS_REGISTERED: OnceLock<()> = OnceLock::new();
46
/// Output list with a single required value of any type, named `out`.
const ANY_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "out",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Result value.",
}];
/// Output list with a single required integer scalar, named `n` (used by `height`/`width`).
const NUM_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "n",
    ty: BuiltinParamType::IntegerScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Count.",
}];
/// Input list with a single required argument `T`.
///
/// Typed `Any` rather than as a table-specific type.
const TABLE_INPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "T",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Table input.",
}];
/// Inputs for the `readtable(filename)` form.
const READTABLE_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "filename",
    ty: BuiltinParamType::StringScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Text or spreadsheet file path.",
}];
/// Inputs for the `readtable(filename, nameValuePairs...)` form.
const READTABLE_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
    BuiltinParamDescriptor {
        name: "filename",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Text or spreadsheet file path.",
    },
    BuiltinParamDescriptor {
        name: "nameValuePairs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Name-value import options.",
    },
];
/// Inputs for the `table(variables...)` constructor: one variadic slot.
const TABLE_INPUTS_VALUES: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "variables",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Variadic,
    default: None,
    description: "Variables to assemble as table columns.",
}];
/// Inputs for `groupsummary`: three required arguments plus the optional `datavars`.
const GROUPSUMMARY_INPUTS: [BuiltinParamDescriptor; 4] = [
    BuiltinParamDescriptor {
        name: "T",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input table.",
    },
    BuiltinParamDescriptor {
        name: "groupvars",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Grouping variable name or names.",
    },
    BuiltinParamDescriptor {
        name: "method",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Summary method name or names.",
    },
    BuiltinParamDescriptor {
        name: "datavars",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Optional,
        default: None,
        description: "Data variable name or names.",
    },
];
/// Inputs for the `table.subsref` method: receiver, index kind token and index payload.
const OBJECT_INDEX_INPUTS: [BuiltinParamDescriptor; 3] = [
    BuiltinParamDescriptor {
        name: "obj",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Table object receiver.",
    },
    BuiltinParamDescriptor {
        name: "kind",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index kind token.",
    },
    BuiltinParamDescriptor {
        name: "payload",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index payload.",
    },
];
/// Inputs for the `table.subsasgn` method: the `subsref` inputs plus the assigned `rhs`.
const OBJECT_ASSIGN_INPUTS: [BuiltinParamDescriptor; 4] = [
    BuiltinParamDescriptor {
        name: "obj",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Table object receiver.",
    },
    BuiltinParamDescriptor {
        name: "kind",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index kind token.",
    },
    BuiltinParamDescriptor {
        name: "payload",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index payload.",
    },
    BuiltinParamDescriptor {
        name: "rhs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Assigned value.",
    },
];
181
/// The two documented call forms of `readtable`: filename only, or filename followed by
/// name-value options.
const READTABLE_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename)",
        inputs: &READTABLE_INPUTS_FILENAME,
        outputs: &ANY_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename, nameValuePairs...)",
        inputs: &READTABLE_INPUTS_NAME_VALUE,
        outputs: &ANY_OUTPUT,
    },
];
/// Signature of the `table` constructor.
const TABLE_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "T = table(variables...)",
    inputs: &TABLE_INPUTS_VALUES,
    outputs: &ANY_OUTPUT,
}];
/// Signature of `groupsummary`.
const GROUPSUMMARY_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "G = groupsummary(T, groupvars, method, datavars)",
    inputs: &GROUPSUMMARY_INPUTS,
    outputs: &ANY_OUTPUT,
}];
/// Signature of `height`.
const HEIGHT_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = height(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
/// Signature of `width`.
const WIDTH_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = width(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
/// Signature of the `table.subsref` indexing method.
const OBJECT_SUBSREF_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "out = table.subsref(obj, kind, payload)",
    inputs: &OBJECT_INDEX_INPUTS,
    outputs: &ANY_OUTPUT,
}];
/// Signature of the `table.subsasgn` indexed-assignment method.
const OBJECT_SUBSASGN_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "obj = table.subsasgn(obj, kind, payload, rhs)",
    inputs: &OBJECT_ASSIGN_INPUTS,
    outputs: &ANY_OUTPUT,
}];
224
/// Descriptor for invalid arguments or table metadata; used by `invalid_argument`.
const TABLE_ERROR_INVALID_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_ARGUMENT",
    identifier: Some("RunMat:table:InvalidArgument"),
    when: "Arguments or table metadata are invalid.",
    message: "table: invalid argument",
};
/// Descriptor for invalid table indexing; used by `invalid_index`.
const TABLE_ERROR_INVALID_INDEX: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_INDEX",
    identifier: Some("RunMat:table:InvalidIndex"),
    when: "Table indexing is invalid.",
    message: "table: invalid index",
};
/// Descriptor for an invalid variable name or value; used by `invalid_variable`.
const TABLE_ERROR_INVALID_VARIABLE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_VARIABLE",
    identifier: Some("RunMat:table:InvalidVariable"),
    when: "A table variable name or value is invalid.",
    message: "table: invalid variable",
};
/// Descriptor for `readtable` open/read failures (note the `readtable` identifier namespace).
const TABLE_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.IO",
    identifier: Some("RunMat:readtable:IOError"),
    when: "readtable cannot open or read the requested file.",
    message: "readtable: file read failed",
};
/// Descriptor for `readtable` inputs that are neither text nor spreadsheet files.
const TABLE_ERROR_UNSUPPORTED_FILE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.UNSUPPORTED_FILE",
    identifier: Some("RunMat:readtable:UnsupportedFileType"),
    when: "readtable receives a file type outside the text or spreadsheet import backends.",
    message: "readtable: unsupported file type",
};
/// Full error list; every builtin descriptor in this module advertises this same set.
const TABLE_ERRORS: [BuiltinErrorDescriptor; 5] = [
    TABLE_ERROR_INVALID_ARGUMENT,
    TABLE_ERROR_INVALID_INDEX,
    TABLE_ERROR_INVALID_VARIABLE,
    TABLE_ERROR_IO,
    TABLE_ERROR_UNSUPPORTED_FILE,
];
262
/// Descriptor attached to the `readtable` builtin.
pub const READTABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &READTABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor attached to the `table` constructor builtin.
pub const TABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &TABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor attached to the `groupsummary` builtin.
pub const GROUPSUMMARY_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &GROUPSUMMARY_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor attached to the `height` builtin.
pub const HEIGHT_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &HEIGHT_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor attached to the `width` builtin.
pub const WIDTH_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &WIDTH_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor attached to `table.subsref`; unlike the builtins above it uses the
/// `MethodOnly` completion policy.
pub const TABLE_SUBSREF_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSREF_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
/// Descriptor attached to `table.subsasgn`; also `MethodOnly`.
pub const TABLE_SUBSASGN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSASGN_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
305
/// GPU spec registered for the `table` builtin.
///
/// No precisions or provider hooks are declared and residency is `GatherImmediately`:
/// as the `notes` field states, tables are host containers.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::table")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "table",
    op_kind: GpuOpKind::Custom("table"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Tables are host containers. GPU variables are gathered when tabular algorithms need row-wise access.",
};

/// Fusion spec registered for the `table` builtin.
///
/// Declares neither an elementwise nor a reduction form; per `notes`, tables are not
/// fusion operands.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::table")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "table",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Tables are structured host containers and are not fusion operands.",
};
332
333fn table_error(error: &'static BuiltinErrorDescriptor, message: impl Into<String>) -> RuntimeError {
334    let mut builder = build_runtime_error(message).with_builtin(TABLE_CLASS);
335    if let Some(identifier) = error.identifier {
336        builder = builder.with_identifier(identifier);
337    }
338    builder.build()
339}
340
341fn table_error_with_source<E>(
342    error: &'static BuiltinErrorDescriptor,
343    message: impl Into<String>,
344    source: E,
345) -> RuntimeError
346where
347    E: std::error::Error + Send + Sync + 'static,
348{
349    let mut builder = build_runtime_error(message)
350        .with_builtin(TABLE_CLASS)
351        .with_source(source);
352    if let Some(identifier) = error.identifier {
353        builder = builder.with_identifier(identifier);
354    }
355    builder.build()
356}
357
358fn invalid_argument(message: impl Into<String>) -> RuntimeError {
359    table_error(&TABLE_ERROR_INVALID_ARGUMENT, message)
360}
361
362fn invalid_index(message: impl Into<String>) -> RuntimeError {
363    table_error(&TABLE_ERROR_INVALID_INDEX, message)
364}
365
366fn invalid_variable(message: impl Into<String>) -> RuntimeError {
367    table_error(&TABLE_ERROR_INVALID_VARIABLE, message)
368}
369
370fn map_control_flow(err: RuntimeError) -> RuntimeError {
371    let identifier = err.identifier().map(ToString::to_string);
372    let message = err.message().to_string();
373    let mut builder = build_runtime_error(message)
374        .with_builtin(TABLE_CLASS)
375        .with_source(err);
376    if let Some(identifier) = identifier {
377        builder = builder.with_identifier(identifier);
378    }
379    builder.build()
380}
381
382pub fn ensure_table_class_registered() {
383    TABLE_CLASS_REGISTERED.get_or_init(|| {
384        let mut properties = HashMap::new();
385        properties.insert(
386            PROPERTIES_MEMBER.to_string(),
387            PropertyDef {
388                name: PROPERTIES_MEMBER.to_string(),
389                is_static: false,
390                is_constant: false,
391                is_dependent: false,
392                get_access: Access::Public,
393                set_access: Access::Public,
394                default_value: Some(Value::Struct(default_properties(Vec::new(), None))),
395            },
396        );
397
398        let mut methods = HashMap::new();
399        for name in [OBJECT_SUBSREF_METHOD, OBJECT_SUBSASGN_METHOD] {
400            methods.insert(
401                name.to_string(),
402                MethodDef {
403                    name: name.to_string(),
404                    is_static: false,
405                    is_abstract: false,
406                    is_sealed: false,
407                    access: Access::Public,
408                    function_name: format!("{TABLE_CLASS}.{name}"),
409                    implicit_class_argument: None,
410                },
411            );
412        }
413
414        runmat_builtins::register_class(ClassDef {
415            name: TABLE_CLASS.to_string(),
416            parent: None,
417            properties,
418            methods,
419        });
420    });
421}
422
423#[runtime_builtin(
424    name = "table",
425    category = "table",
426    summary = "Create a table from named column variables.",
427    keywords = "table,VariableNames,RowNames,Properties",
428    accel = "cpu",
429    type_resolver(crate::builtins::io::type_resolvers::struct_type),
430    descriptor(crate::builtins::table::TABLE_DESCRIPTOR),
431    builtin_path = "crate::builtins::table"
432)]
433async fn table_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
434    ensure_table_class_registered();
435    let gathered = gather_values(&args).await?;
436    let (variables, options) = split_table_constructor_args(gathered)?;
437    let names = if let Some(names) = options.variable_names {
438        names
439    } else {
440        generated_variable_names(variables.len())
441    };
442    table_from_columns_with_properties(names, variables, options.row_names)
443}
444
445#[runtime_builtin(
446    name = "readtable",
447    category = "io/tabular",
448    summary = "Import tabular text or spreadsheet data into a table.",
449    keywords = "readtable,table,csv,tsv,xlsx,xls,ods,spreadsheet,VariableNames,RowNames,Sheet,Range",
450    accel = "cpu",
451    type_resolver(crate::builtins::io::type_resolvers::struct_type),
452    descriptor(crate::builtins::table::READTABLE_DESCRIPTOR),
453    builtin_path = "crate::builtins::table"
454)]
455async fn readtable_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
456    ensure_table_class_registered();
457    let path_value = gather_if_needed_async(&path)
458        .await
459        .map_err(map_control_flow)?;
460    let args = gather_values(&rest).await?;
461    let options = ReadTableOptions::parse(&args)?;
462    let resolved = resolve_path(&path_value)?;
463    read_table_from_file(&resolved, &options).await
464}
465
466#[runtime_builtin(
467    name = "height",
468    category = "table",
469    summary = "Return the number of rows in a table.",
470    keywords = "height,table,rows",
471    descriptor(crate::builtins::table::HEIGHT_DESCRIPTOR),
472    builtin_path = "crate::builtins::table"
473)]
474async fn height_builtin(value: Value) -> BuiltinResult<Value> {
475    let host = gather_if_needed_async(&value)
476        .await
477        .map_err(map_control_flow)?;
478    if let Some(object) = table_object(&host) {
479        return Ok(Value::Num(table_height(object)? as f64));
480    }
481    value_row_count(&host).map(|n| Value::Num(n as f64))
482}
483
484#[runtime_builtin(
485    name = "width",
486    category = "table",
487    summary = "Return the number of variables in a table.",
488    keywords = "width,table,variables",
489    descriptor(crate::builtins::table::WIDTH_DESCRIPTOR),
490    builtin_path = "crate::builtins::table"
491)]
492async fn width_builtin(value: Value) -> BuiltinResult<Value> {
493    let host = gather_if_needed_async(&value)
494        .await
495        .map_err(map_control_flow)?;
496    if let Some(object) = table_object(&host) {
497        return Ok(Value::Num(table_width(object)? as f64));
498    }
499    match host {
500        Value::Tensor(t) => Ok(Value::Num(t.cols() as f64)),
501        Value::ComplexTensor(t) => Ok(Value::Num(t.cols as f64)),
502        Value::StringArray(sa) => Ok(Value::Num(sa.cols() as f64)),
503        Value::LogicalArray(la) => Ok(Value::Num(la.shape.get(1).copied().unwrap_or(1) as f64)),
504        Value::Cell(ca) => Ok(Value::Num(ca.cols as f64)),
505        Value::CharArray(ca) => Ok(Value::Num(ca.cols as f64)),
506        _ => Ok(Value::Num(1.0)),
507    }
508}
509
510#[runtime_builtin(
511    name = "groupsummary",
512    category = "table",
513    summary = "Group table rows and compute summary statistics for data variables.",
514    keywords = "groupsummary,group,table,mean,sum,count,median,min,max",
515    accel = "cpu",
516    descriptor(crate::builtins::table::GROUPSUMMARY_DESCRIPTOR),
517    builtin_path = "crate::builtins::table"
518)]
519async fn groupsummary_builtin(
520    table: Value,
521    groupvars: Value,
522    method: Value,
523    rest: Vec<Value>,
524) -> BuiltinResult<Value> {
525    let table = gather_if_needed_async(&table)
526        .await
527        .map_err(map_control_flow)?;
528    let groupvars = gather_if_needed_async(&groupvars)
529        .await
530        .map_err(map_control_flow)?;
531    let method = gather_if_needed_async(&method)
532        .await
533        .map_err(map_control_flow)?;
534    let rest = gather_values(&rest).await?;
535    groupsummary_impl(table, groupvars, method, rest)
536}
537
538#[runtime_builtin(
539    name = "table.subsref",
540    descriptor(crate::builtins::table::TABLE_SUBSREF_DESCRIPTOR),
541    builtin_path = "crate::builtins::table"
542)]
543async fn table_subsref(obj: Value, kind: String, payload: Value) -> BuiltinResult<Value> {
544    let object = into_table_object(obj, "table.subsref")?;
545    match kind.as_str() {
546        OBJECT_INDEX_MEMBER => table_member_get(&object, &payload),
547        OBJECT_INDEX_PAREN => table_paren_get(&object, &payload),
548        OBJECT_INDEX_BRACE => table_brace_get(&object, &payload),
549        other => Err(invalid_index(format!(
550            "table.subsref: unsupported indexing kind '{other}'"
551        ))),
552    }
553}
554
555#[runtime_builtin(
556    name = "table.subsasgn",
557    descriptor(crate::builtins::table::TABLE_SUBSASGN_DESCRIPTOR),
558    builtin_path = "crate::builtins::table"
559)]
560async fn table_subsasgn(
561    obj: Value,
562    kind: String,
563    payload: Value,
564    rhs: Value,
565) -> BuiltinResult<Value> {
566    let mut object = into_table_object(obj, "table.subsasgn")?;
567    match kind.as_str() {
568        OBJECT_INDEX_MEMBER => {
569            let field = scalar_text(&payload, "table member")?;
570            table_member_set(&mut object, &field, rhs)?;
571            Ok(Value::Object(object))
572        }
573        OBJECT_INDEX_PAREN => table_paren_assign(object, &payload, rhs),
574        OBJECT_INDEX_BRACE => table_brace_assign(object, &payload, rhs),
575        other => Err(invalid_index(format!(
576            "table.subsasgn: unsupported indexing kind '{other}'"
577        ))),
578    }
579}
580
581async fn gather_values(values: &[Value]) -> BuiltinResult<Vec<Value>> {
582    let mut out = Vec::with_capacity(values.len());
583    for value in values {
584        out.push(
585            gather_if_needed_async(value)
586                .await
587                .map_err(map_control_flow)?,
588        );
589    }
590    Ok(out)
591}
592
/// Name-value options recognised by the `table(...)` constructor.
///
/// Both fields stay `None` unless the matching option appears in the argument list.
#[derive(Default)]
struct TableConstructorOptions {
    // Value of the `VariableNames` option, parsed with `variable_name_list`.
    variable_names: Option<Vec<String>>,
    // Value of the `RowNames` option, parsed with `string_list`.
    row_names: Option<Vec<String>>,
}
598
599fn split_table_constructor_args(
600    args: Vec<Value>,
601) -> BuiltinResult<(Vec<Value>, TableConstructorOptions)> {
602    let mut variables = Vec::new();
603    let mut options = TableConstructorOptions::default();
604    let mut idx = 0usize;
605    while idx < args.len() {
606        if let Ok(name) = scalar_text(&args[idx], "table option") {
607            if idx + 1 < args.len() && is_table_constructor_option(&name) {
608                let value = &args[idx + 1];
609                if name.eq_ignore_ascii_case("VariableNames") {
610                    options.variable_names = Some(variable_name_list(value)?);
611                } else if name.eq_ignore_ascii_case("RowNames") {
612                    options.row_names = Some(string_list(value)?);
613                }
614                idx += 2;
615                continue;
616            }
617        }
618        variables.push(args[idx].clone());
619        idx += 1;
620    }
621    Ok((variables, options))
622}
623
/// Returns `true` when `name` is one of the constructor option names (`VariableNames` or
/// `RowNames`), compared ASCII case-insensitively.
fn is_table_constructor_option(name: &str) -> bool {
    ["VariableNames", "RowNames"]
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
627
/// Import options for `readtable`, filled in from a leading options struct and/or
/// name-value pairs (see `ReadTableOptions::parse`).
#[derive(Clone)]
struct ReadTableOptions {
    // `FileType` option; defaults to `Auto`.
    file_type: ImportFileType,
    // `Delimiter` option; `None` when not supplied.
    delimiter: Option<Delimiter>,
    // `ReadVariableNames` option; `None` when not supplied.
    read_variable_names: Option<bool>,
    // `ReadRowNames` option; defaults to `false`.
    read_row_names: bool,
    // Explicit `VariableNames` override.
    variable_names: Option<Vec<String>>,
    // Explicit `RowNames` override.
    row_names: Option<Vec<String>>,
    // `NumHeaderLines` option; defaults to 0.
    num_header_lines: usize,
    // `Range` option.
    range: Option<RangeSpec>,
    // `Sheet` option (name or index).
    sheet: Option<SheetSelector>,
    // Set by `PreserveVariableNames`, or by `VariableNamingRule` = "preserve"/"modify".
    preserve_variable_names: bool,
    // `TreatAsMissing` tokens, stored trimmed and ASCII-lowercased.
    treat_as_missing: HashSet<String>,
    // `EmptyLineRule` option; defaults to `Skip`.
    empty_line_rule: EmptyLineRule,
    // `Encoding` label as supplied by the caller, after `validate_encoding_label` accepts it.
    encoding: String,
}
644
645impl Default for ReadTableOptions {
646    fn default() -> Self {
647        Self {
648            file_type: ImportFileType::Auto,
649            delimiter: None,
650            read_variable_names: None,
651            read_row_names: false,
652            variable_names: None,
653            row_names: None,
654            num_header_lines: 0,
655            range: None,
656            sheet: None,
657            preserve_variable_names: false,
658            treat_as_missing: HashSet::new(),
659            empty_line_rule: EmptyLineRule::Skip,
660            encoding: "utf-8".to_string(),
661        }
662    }
663}
664
665impl ReadTableOptions {
666    fn parse(args: &[Value]) -> BuiltinResult<Self> {
667        let mut options = Self::default();
668        let mut idx = 0usize;
669        if let Some(Value::Struct(st)) = args.first() {
670            for (name, value) in &st.fields {
671                options.apply(name, value)?;
672            }
673            idx = 1;
674        }
675        while idx < args.len() {
676            if idx + 1 >= args.len() {
677                return Err(invalid_argument(
678                    "readtable: name-value options must be provided in pairs",
679                ));
680            }
681            let name = scalar_text(&args[idx], "readtable option")?;
682            options.apply(&name, &args[idx + 1])?;
683            idx += 2;
684        }
685        Ok(options)
686    }
687
688    fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
689        if name.eq_ignore_ascii_case("FileType") {
690            self.file_type = ImportFileType::parse(value)?;
691        } else if name.eq_ignore_ascii_case("Delimiter") {
692            self.delimiter = Some(Delimiter::parse(value)?);
693        } else if name.eq_ignore_ascii_case("ReadVariableNames") {
694            self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
695        } else if name.eq_ignore_ascii_case("ReadRowNames") {
696            self.read_row_names = bool_scalar(value, "ReadRowNames")?;
697        } else if name.eq_ignore_ascii_case("VariableNames") {
698            self.variable_names = Some(variable_name_list(value)?);
699        } else if name.eq_ignore_ascii_case("RowNames") {
700            self.row_names = Some(string_list(value)?);
701        } else if name.eq_ignore_ascii_case("NumHeaderLines") {
702            self.num_header_lines = nonnegative_usize(value, "NumHeaderLines")?;
703        } else if name.eq_ignore_ascii_case("Range") {
704            self.range = Some(RangeSpec::parse(value)?);
705        } else if name.eq_ignore_ascii_case("Sheet") {
706            self.sheet = Some(SheetSelector::parse(value)?);
707        } else if name.eq_ignore_ascii_case("TreatAsMissing") {
708            for token in string_list(value)? {
709                self.treat_as_missing
710                    .insert(token.trim().to_ascii_lowercase());
711            }
712        } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
713            self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
714        } else if name.eq_ignore_ascii_case("VariableNamingRule") {
715            let rule = scalar_text(value, "VariableNamingRule")?;
716            if rule.eq_ignore_ascii_case("preserve") {
717                self.preserve_variable_names = true;
718            } else if rule.eq_ignore_ascii_case("modify") {
719                self.preserve_variable_names = false;
720            } else {
721                return Err(invalid_argument(format!(
722                    "readtable: unsupported VariableNamingRule '{rule}'"
723                )));
724            }
725        } else if name.eq_ignore_ascii_case("EmptyLineRule") {
726            let rule = scalar_text(value, "EmptyLineRule")?;
727            self.empty_line_rule = if rule.eq_ignore_ascii_case("read") {
728                EmptyLineRule::Read
729            } else if rule.eq_ignore_ascii_case("skip") {
730                EmptyLineRule::Skip
731            } else {
732                return Err(invalid_argument(format!(
733                    "readtable: unsupported EmptyLineRule '{rule}'"
734                )));
735            };
736        } else if name.eq_ignore_ascii_case("Encoding") {
737            let encoding = scalar_text(value, "Encoding")?;
738            validate_encoding_label(&encoding)?;
739            self.encoding = encoding;
740        } else if name.eq_ignore_ascii_case("TextType") {
741            let text_type = scalar_text(value, "TextType")?;
742            if !(text_type.eq_ignore_ascii_case("string") || text_type.eq_ignore_ascii_case("char"))
743            {
744                return Err(invalid_argument(format!(
745                    "readtable: unsupported TextType '{text_type}'"
746                )));
747            };
748        } else if name.eq_ignore_ascii_case("DatetimeType") {
749            let datetime_type = scalar_text(value, "DatetimeType")?;
750            if !(datetime_type.eq_ignore_ascii_case("datetime")
751                || datetime_type.eq_ignore_ascii_case("text")
752                || datetime_type.eq_ignore_ascii_case("exceldatenum"))
753            {
754                return Err(invalid_argument(format!(
755                    "readtable: unsupported DatetimeType '{datetime_type}'"
756                )));
757            }
758        } else {
759            return Err(invalid_argument(format!(
760                "readtable: unsupported option '{name}'"
761            )));
762        }
763        Ok(())
764    }
765
766    fn is_missing(&self, token: &str) -> bool {
767        let trimmed = token.trim();
768        trimmed.is_empty()
769            || self
770                .treat_as_missing
771                .contains(&trimmed.to_ascii_lowercase())
772    }
773}
774
/// Value of the `EmptyLineRule` import option; `Skip` is the default.
///
/// Presumably controls whether blank input lines are dropped or read as rows — the
/// consumer is outside this view (TODO confirm).
#[derive(Clone, Copy)]
enum EmptyLineRule {
    // Selected by the text "skip".
    Skip,
    // Selected by the text "read".
    Read,
}
780
/// Value of the `FileType` import option.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ImportFileType {
    // "auto" — the default; presumably the backend is chosen from the file itself
    // (detection logic is outside this view).
    Auto,
    // "text", "delimitedtext" or "delimited".
    Text,
    // "spreadsheet" or "excel".
    Spreadsheet,
}
787
788impl ImportFileType {
789    fn parse(value: &Value) -> BuiltinResult<Self> {
790        let text = scalar_text(value, "FileType")?;
791        match text.trim().to_ascii_lowercase().as_str() {
792            "auto" => Ok(Self::Auto),
793            "text" | "delimitedtext" | "delimited" => Ok(Self::Text),
794            "spreadsheet" | "excel" => Ok(Self::Spreadsheet),
795            other => Err(invalid_argument(format!(
796                "readtable: unsupported FileType '{other}'"
797            ))),
798        }
799    }
800}
801
/// Value of the `Sheet` import option: a sheet chosen by name or by position.
#[derive(Clone)]
enum SheetSelector {
    // Sheet name, stored exactly as supplied.
    Name(String),
    // Zero-based sheet position (the caller's one-based number minus one).
    Index(usize),
}
807
808impl SheetSelector {
809    fn parse(value: &Value) -> BuiltinResult<Self> {
810        match value {
811            Value::Int(i) if i.to_i64() >= 1 => Ok(Self::Index(i.to_i64() as usize - 1)),
812            Value::Num(n)
813                if n.is_finite() && *n >= 1.0 && (n.round() - n).abs() <= f64::EPSILON =>
814            {
815                Ok(Self::Index(n.round() as usize - 1))
816            }
817            _ => {
818                let text = scalar_text(value, "Sheet")?;
819                if text.trim().is_empty() {
820                    return Err(invalid_argument("readtable: Sheet must not be empty"));
821                }
822                Ok(Self::Name(text))
823            }
824        }
825    }
826}
827
/// Value of the `Delimiter` import option.
#[derive(Clone)]
enum Delimiter {
    // A single-character delimiter, given literally or through a keyword such as "tab".
    Char(char),
    // A literal multi-character delimiter.
    String(String),
    // Selected by the keywords "space" / "whitespace"; presumably splits on runs of
    // whitespace (the splitting code is outside this view).
    Whitespace,
}
834
835impl Delimiter {
836    fn parse(value: &Value) -> BuiltinResult<Self> {
837        let text = scalar_text(value, "Delimiter")?;
838        if text.is_empty() {
839            return Err(invalid_argument("readtable: Delimiter must not be empty"));
840        }
841        match text.trim().to_ascii_lowercase().as_str() {
842            "tab" => Ok(Self::Char('\t')),
843            "space" | "whitespace" => Ok(Self::Whitespace),
844            "comma" => Ok(Self::Char(',')),
845            "semicolon" => Ok(Self::Char(';')),
846            "bar" | "pipe" => Ok(Self::Char('|')),
847            _ if text.chars().count() == 1 => Ok(Self::Char(text.chars().next().unwrap())),
848            _ => Ok(Self::String(text)),
849        }
850    }
851}
852
853#[derive(Clone, Copy)]
854struct RangeSpec {
855    start_row: usize,
856    start_col: usize,
857    end_row: Option<usize>,
858    end_col: Option<usize>,
859}
860
861impl RangeSpec {
862    fn parse(value: &Value) -> BuiltinResult<Self> {
863        match value {
864            Value::String(text) => Self::parse_text(text),
865            Value::CharArray(ca) if ca.rows == 1 => {
866                let text: String = ca.data.iter().collect();
867                Self::parse_text(&text)
868            }
869            Value::StringArray(sa) if sa.data.len() == 1 => Self::parse_text(&sa.data[0]),
870            Value::Tensor(t) if t.data.len() == 2 || t.data.len() == 4 => {
871                let mut indices = Vec::with_capacity(t.data.len());
872                for value in &t.data {
873                    indices.push(one_based_to_zero(*value, usize::MAX, "Range")?);
874                }
875                Ok(Self {
876                    start_row: indices[0],
877                    start_col: indices[1],
878                    end_row: indices.get(2).copied(),
879                    end_col: indices.get(3).copied(),
880                })
881            }
882            _ => Err(invalid_argument(
883                "readtable: Range must be a cell reference string or numeric vector",
884            )),
885        }
886    }
887
888    fn parse_text(text: &str) -> BuiltinResult<Self> {
889        let trimmed = text.trim();
890        if trimmed.is_empty() {
891            return Err(invalid_argument("readtable: Range must not be empty"));
892        }
893        let parts: Vec<&str> = trimmed.split(':').collect();
894        if parts.len() > 2 {
895            return Err(invalid_argument(format!(
896                "readtable: invalid Range specification '{trimmed}'"
897            )));
898        }
899        let start = parse_cell_ref(parts[0])?;
900        let end = if parts.len() == 2 {
901            Some(parse_cell_ref(parts[1])?)
902        } else {
903            None
904        };
905        Ok(Self {
906            start_row: start.0.unwrap_or(0),
907            start_col: start.1.unwrap_or(0),
908            end_row: end.and_then(|item| item.0),
909            end_col: end.and_then(|item| item.1),
910        })
911    }
912}
913
914fn parse_cell_ref(token: &str) -> BuiltinResult<(Option<usize>, Option<usize>)> {
915    let mut letters = String::new();
916    let mut digits = String::new();
917    for ch in token.trim().chars() {
918        if ch == '$' {
919            continue;
920        }
921        if ch.is_ascii_alphabetic() {
922            letters.push(ch.to_ascii_uppercase());
923        } else if ch.is_ascii_digit() {
924            digits.push(ch);
925        } else {
926            return Err(invalid_argument(format!(
927                "readtable: invalid Range component '{token}'"
928            )));
929        }
930    }
931    let col = if letters.is_empty() {
932        None
933    } else {
934        let mut value = 0usize;
935        for ch in letters.chars() {
936            value = value
937                .checked_mul(26)
938                .and_then(|v| v.checked_add((ch as u8 - b'A' + 1) as usize))
939                .ok_or_else(|| invalid_argument("readtable: Range column overflow"))?;
940        }
941        Some(value - 1)
942    };
943    let row = if digits.is_empty() {
944        None
945    } else {
946        let parsed = digits
947            .parse::<usize>()
948            .map_err(|_| invalid_argument("readtable: invalid Range row"))?;
949        if parsed == 0 {
950            return Err(invalid_argument("readtable: Range rows are one-based"));
951        }
952        Some(parsed - 1)
953    };
954    Ok((row, col))
955}
956
957fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
958    let text = scalar_text(value, "filename").map_err(|_| {
959        table_error(
960            &TABLE_ERROR_INVALID_ARGUMENT,
961            "readtable: filename must be a string scalar or character vector",
962        )
963    })?;
964    if text.trim().is_empty() {
965        return Err(invalid_argument("readtable: filename must not be empty"));
966    }
967    let expanded =
968        expand_user_path(&text, "readtable").map_err(|msg| invalid_argument(msg.to_string()))?;
969    Ok(Path::new(&expanded).to_path_buf())
970}
971
972async fn read_table_from_file(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
973    match options.file_type {
974        ImportFileType::Spreadsheet => read_spreadsheet_table(path, options).await,
975        ImportFileType::Text => read_text_table(path, options).await,
976        ImportFileType::Auto if is_spreadsheet_path(path) => {
977            read_spreadsheet_table(path, options).await
978        }
979        ImportFileType::Auto => read_text_table(path, options).await,
980    }
981}
982
983async fn read_text_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
984    if options.sheet.is_some() {
985        return Err(invalid_argument(
986            "readtable: Sheet is only valid for spreadsheet files",
987        ));
988    }
989    let bytes = read_file_bytes(path).await?;
990    let text = decode_text_bytes(&bytes, &options.encoding)?;
991    let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
992    if let Some(first) = raw_lines.first_mut() {
993        if first.starts_with('\u{FEFF}') {
994            *first = first.trim_start_matches('\u{FEFF}').to_string();
995        }
996    }
997    let delimiter = options
998        .delimiter
999        .clone()
1000        .or_else(|| detect_delimiter(&raw_lines))
1001        .unwrap_or(Delimiter::Whitespace);
1002    let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
1003    if options.num_header_lines > 0 {
1004        rows = rows.into_iter().skip(options.num_header_lines).collect();
1005    }
1006    if let Some(range) = options.range {
1007        rows = apply_import_range(rows, range);
1008    }
1009    import_rows_to_table(rows, options)
1010}
1011
1012async fn read_spreadsheet_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1013    if options.delimiter.is_some() {
1014        return Err(invalid_argument(
1015            "readtable: Delimiter is only valid for text files",
1016        ));
1017    }
1018    let bytes = read_file_bytes(path).await?;
1019    let cursor = Cursor::new(bytes);
1020    let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
1021        table_error(
1022            &TABLE_ERROR_UNSUPPORTED_FILE,
1023            format!(
1024                "readtable: unable to open spreadsheet '{}': {err}",
1025                path.display()
1026            ),
1027        )
1028    })?;
1029    let range = match &options.sheet {
1030        Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
1031            invalid_argument(format!("readtable: unable to read sheet '{name}': {err:?}"))
1032        })?,
1033        Some(SheetSelector::Index(index)) => workbook
1034            .worksheet_range_at(*index)
1035            .ok_or_else(|| {
1036                invalid_argument(format!(
1037                    "readtable: sheet index {} exceeds bounds",
1038                    index + 1
1039                ))
1040            })?
1041            .map_err(|err| {
1042                invalid_argument(format!(
1043                    "readtable: unable to read sheet {}: {err:?}",
1044                    index + 1
1045                ))
1046            })?,
1047        None => workbook
1048            .worksheet_range_at(0)
1049            .ok_or_else(|| invalid_argument("readtable: spreadsheet contains no worksheets"))?
1050            .map_err(|err| {
1051                invalid_argument(format!("readtable: unable to read first sheet: {err:?}"))
1052            })?,
1053    };
1054    let rows = spreadsheet_range_to_rows(&range, options)?;
1055    import_rows_to_table(rows, options)
1056}
1057
1058async fn read_file_bytes(path: &Path) -> BuiltinResult<Vec<u8>> {
1059    let mut file = File::open_async(path).await.map_err(|err| {
1060        table_error_with_source(
1061            &TABLE_ERROR_IO,
1062            format!("readtable: unable to open '{}': {err}", path.display()),
1063            err,
1064        )
1065    })?;
1066    let mut bytes = Vec::new();
1067    file.read_to_end(&mut bytes).map_err(|err| {
1068        table_error_with_source(
1069            &TABLE_ERROR_IO,
1070            format!("readtable: unable to read '{}': {err}", path.display()),
1071            err,
1072        )
1073    })?;
1074    Ok(bytes)
1075}
1076
/// Returns `true` when the path's extension names a spreadsheet format
/// (`xls`, `xlsx`, `xlsm`, `xlsb`, `ods`), ignoring ASCII case.
///
/// Paths with no extension, or with a non-UTF-8 extension, are not
/// spreadsheets.
fn is_spreadsheet_path(path: &Path) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    ["xls", "xlsx", "xlsm", "xlsb", "ods"]
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
}
1086
1087fn validate_encoding_label(label: &str) -> BuiltinResult<()> {
1088    encoding_for_label(label)
1089        .map(|_| ())
1090        .ok_or_else(|| invalid_argument(format!("readtable: unsupported Encoding '{label}'")))
1091}
1092
1093fn encoding_for_label(label: &str) -> Option<&'static Encoding> {
1094    let label = label.trim();
1095    if label.is_empty()
1096        || label.eq_ignore_ascii_case("auto")
1097        || label.eq_ignore_ascii_case("default")
1098        || label.eq_ignore_ascii_case("system")
1099        || label.eq_ignore_ascii_case("native")
1100        || label.eq_ignore_ascii_case("utf-8")
1101        || label.eq_ignore_ascii_case("utf8")
1102        || label.eq_ignore_ascii_case("unicode")
1103    {
1104        return Some(UTF_8);
1105    }
1106    Encoding::for_label(label.as_bytes())
1107}
1108
1109fn decode_text_bytes(bytes: &[u8], encoding: &str) -> BuiltinResult<String> {
1110    let (encoding, offset) = if encoding.trim().eq_ignore_ascii_case("auto") {
1111        Encoding::for_bom(bytes).unwrap_or((UTF_8, 0))
1112    } else {
1113        (
1114            encoding_for_label(encoding).ok_or_else(|| {
1115                invalid_argument(format!("readtable: unsupported Encoding '{encoding}'"))
1116            })?,
1117            0,
1118        )
1119    };
1120    let (decoded, _, had_errors) = encoding.decode(&bytes[offset..]);
1121    if had_errors {
1122        return Err(table_error(
1123            &TABLE_ERROR_IO,
1124            format!(
1125                "readtable: unable to decode file contents using encoding '{}'",
1126                encoding.name()
1127            ),
1128        ));
1129    }
1130    Ok(decoded.into_owned())
1131}
1132
1133#[derive(Clone, Debug)]
1134enum ImportCell {
1135    Empty,
1136    Text(String),
1137    Number(f64),
1138    Logical(bool),
1139    DateTime(f64),
1140    Error(String),
1141}
1142
1143impl ImportCell {
1144    fn from_text(text: String) -> Self {
1145        if text.trim().is_empty() {
1146            Self::Empty
1147        } else {
1148            Self::Text(text)
1149        }
1150    }
1151
1152    fn display_text(&self) -> String {
1153        match self {
1154            Self::Empty => String::new(),
1155            Self::Text(text) => text.clone(),
1156            Self::Number(value) => format_key_number(*value),
1157            Self::Logical(value) => value.to_string(),
1158            Self::DateTime(serial) => format_key_number(*serial),
1159            Self::Error(text) => text.clone(),
1160        }
1161    }
1162
1163    fn is_missing(&self, options: &ReadTableOptions) -> bool {
1164        match self {
1165            Self::Empty => true,
1166            Self::Text(text) => options.is_missing(text),
1167            _ => false,
1168        }
1169    }
1170
1171    fn is_likely_data_token(&self, options: &ReadTableOptions) -> bool {
1172        match self {
1173            Self::Number(_) | Self::Logical(_) | Self::DateTime(_) => true,
1174            Self::Empty => false,
1175            Self::Text(text) => {
1176                let token = unquote(text.trim()).trim();
1177                options.is_missing(token)
1178                    || parse_numeric(token).is_some()
1179                    || parse_logical(token).is_some()
1180                    || parse_iso_datetime_to_datenum(token).is_some()
1181            }
1182            Self::Error(_) => true,
1183        }
1184    }
1185}
1186
1187fn spreadsheet_cell_to_import(cell: &SpreadsheetData) -> ImportCell {
1188    match cell {
1189        SpreadsheetData::Empty => ImportCell::Empty,
1190        SpreadsheetData::Int(value) => ImportCell::Number(*value as f64),
1191        SpreadsheetData::Float(value) => ImportCell::Number(*value),
1192        SpreadsheetData::String(text) => ImportCell::Text(text.clone()),
1193        SpreadsheetData::Bool(value) => ImportCell::Logical(*value),
1194        SpreadsheetData::DateTime(value) => value
1195            .as_datetime()
1196            .map(crate::builtins::datetime::datenum_from_naive)
1197            .map(ImportCell::DateTime)
1198            .unwrap_or_else(|| ImportCell::Number(value.as_f64())),
1199        SpreadsheetData::DateTimeIso(text) => parse_iso_datetime_to_datenum(text)
1200            .map(ImportCell::DateTime)
1201            .unwrap_or_else(|| ImportCell::Text(text.clone())),
1202        SpreadsheetData::DurationIso(text) => ImportCell::Text(text.clone()),
1203        SpreadsheetData::Error(err) => ImportCell::Error(err.to_string()),
1204    }
1205}
1206
1207fn spreadsheet_range_to_rows(
1208    range: &calamine::Range<SpreadsheetData>,
1209    options: &ReadTableOptions,
1210) -> BuiltinResult<Vec<Vec<ImportCell>>> {
1211    if range.is_empty() {
1212        return Ok(Vec::new());
1213    }
1214    let Some((range_start_row, range_start_col)) = range.start() else {
1215        return Ok(Vec::new());
1216    };
1217    let Some((range_end_row, range_end_col)) = range.end() else {
1218        return Ok(Vec::new());
1219    };
1220    let start_row = options
1221        .range
1222        .map(|spec| checked_u32(spec.start_row, "Range row"))
1223        .transpose()?
1224        .unwrap_or(range_start_row);
1225    let start_col = options
1226        .range
1227        .map(|spec| checked_u32(spec.start_col, "Range column"))
1228        .transpose()?
1229        .unwrap_or(range_start_col);
1230    let end_row = options
1231        .range
1232        .and_then(|spec| spec.end_row)
1233        .map(|row| checked_u32(row, "Range row"))
1234        .transpose()?
1235        .unwrap_or(range_end_row);
1236    let end_col = options
1237        .range
1238        .and_then(|spec| spec.end_col)
1239        .map(|col| checked_u32(col, "Range column"))
1240        .transpose()?
1241        .unwrap_or(range_end_col);
1242    if start_row > end_row || start_col > end_col {
1243        return Ok(Vec::new());
1244    }
1245    let mut rows = Vec::new();
1246    for row_idx in start_row..=end_row {
1247        let mut row = Vec::new();
1248        for col_idx in start_col..=end_col {
1249            row.push(
1250                range
1251                    .get_value((row_idx, col_idx))
1252                    .map(spreadsheet_cell_to_import)
1253                    .unwrap_or(ImportCell::Empty),
1254            );
1255        }
1256        if matches!(options.empty_line_rule, EmptyLineRule::Skip)
1257            && row.iter().all(|cell| cell.is_missing(options))
1258        {
1259            continue;
1260        }
1261        rows.push(row);
1262    }
1263    if options.num_header_lines > 0 {
1264        Ok(rows.into_iter().skip(options.num_header_lines).collect())
1265    } else {
1266        Ok(rows)
1267    }
1268}
1269
1270fn checked_u32(value: usize, context: &str) -> BuiltinResult<u32> {
1271    u32::try_from(value).map_err(|_| invalid_argument(format!("readtable: {context} overflow")))
1272}
1273
1274fn detect_delimiter(lines: &[String]) -> Option<Delimiter> {
1275    let candidates = [',', '\t', ';', '|'];
1276    let mut best: Option<(f64, Delimiter)> = None;
1277    for candidate in candidates {
1278        let counts = lines
1279            .iter()
1280            .take(32)
1281            .filter(|line| line.contains(candidate))
1282            .map(|line| split_with_char_delim(line, candidate).len())
1283            .filter(|count| *count >= 2)
1284            .collect::<Vec<_>>();
1285        if counts.is_empty() {
1286            continue;
1287        }
1288        let avg = counts.iter().copied().sum::<usize>() as f64 / counts.len() as f64;
1289        if avg >= 2.0
1290            && best
1291                .as_ref()
1292                .map(|(best_avg, _)| avg > *best_avg)
1293                .unwrap_or(true)
1294        {
1295            best = Some((avg, Delimiter::Char(candidate)));
1296        }
1297    }
1298    best.map(|(_, delimiter)| delimiter).or_else(|| {
1299        lines
1300            .iter()
1301            .take(32)
1302            .any(|line| line.split_whitespace().count() > 1)
1303            .then_some(Delimiter::Whitespace)
1304    })
1305}
1306
/// Splits one line on `delimiter`, honouring double-quoted sections.
///
/// Quote characters are removed from the output. Inside quotes a doubled
/// quote (`""`) yields a literal `"`, and delimiters are kept as ordinary
/// characters. The result always has at least one field.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut rest = line.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            '"' => {
                if quoted && rest.peek() == Some(&'"') {
                    // Escaped quote: emit one quote and consume its twin.
                    field.push('"');
                    rest.next();
                } else {
                    quoted = !quoted;
                }
            }
            _ if ch == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            _ => field.push(ch),
        }
    }
    fields.push(field);
    fields
}
1332
1333fn parse_text_records(
1334    text: &str,
1335    delimiter: &Delimiter,
1336    empty_line_rule: EmptyLineRule,
1337) -> Vec<Vec<ImportCell>> {
1338    match delimiter {
1339        Delimiter::Whitespace => parse_whitespace_records(text, empty_line_rule),
1340        Delimiter::Char(ch) => parse_delimited_records(text, &ch.to_string(), empty_line_rule),
1341        Delimiter::String(pattern) => parse_delimited_records(text, pattern, empty_line_rule),
1342    }
1343}
1344
1345fn parse_delimited_records(
1346    text: &str,
1347    delimiter: &str,
1348    empty_line_rule: EmptyLineRule,
1349) -> Vec<Vec<ImportCell>> {
1350    let mut records = Vec::new();
1351    let mut row = Vec::new();
1352    let mut current = String::new();
1353    let mut in_quotes = false;
1354    let mut idx = 0usize;
1355    while idx < text.len() {
1356        let ch = text[idx..].chars().next().expect("valid char boundary");
1357        if ch == '"' {
1358            if in_quotes && text[idx + ch.len_utf8()..].starts_with('"') {
1359                current.push('"');
1360                idx += ch.len_utf8() + 1;
1361                continue;
1362            }
1363            in_quotes = !in_quotes;
1364            idx += ch.len_utf8();
1365            continue;
1366        }
1367        if !in_quotes && !delimiter.is_empty() && text[idx..].starts_with(delimiter) {
1368            row.push(ImportCell::from_text(std::mem::take(&mut current)));
1369            idx += delimiter.len();
1370            continue;
1371        }
1372        if !in_quotes && (ch == '\n' || ch == '\r') {
1373            row.push(ImportCell::from_text(std::mem::take(&mut current)));
1374            push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1375            idx += ch.len_utf8();
1376            if ch == '\r' && text[idx..].starts_with('\n') {
1377                idx += 1;
1378            }
1379            continue;
1380        }
1381        current.push(ch);
1382        idx += ch.len_utf8();
1383    }
1384    if !current.is_empty() || !row.is_empty() || text.ends_with(delimiter) {
1385        row.push(ImportCell::from_text(current));
1386        push_import_record(&mut records, row, empty_line_rule);
1387    }
1388    records
1389}
1390
1391fn parse_whitespace_records(text: &str, empty_line_rule: EmptyLineRule) -> Vec<Vec<ImportCell>> {
1392    let mut records = Vec::new();
1393    let mut row = Vec::new();
1394    let mut current = String::new();
1395    let mut in_quotes = false;
1396    let mut field_open = false;
1397    let mut chars = text.chars().peekable();
1398    while let Some(ch) = chars.next() {
1399        if ch == '"' {
1400            if in_quotes && chars.peek() == Some(&'"') {
1401                current.push('"');
1402                chars.next();
1403            } else {
1404                in_quotes = !in_quotes;
1405            }
1406            field_open = true;
1407            continue;
1408        }
1409        if !in_quotes && (ch == '\n' || ch == '\r') {
1410            if field_open || !current.is_empty() {
1411                row.push(ImportCell::from_text(std::mem::take(&mut current)));
1412            }
1413            field_open = false;
1414            push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1415            if ch == '\r' && chars.peek() == Some(&'\n') {
1416                chars.next();
1417            }
1418            continue;
1419        }
1420        if !in_quotes && ch.is_whitespace() {
1421            if field_open || !current.is_empty() {
1422                row.push(ImportCell::from_text(std::mem::take(&mut current)));
1423                field_open = false;
1424            }
1425            continue;
1426        }
1427        current.push(ch);
1428        field_open = true;
1429    }
1430    if field_open || !current.is_empty() {
1431        row.push(ImportCell::from_text(current));
1432    }
1433    if !row.is_empty() {
1434        push_import_record(&mut records, row, empty_line_rule);
1435    }
1436    records
1437}
1438
1439fn push_import_record(
1440    records: &mut Vec<Vec<ImportCell>>,
1441    row: Vec<ImportCell>,
1442    empty_line_rule: EmptyLineRule,
1443) {
1444    if matches!(empty_line_rule, EmptyLineRule::Skip)
1445        && row.iter().all(|cell| matches!(cell, ImportCell::Empty))
1446    {
1447        return;
1448    }
1449    records.push(row);
1450}
1451
1452fn apply_import_range(rows: Vec<Vec<ImportCell>>, range: RangeSpec) -> Vec<Vec<ImportCell>> {
1453    if rows.is_empty() {
1454        return rows;
1455    }
1456    let end_row = range
1457        .end_row
1458        .unwrap_or_else(|| rows.len().saturating_sub(1));
1459    let max_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
1460    let end_col = range.end_col.unwrap_or_else(|| max_cols.saturating_sub(1));
1461    rows.into_iter()
1462        .enumerate()
1463        .filter_map(|(idx, row)| {
1464            if idx < range.start_row || idx > end_row {
1465                return None;
1466            }
1467            let selected = (range.start_col..=end_col)
1468                .map(|col| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1469                .collect::<Vec<_>>();
1470            Some(selected)
1471        })
1472        .collect()
1473}
1474
1475fn import_rows_to_table(
1476    mut rows: Vec<Vec<ImportCell>>,
1477    options: &ReadTableOptions,
1478) -> BuiltinResult<Value> {
1479    if rows.is_empty() {
1480        return table_from_columns_with_properties(
1481            Vec::new(),
1482            Vec::new(),
1483            options.row_names.clone(),
1484        );
1485    }
1486
1487    let mut variable_names = options.variable_names.clone();
1488    let read_variable_names = options
1489        .read_variable_names
1490        .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
1491    if variable_names.is_none() && read_variable_names && !rows.is_empty() {
1492        variable_names = Some(
1493            rows.remove(0)
1494                .into_iter()
1495                .map(|cell| cell.display_text())
1496                .collect(),
1497        );
1498    }
1499
1500    let mut row_names = options.row_names.clone();
1501    if options.read_row_names && !rows.is_empty() {
1502        row_names = Some(
1503            rows.iter_mut()
1504                .map(|row| {
1505                    if row.is_empty() {
1506                        String::new()
1507                    } else {
1508                        row.remove(0).display_text()
1509                    }
1510                })
1511                .collect(),
1512        );
1513        if let Some(names) = variable_names.as_mut() {
1514            if !names.is_empty() {
1515                names.remove(0);
1516            }
1517        }
1518    }
1519
1520    let max_cols = rows
1521        .iter()
1522        .map(Vec::len)
1523        .max()
1524        .unwrap_or(0)
1525        .max(variable_names.as_ref().map(Vec::len).unwrap_or(0));
1526    let names = match variable_names {
1527        Some(mut names) => {
1528            while names.len() < max_cols {
1529                names.push(format!("Var{}", names.len() + 1));
1530            }
1531            names.truncate(max_cols);
1532            if options.preserve_variable_names {
1533                make_unique_names(names)
1534            } else {
1535                make_unique_variable_names(names)
1536            }
1537        }
1538        None => generated_variable_names(max_cols),
1539    };
1540
1541    let mut columns = Vec::with_capacity(names.len());
1542    for col in 0..names.len() {
1543        let values = rows
1544            .iter()
1545            .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1546            .collect::<Vec<_>>();
1547        columns.push(infer_import_column(values, options)?);
1548    }
1549    table_from_columns_with_properties(names, columns, row_names)
1550}
1551
1552fn should_read_variable_names(rows: &[Vec<ImportCell>], options: &ReadTableOptions) -> bool {
1553    let Some(first) = rows.first() else {
1554        return false;
1555    };
1556    if first.is_empty() {
1557        return false;
1558    }
1559    let names = first
1560        .iter()
1561        .map(ImportCell::display_text)
1562        .map(|text| text.trim().to_string())
1563        .collect::<Vec<_>>();
1564    if names.iter().any(|name| name.is_empty()) {
1565        return false;
1566    }
1567    if first.iter().all(|cell| cell.is_likely_data_token(options)) {
1568        return false;
1569    }
1570    true
1571}
1572
1573fn infer_import_column(
1574    values: Vec<ImportCell>,
1575    options: &ReadTableOptions,
1576) -> BuiltinResult<Value> {
1577    let mut numeric = Vec::with_capacity(values.len());
1578    let mut all_numeric = true;
1579    for value in &values {
1580        match value {
1581            ImportCell::Empty => numeric.push(f64::NAN),
1582            ImportCell::Number(value) => numeric.push(*value),
1583            ImportCell::Text(text) => {
1584                let token = unquote(text.trim()).trim();
1585                if options.is_missing(token) {
1586                    numeric.push(f64::NAN);
1587                } else if let Some(value) = parse_numeric(token) {
1588                    numeric.push(value);
1589                } else {
1590                    all_numeric = false;
1591                    break;
1592                }
1593            }
1594            _ => {
1595                all_numeric = false;
1596                break;
1597            }
1598        }
1599    }
1600    if all_numeric {
1601        return Tensor::new(numeric, vec![values.len(), 1])
1602            .map(Value::Tensor)
1603            .map_err(|err| invalid_variable(format!("readtable: {err}")));
1604    }
1605
1606    let mut logical = Vec::with_capacity(values.len());
1607    let mut all_logical = true;
1608    for value in &values {
1609        match value {
1610            ImportCell::Empty => logical.push(0),
1611            ImportCell::Logical(value) => logical.push(i32::from(*value) as u8),
1612            ImportCell::Text(text) => {
1613                let token = unquote(text.trim()).trim();
1614                if options.is_missing(token) {
1615                    logical.push(0);
1616                } else if let Some(value) = parse_logical(token) {
1617                    logical.push(i32::from(value) as u8);
1618                } else {
1619                    all_logical = false;
1620                    break;
1621                }
1622            }
1623            _ => {
1624                all_logical = false;
1625                break;
1626            }
1627        }
1628    }
1629    if all_logical {
1630        return LogicalArray::new(logical, vec![values.len(), 1])
1631            .map(Value::LogicalArray)
1632            .map_err(|err| invalid_variable(format!("readtable: {err}")));
1633    }
1634
1635    let mut serials = Vec::with_capacity(values.len());
1636    let mut all_datetime = true;
1637    for value in &values {
1638        match value {
1639            ImportCell::Empty => serials.push(f64::NAN),
1640            ImportCell::DateTime(serial) => serials.push(*serial),
1641            ImportCell::Text(text) => {
1642                let token = unquote(text.trim()).trim();
1643                if options.is_missing(token) {
1644                    serials.push(f64::NAN);
1645                } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
1646                    serials.push(serial);
1647                } else {
1648                    all_datetime = false;
1649                    break;
1650                }
1651            }
1652            _ => {
1653                all_datetime = false;
1654                break;
1655            }
1656        }
1657    }
1658    if all_datetime {
1659        let tensor = Tensor::new(serials, vec![values.len(), 1])
1660            .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
1661        return crate::builtins::datetime::datetime_object_from_serial_tensor(
1662            tensor,
1663            "yyyy-MM-dd HH:mm:ss",
1664        );
1665    }
1666
1667    let strings = values
1668        .into_iter()
1669        .map(|value| {
1670            if value.is_missing(options) {
1671                return String::new();
1672            }
1673            unquote(value.display_text().trim()).to_string()
1674        })
1675        .collect::<Vec<_>>();
1676    let len = strings.len();
1677    StringArray::new(strings, vec![len, 1])
1678        .map(Value::StringArray)
1679        .map_err(|err| invalid_variable(format!("readtable: {err}")))
1680}
1681
/// Parses a token as `f64`.
///
/// The spellings `nan`, `inf`/`infinity` (optionally prefixed with `+`) and
/// `-inf`/`-infinity` are matched ignoring ASCII case. Everything else is
/// handed to the standard `f64` parser. The token is not trimmed here.
fn parse_numeric(token: &str) -> Option<f64> {
    let is_any_of = |names: &[&str]| names.iter().any(|name| token.eq_ignore_ascii_case(name));

    if token.eq_ignore_ascii_case("nan") {
        Some(f64::NAN)
    } else if is_any_of(&["inf", "+inf", "infinity", "+infinity"]) {
        Some(f64::INFINITY)
    } else if is_any_of(&["-inf", "-infinity"]) {
        Some(f64::NEG_INFINITY)
    } else {
        token.parse::<f64>().ok()
    }
}
1690
/// Parses a token as a boolean, ignoring ASCII case.
///
/// `true`, `t`, `yes` and `on` give `Some(true)`. `false`, `f`, `no` and
/// `off` give `Some(false)`. Anything else gives `None`.
fn parse_logical(token: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["true", "t", "yes", "on"];
    const FALSY: [&str; 4] = ["false", "f", "no", "off"];

    let is_one_of = |words: &[&str]| words.iter().any(|word| token.eq_ignore_ascii_case(word));
    if is_one_of(&TRUTHY) {
        Some(true)
    } else if is_one_of(&FALSY) {
        Some(false)
    } else {
        None
    }
}
1698
1699fn parse_iso_datetime_to_datenum(token: &str) -> Option<f64> {
1700    let trimmed = token.trim();
1701    if trimmed.is_empty() {
1702        return None;
1703    }
1704    for format in [
1705        "%Y-%m-%dT%H:%M:%S%.f",
1706        "%Y-%m-%d %H:%M:%S%.f",
1707        "%Y/%m/%d %H:%M:%S%.f",
1708        "%m/%d/%Y %H:%M:%S%.f",
1709    ] {
1710        if let Ok(value) = NaiveDateTime::parse_from_str(trimmed, format) {
1711            return Some(crate::builtins::datetime::datenum_from_naive(value));
1712        }
1713    }
1714    for format in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"] {
1715        if let Ok(date) = NaiveDate::parse_from_str(trimmed, format) {
1716            return Some(crate::builtins::datetime::datenum_from_naive(
1717                date.and_time(NaiveTime::MIN),
1718            ));
1719        }
1720    }
1721    None
1722}
1723
/// Strips one pair of matching surrounding quotes from `token`.
///
/// A token that both starts and ends with `"` (or both with `'`) and is at
/// least two bytes long is returned without that outer pair. Any other token
/// is returned unchanged; mismatched quote kinds are not stripped.
fn unquote(token: &str) -> &str {
    ['"', '\'']
        .iter()
        .find_map(|&quote| token.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(token)
}
1735
1736fn default_properties(variable_names: Vec<String>, row_names: Option<Vec<String>>) -> StructValue {
1737    let mut props = StructValue::new();
1738    props.insert(
1739        VARIABLE_NAMES,
1740        Value::StringArray(
1741            StringArray::new(variable_names.clone(), vec![1, variable_names.len()])
1742                .expect("VariableNames shape is valid"),
1743        ),
1744    );
1745    props.insert(
1746        ROW_NAMES,
1747        row_names
1748            .map(|names| {
1749                Value::StringArray(
1750                    StringArray::new(names.clone(), vec![names.len(), 1])
1751                        .expect("RowNames shape is valid"),
1752                )
1753            })
1754            .unwrap_or_else(|| {
1755                Value::StringArray(StringArray::new(Vec::new(), vec![0, 1]).unwrap())
1756            }),
1757    );
1758    props.insert(
1759        DIMENSION_NAMES,
1760        Value::StringArray(
1761            StringArray::new(
1762                vec![
1763                    DEFAULT_ROW_DIM_NAME.to_string(),
1764                    DEFAULT_VARIABLE_DIM_NAME.to_string(),
1765                ],
1766                vec![1, 2],
1767            )
1768            .expect("DimensionNames shape is valid"),
1769        ),
1770    );
1771    props.insert(
1772        VARIABLE_UNITS,
1773        Value::StringArray(
1774            StringArray::new(
1775                vec![String::new(); variable_names.len()],
1776                vec![1, variable_names.len()],
1777            )
1778            .expect("VariableUnits shape is valid"),
1779        ),
1780    );
1781    props.insert(
1782        VARIABLE_DESCRIPTIONS,
1783        Value::StringArray(
1784            StringArray::new(
1785                vec![String::new(); variable_names.len()],
1786                vec![1, variable_names.len()],
1787            )
1788            .expect("VariableDescriptions shape is valid"),
1789        ),
1790    );
1791    props.insert(DESCRIPTION, Value::String(String::new()));
1792    props.insert(USER_DATA, Value::Tensor(Tensor::zeros(vec![0, 0])));
1793    props
1794}
1795
1796pub fn table_from_columns(names: Vec<String>, columns: Vec<Value>) -> BuiltinResult<Value> {
1797    table_from_columns_with_properties(names, columns, None)
1798}
1799
1800fn table_from_columns_with_properties(
1801    names: Vec<String>,
1802    columns: Vec<Value>,
1803    row_names: Option<Vec<String>>,
1804) -> BuiltinResult<Value> {
1805    ensure_table_class_registered();
1806    if names.len() != columns.len() {
1807        return Err(invalid_variable(
1808            "table: number of variable names must match number of variables",
1809        ));
1810    }
1811    let names = make_unique_names(names);
1812    let height = validate_column_heights(&names, &columns)?;
1813    if let Some(row_names) = &row_names {
1814        if row_names.len() != height {
1815            return Err(invalid_variable(
1816                "table: number of row names must match table height",
1817            ));
1818        }
1819    }
1820    let mut variables = StructValue::new();
1821    for (name, value) in names.iter().cloned().zip(columns) {
1822        variables.insert(name, value);
1823    }
1824    let props = default_properties(names, row_names);
1825    let mut object = ObjectInstance::new(TABLE_CLASS.to_string());
1826    object
1827        .properties
1828        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
1829    object.properties.insert(
1830        TABLE_PROPERTIES_FIELD.to_string(),
1831        Value::Struct(props.clone()),
1832    );
1833    object
1834        .properties
1835        .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
1836    Ok(Value::Object(object))
1837}
1838
1839fn validate_column_heights(names: &[String], columns: &[Value]) -> BuiltinResult<usize> {
1840    if columns.is_empty() {
1841        return Ok(0);
1842    }
1843    let height = value_row_count(&columns[0])?;
1844    for (name, value) in names.iter().zip(columns) {
1845        let rows = value_row_count(value)?;
1846        if rows != height {
1847            return Err(invalid_variable(format!(
1848                "table: variable '{name}' has {rows} rows but expected {height}"
1849            )));
1850        }
1851    }
1852    Ok(height)
1853}
1854
1855pub fn is_table_value(value: &Value) -> bool {
1856    table_object(value).is_some()
1857}
1858
1859fn table_object(value: &Value) -> Option<&ObjectInstance> {
1860    match value {
1861        Value::Object(object) if object.is_class(TABLE_CLASS) => Some(object),
1862        _ => None,
1863    }
1864}
1865
1866fn into_table_object(value: Value, context: &str) -> BuiltinResult<ObjectInstance> {
1867    match value {
1868        Value::Object(object) if object.is_class(TABLE_CLASS) => Ok(object),
1869        other => Err(invalid_argument(format!(
1870            "{context}: expected table, got {other:?}"
1871        ))),
1872    }
1873}
1874
1875pub fn table_variables(object: &ObjectInstance) -> BuiltinResult<StructValue> {
1876    match object.properties.get(TABLE_VARIABLES_FIELD) {
1877        Some(Value::Struct(st)) => Ok(st.clone()),
1878        Some(other) => Err(invalid_variable(format!(
1879            "table: invalid internal variable storage {other:?}"
1880        ))),
1881        None => Ok(StructValue::new()),
1882    }
1883}
1884
1885pub fn table_variable_names_from_object(object: &ObjectInstance) -> BuiltinResult<Vec<String>> {
1886    let variables = table_variables(object)?;
1887    Ok(variables.fields.keys().cloned().collect())
1888}
1889
1890pub fn table_height(object: &ObjectInstance) -> BuiltinResult<usize> {
1891    let variables = table_variables(object)?;
1892    match variables.fields.values().next() {
1893        Some(value) => value_row_count(value),
1894        None => Ok(0),
1895    }
1896}
1897
1898pub fn table_width(object: &ObjectInstance) -> BuiltinResult<usize> {
1899    table_variables(object).map(|vars| vars.fields.len())
1900}
1901
1902fn table_public_properties(object: &ObjectInstance) -> BuiltinResult<StructValue> {
1903    match object
1904        .properties
1905        .get(TABLE_PROPERTIES_FIELD)
1906        .or_else(|| object.properties.get(PROPERTIES_MEMBER))
1907    {
1908        Some(Value::Struct(st)) => Ok(st.clone()),
1909        Some(other) => Err(invalid_variable(format!(
1910            "table: invalid Properties storage {other:?}"
1911        ))),
1912        None => Ok(default_properties(
1913            table_variable_names_from_object(object)?,
1914            None,
1915        )),
1916    }
1917}
1918
1919fn sync_table_properties(object: &mut ObjectInstance, props: StructValue) {
1920    object.properties.insert(
1921        TABLE_PROPERTIES_FIELD.to_string(),
1922        Value::Struct(props.clone()),
1923    );
1924    object
1925        .properties
1926        .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
1927}
1928
1929fn table_member_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
1930    let name = scalar_text(payload, "table member")?;
1931    if name == PROPERTIES_MEMBER {
1932        return Ok(Value::Struct(table_public_properties(object)?));
1933    }
1934    let variables = table_variables(object)?;
1935    variables
1936        .fields
1937        .get(&name)
1938        .cloned()
1939        .ok_or_else(|| invalid_variable(format!("table: unrecognized variable '{name}'")))
1940}
1941
1942fn table_member_set(object: &mut ObjectInstance, field: &str, rhs: Value) -> BuiltinResult<()> {
1943    if field == PROPERTIES_MEMBER {
1944        let Value::Struct(props) = rhs else {
1945            return Err(invalid_variable(
1946                "table: Properties assignment expects a scalar struct",
1947            ));
1948        };
1949        apply_properties(object, props)?;
1950        return Ok(());
1951    }
1952    let mut variables = table_variables(object)?;
1953    let mut names = table_variable_names_from_object(object)?;
1954    let height = table_height(object)?;
1955    let rhs_rows = value_row_count(&rhs)?;
1956    if !variables.fields.is_empty() && rhs_rows != height {
1957        return Err(invalid_variable(format!(
1958            "table: variable '{field}' has {rhs_rows} rows but table has {height}"
1959        )));
1960    }
1961    if !variables.fields.contains_key(field) {
1962        names.push(field.to_string());
1963    }
1964    variables.insert(field.to_string(), rhs);
1965    object
1966        .properties
1967        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
1968    let mut props = table_public_properties(object)?;
1969    update_variable_metadata_names(&mut props, names)?;
1970    sync_table_properties(object, props);
1971    Ok(())
1972}
1973
1974fn apply_properties(object: &mut ObjectInstance, mut props: StructValue) -> BuiltinResult<()> {
1975    if let Some(value) = props.fields.get(VARIABLE_NAMES) {
1976        let names = variable_name_list(value)?;
1977        rename_table_variables(object, names.clone())?;
1978        update_variable_metadata_names(&mut props, names)?;
1979    }
1980    sync_table_properties(object, props);
1981    Ok(())
1982}
1983
1984fn rename_table_variables(
1985    object: &mut ObjectInstance,
1986    new_names: Vec<String>,
1987) -> BuiltinResult<()> {
1988    let old_names = table_variable_names_from_object(object)?;
1989    if old_names.len() != new_names.len() {
1990        return Err(invalid_variable(
1991            "table: VariableNames assignment must preserve variable count",
1992        ));
1993    }
1994    let new_names = make_unique_variable_names(new_names);
1995    let variables = table_variables(object)?;
1996    let mut renamed = StructValue::new();
1997    for (old, new) in old_names.iter().zip(new_names.iter()) {
1998        let value = variables
1999            .fields
2000            .get(old)
2001            .cloned()
2002            .ok_or_else(|| invalid_variable(format!("table: missing variable '{old}'")))?;
2003        renamed.insert(new.clone(), value);
2004    }
2005    object
2006        .properties
2007        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(renamed));
2008    Ok(())
2009}
2010
2011fn update_variable_metadata_names(
2012    props: &mut StructValue,
2013    names: Vec<String>,
2014) -> BuiltinResult<()> {
2015    props.insert(
2016        VARIABLE_NAMES,
2017        Value::StringArray(
2018            StringArray::new(names.clone(), vec![1, names.len()])
2019                .map_err(|err| invalid_variable(format!("table: {err}")))?,
2020        ),
2021    );
2022    for field in [VARIABLE_UNITS, VARIABLE_DESCRIPTIONS] {
2023        let existing = props.fields.get(field).cloned();
2024        let values = match existing {
2025            Some(Value::StringArray(mut array)) => {
2026                array.data.resize(names.len(), String::new());
2027                array.data.truncate(names.len());
2028                array.data
2029            }
2030            _ => vec![String::new(); names.len()],
2031        };
2032        props.insert(
2033            field,
2034            Value::StringArray(
2035                StringArray::new(values, vec![1, names.len()])
2036                    .map_err(|err| invalid_variable(format!("table: {err}")))?,
2037            ),
2038        );
2039    }
2040    Ok(())
2041}
2042
2043fn table_paren_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2044    let selectors = selector_values(payload)?;
2045    let rows = parse_row_selector(selectors.first(), table_height(object)?)?;
2046    let variable_names = table_variable_names_from_object(object)?;
2047    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2048    let variables = table_variables(object)?;
2049    let mut out = Vec::with_capacity(selected_names.len());
2050    for name in &selected_names {
2051        let value = variables
2052            .fields
2053            .get(name)
2054            .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
2055        out.push(select_rows(value, &rows)?);
2056    }
2057    let row_names = selected_row_names(object, &rows)?;
2058    table_from_columns_with_properties(selected_names, out, row_names)
2059}
2060
2061fn table_brace_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2062    let subset = table_paren_get(object, payload)?;
2063    let object = into_table_object(subset, "table brace indexing")?;
2064    let variables = table_variables(&object)?;
2065    if variables.fields.len() == 1 {
2066        return variables
2067            .fields
2068            .values()
2069            .next()
2070            .cloned()
2071            .ok_or_else(|| invalid_variable("table: missing selected variable"));
2072    }
2073    let values = variables.fields.values().collect::<Vec<_>>();
2074    if values.iter().all(|value| matches!(value, Value::Tensor(_))) {
2075        return concatenate_numeric_columns(&values);
2076    }
2077    CellArray::new(
2078        values.into_iter().cloned().collect(),
2079        1,
2080        variables.fields.len(),
2081    )
2082    .map(Value::Cell)
2083    .map_err(|err| invalid_variable(format!("table: {err}")))
2084}
2085
2086fn table_paren_assign(
2087    mut object: ObjectInstance,
2088    payload: &Value,
2089    rhs: Value,
2090) -> BuiltinResult<Value> {
2091    let rhs_table = into_table_object(rhs, "table paren assignment")?;
2092    let selectors = selector_values(payload)?;
2093    let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2094    let variable_names = table_variable_names_from_object(&object)?;
2095    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2096    let rhs_names = table_variable_names_from_object(&rhs_table)?;
2097    if selected_names.len() != rhs_names.len() {
2098        return Err(invalid_variable(
2099            "table: assignment variable count must match selected variables",
2100        ));
2101    }
2102    let mut variables = table_variables(&object)?;
2103    let rhs_variables = table_variables(&rhs_table)?;
2104    for (target_name, rhs_name) in selected_names.iter().zip(rhs_names.iter()) {
2105        let current =
2106            variables.fields.get(target_name).cloned().ok_or_else(|| {
2107                invalid_variable(format!("table: missing variable '{target_name}'"))
2108            })?;
2109        let rhs_col =
2110            rhs_variables.fields.get(rhs_name).cloned().ok_or_else(|| {
2111                invalid_variable(format!("table: missing rhs variable '{rhs_name}'"))
2112            })?;
2113        variables.insert(target_name.clone(), assign_rows(current, &rows, rhs_col)?);
2114    }
2115    object
2116        .properties
2117        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2118    Ok(Value::Object(object))
2119}
2120
2121fn table_brace_assign(
2122    mut object: ObjectInstance,
2123    payload: &Value,
2124    rhs: Value,
2125) -> BuiltinResult<Value> {
2126    let selectors = selector_values(payload)?;
2127    let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2128    let variable_names = table_variable_names_from_object(&object)?;
2129    let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2130    if selected_names.len() != 1 {
2131        return Err(invalid_variable(
2132            "table: brace assignment supports one variable at a time",
2133        ));
2134    }
2135    let mut variables = table_variables(&object)?;
2136    let target = selected_names[0].clone();
2137    let current = variables
2138        .fields
2139        .get(&target)
2140        .cloned()
2141        .ok_or_else(|| invalid_variable(format!("table: missing variable '{target}'")))?;
2142    variables.insert(target, assign_rows(current, &rows, rhs)?);
2143    object
2144        .properties
2145        .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2146    Ok(Value::Object(object))
2147}
2148
2149fn selector_values(payload: &Value) -> BuiltinResult<Vec<Value>> {
2150    match payload {
2151        Value::Cell(cell) => {
2152            let mut out = Vec::with_capacity(cell.data.len());
2153            for handle in &cell.data {
2154                out.push(unsafe { &*handle.as_raw() }.clone());
2155            }
2156            Ok(out)
2157        }
2158        other => Ok(vec![other.clone()]),
2159    }
2160}
2161
2162fn parse_row_selector(selector: Option<&Value>, height: usize) -> BuiltinResult<Vec<usize>> {
2163    let Some(selector) = selector else {
2164        return Ok((0..height).collect());
2165    };
2166    if is_colon_selector(selector) {
2167        return Ok((0..height).collect());
2168    }
2169    if is_end_selector(selector) {
2170        return if height == 0 {
2171            Err(invalid_index(
2172                "table: end row index is invalid for empty table",
2173            ))
2174        } else {
2175            Ok(vec![height - 1])
2176        };
2177    }
2178    match selector {
2179        Value::Num(n) => Ok(vec![one_based_to_zero(*n, height, "row")?]),
2180        Value::Int(i) => Ok(vec![one_based_to_zero(i.to_f64(), height, "row")?]),
2181        Value::Tensor(tensor) => tensor
2182            .data
2183            .iter()
2184            .map(|value| one_based_to_zero(*value, height, "row"))
2185            .collect(),
2186        Value::LogicalArray(array) => {
2187            if array.data.len() != height {
2188                return Err(invalid_index(
2189                    "table: logical row selector length must match table height",
2190                ));
2191            }
2192            Ok(array
2193                .data
2194                .iter()
2195                .enumerate()
2196                .filter_map(|(idx, value)| (*value != 0).then_some(idx))
2197                .collect())
2198        }
2199        other => Err(invalid_index(format!(
2200            "table: unsupported row selector {other:?}"
2201        ))),
2202    }
2203}
2204
2205fn parse_variable_selector(
2206    selector: Option<&Value>,
2207    names: &[String],
2208) -> BuiltinResult<Vec<String>> {
2209    let Some(selector) = selector else {
2210        return Ok(names.to_vec());
2211    };
2212    if is_colon_selector(selector) {
2213        return Ok(names.to_vec());
2214    }
2215    match selector {
2216        Value::String(_) | Value::CharArray(_) | Value::StringArray(_) | Value::Cell(_) => {
2217            let selected = string_list(selector)?;
2218            for name in &selected {
2219                if !names.contains(name) {
2220                    return Err(invalid_variable(format!(
2221                        "table: unrecognized variable '{name}'"
2222                    )));
2223                }
2224            }
2225            Ok(selected)
2226        }
2227        Value::Num(n) => Ok(vec![name_at_index(names, *n)?]),
2228        Value::Int(i) => Ok(vec![name_at_index(names, i.to_f64())?]),
2229        Value::Tensor(tensor) => tensor
2230            .data
2231            .iter()
2232            .map(|value| name_at_index(names, *value))
2233            .collect(),
2234        Value::LogicalArray(array) => {
2235            if array.data.len() != names.len() {
2236                return Err(invalid_index(
2237                    "table: logical variable selector length must match table width",
2238                ));
2239            }
2240            Ok(array
2241                .data
2242                .iter()
2243                .zip(names.iter())
2244                .filter_map(|(flag, name)| (*flag != 0).then_some(name.clone()))
2245                .collect())
2246        }
2247        other => Err(invalid_index(format!(
2248            "table: unsupported variable selector {other:?}"
2249        ))),
2250    }
2251}
2252
2253fn is_colon_selector(value: &Value) -> bool {
2254    scalar_text(value, "selector")
2255        .map(|text| text == ":")
2256        .unwrap_or(false)
2257}
2258
2259fn is_end_selector(value: &Value) -> bool {
2260    scalar_text(value, "selector")
2261        .map(|text| text == "end")
2262        .unwrap_or(false)
2263}
2264
2265fn name_at_index(names: &[String], value: f64) -> BuiltinResult<String> {
2266    let idx = one_based_to_zero(value, names.len(), "variable")?;
2267    Ok(names[idx].clone())
2268}
2269
2270fn one_based_to_zero(value: f64, len: usize, context: &str) -> BuiltinResult<usize> {
2271    if !value.is_finite() || value < 1.0 || (value.round() - value).abs() > f64::EPSILON {
2272        return Err(invalid_index(format!(
2273            "table: {context} indices must be positive finite integers"
2274        )));
2275    }
2276    let idx = value.round() as usize - 1;
2277    if idx >= len {
2278        return Err(invalid_index(format!(
2279            "table: {context} index exceeds bounds"
2280        )));
2281    }
2282    Ok(idx)
2283}
2284
2285fn selected_row_names(
2286    object: &ObjectInstance,
2287    rows: &[usize],
2288) -> BuiltinResult<Option<Vec<String>>> {
2289    let props = table_public_properties(object)?;
2290    let Some(value) = props.fields.get(ROW_NAMES) else {
2291        return Ok(None);
2292    };
2293    let names = string_list(value)?;
2294    if names.is_empty() {
2295        return Ok(None);
2296    }
2297    Ok(Some(
2298        rows.iter()
2299            .filter_map(|row| names.get(*row).cloned())
2300            .collect(),
2301    ))
2302}
2303
2304fn value_row_count(value: &Value) -> BuiltinResult<usize> {
2305    match value {
2306        Value::Tensor(tensor) => Ok(tensor.rows()),
2307        Value::ComplexTensor(tensor) => Ok(tensor.rows),
2308        Value::StringArray(array) => Ok(array.rows()),
2309        Value::LogicalArray(array) => Ok(array.shape.first().copied().unwrap_or(array.data.len())),
2310        Value::Cell(cell) => Ok(cell.rows),
2311        Value::CharArray(array) => Ok(array.rows),
2312        Value::Object(obj) if obj.is_class("datetime") => {
2313            crate::builtins::datetime::serials_from_datetime_value(value)
2314                .map(|tensor| tensor.rows())
2315        }
2316        Value::Object(obj) if obj.is_class(TABLE_CLASS) => table_height(obj),
2317        _ => Ok(1),
2318    }
2319}
2320
2321fn select_rows(value: &Value, rows: &[usize]) -> BuiltinResult<Value> {
2322    match value {
2323        Value::Tensor(tensor) => {
2324            let cols = tensor.cols();
2325            let mut data = Vec::with_capacity(rows.len() * cols);
2326            for col in 0..cols {
2327                for &row in rows {
2328                    data.push(tensor.get2(row, col).map_err(invalid_index)?);
2329                }
2330            }
2331            Tensor::new_with_dtype(data, vec![rows.len(), cols], tensor.dtype)
2332                .map(Value::Tensor)
2333                .map_err(invalid_variable)
2334        }
2335        Value::ComplexTensor(tensor) => {
2336            let mut data = Vec::with_capacity(rows.len() * tensor.cols);
2337            for col in 0..tensor.cols {
2338                for &row in rows {
2339                    let idx = row + col * tensor.rows;
2340                    data.push(*tensor.data.get(idx).ok_or_else(|| {
2341                        invalid_index("table: complex variable row index out of bounds")
2342                    })?);
2343                }
2344            }
2345            ComplexTensor::new(data, vec![rows.len(), tensor.cols])
2346                .map(Value::ComplexTensor)
2347                .map_err(invalid_variable)
2348        }
2349        Value::StringArray(array) => {
2350            let cols = array.cols();
2351            let mut data = Vec::with_capacity(rows.len() * cols);
2352            for col in 0..cols {
2353                for &row in rows {
2354                    let idx = row + col * array.rows();
2355                    data.push(array.data.get(idx).cloned().ok_or_else(|| {
2356                        invalid_index("table: string variable row index out of bounds")
2357                    })?);
2358                }
2359            }
2360            StringArray::new(data, vec![rows.len(), cols])
2361                .map(Value::StringArray)
2362                .map_err(invalid_variable)
2363        }
2364        Value::LogicalArray(array) => {
2365            let source_rows = array.shape.first().copied().unwrap_or(array.data.len());
2366            let cols = array.shape.get(1).copied().unwrap_or(1);
2367            let mut data = Vec::with_capacity(rows.len() * cols);
2368            for col in 0..cols {
2369                for &row in rows {
2370                    let idx = row + col * source_rows;
2371                    data.push(*array.data.get(idx).ok_or_else(|| {
2372                        invalid_index("table: logical variable row index out of bounds")
2373                    })?);
2374                }
2375            }
2376            LogicalArray::new(data, vec![rows.len(), cols])
2377                .map(Value::LogicalArray)
2378                .map_err(invalid_variable)
2379        }
2380        Value::Cell(cell) => {
2381            let mut data = Vec::with_capacity(rows.len() * cell.cols);
2382            for col in 0..cell.cols {
2383                for &row in rows {
2384                    data.push(cell.get(row, col).map_err(invalid_index)?);
2385                }
2386            }
2387            CellArray::new(data, rows.len(), cell.cols)
2388                .map(Value::Cell)
2389                .map_err(invalid_variable)
2390        }
2391        Value::Object(obj) if obj.is_class("datetime") => {
2392            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
2393            let selected = select_rows(&Value::Tensor(tensor), rows)?;
2394            match selected {
2395                Value::Tensor(tensor) => {
2396                    crate::builtins::datetime::datetime_object_from_serial_tensor(
2397                        tensor,
2398                        crate::builtins::datetime::datetime_format_from_value(value),
2399                    )
2400                }
2401                _ => unreachable!("select_rows tensor branch returns tensor"),
2402            }
2403        }
2404        _ if rows.len() == 1 && rows[0] == 0 => Ok(value.clone()),
2405        other => Err(invalid_variable(format!(
2406            "table: row selection unsupported for variable {other:?}"
2407        ))),
2408    }
2409}
2410
2411fn assign_rows(mut current: Value, rows: &[usize], rhs: Value) -> BuiltinResult<Value> {
2412    if value_row_count(&rhs)? != rows.len() {
2413        return Err(invalid_variable(
2414            "table: assignment row count must match selected row count",
2415        ));
2416    }
2417    let replacing_all_rows = rows.len() == value_row_count(&current)?;
2418    match (&mut current, rhs) {
2419        (Value::Tensor(target), Value::Tensor(source)) => {
2420            if target.cols() != source.cols() {
2421                return Err(invalid_variable(
2422                    "table: tensor assignment column count mismatch",
2423                ));
2424            }
2425            for col in 0..target.cols() {
2426                for (src_row, &dst_row) in rows.iter().enumerate() {
2427                    let value = source.get2(src_row, col).map_err(invalid_index)?;
2428                    target.set2(dst_row, col, value).map_err(invalid_index)?;
2429                }
2430            }
2431            Ok(current)
2432        }
2433        (_, source) if replacing_all_rows => Ok(source),
2434        _ => Err(invalid_variable(
2435            "table: assignment for this variable type requires replacing all rows",
2436        )),
2437    }
2438}
2439
2440fn concatenate_numeric_columns(values: &[&Value]) -> BuiltinResult<Value> {
2441    let rows = values
2442        .first()
2443        .and_then(|value| match value {
2444            Value::Tensor(t) => Some(t.rows()),
2445            _ => None,
2446        })
2447        .unwrap_or(0);
2448    let cols = values
2449        .iter()
2450        .map(|value| match value {
2451            Value::Tensor(t) => Ok(t.cols()),
2452            _ => Err(invalid_variable("table: expected numeric variable")),
2453        })
2454        .collect::<BuiltinResult<Vec<_>>>()?;
2455    let total_cols: usize = cols.iter().sum();
2456    let mut data = Vec::with_capacity(rows * total_cols);
2457    for value in values {
2458        let Value::Tensor(tensor) = value else {
2459            return Err(invalid_variable("table: expected numeric variable"));
2460        };
2461        for col in 0..tensor.cols() {
2462            for row in 0..rows {
2463                data.push(tensor.get2(row, col).map_err(invalid_index)?);
2464            }
2465        }
2466    }
2467    Tensor::new(data, vec![rows, total_cols])
2468        .map(Value::Tensor)
2469        .map_err(invalid_variable)
2470}
2471
2472pub fn sortrows_table(value: Value, rest: &[Value]) -> BuiltinResult<(Value, Tensor)> {
2473    let object = into_table_object(value, "sortrows")?;
2474    let names = table_variable_names_from_object(&object)?;
2475    let sort_spec = SortSpec::parse(rest, &names)?;
2476    let height = table_height(&object)?;
2477    let variables = table_variables(&object)?;
2478    let mut indices: Vec<usize> = (0..height).collect();
2479    indices.sort_by(|&a, &b| {
2480        for key in &sort_spec.keys {
2481            let Some(value) = variables.fields.get(&key.name) else {
2482                continue;
2483            };
2484            let ord = compare_table_cells(value, a, b).unwrap_or(Ordering::Equal);
2485            let ord = if key.descending { ord.reverse() } else { ord };
2486            if ord != Ordering::Equal {
2487                return ord;
2488            }
2489        }
2490        a.cmp(&b)
2491    });
2492    let mut sorted_columns = Vec::with_capacity(names.len());
2493    for name in &names {
2494        let value = variables
2495            .fields
2496            .get(name)
2497            .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
2498        sorted_columns.push(select_rows(value, &indices)?);
2499    }
2500    let row_names = selected_row_names(&object, &indices)?;
2501    let sorted = table_from_columns_with_properties(names, sorted_columns, row_names)?;
2502    let indices_tensor = Tensor::new(
2503        indices.iter().map(|idx| *idx as f64 + 1.0).collect(),
2504        vec![indices.len(), 1],
2505    )
2506    .map_err(invalid_variable)?;
2507    Ok((sorted, indices_tensor))
2508}
2509
2510struct SortSpec {
2511    keys: Vec<SortKey>,
2512}
2513
2514struct SortKey {
2515    name: String,
2516    descending: bool,
2517}
2518
2519impl SortSpec {
2520    fn parse(rest: &[Value], names: &[String]) -> BuiltinResult<Self> {
2521        let mut keys = if rest.is_empty() {
2522            names
2523                .iter()
2524                .map(|name| SortKey {
2525                    name: name.clone(),
2526                    descending: false,
2527                })
2528                .collect::<Vec<_>>()
2529        } else {
2530            parse_variable_selector(rest.first(), names)?
2531                .into_iter()
2532                .map(|name| SortKey {
2533                    name,
2534                    descending: false,
2535                })
2536                .collect()
2537        };
2538        if let Some(direction) = rest.get(1) {
2539            let directions = string_list(direction)?;
2540            if directions.len() == 1 {
2541                let descending = directions[0].eq_ignore_ascii_case("descend")
2542                    || directions[0].eq_ignore_ascii_case("desc");
2543                for key in &mut keys {
2544                    key.descending = descending;
2545                }
2546            } else {
2547                for (key, direction) in keys.iter_mut().zip(directions.iter()) {
2548                    key.descending = direction.eq_ignore_ascii_case("descend")
2549                        || direction.eq_ignore_ascii_case("desc");
2550                }
2551            }
2552        }
2553        Ok(Self { keys })
2554    }
2555}
2556
2557fn compare_table_cells(value: &Value, a: usize, b: usize) -> BuiltinResult<Ordering> {
2558    match value {
2559        Value::Tensor(tensor) => Ok(tensor
2560            .get2(a, 0)
2561            .map_err(invalid_index)?
2562            .partial_cmp(&tensor.get2(b, 0).map_err(invalid_index)?)
2563            .unwrap_or(Ordering::Greater)),
2564        Value::StringArray(array) => {
2565            let av = array.data.get(a).cloned().unwrap_or_default();
2566            let bv = array.data.get(b).cloned().unwrap_or_default();
2567            Ok(av.cmp(&bv))
2568        }
2569        Value::LogicalArray(array) => {
2570            let av = *array.data.get(a).unwrap_or(&0);
2571            let bv = *array.data.get(b).unwrap_or(&0);
2572            Ok(av.cmp(&bv))
2573        }
2574        Value::Object(obj) if obj.is_class("datetime") => {
2575            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
2576            Ok(tensor
2577                .data
2578                .get(a)
2579                .copied()
2580                .unwrap_or(f64::NAN)
2581                .partial_cmp(&tensor.data.get(b).copied().unwrap_or(f64::NAN))
2582                .unwrap_or(Ordering::Greater))
2583        }
2584        other => Ok(cell_key_string(other, a).cmp(&cell_key_string(other, b))),
2585    }
2586}
2587
/// One component of a grouping key, with a total order so that keys can be
/// stored in an ordered map.
///
/// Values of different kinds are ordered by kind first (missing, logical,
/// number, text). Numbers compare numerically: `-0.0` equals `0.0`, every NaN
/// equals every other NaN, and NaN sorts after all other numbers.
#[derive(Clone, Debug)]
enum GroupAtom {
    Number(f64),
    Text(String),
    Logical(bool),
    Missing,
}

impl GroupAtom {
    /// Sort rank of the variant; lower ranks order first.
    fn rank(&self) -> u8 {
        match self {
            Self::Missing => 0,
            Self::Logical(_) => 1,
            Self::Number(_) => 2,
            Self::Text(_) => 3,
        }
    }

    /// Total order over `f64` that ignores the sign of zero and the NaN
    /// payload, placing NaN last.
    fn compare_numbers(left: f64, right: f64) -> Ordering {
        match (left.is_nan(), right.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Neither side is NaN, so `partial_cmp` always succeeds.
            (false, false) => left.partial_cmp(&right).unwrap_or(Ordering::Equal),
        }
    }
}

impl PartialEq for GroupAtom {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for GroupAtom {}

impl PartialOrd for GroupAtom {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for GroupAtom {
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (Self::Missing, Self::Missing) => Ordering::Equal,
            (Self::Logical(a), Self::Logical(b)) => a.cmp(b),
            (Self::Number(a), Self::Number(b)) => Self::compare_numbers(*a, *b),
            (Self::Text(a), Self::Text(b)) => a.cmp(b),
            // Different kinds: order by variant rank.
            _ => self.rank().cmp(&other.rank()),
        }
    }
}
2636
2637fn cell_group_atom(value: &Value, row: usize) -> GroupAtom {
2638    match value {
2639        Value::Tensor(tensor) => tensor
2640            .get2(row, 0)
2641            .map(GroupAtom::Number)
2642            .unwrap_or(GroupAtom::Missing),
2643        Value::StringArray(array) => array
2644            .data
2645            .get(row)
2646            .cloned()
2647            .map(GroupAtom::Text)
2648            .unwrap_or(GroupAtom::Missing),
2649        Value::LogicalArray(array) => array
2650            .data
2651            .get(row)
2652            .map(|value| GroupAtom::Logical(*value != 0))
2653            .unwrap_or(GroupAtom::Missing),
2654        Value::Object(obj) if obj.is_class("datetime") => {
2655            crate::builtins::datetime::serials_from_datetime_value(value)
2656                .ok()
2657                .and_then(|tensor| tensor.data.get(row).copied())
2658                .map(GroupAtom::Number)
2659                .unwrap_or(GroupAtom::Missing)
2660        }
2661        other => GroupAtom::Text(cell_key_string(other, row)),
2662    }
2663}
2664
2665fn groupsummary_impl(
2666    table: Value,
2667    groupvars: Value,
2668    method: Value,
2669    rest: Vec<Value>,
2670) -> BuiltinResult<Value> {
2671    let object = into_table_object(table, "groupsummary")?;
2672    let names = table_variable_names_from_object(&object)?;
2673    let group_names = parse_variable_selector(Some(&groupvars), &names)?;
2674    let methods = string_list(&method)?;
2675    if methods.is_empty() {
2676        return Err(invalid_argument(
2677            "groupsummary: method list must not be empty",
2678        ));
2679    }
2680    let data_names = if let Some(value) = rest.first() {
2681        parse_variable_selector(Some(value), &names)?
2682    } else {
2683        names
2684            .iter()
2685            .filter(|name| !group_names.contains(name))
2686            .filter(|name| {
2687                table_variables(&object)
2688                    .ok()
2689                    .and_then(|vars| vars.fields.get(*name).cloned())
2690                    .map(|value| matches!(value, Value::Tensor(_)))
2691                    .unwrap_or(false)
2692            })
2693            .cloned()
2694            .collect()
2695    };
2696    let variables = table_variables(&object)?;
2697    let height = table_height(&object)?;
2698    let mut groups: BTreeMap<Vec<GroupAtom>, Vec<usize>> = BTreeMap::new();
2699    for row in 0..height {
2700        let key = group_names
2701            .iter()
2702            .map(|name| {
2703                variables
2704                    .fields
2705                    .get(name)
2706                    .map(|value| cell_group_atom(value, row))
2707                    .unwrap_or(GroupAtom::Missing)
2708            })
2709            .collect::<Vec<_>>();
2710        groups.entry(key).or_default().push(row);
2711    }
2712    let group_rows = groups
2713        .values()
2714        .filter_map(|rows| rows.first().copied())
2715        .collect::<Vec<_>>();
2716    let mut out_names = Vec::new();
2717    let mut out_columns = Vec::new();
2718    for name in &group_names {
2719        let value = variables.fields.get(name).ok_or_else(|| {
2720            invalid_variable(format!("groupsummary: missing group variable '{name}'"))
2721        })?;
2722        out_names.push(name.clone());
2723        out_columns.push(select_rows(value, &group_rows)?);
2724    }
2725    out_names.push("GroupCount".to_string());
2726    out_columns.push(Value::Tensor(
2727        Tensor::new(
2728            groups.values().map(|rows| rows.len() as f64).collect(),
2729            vec![groups.len(), 1],
2730        )
2731        .map_err(invalid_variable)?,
2732    ));
2733    for method in &methods {
2734        for name in &data_names {
2735            let value = variables.fields.get(name).ok_or_else(|| {
2736                invalid_variable(format!("groupsummary: missing data variable '{name}'"))
2737            })?;
2738            let values = summarize_groups(value, groups.values(), method)?;
2739            out_names.push(format!("{}_{}", method.to_ascii_lowercase(), name));
2740            out_columns.push(Value::Tensor(
2741                Tensor::new(values, vec![groups.len(), 1]).map_err(invalid_variable)?,
2742            ));
2743        }
2744    }
2745    table_from_columns(out_names, out_columns)
2746}
2747
2748fn summarize_groups<'a>(
2749    value: &Value,
2750    groups: impl Iterator<Item = &'a Vec<usize>>,
2751    method: &str,
2752) -> BuiltinResult<Vec<f64>> {
2753    let tensor = match value {
2754        Value::Tensor(tensor) if tensor.cols() == 1 => tensor,
2755        _ => {
2756            return Err(invalid_variable(
2757                "groupsummary: summary data variables must be numeric column vectors",
2758            ))
2759        }
2760    };
2761    groups
2762        .map(|rows| {
2763            let mut values = rows
2764                .iter()
2765                .map(|row| tensor.get2(*row, 0).map_err(invalid_index))
2766                .collect::<BuiltinResult<Vec<_>>>()?;
2767            values.retain(|value| !value.is_nan());
2768            let result = match method.to_ascii_lowercase().as_str() {
2769                "mean" => {
2770                    if values.is_empty() {
2771                        f64::NAN
2772                    } else {
2773                        values.iter().sum::<f64>() / values.len() as f64
2774                    }
2775                }
2776                "sum" => values.iter().sum(),
2777                "min" => values.into_iter().fold(f64::INFINITY, f64::min),
2778                "max" => values.into_iter().fold(f64::NEG_INFINITY, f64::max),
2779                "median" => {
2780                    if values.is_empty() {
2781                        f64::NAN
2782                    } else {
2783                        values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
2784                        let mid = values.len() / 2;
2785                        if values.len() % 2 == 0 {
2786                            (values[mid - 1] + values[mid]) / 2.0
2787                        } else {
2788                            values[mid]
2789                        }
2790                    }
2791                }
2792                "count" | "numel" => values.len() as f64,
2793                other => {
2794                    return Err(invalid_argument(format!(
2795                        "groupsummary: unsupported method '{other}'"
2796                    )))
2797                }
2798            };
2799            Ok(result)
2800        })
2801        .collect()
2802}
2803
2804fn cell_key_string(value: &Value, row: usize) -> String {
2805    match value {
2806        Value::Tensor(tensor) => tensor
2807            .get2(row, 0)
2808            .map(format_key_number)
2809            .unwrap_or_default(),
2810        Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
2811        Value::LogicalArray(array) => array
2812            .data
2813            .get(row)
2814            .map(|value| value.to_string())
2815            .unwrap_or_default(),
2816        Value::Object(obj) if obj.is_class("datetime") => {
2817            crate::builtins::datetime::serials_from_datetime_value(value)
2818                .ok()
2819                .and_then(|tensor| tensor.data.get(row).copied())
2820                .map(format_key_number)
2821                .unwrap_or_default()
2822        }
2823        other => format!("{other}"),
2824    }
2825}
2826
2827pub fn table_display_text(value: &Value) -> BuiltinResult<String> {
2828    let object = match value {
2829        Value::Object(object) if object.is_class(TABLE_CLASS) => object,
2830        _ => return Err(invalid_argument("table display expects table object")),
2831    };
2832    let names = table_variable_names_from_object(object)?;
2833    let variables = table_variables(object)?;
2834    let rows = table_height(object)?;
2835    let preview = rows.min(12);
2836    let mut widths = names.iter().map(|name| name.len()).collect::<Vec<_>>();
2837    let rendered_cols = names
2838        .iter()
2839        .enumerate()
2840        .map(|(col, name)| {
2841            let value = variables
2842                .fields
2843                .get(name)
2844                .cloned()
2845                .unwrap_or_else(|| Value::String(String::new()));
2846            let cells = (0..preview)
2847                .map(|row| render_table_cell(&value, row))
2848                .collect::<Vec<_>>();
2849            for cell in &cells {
2850                widths[col] = widths[col].max(cell.len());
2851            }
2852            cells
2853        })
2854        .collect::<Vec<_>>();
2855
2856    let mut lines = Vec::new();
2857    lines.push(format!("{rows}x{} table", names.len()));
2858    if names.is_empty() {
2859        return Ok(lines.join("\n"));
2860    }
2861    let header = names
2862        .iter()
2863        .enumerate()
2864        .map(|(idx, name)| format!("{name:<width$}", width = widths[idx]))
2865        .collect::<Vec<_>>()
2866        .join("  ");
2867    lines.push(header);
2868    for row in 0..preview {
2869        lines.push(
2870            rendered_cols
2871                .iter()
2872                .enumerate()
2873                .map(|(col, cells)| format!("{:<width$}", cells[row], width = widths[col]))
2874                .collect::<Vec<_>>()
2875                .join("  "),
2876        );
2877    }
2878    if preview < rows {
2879        lines.push(format!("... {} more rows", rows - preview));
2880    }
2881    Ok(lines.join("\n"))
2882}
2883
2884pub fn table_summary_text(value: &Value) -> BuiltinResult<String> {
2885    let object = match value {
2886        Value::Object(object) if object.is_class(TABLE_CLASS) => object,
2887        _ => return Err(invalid_argument("table display expects table object")),
2888    };
2889    Ok(format!(
2890        "{}x{} table",
2891        table_height(object)?,
2892        table_width(object)?
2893    ))
2894}
2895
2896fn render_table_cell(value: &Value, row: usize) -> String {
2897    match value {
2898        Value::Tensor(tensor) => tensor
2899            .get2(row, 0)
2900            .map(format_table_number)
2901            .unwrap_or_default(),
2902        Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
2903        Value::LogicalArray(array) => array
2904            .data
2905            .get(row)
2906            .map(|value| if *value != 0 { "true" } else { "false" }.to_string())
2907            .unwrap_or_default(),
2908        Value::Object(obj) if obj.is_class("datetime") => {
2909            crate::builtins::datetime::datetime_string_array(value)
2910                .ok()
2911                .flatten()
2912                .and_then(|array| array.data.get(row).cloned())
2913                .unwrap_or_else(|| value.to_string())
2914        }
2915        other => other.to_string(),
2916    }
2917}
2918
2919fn format_table_number(value: f64) -> String {
2920    if value.is_nan() {
2921        "NaN".to_string()
2922    } else if value.fract() == 0.0 && value.abs() < 1e15 {
2923        format!("{}", value as i64)
2924    } else {
2925        trim_float(format!("{value:.6}"))
2926    }
2927}
2928
2929fn format_key_number(value: f64) -> String {
2930    if value.is_nan() {
2931        "NaN".to_string()
2932    } else if value.is_infinite() {
2933        value.to_string()
2934    } else {
2935        trim_float(format!("{value:.17}"))
2936    }
2937}
2938
/// Strips trailing zeros after the decimal point, and the point itself when
/// nothing remains after it. Text without a `.` is returned unchanged.
fn trim_float(mut text: String) -> String {
    let Some(dot) = text.find('.') else {
        return text;
    };
    // The '.' itself is not '0', so trimming never removes anything before it.
    let trimmed = text.trim_end_matches('0').len();
    let keep = if trimmed == dot + 1 { dot } else { trimmed };
    text.truncate(keep);
    text
}
2952
2953fn scalar_text(value: &Value, context: &str) -> BuiltinResult<String> {
2954    match value {
2955        Value::String(text) => Ok(text.clone()),
2956        Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
2957        Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
2958        _ => Err(invalid_argument(format!(
2959            "table: {context} must be a string scalar or character vector"
2960        ))),
2961    }
2962}
2963
2964fn bool_scalar(value: &Value, context: &str) -> BuiltinResult<bool> {
2965    match value {
2966        Value::Bool(flag) => Ok(*flag),
2967        Value::Int(value) => Ok(value.to_i64() != 0),
2968        Value::Num(value) if value.is_finite() => Ok(*value != 0.0),
2969        Value::String(_) | Value::CharArray(_) | Value::StringArray(_) => {
2970            let text = scalar_text(value, context)?;
2971            match text.to_ascii_lowercase().as_str() {
2972                "true" | "on" | "yes" => Ok(true),
2973                "false" | "off" | "no" => Ok(false),
2974                _ => Err(invalid_argument(format!(
2975                    "table: {context} must be logical"
2976                ))),
2977            }
2978        }
2979        _ => Err(invalid_argument(format!(
2980            "table: {context} must be logical"
2981        ))),
2982    }
2983}
2984
2985fn nonnegative_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
2986    match value {
2987        Value::Int(value) if value.to_i64() >= 0 => Ok(value.to_i64() as usize),
2988        Value::Num(value)
2989            if value.is_finite()
2990                && *value >= 0.0
2991                && (value.round() - value).abs() <= f64::EPSILON =>
2992        {
2993            Ok(value.round() as usize)
2994        }
2995        _ => Err(invalid_argument(format!(
2996            "table: {context} must be a non-negative integer"
2997        ))),
2998    }
2999}
3000
3001fn string_list(value: &Value) -> BuiltinResult<Vec<String>> {
3002    match value {
3003        Value::String(text) => Ok(vec![text.clone()]),
3004        Value::CharArray(ca) if ca.rows == 1 => Ok(vec![ca.data.iter().collect()]),
3005        Value::StringArray(array) => Ok(array.data.clone()),
3006        Value::Cell(cell) => {
3007            let mut out = Vec::with_capacity(cell.data.len());
3008            for handle in &cell.data {
3009                let value = unsafe { &*handle.as_raw() };
3010                out.extend(string_list(value)?);
3011            }
3012            Ok(out)
3013        }
3014        _ => Err(invalid_argument(
3015            "table: expected string, string array, character vector, or cellstr",
3016        )),
3017    }
3018}
3019
3020fn variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
3021    let names = string_list(value)?;
3022    if names.is_empty() {
3023        return Err(invalid_variable("table: variable names must not be empty"));
3024    }
3025    Ok(make_unique_variable_names(names))
3026}
3027
/// Produces the default variable names `Var1`, `Var2`, ..., `Var<count>`.
fn generated_variable_names(count: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(count);
    for idx in 1..=count {
        names.push(format!("Var{idx}"));
    }
    names
}
3031
3032fn make_unique_variable_names(names: Vec<String>) -> Vec<String> {
3033    make_unique_names(
3034        names
3035            .into_iter()
3036            .enumerate()
3037            .map(|(idx, name)| make_valid_variable_name(&name, idx + 1))
3038            .collect(),
3039    )
3040}
3041
/// De-duplicates names case-insensitively while preserving order.
///
/// Names are trimmed; a blank name becomes `Var<position>`. A name that
/// collides with an earlier one gets the first free suffix `_2`, `_3`, ...
fn make_unique_names(names: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut unique = Vec::with_capacity(names.len());
    for (position, name) in names.into_iter().enumerate() {
        let trimmed = name.trim();
        let base = if trimmed.is_empty() {
            format!("Var{}", position + 1)
        } else {
            trimmed.to_string()
        };
        let mut candidate = base.clone();
        let mut suffix = 1usize;
        // `insert` reports whether the lowercase form was still free.
        while !seen.insert(candidate.to_ascii_lowercase()) {
            suffix += 1;
            candidate = format!("{base}_{suffix}");
        }
        unique.push(candidate);
    }
    unique
}
3062
/// Turns arbitrary text into an identifier-like variable name.
///
/// Each run of disallowed characters collapses into a single `_`, and trailing
/// underscores are removed. When the result is empty or does not start with an
/// ASCII letter, `Var<fallback_index>` is returned instead.
fn make_valid_variable_name(raw: &str, fallback_index: usize) -> String {
    let mut sanitized = String::new();
    for (position, ch) in raw.trim().chars().enumerate() {
        let allowed = ch == '_'
            || if position == 0 {
                ch.is_ascii_alphabetic()
            } else {
                ch.is_ascii_alphanumeric()
            };
        if allowed {
            sanitized.push(ch);
        } else if !sanitized.ends_with('_') {
            sanitized.push('_');
        }
    }
    let kept = sanitized.trim_end_matches('_').len();
    sanitized.truncate(kept);

    let starts_with_letter = matches!(
        sanitized.chars().next(),
        Some(first) if first.is_ascii_alphabetic()
    );
    if starts_with_letter {
        sanitized
    } else {
        format!("Var{fallback_index}")
    }
}
3083
3084#[cfg(test)]
3085mod tests {
3086    use super::*;
3087    use futures::executor::block_on;
3088    use runmat_time::unix_timestamp_ms;
3089    use std::fs;
3090    use std::io::Write;
3091
3092    fn unique_path(prefix: &str) -> PathBuf {
3093        let mut path = std::env::temp_dir();
3094        path.push(format!(
3095            "runmat_{prefix}_{}_{}",
3096            std::process::id(),
3097            unix_timestamp_ms()
3098        ));
3099        path
3100    }
3101
3102    fn read_table(path: &Path, args: Vec<Value>) -> Value {
3103        block_on(readtable_builtin(
3104            Value::from(path.to_string_lossy().to_string()),
3105            args,
3106        ))
3107        .expect("readtable")
3108    }
3109
3110    fn read_table_err(path: &Path, args: Vec<Value>) -> RuntimeError {
3111        block_on(readtable_builtin(
3112            Value::from(path.to_string_lossy().to_string()),
3113            args,
3114        ))
3115        .expect_err("expected readtable failure")
3116    }
3117
3118    fn object(value: Value) -> ObjectInstance {
3119        match value {
3120            Value::Object(object) => object,
3121            other => panic!("expected table object, got {other:?}"),
3122        }
3123    }
3124
3125    #[test]
3126    fn readtable_imports_headered_numeric_and_text_columns() {
3127        let path = unique_path("readtable_basic");
3128        fs::write(&path, "Name,Score\nAda,10\nGrace,12\n").expect("write sample");
3129        let table = object(read_table(&path, Vec::new()));
3130        assert_eq!(
3131            table_variable_names_from_object(&table).unwrap(),
3132            vec!["Name".to_string(), "Score".to_string()]
3133        );
3134        match table_member_get(&table, &Value::from("Score")).unwrap() {
3135            Value::Tensor(tensor) => {
3136                assert_eq!(tensor.shape, vec![2, 1]);
3137                assert_eq!(tensor.data, vec![10.0, 12.0]);
3138            }
3139            other => panic!("expected tensor, got {other:?}"),
3140        }
3141        match table_member_get(&table, &Value::from("Name")).unwrap() {
3142            Value::StringArray(array) => {
3143                assert_eq!(array.data, vec!["Ada".to_string(), "Grace".to_string()]);
3144            }
3145            other => panic!("expected string array, got {other:?}"),
3146        }
3147        let _ = fs::remove_file(&path);
3148    }
3149
3150    #[test]
3151    fn readtable_auto_does_not_consume_headerless_numeric_rows() {
3152        let path = unique_path("readtable_headerless_numeric");
3153        fs::write(&path, "1,2\n3,4\n").expect("write sample");
3154        let table = object(read_table(&path, Vec::new()));
3155        assert_eq!(
3156            table_variable_names_from_object(&table).unwrap(),
3157            vec!["Var1".to_string(), "Var2".to_string()]
3158        );
3159        match table_member_get(&table, &Value::from("Var1")).unwrap() {
3160            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![1.0, 3.0]),
3161            other => panic!("expected tensor, got {other:?}"),
3162        }
3163        match table_member_get(&table, &Value::from("Var2")).unwrap() {
3164            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 4.0]),
3165            other => panic!("expected tensor, got {other:?}"),
3166        }
3167        let _ = fs::remove_file(&path);
3168    }
3169
3170    #[test]
3171    fn readtable_rejects_unknown_and_invalid_options() {
3172        let path = unique_path("readtable_invalid_options");
3173        fs::write(&path, "A\n1\n").expect("write sample");
3174        let err = read_table_err(
3175            &path,
3176            vec![Value::from("DefinitelyNotAnOption"), Value::from(1.0)],
3177        );
3178        assert!(err.message().contains("unsupported option"));
3179        let err = read_table_err(
3180            &path,
3181            vec![Value::from("VariableNamingRule"), Value::from("mangle")],
3182        );
3183        assert!(err.message().contains("unsupported VariableNamingRule"));
3184        let _ = fs::remove_file(&path);
3185    }
3186
3187    #[test]
3188    fn readtable_handles_quoted_delimiters_and_newlines() {
3189        let path = unique_path("readtable_quoted_newlines");
3190        fs::write(
3191            &path,
3192            "Name,Note\nAda,\"hello, world\"\nGrace,\"line one\nline two\"\n",
3193        )
3194        .expect("write sample");
3195        let table = object(read_table(&path, Vec::new()));
3196        match table_member_get(&table, &Value::from("Note")).unwrap() {
3197            Value::StringArray(array) => assert_eq!(
3198                array.data,
3199                vec!["hello, world".to_string(), "line one\nline two".to_string()]
3200            ),
3201            other => panic!("expected string array, got {other:?}"),
3202        }
3203        let _ = fs::remove_file(&path);
3204    }
3205
3206    #[test]
3207    fn readtable_supports_explicit_names_and_missing_tokens() {
3208        let path = unique_path("readtable_options");
3209        fs::write(&path, "1,NA\n2,4\n").expect("write sample");
3210        let names =
3211            StringArray::new(vec!["A".to_string(), "B".to_string()], vec![1, 2]).expect("names");
3212        let table = object(read_table(
3213            &path,
3214            vec![
3215                Value::from("ReadVariableNames"),
3216                Value::Bool(false),
3217                Value::from("VariableNames"),
3218                Value::StringArray(names),
3219                Value::from("TreatAsMissing"),
3220                Value::from("NA"),
3221            ],
3222        ));
3223        match table_member_get(&table, &Value::from("B")).unwrap() {
3224            Value::Tensor(tensor) => {
3225                assert!(tensor.data[0].is_nan());
3226                assert_eq!(tensor.data[1], 4.0);
3227            }
3228            other => panic!("expected tensor, got {other:?}"),
3229        }
3230        let _ = fs::remove_file(&path);
3231    }
3232
3233    #[test]
3234    fn readtable_preserves_variable_names_when_requested() {
3235        let path = unique_path("readtable_preserve_names");
3236        fs::write(&path, "daily revenue,total orders\n100,10\n").expect("write sample");
3237        let table = object(read_table(
3238            &path,
3239            vec![Value::from("VariableNamingRule"), Value::from("preserve")],
3240        ));
3241        assert_eq!(
3242            table_variable_names_from_object(&table).unwrap(),
3243            vec!["daily revenue".to_string(), "total orders".to_string()]
3244        );
3245        let _ = fs::remove_file(&path);
3246    }
3247
3248    fn write_zip_file(zip: &mut zip::ZipWriter<std::fs::File>, name: &str, contents: &str) {
3249        let options = zip::write::SimpleFileOptions::default()
3250            .compression_method(zip::CompressionMethod::Stored);
3251        zip.start_file(name, options).expect("start xlsx part");
3252        zip.write_all(contents.as_bytes()).expect("write xlsx part");
3253    }
3254
3255    fn write_minimal_xlsx(path: &Path) {
3256        let file = std::fs::File::create(path).expect("create xlsx");
3257        let mut zip = zip::ZipWriter::new(file);
3258        write_zip_file(
3259            &mut zip,
3260            "[Content_Types].xml",
3261            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3262<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
3263  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
3264  <Default Extension="xml" ContentType="application/xml"/>
3265  <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
3266  <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
3267  <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
3268</Types>"#,
3269        );
3270        write_zip_file(
3271            &mut zip,
3272            "_rels/.rels",
3273            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3274<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
3275  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
3276</Relationships>"#,
3277        );
3278        write_zip_file(
3279            &mut zip,
3280            "xl/workbook.xml",
3281            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3282<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
3283  <sheets>
3284    <sheet name="Data" sheetId="1" r:id="rId1"/>
3285  </sheets>
3286</workbook>"#,
3287        );
3288        write_zip_file(
3289            &mut zip,
3290            "xl/_rels/workbook.xml.rels",
3291            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3292<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
3293  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
3294  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
3295</Relationships>"#,
3296        );
3297        write_zip_file(
3298            &mut zip,
3299            "xl/styles.xml",
3300            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3301<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
3302  <fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
3303  <fills count="1"><fill><patternFill patternType="none"/></fill></fills>
3304  <borders count="1"><border/></borders>
3305  <cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>
3306  <cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellXfs>
3307</styleSheet>"#,
3308        );
3309        write_zip_file(
3310            &mut zip,
3311            "xl/worksheets/sheet1.xml",
3312            r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3313<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
3314  <sheetData>
3315    <row r="1">
3316      <c r="A1" t="inlineStr"><is><t>Date</t></is></c>
3317      <c r="B1" t="inlineStr"><is><t>Orders</t></is></c>
3318      <c r="C1" t="inlineStr"><is><t>Revenue</t></is></c>
3319    </row>
3320    <row r="2">
3321      <c r="A2" t="inlineStr"><is><t>2026-06-01</t></is></c>
3322      <c r="B2"><v>10</v></c>
3323      <c r="C2"><v>200</v></c>
3324    </row>
3325    <row r="3">
3326      <c r="A3" t="inlineStr"><is><t>2026-06-02</t></is></c>
3327      <c r="B3"><v>4</v></c>
3328      <c r="C3"><v>90</v></c>
3329    </row>
3330  </sheetData>
3331</worksheet>"#,
3332        );
3333        zip.finish().expect("finish xlsx");
3334    }
3335
3336    #[test]
3337    fn readtable_imports_xlsx_sheet_and_range() {
3338        let path = unique_path("readtable_spreadsheet");
3339        let path = path.with_extension("xlsx");
3340        write_minimal_xlsx(&path);
3341        let table = object(read_table(
3342            &path,
3343            vec![
3344                Value::from("Sheet"),
3345                Value::from("Data"),
3346                Value::from("Range"),
3347                Value::from("A1:C3"),
3348            ],
3349        ));
3350        assert_eq!(
3351            table_variable_names_from_object(&table).unwrap(),
3352            vec![
3353                "Date".to_string(),
3354                "Orders".to_string(),
3355                "Revenue".to_string()
3356            ]
3357        );
3358        match table_member_get(&table, &Value::from("Revenue")).unwrap() {
3359            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![200.0, 90.0]),
3360            other => panic!("expected tensor, got {other:?}"),
3361        }
3362        let _ = fs::remove_file(&path);
3363    }
3364
3365    #[test]
3366    fn table_properties_variable_names_rename_columns() {
3367        let a = Value::Tensor(Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap());
3368        let b = Value::Tensor(Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap());
3369        let mut table =
3370            object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
3371        let mut props = table_public_properties(&table).unwrap();
3372        props.insert(
3373            VARIABLE_NAMES,
3374            Value::StringArray(StringArray::new(vec!["X".into(), "Y".into()], vec![1, 2]).unwrap()),
3375        );
3376        table_member_set(&mut table, PROPERTIES_MEMBER, Value::Struct(props)).unwrap();
3377        assert_eq!(
3378            table_variable_names_from_object(&table).unwrap(),
3379            vec!["X".to_string(), "Y".to_string()]
3380        );
3381    }
3382
3383    #[test]
3384    fn table_paren_selects_rows_and_named_variables() {
3385        let a = Value::Tensor(Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap());
3386        let b = Value::Tensor(Tensor::new(vec![4.0, 5.0, 6.0], vec![3, 1]).unwrap());
3387        let table = object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
3388        let selector = CellArray::new(
3389            vec![
3390                Value::Tensor(Tensor::new(vec![3.0, 1.0], vec![1, 2]).unwrap()),
3391                Value::Cell(CellArray::new(vec![Value::from("B")], 1, 1).unwrap()),
3392            ],
3393            1,
3394            2,
3395        )
3396        .unwrap();
3397        let subset = object(table_paren_get(&table, &Value::Cell(selector)).unwrap());
3398        assert_eq!(
3399            table_variable_names_from_object(&subset).unwrap(),
3400            vec!["B".to_string()]
3401        );
3402        match table_member_get(&subset, &Value::from("B")).unwrap() {
3403            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![6.0, 4.0]),
3404            other => panic!("expected tensor, got {other:?}"),
3405        }
3406    }
3407
3408    #[test]
3409    fn sortrows_preserves_row_names() {
3410        let values = Value::Tensor(Tensor::new(vec![2.0, 1.0], vec![2, 1]).unwrap());
3411        let table = table_from_columns_with_properties(
3412            vec!["X".into()],
3413            vec![values],
3414            Some(vec!["second".into(), "first".into()]),
3415        )
3416        .unwrap();
3417        let (sorted, _) = sortrows_table(table, &[Value::from("X")]).unwrap();
3418        let sorted = object(sorted);
3419        let props = table_public_properties(&sorted).unwrap();
3420        match props.fields.get(ROW_NAMES).unwrap() {
3421            Value::StringArray(array) => {
3422                assert_eq!(array.data, vec!["first".to_string(), "second".to_string()]);
3423            }
3424            other => panic!("expected row names, got {other:?}"),
3425        }
3426    }
3427
3428    #[test]
3429    fn groupsummary_mean_counts_groups() {
3430        let group = Value::StringArray(
3431            StringArray::new(vec!["a".into(), "b".into(), "a".into()], vec![3, 1]).unwrap(),
3432        );
3433        let value = Value::Tensor(Tensor::new(vec![2.0, 5.0, 4.0], vec![3, 1]).unwrap());
3434        let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
3435        let summary = groupsummary_impl(
3436            table,
3437            Value::from("G"),
3438            Value::from("mean"),
3439            vec![Value::from("X")],
3440        )
3441        .unwrap();
3442        let summary = object(summary);
3443        assert_eq!(
3444            table_variable_names_from_object(&summary).unwrap(),
3445            vec![
3446                "G".to_string(),
3447                "GroupCount".to_string(),
3448                "mean_X".to_string()
3449            ]
3450        );
3451        match table_member_get(&summary, &Value::from("mean_X")).unwrap() {
3452            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![3.0, 5.0]),
3453            other => panic!("expected tensor, got {other:?}"),
3454        }
3455    }
3456
3457    #[test]
3458    fn groupsummary_orders_numeric_groups_numerically() {
3459        let group = Value::Tensor(Tensor::new(vec![10.0, 2.0, 10.0], vec![3, 1]).unwrap());
3460        let value = Value::Tensor(Tensor::new(vec![1.0, 5.0, 3.0], vec![3, 1]).unwrap());
3461        let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
3462        let summary =
3463            object(groupsummary_impl(table, Value::from("G"), Value::from("sum"), vec![]).unwrap());
3464        match table_member_get(&summary, &Value::from("G")).unwrap() {
3465            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 10.0]),
3466            other => panic!("expected tensor, got {other:?}"),
3467        }
3468        match table_member_get(&summary, &Value::from("sum_X")).unwrap() {
3469            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![5.0, 4.0]),
3470            other => panic!("expected tensor, got {other:?}"),
3471        }
3472    }
3473}