1use std::cmp::Ordering;
4use std::collections::{BTreeMap, HashMap, HashSet};
5use std::io::{Cursor, Read};
6use std::path::{Path, PathBuf};
7use std::sync::OnceLock;
8
9use calamine::{open_workbook_auto_from_rs, Data as SpreadsheetData, Reader as SpreadsheetReader};
10use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11use encoding_rs::{Encoding, UTF_8};
12use runmat_builtins::{
13 Access, BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15 CellArray, CharArray, ClassDef, ComplexTensor, LogicalArray, MethodDef, NumericDType,
16 ObjectInstance, PropertyDef, StringArray, StructValue, Tensor, Value,
17};
18use runmat_filesystem::File;
19use runmat_macros::runtime_builtin;
20
21use crate::builtins::common::fs::expand_user_path;
22use crate::builtins::common::spec::{
23 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
24 ReductionNaN, ResidencyPolicy, ShapeRequirements,
25};
26use crate::{
27 build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError, OBJECT_INDEX_BRACE,
28 OBJECT_INDEX_MEMBER, OBJECT_INDEX_PAREN, OBJECT_SUBSASGN_METHOD, OBJECT_SUBSREF_METHOD,
29};
30
/// Class name used when registering the table class and when attributing errors to this builtin.
pub const TABLE_CLASS: &str = "table";
// Internal field names; their consumers are outside this excerpt
// (presumably storage keys on the table object — TODO confirm).
const TABLE_VARIABLES_FIELD: &str = "__table_variables";
const TABLE_PROPERTIES_FIELD: &str = "__table_properties";
// Name of the public `Properties` member registered on the table class.
const PROPERTIES_MEMBER: &str = "Properties";
// Metadata property names (presumably keys inside the `Properties` struct — TODO confirm
// against `default_properties`).
const VARIABLE_NAMES: &str = "VariableNames";
const ROW_NAMES: &str = "RowNames";
const DIMENSION_NAMES: &str = "DimensionNames";
const VARIABLE_UNITS: &str = "VariableUnits";
const VARIABLE_DESCRIPTIONS: &str = "VariableDescriptions";
const DESCRIPTION: &str = "Description";
const USER_DATA: &str = "UserData";
// Default dimension names (presumably the initial `DimensionNames` value — TODO confirm).
const DEFAULT_ROW_DIM_NAME: &str = "Rows";
const DEFAULT_VARIABLE_DIM_NAME: &str = "Variables";

// One-time guard: `ensure_table_class_registered` runs its registration closure through this lock.
static TABLE_CLASS_REGISTERED: OnceLock<()> = OnceLock::new();
46
// Single untyped output, shared by every signature that returns one value of any type.
const ANY_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "out",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Result value.",
}];
// Single integer-scalar output, shared by the `height` and `width` signatures.
const NUM_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "n",
    ty: BuiltinParamType::IntegerScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Count.",
}];
// Single required table input, shared by the `height` and `width` signatures.
const TABLE_INPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "T",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Table input.",
}];
// Inputs for `readtable(filename)`.
const READTABLE_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "filename",
    ty: BuiltinParamType::StringScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Text or spreadsheet file path.",
}];
// Inputs for `readtable(filename, nameValuePairs...)`.
const READTABLE_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
    BuiltinParamDescriptor {
        name: "filename",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Text or spreadsheet file path.",
    },
    BuiltinParamDescriptor {
        name: "nameValuePairs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Name-value import options.",
    },
];
// Output of `spreadsheetImportOptions`: the options struct.
const SPREADSHEET_IMPORT_OPTIONS_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "opts",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Spreadsheet import options struct.",
}];
// Inputs for `spreadsheetImportOptions(nameValuePairs...)`.
const SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 1] =
    [BuiltinParamDescriptor {
        name: "nameValuePairs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Name-value option pairs.",
    }];
// Inputs for `table(variables...)`.
const TABLE_INPUTS_VALUES: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "variables",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Variadic,
    default: None,
    description: "Variables to assemble as table columns.",
}];
// Inputs for `groupsummary(T, groupvars, method, datavars)`; only `datavars` is optional.
const GROUPSUMMARY_INPUTS: [BuiltinParamDescriptor; 4] = [
    BuiltinParamDescriptor {
        name: "T",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input table.",
    },
    BuiltinParamDescriptor {
        name: "groupvars",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Grouping variable name or names.",
    },
    BuiltinParamDescriptor {
        name: "method",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Summary method name or names.",
    },
    BuiltinParamDescriptor {
        name: "datavars",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Optional,
        default: None,
        description: "Data variable name or names.",
    },
];
// Inputs for `table.subsref(obj, kind, payload)`.
const OBJECT_INDEX_INPUTS: [BuiltinParamDescriptor; 3] = [
    BuiltinParamDescriptor {
        name: "obj",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Table object receiver.",
    },
    BuiltinParamDescriptor {
        name: "kind",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index kind token.",
    },
    BuiltinParamDescriptor {
        name: "payload",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index payload.",
    },
];
// Inputs for `table.subsasgn(obj, kind, payload, rhs)`: the subsref inputs plus the assigned value.
const OBJECT_ASSIGN_INPUTS: [BuiltinParamDescriptor; 4] = [
    BuiltinParamDescriptor {
        name: "obj",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Table object receiver.",
    },
    BuiltinParamDescriptor {
        name: "kind",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index kind token.",
    },
    BuiltinParamDescriptor {
        name: "payload",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index payload.",
    },
    BuiltinParamDescriptor {
        name: "rhs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Assigned value.",
    },
];
196
// Call forms of `readtable`: filename only, or filename followed by name-value pairs.
const READTABLE_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename)",
        inputs: &READTABLE_INPUTS_FILENAME,
        outputs: &ANY_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename, nameValuePairs...)",
        inputs: &READTABLE_INPUTS_NAME_VALUE,
        outputs: &ANY_OUTPUT,
    },
];
// Call forms of `spreadsheetImportOptions`: no arguments, or name-value pairs.
const SPREADSHEET_IMPORT_OPTIONS_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "opts = spreadsheetImportOptions()",
        inputs: &[],
        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "opts = spreadsheetImportOptions(nameValuePairs...)",
        inputs: &SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE,
        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
    },
];
// Single variadic call form of `table`.
const TABLE_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "T = table(variables...)",
    inputs: &TABLE_INPUTS_VALUES,
    outputs: &ANY_OUTPUT,
}];
// Call form of `groupsummary`.
const GROUPSUMMARY_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "G = groupsummary(T, groupvars, method, datavars)",
    inputs: &GROUPSUMMARY_INPUTS,
    outputs: &ANY_OUTPUT,
}];
// Call form of `height`.
const HEIGHT_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = height(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
// Call form of `width`.
const WIDTH_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = width(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
// Call form of the `table.subsref` indexing method.
const OBJECT_SUBSREF_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "out = table.subsref(obj, kind, payload)",
    inputs: &OBJECT_INDEX_INPUTS,
    outputs: &ANY_OUTPUT,
}];
// Call form of the `table.subsasgn` indexed-assignment method.
const OBJECT_SUBSASGN_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "obj = table.subsasgn(obj, kind, payload, rhs)",
    inputs: &OBJECT_ASSIGN_INPUTS,
    outputs: &ANY_OUTPUT,
}];
251
// Error raised through `invalid_argument` for bad arguments or table metadata.
const TABLE_ERROR_INVALID_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_ARGUMENT",
    identifier: Some("RunMat:table:InvalidArgument"),
    when: "Arguments or table metadata are invalid.",
    message: "table: invalid argument",
};
// Error raised through `invalid_index` for unsupported or invalid table indexing.
const TABLE_ERROR_INVALID_INDEX: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_INDEX",
    identifier: Some("RunMat:table:InvalidIndex"),
    when: "Table indexing is invalid.",
    message: "table: invalid index",
};
// Error raised through `invalid_variable` for bad variable names or values.
const TABLE_ERROR_INVALID_VARIABLE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_VARIABLE",
    identifier: Some("RunMat:table:InvalidVariable"),
    when: "A table variable name or value is invalid.",
    message: "table: invalid variable",
};
// File open/read failure reported by `readtable`.
const TABLE_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.IO",
    identifier: Some("RunMat:readtable:IOError"),
    when: "readtable cannot open or read the requested file.",
    message: "readtable: file read failed",
};
// File type that neither the text nor the spreadsheet import backend handles.
const TABLE_ERROR_UNSUPPORTED_FILE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.UNSUPPORTED_FILE",
    identifier: Some("RunMat:readtable:UnsupportedFileType"),
    when: "readtable receives a file type outside the text or spreadsheet import backends.",
    message: "readtable: unsupported file type",
};
// Full error list referenced by every builtin descriptor in this module.
const TABLE_ERRORS: [BuiltinErrorDescriptor; 5] = [
    TABLE_ERROR_INVALID_ARGUMENT,
    TABLE_ERROR_INVALID_INDEX,
    TABLE_ERROR_INVALID_VARIABLE,
    TABLE_ERROR_IO,
    TABLE_ERROR_UNSUPPORTED_FILE,
];
289
/// Descriptor for the `readtable` builtin.
pub const READTABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &READTABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `spreadsheetImportOptions` builtin.
pub const SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &SPREADSHEET_IMPORT_OPTIONS_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `table` constructor builtin.
pub const TABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &TABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `groupsummary` builtin.
pub const GROUPSUMMARY_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &GROUPSUMMARY_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `height` builtin.
pub const HEIGHT_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &HEIGHT_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `width` builtin.
pub const WIDTH_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &WIDTH_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for `table.subsref`; uses the `MethodOnly` completion policy.
pub const TABLE_SUBSREF_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSREF_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
/// Descriptor for `table.subsasgn`; uses the `MethodOnly` completion policy.
pub const TABLE_SUBSASGN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSASGN_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
338
/// GPU spec registered for `table`: a custom op with no supported precisions and no provider
/// hooks, using the `GatherImmediately` residency policy (see `notes`).
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::table")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "table",
    op_kind: GpuOpKind::Custom("table"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Tables are host containers. GPU variables are gathered when tabular algorithms need row-wise access.",
};
354
/// Fusion spec registered for `table`: no elementwise or reduction fusion entry (see `notes`).
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::table")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "table",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Tables are structured host containers and are not fusion operands.",
};
365
366fn table_error(error: &'static BuiltinErrorDescriptor, message: impl Into<String>) -> RuntimeError {
367 let mut builder = build_runtime_error(message).with_builtin(TABLE_CLASS);
368 if let Some(identifier) = error.identifier {
369 builder = builder.with_identifier(identifier);
370 }
371 builder.build()
372}
373
374fn table_error_with_source<E>(
375 error: &'static BuiltinErrorDescriptor,
376 message: impl Into<String>,
377 source: E,
378) -> RuntimeError
379where
380 E: std::error::Error + Send + Sync + 'static,
381{
382 let mut builder = build_runtime_error(message)
383 .with_builtin(TABLE_CLASS)
384 .with_source(source);
385 if let Some(identifier) = error.identifier {
386 builder = builder.with_identifier(identifier);
387 }
388 builder.build()
389}
390
391fn invalid_argument(message: impl Into<String>) -> RuntimeError {
392 table_error(&TABLE_ERROR_INVALID_ARGUMENT, message)
393}
394
395fn invalid_index(message: impl Into<String>) -> RuntimeError {
396 table_error(&TABLE_ERROR_INVALID_INDEX, message)
397}
398
399fn invalid_variable(message: impl Into<String>) -> RuntimeError {
400 table_error(&TABLE_ERROR_INVALID_VARIABLE, message)
401}
402
403fn map_control_flow(err: RuntimeError) -> RuntimeError {
404 let identifier = err.identifier().map(ToString::to_string);
405 let message = err.message().to_string();
406 let mut builder = build_runtime_error(message)
407 .with_builtin(TABLE_CLASS)
408 .with_source(err);
409 if let Some(identifier) = identifier {
410 builder = builder.with_identifier(identifier);
411 }
412 builder.build()
413}
414
415pub fn ensure_table_class_registered() {
416 TABLE_CLASS_REGISTERED.get_or_init(|| {
417 let mut properties = HashMap::new();
418 properties.insert(
419 PROPERTIES_MEMBER.to_string(),
420 PropertyDef {
421 name: PROPERTIES_MEMBER.to_string(),
422 is_static: false,
423 is_constant: false,
424 is_dependent: false,
425 get_access: Access::Public,
426 set_access: Access::Public,
427 default_value: Some(Value::Struct(default_properties(Vec::new(), None))),
428 },
429 );
430
431 let mut methods = HashMap::new();
432 for name in [OBJECT_SUBSREF_METHOD, OBJECT_SUBSASGN_METHOD] {
433 methods.insert(
434 name.to_string(),
435 MethodDef {
436 name: name.to_string(),
437 is_static: false,
438 is_abstract: false,
439 is_sealed: false,
440 access: Access::Public,
441 function_name: format!("{TABLE_CLASS}.{name}"),
442 implicit_class_argument: None,
443 },
444 );
445 }
446
447 runmat_builtins::register_class(ClassDef {
448 name: TABLE_CLASS.to_string(),
449 parent: None,
450 properties,
451 methods,
452 });
453 });
454}
455
456#[runtime_builtin(
457 name = "table",
458 category = "table",
459 summary = "Create a table from named column variables.",
460 keywords = "table,VariableNames,RowNames,Properties",
461 accel = "cpu",
462 type_resolver(crate::builtins::io::type_resolvers::struct_type),
463 descriptor(crate::builtins::table::TABLE_DESCRIPTOR),
464 builtin_path = "crate::builtins::table"
465)]
466async fn table_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
467 ensure_table_class_registered();
468 let gathered = gather_values(&args).await?;
469 let (variables, options) = split_table_constructor_args(gathered)?;
470 let names = if let Some(names) = options.variable_names {
471 names
472 } else {
473 generated_variable_names(variables.len())
474 };
475 table_from_columns_with_properties(names, variables, options.row_names)
476}
477
478#[runtime_builtin(
479 name = "readtable",
480 category = "io/tabular",
481 summary = "Import tabular text or spreadsheet data into a table.",
482 keywords = "readtable,table,csv,tsv,xlsx,xls,ods,spreadsheet,VariableNames,RowNames,Sheet,Range",
483 accel = "cpu",
484 type_resolver(crate::builtins::io::type_resolvers::struct_type),
485 descriptor(crate::builtins::table::READTABLE_DESCRIPTOR),
486 builtin_path = "crate::builtins::table"
487)]
488async fn readtable_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
489 ensure_table_class_registered();
490 let path_value = gather_if_needed_async(&path)
491 .await
492 .map_err(map_control_flow)?;
493 let args = gather_values(&rest).await?;
494 let options = ReadTableOptions::parse(&args)?;
495 let resolved = resolve_path(&path_value)?;
496 read_table_from_file(&resolved, &options).await
497}
498
499#[runtime_builtin(
500 name = "spreadsheetImportOptions",
501 category = "io/tabular",
502 summary = "Create spreadsheet import options for readtable.",
503 keywords = "spreadsheetImportOptions,readtable,spreadsheet,xlsx,xls,DataRange,VariableTypes,VariableNames,NumVariables",
504 accel = "cpu",
505 type_resolver(crate::builtins::io::type_resolvers::struct_type),
506 descriptor(crate::builtins::table::SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR),
507 builtin_path = "crate::builtins::table"
508)]
509async fn spreadsheet_import_options_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
510 let gathered = gather_values(&args).await?;
511 spreadsheet_import_options(gathered)
512}
513
514#[runtime_builtin(
515 name = "height",
516 category = "table",
517 summary = "Return the number of rows in a table.",
518 keywords = "height,table,rows",
519 descriptor(crate::builtins::table::HEIGHT_DESCRIPTOR),
520 builtin_path = "crate::builtins::table"
521)]
522async fn height_builtin(value: Value) -> BuiltinResult<Value> {
523 let host = gather_if_needed_async(&value)
524 .await
525 .map_err(map_control_flow)?;
526 if let Some(object) = table_object(&host) {
527 return Ok(Value::Num(table_height(object)? as f64));
528 }
529 value_row_count(&host).map(|n| Value::Num(n as f64))
530}
531
532#[runtime_builtin(
533 name = "width",
534 category = "table",
535 summary = "Return the number of variables in a table.",
536 keywords = "width,table,variables",
537 descriptor(crate::builtins::table::WIDTH_DESCRIPTOR),
538 builtin_path = "crate::builtins::table"
539)]
540async fn width_builtin(value: Value) -> BuiltinResult<Value> {
541 let host = gather_if_needed_async(&value)
542 .await
543 .map_err(map_control_flow)?;
544 if let Some(object) = table_object(&host) {
545 return Ok(Value::Num(table_width(object)? as f64));
546 }
547 match host {
548 Value::Tensor(t) => Ok(Value::Num(t.cols() as f64)),
549 Value::ComplexTensor(t) => Ok(Value::Num(t.cols as f64)),
550 Value::StringArray(sa) => Ok(Value::Num(sa.cols() as f64)),
551 Value::LogicalArray(la) => Ok(Value::Num(la.shape.get(1).copied().unwrap_or(1) as f64)),
552 Value::Cell(ca) => Ok(Value::Num(ca.cols as f64)),
553 Value::CharArray(ca) => Ok(Value::Num(ca.cols as f64)),
554 _ => Ok(Value::Num(1.0)),
555 }
556}
557
558#[runtime_builtin(
559 name = "groupsummary",
560 category = "table",
561 summary = "Group table rows and compute summary statistics for data variables.",
562 keywords = "groupsummary,group,table,mean,sum,count,median,min,max",
563 accel = "cpu",
564 descriptor(crate::builtins::table::GROUPSUMMARY_DESCRIPTOR),
565 builtin_path = "crate::builtins::table"
566)]
567async fn groupsummary_builtin(
568 table: Value,
569 groupvars: Value,
570 method: Value,
571 rest: Vec<Value>,
572) -> BuiltinResult<Value> {
573 let table = gather_if_needed_async(&table)
574 .await
575 .map_err(map_control_flow)?;
576 let groupvars = gather_if_needed_async(&groupvars)
577 .await
578 .map_err(map_control_flow)?;
579 let method = gather_if_needed_async(&method)
580 .await
581 .map_err(map_control_flow)?;
582 let rest = gather_values(&rest).await?;
583 groupsummary_impl(table, groupvars, method, rest)
584}
585
586#[runtime_builtin(
587 name = "table.subsref",
588 descriptor(crate::builtins::table::TABLE_SUBSREF_DESCRIPTOR),
589 builtin_path = "crate::builtins::table"
590)]
591async fn table_subsref(obj: Value, kind: String, payload: Value) -> BuiltinResult<Value> {
592 let object = into_table_object(obj, "table.subsref")?;
593 match kind.as_str() {
594 OBJECT_INDEX_MEMBER => table_member_get(&object, &payload),
595 OBJECT_INDEX_PAREN => table_paren_get(&object, &payload),
596 OBJECT_INDEX_BRACE => table_brace_get(&object, &payload),
597 other => Err(invalid_index(format!(
598 "table.subsref: unsupported indexing kind '{other}'"
599 ))),
600 }
601}
602
603#[runtime_builtin(
604 name = "table.subsasgn",
605 descriptor(crate::builtins::table::TABLE_SUBSASGN_DESCRIPTOR),
606 builtin_path = "crate::builtins::table"
607)]
608async fn table_subsasgn(
609 obj: Value,
610 kind: String,
611 payload: Value,
612 rhs: Value,
613) -> BuiltinResult<Value> {
614 let mut object = into_table_object(obj, "table.subsasgn")?;
615 match kind.as_str() {
616 OBJECT_INDEX_MEMBER => {
617 let field = scalar_text(&payload, "table member")?;
618 table_member_set(&mut object, &field, rhs)?;
619 Ok(Value::Object(object))
620 }
621 OBJECT_INDEX_PAREN => table_paren_assign(object, &payload, rhs),
622 OBJECT_INDEX_BRACE => table_brace_assign(object, &payload, rhs),
623 other => Err(invalid_index(format!(
624 "table.subsasgn: unsupported indexing kind '{other}'"
625 ))),
626 }
627}
628
629async fn gather_values(values: &[Value]) -> BuiltinResult<Vec<Value>> {
630 let mut out = Vec::with_capacity(values.len());
631 for value in values {
632 out.push(
633 gather_if_needed_async(value)
634 .await
635 .map_err(map_control_flow)?,
636 );
637 }
638 Ok(out)
639}
640
/// Options recognised by the `table(...)` constructor; both default to `None`.
#[derive(Default)]
struct TableConstructorOptions {
    /// Names supplied through the `VariableNames` option, if any.
    variable_names: Option<Vec<String>>,
    /// Names supplied through the `RowNames` option, if any.
    row_names: Option<Vec<String>>,
}
646
647fn split_table_constructor_args(
648 args: Vec<Value>,
649) -> BuiltinResult<(Vec<Value>, TableConstructorOptions)> {
650 let mut variables = Vec::new();
651 let mut options = TableConstructorOptions::default();
652 let mut idx = 0usize;
653 while idx < args.len() {
654 if let Ok(name) = scalar_text(&args[idx], "table option") {
655 if idx + 1 < args.len() && is_table_constructor_option(&name) {
656 let value = &args[idx + 1];
657 if name.eq_ignore_ascii_case("VariableNames") {
658 options.variable_names = Some(variable_name_list(value)?);
659 } else if name.eq_ignore_ascii_case("RowNames") {
660 options.row_names = Some(string_list(value)?);
661 }
662 idx += 2;
663 continue;
664 }
665 }
666 variables.push(args[idx].clone());
667 idx += 1;
668 }
669 Ok((variables, options))
670}
671
/// Returns `true` when `name` is `VariableNames` or `RowNames`, ignoring ASCII case.
fn is_table_constructor_option(name: &str) -> bool {
    ["VariableNames", "RowNames"]
        .iter()
        .any(|option| name.eq_ignore_ascii_case(option))
}
675
/// Parsed `readtable` import options; each field is set by the option named in its comment.
#[derive(Clone)]
struct ReadTableOptions {
    /// `FileType`.
    file_type: ImportFileType,
    /// `Delimiter`; `None` until supplied.
    delimiter: Option<Delimiter>,
    /// `ReadVariableNames`; `None` until supplied.
    read_variable_names: Option<bool>,
    /// `ReadRowNames`.
    read_row_names: bool,
    /// `NumVariables`; a supplied value of zero is stored as `None`.
    num_variables: Option<usize>,
    /// `VariableNames`.
    variable_names: Option<Vec<String>>,
    /// `VariableTypes`.
    variable_types: Option<Vec<ImportVariableType>>,
    /// `RowNames`.
    row_names: Option<Vec<String>>,
    /// `NumHeaderLines`.
    num_header_lines: usize,
    /// `Range` or `DataRange`; both options write this field.
    range: Option<RangeSpec>,
    /// `Sheet`.
    sheet: Option<SheetSelector>,
    /// `PreserveVariableNames`, or `VariableNamingRule` (`preserve` => true, `modify` => false).
    preserve_variable_names: bool,
    /// `TreatAsMissing` tokens, stored trimmed and ASCII-lowercased; repeated options accumulate.
    treat_as_missing: HashSet<String>,
    /// `EmptyLineRule` (`read` or `skip`).
    empty_line_rule: EmptyLineRule,
    /// `TextType`.
    text_type: TextImportType,
    /// `Encoding` label, stored as given after `validate_encoding_label` accepts it.
    encoding: String,
    /// `DatetimeType`.
    datetime_type: DatetimeImportType,
}
696
697impl Default for ReadTableOptions {
698 fn default() -> Self {
699 Self {
700 file_type: ImportFileType::Auto,
701 delimiter: None,
702 read_variable_names: None,
703 read_row_names: false,
704 num_variables: None,
705 variable_names: None,
706 variable_types: None,
707 row_names: None,
708 num_header_lines: 0,
709 range: None,
710 sheet: None,
711 preserve_variable_names: false,
712 treat_as_missing: HashSet::new(),
713 empty_line_rule: EmptyLineRule::Skip,
714 text_type: TextImportType::String,
715 encoding: "utf-8".to_string(),
716 datetime_type: DatetimeImportType::Datetime,
717 }
718 }
719}
720
721impl ReadTableOptions {
722 fn parse(args: &[Value]) -> BuiltinResult<Self> {
723 let mut options = Self::default();
724 let mut idx = 0usize;
725 if let Some(Value::Struct(st)) = args.first() {
726 for (name, value) in &st.fields {
727 options.apply(name, value)?;
728 }
729 idx = 1;
730 }
731 while idx < args.len() {
732 if idx + 1 >= args.len() {
733 return Err(invalid_argument(
734 "readtable: name-value options must be provided in pairs",
735 ));
736 }
737 let name = scalar_text(&args[idx], "readtable option")?;
738 options.apply(&name, &args[idx + 1])?;
739 idx += 2;
740 }
741 Ok(options)
742 }
743
744 fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
745 if name.eq_ignore_ascii_case("FileType") {
746 self.file_type = ImportFileType::parse(value)?;
747 } else if name.eq_ignore_ascii_case("Delimiter") {
748 self.delimiter = Some(Delimiter::parse(value)?);
749 } else if name.eq_ignore_ascii_case("ReadVariableNames") {
750 self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
751 } else if name.eq_ignore_ascii_case("ReadRowNames") {
752 self.read_row_names = bool_scalar(value, "ReadRowNames")?;
753 } else if name.eq_ignore_ascii_case("NumVariables") {
754 let count = nonnegative_usize(value, "NumVariables")?;
755 self.num_variables = (count > 0).then_some(count);
756 } else if name.eq_ignore_ascii_case("VariableNames") {
757 self.variable_names = optional_raw_variable_name_list(value)?;
758 } else if name.eq_ignore_ascii_case("VariableTypes") {
759 self.variable_types = optional_variable_type_list(value)?;
760 } else if name.eq_ignore_ascii_case("RowNames") {
761 self.row_names = Some(string_list(value)?);
762 } else if name.eq_ignore_ascii_case("NumHeaderLines") {
763 self.num_header_lines = nonnegative_usize(value, "NumHeaderLines")?;
764 } else if name.eq_ignore_ascii_case("Range") {
765 self.range = Some(RangeSpec::parse(value)?);
766 } else if name.eq_ignore_ascii_case("DataRange") {
767 self.range = optional_range_spec(value)?;
768 } else if name.eq_ignore_ascii_case("Sheet") {
769 self.sheet = optional_sheet_selector(value)?;
770 } else if name.eq_ignore_ascii_case("TreatAsMissing") {
771 for token in string_list(value)? {
772 self.treat_as_missing
773 .insert(token.trim().to_ascii_lowercase());
774 }
775 } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
776 self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
777 } else if name.eq_ignore_ascii_case("VariableNamingRule") {
778 let rule = scalar_text(value, "VariableNamingRule")?;
779 if rule.eq_ignore_ascii_case("preserve") {
780 self.preserve_variable_names = true;
781 } else if rule.eq_ignore_ascii_case("modify") {
782 self.preserve_variable_names = false;
783 } else {
784 return Err(invalid_argument(format!(
785 "readtable: unsupported VariableNamingRule '{rule}'"
786 )));
787 }
788 } else if name.eq_ignore_ascii_case("EmptyLineRule") {
789 let rule = scalar_text(value, "EmptyLineRule")?;
790 self.empty_line_rule = if rule.eq_ignore_ascii_case("read") {
791 EmptyLineRule::Read
792 } else if rule.eq_ignore_ascii_case("skip") {
793 EmptyLineRule::Skip
794 } else {
795 return Err(invalid_argument(format!(
796 "readtable: unsupported EmptyLineRule '{rule}'"
797 )));
798 };
799 } else if name.eq_ignore_ascii_case("Encoding") {
800 let encoding = scalar_text(value, "Encoding")?;
801 validate_encoding_label(&encoding)?;
802 self.encoding = encoding;
803 } else if name.eq_ignore_ascii_case("TextType") {
804 self.text_type = TextImportType::parse(value, "readtable")?;
805 } else if name.eq_ignore_ascii_case("DatetimeType") {
806 self.datetime_type = DatetimeImportType::parse(value)?;
807 } else {
808 return Err(invalid_argument(format!(
809 "readtable: unsupported option '{name}'"
810 )));
811 }
812 Ok(())
813 }
814
815 fn is_missing(&self, token: &str) -> bool {
816 let trimmed = token.trim();
817 trimmed.is_empty()
818 || self
819 .treat_as_missing
820 .contains(&trimmed.to_ascii_lowercase())
821 }
822}
823
824fn spreadsheet_import_options(args: Vec<Value>) -> BuiltinResult<Value> {
825 if !args.len().is_multiple_of(2) {
826 return Err(invalid_argument(
827 "spreadsheetImportOptions: name-value options must be provided in pairs",
828 ));
829 }
830 let mut options = SpreadsheetImportOptions::default();
831 let mut idx = 0usize;
832 while idx < args.len() {
833 let name = scalar_text(&args[idx], "spreadsheetImportOptions option")?;
834 options.apply(&name, &args[idx + 1])?;
835 idx += 2;
836 }
837 Ok(Value::Struct(options.into_struct()?))
838}
839
/// State accumulated by `spreadsheetImportOptions` before it is converted into a struct value.
#[derive(Clone)]
struct SpreadsheetImportOptions {
    /// `NumVariables`, applied through `resize_variables`.
    num_variables: usize,
    /// `ReadVariableNames`; `None` until supplied.
    read_variable_names: Option<bool>,
    /// `ReadRowNames`.
    read_row_names: bool,
    /// `VariableNames`.
    variable_names: Vec<String>,
    /// `VariableTypes`, kept as type-name strings.
    variable_types: Vec<String>,
    /// `DataRange` / `Range`: the caller's value, kept after `RangeSpec::parse` accepts it;
    /// `None` when the supplied value is empty.
    data_range: Option<Value>,
    /// `Sheet`: the caller's value, kept after `SheetSelector::parse` accepts it;
    /// `None` when the supplied value is empty.
    sheet: Option<Value>,
    /// `TreatAsMissing`; each application replaces the previous list.
    treat_as_missing: Vec<String>,
    /// `PreserveVariableNames`, or `VariableNamingRule` (`preserve` => true, `modify` => false).
    preserve_variable_names: bool,
    /// `EmptyLineRule`, stored lowercased (`read` or `skip`).
    empty_line_rule: String,
    /// `TextType`, stored lowercased (`string` or `char`).
    text_type: String,
    /// `DatetimeType`, stored lowercased (`datetime`, `text` or `exceldatenum`).
    datetime_type: String,
}
855
856impl Default for SpreadsheetImportOptions {
857 fn default() -> Self {
858 let num_variables = 0;
859 Self {
860 num_variables,
861 read_variable_names: None,
862 read_row_names: false,
863 variable_names: Vec::new(),
864 variable_types: Vec::new(),
865 data_range: None,
866 sheet: None,
867 treat_as_missing: Vec::new(),
868 preserve_variable_names: false,
869 empty_line_rule: "skip".to_string(),
870 text_type: "string".to_string(),
871 datetime_type: "datetime".to_string(),
872 }
873 }
874}
875
876impl SpreadsheetImportOptions {
877 fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
878 if name.eq_ignore_ascii_case("NumVariables") {
879 self.resize_variables(positive_usize(value, "NumVariables")?);
880 } else if name.eq_ignore_ascii_case("VariableNames") {
881 self.variable_names = raw_variable_name_list(value)?;
882 self.align_variable_metadata_count(self.variable_names.len(), "VariableNames")?;
883 self.ensure_variable_metadata_len();
884 } else if name.eq_ignore_ascii_case("VariableTypes") {
885 let types = variable_type_names(value)?;
886 self.variable_types = types;
887 self.align_variable_metadata_count(self.variable_types.len(), "VariableTypes")?;
888 self.ensure_variable_metadata_len();
889 } else if name.eq_ignore_ascii_case("DataRange") || name.eq_ignore_ascii_case("Range") {
890 self.data_range = if option_value_is_empty(value) {
891 None
892 } else {
893 RangeSpec::parse(value)?;
894 Some(value.clone())
895 };
896 } else if name.eq_ignore_ascii_case("Sheet") {
897 self.sheet = if option_value_is_empty(value) {
898 None
899 } else {
900 SheetSelector::parse(value)?;
901 Some(value.clone())
902 };
903 } else if name.eq_ignore_ascii_case("ReadVariableNames") {
904 self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
905 } else if name.eq_ignore_ascii_case("ReadRowNames") {
906 self.read_row_names = bool_scalar(value, "ReadRowNames")?;
907 } else if name.eq_ignore_ascii_case("TreatAsMissing") {
908 self.treat_as_missing = string_list(value)?;
909 } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
910 self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
911 } else if name.eq_ignore_ascii_case("VariableNamingRule") {
912 let rule = scalar_text(value, "VariableNamingRule")?;
913 if rule.eq_ignore_ascii_case("preserve") {
914 self.preserve_variable_names = true;
915 } else if rule.eq_ignore_ascii_case("modify") {
916 self.preserve_variable_names = false;
917 } else {
918 return Err(invalid_argument(format!(
919 "spreadsheetImportOptions: unsupported VariableNamingRule '{rule}'"
920 )));
921 }
922 } else if name.eq_ignore_ascii_case("EmptyLineRule") {
923 let rule = scalar_text(value, "EmptyLineRule")?;
924 if !(rule.eq_ignore_ascii_case("read") || rule.eq_ignore_ascii_case("skip")) {
925 return Err(invalid_argument(format!(
926 "spreadsheetImportOptions: unsupported EmptyLineRule '{rule}'"
927 )));
928 }
929 self.empty_line_rule = rule.to_ascii_lowercase();
930 } else if name.eq_ignore_ascii_case("TextType") {
931 let text_type = scalar_text(value, "TextType")?;
932 if !(text_type.eq_ignore_ascii_case("string") || text_type.eq_ignore_ascii_case("char"))
933 {
934 return Err(invalid_argument(format!(
935 "spreadsheetImportOptions: unsupported TextType '{text_type}'"
936 )));
937 }
938 self.text_type = text_type.to_ascii_lowercase();
939 } else if name.eq_ignore_ascii_case("DatetimeType") {
940 let datetime_type = scalar_text(value, "DatetimeType")?;
941 if !(datetime_type.eq_ignore_ascii_case("datetime")
942 || datetime_type.eq_ignore_ascii_case("text")
943 || datetime_type.eq_ignore_ascii_case("exceldatenum"))
944 {
945 return Err(invalid_argument(format!(
946 "spreadsheetImportOptions: unsupported DatetimeType '{datetime_type}'"
947 )));
948 }
949 self.datetime_type = datetime_type.to_ascii_lowercase();
950 } else {
951 return Err(invalid_argument(format!(
952 "spreadsheetImportOptions: unsupported option '{name}'"
953 )));
954 }
955 Ok(())
956 }
957
958 fn resize_variables(&mut self, num_variables: usize) {
959 self.num_variables = num_variables;
960 if self.variable_names.len() > num_variables {
961 self.variable_names.truncate(num_variables);
962 }
963 if self.variable_types.len() > num_variables {
964 self.variable_types.truncate(num_variables);
965 }
966 self.ensure_variable_metadata_len();
967 }
968
969 fn align_variable_metadata_count(&mut self, len: usize, field: &str) -> BuiltinResult<()> {
970 if self.num_variables == 0 {
971 self.num_variables = len;
972 return Ok(());
973 }
974 if len > self.num_variables {
975 return Err(invalid_argument(format!(
976 "spreadsheetImportOptions: {field} length exceeds NumVariables"
977 )));
978 }
979 Ok(())
980 }
981
982 fn ensure_variable_metadata_len(&mut self) {
983 if self.num_variables == 0 {
984 return;
985 }
986 while self.variable_names.len() < self.num_variables {
987 self.variable_names
988 .push(format!("Var{}", self.variable_names.len() + 1));
989 }
990 self.variable_names.truncate(self.num_variables);
991 while self.variable_types.len() < self.num_variables {
992 self.variable_types.push("auto".to_string());
993 }
994 self.variable_types.truncate(self.num_variables);
995 }
996
997 fn into_struct(mut self) -> BuiltinResult<StructValue> {
998 self.ensure_variable_metadata_len();
999 let mut out = StructValue::new();
1000 out.insert("FileType", Value::String("spreadsheet".to_string()));
1001 out.insert("NumVariables", Value::Num(self.num_variables as f64));
1002 if let Some(read_variable_names) = self.read_variable_names {
1003 out.insert("ReadVariableNames", Value::Bool(read_variable_names));
1004 }
1005 out.insert("ReadRowNames", Value::Bool(self.read_row_names));
1006 out.insert(
1007 "VariableNames",
1008 Value::StringArray(
1009 StringArray::new(
1010 self.variable_names.clone(),
1011 vec![1, self.variable_names.len()],
1012 )
1013 .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1014 ),
1015 );
1016 out.insert(
1017 "VariableTypes",
1018 Value::StringArray(
1019 StringArray::new(
1020 self.variable_types.clone(),
1021 vec![1, self.variable_types.len()],
1022 )
1023 .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1024 ),
1025 );
1026 out.insert(
1027 "DataRange",
1028 self.data_range
1029 .unwrap_or_else(|| Value::String(String::new())),
1030 );
1031 out.insert(
1032 "Sheet",
1033 self.sheet.unwrap_or_else(|| Value::String(String::new())),
1034 );
1035 out.insert(
1036 "TreatAsMissing",
1037 Value::StringArray(
1038 StringArray::new(
1039 self.treat_as_missing.clone(),
1040 vec![1, self.treat_as_missing.len()],
1041 )
1042 .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1043 ),
1044 );
1045 out.insert(
1046 "PreserveVariableNames",
1047 Value::Bool(self.preserve_variable_names),
1048 );
1049 out.insert(
1050 "VariableNamingRule",
1051 Value::String(if self.preserve_variable_names {
1052 "preserve".to_string()
1053 } else {
1054 "modify".to_string()
1055 }),
1056 );
1057 out.insert("EmptyLineRule", Value::String(self.empty_line_rule));
1058 out.insert("TextType", Value::String(self.text_type));
1059 out.insert("DatetimeType", Value::String(self.datetime_type));
1060 Ok(out)
1061 }
1062}
1063
1064#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1065enum ImportVariableType {
1066 Auto,
1067 Numeric(NumericDType),
1068 Logical,
1069 Text(TextImportType),
1070 CellStr,
1071 Datetime,
1072 Duration,
1073}
1074
1075impl ImportVariableType {
1076 fn parse(raw: &str) -> BuiltinResult<Self> {
1077 match raw.trim().to_ascii_lowercase().as_str() {
1078 "" | "auto" => Ok(Self::Auto),
1079 "double" => Ok(Self::Numeric(NumericDType::F64)),
1080 "single" => Ok(Self::Numeric(NumericDType::F32)),
1081 "uint8" => Ok(Self::Numeric(NumericDType::U8)),
1082 "uint16" => Ok(Self::Numeric(NumericDType::U16)),
1083 "logical" | "bool" | "boolean" => Ok(Self::Logical),
1084 "string" => Ok(Self::Text(TextImportType::String)),
1085 "char" => Ok(Self::Text(TextImportType::Char)),
1086 "cellstr" => Ok(Self::CellStr),
1087 "int8" | "int16" | "int32" | "int64" | "uint32" | "uint64" => {
1088 Err(invalid_argument(format!(
1089 "readtable: unsupported VariableTypes entry '{}'; RunMat table imports currently support double, single, uint8, and uint16 numeric arrays",
1090 raw.trim()
1091 )))
1092 }
1093 "categorical" => Err(invalid_argument(
1094 "readtable: unsupported VariableTypes entry 'categorical'; categorical arrays are not implemented in RunMat yet",
1095 )),
1096 "datetime" => Ok(Self::Datetime),
1097 "duration" => Ok(Self::Duration),
1098 other => Err(invalid_argument(format!(
1099 "readtable: unsupported VariableTypes entry '{other}'"
1100 ))),
1101 }
1102 }
1103
1104 fn canonical_label(raw: &str) -> BuiltinResult<String> {
1105 Self::parse(raw)?;
1106 let label = raw.trim().to_ascii_lowercase();
1107 Ok(if label.is_empty() {
1108 "auto".to_string()
1109 } else {
1110 label
1111 })
1112 }
1113}
1114
1115#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1116enum TextImportType {
1117 String,
1118 Char,
1119}
1120
1121impl TextImportType {
1122 fn parse(value: &Value, context: &str) -> BuiltinResult<Self> {
1123 let text_type = scalar_text(value, "TextType")?;
1124 match text_type.trim().to_ascii_lowercase().as_str() {
1125 "string" => Ok(Self::String),
1126 "char" => Ok(Self::Char),
1127 other => Err(invalid_argument(format!(
1128 "{context}: unsupported TextType '{other}'"
1129 ))),
1130 }
1131 }
1132}
1133
/// Policy for rows that contain no data while importing.
#[derive(Clone, Copy)]
enum EmptyLineRule {
    /// Drop such rows (`push_import_record` discards a row whose cells are all empty).
    Skip,
    /// Keep such rows in the imported records.
    Read,
}
1139
1140#[derive(Clone, Copy)]
1141enum DatetimeImportType {
1142 Datetime,
1143 Text,
1144 ExcelDatenum,
1145}
1146
1147impl DatetimeImportType {
1148 fn parse(value: &Value) -> BuiltinResult<Self> {
1149 let text = scalar_text(value, "DatetimeType")?;
1150 match text.trim().to_ascii_lowercase().as_str() {
1151 "datetime" => Ok(Self::Datetime),
1152 "text" => Ok(Self::Text),
1153 "exceldatenum" => Ok(Self::ExcelDatenum),
1154 other => Err(invalid_argument(format!(
1155 "readtable: unsupported DatetimeType '{other}'"
1156 ))),
1157 }
1158 }
1159}
1160
1161#[derive(Clone, Copy, PartialEq, Eq)]
1162enum ImportFileType {
1163 Auto,
1164 Text,
1165 Spreadsheet,
1166}
1167
1168impl ImportFileType {
1169 fn parse(value: &Value) -> BuiltinResult<Self> {
1170 let text = scalar_text(value, "FileType")?;
1171 match text.trim().to_ascii_lowercase().as_str() {
1172 "auto" => Ok(Self::Auto),
1173 "text" | "delimitedtext" | "delimited" => Ok(Self::Text),
1174 "spreadsheet" | "excel" => Ok(Self::Spreadsheet),
1175 other => Err(invalid_argument(format!(
1176 "readtable: unsupported FileType '{other}'"
1177 ))),
1178 }
1179 }
1180}
1181
1182#[derive(Clone)]
1183enum SheetSelector {
1184 Name(String),
1185 Index(usize),
1186}
1187
1188impl SheetSelector {
1189 fn parse(value: &Value) -> BuiltinResult<Self> {
1190 match value {
1191 Value::Int(i) if i.to_i64() >= 1 => Ok(Self::Index(i.to_i64() as usize - 1)),
1192 Value::Num(n)
1193 if n.is_finite() && *n >= 1.0 && (n.round() - n).abs() <= f64::EPSILON =>
1194 {
1195 Ok(Self::Index(n.round() as usize - 1))
1196 }
1197 _ => {
1198 let text = scalar_text(value, "Sheet")?;
1199 if text.trim().is_empty() {
1200 return Err(invalid_argument("readtable: Sheet must not be empty"));
1201 }
1202 Ok(Self::Name(text))
1203 }
1204 }
1205 }
1206}
1207
1208#[derive(Clone)]
1209enum Delimiter {
1210 Char(char),
1211 String(String),
1212 Whitespace,
1213}
1214
1215impl Delimiter {
1216 fn parse(value: &Value) -> BuiltinResult<Self> {
1217 let text = scalar_text(value, "Delimiter")?;
1218 if text.is_empty() {
1219 return Err(invalid_argument("readtable: Delimiter must not be empty"));
1220 }
1221 match text.trim().to_ascii_lowercase().as_str() {
1222 "tab" => Ok(Self::Char('\t')),
1223 "space" | "whitespace" => Ok(Self::Whitespace),
1224 "comma" => Ok(Self::Char(',')),
1225 "semicolon" => Ok(Self::Char(';')),
1226 "bar" | "pipe" => Ok(Self::Char('|')),
1227 _ if text.chars().count() == 1 => Ok(Self::Char(text.chars().next().unwrap())),
1228 _ => Ok(Self::String(text)),
1229 }
1230 }
1231}
1232
1233#[derive(Clone, Copy)]
1234struct RangeSpec {
1235 start_row: usize,
1236 start_col: usize,
1237 end_row: Option<usize>,
1238 end_col: Option<usize>,
1239}
1240
1241impl RangeSpec {
1242 fn parse(value: &Value) -> BuiltinResult<Self> {
1243 match value {
1244 Value::String(text) => Self::parse_text(text),
1245 Value::CharArray(ca) if ca.rows == 1 => {
1246 let text: String = ca.data.iter().collect();
1247 Self::parse_text(&text)
1248 }
1249 Value::StringArray(sa) if sa.data.len() == 1 => Self::parse_text(&sa.data[0]),
1250 Value::Tensor(t) if t.data.len() == 2 || t.data.len() == 4 => {
1251 let mut indices = Vec::with_capacity(t.data.len());
1252 for value in &t.data {
1253 indices.push(one_based_to_zero(*value, usize::MAX, "Range")?);
1254 }
1255 Ok(Self {
1256 start_row: indices[0],
1257 start_col: indices[1],
1258 end_row: indices.get(2).copied(),
1259 end_col: indices.get(3).copied(),
1260 })
1261 }
1262 _ => Err(invalid_argument(
1263 "readtable: Range must be a cell reference string or numeric vector",
1264 )),
1265 }
1266 }
1267
1268 fn parse_text(text: &str) -> BuiltinResult<Self> {
1269 let trimmed = text.trim();
1270 if trimmed.is_empty() {
1271 return Err(invalid_argument("readtable: Range must not be empty"));
1272 }
1273 let parts: Vec<&str> = trimmed.split(':').collect();
1274 if parts.len() > 2 {
1275 return Err(invalid_argument(format!(
1276 "readtable: invalid Range specification '{trimmed}'"
1277 )));
1278 }
1279 let start = parse_cell_ref(parts[0])?;
1280 let end = if parts.len() == 2 {
1281 Some(parse_cell_ref(parts[1])?)
1282 } else {
1283 None
1284 };
1285 Ok(Self {
1286 start_row: start.0.unwrap_or(0),
1287 start_col: start.1.unwrap_or(0),
1288 end_row: end.and_then(|item| item.0),
1289 end_col: end.and_then(|item| item.1),
1290 })
1291 }
1292}
1293
1294fn parse_cell_ref(token: &str) -> BuiltinResult<(Option<usize>, Option<usize>)> {
1295 let mut letters = String::new();
1296 let mut digits = String::new();
1297 for ch in token.trim().chars() {
1298 if ch == '$' {
1299 continue;
1300 }
1301 if ch.is_ascii_alphabetic() {
1302 letters.push(ch.to_ascii_uppercase());
1303 } else if ch.is_ascii_digit() {
1304 digits.push(ch);
1305 } else {
1306 return Err(invalid_argument(format!(
1307 "readtable: invalid Range component '{token}'"
1308 )));
1309 }
1310 }
1311 let col = if letters.is_empty() {
1312 None
1313 } else {
1314 let mut value = 0usize;
1315 for ch in letters.chars() {
1316 value = value
1317 .checked_mul(26)
1318 .and_then(|v| v.checked_add((ch as u8 - b'A' + 1) as usize))
1319 .ok_or_else(|| invalid_argument("readtable: Range column overflow"))?;
1320 }
1321 Some(value - 1)
1322 };
1323 let row = if digits.is_empty() {
1324 None
1325 } else {
1326 let parsed = digits
1327 .parse::<usize>()
1328 .map_err(|_| invalid_argument("readtable: invalid Range row"))?;
1329 if parsed == 0 {
1330 return Err(invalid_argument("readtable: Range rows are one-based"));
1331 }
1332 Some(parsed - 1)
1333 };
1334 Ok((row, col))
1335}
1336
1337fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
1338 let text = scalar_text(value, "filename").map_err(|_| {
1339 table_error(
1340 &TABLE_ERROR_INVALID_ARGUMENT,
1341 "readtable: filename must be a string scalar or character vector",
1342 )
1343 })?;
1344 if text.trim().is_empty() {
1345 return Err(invalid_argument("readtable: filename must not be empty"));
1346 }
1347 let expanded =
1348 expand_user_path(&text, "readtable").map_err(|msg| invalid_argument(msg.to_string()))?;
1349 Ok(Path::new(&expanded).to_path_buf())
1350}
1351
1352async fn read_table_from_file(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1353 match options.file_type {
1354 ImportFileType::Spreadsheet => read_spreadsheet_table(path, options).await,
1355 ImportFileType::Text => read_text_table(path, options).await,
1356 ImportFileType::Auto if is_spreadsheet_path(path) => {
1357 read_spreadsheet_table(path, options).await
1358 }
1359 ImportFileType::Auto => read_text_table(path, options).await,
1360 }
1361}
1362
1363async fn read_text_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1364 if options.sheet.is_some() {
1365 return Err(invalid_argument(
1366 "readtable: Sheet is only valid for spreadsheet files",
1367 ));
1368 }
1369 let bytes = read_file_bytes(path).await?;
1370 let text = decode_text_bytes(&bytes, &options.encoding)?;
1371 let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
1372 if let Some(first) = raw_lines.first_mut() {
1373 if first.starts_with('\u{FEFF}') {
1374 *first = first.trim_start_matches('\u{FEFF}').to_string();
1375 }
1376 }
1377 let delimiter = options
1378 .delimiter
1379 .clone()
1380 .or_else(|| detect_delimiter(&raw_lines))
1381 .unwrap_or(Delimiter::Whitespace);
1382 let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
1383 if options.num_header_lines > 0 {
1384 rows = rows.into_iter().skip(options.num_header_lines).collect();
1385 }
1386 if let Some(range) = options.range {
1387 rows = apply_import_range(rows, range);
1388 }
1389 import_rows_to_table(rows, options)
1390}
1391
1392async fn read_spreadsheet_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1393 if options.delimiter.is_some() {
1394 return Err(invalid_argument(
1395 "readtable: Delimiter is only valid for text files",
1396 ));
1397 }
1398 let bytes = read_file_bytes(path).await?;
1399 let cursor = Cursor::new(bytes);
1400 let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
1401 table_error(
1402 &TABLE_ERROR_UNSUPPORTED_FILE,
1403 format!(
1404 "readtable: unable to open spreadsheet '{}': {err}",
1405 path.display()
1406 ),
1407 )
1408 })?;
1409 let range = match &options.sheet {
1410 Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
1411 invalid_argument(format!("readtable: unable to read sheet '{name}': {err:?}"))
1412 })?,
1413 Some(SheetSelector::Index(index)) => workbook
1414 .worksheet_range_at(*index)
1415 .ok_or_else(|| {
1416 invalid_argument(format!(
1417 "readtable: sheet index {} exceeds bounds",
1418 index + 1
1419 ))
1420 })?
1421 .map_err(|err| {
1422 invalid_argument(format!(
1423 "readtable: unable to read sheet {}: {err:?}",
1424 index + 1
1425 ))
1426 })?,
1427 None => workbook
1428 .worksheet_range_at(0)
1429 .ok_or_else(|| invalid_argument("readtable: spreadsheet contains no worksheets"))?
1430 .map_err(|err| {
1431 invalid_argument(format!("readtable: unable to read first sheet: {err:?}"))
1432 })?,
1433 };
1434 let rows = spreadsheet_range_to_rows(&range, options)?;
1435 import_rows_to_table(rows, options)
1436}
1437
1438async fn read_file_bytes(path: &Path) -> BuiltinResult<Vec<u8>> {
1439 let mut file = File::open_async(path).await.map_err(|err| {
1440 table_error_with_source(
1441 &TABLE_ERROR_IO,
1442 format!("readtable: unable to open '{}': {err}", path.display()),
1443 err,
1444 )
1445 })?;
1446 let mut bytes = Vec::new();
1447 file.read_to_end(&mut bytes).map_err(|err| {
1448 table_error_with_source(
1449 &TABLE_ERROR_IO,
1450 format!("readtable: unable to read '{}': {err}", path.display()),
1451 err,
1452 )
1453 })?;
1454 Ok(bytes)
1455}
1456
/// Reports whether the extension of `path` is one of `xls`, `xlsx`, `xlsm`, `xlsb` or `ods`,
/// compared ASCII case-insensitively.
///
/// Paths without an extension, or with one that is not valid UTF-8, yield `false`.
fn is_spreadsheet_path(path: &Path) -> bool {
    const SPREADSHEET_EXTENSIONS: [&str; 5] = ["xls", "xlsx", "xlsm", "xlsb", "ods"];
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    SPREADSHEET_EXTENSIONS
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
}
1466
1467fn validate_encoding_label(label: &str) -> BuiltinResult<()> {
1468 encoding_for_label(label)
1469 .map(|_| ())
1470 .ok_or_else(|| invalid_argument(format!("readtable: unsupported Encoding '{label}'")))
1471}
1472
1473fn encoding_for_label(label: &str) -> Option<&'static Encoding> {
1474 let label = label.trim();
1475 if label.is_empty()
1476 || label.eq_ignore_ascii_case("auto")
1477 || label.eq_ignore_ascii_case("default")
1478 || label.eq_ignore_ascii_case("system")
1479 || label.eq_ignore_ascii_case("native")
1480 || label.eq_ignore_ascii_case("utf-8")
1481 || label.eq_ignore_ascii_case("utf8")
1482 || label.eq_ignore_ascii_case("unicode")
1483 {
1484 return Some(UTF_8);
1485 }
1486 Encoding::for_label(label.as_bytes())
1487}
1488
1489fn decode_text_bytes(bytes: &[u8], encoding: &str) -> BuiltinResult<String> {
1490 let (encoding, offset) = if encoding.trim().eq_ignore_ascii_case("auto") {
1491 Encoding::for_bom(bytes).unwrap_or((UTF_8, 0))
1492 } else {
1493 (
1494 encoding_for_label(encoding).ok_or_else(|| {
1495 invalid_argument(format!("readtable: unsupported Encoding '{encoding}'"))
1496 })?,
1497 0,
1498 )
1499 };
1500 let (decoded, _, had_errors) = encoding.decode(&bytes[offset..]);
1501 if had_errors {
1502 return Err(table_error(
1503 &TABLE_ERROR_IO,
1504 format!(
1505 "readtable: unable to decode file contents using encoding '{}'",
1506 encoding.name()
1507 ),
1508 ));
1509 }
1510 Ok(decoded.into_owned())
1511}
1512
1513#[derive(Clone, Debug)]
1514enum ImportCell {
1515 Empty,
1516 Text(String),
1517 Number(f64),
1518 Logical(bool),
1519 DateTime(f64),
1520 Error(String),
1521}
1522
1523impl ImportCell {
1524 fn from_text(text: String) -> Self {
1525 if text.trim().is_empty() {
1526 Self::Empty
1527 } else {
1528 Self::Text(text)
1529 }
1530 }
1531
1532 fn display_text(&self) -> String {
1533 match self {
1534 Self::Empty => String::new(),
1535 Self::Text(text) => text.clone(),
1536 Self::Number(value) => format_key_number(*value),
1537 Self::Logical(value) => value.to_string(),
1538 Self::DateTime(serial) => format_key_number(*serial),
1539 Self::Error(text) => text.clone(),
1540 }
1541 }
1542
1543 fn is_missing(&self, options: &ReadTableOptions) -> bool {
1544 match self {
1545 Self::Empty => true,
1546 Self::Text(text) => options.is_missing(text),
1547 _ => false,
1548 }
1549 }
1550
1551 fn is_likely_data_token(&self, options: &ReadTableOptions) -> bool {
1552 match self {
1553 Self::Number(_) | Self::Logical(_) | Self::DateTime(_) => true,
1554 Self::Empty => false,
1555 Self::Text(text) => {
1556 let token = unquote(text.trim()).trim();
1557 options.is_missing(token)
1558 || parse_numeric(token).is_some()
1559 || parse_logical(token).is_some()
1560 || parse_iso_datetime_to_datenum(token).is_some()
1561 }
1562 Self::Error(_) => true,
1563 }
1564 }
1565}
1566
1567fn spreadsheet_cell_to_import(cell: &SpreadsheetData) -> ImportCell {
1568 match cell {
1569 SpreadsheetData::Empty => ImportCell::Empty,
1570 SpreadsheetData::Int(value) => ImportCell::Number(*value as f64),
1571 SpreadsheetData::Float(value) => ImportCell::Number(*value),
1572 SpreadsheetData::String(text) => ImportCell::Text(text.clone()),
1573 SpreadsheetData::Bool(value) => ImportCell::Logical(*value),
1574 SpreadsheetData::DateTime(value) => value
1575 .as_datetime()
1576 .map(crate::builtins::datetime::datenum_from_naive)
1577 .map(ImportCell::DateTime)
1578 .unwrap_or_else(|| ImportCell::Number(value.as_f64())),
1579 SpreadsheetData::DateTimeIso(text) => parse_iso_datetime_to_datenum(text)
1580 .map(ImportCell::DateTime)
1581 .unwrap_or_else(|| ImportCell::Text(text.clone())),
1582 SpreadsheetData::DurationIso(text) => ImportCell::Text(text.clone()),
1583 SpreadsheetData::Error(err) => ImportCell::Error(err.to_string()),
1584 }
1585}
1586
1587fn spreadsheet_range_to_rows(
1588 range: &calamine::Range<SpreadsheetData>,
1589 options: &ReadTableOptions,
1590) -> BuiltinResult<Vec<Vec<ImportCell>>> {
1591 if range.is_empty() {
1592 return Ok(Vec::new());
1593 }
1594 let Some((range_start_row, range_start_col)) = range.start() else {
1595 return Ok(Vec::new());
1596 };
1597 let Some((range_end_row, range_end_col)) = range.end() else {
1598 return Ok(Vec::new());
1599 };
1600 let start_row = options
1601 .range
1602 .map(|spec| checked_u32(spec.start_row, "Range row"))
1603 .transpose()?
1604 .unwrap_or(range_start_row);
1605 let start_col = options
1606 .range
1607 .map(|spec| checked_u32(spec.start_col, "Range column"))
1608 .transpose()?
1609 .unwrap_or(range_start_col);
1610 let end_row = options
1611 .range
1612 .and_then(|spec| spec.end_row)
1613 .map(|row| checked_u32(row, "Range row"))
1614 .transpose()?
1615 .unwrap_or(range_end_row);
1616 let end_col = options
1617 .range
1618 .and_then(|spec| spec.end_col)
1619 .map(|col| checked_u32(col, "Range column"))
1620 .transpose()?
1621 .unwrap_or(range_end_col);
1622 if start_row > end_row || start_col > end_col {
1623 return Ok(Vec::new());
1624 }
1625 let mut rows = Vec::new();
1626 for row_idx in start_row..=end_row {
1627 let mut row = Vec::new();
1628 for col_idx in start_col..=end_col {
1629 row.push(
1630 range
1631 .get_value((row_idx, col_idx))
1632 .map(spreadsheet_cell_to_import)
1633 .unwrap_or(ImportCell::Empty),
1634 );
1635 }
1636 if matches!(options.empty_line_rule, EmptyLineRule::Skip)
1637 && row.iter().all(|cell| cell.is_missing(options))
1638 {
1639 continue;
1640 }
1641 rows.push(row);
1642 }
1643 if options.num_header_lines > 0 {
1644 Ok(rows.into_iter().skip(options.num_header_lines).collect())
1645 } else {
1646 Ok(rows)
1647 }
1648}
1649
1650fn checked_u32(value: usize, context: &str) -> BuiltinResult<u32> {
1651 u32::try_from(value).map_err(|_| invalid_argument(format!("readtable: {context} overflow")))
1652}
1653
1654fn detect_delimiter(lines: &[String]) -> Option<Delimiter> {
1655 let candidates = [',', '\t', ';', '|'];
1656 let mut best: Option<(f64, Delimiter)> = None;
1657 for candidate in candidates {
1658 let counts = lines
1659 .iter()
1660 .take(32)
1661 .filter(|line| line.contains(candidate))
1662 .map(|line| split_with_char_delim(line, candidate).len())
1663 .filter(|count| *count >= 2)
1664 .collect::<Vec<_>>();
1665 if counts.is_empty() {
1666 continue;
1667 }
1668 let avg = counts.iter().copied().sum::<usize>() as f64 / counts.len() as f64;
1669 if avg >= 2.0
1670 && best
1671 .as_ref()
1672 .map(|(best_avg, _)| avg > *best_avg)
1673 .unwrap_or(true)
1674 {
1675 best = Some((avg, Delimiter::Char(candidate)));
1676 }
1677 }
1678 best.map(|(_, delimiter)| delimiter).or_else(|| {
1679 lines
1680 .iter()
1681 .take(32)
1682 .any(|line| line.split_whitespace().count() > 1)
1683 .then_some(Delimiter::Whitespace)
1684 })
1685}
1686
/// Splits one line on `delimiter`, honouring double-quoted sections.
///
/// Quote characters are removed from the output, a doubled quote inside a quoted section
/// yields one literal quote, and delimiters inside quotes do not split. The result always
/// contains at least one field.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut pending = line.chars().peekable();
    while let Some(ch) = pending.next() {
        match ch {
            // Escaped quote: consume the second quote and emit one.
            '"' if quoted && pending.peek() == Some(&'"') => {
                pending.next();
                field.push('"');
            }
            '"' => quoted = !quoted,
            _ if ch == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            _ => field.push(ch),
        }
    }
    fields.push(field);
    fields
}
1712
1713fn parse_text_records(
1714 text: &str,
1715 delimiter: &Delimiter,
1716 empty_line_rule: EmptyLineRule,
1717) -> Vec<Vec<ImportCell>> {
1718 match delimiter {
1719 Delimiter::Whitespace => parse_whitespace_records(text, empty_line_rule),
1720 Delimiter::Char(ch) => parse_delimited_records(text, &ch.to_string(), empty_line_rule),
1721 Delimiter::String(pattern) => parse_delimited_records(text, pattern, empty_line_rule),
1722 }
1723}
1724
1725fn parse_delimited_records(
1726 text: &str,
1727 delimiter: &str,
1728 empty_line_rule: EmptyLineRule,
1729) -> Vec<Vec<ImportCell>> {
1730 let mut records = Vec::new();
1731 let mut row = Vec::new();
1732 let mut current = String::new();
1733 let mut in_quotes = false;
1734 let mut idx = 0usize;
1735 while idx < text.len() {
1736 let ch = text[idx..].chars().next().expect("valid char boundary");
1737 if ch == '"' {
1738 if in_quotes && text[idx + ch.len_utf8()..].starts_with('"') {
1739 current.push('"');
1740 idx += ch.len_utf8() + 1;
1741 continue;
1742 }
1743 in_quotes = !in_quotes;
1744 idx += ch.len_utf8();
1745 continue;
1746 }
1747 if !in_quotes && !delimiter.is_empty() && text[idx..].starts_with(delimiter) {
1748 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1749 idx += delimiter.len();
1750 continue;
1751 }
1752 if !in_quotes && (ch == '\n' || ch == '\r') {
1753 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1754 push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1755 idx += ch.len_utf8();
1756 if ch == '\r' && text[idx..].starts_with('\n') {
1757 idx += 1;
1758 }
1759 continue;
1760 }
1761 current.push(ch);
1762 idx += ch.len_utf8();
1763 }
1764 if !current.is_empty() || !row.is_empty() || text.ends_with(delimiter) {
1765 row.push(ImportCell::from_text(current));
1766 push_import_record(&mut records, row, empty_line_rule);
1767 }
1768 records
1769}
1770
1771fn parse_whitespace_records(text: &str, empty_line_rule: EmptyLineRule) -> Vec<Vec<ImportCell>> {
1772 let mut records = Vec::new();
1773 let mut row = Vec::new();
1774 let mut current = String::new();
1775 let mut in_quotes = false;
1776 let mut field_open = false;
1777 let mut chars = text.chars().peekable();
1778 while let Some(ch) = chars.next() {
1779 if ch == '"' {
1780 if in_quotes && chars.peek() == Some(&'"') {
1781 current.push('"');
1782 chars.next();
1783 } else {
1784 in_quotes = !in_quotes;
1785 }
1786 field_open = true;
1787 continue;
1788 }
1789 if !in_quotes && (ch == '\n' || ch == '\r') {
1790 if field_open || !current.is_empty() {
1791 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1792 }
1793 field_open = false;
1794 push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1795 if ch == '\r' && chars.peek() == Some(&'\n') {
1796 chars.next();
1797 }
1798 continue;
1799 }
1800 if !in_quotes && ch.is_whitespace() {
1801 if field_open || !current.is_empty() {
1802 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1803 field_open = false;
1804 }
1805 continue;
1806 }
1807 current.push(ch);
1808 field_open = true;
1809 }
1810 if field_open || !current.is_empty() {
1811 row.push(ImportCell::from_text(current));
1812 }
1813 if !row.is_empty() {
1814 push_import_record(&mut records, row, empty_line_rule);
1815 }
1816 records
1817}
1818
1819fn push_import_record(
1820 records: &mut Vec<Vec<ImportCell>>,
1821 row: Vec<ImportCell>,
1822 empty_line_rule: EmptyLineRule,
1823) {
1824 if matches!(empty_line_rule, EmptyLineRule::Skip)
1825 && row.iter().all(|cell| matches!(cell, ImportCell::Empty))
1826 {
1827 return;
1828 }
1829 records.push(row);
1830}
1831
1832fn apply_import_range(rows: Vec<Vec<ImportCell>>, range: RangeSpec) -> Vec<Vec<ImportCell>> {
1833 if rows.is_empty() {
1834 return rows;
1835 }
1836 let end_row = range
1837 .end_row
1838 .unwrap_or_else(|| rows.len().saturating_sub(1));
1839 let max_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
1840 let end_col = range.end_col.unwrap_or_else(|| max_cols.saturating_sub(1));
1841 rows.into_iter()
1842 .enumerate()
1843 .filter_map(|(idx, row)| {
1844 if idx < range.start_row || idx > end_row {
1845 return None;
1846 }
1847 let selected = (range.start_col..=end_col)
1848 .map(|col| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1849 .collect::<Vec<_>>();
1850 Some(selected)
1851 })
1852 .collect()
1853}
1854
1855fn import_rows_to_table(
1856 mut rows: Vec<Vec<ImportCell>>,
1857 options: &ReadTableOptions,
1858) -> BuiltinResult<Value> {
1859 let mut variable_names = options.variable_names.clone();
1860 let read_variable_names = options
1861 .read_variable_names
1862 .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
1863 if variable_names.is_none() && read_variable_names && !rows.is_empty() {
1864 variable_names = Some(
1865 rows.remove(0)
1866 .into_iter()
1867 .map(|cell| cell.display_text())
1868 .collect(),
1869 );
1870 }
1871
1872 let mut row_names = options.row_names.clone();
1873 if options.read_row_names && !rows.is_empty() {
1874 row_names = Some(
1875 rows.iter_mut()
1876 .map(|row| {
1877 if row.is_empty() {
1878 String::new()
1879 } else {
1880 row.remove(0).display_text()
1881 }
1882 })
1883 .collect(),
1884 );
1885 if let Some(names) = variable_names.as_mut() {
1886 if !names.is_empty() {
1887 names.remove(0);
1888 }
1889 }
1890 }
1891
1892 let column_count = import_column_count(&rows, &variable_names, options)?;
1893 let names = import_variable_names(variable_names, column_count, options);
1894
1895 let mut columns = Vec::with_capacity(names.len());
1896 for col in 0..names.len() {
1897 let values = rows
1898 .iter()
1899 .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1900 .collect::<Vec<_>>();
1901 let requested_type = options
1902 .variable_types
1903 .as_ref()
1904 .and_then(|types| types.get(col))
1905 .copied();
1906 columns.push(import_column(values, options, requested_type)?);
1907 }
1908 table_from_columns_with_properties(names, columns, row_names)
1909}
1910
1911fn import_column_count(
1912 rows: &[Vec<ImportCell>],
1913 variable_names: &Option<Vec<String>>,
1914 options: &ReadTableOptions,
1915) -> BuiltinResult<usize> {
1916 let data_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
1917 let name_cols = variable_names.as_ref().map(Vec::len).unwrap_or(0);
1918 let type_cols = options.variable_types.as_ref().map(Vec::len).unwrap_or(0);
1919 if let Some(count) = options.num_variables {
1920 if name_cols > count {
1921 return Err(invalid_argument(
1922 "readtable: VariableNames length exceeds NumVariables",
1923 ));
1924 }
1925 if type_cols > count {
1926 return Err(invalid_argument(
1927 "readtable: VariableTypes length exceeds NumVariables",
1928 ));
1929 }
1930 return Ok(count);
1931 }
1932 Ok(data_cols.max(name_cols).max(type_cols))
1933}
1934
1935fn import_variable_names(
1936 variable_names: Option<Vec<String>>,
1937 column_count: usize,
1938 options: &ReadTableOptions,
1939) -> Vec<String> {
1940 match variable_names {
1941 Some(mut names) => {
1942 while names.len() < column_count {
1943 names.push(format!("Var{}", names.len() + 1));
1944 }
1945 names.truncate(column_count);
1946 if options.preserve_variable_names {
1947 make_unique_names(names)
1948 } else {
1949 make_unique_variable_names(names)
1950 }
1951 }
1952 None => generated_variable_names(column_count),
1953 }
1954}
1955
1956fn should_read_variable_names(rows: &[Vec<ImportCell>], options: &ReadTableOptions) -> bool {
1957 let Some(first) = rows.first() else {
1958 return false;
1959 };
1960 if first.is_empty() {
1961 return false;
1962 }
1963 let names = first
1964 .iter()
1965 .map(ImportCell::display_text)
1966 .map(|text| text.trim().to_string())
1967 .collect::<Vec<_>>();
1968 if names.iter().any(|name| name.is_empty()) {
1969 return false;
1970 }
1971 if first.iter().all(|cell| cell.is_likely_data_token(options)) {
1972 return false;
1973 }
1974 true
1975}
1976
1977fn import_column(
1978 values: Vec<ImportCell>,
1979 options: &ReadTableOptions,
1980 requested_type: Option<ImportVariableType>,
1981) -> BuiltinResult<Value> {
1982 match requested_type.unwrap_or(ImportVariableType::Auto) {
1983 ImportVariableType::Auto => infer_import_column(values, options),
1984 ImportVariableType::Numeric(dtype) => import_numeric_column(values, options, dtype),
1985 ImportVariableType::Logical => import_logical_column(values, options),
1986 ImportVariableType::Text(kind) => import_text_column(values, options, kind),
1987 ImportVariableType::CellStr => import_cellstr_column(values, options),
1988 ImportVariableType::Datetime => import_datetime_column(values, options),
1989 ImportVariableType::Duration => import_duration_column(values, options),
1990 }
1991}
1992
1993fn import_numeric_column(
1994 values: Vec<ImportCell>,
1995 options: &ReadTableOptions,
1996 dtype: NumericDType,
1997) -> BuiltinResult<Value> {
1998 let mut numeric = Vec::with_capacity(values.len());
1999 for value in &values {
2000 let parsed = numeric_from_import_cell(value, options, dtype.class_name())?;
2001 numeric.push(cast_import_numeric(parsed, dtype));
2002 }
2003 Tensor::new_with_dtype(numeric, vec![values.len(), 1], dtype)
2004 .map(Value::Tensor)
2005 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2006}
2007
2008fn numeric_from_import_cell(
2009 value: &ImportCell,
2010 options: &ReadTableOptions,
2011 context: &str,
2012) -> BuiltinResult<f64> {
2013 match value {
2014 ImportCell::Empty => Ok(f64::NAN),
2015 ImportCell::Number(value) => Ok(*value),
2016 ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2017 ImportCell::DateTime(serial) => Ok(*serial),
2018 ImportCell::Text(text) => {
2019 let token = unquote(text.trim()).trim();
2020 if options.is_missing(token) {
2021 Ok(f64::NAN)
2022 } else {
2023 parse_numeric(token).ok_or_else(|| {
2024 invalid_variable(format!("readtable: cannot import '{token}' as {context}"))
2025 })
2026 }
2027 }
2028 ImportCell::Error(text) => Err(invalid_variable(format!(
2029 "readtable: cannot import spreadsheet error '{text}' as {context}"
2030 ))),
2031 }
2032}
2033
2034fn cast_import_numeric(value: f64, dtype: NumericDType) -> f64 {
2035 match dtype {
2036 NumericDType::F64 => value,
2037 NumericDType::F32 => (value as f32) as f64,
2038 NumericDType::U8 => {
2039 if value.is_finite() {
2040 value.round().clamp(0.0, u8::MAX as f64)
2041 } else {
2042 0.0
2043 }
2044 }
2045 NumericDType::U16 => {
2046 if value.is_finite() {
2047 value.round().clamp(0.0, u16::MAX as f64)
2048 } else {
2049 0.0
2050 }
2051 }
2052 }
2053}
2054
2055fn import_logical_column(
2056 values: Vec<ImportCell>,
2057 options: &ReadTableOptions,
2058) -> BuiltinResult<Value> {
2059 let mut logical = Vec::with_capacity(values.len());
2060 for value in &values {
2061 logical.push(logical_from_import_cell(value, options)?);
2062 }
2063 LogicalArray::new(logical, vec![values.len(), 1])
2064 .map(Value::LogicalArray)
2065 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2066}
2067
2068fn logical_from_import_cell(value: &ImportCell, options: &ReadTableOptions) -> BuiltinResult<u8> {
2069 let flag = match value {
2070 ImportCell::Empty => false,
2071 ImportCell::Logical(value) => *value,
2072 ImportCell::Number(value) => *value != 0.0,
2073 ImportCell::DateTime(serial) => *serial != 0.0,
2074 ImportCell::Text(text) => {
2075 let token = unquote(text.trim()).trim();
2076 if options.is_missing(token) {
2077 false
2078 } else if let Some(value) = parse_logical(token) {
2079 value
2080 } else if let Some(value) = parse_numeric(token) {
2081 value != 0.0
2082 } else {
2083 return Err(invalid_variable(format!(
2084 "readtable: cannot import '{token}' as logical"
2085 )));
2086 }
2087 }
2088 ImportCell::Error(text) => {
2089 return Err(invalid_variable(format!(
2090 "readtable: cannot import spreadsheet error '{text}' as logical"
2091 )));
2092 }
2093 };
2094 Ok(u8::from(flag))
2095}
2096
2097fn import_text_column(
2098 values: Vec<ImportCell>,
2099 options: &ReadTableOptions,
2100 kind: TextImportType,
2101) -> BuiltinResult<Value> {
2102 let strings = import_text_values(values, options);
2103 match kind {
2104 TextImportType::String => StringArray::new(strings.clone(), vec![strings.len(), 1])
2105 .map(Value::StringArray)
2106 .map_err(|err| invalid_variable(format!("readtable: {err}"))),
2107 TextImportType::Char => import_char_column(strings),
2108 }
2109}
2110
2111fn import_text_values(values: Vec<ImportCell>, options: &ReadTableOptions) -> Vec<String> {
2112 values
2113 .into_iter()
2114 .map(|value| {
2115 if value.is_missing(options) {
2116 String::new()
2117 } else {
2118 unquote(value.display_text().trim()).to_string()
2119 }
2120 })
2121 .collect()
2122}
2123
2124fn import_char_column(strings: Vec<String>) -> BuiltinResult<Value> {
2125 let rows = strings.len();
2126 let cols = strings
2127 .iter()
2128 .map(|text| text.chars().count())
2129 .max()
2130 .unwrap_or(0);
2131 let mut data = vec![' '; rows * cols];
2132 for (row, text) in strings.iter().enumerate() {
2133 for (col, ch) in text.chars().enumerate() {
2134 data[row * cols + col] = ch;
2135 }
2136 }
2137 CharArray::new(data, rows, cols)
2138 .map(Value::CharArray)
2139 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2140}
2141
2142fn import_cellstr_column(
2143 values: Vec<ImportCell>,
2144 options: &ReadTableOptions,
2145) -> BuiltinResult<Value> {
2146 let strings = import_text_values(values, options);
2147 let rows = strings.len();
2148 let cells = strings
2149 .into_iter()
2150 .map(|text| Value::CharArray(CharArray::new_row(&text)))
2151 .collect::<Vec<_>>();
2152 CellArray::new(cells, rows, 1)
2153 .map(Value::Cell)
2154 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2155}
2156
2157fn import_datetime_column(
2158 values: Vec<ImportCell>,
2159 options: &ReadTableOptions,
2160) -> BuiltinResult<Value> {
2161 if matches!(options.datetime_type, DatetimeImportType::Text) {
2162 return import_text_column(values, options, options.text_type);
2163 }
2164
2165 let mut serials = Vec::with_capacity(values.len());
2166 for value in &values {
2167 serials.push(datetime_serial_from_import_cell(value, options)?);
2168 }
2169 let tensor = Tensor::new(serials, vec![values.len(), 1])
2170 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2171 if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2172 Ok(Value::Tensor(tensor))
2173 } else {
2174 crate::builtins::datetime::datetime_object_from_serial_tensor(tensor, "yyyy-MM-dd HH:mm:ss")
2175 }
2176}
2177
2178fn datetime_serial_from_import_cell(
2179 value: &ImportCell,
2180 options: &ReadTableOptions,
2181) -> BuiltinResult<f64> {
2182 match value {
2183 ImportCell::Empty => Ok(f64::NAN),
2184 ImportCell::DateTime(serial) => Ok(*serial),
2185 ImportCell::Number(value) => Ok(*value),
2186 ImportCell::Text(text) => {
2187 let token = unquote(text.trim()).trim();
2188 if options.is_missing(token) {
2189 Ok(f64::NAN)
2190 } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2191 Ok(serial)
2192 } else if let Some(serial) = parse_numeric(token) {
2193 Ok(serial)
2194 } else {
2195 Err(invalid_variable(format!(
2196 "readtable: cannot import '{token}' as datetime"
2197 )))
2198 }
2199 }
2200 ImportCell::Logical(_) => Err(invalid_variable(
2201 "readtable: cannot import logical value as datetime",
2202 )),
2203 ImportCell::Error(text) => Err(invalid_variable(format!(
2204 "readtable: cannot import spreadsheet error '{text}' as datetime"
2205 ))),
2206 }
2207}
2208
2209fn import_duration_column(
2210 values: Vec<ImportCell>,
2211 options: &ReadTableOptions,
2212) -> BuiltinResult<Value> {
2213 let mut days = Vec::with_capacity(values.len());
2214 for value in &values {
2215 days.push(duration_days_from_import_cell(value, options)?);
2216 }
2217 let tensor = Tensor::new(days, vec![values.len(), 1])
2218 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2219 crate::builtins::duration::duration_object_from_days_tensor(
2220 tensor,
2221 crate::builtins::duration::DEFAULT_DURATION_FORMAT,
2222 )
2223}
2224
2225fn duration_days_from_import_cell(
2226 value: &ImportCell,
2227 options: &ReadTableOptions,
2228) -> BuiltinResult<f64> {
2229 match value {
2230 ImportCell::Empty => Ok(f64::NAN),
2231 ImportCell::Number(value) => Ok(*value),
2232 ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2233 ImportCell::Text(text) => {
2234 let token = unquote(text.trim()).trim();
2235 if options.is_missing(token) {
2236 Ok(f64::NAN)
2237 } else {
2238 parse_duration_to_days(token).ok_or_else(|| {
2239 invalid_variable(format!("readtable: cannot import '{token}' as duration"))
2240 })
2241 }
2242 }
2243 ImportCell::DateTime(_) => Err(invalid_variable(
2244 "readtable: cannot import datetime value as duration",
2245 )),
2246 ImportCell::Error(text) => Err(invalid_variable(format!(
2247 "readtable: cannot import spreadsheet error '{text}' as duration"
2248 ))),
2249 }
2250}
2251
2252fn infer_import_column(
2253 values: Vec<ImportCell>,
2254 options: &ReadTableOptions,
2255) -> BuiltinResult<Value> {
2256 let mut numeric = Vec::with_capacity(values.len());
2257 let mut all_numeric = true;
2258 for value in &values {
2259 match value {
2260 ImportCell::Empty => numeric.push(f64::NAN),
2261 ImportCell::Number(value) => numeric.push(*value),
2262 ImportCell::Text(text) => {
2263 let token = unquote(text.trim()).trim();
2264 if options.is_missing(token) {
2265 numeric.push(f64::NAN);
2266 } else if let Some(value) = parse_numeric(token) {
2267 numeric.push(value);
2268 } else {
2269 all_numeric = false;
2270 break;
2271 }
2272 }
2273 _ => {
2274 all_numeric = false;
2275 break;
2276 }
2277 }
2278 }
2279 if all_numeric {
2280 return Tensor::new(numeric, vec![values.len(), 1])
2281 .map(Value::Tensor)
2282 .map_err(|err| invalid_variable(format!("readtable: {err}")));
2283 }
2284
2285 let mut logical = Vec::with_capacity(values.len());
2286 let mut all_logical = true;
2287 for value in &values {
2288 match value {
2289 ImportCell::Empty => logical.push(0),
2290 ImportCell::Logical(value) => logical.push(i32::from(*value) as u8),
2291 ImportCell::Text(text) => {
2292 let token = unquote(text.trim()).trim();
2293 if options.is_missing(token) {
2294 logical.push(0);
2295 } else if let Some(value) = parse_logical(token) {
2296 logical.push(i32::from(value) as u8);
2297 } else {
2298 all_logical = false;
2299 break;
2300 }
2301 }
2302 _ => {
2303 all_logical = false;
2304 break;
2305 }
2306 }
2307 }
2308 if all_logical {
2309 return LogicalArray::new(logical, vec![values.len(), 1])
2310 .map(Value::LogicalArray)
2311 .map_err(|err| invalid_variable(format!("readtable: {err}")));
2312 }
2313
2314 if !matches!(options.datetime_type, DatetimeImportType::Text) {
2315 let mut serials = Vec::with_capacity(values.len());
2316 let mut all_datetime = true;
2317 for value in &values {
2318 match value {
2319 ImportCell::Empty => serials.push(f64::NAN),
2320 ImportCell::DateTime(serial) => serials.push(*serial),
2321 ImportCell::Text(text) => {
2322 let token = unquote(text.trim()).trim();
2323 if options.is_missing(token) {
2324 serials.push(f64::NAN);
2325 } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2326 serials.push(serial);
2327 } else {
2328 all_datetime = false;
2329 break;
2330 }
2331 }
2332 _ => {
2333 all_datetime = false;
2334 break;
2335 }
2336 }
2337 }
2338 if all_datetime {
2339 let tensor = Tensor::new(serials, vec![values.len(), 1])
2340 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2341 if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2342 return Ok(Value::Tensor(tensor));
2343 }
2344 return crate::builtins::datetime::datetime_object_from_serial_tensor(
2345 tensor,
2346 "yyyy-MM-dd HH:mm:ss",
2347 );
2348 }
2349 }
2350
2351 import_text_column(values, options, options.text_type)
2352}
2353
/// Parses a numeric token.
///
/// The spellings `nan`, `inf`, `+inf`, `infinity`, `+infinity`, `-inf` and
/// `-infinity` are recognised case-insensitively (ASCII); anything else goes
/// through `str::parse::<f64>`. Returns `None` when the token is not numeric.
fn parse_numeric(token: &str) -> Option<f64> {
    const POSITIVE_INFINITY: [&str; 4] = ["inf", "+inf", "infinity", "+infinity"];
    const NEGATIVE_INFINITY: [&str; 2] = ["-inf", "-infinity"];

    let is_one_of = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|spelling| token.eq_ignore_ascii_case(spelling))
    };

    if token.eq_ignore_ascii_case("nan") {
        Some(f64::NAN)
    } else if is_one_of(&POSITIVE_INFINITY) {
        Some(f64::INFINITY)
    } else if is_one_of(&NEGATIVE_INFINITY) {
        Some(f64::NEG_INFINITY)
    } else {
        token.parse::<f64>().ok()
    }
}
2362
/// Parses a logical keyword, ignoring ASCII case.
///
/// `true`, `t`, `yes` and `on` map to `Some(true)`; `false`, `f`, `no` and
/// `off` map to `Some(false)`; every other token yields `None`.
fn parse_logical(token: &str) -> Option<bool> {
    const TRUTHY: [&str; 4] = ["true", "t", "yes", "on"];
    const FALSY: [&str; 4] = ["false", "f", "no", "off"];

    let is_one_of = |keywords: &[&str]| {
        keywords
            .iter()
            .any(|keyword| token.eq_ignore_ascii_case(keyword))
    };

    if is_one_of(&TRUTHY) {
        Some(true)
    } else if is_one_of(&FALSY) {
        Some(false)
    } else {
        None
    }
}
2370
2371fn parse_duration_to_days(token: &str) -> Option<f64> {
2372 parse_numeric(token).or_else(|| parse_clock_duration_to_days(token))
2373}
2374
/// Parses a clock-style duration (`h:mm` or `h:mm:ss`, optionally signed)
/// into a fraction of a day.
///
/// Surrounding whitespace is ignored. A single leading `+` or `-` applies to
/// the whole duration. Hours may be any finite non-negative number; minutes
/// and seconds must lie in `[0, 60)`. Seconds default to 0 when omitted.
///
/// Returns `None` for empty input, a wrong number of `:`-separated fields,
/// unparsable or out-of-range fields, and tokens with a second sign after
/// the leading one (e.g. `--1:30`). Such tokens used to be accepted and
/// mixed a negative hour count with positive minutes.
fn parse_clock_duration_to_days(token: &str) -> Option<f64> {
    let trimmed = token.trim();
    if trimmed.is_empty() {
        return None;
    }
    let (sign, body) = if let Some(rest) = trimmed.strip_prefix('-') {
        (-1.0, rest)
    } else if let Some(rest) = trimmed.strip_prefix('+') {
        (1.0, rest)
    } else {
        (1.0, trimmed)
    };
    // The sign was already consumed; another one would let the hours field
    // go negative independently of the minutes and seconds.
    if matches!(body.chars().next(), Some('+' | '-')) {
        return None;
    }

    let mut fields = body.split(':');
    let hours = fields.next()?.parse::<f64>().ok()?;
    let minutes = fields.next()?.parse::<f64>().ok()?;
    let seconds = match fields.next() {
        Some(text) => text.parse::<f64>().ok()?,
        None => 0.0,
    };
    if fields.next().is_some() {
        // More than three fields is not a clock duration.
        return None;
    }

    let in_clock_range = |part: f64| (0.0..60.0).contains(&part);
    if !hours.is_finite() || !in_clock_range(minutes) || !in_clock_range(seconds) {
        return None;
    }
    Some(sign * (hours * 3600.0 + minutes * 60.0 + seconds) / 86_400.0)
}
2411
2412fn parse_iso_datetime_to_datenum(token: &str) -> Option<f64> {
2413 let trimmed = token.trim();
2414 if trimmed.is_empty() {
2415 return None;
2416 }
2417 for format in [
2418 "%Y-%m-%dT%H:%M:%S%.f",
2419 "%Y-%m-%d %H:%M:%S%.f",
2420 "%Y/%m/%d %H:%M:%S%.f",
2421 "%m/%d/%Y %H:%M:%S%.f",
2422 ] {
2423 if let Ok(value) = NaiveDateTime::parse_from_str(trimmed, format) {
2424 return Some(crate::builtins::datetime::datenum_from_naive(value));
2425 }
2426 }
2427 for format in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"] {
2428 if let Ok(date) = NaiveDate::parse_from_str(trimmed, format) {
2429 return Some(crate::builtins::datetime::datenum_from_naive(
2430 date.and_time(NaiveTime::MIN),
2431 ));
2432 }
2433 }
2434 None
2435}
2436
/// Strips one pair of matching surrounding quotes (`"..."` or `'...'`).
///
/// The token is returned unchanged when it is not wrapped in a matching pair,
/// including a lone quote character.
fn unquote(token: &str) -> &str {
    ['"', '\'']
        .iter()
        .find_map(|&quote| token.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(token)
}
2448
2449fn default_properties(variable_names: Vec<String>, row_names: Option<Vec<String>>) -> StructValue {
2450 let mut props = StructValue::new();
2451 props.insert(
2452 VARIABLE_NAMES,
2453 Value::StringArray(
2454 StringArray::new(variable_names.clone(), vec![1, variable_names.len()])
2455 .expect("VariableNames shape is valid"),
2456 ),
2457 );
2458 props.insert(
2459 ROW_NAMES,
2460 row_names
2461 .map(|names| {
2462 Value::StringArray(
2463 StringArray::new(names.clone(), vec![names.len(), 1])
2464 .expect("RowNames shape is valid"),
2465 )
2466 })
2467 .unwrap_or_else(|| {
2468 Value::StringArray(StringArray::new(Vec::new(), vec![0, 1]).unwrap())
2469 }),
2470 );
2471 props.insert(
2472 DIMENSION_NAMES,
2473 Value::StringArray(
2474 StringArray::new(
2475 vec![
2476 DEFAULT_ROW_DIM_NAME.to_string(),
2477 DEFAULT_VARIABLE_DIM_NAME.to_string(),
2478 ],
2479 vec![1, 2],
2480 )
2481 .expect("DimensionNames shape is valid"),
2482 ),
2483 );
2484 props.insert(
2485 VARIABLE_UNITS,
2486 Value::StringArray(
2487 StringArray::new(
2488 vec![String::new(); variable_names.len()],
2489 vec![1, variable_names.len()],
2490 )
2491 .expect("VariableUnits shape is valid"),
2492 ),
2493 );
2494 props.insert(
2495 VARIABLE_DESCRIPTIONS,
2496 Value::StringArray(
2497 StringArray::new(
2498 vec![String::new(); variable_names.len()],
2499 vec![1, variable_names.len()],
2500 )
2501 .expect("VariableDescriptions shape is valid"),
2502 ),
2503 );
2504 props.insert(DESCRIPTION, Value::String(String::new()));
2505 props.insert(USER_DATA, Value::Tensor(Tensor::zeros(vec![0, 0])));
2506 props
2507}
2508
2509pub fn table_from_columns(names: Vec<String>, columns: Vec<Value>) -> BuiltinResult<Value> {
2510 table_from_columns_with_properties(names, columns, None)
2511}
2512
2513fn table_from_columns_with_properties(
2514 names: Vec<String>,
2515 columns: Vec<Value>,
2516 row_names: Option<Vec<String>>,
2517) -> BuiltinResult<Value> {
2518 ensure_table_class_registered();
2519 if names.len() != columns.len() {
2520 return Err(invalid_variable(
2521 "table: number of variable names must match number of variables",
2522 ));
2523 }
2524 let names = make_unique_names(names);
2525 let height = validate_column_heights(&names, &columns)?;
2526 if let Some(row_names) = &row_names {
2527 if row_names.len() != height {
2528 return Err(invalid_variable(
2529 "table: number of row names must match table height",
2530 ));
2531 }
2532 }
2533 let mut variables = StructValue::new();
2534 for (name, value) in names.iter().cloned().zip(columns) {
2535 variables.insert(name, value);
2536 }
2537 let props = default_properties(names, row_names);
2538 let mut object = ObjectInstance::new(TABLE_CLASS.to_string());
2539 object
2540 .properties
2541 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2542 object.properties.insert(
2543 TABLE_PROPERTIES_FIELD.to_string(),
2544 Value::Struct(props.clone()),
2545 );
2546 object
2547 .properties
2548 .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
2549 Ok(Value::Object(object))
2550}
2551
2552fn validate_column_heights(names: &[String], columns: &[Value]) -> BuiltinResult<usize> {
2553 if columns.is_empty() {
2554 return Ok(0);
2555 }
2556 let height = value_row_count(&columns[0])?;
2557 for (name, value) in names.iter().zip(columns) {
2558 let rows = value_row_count(value)?;
2559 if rows != height {
2560 return Err(invalid_variable(format!(
2561 "table: variable '{name}' has {rows} rows but expected {height}"
2562 )));
2563 }
2564 }
2565 Ok(height)
2566}
2567
2568pub fn is_table_value(value: &Value) -> bool {
2569 table_object(value).is_some()
2570}
2571
2572fn table_object(value: &Value) -> Option<&ObjectInstance> {
2573 match value {
2574 Value::Object(object) if object.is_class(TABLE_CLASS) => Some(object),
2575 _ => None,
2576 }
2577}
2578
2579fn into_table_object(value: Value, context: &str) -> BuiltinResult<ObjectInstance> {
2580 match value {
2581 Value::Object(object) if object.is_class(TABLE_CLASS) => Ok(object),
2582 other => Err(invalid_argument(format!(
2583 "{context}: expected table, got {other:?}"
2584 ))),
2585 }
2586}
2587
2588pub fn table_variables(object: &ObjectInstance) -> BuiltinResult<StructValue> {
2589 match object.properties.get(TABLE_VARIABLES_FIELD) {
2590 Some(Value::Struct(st)) => Ok(st.clone()),
2591 Some(other) => Err(invalid_variable(format!(
2592 "table: invalid internal variable storage {other:?}"
2593 ))),
2594 None => Ok(StructValue::new()),
2595 }
2596}
2597
2598pub fn table_variable_names_from_object(object: &ObjectInstance) -> BuiltinResult<Vec<String>> {
2599 let variables = table_variables(object)?;
2600 Ok(variables.fields.keys().cloned().collect())
2601}
2602
2603pub fn table_height(object: &ObjectInstance) -> BuiltinResult<usize> {
2604 let variables = table_variables(object)?;
2605 match variables.fields.values().next() {
2606 Some(value) => value_row_count(value),
2607 None => Ok(0),
2608 }
2609}
2610
2611pub fn table_width(object: &ObjectInstance) -> BuiltinResult<usize> {
2612 table_variables(object).map(|vars| vars.fields.len())
2613}
2614
2615fn table_public_properties(object: &ObjectInstance) -> BuiltinResult<StructValue> {
2616 match object
2617 .properties
2618 .get(TABLE_PROPERTIES_FIELD)
2619 .or_else(|| object.properties.get(PROPERTIES_MEMBER))
2620 {
2621 Some(Value::Struct(st)) => Ok(st.clone()),
2622 Some(other) => Err(invalid_variable(format!(
2623 "table: invalid Properties storage {other:?}"
2624 ))),
2625 None => Ok(default_properties(
2626 table_variable_names_from_object(object)?,
2627 None,
2628 )),
2629 }
2630}
2631
2632fn sync_table_properties(object: &mut ObjectInstance, props: StructValue) {
2633 object.properties.insert(
2634 TABLE_PROPERTIES_FIELD.to_string(),
2635 Value::Struct(props.clone()),
2636 );
2637 object
2638 .properties
2639 .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
2640}
2641
2642fn table_member_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2643 let name = scalar_text(payload, "table member")?;
2644 if name == PROPERTIES_MEMBER {
2645 return Ok(Value::Struct(table_public_properties(object)?));
2646 }
2647 let variables = table_variables(object)?;
2648 variables
2649 .fields
2650 .get(&name)
2651 .cloned()
2652 .ok_or_else(|| invalid_variable(format!("table: unrecognized variable '{name}'")))
2653}
2654
2655fn table_member_set(object: &mut ObjectInstance, field: &str, rhs: Value) -> BuiltinResult<()> {
2656 if field == PROPERTIES_MEMBER {
2657 let Value::Struct(props) = rhs else {
2658 return Err(invalid_variable(
2659 "table: Properties assignment expects a scalar struct",
2660 ));
2661 };
2662 apply_properties(object, props)?;
2663 return Ok(());
2664 }
2665 let mut variables = table_variables(object)?;
2666 let mut names = table_variable_names_from_object(object)?;
2667 let height = table_height(object)?;
2668 let rhs_rows = value_row_count(&rhs)?;
2669 if !variables.fields.is_empty() && rhs_rows != height {
2670 return Err(invalid_variable(format!(
2671 "table: variable '{field}' has {rhs_rows} rows but table has {height}"
2672 )));
2673 }
2674 if !variables.fields.contains_key(field) {
2675 names.push(field.to_string());
2676 }
2677 variables.insert(field.to_string(), rhs);
2678 object
2679 .properties
2680 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2681 let mut props = table_public_properties(object)?;
2682 update_variable_metadata_names(&mut props, names)?;
2683 sync_table_properties(object, props);
2684 Ok(())
2685}
2686
2687fn apply_properties(object: &mut ObjectInstance, mut props: StructValue) -> BuiltinResult<()> {
2688 if let Some(value) = props.fields.get(VARIABLE_NAMES) {
2689 let names = variable_name_list(value)?;
2690 rename_table_variables(object, names.clone())?;
2691 update_variable_metadata_names(&mut props, names)?;
2692 }
2693 sync_table_properties(object, props);
2694 Ok(())
2695}
2696
2697fn rename_table_variables(
2698 object: &mut ObjectInstance,
2699 new_names: Vec<String>,
2700) -> BuiltinResult<()> {
2701 let old_names = table_variable_names_from_object(object)?;
2702 if old_names.len() != new_names.len() {
2703 return Err(invalid_variable(
2704 "table: VariableNames assignment must preserve variable count",
2705 ));
2706 }
2707 let new_names = make_unique_variable_names(new_names);
2708 let variables = table_variables(object)?;
2709 let mut renamed = StructValue::new();
2710 for (old, new) in old_names.iter().zip(new_names.iter()) {
2711 let value = variables
2712 .fields
2713 .get(old)
2714 .cloned()
2715 .ok_or_else(|| invalid_variable(format!("table: missing variable '{old}'")))?;
2716 renamed.insert(new.clone(), value);
2717 }
2718 object
2719 .properties
2720 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(renamed));
2721 Ok(())
2722}
2723
2724fn update_variable_metadata_names(
2725 props: &mut StructValue,
2726 names: Vec<String>,
2727) -> BuiltinResult<()> {
2728 props.insert(
2729 VARIABLE_NAMES,
2730 Value::StringArray(
2731 StringArray::new(names.clone(), vec![1, names.len()])
2732 .map_err(|err| invalid_variable(format!("table: {err}")))?,
2733 ),
2734 );
2735 for field in [VARIABLE_UNITS, VARIABLE_DESCRIPTIONS] {
2736 let existing = props.fields.get(field).cloned();
2737 let values = match existing {
2738 Some(Value::StringArray(mut array)) => {
2739 array.data.resize(names.len(), String::new());
2740 array.data.truncate(names.len());
2741 array.data
2742 }
2743 _ => vec![String::new(); names.len()],
2744 };
2745 props.insert(
2746 field,
2747 Value::StringArray(
2748 StringArray::new(values, vec![1, names.len()])
2749 .map_err(|err| invalid_variable(format!("table: {err}")))?,
2750 ),
2751 );
2752 }
2753 Ok(())
2754}
2755
2756fn table_paren_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2757 let selectors = selector_values(payload)?;
2758 let rows = parse_row_selector(selectors.first(), table_height(object)?)?;
2759 let variable_names = table_variable_names_from_object(object)?;
2760 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2761 let variables = table_variables(object)?;
2762 let mut out = Vec::with_capacity(selected_names.len());
2763 for name in &selected_names {
2764 let value = variables
2765 .fields
2766 .get(name)
2767 .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
2768 out.push(select_rows(value, &rows)?);
2769 }
2770 let row_names = selected_row_names(object, &rows)?;
2771 table_from_columns_with_properties(selected_names, out, row_names)
2772}
2773
2774fn table_brace_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2775 let subset = table_paren_get(object, payload)?;
2776 let object = into_table_object(subset, "table brace indexing")?;
2777 let variables = table_variables(&object)?;
2778 if variables.fields.len() == 1 {
2779 return variables
2780 .fields
2781 .values()
2782 .next()
2783 .cloned()
2784 .ok_or_else(|| invalid_variable("table: missing selected variable"));
2785 }
2786 let values = variables.fields.values().collect::<Vec<_>>();
2787 if values.iter().all(|value| matches!(value, Value::Tensor(_))) {
2788 return concatenate_numeric_columns(&values);
2789 }
2790 CellArray::new(
2791 values.into_iter().cloned().collect(),
2792 1,
2793 variables.fields.len(),
2794 )
2795 .map(Value::Cell)
2796 .map_err(|err| invalid_variable(format!("table: {err}")))
2797}
2798
2799fn table_paren_assign(
2800 mut object: ObjectInstance,
2801 payload: &Value,
2802 rhs: Value,
2803) -> BuiltinResult<Value> {
2804 let rhs_table = into_table_object(rhs, "table paren assignment")?;
2805 let selectors = selector_values(payload)?;
2806 let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2807 let variable_names = table_variable_names_from_object(&object)?;
2808 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2809 let rhs_names = table_variable_names_from_object(&rhs_table)?;
2810 if selected_names.len() != rhs_names.len() {
2811 return Err(invalid_variable(
2812 "table: assignment variable count must match selected variables",
2813 ));
2814 }
2815 let mut variables = table_variables(&object)?;
2816 let rhs_variables = table_variables(&rhs_table)?;
2817 for (target_name, rhs_name) in selected_names.iter().zip(rhs_names.iter()) {
2818 let current =
2819 variables.fields.get(target_name).cloned().ok_or_else(|| {
2820 invalid_variable(format!("table: missing variable '{target_name}'"))
2821 })?;
2822 let rhs_col =
2823 rhs_variables.fields.get(rhs_name).cloned().ok_or_else(|| {
2824 invalid_variable(format!("table: missing rhs variable '{rhs_name}'"))
2825 })?;
2826 variables.insert(target_name.clone(), assign_rows(current, &rows, rhs_col)?);
2827 }
2828 object
2829 .properties
2830 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2831 Ok(Value::Object(object))
2832}
2833
2834fn table_brace_assign(
2835 mut object: ObjectInstance,
2836 payload: &Value,
2837 rhs: Value,
2838) -> BuiltinResult<Value> {
2839 let selectors = selector_values(payload)?;
2840 let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2841 let variable_names = table_variable_names_from_object(&object)?;
2842 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2843 if selected_names.len() != 1 {
2844 return Err(invalid_variable(
2845 "table: brace assignment supports one variable at a time",
2846 ));
2847 }
2848 let mut variables = table_variables(&object)?;
2849 let target = selected_names[0].clone();
2850 let current = variables
2851 .fields
2852 .get(&target)
2853 .cloned()
2854 .ok_or_else(|| invalid_variable(format!("table: missing variable '{target}'")))?;
2855 variables.insert(target, assign_rows(current, &rows, rhs)?);
2856 object
2857 .properties
2858 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2859 Ok(Value::Object(object))
2860}
2861
2862fn selector_values(payload: &Value) -> BuiltinResult<Vec<Value>> {
2863 match payload {
2864 Value::Cell(cell) => {
2865 let mut out = Vec::with_capacity(cell.data.len());
2866 for handle in &cell.data {
2867 out.push(unsafe { &*handle.as_raw() }.clone());
2868 }
2869 Ok(out)
2870 }
2871 other => Ok(vec![other.clone()]),
2872 }
2873}
2874
2875fn parse_row_selector(selector: Option<&Value>, height: usize) -> BuiltinResult<Vec<usize>> {
2876 let Some(selector) = selector else {
2877 return Ok((0..height).collect());
2878 };
2879 if is_colon_selector(selector) {
2880 return Ok((0..height).collect());
2881 }
2882 if is_end_selector(selector) {
2883 return if height == 0 {
2884 Err(invalid_index(
2885 "table: end row index is invalid for empty table",
2886 ))
2887 } else {
2888 Ok(vec![height - 1])
2889 };
2890 }
2891 match selector {
2892 Value::Num(n) => Ok(vec![one_based_to_zero(*n, height, "row")?]),
2893 Value::Int(i) => Ok(vec![one_based_to_zero(i.to_f64(), height, "row")?]),
2894 Value::Tensor(tensor) => tensor
2895 .data
2896 .iter()
2897 .map(|value| one_based_to_zero(*value, height, "row"))
2898 .collect(),
2899 Value::LogicalArray(array) => {
2900 if array.data.len() != height {
2901 return Err(invalid_index(
2902 "table: logical row selector length must match table height",
2903 ));
2904 }
2905 Ok(array
2906 .data
2907 .iter()
2908 .enumerate()
2909 .filter_map(|(idx, value)| (*value != 0).then_some(idx))
2910 .collect())
2911 }
2912 other => Err(invalid_index(format!(
2913 "table: unsupported row selector {other:?}"
2914 ))),
2915 }
2916}
2917
2918fn parse_variable_selector(
2919 selector: Option<&Value>,
2920 names: &[String],
2921) -> BuiltinResult<Vec<String>> {
2922 let Some(selector) = selector else {
2923 return Ok(names.to_vec());
2924 };
2925 if is_colon_selector(selector) {
2926 return Ok(names.to_vec());
2927 }
2928 match selector {
2929 Value::String(_) | Value::CharArray(_) | Value::StringArray(_) | Value::Cell(_) => {
2930 let selected = string_list(selector)?;
2931 for name in &selected {
2932 if !names.contains(name) {
2933 return Err(invalid_variable(format!(
2934 "table: unrecognized variable '{name}'"
2935 )));
2936 }
2937 }
2938 Ok(selected)
2939 }
2940 Value::Num(n) => Ok(vec![name_at_index(names, *n)?]),
2941 Value::Int(i) => Ok(vec![name_at_index(names, i.to_f64())?]),
2942 Value::Tensor(tensor) => tensor
2943 .data
2944 .iter()
2945 .map(|value| name_at_index(names, *value))
2946 .collect(),
2947 Value::LogicalArray(array) => {
2948 if array.data.len() != names.len() {
2949 return Err(invalid_index(
2950 "table: logical variable selector length must match table width",
2951 ));
2952 }
2953 Ok(array
2954 .data
2955 .iter()
2956 .zip(names.iter())
2957 .filter_map(|(flag, name)| (*flag != 0).then_some(name.clone()))
2958 .collect())
2959 }
2960 other => Err(invalid_index(format!(
2961 "table: unsupported variable selector {other:?}"
2962 ))),
2963 }
2964}
2965
2966fn is_colon_selector(value: &Value) -> bool {
2967 scalar_text(value, "selector")
2968 .map(|text| text == ":")
2969 .unwrap_or(false)
2970}
2971
2972fn is_end_selector(value: &Value) -> bool {
2973 scalar_text(value, "selector")
2974 .map(|text| text == "end")
2975 .unwrap_or(false)
2976}
2977
2978fn name_at_index(names: &[String], value: f64) -> BuiltinResult<String> {
2979 let idx = one_based_to_zero(value, names.len(), "variable")?;
2980 Ok(names[idx].clone())
2981}
2982
2983fn one_based_to_zero(value: f64, len: usize, context: &str) -> BuiltinResult<usize> {
2984 if !value.is_finite() || value < 1.0 || (value.round() - value).abs() > f64::EPSILON {
2985 return Err(invalid_index(format!(
2986 "table: {context} indices must be positive finite integers"
2987 )));
2988 }
2989 let idx = value.round() as usize - 1;
2990 if idx >= len {
2991 return Err(invalid_index(format!(
2992 "table: {context} index exceeds bounds"
2993 )));
2994 }
2995 Ok(idx)
2996}
2997
2998fn selected_row_names(
2999 object: &ObjectInstance,
3000 rows: &[usize],
3001) -> BuiltinResult<Option<Vec<String>>> {
3002 let props = table_public_properties(object)?;
3003 let Some(value) = props.fields.get(ROW_NAMES) else {
3004 return Ok(None);
3005 };
3006 let names = string_list(value)?;
3007 if names.is_empty() {
3008 return Ok(None);
3009 }
3010 Ok(Some(
3011 rows.iter()
3012 .filter_map(|row| names.get(*row).cloned())
3013 .collect(),
3014 ))
3015}
3016
3017fn value_row_count(value: &Value) -> BuiltinResult<usize> {
3018 match value {
3019 Value::Tensor(tensor) => Ok(tensor.rows()),
3020 Value::ComplexTensor(tensor) => Ok(tensor.rows),
3021 Value::StringArray(array) => Ok(array.rows()),
3022 Value::LogicalArray(array) => Ok(array.shape.first().copied().unwrap_or(array.data.len())),
3023 Value::Cell(cell) => Ok(cell.rows),
3024 Value::CharArray(array) => Ok(array.rows),
3025 Value::Object(obj) if obj.is_class("datetime") => {
3026 crate::builtins::datetime::serials_from_datetime_value(value)
3027 .map(|tensor| tensor.rows())
3028 }
3029 Value::Object(obj) if obj.is_class("duration") => {
3030 crate::builtins::duration::duration_tensor_from_duration_value(value)
3031 .map(|tensor| tensor.rows())
3032 }
3033 Value::Object(obj) if obj.is_class(TABLE_CLASS) => table_height(obj),
3034 _ => Ok(1),
3035 }
3036}
3037
3038fn select_rows(value: &Value, rows: &[usize]) -> BuiltinResult<Value> {
3039 match value {
3040 Value::Tensor(tensor) => {
3041 let cols = tensor.cols();
3042 let mut data = Vec::with_capacity(rows.len() * cols);
3043 for col in 0..cols {
3044 for &row in rows {
3045 data.push(tensor.get2(row, col).map_err(invalid_index)?);
3046 }
3047 }
3048 Tensor::new_with_dtype(data, vec![rows.len(), cols], tensor.dtype)
3049 .map(Value::Tensor)
3050 .map_err(invalid_variable)
3051 }
3052 Value::ComplexTensor(tensor) => {
3053 let mut data = Vec::with_capacity(rows.len() * tensor.cols);
3054 for col in 0..tensor.cols {
3055 for &row in rows {
3056 let idx = row + col * tensor.rows;
3057 data.push(*tensor.data.get(idx).ok_or_else(|| {
3058 invalid_index("table: complex variable row index out of bounds")
3059 })?);
3060 }
3061 }
3062 ComplexTensor::new(data, vec![rows.len(), tensor.cols])
3063 .map(Value::ComplexTensor)
3064 .map_err(invalid_variable)
3065 }
3066 Value::StringArray(array) => {
3067 let cols = array.cols();
3068 let mut data = Vec::with_capacity(rows.len() * cols);
3069 for col in 0..cols {
3070 for &row in rows {
3071 let idx = row + col * array.rows();
3072 data.push(array.data.get(idx).cloned().ok_or_else(|| {
3073 invalid_index("table: string variable row index out of bounds")
3074 })?);
3075 }
3076 }
3077 StringArray::new(data, vec![rows.len(), cols])
3078 .map(Value::StringArray)
3079 .map_err(invalid_variable)
3080 }
3081 Value::CharArray(array) => {
3082 let mut data = Vec::with_capacity(rows.len() * array.cols);
3083 for &row in rows {
3084 if row >= array.rows {
3085 return Err(invalid_index(
3086 "table: char variable row index out of bounds",
3087 ));
3088 }
3089 let start = row * array.cols;
3090 data.extend_from_slice(&array.data[start..start + array.cols]);
3091 }
3092 CharArray::new(data, rows.len(), array.cols)
3093 .map(Value::CharArray)
3094 .map_err(invalid_variable)
3095 }
3096 Value::LogicalArray(array) => {
3097 let source_rows = array.shape.first().copied().unwrap_or(array.data.len());
3098 let cols = array.shape.get(1).copied().unwrap_or(1);
3099 let mut data = Vec::with_capacity(rows.len() * cols);
3100 for col in 0..cols {
3101 for &row in rows {
3102 let idx = row + col * source_rows;
3103 data.push(*array.data.get(idx).ok_or_else(|| {
3104 invalid_index("table: logical variable row index out of bounds")
3105 })?);
3106 }
3107 }
3108 LogicalArray::new(data, vec![rows.len(), cols])
3109 .map(Value::LogicalArray)
3110 .map_err(invalid_variable)
3111 }
3112 Value::Cell(cell) => {
3113 let mut data = Vec::with_capacity(rows.len() * cell.cols);
3114 for col in 0..cell.cols {
3115 for &row in rows {
3116 data.push(cell.get(row, col).map_err(invalid_index)?);
3117 }
3118 }
3119 CellArray::new(data, rows.len(), cell.cols)
3120 .map(Value::Cell)
3121 .map_err(invalid_variable)
3122 }
3123 Value::Object(obj) if obj.is_class("datetime") => {
3124 let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3125 let selected = select_rows(&Value::Tensor(tensor), rows)?;
3126 match selected {
3127 Value::Tensor(tensor) => {
3128 crate::builtins::datetime::datetime_object_from_serial_tensor(
3129 tensor,
3130 crate::builtins::datetime::datetime_format_from_value(value),
3131 )
3132 }
3133 _ => unreachable!("select_rows tensor branch returns tensor"),
3134 }
3135 }
3136 Value::Object(obj) if obj.is_class("duration") => {
3137 let tensor = crate::builtins::duration::duration_tensor_from_duration_value(value)?;
3138 let selected = select_rows(&Value::Tensor(tensor), rows)?;
3139 match selected {
3140 Value::Tensor(tensor) => {
3141 crate::builtins::duration::duration_object_from_days_tensor(
3142 tensor,
3143 crate::builtins::duration::duration_format_from_value(value),
3144 )
3145 }
3146 _ => unreachable!("select_rows tensor branch returns tensor"),
3147 }
3148 }
3149 _ if rows.len() == 1 && rows[0] == 0 => Ok(value.clone()),
3150 other => Err(invalid_variable(format!(
3151 "table: row selection unsupported for variable {other:?}"
3152 ))),
3153 }
3154}
3155
3156fn assign_rows(mut current: Value, rows: &[usize], rhs: Value) -> BuiltinResult<Value> {
3157 if value_row_count(&rhs)? != rows.len() {
3158 return Err(invalid_variable(
3159 "table: assignment row count must match selected row count",
3160 ));
3161 }
3162 let replacing_all_rows = rows.len() == value_row_count(¤t)?;
3163 match (&mut current, rhs) {
3164 (Value::Tensor(target), Value::Tensor(source)) => {
3165 if target.cols() != source.cols() {
3166 return Err(invalid_variable(
3167 "table: tensor assignment column count mismatch",
3168 ));
3169 }
3170 for col in 0..target.cols() {
3171 for (src_row, &dst_row) in rows.iter().enumerate() {
3172 let value = source.get2(src_row, col).map_err(invalid_index)?;
3173 target.set2(dst_row, col, value).map_err(invalid_index)?;
3174 }
3175 }
3176 Ok(current)
3177 }
3178 (_, source) if replacing_all_rows => Ok(source),
3179 _ => Err(invalid_variable(
3180 "table: assignment for this variable type requires replacing all rows",
3181 )),
3182 }
3183}
3184
3185fn concatenate_numeric_columns(values: &[&Value]) -> BuiltinResult<Value> {
3186 let rows = values
3187 .first()
3188 .and_then(|value| match value {
3189 Value::Tensor(t) => Some(t.rows()),
3190 _ => None,
3191 })
3192 .unwrap_or(0);
3193 let cols = values
3194 .iter()
3195 .map(|value| match value {
3196 Value::Tensor(t) => Ok(t.cols()),
3197 _ => Err(invalid_variable("table: expected numeric variable")),
3198 })
3199 .collect::<BuiltinResult<Vec<_>>>()?;
3200 let total_cols: usize = cols.iter().sum();
3201 let mut data = Vec::with_capacity(rows * total_cols);
3202 for value in values {
3203 let Value::Tensor(tensor) = value else {
3204 return Err(invalid_variable("table: expected numeric variable"));
3205 };
3206 for col in 0..tensor.cols() {
3207 for row in 0..rows {
3208 data.push(tensor.get2(row, col).map_err(invalid_index)?);
3209 }
3210 }
3211 }
3212 Tensor::new(data, vec![rows, total_cols])
3213 .map(Value::Tensor)
3214 .map_err(invalid_variable)
3215}
3216
3217pub fn sortrows_table(value: Value, rest: &[Value]) -> BuiltinResult<(Value, Tensor)> {
3218 let object = into_table_object(value, "sortrows")?;
3219 let names = table_variable_names_from_object(&object)?;
3220 let sort_spec = SortSpec::parse(rest, &names)?;
3221 let height = table_height(&object)?;
3222 let variables = table_variables(&object)?;
3223 let mut indices: Vec<usize> = (0..height).collect();
3224 indices.sort_by(|&a, &b| {
3225 for key in &sort_spec.keys {
3226 let Some(value) = variables.fields.get(&key.name) else {
3227 continue;
3228 };
3229 let ord = compare_table_cells(value, a, b).unwrap_or(Ordering::Equal);
3230 let ord = if key.descending { ord.reverse() } else { ord };
3231 if ord != Ordering::Equal {
3232 return ord;
3233 }
3234 }
3235 a.cmp(&b)
3236 });
3237 let mut sorted_columns = Vec::with_capacity(names.len());
3238 for name in &names {
3239 let value = variables
3240 .fields
3241 .get(name)
3242 .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
3243 sorted_columns.push(select_rows(value, &indices)?);
3244 }
3245 let row_names = selected_row_names(&object, &indices)?;
3246 let sorted = table_from_columns_with_properties(names, sorted_columns, row_names)?;
3247 let indices_tensor = Tensor::new(
3248 indices.iter().map(|idx| *idx as f64 + 1.0).collect(),
3249 vec![indices.len(), 1],
3250 )
3251 .map_err(invalid_variable)?;
3252 Ok((sorted, indices_tensor))
3253}
3254
3255struct SortSpec {
3256 keys: Vec<SortKey>,
3257}
3258
3259struct SortKey {
3260 name: String,
3261 descending: bool,
3262}
3263
3264impl SortSpec {
3265 fn parse(rest: &[Value], names: &[String]) -> BuiltinResult<Self> {
3266 let mut keys = if rest.is_empty() {
3267 names
3268 .iter()
3269 .map(|name| SortKey {
3270 name: name.clone(),
3271 descending: false,
3272 })
3273 .collect::<Vec<_>>()
3274 } else {
3275 parse_variable_selector(rest.first(), names)?
3276 .into_iter()
3277 .map(|name| SortKey {
3278 name,
3279 descending: false,
3280 })
3281 .collect()
3282 };
3283 if let Some(direction) = rest.get(1) {
3284 let directions = string_list(direction)?;
3285 if directions.len() == 1 {
3286 let descending = directions[0].eq_ignore_ascii_case("descend")
3287 || directions[0].eq_ignore_ascii_case("desc");
3288 for key in &mut keys {
3289 key.descending = descending;
3290 }
3291 } else {
3292 for (key, direction) in keys.iter_mut().zip(directions.iter()) {
3293 key.descending = direction.eq_ignore_ascii_case("descend")
3294 || direction.eq_ignore_ascii_case("desc");
3295 }
3296 }
3297 }
3298 Ok(Self { keys })
3299 }
3300}
3301
3302fn compare_table_cells(value: &Value, a: usize, b: usize) -> BuiltinResult<Ordering> {
3303 match value {
3304 Value::Tensor(tensor) => Ok(tensor
3305 .get2(a, 0)
3306 .map_err(invalid_index)?
3307 .partial_cmp(&tensor.get2(b, 0).map_err(invalid_index)?)
3308 .unwrap_or(Ordering::Greater)),
3309 Value::StringArray(array) => {
3310 let av = array.data.get(a).cloned().unwrap_or_default();
3311 let bv = array.data.get(b).cloned().unwrap_or_default();
3312 Ok(av.cmp(&bv))
3313 }
3314 Value::LogicalArray(array) => {
3315 let av = *array.data.get(a).unwrap_or(&0);
3316 let bv = *array.data.get(b).unwrap_or(&0);
3317 Ok(av.cmp(&bv))
3318 }
3319 Value::Object(obj) if obj.is_class("datetime") => {
3320 let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3321 Ok(tensor
3322 .data
3323 .get(a)
3324 .copied()
3325 .unwrap_or(f64::NAN)
3326 .partial_cmp(&tensor.data.get(b).copied().unwrap_or(f64::NAN))
3327 .unwrap_or(Ordering::Greater))
3328 }
3329 other => Ok(cell_key_string(other, a).cmp(&cell_key_string(other, b))),
3330 }
3331}
3332
/// One component of a grouping key: the value of a single grouping variable
/// in a single row.
///
/// Atoms are totally ordered so that a `Vec<GroupAtom>` can be used as a
/// `BTreeMap` key. Different kinds order by `rank`; equal kinds order by
/// value.
#[derive(Clone, Debug)]
enum GroupAtom {
    Number(f64),
    Text(String),
    Logical(bool),
    Missing,
}

impl GroupAtom {
    /// Position of this kind in the cross-kind ordering
    /// (`Missing` < `Logical` < `Number` < `Text`).
    fn rank(&self) -> u8 {
        match self {
            Self::Missing => 0,
            Self::Logical(_) => 1,
            Self::Number(_) => 2,
            Self::Text(_) => 3,
        }
    }
}

impl PartialEq for GroupAtom {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for GroupAtom {}

impl PartialOrd for GroupAtom {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for GroupAtom {
    fn cmp(&self, other: &Self) -> Ordering {
        // `total_cmp` distinguishes -0.0 from 0.0 and NaNs by sign and payload.
        // Collapse those first so numerically identical rows share one group and
        // every NaN lands in a single group after all other numbers.
        fn canonical(value: f64) -> f64 {
            if value.is_nan() {
                f64::NAN.copysign(1.0)
            } else if value == 0.0 {
                0.0
            } else {
                value
            }
        }

        self.rank()
            .cmp(&other.rank())
            .then_with(|| match (self, other) {
                (Self::Logical(a), Self::Logical(b)) => a.cmp(b),
                (Self::Number(a), Self::Number(b)) => canonical(*a).total_cmp(&canonical(*b)),
                (Self::Text(a), Self::Text(b)) => a.cmp(b),
                // Equal ranks with no payload to compare (`Missing` vs `Missing`).
                _ => Ordering::Equal,
            })
    }
}
3381
3382fn cell_group_atom(value: &Value, row: usize) -> GroupAtom {
3383 match value {
3384 Value::Tensor(tensor) => tensor
3385 .get2(row, 0)
3386 .map(GroupAtom::Number)
3387 .unwrap_or(GroupAtom::Missing),
3388 Value::StringArray(array) => array
3389 .data
3390 .get(row)
3391 .cloned()
3392 .map(GroupAtom::Text)
3393 .unwrap_or(GroupAtom::Missing),
3394 Value::LogicalArray(array) => array
3395 .data
3396 .get(row)
3397 .map(|value| GroupAtom::Logical(*value != 0))
3398 .unwrap_or(GroupAtom::Missing),
3399 Value::Object(obj) if obj.is_class("datetime") => {
3400 crate::builtins::datetime::serials_from_datetime_value(value)
3401 .ok()
3402 .and_then(|tensor| tensor.data.get(row).copied())
3403 .map(GroupAtom::Number)
3404 .unwrap_or(GroupAtom::Missing)
3405 }
3406 other => GroupAtom::Text(cell_key_string(other, row)),
3407 }
3408}
3409
3410fn groupsummary_impl(
3411 table: Value,
3412 groupvars: Value,
3413 method: Value,
3414 rest: Vec<Value>,
3415) -> BuiltinResult<Value> {
3416 let object = into_table_object(table, "groupsummary")?;
3417 let names = table_variable_names_from_object(&object)?;
3418 let group_names = parse_variable_selector(Some(&groupvars), &names)?;
3419 let methods = string_list(&method)?;
3420 if methods.is_empty() {
3421 return Err(invalid_argument(
3422 "groupsummary: method list must not be empty",
3423 ));
3424 }
3425 let data_names = if let Some(value) = rest.first() {
3426 parse_variable_selector(Some(value), &names)?
3427 } else {
3428 names
3429 .iter()
3430 .filter(|name| !group_names.contains(name))
3431 .filter(|name| {
3432 table_variables(&object)
3433 .ok()
3434 .and_then(|vars| vars.fields.get(*name).cloned())
3435 .map(|value| matches!(value, Value::Tensor(_)))
3436 .unwrap_or(false)
3437 })
3438 .cloned()
3439 .collect()
3440 };
3441 let variables = table_variables(&object)?;
3442 let height = table_height(&object)?;
3443 let mut groups: BTreeMap<Vec<GroupAtom>, Vec<usize>> = BTreeMap::new();
3444 for row in 0..height {
3445 let key = group_names
3446 .iter()
3447 .map(|name| {
3448 variables
3449 .fields
3450 .get(name)
3451 .map(|value| cell_group_atom(value, row))
3452 .unwrap_or(GroupAtom::Missing)
3453 })
3454 .collect::<Vec<_>>();
3455 groups.entry(key).or_default().push(row);
3456 }
3457 let group_rows = groups
3458 .values()
3459 .filter_map(|rows| rows.first().copied())
3460 .collect::<Vec<_>>();
3461 let mut out_names = Vec::new();
3462 let mut out_columns = Vec::new();
3463 for name in &group_names {
3464 let value = variables.fields.get(name).ok_or_else(|| {
3465 invalid_variable(format!("groupsummary: missing group variable '{name}'"))
3466 })?;
3467 out_names.push(name.clone());
3468 out_columns.push(select_rows(value, &group_rows)?);
3469 }
3470 out_names.push("GroupCount".to_string());
3471 out_columns.push(Value::Tensor(
3472 Tensor::new(
3473 groups.values().map(|rows| rows.len() as f64).collect(),
3474 vec![groups.len(), 1],
3475 )
3476 .map_err(invalid_variable)?,
3477 ));
3478 for method in &methods {
3479 for name in &data_names {
3480 let value = variables.fields.get(name).ok_or_else(|| {
3481 invalid_variable(format!("groupsummary: missing data variable '{name}'"))
3482 })?;
3483 let values = summarize_groups(value, groups.values(), method)?;
3484 out_names.push(format!("{}_{}", method.to_ascii_lowercase(), name));
3485 out_columns.push(Value::Tensor(
3486 Tensor::new(values, vec![groups.len(), 1]).map_err(invalid_variable)?,
3487 ));
3488 }
3489 }
3490 table_from_columns(out_names, out_columns)
3491}
3492
3493fn summarize_groups<'a>(
3494 value: &Value,
3495 groups: impl Iterator<Item = &'a Vec<usize>>,
3496 method: &str,
3497) -> BuiltinResult<Vec<f64>> {
3498 let tensor = match value {
3499 Value::Tensor(tensor) if tensor.cols() == 1 => tensor,
3500 _ => {
3501 return Err(invalid_variable(
3502 "groupsummary: summary data variables must be numeric column vectors",
3503 ))
3504 }
3505 };
3506 groups
3507 .map(|rows| {
3508 let mut values = rows
3509 .iter()
3510 .map(|row| tensor.get2(*row, 0).map_err(invalid_index))
3511 .collect::<BuiltinResult<Vec<_>>>()?;
3512 values.retain(|value| !value.is_nan());
3513 let result = match method.to_ascii_lowercase().as_str() {
3514 "mean" => {
3515 if values.is_empty() {
3516 f64::NAN
3517 } else {
3518 values.iter().sum::<f64>() / values.len() as f64
3519 }
3520 }
3521 "sum" => values.iter().sum(),
3522 "min" => values.into_iter().fold(f64::INFINITY, f64::min),
3523 "max" => values.into_iter().fold(f64::NEG_INFINITY, f64::max),
3524 "median" => {
3525 if values.is_empty() {
3526 f64::NAN
3527 } else {
3528 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
3529 let mid = values.len() / 2;
3530 if values.len() % 2 == 0 {
3531 (values[mid - 1] + values[mid]) / 2.0
3532 } else {
3533 values[mid]
3534 }
3535 }
3536 }
3537 "count" | "numel" => values.len() as f64,
3538 other => {
3539 return Err(invalid_argument(format!(
3540 "groupsummary: unsupported method '{other}'"
3541 )))
3542 }
3543 };
3544 Ok(result)
3545 })
3546 .collect()
3547}
3548
3549fn cell_key_string(value: &Value, row: usize) -> String {
3550 match value {
3551 Value::Tensor(tensor) => tensor
3552 .get2(row, 0)
3553 .map(format_key_number)
3554 .unwrap_or_default(),
3555 Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
3556 Value::LogicalArray(array) => array
3557 .data
3558 .get(row)
3559 .map(|value| value.to_string())
3560 .unwrap_or_default(),
3561 Value::Object(obj) if obj.is_class("datetime") => {
3562 crate::builtins::datetime::serials_from_datetime_value(value)
3563 .ok()
3564 .and_then(|tensor| tensor.data.get(row).copied())
3565 .map(format_key_number)
3566 .unwrap_or_default()
3567 }
3568 other => format!("{other}"),
3569 }
3570}
3571
3572pub fn table_display_text(value: &Value) -> BuiltinResult<String> {
3573 let object = match value {
3574 Value::Object(object) if object.is_class(TABLE_CLASS) => object,
3575 _ => return Err(invalid_argument("table display expects table object")),
3576 };
3577 let names = table_variable_names_from_object(object)?;
3578 let variables = table_variables(object)?;
3579 let rows = table_height(object)?;
3580 let preview = rows.min(12);
3581 let mut widths = names.iter().map(|name| name.len()).collect::<Vec<_>>();
3582 let rendered_cols = names
3583 .iter()
3584 .enumerate()
3585 .map(|(col, name)| {
3586 let value = variables
3587 .fields
3588 .get(name)
3589 .cloned()
3590 .unwrap_or_else(|| Value::String(String::new()));
3591 let cells = (0..preview)
3592 .map(|row| render_table_cell(&value, row))
3593 .collect::<Vec<_>>();
3594 for cell in &cells {
3595 widths[col] = widths[col].max(cell.len());
3596 }
3597 cells
3598 })
3599 .collect::<Vec<_>>();
3600
3601 let mut lines = Vec::new();
3602 lines.push(format!("{rows}x{} table", names.len()));
3603 if names.is_empty() {
3604 return Ok(lines.join("\n"));
3605 }
3606 let header = names
3607 .iter()
3608 .enumerate()
3609 .map(|(idx, name)| format!("{name:<width$}", width = widths[idx]))
3610 .collect::<Vec<_>>()
3611 .join(" ");
3612 lines.push(header);
3613 for row in 0..preview {
3614 lines.push(
3615 rendered_cols
3616 .iter()
3617 .enumerate()
3618 .map(|(col, cells)| format!("{:<width$}", cells[row], width = widths[col]))
3619 .collect::<Vec<_>>()
3620 .join(" "),
3621 );
3622 }
3623 if preview < rows {
3624 lines.push(format!("... {} more rows", rows - preview));
3625 }
3626 Ok(lines.join("\n"))
3627}
3628
3629pub fn table_summary_text(value: &Value) -> BuiltinResult<String> {
3630 let object = match value {
3631 Value::Object(object) if object.is_class(TABLE_CLASS) => object,
3632 _ => return Err(invalid_argument("table display expects table object")),
3633 };
3634 Ok(format!(
3635 "{}x{} table",
3636 table_height(object)?,
3637 table_width(object)?
3638 ))
3639}
3640
3641fn render_table_cell(value: &Value, row: usize) -> String {
3642 match value {
3643 Value::Tensor(tensor) => tensor
3644 .get2(row, 0)
3645 .map(format_table_number)
3646 .unwrap_or_default(),
3647 Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
3648 Value::LogicalArray(array) => array
3649 .data
3650 .get(row)
3651 .map(|value| if *value != 0 { "true" } else { "false" }.to_string())
3652 .unwrap_or_default(),
3653 Value::Object(obj) if obj.is_class("datetime") => {
3654 crate::builtins::datetime::datetime_string_array(value)
3655 .ok()
3656 .flatten()
3657 .and_then(|array| array.data.get(row).cloned())
3658 .unwrap_or_else(|| value.to_string())
3659 }
3660 other => other.to_string(),
3661 }
3662}
3663
3664fn format_table_number(value: f64) -> String {
3665 if value.is_nan() {
3666 "NaN".to_string()
3667 } else if value.fract() == 0.0 && value.abs() < 1e15 {
3668 format!("{}", value as i64)
3669 } else {
3670 trim_float(format!("{value:.6}"))
3671 }
3672}
3673
3674fn format_key_number(value: f64) -> String {
3675 if value.is_nan() {
3676 "NaN".to_string()
3677 } else if value.is_infinite() {
3678 value.to_string()
3679 } else {
3680 trim_float(format!("{value:.17}"))
3681 }
3682}
3683
/// Strips trailing zeros after the first decimal point of `text`, and the
/// point itself when nothing but zeros followed it.
///
/// Text without a `.` is returned unchanged.
fn trim_float(mut text: String) -> String {
    let Some(dot) = text.find('.') else {
        return text;
    };
    let fraction_len = text[dot + 1..].trim_end_matches('0').len();
    // With no fraction digits left, cut at the point so it is dropped too.
    let keep = if fraction_len == 0 {
        dot
    } else {
        dot + 1 + fraction_len
    };
    text.truncate(keep);
    text
}
3697
3698fn scalar_text(value: &Value, context: &str) -> BuiltinResult<String> {
3699 match value {
3700 Value::String(text) => Ok(text.clone()),
3701 Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
3702 Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
3703 _ => Err(invalid_argument(format!(
3704 "table: {context} must be a string scalar or character vector"
3705 ))),
3706 }
3707}
3708
3709fn bool_scalar(value: &Value, context: &str) -> BuiltinResult<bool> {
3710 match value {
3711 Value::Bool(flag) => Ok(*flag),
3712 Value::Int(value) => Ok(value.to_i64() != 0),
3713 Value::Num(value) if value.is_finite() => Ok(*value != 0.0),
3714 Value::String(_) | Value::CharArray(_) | Value::StringArray(_) => {
3715 let text = scalar_text(value, context)?;
3716 match text.to_ascii_lowercase().as_str() {
3717 "true" | "on" | "yes" => Ok(true),
3718 "false" | "off" | "no" => Ok(false),
3719 _ => Err(invalid_argument(format!(
3720 "table: {context} must be logical"
3721 ))),
3722 }
3723 }
3724 _ => Err(invalid_argument(format!(
3725 "table: {context} must be logical"
3726 ))),
3727 }
3728}
3729
3730fn nonnegative_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
3731 match value {
3732 Value::Int(value) if value.to_i64() >= 0 => Ok(value.to_i64() as usize),
3733 Value::Num(value)
3734 if value.is_finite()
3735 && *value >= 0.0
3736 && (value.round() - value).abs() <= f64::EPSILON =>
3737 {
3738 Ok(value.round() as usize)
3739 }
3740 _ => Err(invalid_argument(format!(
3741 "table: {context} must be a non-negative integer"
3742 ))),
3743 }
3744}
3745
3746fn positive_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
3747 let value = nonnegative_usize(value, context)?;
3748 if value == 0 {
3749 return Err(invalid_argument(format!(
3750 "table: {context} must be a positive integer"
3751 )));
3752 }
3753 Ok(value)
3754}
3755
3756fn option_value_is_empty(value: &Value) -> bool {
3757 match value {
3758 Value::String(text) => text.trim().is_empty(),
3759 Value::CharArray(array) => {
3760 array.data.is_empty()
3761 || (array.rows == 1 && array.data.iter().all(|ch| ch.is_whitespace()))
3762 }
3763 Value::StringArray(array) => {
3764 array.data.is_empty() || (array.data.len() == 1 && array.data[0].trim().is_empty())
3765 }
3766 Value::Cell(cell) => {
3767 cell.data.is_empty()
3768 || cell
3769 .data
3770 .iter()
3771 .all(|handle| option_value_is_empty(unsafe { &*handle.as_raw() }))
3772 }
3773 _ => false,
3774 }
3775}
3776
3777fn string_list(value: &Value) -> BuiltinResult<Vec<String>> {
3778 match value {
3779 Value::String(text) => Ok(vec![text.clone()]),
3780 Value::CharArray(ca) if ca.rows == 1 => Ok(vec![ca.data.iter().collect()]),
3781 Value::StringArray(array) => Ok(array.data.clone()),
3782 Value::Cell(cell) => {
3783 let mut out = Vec::with_capacity(cell.data.len());
3784 for handle in &cell.data {
3785 let value = unsafe { &*handle.as_raw() };
3786 out.extend(string_list(value)?);
3787 }
3788 Ok(out)
3789 }
3790 _ => Err(invalid_argument(
3791 "table: expected string, string array, character vector, or cellstr",
3792 )),
3793 }
3794}
3795
3796fn optional_raw_variable_name_list(value: &Value) -> BuiltinResult<Option<Vec<String>>> {
3797 if option_value_is_empty(value) {
3798 Ok(None)
3799 } else {
3800 raw_variable_name_list(value).map(Some)
3801 }
3802}
3803
3804fn raw_variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
3805 let names = string_list(value)?;
3806 if names.is_empty() {
3807 return Err(invalid_variable("table: variable names must not be empty"));
3808 }
3809 Ok(names)
3810}
3811
3812fn variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
3813 raw_variable_name_list(value).map(make_unique_variable_names)
3814}
3815
3816fn optional_variable_type_list(value: &Value) -> BuiltinResult<Option<Vec<ImportVariableType>>> {
3817 if option_value_is_empty(value) {
3818 Ok(None)
3819 } else {
3820 variable_type_list(value).map(Some)
3821 }
3822}
3823
3824fn variable_type_list(value: &Value) -> BuiltinResult<Vec<ImportVariableType>> {
3825 string_list(value)?
3826 .iter()
3827 .map(|raw| ImportVariableType::parse(raw))
3828 .collect()
3829}
3830
3831fn variable_type_names(value: &Value) -> BuiltinResult<Vec<String>> {
3832 string_list(value)?
3833 .iter()
3834 .map(|raw| ImportVariableType::canonical_label(raw))
3835 .collect()
3836}
3837
3838fn optional_range_spec(value: &Value) -> BuiltinResult<Option<RangeSpec>> {
3839 if option_value_is_empty(value) {
3840 Ok(None)
3841 } else {
3842 RangeSpec::parse(value).map(Some)
3843 }
3844}
3845
3846fn optional_sheet_selector(value: &Value) -> BuiltinResult<Option<SheetSelector>> {
3847 if option_value_is_empty(value) {
3848 Ok(None)
3849 } else {
3850 SheetSelector::parse(value).map(Some)
3851 }
3852}
3853
/// Generates the default variable names `Var1`, `Var2`, ..., `Var<count>`.
fn generated_variable_names(count: usize) -> Vec<String> {
    let mut names = Vec::new();
    for position in 1..=count {
        names.push(format!("Var{position}"));
    }
    names
}
3857
3858fn make_unique_variable_names(names: Vec<String>) -> Vec<String> {
3859 make_unique_names(
3860 names
3861 .into_iter()
3862 .enumerate()
3863 .map(|(idx, name)| make_valid_variable_name(&name, idx + 1))
3864 .collect(),
3865 )
3866}
3867
/// Makes a list of names unique, ignoring ASCII case.
///
/// Names are trimmed; a blank name becomes `Var<position>` (one-based). A
/// name that collides with an earlier result gets the suffix `_2`, `_3`, ...
/// until it is free.
fn make_unique_names(names: Vec<String>) -> Vec<String> {
    let mut taken: HashSet<String> = HashSet::new();
    names
        .into_iter()
        .enumerate()
        .map(|(position, raw)| {
            let trimmed = raw.trim();
            let base = if trimmed.is_empty() {
                format!("Var{}", position + 1)
            } else {
                trimmed.to_string()
            };
            let mut candidate = base.clone();
            let mut suffix = 1usize;
            // `insert` returns false while the lowercase form is already taken.
            while !taken.insert(candidate.to_ascii_lowercase()) {
                suffix += 1;
                candidate = format!("{base}_{suffix}");
            }
            candidate
        })
        .collect()
}
3888
/// Sanitizes `raw` into an identifier-style variable name.
///
/// After trimming, ASCII letters, digits and `_` are kept (digits not in the
/// first position); every other character becomes `_`, without producing two
/// replacement underscores in a row. Trailing underscores are removed. If the
/// result is empty or does not start with an ASCII letter, the name
/// `Var<fallback_index>` is returned instead.
fn make_valid_variable_name(raw: &str, fallback_index: usize) -> String {
    let mut cleaned = String::new();
    for (position, ch) in raw.trim().chars().enumerate() {
        let allowed = match position {
            0 => ch.is_ascii_alphabetic() || ch == '_',
            _ => ch.is_ascii_alphanumeric() || ch == '_',
        };
        if allowed {
            cleaned.push(ch);
        } else if !cleaned.ends_with('_') {
            cleaned.push('_');
        }
    }
    let trimmed = cleaned.trim_end_matches('_');
    if trimmed.starts_with(|ch: char| ch.is_ascii_alphabetic()) {
        trimmed.to_string()
    } else {
        format!("Var{fallback_index}")
    }
}
3909
3910#[cfg(test)]
3911mod tests {
3912 use super::*;
3913 use futures::executor::block_on;
3914 use runmat_time::unix_timestamp_ms;
3915 use std::fs;
3916 use std::io::Write;
3917
3918 fn unique_path(prefix: &str) -> PathBuf {
3919 let mut path = std::env::temp_dir();
3920 path.push(format!(
3921 "runmat_{prefix}_{}_{}",
3922 std::process::id(),
3923 unix_timestamp_ms()
3924 ));
3925 path
3926 }
3927
3928 fn read_table(path: &Path, args: Vec<Value>) -> Value {
3929 block_on(readtable_builtin(
3930 Value::from(path.to_string_lossy().to_string()),
3931 args,
3932 ))
3933 .expect("readtable")
3934 }
3935
3936 fn read_table_err(path: &Path, args: Vec<Value>) -> RuntimeError {
3937 block_on(readtable_builtin(
3938 Value::from(path.to_string_lossy().to_string()),
3939 args,
3940 ))
3941 .expect_err("expected readtable failure")
3942 }
3943
3944 fn spreadsheet_options(args: Vec<Value>) -> StructValue {
3945 match block_on(spreadsheet_import_options_builtin(args)).expect("spreadsheetImportOptions")
3946 {
3947 Value::Struct(options) => options,
3948 other => panic!("expected struct options, got {other:?}"),
3949 }
3950 }
3951
3952 fn char_row(array: &CharArray, row: usize) -> String {
3953 let start = row * array.cols;
3954 array.data[start..start + array.cols].iter().collect()
3955 }
3956
3957 fn object(value: Value) -> ObjectInstance {
3958 match value {
3959 Value::Object(object) => object,
3960 other => panic!("expected table object, got {other:?}"),
3961 }
3962 }
3963
    // Reads a CSV with a header row and checks the variable names, the
    // numeric `Score` column (shape and data) and the string `Name` column.
    #[test]
    fn readtable_imports_headered_numeric_and_text_columns() {
        let path = unique_path("readtable_basic");
        fs::write(&path, "Name,Score\nAda,10\nGrace,12\n").expect("write sample");
        let table = object(read_table(&path, Vec::new()));
        assert_eq!(
            table_variable_names_from_object(&table).unwrap(),
            vec!["Name".to_string(), "Score".to_string()]
        );
        match table_member_get(&table, &Value::from("Score")).unwrap() {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 1]);
                assert_eq!(tensor.data, vec![10.0, 12.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
        match table_member_get(&table, &Value::from("Name")).unwrap() {
            Value::StringArray(array) => {
                assert_eq!(array.data, vec!["Ada".to_string(), "Grace".to_string()]);
            }
            other => panic!("expected string array, got {other:?}"),
        }
        // Best-effort cleanup; a failed removal does not fail the test.
        let _ = fs::remove_file(&path);
    }
3988
    // Reads an all-numeric CSV with no header row and checks that both rows
    // are kept as data under the generated names `Var1` and `Var2`.
    #[test]
    fn readtable_auto_does_not_consume_headerless_numeric_rows() {
        let path = unique_path("readtable_headerless_numeric");
        fs::write(&path, "1,2\n3,4\n").expect("write sample");
        let table = object(read_table(&path, Vec::new()));
        assert_eq!(
            table_variable_names_from_object(&table).unwrap(),
            vec!["Var1".to_string(), "Var2".to_string()]
        );
        match table_member_get(&table, &Value::from("Var1")).unwrap() {
            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![1.0, 3.0]),
            other => panic!("expected tensor, got {other:?}"),
        }
        match table_member_get(&table, &Value::from("Var2")).unwrap() {
            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 4.0]),
            other => panic!("expected tensor, got {other:?}"),
        }
        // Best-effort cleanup; a failed removal does not fail the test.
        let _ = fs::remove_file(&path);
    }
4008
4009 #[test]
4010 fn readtable_rejects_unknown_and_invalid_options() {
4011 let path = unique_path("readtable_invalid_options");
4012 fs::write(&path, "A\n1\n").expect("write sample");
4013 let err = read_table_err(
4014 &path,
4015 vec![Value::from("DefinitelyNotAnOption"), Value::from(1.0)],
4016 );
4017 assert!(err.message().contains("unsupported option"));
4018 let err = read_table_err(
4019 &path,
4020 vec![Value::from("VariableNamingRule"), Value::from("mangle")],
4021 );
4022 assert!(err.message().contains("unsupported VariableNamingRule"));
4023 let _ = fs::remove_file(&path);
4024 }
4025
4026 #[test]
4027 fn readtable_handles_quoted_delimiters_and_newlines() {
4028 let path = unique_path("readtable_quoted_newlines");
4029 fs::write(
4030 &path,
4031 "Name,Note\nAda,\"hello, world\"\nGrace,\"line one\nline two\"\n",
4032 )
4033 .expect("write sample");
4034 let table = object(read_table(&path, Vec::new()));
4035 match table_member_get(&table, &Value::from("Note")).unwrap() {
4036 Value::StringArray(array) => assert_eq!(
4037 array.data,
4038 vec!["hello, world".to_string(), "line one\nline two".to_string()]
4039 ),
4040 other => panic!("expected string array, got {other:?}"),
4041 }
4042 let _ = fs::remove_file(&path);
4043 }
4044
4045 #[test]
4046 fn readtable_supports_explicit_names_and_missing_tokens() {
4047 let path = unique_path("readtable_options");
4048 fs::write(&path, "1,NA\n2,4\n").expect("write sample");
4049 let names =
4050 StringArray::new(vec!["A".to_string(), "B".to_string()], vec![1, 2]).expect("names");
4051 let table = object(read_table(
4052 &path,
4053 vec![
4054 Value::from("ReadVariableNames"),
4055 Value::Bool(false),
4056 Value::from("VariableNames"),
4057 Value::StringArray(names),
4058 Value::from("TreatAsMissing"),
4059 Value::from("NA"),
4060 ],
4061 ));
4062 match table_member_get(&table, &Value::from("B")).unwrap() {
4063 Value::Tensor(tensor) => {
4064 assert!(tensor.data[0].is_nan());
4065 assert_eq!(tensor.data[1], 4.0);
4066 }
4067 other => panic!("expected tensor, got {other:?}"),
4068 }
4069 let _ = fs::remove_file(&path);
4070 }
4071
4072 #[test]
4073 fn readtable_preserves_variable_names_when_requested() {
4074 let path = unique_path("readtable_preserve_names");
4075 fs::write(&path, "daily revenue,total orders\n100,10\n").expect("write sample");
4076 let table = object(read_table(
4077 &path,
4078 vec![Value::from("VariableNamingRule"), Value::from("preserve")],
4079 ));
4080 assert_eq!(
4081 table_variable_names_from_object(&table).unwrap(),
4082 vec!["daily revenue".to_string(), "total orders".to_string()]
4083 );
4084 let _ = fs::remove_file(&path);
4085 }
4086
4087 fn write_zip_file(zip: &mut zip::ZipWriter<std::fs::File>, name: &str, contents: &str) {
4088 let options = zip::write::SimpleFileOptions::default()
4089 .compression_method(zip::CompressionMethod::Stored);
4090 zip.start_file(name, options).expect("start xlsx part");
4091 zip.write_all(contents.as_bytes()).expect("write xlsx part");
4092 }
4093
4094 fn write_minimal_xlsx(path: &Path) {
4095 let file = std::fs::File::create(path).expect("create xlsx");
4096 let mut zip = zip::ZipWriter::new(file);
4097 write_zip_file(
4098 &mut zip,
4099 "[Content_Types].xml",
4100 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4101<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
4102 <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
4103 <Default Extension="xml" ContentType="application/xml"/>
4104 <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
4105 <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
4106 <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
4107</Types>"#,
4108 );
4109 write_zip_file(
4110 &mut zip,
4111 "_rels/.rels",
4112 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4113<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
4114 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
4115</Relationships>"#,
4116 );
4117 write_zip_file(
4118 &mut zip,
4119 "xl/workbook.xml",
4120 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4121<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
4122 <sheets>
4123 <sheet name="Data" sheetId="1" r:id="rId1"/>
4124 </sheets>
4125</workbook>"#,
4126 );
4127 write_zip_file(
4128 &mut zip,
4129 "xl/_rels/workbook.xml.rels",
4130 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4131<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
4132 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
4133 <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
4134</Relationships>"#,
4135 );
4136 write_zip_file(
4137 &mut zip,
4138 "xl/styles.xml",
4139 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4140<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4141 <fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
4142 <fills count="1"><fill><patternFill patternType="none"/></fill></fills>
4143 <borders count="1"><border/></borders>
4144 <cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>
4145 <cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellXfs>
4146</styleSheet>"#,
4147 );
4148 write_zip_file(
4149 &mut zip,
4150 "xl/worksheets/sheet1.xml",
4151 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4152<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4153 <sheetData>
4154 <row r="1">
4155 <c r="A1" t="inlineStr"><is><t>Date</t></is></c>
4156 <c r="B1" t="inlineStr"><is><t>Orders</t></is></c>
4157 <c r="C1" t="inlineStr"><is><t>Revenue</t></is></c>
4158 </row>
4159 <row r="2">
4160 <c r="A2" t="inlineStr"><is><t>2026-06-01</t></is></c>
4161 <c r="B2"><v>10</v></c>
4162 <c r="C2"><v>200</v></c>
4163 </row>
4164 <row r="3">
4165 <c r="A3" t="inlineStr"><is><t>2026-06-02</t></is></c>
4166 <c r="B3"><v>4</v></c>
4167 <c r="C3"><v>90</v></c>
4168 </row>
4169 </sheetData>
4170</worksheet>"#,
4171 );
4172 zip.finish().expect("finish xlsx");
4173 }
4174
4175 #[test]
4176 fn readtable_imports_xlsx_sheet_and_range() {
4177 let path = unique_path("readtable_spreadsheet");
4178 let path = path.with_extension("xlsx");
4179 write_minimal_xlsx(&path);
4180 let table = object(read_table(
4181 &path,
4182 vec![
4183 Value::from("Sheet"),
4184 Value::from("Data"),
4185 Value::from("Range"),
4186 Value::from("A1:C3"),
4187 ],
4188 ));
4189 assert_eq!(
4190 table_variable_names_from_object(&table).unwrap(),
4191 vec![
4192 "Date".to_string(),
4193 "Orders".to_string(),
4194 "Revenue".to_string()
4195 ]
4196 );
4197 match table_member_get(&table, &Value::from("Revenue")).unwrap() {
4198 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![200.0, 90.0]),
4199 other => panic!("expected tensor, got {other:?}"),
4200 }
4201 let _ = fs::remove_file(&path);
4202 }
4203
4204 #[test]
4205 fn spreadsheet_import_options_registers_public_descriptor() {
4206 assert!(runmat_builtins::builtin_function_by_name("spreadsheetImportOptions").is_some());
4207 let labels = SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR
4208 .signatures
4209 .iter()
4210 .map(|signature| signature.label)
4211 .collect::<Vec<_>>();
4212 assert!(labels.contains(&"opts = spreadsheetImportOptions()"));
4213 assert!(labels.contains(&"opts = spreadsheetImportOptions(nameValuePairs...)"));
4214 }
4215
4216 #[test]
4217 fn spreadsheet_import_options_builds_editable_options_struct() {
4218 let options = spreadsheet_options(vec![
4219 Value::from("NumVariables"),
4220 Value::Num(2.0),
4221 Value::from("VariableTypes"),
4222 Value::StringArray(
4223 StringArray::new(vec!["double".into(), "string".into()], vec![1, 2]).unwrap(),
4224 ),
4225 Value::from("DataRange"),
4226 Value::from("A2:B5"),
4227 ]);
4228 assert_eq!(
4229 options.fields.get("FileType"),
4230 Some(&Value::from("spreadsheet"))
4231 );
4232 assert_eq!(options.fields.get("NumVariables"), Some(&Value::Num(2.0)));
4233 assert_eq!(options.fields.get("DataRange"), Some(&Value::from("A2:B5")));
4234 match options.fields.get("VariableNames").unwrap() {
4235 Value::StringArray(array) => {
4236 assert_eq!(array.data, vec!["Var1".to_string(), "Var2".to_string()]);
4237 assert_eq!(array.shape, vec![1, 2]);
4238 }
4239 other => panic!("expected string array, got {other:?}"),
4240 }
4241 match options.fields.get("VariableTypes").unwrap() {
4242 Value::StringArray(array) => {
4243 assert_eq!(array.data, vec!["double".to_string(), "string".to_string()]);
4244 assert_eq!(array.shape, vec![1, 2]);
4245 }
4246 other => panic!("expected string array, got {other:?}"),
4247 }
4248 }
4249
4250 #[test]
4251 fn readtable_consumes_spreadsheet_import_options_struct() {
4252 let path = unique_path("readtable_spreadsheet_options");
4253 let path = path.with_extension("xlsx");
4254 write_minimal_xlsx(&path);
4255 let mut options = spreadsheet_options(vec![Value::from("NumVariables"), Value::Num(1.0)]);
4256 options.insert("Sheet", Value::from("Data"));
4257 options.insert("DataRange", Value::from("C2:C3"));
4258 options.insert(
4259 "VariableNames",
4260 Value::StringArray(StringArray::new(vec!["Amount".into()], vec![1, 1]).unwrap()),
4261 );
4262 options.insert(
4263 "VariableTypes",
4264 Value::StringArray(StringArray::new(vec!["double".into()], vec![1, 1]).unwrap()),
4265 );
4266 let table = object(read_table(&path, vec![Value::Struct(options)]));
4267 assert_eq!(
4268 table_variable_names_from_object(&table).unwrap(),
4269 vec!["Amount".to_string()]
4270 );
4271 match table_member_get(&table, &Value::from("Amount")).unwrap() {
4272 Value::Tensor(tensor) => {
4273 assert_eq!(tensor.shape, vec![2, 1]);
4274 assert_eq!(tensor.data, vec![200.0, 90.0]);
4275 assert_eq!(tensor.dtype, NumericDType::F64);
4276 }
4277 other => panic!("expected tensor, got {other:?}"),
4278 }
4279 let _ = fs::remove_file(&path);
4280 }
4281
4282 #[test]
4283 fn readtable_default_spreadsheet_options_still_infers_headers() {
4284 let path = unique_path("readtable_default_spreadsheet_options");
4285 let path = path.with_extension("xlsx");
4286 write_minimal_xlsx(&path);
4287 let options = spreadsheet_options(Vec::new());
4288 let table = object(read_table(&path, vec![Value::Struct(options)]));
4289 assert_eq!(
4290 table_variable_names_from_object(&table).unwrap(),
4291 vec![
4292 "Date".to_string(),
4293 "Orders".to_string(),
4294 "Revenue".to_string()
4295 ]
4296 );
4297 let _ = fs::remove_file(&path);
4298 }
4299
4300 #[test]
4301 fn readtable_variable_types_coerce_imported_columns() {
4302 let path = unique_path("readtable_variable_types");
4303 fs::write(
4304 &path,
4305 "Value,Flag,When,Elapsed\n1.5,true,2026-06-01,01:30:00\n2.25,false,2026-06-02,02:00:00\n",
4306 )
4307 .expect("write sample");
4308 let types = StringArray::new(
4309 vec![
4310 "single".to_string(),
4311 "logical".to_string(),
4312 "datetime".to_string(),
4313 "duration".to_string(),
4314 ],
4315 vec![1, 4],
4316 )
4317 .unwrap();
4318 let table = object(read_table(
4319 &path,
4320 vec![Value::from("VariableTypes"), Value::StringArray(types)],
4321 ));
4322 match table_member_get(&table, &Value::from("Value")).unwrap() {
4323 Value::Tensor(tensor) => {
4324 assert_eq!(tensor.dtype, NumericDType::F32);
4325 assert_eq!(tensor.data, vec![1.5, 2.25]);
4326 }
4327 other => panic!("expected tensor, got {other:?}"),
4328 }
4329 match table_member_get(&table, &Value::from("Flag")).unwrap() {
4330 Value::LogicalArray(array) => assert_eq!(array.data, vec![1, 0]),
4331 other => panic!("expected logical array, got {other:?}"),
4332 }
4333 match table_member_get(&table, &Value::from("When")).unwrap() {
4334 Value::Object(object) => assert!(object.is_class("datetime")),
4335 other => panic!("expected datetime object, got {other:?}"),
4336 }
4337 match table_member_get(&table, &Value::from("Elapsed")).unwrap() {
4338 Value::Object(object) => assert!(object.is_class("duration")),
4339 other => panic!("expected duration object, got {other:?}"),
4340 }
4341 let _ = fs::remove_file(&path);
4342 }
4343
4344 #[test]
4345 fn readtable_preserves_explicit_import_variable_names_when_requested() {
4346 let path = unique_path("readtable_preserve_explicit_names");
4347 fs::write(&path, "100,10\n125,12\n").expect("write sample");
4348 let names = StringArray::new(
4349 vec!["daily revenue".to_string(), "total orders".to_string()],
4350 vec![1, 2],
4351 )
4352 .unwrap();
4353 let table = object(read_table(
4354 &path,
4355 vec![
4356 Value::from("ReadVariableNames"),
4357 Value::Bool(false),
4358 Value::from("VariableNames"),
4359 Value::StringArray(names),
4360 Value::from("VariableNamingRule"),
4361 Value::from("preserve"),
4362 ],
4363 ));
4364 assert_eq!(
4365 table_variable_names_from_object(&table).unwrap(),
4366 vec!["daily revenue".to_string(), "total orders".to_string()]
4367 );
4368 let _ = fs::remove_file(&path);
4369 }
4370
4371 #[test]
4372 fn readtable_text_type_char_imports_text_columns_as_char_matrix() {
4373 let path = unique_path("readtable_text_type_char");
4374 fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");
4375 let table = object(read_table(
4376 &path,
4377 vec![Value::from("TextType"), Value::from("char")],
4378 ));
4379 match table_member_get(&table, &Value::from("Name")).unwrap() {
4380 Value::CharArray(array) => {
4381 assert_eq!(array.rows, 2);
4382 assert_eq!(array.cols, 5);
4383 assert_eq!(char_row(&array, 0), "Ada ");
4384 assert_eq!(char_row(&array, 1), "Grace");
4385 }
4386 other => panic!("expected char array, got {other:?}"),
4387 }
4388 let _ = fs::remove_file(&path);
4389 }
4390
4391 #[test]
4392 fn readtable_variable_types_cellstr_imports_cell_column() {
4393 let path = unique_path("readtable_variable_types_cellstr");
4394 fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");
4395 let types = StringArray::new(vec!["cellstr".to_string()], vec![1, 1]).unwrap();
4396 let table = object(read_table(
4397 &path,
4398 vec![Value::from("VariableTypes"), Value::StringArray(types)],
4399 ));
4400 match table_member_get(&table, &Value::from("Name")).unwrap() {
4401 Value::Cell(cell) => {
4402 assert_eq!(cell.rows, 2);
4403 assert_eq!(cell.cols, 1);
4404 assert_eq!(
4405 cell.get(0, 0).unwrap(),
4406 Value::CharArray(CharArray::new_row("Ada"))
4407 );
4408 assert_eq!(
4409 cell.get(1, 0).unwrap(),
4410 Value::CharArray(CharArray::new_row("Grace"))
4411 );
4412 }
4413 other => panic!("expected cell array, got {other:?}"),
4414 }
4415 let _ = fs::remove_file(&path);
4416 }
4417
4418 #[test]
4419 fn readtable_rejects_unrepresented_import_variable_types() {
4420 let path = unique_path("readtable_unsupported_variable_types");
4421 fs::write(&path, "A\n1\n").expect("write sample");
4422 let unsupported_integer = StringArray::new(vec!["int8".to_string()], vec![1, 1]).unwrap();
4423 let err = read_table_err(
4424 &path,
4425 vec![
4426 Value::from("VariableTypes"),
4427 Value::StringArray(unsupported_integer),
4428 ],
4429 );
4430 assert!(err
4431 .message()
4432 .contains("unsupported VariableTypes entry 'int8'"));
4433 let categorical = StringArray::new(vec!["categorical".to_string()], vec![1, 1]).unwrap();
4434 let err = read_table_err(
4435 &path,
4436 vec![
4437 Value::from("VariableTypes"),
4438 Value::StringArray(categorical),
4439 ],
4440 );
4441 assert!(err
4442 .message()
4443 .contains("unsupported VariableTypes entry 'categorical'"));
4444 let _ = fs::remove_file(&path);
4445 }
4446
4447 #[test]
4448 fn table_properties_variable_names_rename_columns() {
4449 let a = Value::Tensor(Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap());
4450 let b = Value::Tensor(Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap());
4451 let mut table =
4452 object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
4453 let mut props = table_public_properties(&table).unwrap();
4454 props.insert(
4455 VARIABLE_NAMES,
4456 Value::StringArray(StringArray::new(vec!["X".into(), "Y".into()], vec![1, 2]).unwrap()),
4457 );
4458 table_member_set(&mut table, PROPERTIES_MEMBER, Value::Struct(props)).unwrap();
4459 assert_eq!(
4460 table_variable_names_from_object(&table).unwrap(),
4461 vec!["X".to_string(), "Y".to_string()]
4462 );
4463 }
4464
4465 #[test]
4466 fn table_paren_selects_rows_and_named_variables() {
4467 let a = Value::Tensor(Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap());
4468 let b = Value::Tensor(Tensor::new(vec![4.0, 5.0, 6.0], vec![3, 1]).unwrap());
4469 let table = object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
4470 let selector = CellArray::new(
4471 vec![
4472 Value::Tensor(Tensor::new(vec![3.0, 1.0], vec![1, 2]).unwrap()),
4473 Value::Cell(CellArray::new(vec![Value::from("B")], 1, 1).unwrap()),
4474 ],
4475 1,
4476 2,
4477 )
4478 .unwrap();
4479 let subset = object(table_paren_get(&table, &Value::Cell(selector)).unwrap());
4480 assert_eq!(
4481 table_variable_names_from_object(&subset).unwrap(),
4482 vec!["B".to_string()]
4483 );
4484 match table_member_get(&subset, &Value::from("B")).unwrap() {
4485 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![6.0, 4.0]),
4486 other => panic!("expected tensor, got {other:?}"),
4487 }
4488 }
4489
4490 #[test]
4491 fn sortrows_preserves_row_names() {
4492 let values = Value::Tensor(Tensor::new(vec![2.0, 1.0], vec![2, 1]).unwrap());
4493 let table = table_from_columns_with_properties(
4494 vec!["X".into()],
4495 vec![values],
4496 Some(vec!["second".into(), "first".into()]),
4497 )
4498 .unwrap();
4499 let (sorted, _) = sortrows_table(table, &[Value::from("X")]).unwrap();
4500 let sorted = object(sorted);
4501 let props = table_public_properties(&sorted).unwrap();
4502 match props.fields.get(ROW_NAMES).unwrap() {
4503 Value::StringArray(array) => {
4504 assert_eq!(array.data, vec!["first".to_string(), "second".to_string()]);
4505 }
4506 other => panic!("expected row names, got {other:?}"),
4507 }
4508 }
4509
4510 #[test]
4511 fn groupsummary_mean_counts_groups() {
4512 let group = Value::StringArray(
4513 StringArray::new(vec!["a".into(), "b".into(), "a".into()], vec![3, 1]).unwrap(),
4514 );
4515 let value = Value::Tensor(Tensor::new(vec![2.0, 5.0, 4.0], vec![3, 1]).unwrap());
4516 let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
4517 let summary = groupsummary_impl(
4518 table,
4519 Value::from("G"),
4520 Value::from("mean"),
4521 vec![Value::from("X")],
4522 )
4523 .unwrap();
4524 let summary = object(summary);
4525 assert_eq!(
4526 table_variable_names_from_object(&summary).unwrap(),
4527 vec![
4528 "G".to_string(),
4529 "GroupCount".to_string(),
4530 "mean_X".to_string()
4531 ]
4532 );
4533 match table_member_get(&summary, &Value::from("mean_X")).unwrap() {
4534 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![3.0, 5.0]),
4535 other => panic!("expected tensor, got {other:?}"),
4536 }
4537 }
4538
4539 #[test]
4540 fn groupsummary_orders_numeric_groups_numerically() {
4541 let group = Value::Tensor(Tensor::new(vec![10.0, 2.0, 10.0], vec![3, 1]).unwrap());
4542 let value = Value::Tensor(Tensor::new(vec![1.0, 5.0, 3.0], vec![3, 1]).unwrap());
4543 let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
4544 let summary =
4545 object(groupsummary_impl(table, Value::from("G"), Value::from("sum"), vec![]).unwrap());
4546 match table_member_get(&summary, &Value::from("G")).unwrap() {
4547 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 10.0]),
4548 other => panic!("expected tensor, got {other:?}"),
4549 }
4550 match table_member_get(&summary, &Value::from("sum_X")).unwrap() {
4551 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![5.0, 4.0]),
4552 other => panic!("expected tensor, got {other:?}"),
4553 }
4554 }
4555}