1use std::cmp::Ordering;
4use std::collections::{BTreeMap, HashMap, HashSet};
5use std::io::{Cursor, Read};
6use std::path::{Path, PathBuf};
7use std::sync::OnceLock;
8
9use calamine::{open_workbook_auto_from_rs, Data as SpreadsheetData, Reader as SpreadsheetReader};
10use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11use encoding_rs::{Encoding, UTF_8};
12use runmat_builtins::{
13 Access, BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15 CellArray, ClassDef, ComplexTensor, LogicalArray, MethodDef, ObjectInstance, PropertyDef,
16 StringArray, StructValue, Tensor, Value,
17};
18use runmat_filesystem::File;
19use runmat_macros::runtime_builtin;
20
21use crate::builtins::common::fs::expand_user_path;
22use crate::builtins::common::spec::{
23 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
24 ReductionNaN, ResidencyPolicy, ShapeRequirements,
25};
26use crate::{
27 build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError, OBJECT_INDEX_BRACE,
28 OBJECT_INDEX_MEMBER, OBJECT_INDEX_PAREN, OBJECT_SUBSASGN_METHOD, OBJECT_SUBSREF_METHOD,
29};
30
/// Class name under which the table class is registered; also used to tag
/// errors built by the helpers in this module.
pub const TABLE_CLASS: &str = "table";
// Internal field keys. Their consumers are outside this view.
// NOTE(review): presumably the object fields holding column data and table
// metadata — confirm against the accessor helpers.
const TABLE_VARIABLES_FIELD: &str = "__table_variables";
const TABLE_PROPERTIES_FIELD: &str = "__table_properties";
/// Name of the public `Properties` member registered on the table class.
const PROPERTIES_MEMBER: &str = "Properties";
// Property-name strings.
// NOTE(review): presumably the keys of the `Properties` struct — confirm
// against `default_properties`, which is not visible here.
const VARIABLE_NAMES: &str = "VariableNames";
const ROW_NAMES: &str = "RowNames";
const DIMENSION_NAMES: &str = "DimensionNames";
const VARIABLE_UNITS: &str = "VariableUnits";
const VARIABLE_DESCRIPTIONS: &str = "VariableDescriptions";
const DESCRIPTION: &str = "Description";
const USER_DATA: &str = "UserData";
// NOTE(review): presumably the initial `DimensionNames` entries — confirm.
const DEFAULT_ROW_DIM_NAME: &str = "Rows";
const DEFAULT_VARIABLE_DIM_NAME: &str = "Variables";

/// One-time guard initialised by `ensure_table_class_registered`.
static TABLE_CLASS_REGISTERED: OnceLock<()> = OnceLock::new();
46
/// Output list: one required value of any type, named `out`.
const ANY_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "out",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Result value.",
}];
/// Output list: one required integer scalar count, named `n`.
const NUM_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "n",
    ty: BuiltinParamType::IntegerScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Count.",
}];
/// Input list: one required argument `T`, typed as `Any`.
const TABLE_INPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "T",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Table input.",
}];
/// Input list for the `readtable(filename)` signature.
const READTABLE_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "filename",
    ty: BuiltinParamType::StringScalar,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Text or spreadsheet file path.",
}];
/// Input list for the `readtable(filename, nameValuePairs...)` signature:
/// the required filename followed by variadic name-value options.
const READTABLE_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
    BuiltinParamDescriptor {
        name: "filename",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Text or spreadsheet file path.",
    },
    BuiltinParamDescriptor {
        name: "nameValuePairs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Name-value import options.",
    },
];
/// Input list for `table(variables...)`: a single variadic argument.
const TABLE_INPUTS_VALUES: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "variables",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Variadic,
    default: None,
    description: "Variables to assemble as table columns.",
}];
/// Input list for `groupsummary`: three required arguments and an optional
/// `datavars`.
const GROUPSUMMARY_INPUTS: [BuiltinParamDescriptor; 4] = [
    BuiltinParamDescriptor {
        name: "T",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input table.",
    },
    BuiltinParamDescriptor {
        name: "groupvars",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Grouping variable name or names.",
    },
    BuiltinParamDescriptor {
        name: "method",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Summary method name or names.",
    },
    BuiltinParamDescriptor {
        name: "datavars",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Optional,
        default: None,
        description: "Data variable name or names.",
    },
];
/// Input list for `table.subsref`: receiver, index kind token and payload.
const OBJECT_INDEX_INPUTS: [BuiltinParamDescriptor; 3] = [
    BuiltinParamDescriptor {
        name: "obj",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Table object receiver.",
    },
    BuiltinParamDescriptor {
        name: "kind",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index kind token.",
    },
    BuiltinParamDescriptor {
        name: "payload",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index payload.",
    },
];
/// Input list for `table.subsasgn`: the `subsref` inputs plus the assigned
/// right-hand side.
const OBJECT_ASSIGN_INPUTS: [BuiltinParamDescriptor; 4] = [
    BuiltinParamDescriptor {
        name: "obj",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Table object receiver.",
    },
    BuiltinParamDescriptor {
        name: "kind",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index kind token.",
    },
    BuiltinParamDescriptor {
        name: "payload",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Index payload.",
    },
    BuiltinParamDescriptor {
        name: "rhs",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Assigned value.",
    },
];
181
/// Call forms of `readtable`: filename only, or filename plus name-value
/// options.
const READTABLE_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename)",
        inputs: &READTABLE_INPUTS_FILENAME,
        outputs: &ANY_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename, nameValuePairs...)",
        inputs: &READTABLE_INPUTS_NAME_VALUE,
        outputs: &ANY_OUTPUT,
    },
];
/// Single variadic call form of `table`.
const TABLE_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "T = table(variables...)",
    inputs: &TABLE_INPUTS_VALUES,
    outputs: &ANY_OUTPUT,
}];
/// Call form of `groupsummary`.
const GROUPSUMMARY_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "G = groupsummary(T, groupvars, method, datavars)",
    inputs: &GROUPSUMMARY_INPUTS,
    outputs: &ANY_OUTPUT,
}];
/// Call form of `height`; returns a count.
const HEIGHT_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = height(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
/// Call form of `width`; returns a count.
const WIDTH_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = width(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
/// Call form of the `table.subsref` indexing method.
const OBJECT_SUBSREF_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "out = table.subsref(obj, kind, payload)",
    inputs: &OBJECT_INDEX_INPUTS,
    outputs: &ANY_OUTPUT,
}];
/// Call form of the `table.subsasgn` indexed-assignment method.
const OBJECT_SUBSASGN_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "obj = table.subsasgn(obj, kind, payload, rhs)",
    inputs: &OBJECT_ASSIGN_INPUTS,
    outputs: &ANY_OUTPUT,
}];
224
/// Error descriptor for invalid arguments or table metadata.
const TABLE_ERROR_INVALID_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_ARGUMENT",
    identifier: Some("RunMat:table:InvalidArgument"),
    when: "Arguments or table metadata are invalid.",
    message: "table: invalid argument",
};
/// Error descriptor for invalid table indexing.
const TABLE_ERROR_INVALID_INDEX: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_INDEX",
    identifier: Some("RunMat:table:InvalidIndex"),
    when: "Table indexing is invalid.",
    message: "table: invalid index",
};
/// Error descriptor for an invalid table variable name or value.
const TABLE_ERROR_INVALID_VARIABLE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_VARIABLE",
    identifier: Some("RunMat:table:InvalidVariable"),
    when: "A table variable name or value is invalid.",
    message: "table: invalid variable",
};
/// Error descriptor for `readtable` file open/read failures.
const TABLE_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.IO",
    identifier: Some("RunMat:readtable:IOError"),
    when: "readtable cannot open or read the requested file.",
    message: "readtable: file read failed",
};
/// Error descriptor for file types `readtable` cannot import.
const TABLE_ERROR_UNSUPPORTED_FILE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.UNSUPPORTED_FILE",
    identifier: Some("RunMat:readtable:UnsupportedFileType"),
    when: "readtable receives a file type outside the text or spreadsheet import backends.",
    message: "readtable: unsupported file type",
};
/// Every error descriptor above; shared by all builtin descriptors in this
/// module.
const TABLE_ERRORS: [BuiltinErrorDescriptor; 5] = [
    TABLE_ERROR_INVALID_ARGUMENT,
    TABLE_ERROR_INVALID_INDEX,
    TABLE_ERROR_INVALID_VARIABLE,
    TABLE_ERROR_IO,
    TABLE_ERROR_UNSUPPORTED_FILE,
];
262
/// Descriptor referenced by the `readtable` builtin registration.
pub const READTABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &READTABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor referenced by the `table` builtin registration.
pub const TABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &TABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor referenced by the `groupsummary` builtin registration.
pub const GROUPSUMMARY_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &GROUPSUMMARY_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor referenced by the `height` builtin registration.
pub const HEIGHT_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &HEIGHT_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor referenced by the `width` builtin registration.
pub const WIDTH_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &WIDTH_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor referenced by the `table.subsref` registration; uses the
/// `MethodOnly` completion policy.
pub const TABLE_SUBSREF_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSREF_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
/// Descriptor referenced by the `table.subsasgn` registration; uses the
/// `MethodOnly` completion policy.
pub const TABLE_SUBSASGN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSASGN_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
305
/// GPU spec registered for the `table` builtin: no supported precisions, no
/// provider hooks, and a `GatherImmediately` residency policy.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::table")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "table",
    op_kind: GpuOpKind::Custom("table"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Tables are host containers. GPU variables are gathered when tabular algorithms need row-wise access.",
};
321
/// Fusion spec registered for the `table` builtin: declares neither an
/// elementwise nor a reduction form.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::table")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "table",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Tables are structured host containers and are not fusion operands.",
};
332
333fn table_error(error: &'static BuiltinErrorDescriptor, message: impl Into<String>) -> RuntimeError {
334 let mut builder = build_runtime_error(message).with_builtin(TABLE_CLASS);
335 if let Some(identifier) = error.identifier {
336 builder = builder.with_identifier(identifier);
337 }
338 builder.build()
339}
340
341fn table_error_with_source<E>(
342 error: &'static BuiltinErrorDescriptor,
343 message: impl Into<String>,
344 source: E,
345) -> RuntimeError
346where
347 E: std::error::Error + Send + Sync + 'static,
348{
349 let mut builder = build_runtime_error(message)
350 .with_builtin(TABLE_CLASS)
351 .with_source(source);
352 if let Some(identifier) = error.identifier {
353 builder = builder.with_identifier(identifier);
354 }
355 builder.build()
356}
357
358fn invalid_argument(message: impl Into<String>) -> RuntimeError {
359 table_error(&TABLE_ERROR_INVALID_ARGUMENT, message)
360}
361
362fn invalid_index(message: impl Into<String>) -> RuntimeError {
363 table_error(&TABLE_ERROR_INVALID_INDEX, message)
364}
365
366fn invalid_variable(message: impl Into<String>) -> RuntimeError {
367 table_error(&TABLE_ERROR_INVALID_VARIABLE, message)
368}
369
370fn map_control_flow(err: RuntimeError) -> RuntimeError {
371 let identifier = err.identifier().map(ToString::to_string);
372 let message = err.message().to_string();
373 let mut builder = build_runtime_error(message)
374 .with_builtin(TABLE_CLASS)
375 .with_source(err);
376 if let Some(identifier) = identifier {
377 builder = builder.with_identifier(identifier);
378 }
379 builder.build()
380}
381
382pub fn ensure_table_class_registered() {
383 TABLE_CLASS_REGISTERED.get_or_init(|| {
384 let mut properties = HashMap::new();
385 properties.insert(
386 PROPERTIES_MEMBER.to_string(),
387 PropertyDef {
388 name: PROPERTIES_MEMBER.to_string(),
389 is_static: false,
390 is_constant: false,
391 is_dependent: false,
392 get_access: Access::Public,
393 set_access: Access::Public,
394 default_value: Some(Value::Struct(default_properties(Vec::new(), None))),
395 },
396 );
397
398 let mut methods = HashMap::new();
399 for name in [OBJECT_SUBSREF_METHOD, OBJECT_SUBSASGN_METHOD] {
400 methods.insert(
401 name.to_string(),
402 MethodDef {
403 name: name.to_string(),
404 is_static: false,
405 is_abstract: false,
406 is_sealed: false,
407 access: Access::Public,
408 function_name: format!("{TABLE_CLASS}.{name}"),
409 implicit_class_argument: None,
410 },
411 );
412 }
413
414 runmat_builtins::register_class(ClassDef {
415 name: TABLE_CLASS.to_string(),
416 parent: None,
417 properties,
418 methods,
419 });
420 });
421}
422
423#[runtime_builtin(
424 name = "table",
425 category = "table",
426 summary = "Create a table from named column variables.",
427 keywords = "table,VariableNames,RowNames,Properties",
428 accel = "cpu",
429 type_resolver(crate::builtins::io::type_resolvers::struct_type),
430 descriptor(crate::builtins::table::TABLE_DESCRIPTOR),
431 builtin_path = "crate::builtins::table"
432)]
433async fn table_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
434 ensure_table_class_registered();
435 let gathered = gather_values(&args).await?;
436 let (variables, options) = split_table_constructor_args(gathered)?;
437 let names = if let Some(names) = options.variable_names {
438 names
439 } else {
440 generated_variable_names(variables.len())
441 };
442 table_from_columns_with_properties(names, variables, options.row_names)
443}
444
445#[runtime_builtin(
446 name = "readtable",
447 category = "io/tabular",
448 summary = "Import tabular text or spreadsheet data into a table.",
449 keywords = "readtable,table,csv,tsv,xlsx,xls,ods,spreadsheet,VariableNames,RowNames,Sheet,Range",
450 accel = "cpu",
451 type_resolver(crate::builtins::io::type_resolvers::struct_type),
452 descriptor(crate::builtins::table::READTABLE_DESCRIPTOR),
453 builtin_path = "crate::builtins::table"
454)]
455async fn readtable_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
456 ensure_table_class_registered();
457 let path_value = gather_if_needed_async(&path)
458 .await
459 .map_err(map_control_flow)?;
460 let args = gather_values(&rest).await?;
461 let options = ReadTableOptions::parse(&args)?;
462 let resolved = resolve_path(&path_value)?;
463 read_table_from_file(&resolved, &options).await
464}
465
466#[runtime_builtin(
467 name = "height",
468 category = "table",
469 summary = "Return the number of rows in a table.",
470 keywords = "height,table,rows",
471 descriptor(crate::builtins::table::HEIGHT_DESCRIPTOR),
472 builtin_path = "crate::builtins::table"
473)]
474async fn height_builtin(value: Value) -> BuiltinResult<Value> {
475 let host = gather_if_needed_async(&value)
476 .await
477 .map_err(map_control_flow)?;
478 if let Some(object) = table_object(&host) {
479 return Ok(Value::Num(table_height(object)? as f64));
480 }
481 value_row_count(&host).map(|n| Value::Num(n as f64))
482}
483
484#[runtime_builtin(
485 name = "width",
486 category = "table",
487 summary = "Return the number of variables in a table.",
488 keywords = "width,table,variables",
489 descriptor(crate::builtins::table::WIDTH_DESCRIPTOR),
490 builtin_path = "crate::builtins::table"
491)]
492async fn width_builtin(value: Value) -> BuiltinResult<Value> {
493 let host = gather_if_needed_async(&value)
494 .await
495 .map_err(map_control_flow)?;
496 if let Some(object) = table_object(&host) {
497 return Ok(Value::Num(table_width(object)? as f64));
498 }
499 match host {
500 Value::Tensor(t) => Ok(Value::Num(t.cols() as f64)),
501 Value::ComplexTensor(t) => Ok(Value::Num(t.cols as f64)),
502 Value::StringArray(sa) => Ok(Value::Num(sa.cols() as f64)),
503 Value::LogicalArray(la) => Ok(Value::Num(la.shape.get(1).copied().unwrap_or(1) as f64)),
504 Value::Cell(ca) => Ok(Value::Num(ca.cols as f64)),
505 Value::CharArray(ca) => Ok(Value::Num(ca.cols as f64)),
506 _ => Ok(Value::Num(1.0)),
507 }
508}
509
510#[runtime_builtin(
511 name = "groupsummary",
512 category = "table",
513 summary = "Group table rows and compute summary statistics for data variables.",
514 keywords = "groupsummary,group,table,mean,sum,count,median,min,max",
515 accel = "cpu",
516 descriptor(crate::builtins::table::GROUPSUMMARY_DESCRIPTOR),
517 builtin_path = "crate::builtins::table"
518)]
519async fn groupsummary_builtin(
520 table: Value,
521 groupvars: Value,
522 method: Value,
523 rest: Vec<Value>,
524) -> BuiltinResult<Value> {
525 let table = gather_if_needed_async(&table)
526 .await
527 .map_err(map_control_flow)?;
528 let groupvars = gather_if_needed_async(&groupvars)
529 .await
530 .map_err(map_control_flow)?;
531 let method = gather_if_needed_async(&method)
532 .await
533 .map_err(map_control_flow)?;
534 let rest = gather_values(&rest).await?;
535 groupsummary_impl(table, groupvars, method, rest)
536}
537
538#[runtime_builtin(
539 name = "table.subsref",
540 descriptor(crate::builtins::table::TABLE_SUBSREF_DESCRIPTOR),
541 builtin_path = "crate::builtins::table"
542)]
543async fn table_subsref(obj: Value, kind: String, payload: Value) -> BuiltinResult<Value> {
544 let object = into_table_object(obj, "table.subsref")?;
545 match kind.as_str() {
546 OBJECT_INDEX_MEMBER => table_member_get(&object, &payload),
547 OBJECT_INDEX_PAREN => table_paren_get(&object, &payload),
548 OBJECT_INDEX_BRACE => table_brace_get(&object, &payload),
549 other => Err(invalid_index(format!(
550 "table.subsref: unsupported indexing kind '{other}'"
551 ))),
552 }
553}
554
555#[runtime_builtin(
556 name = "table.subsasgn",
557 descriptor(crate::builtins::table::TABLE_SUBSASGN_DESCRIPTOR),
558 builtin_path = "crate::builtins::table"
559)]
560async fn table_subsasgn(
561 obj: Value,
562 kind: String,
563 payload: Value,
564 rhs: Value,
565) -> BuiltinResult<Value> {
566 let mut object = into_table_object(obj, "table.subsasgn")?;
567 match kind.as_str() {
568 OBJECT_INDEX_MEMBER => {
569 let field = scalar_text(&payload, "table member")?;
570 table_member_set(&mut object, &field, rhs)?;
571 Ok(Value::Object(object))
572 }
573 OBJECT_INDEX_PAREN => table_paren_assign(object, &payload, rhs),
574 OBJECT_INDEX_BRACE => table_brace_assign(object, &payload, rhs),
575 other => Err(invalid_index(format!(
576 "table.subsasgn: unsupported indexing kind '{other}'"
577 ))),
578 }
579}
580
581async fn gather_values(values: &[Value]) -> BuiltinResult<Vec<Value>> {
582 let mut out = Vec::with_capacity(values.len());
583 for value in values {
584 out.push(
585 gather_if_needed_async(value)
586 .await
587 .map_err(map_control_flow)?,
588 );
589 }
590 Ok(out)
591}
592
/// Name-value options recognised by the `table(...)` constructor.
#[derive(Default)]
struct TableConstructorOptions {
    /// Value of the `VariableNames` option, when supplied.
    variable_names: Option<Vec<String>>,
    /// Value of the `RowNames` option, when supplied.
    row_names: Option<Vec<String>>,
}
598
599fn split_table_constructor_args(
600 args: Vec<Value>,
601) -> BuiltinResult<(Vec<Value>, TableConstructorOptions)> {
602 let mut variables = Vec::new();
603 let mut options = TableConstructorOptions::default();
604 let mut idx = 0usize;
605 while idx < args.len() {
606 if let Ok(name) = scalar_text(&args[idx], "table option") {
607 if idx + 1 < args.len() && is_table_constructor_option(&name) {
608 let value = &args[idx + 1];
609 if name.eq_ignore_ascii_case("VariableNames") {
610 options.variable_names = Some(variable_name_list(value)?);
611 } else if name.eq_ignore_ascii_case("RowNames") {
612 options.row_names = Some(string_list(value)?);
613 }
614 idx += 2;
615 continue;
616 }
617 }
618 variables.push(args[idx].clone());
619 idx += 1;
620 }
621 Ok((variables, options))
622}
623
/// Returns `true` when `name` is `VariableNames` or `RowNames`, ignoring
/// ASCII case.
fn is_table_constructor_option(name: &str) -> bool {
    ["VariableNames", "RowNames"]
        .iter()
        .any(|option| name.eq_ignore_ascii_case(option))
}
627
628#[derive(Clone)]
629struct ReadTableOptions {
630 file_type: ImportFileType,
631 delimiter: Option<Delimiter>,
632 read_variable_names: Option<bool>,
633 read_row_names: bool,
634 variable_names: Option<Vec<String>>,
635 row_names: Option<Vec<String>>,
636 num_header_lines: usize,
637 range: Option<RangeSpec>,
638 sheet: Option<SheetSelector>,
639 preserve_variable_names: bool,
640 treat_as_missing: HashSet<String>,
641 empty_line_rule: EmptyLineRule,
642 encoding: String,
643}
644
645impl Default for ReadTableOptions {
646 fn default() -> Self {
647 Self {
648 file_type: ImportFileType::Auto,
649 delimiter: None,
650 read_variable_names: None,
651 read_row_names: false,
652 variable_names: None,
653 row_names: None,
654 num_header_lines: 0,
655 range: None,
656 sheet: None,
657 preserve_variable_names: false,
658 treat_as_missing: HashSet::new(),
659 empty_line_rule: EmptyLineRule::Skip,
660 encoding: "utf-8".to_string(),
661 }
662 }
663}
664
665impl ReadTableOptions {
666 fn parse(args: &[Value]) -> BuiltinResult<Self> {
667 let mut options = Self::default();
668 let mut idx = 0usize;
669 if let Some(Value::Struct(st)) = args.first() {
670 for (name, value) in &st.fields {
671 options.apply(name, value)?;
672 }
673 idx = 1;
674 }
675 while idx < args.len() {
676 if idx + 1 >= args.len() {
677 return Err(invalid_argument(
678 "readtable: name-value options must be provided in pairs",
679 ));
680 }
681 let name = scalar_text(&args[idx], "readtable option")?;
682 options.apply(&name, &args[idx + 1])?;
683 idx += 2;
684 }
685 Ok(options)
686 }
687
688 fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
689 if name.eq_ignore_ascii_case("FileType") {
690 self.file_type = ImportFileType::parse(value)?;
691 } else if name.eq_ignore_ascii_case("Delimiter") {
692 self.delimiter = Some(Delimiter::parse(value)?);
693 } else if name.eq_ignore_ascii_case("ReadVariableNames") {
694 self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
695 } else if name.eq_ignore_ascii_case("ReadRowNames") {
696 self.read_row_names = bool_scalar(value, "ReadRowNames")?;
697 } else if name.eq_ignore_ascii_case("VariableNames") {
698 self.variable_names = Some(variable_name_list(value)?);
699 } else if name.eq_ignore_ascii_case("RowNames") {
700 self.row_names = Some(string_list(value)?);
701 } else if name.eq_ignore_ascii_case("NumHeaderLines") {
702 self.num_header_lines = nonnegative_usize(value, "NumHeaderLines")?;
703 } else if name.eq_ignore_ascii_case("Range") {
704 self.range = Some(RangeSpec::parse(value)?);
705 } else if name.eq_ignore_ascii_case("Sheet") {
706 self.sheet = Some(SheetSelector::parse(value)?);
707 } else if name.eq_ignore_ascii_case("TreatAsMissing") {
708 for token in string_list(value)? {
709 self.treat_as_missing
710 .insert(token.trim().to_ascii_lowercase());
711 }
712 } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
713 self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
714 } else if name.eq_ignore_ascii_case("VariableNamingRule") {
715 let rule = scalar_text(value, "VariableNamingRule")?;
716 if rule.eq_ignore_ascii_case("preserve") {
717 self.preserve_variable_names = true;
718 } else if rule.eq_ignore_ascii_case("modify") {
719 self.preserve_variable_names = false;
720 } else {
721 return Err(invalid_argument(format!(
722 "readtable: unsupported VariableNamingRule '{rule}'"
723 )));
724 }
725 } else if name.eq_ignore_ascii_case("EmptyLineRule") {
726 let rule = scalar_text(value, "EmptyLineRule")?;
727 self.empty_line_rule = if rule.eq_ignore_ascii_case("read") {
728 EmptyLineRule::Read
729 } else if rule.eq_ignore_ascii_case("skip") {
730 EmptyLineRule::Skip
731 } else {
732 return Err(invalid_argument(format!(
733 "readtable: unsupported EmptyLineRule '{rule}'"
734 )));
735 };
736 } else if name.eq_ignore_ascii_case("Encoding") {
737 let encoding = scalar_text(value, "Encoding")?;
738 validate_encoding_label(&encoding)?;
739 self.encoding = encoding;
740 } else if name.eq_ignore_ascii_case("TextType") {
741 let text_type = scalar_text(value, "TextType")?;
742 if !(text_type.eq_ignore_ascii_case("string") || text_type.eq_ignore_ascii_case("char"))
743 {
744 return Err(invalid_argument(format!(
745 "readtable: unsupported TextType '{text_type}'"
746 )));
747 };
748 } else if name.eq_ignore_ascii_case("DatetimeType") {
749 let datetime_type = scalar_text(value, "DatetimeType")?;
750 if !(datetime_type.eq_ignore_ascii_case("datetime")
751 || datetime_type.eq_ignore_ascii_case("text")
752 || datetime_type.eq_ignore_ascii_case("exceldatenum"))
753 {
754 return Err(invalid_argument(format!(
755 "readtable: unsupported DatetimeType '{datetime_type}'"
756 )));
757 }
758 } else {
759 return Err(invalid_argument(format!(
760 "readtable: unsupported option '{name}'"
761 )));
762 }
763 Ok(())
764 }
765
766 fn is_missing(&self, token: &str) -> bool {
767 let trimmed = token.trim();
768 trimmed.is_empty()
769 || self
770 .treat_as_missing
771 .contains(&trimmed.to_ascii_lowercase())
772 }
773}
774
/// Value of the `EmptyLineRule` option (`"skip"` or `"read"`).
#[derive(Clone, Copy)]
enum EmptyLineRule {
    /// Selected by `"skip"`; the default.
    Skip,
    /// Selected by `"read"`.
    Read,
}
780
781#[derive(Clone, Copy, PartialEq, Eq)]
782enum ImportFileType {
783 Auto,
784 Text,
785 Spreadsheet,
786}
787
788impl ImportFileType {
789 fn parse(value: &Value) -> BuiltinResult<Self> {
790 let text = scalar_text(value, "FileType")?;
791 match text.trim().to_ascii_lowercase().as_str() {
792 "auto" => Ok(Self::Auto),
793 "text" | "delimitedtext" | "delimited" => Ok(Self::Text),
794 "spreadsheet" | "excel" => Ok(Self::Spreadsheet),
795 other => Err(invalid_argument(format!(
796 "readtable: unsupported FileType '{other}'"
797 ))),
798 }
799 }
800}
801
802#[derive(Clone)]
803enum SheetSelector {
804 Name(String),
805 Index(usize),
806}
807
808impl SheetSelector {
809 fn parse(value: &Value) -> BuiltinResult<Self> {
810 match value {
811 Value::Int(i) if i.to_i64() >= 1 => Ok(Self::Index(i.to_i64() as usize - 1)),
812 Value::Num(n)
813 if n.is_finite() && *n >= 1.0 && (n.round() - n).abs() <= f64::EPSILON =>
814 {
815 Ok(Self::Index(n.round() as usize - 1))
816 }
817 _ => {
818 let text = scalar_text(value, "Sheet")?;
819 if text.trim().is_empty() {
820 return Err(invalid_argument("readtable: Sheet must not be empty"));
821 }
822 Ok(Self::Name(text))
823 }
824 }
825 }
826}
827
828#[derive(Clone)]
829enum Delimiter {
830 Char(char),
831 String(String),
832 Whitespace,
833}
834
835impl Delimiter {
836 fn parse(value: &Value) -> BuiltinResult<Self> {
837 let text = scalar_text(value, "Delimiter")?;
838 if text.is_empty() {
839 return Err(invalid_argument("readtable: Delimiter must not be empty"));
840 }
841 match text.trim().to_ascii_lowercase().as_str() {
842 "tab" => Ok(Self::Char('\t')),
843 "space" | "whitespace" => Ok(Self::Whitespace),
844 "comma" => Ok(Self::Char(',')),
845 "semicolon" => Ok(Self::Char(';')),
846 "bar" | "pipe" => Ok(Self::Char('|')),
847 _ if text.chars().count() == 1 => Ok(Self::Char(text.chars().next().unwrap())),
848 _ => Ok(Self::String(text)),
849 }
850 }
851}
852
853#[derive(Clone, Copy)]
854struct RangeSpec {
855 start_row: usize,
856 start_col: usize,
857 end_row: Option<usize>,
858 end_col: Option<usize>,
859}
860
861impl RangeSpec {
862 fn parse(value: &Value) -> BuiltinResult<Self> {
863 match value {
864 Value::String(text) => Self::parse_text(text),
865 Value::CharArray(ca) if ca.rows == 1 => {
866 let text: String = ca.data.iter().collect();
867 Self::parse_text(&text)
868 }
869 Value::StringArray(sa) if sa.data.len() == 1 => Self::parse_text(&sa.data[0]),
870 Value::Tensor(t) if t.data.len() == 2 || t.data.len() == 4 => {
871 let mut indices = Vec::with_capacity(t.data.len());
872 for value in &t.data {
873 indices.push(one_based_to_zero(*value, usize::MAX, "Range")?);
874 }
875 Ok(Self {
876 start_row: indices[0],
877 start_col: indices[1],
878 end_row: indices.get(2).copied(),
879 end_col: indices.get(3).copied(),
880 })
881 }
882 _ => Err(invalid_argument(
883 "readtable: Range must be a cell reference string or numeric vector",
884 )),
885 }
886 }
887
888 fn parse_text(text: &str) -> BuiltinResult<Self> {
889 let trimmed = text.trim();
890 if trimmed.is_empty() {
891 return Err(invalid_argument("readtable: Range must not be empty"));
892 }
893 let parts: Vec<&str> = trimmed.split(':').collect();
894 if parts.len() > 2 {
895 return Err(invalid_argument(format!(
896 "readtable: invalid Range specification '{trimmed}'"
897 )));
898 }
899 let start = parse_cell_ref(parts[0])?;
900 let end = if parts.len() == 2 {
901 Some(parse_cell_ref(parts[1])?)
902 } else {
903 None
904 };
905 Ok(Self {
906 start_row: start.0.unwrap_or(0),
907 start_col: start.1.unwrap_or(0),
908 end_row: end.and_then(|item| item.0),
909 end_col: end.and_then(|item| item.1),
910 })
911 }
912}
913
914fn parse_cell_ref(token: &str) -> BuiltinResult<(Option<usize>, Option<usize>)> {
915 let mut letters = String::new();
916 let mut digits = String::new();
917 for ch in token.trim().chars() {
918 if ch == '$' {
919 continue;
920 }
921 if ch.is_ascii_alphabetic() {
922 letters.push(ch.to_ascii_uppercase());
923 } else if ch.is_ascii_digit() {
924 digits.push(ch);
925 } else {
926 return Err(invalid_argument(format!(
927 "readtable: invalid Range component '{token}'"
928 )));
929 }
930 }
931 let col = if letters.is_empty() {
932 None
933 } else {
934 let mut value = 0usize;
935 for ch in letters.chars() {
936 value = value
937 .checked_mul(26)
938 .and_then(|v| v.checked_add((ch as u8 - b'A' + 1) as usize))
939 .ok_or_else(|| invalid_argument("readtable: Range column overflow"))?;
940 }
941 Some(value - 1)
942 };
943 let row = if digits.is_empty() {
944 None
945 } else {
946 let parsed = digits
947 .parse::<usize>()
948 .map_err(|_| invalid_argument("readtable: invalid Range row"))?;
949 if parsed == 0 {
950 return Err(invalid_argument("readtable: Range rows are one-based"));
951 }
952 Some(parsed - 1)
953 };
954 Ok((row, col))
955}
956
957fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
958 let text = scalar_text(value, "filename").map_err(|_| {
959 table_error(
960 &TABLE_ERROR_INVALID_ARGUMENT,
961 "readtable: filename must be a string scalar or character vector",
962 )
963 })?;
964 if text.trim().is_empty() {
965 return Err(invalid_argument("readtable: filename must not be empty"));
966 }
967 let expanded =
968 expand_user_path(&text, "readtable").map_err(|msg| invalid_argument(msg.to_string()))?;
969 Ok(Path::new(&expanded).to_path_buf())
970}
971
972async fn read_table_from_file(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
973 match options.file_type {
974 ImportFileType::Spreadsheet => read_spreadsheet_table(path, options).await,
975 ImportFileType::Text => read_text_table(path, options).await,
976 ImportFileType::Auto if is_spreadsheet_path(path) => {
977 read_spreadsheet_table(path, options).await
978 }
979 ImportFileType::Auto => read_text_table(path, options).await,
980 }
981}
982
983async fn read_text_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
984 if options.sheet.is_some() {
985 return Err(invalid_argument(
986 "readtable: Sheet is only valid for spreadsheet files",
987 ));
988 }
989 let bytes = read_file_bytes(path).await?;
990 let text = decode_text_bytes(&bytes, &options.encoding)?;
991 let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
992 if let Some(first) = raw_lines.first_mut() {
993 if first.starts_with('\u{FEFF}') {
994 *first = first.trim_start_matches('\u{FEFF}').to_string();
995 }
996 }
997 let delimiter = options
998 .delimiter
999 .clone()
1000 .or_else(|| detect_delimiter(&raw_lines))
1001 .unwrap_or(Delimiter::Whitespace);
1002 let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
1003 if options.num_header_lines > 0 {
1004 rows = rows.into_iter().skip(options.num_header_lines).collect();
1005 }
1006 if let Some(range) = options.range {
1007 rows = apply_import_range(rows, range);
1008 }
1009 import_rows_to_table(rows, options)
1010}
1011
1012async fn read_spreadsheet_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1013 if options.delimiter.is_some() {
1014 return Err(invalid_argument(
1015 "readtable: Delimiter is only valid for text files",
1016 ));
1017 }
1018 let bytes = read_file_bytes(path).await?;
1019 let cursor = Cursor::new(bytes);
1020 let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
1021 table_error(
1022 &TABLE_ERROR_UNSUPPORTED_FILE,
1023 format!(
1024 "readtable: unable to open spreadsheet '{}': {err}",
1025 path.display()
1026 ),
1027 )
1028 })?;
1029 let range = match &options.sheet {
1030 Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
1031 invalid_argument(format!("readtable: unable to read sheet '{name}': {err:?}"))
1032 })?,
1033 Some(SheetSelector::Index(index)) => workbook
1034 .worksheet_range_at(*index)
1035 .ok_or_else(|| {
1036 invalid_argument(format!(
1037 "readtable: sheet index {} exceeds bounds",
1038 index + 1
1039 ))
1040 })?
1041 .map_err(|err| {
1042 invalid_argument(format!(
1043 "readtable: unable to read sheet {}: {err:?}",
1044 index + 1
1045 ))
1046 })?,
1047 None => workbook
1048 .worksheet_range_at(0)
1049 .ok_or_else(|| invalid_argument("readtable: spreadsheet contains no worksheets"))?
1050 .map_err(|err| {
1051 invalid_argument(format!("readtable: unable to read first sheet: {err:?}"))
1052 })?,
1053 };
1054 let rows = spreadsheet_range_to_rows(&range, options)?;
1055 import_rows_to_table(rows, options)
1056}
1057
1058async fn read_file_bytes(path: &Path) -> BuiltinResult<Vec<u8>> {
1059 let mut file = File::open_async(path).await.map_err(|err| {
1060 table_error_with_source(
1061 &TABLE_ERROR_IO,
1062 format!("readtable: unable to open '{}': {err}", path.display()),
1063 err,
1064 )
1065 })?;
1066 let mut bytes = Vec::new();
1067 file.read_to_end(&mut bytes).map_err(|err| {
1068 table_error_with_source(
1069 &TABLE_ERROR_IO,
1070 format!("readtable: unable to read '{}': {err}", path.display()),
1071 err,
1072 )
1073 })?;
1074 Ok(bytes)
1075}
1076
/// Reports whether the path's extension names a supported spreadsheet format.
///
/// The comparison ignores ASCII case. Paths without an extension, or whose extension is not
/// valid UTF-8, are not spreadsheets.
fn is_spreadsheet_path(path: &Path) -> bool {
    const SPREADSHEET_EXTENSIONS: [&str; 5] = ["xls", "xlsx", "xlsm", "xlsb", "ods"];

    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    SPREADSHEET_EXTENSIONS
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
}
1086
1087fn validate_encoding_label(label: &str) -> BuiltinResult<()> {
1088 encoding_for_label(label)
1089 .map(|_| ())
1090 .ok_or_else(|| invalid_argument(format!("readtable: unsupported Encoding '{label}'")))
1091}
1092
1093fn encoding_for_label(label: &str) -> Option<&'static Encoding> {
1094 let label = label.trim();
1095 if label.is_empty()
1096 || label.eq_ignore_ascii_case("auto")
1097 || label.eq_ignore_ascii_case("default")
1098 || label.eq_ignore_ascii_case("system")
1099 || label.eq_ignore_ascii_case("native")
1100 || label.eq_ignore_ascii_case("utf-8")
1101 || label.eq_ignore_ascii_case("utf8")
1102 || label.eq_ignore_ascii_case("unicode")
1103 {
1104 return Some(UTF_8);
1105 }
1106 Encoding::for_label(label.as_bytes())
1107}
1108
1109fn decode_text_bytes(bytes: &[u8], encoding: &str) -> BuiltinResult<String> {
1110 let (encoding, offset) = if encoding.trim().eq_ignore_ascii_case("auto") {
1111 Encoding::for_bom(bytes).unwrap_or((UTF_8, 0))
1112 } else {
1113 (
1114 encoding_for_label(encoding).ok_or_else(|| {
1115 invalid_argument(format!("readtable: unsupported Encoding '{encoding}'"))
1116 })?,
1117 0,
1118 )
1119 };
1120 let (decoded, _, had_errors) = encoding.decode(&bytes[offset..]);
1121 if had_errors {
1122 return Err(table_error(
1123 &TABLE_ERROR_IO,
1124 format!(
1125 "readtable: unable to decode file contents using encoding '{}'",
1126 encoding.name()
1127 ),
1128 ));
1129 }
1130 Ok(decoded.into_owned())
1131}
1132
1133#[derive(Clone, Debug)]
1134enum ImportCell {
1135 Empty,
1136 Text(String),
1137 Number(f64),
1138 Logical(bool),
1139 DateTime(f64),
1140 Error(String),
1141}
1142
1143impl ImportCell {
1144 fn from_text(text: String) -> Self {
1145 if text.trim().is_empty() {
1146 Self::Empty
1147 } else {
1148 Self::Text(text)
1149 }
1150 }
1151
1152 fn display_text(&self) -> String {
1153 match self {
1154 Self::Empty => String::new(),
1155 Self::Text(text) => text.clone(),
1156 Self::Number(value) => format_key_number(*value),
1157 Self::Logical(value) => value.to_string(),
1158 Self::DateTime(serial) => format_key_number(*serial),
1159 Self::Error(text) => text.clone(),
1160 }
1161 }
1162
1163 fn is_missing(&self, options: &ReadTableOptions) -> bool {
1164 match self {
1165 Self::Empty => true,
1166 Self::Text(text) => options.is_missing(text),
1167 _ => false,
1168 }
1169 }
1170
1171 fn is_likely_data_token(&self, options: &ReadTableOptions) -> bool {
1172 match self {
1173 Self::Number(_) | Self::Logical(_) | Self::DateTime(_) => true,
1174 Self::Empty => false,
1175 Self::Text(text) => {
1176 let token = unquote(text.trim()).trim();
1177 options.is_missing(token)
1178 || parse_numeric(token).is_some()
1179 || parse_logical(token).is_some()
1180 || parse_iso_datetime_to_datenum(token).is_some()
1181 }
1182 Self::Error(_) => true,
1183 }
1184 }
1185}
1186
1187fn spreadsheet_cell_to_import(cell: &SpreadsheetData) -> ImportCell {
1188 match cell {
1189 SpreadsheetData::Empty => ImportCell::Empty,
1190 SpreadsheetData::Int(value) => ImportCell::Number(*value as f64),
1191 SpreadsheetData::Float(value) => ImportCell::Number(*value),
1192 SpreadsheetData::String(text) => ImportCell::Text(text.clone()),
1193 SpreadsheetData::Bool(value) => ImportCell::Logical(*value),
1194 SpreadsheetData::DateTime(value) => value
1195 .as_datetime()
1196 .map(crate::builtins::datetime::datenum_from_naive)
1197 .map(ImportCell::DateTime)
1198 .unwrap_or_else(|| ImportCell::Number(value.as_f64())),
1199 SpreadsheetData::DateTimeIso(text) => parse_iso_datetime_to_datenum(text)
1200 .map(ImportCell::DateTime)
1201 .unwrap_or_else(|| ImportCell::Text(text.clone())),
1202 SpreadsheetData::DurationIso(text) => ImportCell::Text(text.clone()),
1203 SpreadsheetData::Error(err) => ImportCell::Error(err.to_string()),
1204 }
1205}
1206
1207fn spreadsheet_range_to_rows(
1208 range: &calamine::Range<SpreadsheetData>,
1209 options: &ReadTableOptions,
1210) -> BuiltinResult<Vec<Vec<ImportCell>>> {
1211 if range.is_empty() {
1212 return Ok(Vec::new());
1213 }
1214 let Some((range_start_row, range_start_col)) = range.start() else {
1215 return Ok(Vec::new());
1216 };
1217 let Some((range_end_row, range_end_col)) = range.end() else {
1218 return Ok(Vec::new());
1219 };
1220 let start_row = options
1221 .range
1222 .map(|spec| checked_u32(spec.start_row, "Range row"))
1223 .transpose()?
1224 .unwrap_or(range_start_row);
1225 let start_col = options
1226 .range
1227 .map(|spec| checked_u32(spec.start_col, "Range column"))
1228 .transpose()?
1229 .unwrap_or(range_start_col);
1230 let end_row = options
1231 .range
1232 .and_then(|spec| spec.end_row)
1233 .map(|row| checked_u32(row, "Range row"))
1234 .transpose()?
1235 .unwrap_or(range_end_row);
1236 let end_col = options
1237 .range
1238 .and_then(|spec| spec.end_col)
1239 .map(|col| checked_u32(col, "Range column"))
1240 .transpose()?
1241 .unwrap_or(range_end_col);
1242 if start_row > end_row || start_col > end_col {
1243 return Ok(Vec::new());
1244 }
1245 let mut rows = Vec::new();
1246 for row_idx in start_row..=end_row {
1247 let mut row = Vec::new();
1248 for col_idx in start_col..=end_col {
1249 row.push(
1250 range
1251 .get_value((row_idx, col_idx))
1252 .map(spreadsheet_cell_to_import)
1253 .unwrap_or(ImportCell::Empty),
1254 );
1255 }
1256 if matches!(options.empty_line_rule, EmptyLineRule::Skip)
1257 && row.iter().all(|cell| cell.is_missing(options))
1258 {
1259 continue;
1260 }
1261 rows.push(row);
1262 }
1263 if options.num_header_lines > 0 {
1264 Ok(rows.into_iter().skip(options.num_header_lines).collect())
1265 } else {
1266 Ok(rows)
1267 }
1268}
1269
1270fn checked_u32(value: usize, context: &str) -> BuiltinResult<u32> {
1271 u32::try_from(value).map_err(|_| invalid_argument(format!("readtable: {context} overflow")))
1272}
1273
1274fn detect_delimiter(lines: &[String]) -> Option<Delimiter> {
1275 let candidates = [',', '\t', ';', '|'];
1276 let mut best: Option<(f64, Delimiter)> = None;
1277 for candidate in candidates {
1278 let counts = lines
1279 .iter()
1280 .take(32)
1281 .filter(|line| line.contains(candidate))
1282 .map(|line| split_with_char_delim(line, candidate).len())
1283 .filter(|count| *count >= 2)
1284 .collect::<Vec<_>>();
1285 if counts.is_empty() {
1286 continue;
1287 }
1288 let avg = counts.iter().copied().sum::<usize>() as f64 / counts.len() as f64;
1289 if avg >= 2.0
1290 && best
1291 .as_ref()
1292 .map(|(best_avg, _)| avg > *best_avg)
1293 .unwrap_or(true)
1294 {
1295 best = Some((avg, Delimiter::Char(candidate)));
1296 }
1297 }
1298 best.map(|(_, delimiter)| delimiter).or_else(|| {
1299 lines
1300 .iter()
1301 .take(32)
1302 .any(|line| line.split_whitespace().count() > 1)
1303 .then_some(Delimiter::Whitespace)
1304 })
1305}
1306
/// Splits a single line on `delimiter`, honouring double-quoted sections.
///
/// Quote characters are removed from the output, a doubled quote inside a quoted section yields
/// one literal `"`, and delimiters inside quotes do not split. The result always has at least
/// one field.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut rest = line.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            // Escaped quote ("") inside a quoted section.
            '"' if quoted && rest.peek() == Some(&'"') => {
                field.push('"');
                rest.next();
            }
            '"' => quoted = !quoted,
            _ if ch == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            _ => field.push(ch),
        }
    }
    fields.push(field);
    fields
}
1332
1333fn parse_text_records(
1334 text: &str,
1335 delimiter: &Delimiter,
1336 empty_line_rule: EmptyLineRule,
1337) -> Vec<Vec<ImportCell>> {
1338 match delimiter {
1339 Delimiter::Whitespace => parse_whitespace_records(text, empty_line_rule),
1340 Delimiter::Char(ch) => parse_delimited_records(text, &ch.to_string(), empty_line_rule),
1341 Delimiter::String(pattern) => parse_delimited_records(text, pattern, empty_line_rule),
1342 }
1343}
1344
1345fn parse_delimited_records(
1346 text: &str,
1347 delimiter: &str,
1348 empty_line_rule: EmptyLineRule,
1349) -> Vec<Vec<ImportCell>> {
1350 let mut records = Vec::new();
1351 let mut row = Vec::new();
1352 let mut current = String::new();
1353 let mut in_quotes = false;
1354 let mut idx = 0usize;
1355 while idx < text.len() {
1356 let ch = text[idx..].chars().next().expect("valid char boundary");
1357 if ch == '"' {
1358 if in_quotes && text[idx + ch.len_utf8()..].starts_with('"') {
1359 current.push('"');
1360 idx += ch.len_utf8() + 1;
1361 continue;
1362 }
1363 in_quotes = !in_quotes;
1364 idx += ch.len_utf8();
1365 continue;
1366 }
1367 if !in_quotes && !delimiter.is_empty() && text[idx..].starts_with(delimiter) {
1368 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1369 idx += delimiter.len();
1370 continue;
1371 }
1372 if !in_quotes && (ch == '\n' || ch == '\r') {
1373 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1374 push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1375 idx += ch.len_utf8();
1376 if ch == '\r' && text[idx..].starts_with('\n') {
1377 idx += 1;
1378 }
1379 continue;
1380 }
1381 current.push(ch);
1382 idx += ch.len_utf8();
1383 }
1384 if !current.is_empty() || !row.is_empty() || text.ends_with(delimiter) {
1385 row.push(ImportCell::from_text(current));
1386 push_import_record(&mut records, row, empty_line_rule);
1387 }
1388 records
1389}
1390
1391fn parse_whitespace_records(text: &str, empty_line_rule: EmptyLineRule) -> Vec<Vec<ImportCell>> {
1392 let mut records = Vec::new();
1393 let mut row = Vec::new();
1394 let mut current = String::new();
1395 let mut in_quotes = false;
1396 let mut field_open = false;
1397 let mut chars = text.chars().peekable();
1398 while let Some(ch) = chars.next() {
1399 if ch == '"' {
1400 if in_quotes && chars.peek() == Some(&'"') {
1401 current.push('"');
1402 chars.next();
1403 } else {
1404 in_quotes = !in_quotes;
1405 }
1406 field_open = true;
1407 continue;
1408 }
1409 if !in_quotes && (ch == '\n' || ch == '\r') {
1410 if field_open || !current.is_empty() {
1411 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1412 }
1413 field_open = false;
1414 push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
1415 if ch == '\r' && chars.peek() == Some(&'\n') {
1416 chars.next();
1417 }
1418 continue;
1419 }
1420 if !in_quotes && ch.is_whitespace() {
1421 if field_open || !current.is_empty() {
1422 row.push(ImportCell::from_text(std::mem::take(&mut current)));
1423 field_open = false;
1424 }
1425 continue;
1426 }
1427 current.push(ch);
1428 field_open = true;
1429 }
1430 if field_open || !current.is_empty() {
1431 row.push(ImportCell::from_text(current));
1432 }
1433 if !row.is_empty() {
1434 push_import_record(&mut records, row, empty_line_rule);
1435 }
1436 records
1437}
1438
1439fn push_import_record(
1440 records: &mut Vec<Vec<ImportCell>>,
1441 row: Vec<ImportCell>,
1442 empty_line_rule: EmptyLineRule,
1443) {
1444 if matches!(empty_line_rule, EmptyLineRule::Skip)
1445 && row.iter().all(|cell| matches!(cell, ImportCell::Empty))
1446 {
1447 return;
1448 }
1449 records.push(row);
1450}
1451
1452fn apply_import_range(rows: Vec<Vec<ImportCell>>, range: RangeSpec) -> Vec<Vec<ImportCell>> {
1453 if rows.is_empty() {
1454 return rows;
1455 }
1456 let end_row = range
1457 .end_row
1458 .unwrap_or_else(|| rows.len().saturating_sub(1));
1459 let max_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
1460 let end_col = range.end_col.unwrap_or_else(|| max_cols.saturating_sub(1));
1461 rows.into_iter()
1462 .enumerate()
1463 .filter_map(|(idx, row)| {
1464 if idx < range.start_row || idx > end_row {
1465 return None;
1466 }
1467 let selected = (range.start_col..=end_col)
1468 .map(|col| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1469 .collect::<Vec<_>>();
1470 Some(selected)
1471 })
1472 .collect()
1473}
1474
1475fn import_rows_to_table(
1476 mut rows: Vec<Vec<ImportCell>>,
1477 options: &ReadTableOptions,
1478) -> BuiltinResult<Value> {
1479 if rows.is_empty() {
1480 return table_from_columns_with_properties(
1481 Vec::new(),
1482 Vec::new(),
1483 options.row_names.clone(),
1484 );
1485 }
1486
1487 let mut variable_names = options.variable_names.clone();
1488 let read_variable_names = options
1489 .read_variable_names
1490 .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
1491 if variable_names.is_none() && read_variable_names && !rows.is_empty() {
1492 variable_names = Some(
1493 rows.remove(0)
1494 .into_iter()
1495 .map(|cell| cell.display_text())
1496 .collect(),
1497 );
1498 }
1499
1500 let mut row_names = options.row_names.clone();
1501 if options.read_row_names && !rows.is_empty() {
1502 row_names = Some(
1503 rows.iter_mut()
1504 .map(|row| {
1505 if row.is_empty() {
1506 String::new()
1507 } else {
1508 row.remove(0).display_text()
1509 }
1510 })
1511 .collect(),
1512 );
1513 if let Some(names) = variable_names.as_mut() {
1514 if !names.is_empty() {
1515 names.remove(0);
1516 }
1517 }
1518 }
1519
1520 let max_cols = rows
1521 .iter()
1522 .map(Vec::len)
1523 .max()
1524 .unwrap_or(0)
1525 .max(variable_names.as_ref().map(Vec::len).unwrap_or(0));
1526 let names = match variable_names {
1527 Some(mut names) => {
1528 while names.len() < max_cols {
1529 names.push(format!("Var{}", names.len() + 1));
1530 }
1531 names.truncate(max_cols);
1532 if options.preserve_variable_names {
1533 make_unique_names(names)
1534 } else {
1535 make_unique_variable_names(names)
1536 }
1537 }
1538 None => generated_variable_names(max_cols),
1539 };
1540
1541 let mut columns = Vec::with_capacity(names.len());
1542 for col in 0..names.len() {
1543 let values = rows
1544 .iter()
1545 .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1546 .collect::<Vec<_>>();
1547 columns.push(infer_import_column(values, options)?);
1548 }
1549 table_from_columns_with_properties(names, columns, row_names)
1550}
1551
1552fn should_read_variable_names(rows: &[Vec<ImportCell>], options: &ReadTableOptions) -> bool {
1553 let Some(first) = rows.first() else {
1554 return false;
1555 };
1556 if first.is_empty() {
1557 return false;
1558 }
1559 let names = first
1560 .iter()
1561 .map(ImportCell::display_text)
1562 .map(|text| text.trim().to_string())
1563 .collect::<Vec<_>>();
1564 if names.iter().any(|name| name.is_empty()) {
1565 return false;
1566 }
1567 if first.iter().all(|cell| cell.is_likely_data_token(options)) {
1568 return false;
1569 }
1570 true
1571}
1572
1573fn infer_import_column(
1574 values: Vec<ImportCell>,
1575 options: &ReadTableOptions,
1576) -> BuiltinResult<Value> {
1577 let mut numeric = Vec::with_capacity(values.len());
1578 let mut all_numeric = true;
1579 for value in &values {
1580 match value {
1581 ImportCell::Empty => numeric.push(f64::NAN),
1582 ImportCell::Number(value) => numeric.push(*value),
1583 ImportCell::Text(text) => {
1584 let token = unquote(text.trim()).trim();
1585 if options.is_missing(token) {
1586 numeric.push(f64::NAN);
1587 } else if let Some(value) = parse_numeric(token) {
1588 numeric.push(value);
1589 } else {
1590 all_numeric = false;
1591 break;
1592 }
1593 }
1594 _ => {
1595 all_numeric = false;
1596 break;
1597 }
1598 }
1599 }
1600 if all_numeric {
1601 return Tensor::new(numeric, vec![values.len(), 1])
1602 .map(Value::Tensor)
1603 .map_err(|err| invalid_variable(format!("readtable: {err}")));
1604 }
1605
1606 let mut logical = Vec::with_capacity(values.len());
1607 let mut all_logical = true;
1608 for value in &values {
1609 match value {
1610 ImportCell::Empty => logical.push(0),
1611 ImportCell::Logical(value) => logical.push(i32::from(*value) as u8),
1612 ImportCell::Text(text) => {
1613 let token = unquote(text.trim()).trim();
1614 if options.is_missing(token) {
1615 logical.push(0);
1616 } else if let Some(value) = parse_logical(token) {
1617 logical.push(i32::from(value) as u8);
1618 } else {
1619 all_logical = false;
1620 break;
1621 }
1622 }
1623 _ => {
1624 all_logical = false;
1625 break;
1626 }
1627 }
1628 }
1629 if all_logical {
1630 return LogicalArray::new(logical, vec![values.len(), 1])
1631 .map(Value::LogicalArray)
1632 .map_err(|err| invalid_variable(format!("readtable: {err}")));
1633 }
1634
1635 let mut serials = Vec::with_capacity(values.len());
1636 let mut all_datetime = true;
1637 for value in &values {
1638 match value {
1639 ImportCell::Empty => serials.push(f64::NAN),
1640 ImportCell::DateTime(serial) => serials.push(*serial),
1641 ImportCell::Text(text) => {
1642 let token = unquote(text.trim()).trim();
1643 if options.is_missing(token) {
1644 serials.push(f64::NAN);
1645 } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
1646 serials.push(serial);
1647 } else {
1648 all_datetime = false;
1649 break;
1650 }
1651 }
1652 _ => {
1653 all_datetime = false;
1654 break;
1655 }
1656 }
1657 }
1658 if all_datetime {
1659 let tensor = Tensor::new(serials, vec![values.len(), 1])
1660 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
1661 return crate::builtins::datetime::datetime_object_from_serial_tensor(
1662 tensor,
1663 "yyyy-MM-dd HH:mm:ss",
1664 );
1665 }
1666
1667 let strings = values
1668 .into_iter()
1669 .map(|value| {
1670 if value.is_missing(options) {
1671 return String::new();
1672 }
1673 unquote(value.display_text().trim()).to_string()
1674 })
1675 .collect::<Vec<_>>();
1676 let len = strings.len();
1677 StringArray::new(strings, vec![len, 1])
1678 .map(Value::StringArray)
1679 .map_err(|err| invalid_variable(format!("readtable: {err}")))
1680}
1681
/// Parses a token as a floating-point number.
///
/// `nan`, `inf`/`infinity` (optionally with `+`), and `-inf`/`-infinity` are recognised without
/// regard to ASCII case; anything else is delegated to `f64`'s `FromStr`. The token is not
/// trimmed here.
fn parse_numeric(token: &str) -> Option<f64> {
    const POSITIVE_INFINITY: [&str; 4] = ["inf", "+inf", "infinity", "+infinity"];
    const NEGATIVE_INFINITY: [&str; 2] = ["-inf", "-infinity"];

    let is_one_of = |names: &[&str]| names.iter().any(|name| token.eq_ignore_ascii_case(name));

    if token.eq_ignore_ascii_case("nan") {
        Some(f64::NAN)
    } else if is_one_of(&POSITIVE_INFINITY) {
        Some(f64::INFINITY)
    } else if is_one_of(&NEGATIVE_INFINITY) {
        Some(f64::NEG_INFINITY)
    } else {
        token.parse::<f64>().ok()
    }
}
1690
/// Parses a token as a boolean, ignoring ASCII case.
///
/// `true`, `t`, `yes`, and `on` are true; `false`, `f`, `no`, and `off` are false; anything else
/// is `None`.
fn parse_logical(token: &str) -> Option<bool> {
    const TRUE_WORDS: [&str; 4] = ["true", "t", "yes", "on"];
    const FALSE_WORDS: [&str; 4] = ["false", "f", "no", "off"];

    let is_one_of = |words: &[&str]| words.iter().any(|word| token.eq_ignore_ascii_case(word));

    if is_one_of(&TRUE_WORDS) {
        Some(true)
    } else if is_one_of(&FALSE_WORDS) {
        Some(false)
    } else {
        None
    }
}
1698
1699fn parse_iso_datetime_to_datenum(token: &str) -> Option<f64> {
1700 let trimmed = token.trim();
1701 if trimmed.is_empty() {
1702 return None;
1703 }
1704 for format in [
1705 "%Y-%m-%dT%H:%M:%S%.f",
1706 "%Y-%m-%d %H:%M:%S%.f",
1707 "%Y/%m/%d %H:%M:%S%.f",
1708 "%m/%d/%Y %H:%M:%S%.f",
1709 ] {
1710 if let Ok(value) = NaiveDateTime::parse_from_str(trimmed, format) {
1711 return Some(crate::builtins::datetime::datenum_from_naive(value));
1712 }
1713 }
1714 for format in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"] {
1715 if let Ok(date) = NaiveDate::parse_from_str(trimmed, format) {
1716 return Some(crate::builtins::datetime::datenum_from_naive(
1717 date.and_time(NaiveTime::MIN),
1718 ));
1719 }
1720 }
1721 None
1722}
1723
/// Strips one pair of matching surrounding quotes (`"…"` or `'…'`) from `token`.
///
/// The token is returned unchanged when it is not wrapped in a matching pair; a lone quote
/// character does not count as a pair.
fn unquote(token: &str) -> &str {
    for quote in ['"', '\''] {
        let inner = token
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    token
}
1735
1736fn default_properties(variable_names: Vec<String>, row_names: Option<Vec<String>>) -> StructValue {
1737 let mut props = StructValue::new();
1738 props.insert(
1739 VARIABLE_NAMES,
1740 Value::StringArray(
1741 StringArray::new(variable_names.clone(), vec![1, variable_names.len()])
1742 .expect("VariableNames shape is valid"),
1743 ),
1744 );
1745 props.insert(
1746 ROW_NAMES,
1747 row_names
1748 .map(|names| {
1749 Value::StringArray(
1750 StringArray::new(names.clone(), vec![names.len(), 1])
1751 .expect("RowNames shape is valid"),
1752 )
1753 })
1754 .unwrap_or_else(|| {
1755 Value::StringArray(StringArray::new(Vec::new(), vec![0, 1]).unwrap())
1756 }),
1757 );
1758 props.insert(
1759 DIMENSION_NAMES,
1760 Value::StringArray(
1761 StringArray::new(
1762 vec![
1763 DEFAULT_ROW_DIM_NAME.to_string(),
1764 DEFAULT_VARIABLE_DIM_NAME.to_string(),
1765 ],
1766 vec![1, 2],
1767 )
1768 .expect("DimensionNames shape is valid"),
1769 ),
1770 );
1771 props.insert(
1772 VARIABLE_UNITS,
1773 Value::StringArray(
1774 StringArray::new(
1775 vec![String::new(); variable_names.len()],
1776 vec![1, variable_names.len()],
1777 )
1778 .expect("VariableUnits shape is valid"),
1779 ),
1780 );
1781 props.insert(
1782 VARIABLE_DESCRIPTIONS,
1783 Value::StringArray(
1784 StringArray::new(
1785 vec![String::new(); variable_names.len()],
1786 vec![1, variable_names.len()],
1787 )
1788 .expect("VariableDescriptions shape is valid"),
1789 ),
1790 );
1791 props.insert(DESCRIPTION, Value::String(String::new()));
1792 props.insert(USER_DATA, Value::Tensor(Tensor::zeros(vec![0, 0])));
1793 props
1794}
1795
1796pub fn table_from_columns(names: Vec<String>, columns: Vec<Value>) -> BuiltinResult<Value> {
1797 table_from_columns_with_properties(names, columns, None)
1798}
1799
1800fn table_from_columns_with_properties(
1801 names: Vec<String>,
1802 columns: Vec<Value>,
1803 row_names: Option<Vec<String>>,
1804) -> BuiltinResult<Value> {
1805 ensure_table_class_registered();
1806 if names.len() != columns.len() {
1807 return Err(invalid_variable(
1808 "table: number of variable names must match number of variables",
1809 ));
1810 }
1811 let names = make_unique_names(names);
1812 let height = validate_column_heights(&names, &columns)?;
1813 if let Some(row_names) = &row_names {
1814 if row_names.len() != height {
1815 return Err(invalid_variable(
1816 "table: number of row names must match table height",
1817 ));
1818 }
1819 }
1820 let mut variables = StructValue::new();
1821 for (name, value) in names.iter().cloned().zip(columns) {
1822 variables.insert(name, value);
1823 }
1824 let props = default_properties(names, row_names);
1825 let mut object = ObjectInstance::new(TABLE_CLASS.to_string());
1826 object
1827 .properties
1828 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
1829 object.properties.insert(
1830 TABLE_PROPERTIES_FIELD.to_string(),
1831 Value::Struct(props.clone()),
1832 );
1833 object
1834 .properties
1835 .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
1836 Ok(Value::Object(object))
1837}
1838
1839fn validate_column_heights(names: &[String], columns: &[Value]) -> BuiltinResult<usize> {
1840 if columns.is_empty() {
1841 return Ok(0);
1842 }
1843 let height = value_row_count(&columns[0])?;
1844 for (name, value) in names.iter().zip(columns) {
1845 let rows = value_row_count(value)?;
1846 if rows != height {
1847 return Err(invalid_variable(format!(
1848 "table: variable '{name}' has {rows} rows but expected {height}"
1849 )));
1850 }
1851 }
1852 Ok(height)
1853}
1854
1855pub fn is_table_value(value: &Value) -> bool {
1856 table_object(value).is_some()
1857}
1858
1859fn table_object(value: &Value) -> Option<&ObjectInstance> {
1860 match value {
1861 Value::Object(object) if object.is_class(TABLE_CLASS) => Some(object),
1862 _ => None,
1863 }
1864}
1865
1866fn into_table_object(value: Value, context: &str) -> BuiltinResult<ObjectInstance> {
1867 match value {
1868 Value::Object(object) if object.is_class(TABLE_CLASS) => Ok(object),
1869 other => Err(invalid_argument(format!(
1870 "{context}: expected table, got {other:?}"
1871 ))),
1872 }
1873}
1874
1875pub fn table_variables(object: &ObjectInstance) -> BuiltinResult<StructValue> {
1876 match object.properties.get(TABLE_VARIABLES_FIELD) {
1877 Some(Value::Struct(st)) => Ok(st.clone()),
1878 Some(other) => Err(invalid_variable(format!(
1879 "table: invalid internal variable storage {other:?}"
1880 ))),
1881 None => Ok(StructValue::new()),
1882 }
1883}
1884
1885pub fn table_variable_names_from_object(object: &ObjectInstance) -> BuiltinResult<Vec<String>> {
1886 let variables = table_variables(object)?;
1887 Ok(variables.fields.keys().cloned().collect())
1888}
1889
1890pub fn table_height(object: &ObjectInstance) -> BuiltinResult<usize> {
1891 let variables = table_variables(object)?;
1892 match variables.fields.values().next() {
1893 Some(value) => value_row_count(value),
1894 None => Ok(0),
1895 }
1896}
1897
1898pub fn table_width(object: &ObjectInstance) -> BuiltinResult<usize> {
1899 table_variables(object).map(|vars| vars.fields.len())
1900}
1901
1902fn table_public_properties(object: &ObjectInstance) -> BuiltinResult<StructValue> {
1903 match object
1904 .properties
1905 .get(TABLE_PROPERTIES_FIELD)
1906 .or_else(|| object.properties.get(PROPERTIES_MEMBER))
1907 {
1908 Some(Value::Struct(st)) => Ok(st.clone()),
1909 Some(other) => Err(invalid_variable(format!(
1910 "table: invalid Properties storage {other:?}"
1911 ))),
1912 None => Ok(default_properties(
1913 table_variable_names_from_object(object)?,
1914 None,
1915 )),
1916 }
1917}
1918
1919fn sync_table_properties(object: &mut ObjectInstance, props: StructValue) {
1920 object.properties.insert(
1921 TABLE_PROPERTIES_FIELD.to_string(),
1922 Value::Struct(props.clone()),
1923 );
1924 object
1925 .properties
1926 .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
1927}
1928
1929fn table_member_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
1930 let name = scalar_text(payload, "table member")?;
1931 if name == PROPERTIES_MEMBER {
1932 return Ok(Value::Struct(table_public_properties(object)?));
1933 }
1934 let variables = table_variables(object)?;
1935 variables
1936 .fields
1937 .get(&name)
1938 .cloned()
1939 .ok_or_else(|| invalid_variable(format!("table: unrecognized variable '{name}'")))
1940}
1941
1942fn table_member_set(object: &mut ObjectInstance, field: &str, rhs: Value) -> BuiltinResult<()> {
1943 if field == PROPERTIES_MEMBER {
1944 let Value::Struct(props) = rhs else {
1945 return Err(invalid_variable(
1946 "table: Properties assignment expects a scalar struct",
1947 ));
1948 };
1949 apply_properties(object, props)?;
1950 return Ok(());
1951 }
1952 let mut variables = table_variables(object)?;
1953 let mut names = table_variable_names_from_object(object)?;
1954 let height = table_height(object)?;
1955 let rhs_rows = value_row_count(&rhs)?;
1956 if !variables.fields.is_empty() && rhs_rows != height {
1957 return Err(invalid_variable(format!(
1958 "table: variable '{field}' has {rhs_rows} rows but table has {height}"
1959 )));
1960 }
1961 if !variables.fields.contains_key(field) {
1962 names.push(field.to_string());
1963 }
1964 variables.insert(field.to_string(), rhs);
1965 object
1966 .properties
1967 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
1968 let mut props = table_public_properties(object)?;
1969 update_variable_metadata_names(&mut props, names)?;
1970 sync_table_properties(object, props);
1971 Ok(())
1972}
1973
1974fn apply_properties(object: &mut ObjectInstance, mut props: StructValue) -> BuiltinResult<()> {
1975 if let Some(value) = props.fields.get(VARIABLE_NAMES) {
1976 let names = variable_name_list(value)?;
1977 rename_table_variables(object, names.clone())?;
1978 update_variable_metadata_names(&mut props, names)?;
1979 }
1980 sync_table_properties(object, props);
1981 Ok(())
1982}
1983
1984fn rename_table_variables(
1985 object: &mut ObjectInstance,
1986 new_names: Vec<String>,
1987) -> BuiltinResult<()> {
1988 let old_names = table_variable_names_from_object(object)?;
1989 if old_names.len() != new_names.len() {
1990 return Err(invalid_variable(
1991 "table: VariableNames assignment must preserve variable count",
1992 ));
1993 }
1994 let new_names = make_unique_variable_names(new_names);
1995 let variables = table_variables(object)?;
1996 let mut renamed = StructValue::new();
1997 for (old, new) in old_names.iter().zip(new_names.iter()) {
1998 let value = variables
1999 .fields
2000 .get(old)
2001 .cloned()
2002 .ok_or_else(|| invalid_variable(format!("table: missing variable '{old}'")))?;
2003 renamed.insert(new.clone(), value);
2004 }
2005 object
2006 .properties
2007 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(renamed));
2008 Ok(())
2009}
2010
2011fn update_variable_metadata_names(
2012 props: &mut StructValue,
2013 names: Vec<String>,
2014) -> BuiltinResult<()> {
2015 props.insert(
2016 VARIABLE_NAMES,
2017 Value::StringArray(
2018 StringArray::new(names.clone(), vec![1, names.len()])
2019 .map_err(|err| invalid_variable(format!("table: {err}")))?,
2020 ),
2021 );
2022 for field in [VARIABLE_UNITS, VARIABLE_DESCRIPTIONS] {
2023 let existing = props.fields.get(field).cloned();
2024 let values = match existing {
2025 Some(Value::StringArray(mut array)) => {
2026 array.data.resize(names.len(), String::new());
2027 array.data.truncate(names.len());
2028 array.data
2029 }
2030 _ => vec![String::new(); names.len()],
2031 };
2032 props.insert(
2033 field,
2034 Value::StringArray(
2035 StringArray::new(values, vec![1, names.len()])
2036 .map_err(|err| invalid_variable(format!("table: {err}")))?,
2037 ),
2038 );
2039 }
2040 Ok(())
2041}
2042
2043fn table_paren_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2044 let selectors = selector_values(payload)?;
2045 let rows = parse_row_selector(selectors.first(), table_height(object)?)?;
2046 let variable_names = table_variable_names_from_object(object)?;
2047 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2048 let variables = table_variables(object)?;
2049 let mut out = Vec::with_capacity(selected_names.len());
2050 for name in &selected_names {
2051 let value = variables
2052 .fields
2053 .get(name)
2054 .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
2055 out.push(select_rows(value, &rows)?);
2056 }
2057 let row_names = selected_row_names(object, &rows)?;
2058 table_from_columns_with_properties(selected_names, out, row_names)
2059}
2060
2061fn table_brace_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
2062 let subset = table_paren_get(object, payload)?;
2063 let object = into_table_object(subset, "table brace indexing")?;
2064 let variables = table_variables(&object)?;
2065 if variables.fields.len() == 1 {
2066 return variables
2067 .fields
2068 .values()
2069 .next()
2070 .cloned()
2071 .ok_or_else(|| invalid_variable("table: missing selected variable"));
2072 }
2073 let values = variables.fields.values().collect::<Vec<_>>();
2074 if values.iter().all(|value| matches!(value, Value::Tensor(_))) {
2075 return concatenate_numeric_columns(&values);
2076 }
2077 CellArray::new(
2078 values.into_iter().cloned().collect(),
2079 1,
2080 variables.fields.len(),
2081 )
2082 .map(Value::Cell)
2083 .map_err(|err| invalid_variable(format!("table: {err}")))
2084}
2085
2086fn table_paren_assign(
2087 mut object: ObjectInstance,
2088 payload: &Value,
2089 rhs: Value,
2090) -> BuiltinResult<Value> {
2091 let rhs_table = into_table_object(rhs, "table paren assignment")?;
2092 let selectors = selector_values(payload)?;
2093 let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2094 let variable_names = table_variable_names_from_object(&object)?;
2095 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2096 let rhs_names = table_variable_names_from_object(&rhs_table)?;
2097 if selected_names.len() != rhs_names.len() {
2098 return Err(invalid_variable(
2099 "table: assignment variable count must match selected variables",
2100 ));
2101 }
2102 let mut variables = table_variables(&object)?;
2103 let rhs_variables = table_variables(&rhs_table)?;
2104 for (target_name, rhs_name) in selected_names.iter().zip(rhs_names.iter()) {
2105 let current =
2106 variables.fields.get(target_name).cloned().ok_or_else(|| {
2107 invalid_variable(format!("table: missing variable '{target_name}'"))
2108 })?;
2109 let rhs_col =
2110 rhs_variables.fields.get(rhs_name).cloned().ok_or_else(|| {
2111 invalid_variable(format!("table: missing rhs variable '{rhs_name}'"))
2112 })?;
2113 variables.insert(target_name.clone(), assign_rows(current, &rows, rhs_col)?);
2114 }
2115 object
2116 .properties
2117 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2118 Ok(Value::Object(object))
2119}
2120
2121fn table_brace_assign(
2122 mut object: ObjectInstance,
2123 payload: &Value,
2124 rhs: Value,
2125) -> BuiltinResult<Value> {
2126 let selectors = selector_values(payload)?;
2127 let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
2128 let variable_names = table_variable_names_from_object(&object)?;
2129 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
2130 if selected_names.len() != 1 {
2131 return Err(invalid_variable(
2132 "table: brace assignment supports one variable at a time",
2133 ));
2134 }
2135 let mut variables = table_variables(&object)?;
2136 let target = selected_names[0].clone();
2137 let current = variables
2138 .fields
2139 .get(&target)
2140 .cloned()
2141 .ok_or_else(|| invalid_variable(format!("table: missing variable '{target}'")))?;
2142 variables.insert(target, assign_rows(current, &rows, rhs)?);
2143 object
2144 .properties
2145 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
2146 Ok(Value::Object(object))
2147}
2148
2149fn selector_values(payload: &Value) -> BuiltinResult<Vec<Value>> {
2150 match payload {
2151 Value::Cell(cell) => {
2152 let mut out = Vec::with_capacity(cell.data.len());
2153 for handle in &cell.data {
2154 out.push(unsafe { &*handle.as_raw() }.clone());
2155 }
2156 Ok(out)
2157 }
2158 other => Ok(vec![other.clone()]),
2159 }
2160}
2161
2162fn parse_row_selector(selector: Option<&Value>, height: usize) -> BuiltinResult<Vec<usize>> {
2163 let Some(selector) = selector else {
2164 return Ok((0..height).collect());
2165 };
2166 if is_colon_selector(selector) {
2167 return Ok((0..height).collect());
2168 }
2169 if is_end_selector(selector) {
2170 return if height == 0 {
2171 Err(invalid_index(
2172 "table: end row index is invalid for empty table",
2173 ))
2174 } else {
2175 Ok(vec![height - 1])
2176 };
2177 }
2178 match selector {
2179 Value::Num(n) => Ok(vec![one_based_to_zero(*n, height, "row")?]),
2180 Value::Int(i) => Ok(vec![one_based_to_zero(i.to_f64(), height, "row")?]),
2181 Value::Tensor(tensor) => tensor
2182 .data
2183 .iter()
2184 .map(|value| one_based_to_zero(*value, height, "row"))
2185 .collect(),
2186 Value::LogicalArray(array) => {
2187 if array.data.len() != height {
2188 return Err(invalid_index(
2189 "table: logical row selector length must match table height",
2190 ));
2191 }
2192 Ok(array
2193 .data
2194 .iter()
2195 .enumerate()
2196 .filter_map(|(idx, value)| (*value != 0).then_some(idx))
2197 .collect())
2198 }
2199 other => Err(invalid_index(format!(
2200 "table: unsupported row selector {other:?}"
2201 ))),
2202 }
2203}
2204
2205fn parse_variable_selector(
2206 selector: Option<&Value>,
2207 names: &[String],
2208) -> BuiltinResult<Vec<String>> {
2209 let Some(selector) = selector else {
2210 return Ok(names.to_vec());
2211 };
2212 if is_colon_selector(selector) {
2213 return Ok(names.to_vec());
2214 }
2215 match selector {
2216 Value::String(_) | Value::CharArray(_) | Value::StringArray(_) | Value::Cell(_) => {
2217 let selected = string_list(selector)?;
2218 for name in &selected {
2219 if !names.contains(name) {
2220 return Err(invalid_variable(format!(
2221 "table: unrecognized variable '{name}'"
2222 )));
2223 }
2224 }
2225 Ok(selected)
2226 }
2227 Value::Num(n) => Ok(vec![name_at_index(names, *n)?]),
2228 Value::Int(i) => Ok(vec![name_at_index(names, i.to_f64())?]),
2229 Value::Tensor(tensor) => tensor
2230 .data
2231 .iter()
2232 .map(|value| name_at_index(names, *value))
2233 .collect(),
2234 Value::LogicalArray(array) => {
2235 if array.data.len() != names.len() {
2236 return Err(invalid_index(
2237 "table: logical variable selector length must match table width",
2238 ));
2239 }
2240 Ok(array
2241 .data
2242 .iter()
2243 .zip(names.iter())
2244 .filter_map(|(flag, name)| (*flag != 0).then_some(name.clone()))
2245 .collect())
2246 }
2247 other => Err(invalid_index(format!(
2248 "table: unsupported variable selector {other:?}"
2249 ))),
2250 }
2251}
2252
2253fn is_colon_selector(value: &Value) -> bool {
2254 scalar_text(value, "selector")
2255 .map(|text| text == ":")
2256 .unwrap_or(false)
2257}
2258
2259fn is_end_selector(value: &Value) -> bool {
2260 scalar_text(value, "selector")
2261 .map(|text| text == "end")
2262 .unwrap_or(false)
2263}
2264
2265fn name_at_index(names: &[String], value: f64) -> BuiltinResult<String> {
2266 let idx = one_based_to_zero(value, names.len(), "variable")?;
2267 Ok(names[idx].clone())
2268}
2269
2270fn one_based_to_zero(value: f64, len: usize, context: &str) -> BuiltinResult<usize> {
2271 if !value.is_finite() || value < 1.0 || (value.round() - value).abs() > f64::EPSILON {
2272 return Err(invalid_index(format!(
2273 "table: {context} indices must be positive finite integers"
2274 )));
2275 }
2276 let idx = value.round() as usize - 1;
2277 if idx >= len {
2278 return Err(invalid_index(format!(
2279 "table: {context} index exceeds bounds"
2280 )));
2281 }
2282 Ok(idx)
2283}
2284
2285fn selected_row_names(
2286 object: &ObjectInstance,
2287 rows: &[usize],
2288) -> BuiltinResult<Option<Vec<String>>> {
2289 let props = table_public_properties(object)?;
2290 let Some(value) = props.fields.get(ROW_NAMES) else {
2291 return Ok(None);
2292 };
2293 let names = string_list(value)?;
2294 if names.is_empty() {
2295 return Ok(None);
2296 }
2297 Ok(Some(
2298 rows.iter()
2299 .filter_map(|row| names.get(*row).cloned())
2300 .collect(),
2301 ))
2302}
2303
2304fn value_row_count(value: &Value) -> BuiltinResult<usize> {
2305 match value {
2306 Value::Tensor(tensor) => Ok(tensor.rows()),
2307 Value::ComplexTensor(tensor) => Ok(tensor.rows),
2308 Value::StringArray(array) => Ok(array.rows()),
2309 Value::LogicalArray(array) => Ok(array.shape.first().copied().unwrap_or(array.data.len())),
2310 Value::Cell(cell) => Ok(cell.rows),
2311 Value::CharArray(array) => Ok(array.rows),
2312 Value::Object(obj) if obj.is_class("datetime") => {
2313 crate::builtins::datetime::serials_from_datetime_value(value)
2314 .map(|tensor| tensor.rows())
2315 }
2316 Value::Object(obj) if obj.is_class(TABLE_CLASS) => table_height(obj),
2317 _ => Ok(1),
2318 }
2319}
2320
2321fn select_rows(value: &Value, rows: &[usize]) -> BuiltinResult<Value> {
2322 match value {
2323 Value::Tensor(tensor) => {
2324 let cols = tensor.cols();
2325 let mut data = Vec::with_capacity(rows.len() * cols);
2326 for col in 0..cols {
2327 for &row in rows {
2328 data.push(tensor.get2(row, col).map_err(invalid_index)?);
2329 }
2330 }
2331 Tensor::new_with_dtype(data, vec![rows.len(), cols], tensor.dtype)
2332 .map(Value::Tensor)
2333 .map_err(invalid_variable)
2334 }
2335 Value::ComplexTensor(tensor) => {
2336 let mut data = Vec::with_capacity(rows.len() * tensor.cols);
2337 for col in 0..tensor.cols {
2338 for &row in rows {
2339 let idx = row + col * tensor.rows;
2340 data.push(*tensor.data.get(idx).ok_or_else(|| {
2341 invalid_index("table: complex variable row index out of bounds")
2342 })?);
2343 }
2344 }
2345 ComplexTensor::new(data, vec![rows.len(), tensor.cols])
2346 .map(Value::ComplexTensor)
2347 .map_err(invalid_variable)
2348 }
2349 Value::StringArray(array) => {
2350 let cols = array.cols();
2351 let mut data = Vec::with_capacity(rows.len() * cols);
2352 for col in 0..cols {
2353 for &row in rows {
2354 let idx = row + col * array.rows();
2355 data.push(array.data.get(idx).cloned().ok_or_else(|| {
2356 invalid_index("table: string variable row index out of bounds")
2357 })?);
2358 }
2359 }
2360 StringArray::new(data, vec![rows.len(), cols])
2361 .map(Value::StringArray)
2362 .map_err(invalid_variable)
2363 }
2364 Value::LogicalArray(array) => {
2365 let source_rows = array.shape.first().copied().unwrap_or(array.data.len());
2366 let cols = array.shape.get(1).copied().unwrap_or(1);
2367 let mut data = Vec::with_capacity(rows.len() * cols);
2368 for col in 0..cols {
2369 for &row in rows {
2370 let idx = row + col * source_rows;
2371 data.push(*array.data.get(idx).ok_or_else(|| {
2372 invalid_index("table: logical variable row index out of bounds")
2373 })?);
2374 }
2375 }
2376 LogicalArray::new(data, vec![rows.len(), cols])
2377 .map(Value::LogicalArray)
2378 .map_err(invalid_variable)
2379 }
2380 Value::Cell(cell) => {
2381 let mut data = Vec::with_capacity(rows.len() * cell.cols);
2382 for col in 0..cell.cols {
2383 for &row in rows {
2384 data.push(cell.get(row, col).map_err(invalid_index)?);
2385 }
2386 }
2387 CellArray::new(data, rows.len(), cell.cols)
2388 .map(Value::Cell)
2389 .map_err(invalid_variable)
2390 }
2391 Value::Object(obj) if obj.is_class("datetime") => {
2392 let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
2393 let selected = select_rows(&Value::Tensor(tensor), rows)?;
2394 match selected {
2395 Value::Tensor(tensor) => {
2396 crate::builtins::datetime::datetime_object_from_serial_tensor(
2397 tensor,
2398 crate::builtins::datetime::datetime_format_from_value(value),
2399 )
2400 }
2401 _ => unreachable!("select_rows tensor branch returns tensor"),
2402 }
2403 }
2404 _ if rows.len() == 1 && rows[0] == 0 => Ok(value.clone()),
2405 other => Err(invalid_variable(format!(
2406 "table: row selection unsupported for variable {other:?}"
2407 ))),
2408 }
2409}
2410
2411fn assign_rows(mut current: Value, rows: &[usize], rhs: Value) -> BuiltinResult<Value> {
2412 if value_row_count(&rhs)? != rows.len() {
2413 return Err(invalid_variable(
2414 "table: assignment row count must match selected row count",
2415 ));
2416 }
2417 let replacing_all_rows = rows.len() == value_row_count(¤t)?;
2418 match (&mut current, rhs) {
2419 (Value::Tensor(target), Value::Tensor(source)) => {
2420 if target.cols() != source.cols() {
2421 return Err(invalid_variable(
2422 "table: tensor assignment column count mismatch",
2423 ));
2424 }
2425 for col in 0..target.cols() {
2426 for (src_row, &dst_row) in rows.iter().enumerate() {
2427 let value = source.get2(src_row, col).map_err(invalid_index)?;
2428 target.set2(dst_row, col, value).map_err(invalid_index)?;
2429 }
2430 }
2431 Ok(current)
2432 }
2433 (_, source) if replacing_all_rows => Ok(source),
2434 _ => Err(invalid_variable(
2435 "table: assignment for this variable type requires replacing all rows",
2436 )),
2437 }
2438}
2439
2440fn concatenate_numeric_columns(values: &[&Value]) -> BuiltinResult<Value> {
2441 let rows = values
2442 .first()
2443 .and_then(|value| match value {
2444 Value::Tensor(t) => Some(t.rows()),
2445 _ => None,
2446 })
2447 .unwrap_or(0);
2448 let cols = values
2449 .iter()
2450 .map(|value| match value {
2451 Value::Tensor(t) => Ok(t.cols()),
2452 _ => Err(invalid_variable("table: expected numeric variable")),
2453 })
2454 .collect::<BuiltinResult<Vec<_>>>()?;
2455 let total_cols: usize = cols.iter().sum();
2456 let mut data = Vec::with_capacity(rows * total_cols);
2457 for value in values {
2458 let Value::Tensor(tensor) = value else {
2459 return Err(invalid_variable("table: expected numeric variable"));
2460 };
2461 for col in 0..tensor.cols() {
2462 for row in 0..rows {
2463 data.push(tensor.get2(row, col).map_err(invalid_index)?);
2464 }
2465 }
2466 }
2467 Tensor::new(data, vec![rows, total_cols])
2468 .map(Value::Tensor)
2469 .map_err(invalid_variable)
2470}
2471
2472pub fn sortrows_table(value: Value, rest: &[Value]) -> BuiltinResult<(Value, Tensor)> {
2473 let object = into_table_object(value, "sortrows")?;
2474 let names = table_variable_names_from_object(&object)?;
2475 let sort_spec = SortSpec::parse(rest, &names)?;
2476 let height = table_height(&object)?;
2477 let variables = table_variables(&object)?;
2478 let mut indices: Vec<usize> = (0..height).collect();
2479 indices.sort_by(|&a, &b| {
2480 for key in &sort_spec.keys {
2481 let Some(value) = variables.fields.get(&key.name) else {
2482 continue;
2483 };
2484 let ord = compare_table_cells(value, a, b).unwrap_or(Ordering::Equal);
2485 let ord = if key.descending { ord.reverse() } else { ord };
2486 if ord != Ordering::Equal {
2487 return ord;
2488 }
2489 }
2490 a.cmp(&b)
2491 });
2492 let mut sorted_columns = Vec::with_capacity(names.len());
2493 for name in &names {
2494 let value = variables
2495 .fields
2496 .get(name)
2497 .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
2498 sorted_columns.push(select_rows(value, &indices)?);
2499 }
2500 let row_names = selected_row_names(&object, &indices)?;
2501 let sorted = table_from_columns_with_properties(names, sorted_columns, row_names)?;
2502 let indices_tensor = Tensor::new(
2503 indices.iter().map(|idx| *idx as f64 + 1.0).collect(),
2504 vec![indices.len(), 1],
2505 )
2506 .map_err(invalid_variable)?;
2507 Ok((sorted, indices_tensor))
2508}
2509
2510struct SortSpec {
2511 keys: Vec<SortKey>,
2512}
2513
2514struct SortKey {
2515 name: String,
2516 descending: bool,
2517}
2518
2519impl SortSpec {
2520 fn parse(rest: &[Value], names: &[String]) -> BuiltinResult<Self> {
2521 let mut keys = if rest.is_empty() {
2522 names
2523 .iter()
2524 .map(|name| SortKey {
2525 name: name.clone(),
2526 descending: false,
2527 })
2528 .collect::<Vec<_>>()
2529 } else {
2530 parse_variable_selector(rest.first(), names)?
2531 .into_iter()
2532 .map(|name| SortKey {
2533 name,
2534 descending: false,
2535 })
2536 .collect()
2537 };
2538 if let Some(direction) = rest.get(1) {
2539 let directions = string_list(direction)?;
2540 if directions.len() == 1 {
2541 let descending = directions[0].eq_ignore_ascii_case("descend")
2542 || directions[0].eq_ignore_ascii_case("desc");
2543 for key in &mut keys {
2544 key.descending = descending;
2545 }
2546 } else {
2547 for (key, direction) in keys.iter_mut().zip(directions.iter()) {
2548 key.descending = direction.eq_ignore_ascii_case("descend")
2549 || direction.eq_ignore_ascii_case("desc");
2550 }
2551 }
2552 }
2553 Ok(Self { keys })
2554 }
2555}
2556
2557fn compare_table_cells(value: &Value, a: usize, b: usize) -> BuiltinResult<Ordering> {
2558 match value {
2559 Value::Tensor(tensor) => Ok(tensor
2560 .get2(a, 0)
2561 .map_err(invalid_index)?
2562 .partial_cmp(&tensor.get2(b, 0).map_err(invalid_index)?)
2563 .unwrap_or(Ordering::Greater)),
2564 Value::StringArray(array) => {
2565 let av = array.data.get(a).cloned().unwrap_or_default();
2566 let bv = array.data.get(b).cloned().unwrap_or_default();
2567 Ok(av.cmp(&bv))
2568 }
2569 Value::LogicalArray(array) => {
2570 let av = *array.data.get(a).unwrap_or(&0);
2571 let bv = *array.data.get(b).unwrap_or(&0);
2572 Ok(av.cmp(&bv))
2573 }
2574 Value::Object(obj) if obj.is_class("datetime") => {
2575 let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
2576 Ok(tensor
2577 .data
2578 .get(a)
2579 .copied()
2580 .unwrap_or(f64::NAN)
2581 .partial_cmp(&tensor.data.get(b).copied().unwrap_or(f64::NAN))
2582 .unwrap_or(Ordering::Greater))
2583 }
2584 other => Ok(cell_key_string(other, a).cmp(&cell_key_string(other, b))),
2585 }
2586}
2587
/// One component of a grouping key, orderable so that keys can live in a `BTreeMap`.
///
/// Atoms of different kinds order by kind (`Missing < Logical < Number < Text`); atoms of the
/// same kind order by value. Numbers compare numerically with `-0.0 == 0.0`, and every NaN is
/// equal to every other NaN and sorts after all other numbers, so rows holding NaNs with
/// different sign or payload bits still fall into a single group.
#[derive(Clone, Debug)]
enum GroupAtom {
    /// Numeric value (also used for datetime serials).
    Number(f64),
    /// Text value.
    Text(String),
    /// Logical value.
    Logical(bool),
    /// No value could be read for the row.
    Missing,
}

impl GroupAtom {
    /// Rank used to order atoms of different kinds.
    fn rank(&self) -> u8 {
        match self {
            Self::Missing => 0,
            Self::Logical(_) => 1,
            Self::Number(_) => 2,
            Self::Text(_) => 3,
        }
    }

    /// Total order on numbers: numeric order, zeros equal, all NaNs equal and last.
    fn compare_numbers(lhs: f64, rhs: f64) -> Ordering {
        match (lhs.is_nan(), rhs.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Neither side is NaN, so `partial_cmp` always yields an ordering.
            (false, false) => lhs.partial_cmp(&rhs).unwrap_or(Ordering::Equal),
        }
    }
}

impl PartialEq for GroupAtom {
    fn eq(&self, other: &Self) -> bool {
        // Equality is defined through `cmp` so that `Eq` and `Ord` can never disagree.
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for GroupAtom {}

impl PartialOrd for GroupAtom {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for GroupAtom {
    fn cmp(&self, other: &Self) -> Ordering {
        match (self, other) {
            (Self::Missing, Self::Missing) => Ordering::Equal,
            (Self::Logical(a), Self::Logical(b)) => a.cmp(b),
            (Self::Number(a), Self::Number(b)) => Self::compare_numbers(*a, *b),
            (Self::Text(a), Self::Text(b)) => a.cmp(b),
            // Different kinds: order by kind rank.
            _ => self.rank().cmp(&other.rank()),
        }
    }
}
2636
2637fn cell_group_atom(value: &Value, row: usize) -> GroupAtom {
2638 match value {
2639 Value::Tensor(tensor) => tensor
2640 .get2(row, 0)
2641 .map(GroupAtom::Number)
2642 .unwrap_or(GroupAtom::Missing),
2643 Value::StringArray(array) => array
2644 .data
2645 .get(row)
2646 .cloned()
2647 .map(GroupAtom::Text)
2648 .unwrap_or(GroupAtom::Missing),
2649 Value::LogicalArray(array) => array
2650 .data
2651 .get(row)
2652 .map(|value| GroupAtom::Logical(*value != 0))
2653 .unwrap_or(GroupAtom::Missing),
2654 Value::Object(obj) if obj.is_class("datetime") => {
2655 crate::builtins::datetime::serials_from_datetime_value(value)
2656 .ok()
2657 .and_then(|tensor| tensor.data.get(row).copied())
2658 .map(GroupAtom::Number)
2659 .unwrap_or(GroupAtom::Missing)
2660 }
2661 other => GroupAtom::Text(cell_key_string(other, row)),
2662 }
2663}
2664
2665fn groupsummary_impl(
2666 table: Value,
2667 groupvars: Value,
2668 method: Value,
2669 rest: Vec<Value>,
2670) -> BuiltinResult<Value> {
2671 let object = into_table_object(table, "groupsummary")?;
2672 let names = table_variable_names_from_object(&object)?;
2673 let group_names = parse_variable_selector(Some(&groupvars), &names)?;
2674 let methods = string_list(&method)?;
2675 if methods.is_empty() {
2676 return Err(invalid_argument(
2677 "groupsummary: method list must not be empty",
2678 ));
2679 }
2680 let data_names = if let Some(value) = rest.first() {
2681 parse_variable_selector(Some(value), &names)?
2682 } else {
2683 names
2684 .iter()
2685 .filter(|name| !group_names.contains(name))
2686 .filter(|name| {
2687 table_variables(&object)
2688 .ok()
2689 .and_then(|vars| vars.fields.get(*name).cloned())
2690 .map(|value| matches!(value, Value::Tensor(_)))
2691 .unwrap_or(false)
2692 })
2693 .cloned()
2694 .collect()
2695 };
2696 let variables = table_variables(&object)?;
2697 let height = table_height(&object)?;
2698 let mut groups: BTreeMap<Vec<GroupAtom>, Vec<usize>> = BTreeMap::new();
2699 for row in 0..height {
2700 let key = group_names
2701 .iter()
2702 .map(|name| {
2703 variables
2704 .fields
2705 .get(name)
2706 .map(|value| cell_group_atom(value, row))
2707 .unwrap_or(GroupAtom::Missing)
2708 })
2709 .collect::<Vec<_>>();
2710 groups.entry(key).or_default().push(row);
2711 }
2712 let group_rows = groups
2713 .values()
2714 .filter_map(|rows| rows.first().copied())
2715 .collect::<Vec<_>>();
2716 let mut out_names = Vec::new();
2717 let mut out_columns = Vec::new();
2718 for name in &group_names {
2719 let value = variables.fields.get(name).ok_or_else(|| {
2720 invalid_variable(format!("groupsummary: missing group variable '{name}'"))
2721 })?;
2722 out_names.push(name.clone());
2723 out_columns.push(select_rows(value, &group_rows)?);
2724 }
2725 out_names.push("GroupCount".to_string());
2726 out_columns.push(Value::Tensor(
2727 Tensor::new(
2728 groups.values().map(|rows| rows.len() as f64).collect(),
2729 vec![groups.len(), 1],
2730 )
2731 .map_err(invalid_variable)?,
2732 ));
2733 for method in &methods {
2734 for name in &data_names {
2735 let value = variables.fields.get(name).ok_or_else(|| {
2736 invalid_variable(format!("groupsummary: missing data variable '{name}'"))
2737 })?;
2738 let values = summarize_groups(value, groups.values(), method)?;
2739 out_names.push(format!("{}_{}", method.to_ascii_lowercase(), name));
2740 out_columns.push(Value::Tensor(
2741 Tensor::new(values, vec![groups.len(), 1]).map_err(invalid_variable)?,
2742 ));
2743 }
2744 }
2745 table_from_columns(out_names, out_columns)
2746}
2747
2748fn summarize_groups<'a>(
2749 value: &Value,
2750 groups: impl Iterator<Item = &'a Vec<usize>>,
2751 method: &str,
2752) -> BuiltinResult<Vec<f64>> {
2753 let tensor = match value {
2754 Value::Tensor(tensor) if tensor.cols() == 1 => tensor,
2755 _ => {
2756 return Err(invalid_variable(
2757 "groupsummary: summary data variables must be numeric column vectors",
2758 ))
2759 }
2760 };
2761 groups
2762 .map(|rows| {
2763 let mut values = rows
2764 .iter()
2765 .map(|row| tensor.get2(*row, 0).map_err(invalid_index))
2766 .collect::<BuiltinResult<Vec<_>>>()?;
2767 values.retain(|value| !value.is_nan());
2768 let result = match method.to_ascii_lowercase().as_str() {
2769 "mean" => {
2770 if values.is_empty() {
2771 f64::NAN
2772 } else {
2773 values.iter().sum::<f64>() / values.len() as f64
2774 }
2775 }
2776 "sum" => values.iter().sum(),
2777 "min" => values.into_iter().fold(f64::INFINITY, f64::min),
2778 "max" => values.into_iter().fold(f64::NEG_INFINITY, f64::max),
2779 "median" => {
2780 if values.is_empty() {
2781 f64::NAN
2782 } else {
2783 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
2784 let mid = values.len() / 2;
2785 if values.len() % 2 == 0 {
2786 (values[mid - 1] + values[mid]) / 2.0
2787 } else {
2788 values[mid]
2789 }
2790 }
2791 }
2792 "count" | "numel" => values.len() as f64,
2793 other => {
2794 return Err(invalid_argument(format!(
2795 "groupsummary: unsupported method '{other}'"
2796 )))
2797 }
2798 };
2799 Ok(result)
2800 })
2801 .collect()
2802}
2803
2804fn cell_key_string(value: &Value, row: usize) -> String {
2805 match value {
2806 Value::Tensor(tensor) => tensor
2807 .get2(row, 0)
2808 .map(format_key_number)
2809 .unwrap_or_default(),
2810 Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
2811 Value::LogicalArray(array) => array
2812 .data
2813 .get(row)
2814 .map(|value| value.to_string())
2815 .unwrap_or_default(),
2816 Value::Object(obj) if obj.is_class("datetime") => {
2817 crate::builtins::datetime::serials_from_datetime_value(value)
2818 .ok()
2819 .and_then(|tensor| tensor.data.get(row).copied())
2820 .map(format_key_number)
2821 .unwrap_or_default()
2822 }
2823 other => format!("{other}"),
2824 }
2825}
2826
2827pub fn table_display_text(value: &Value) -> BuiltinResult<String> {
2828 let object = match value {
2829 Value::Object(object) if object.is_class(TABLE_CLASS) => object,
2830 _ => return Err(invalid_argument("table display expects table object")),
2831 };
2832 let names = table_variable_names_from_object(object)?;
2833 let variables = table_variables(object)?;
2834 let rows = table_height(object)?;
2835 let preview = rows.min(12);
2836 let mut widths = names.iter().map(|name| name.len()).collect::<Vec<_>>();
2837 let rendered_cols = names
2838 .iter()
2839 .enumerate()
2840 .map(|(col, name)| {
2841 let value = variables
2842 .fields
2843 .get(name)
2844 .cloned()
2845 .unwrap_or_else(|| Value::String(String::new()));
2846 let cells = (0..preview)
2847 .map(|row| render_table_cell(&value, row))
2848 .collect::<Vec<_>>();
2849 for cell in &cells {
2850 widths[col] = widths[col].max(cell.len());
2851 }
2852 cells
2853 })
2854 .collect::<Vec<_>>();
2855
2856 let mut lines = Vec::new();
2857 lines.push(format!("{rows}x{} table", names.len()));
2858 if names.is_empty() {
2859 return Ok(lines.join("\n"));
2860 }
2861 let header = names
2862 .iter()
2863 .enumerate()
2864 .map(|(idx, name)| format!("{name:<width$}", width = widths[idx]))
2865 .collect::<Vec<_>>()
2866 .join(" ");
2867 lines.push(header);
2868 for row in 0..preview {
2869 lines.push(
2870 rendered_cols
2871 .iter()
2872 .enumerate()
2873 .map(|(col, cells)| format!("{:<width$}", cells[row], width = widths[col]))
2874 .collect::<Vec<_>>()
2875 .join(" "),
2876 );
2877 }
2878 if preview < rows {
2879 lines.push(format!("... {} more rows", rows - preview));
2880 }
2881 Ok(lines.join("\n"))
2882}
2883
2884pub fn table_summary_text(value: &Value) -> BuiltinResult<String> {
2885 let object = match value {
2886 Value::Object(object) if object.is_class(TABLE_CLASS) => object,
2887 _ => return Err(invalid_argument("table display expects table object")),
2888 };
2889 Ok(format!(
2890 "{}x{} table",
2891 table_height(object)?,
2892 table_width(object)?
2893 ))
2894}
2895
2896fn render_table_cell(value: &Value, row: usize) -> String {
2897 match value {
2898 Value::Tensor(tensor) => tensor
2899 .get2(row, 0)
2900 .map(format_table_number)
2901 .unwrap_or_default(),
2902 Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
2903 Value::LogicalArray(array) => array
2904 .data
2905 .get(row)
2906 .map(|value| if *value != 0 { "true" } else { "false" }.to_string())
2907 .unwrap_or_default(),
2908 Value::Object(obj) if obj.is_class("datetime") => {
2909 crate::builtins::datetime::datetime_string_array(value)
2910 .ok()
2911 .flatten()
2912 .and_then(|array| array.data.get(row).cloned())
2913 .unwrap_or_else(|| value.to_string())
2914 }
2915 other => other.to_string(),
2916 }
2917}
2918
2919fn format_table_number(value: f64) -> String {
2920 if value.is_nan() {
2921 "NaN".to_string()
2922 } else if value.fract() == 0.0 && value.abs() < 1e15 {
2923 format!("{}", value as i64)
2924 } else {
2925 trim_float(format!("{value:.6}"))
2926 }
2927}
2928
2929fn format_key_number(value: f64) -> String {
2930 if value.is_nan() {
2931 "NaN".to_string()
2932 } else if value.is_infinite() {
2933 value.to_string()
2934 } else {
2935 trim_float(format!("{value:.17}"))
2936 }
2937}
2938
/// Strips trailing zeros after the first `.` of `text`, and the `.` itself when nothing but
/// zeros followed it. Text without a `.` is returned unchanged.
fn trim_float(mut text: String) -> String {
    let Some(dot) = text.find('.') else {
        return text;
    };
    // Length of what remains after the dot once trailing zeros are removed.
    let kept_fraction = text[dot + 1..].trim_end_matches('0').len();
    let keep = if kept_fraction == 0 {
        dot
    } else {
        dot + 1 + kept_fraction
    };
    text.truncate(keep);
    text
}
2952
2953fn scalar_text(value: &Value, context: &str) -> BuiltinResult<String> {
2954 match value {
2955 Value::String(text) => Ok(text.clone()),
2956 Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
2957 Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
2958 _ => Err(invalid_argument(format!(
2959 "table: {context} must be a string scalar or character vector"
2960 ))),
2961 }
2962}
2963
2964fn bool_scalar(value: &Value, context: &str) -> BuiltinResult<bool> {
2965 match value {
2966 Value::Bool(flag) => Ok(*flag),
2967 Value::Int(value) => Ok(value.to_i64() != 0),
2968 Value::Num(value) if value.is_finite() => Ok(*value != 0.0),
2969 Value::String(_) | Value::CharArray(_) | Value::StringArray(_) => {
2970 let text = scalar_text(value, context)?;
2971 match text.to_ascii_lowercase().as_str() {
2972 "true" | "on" | "yes" => Ok(true),
2973 "false" | "off" | "no" => Ok(false),
2974 _ => Err(invalid_argument(format!(
2975 "table: {context} must be logical"
2976 ))),
2977 }
2978 }
2979 _ => Err(invalid_argument(format!(
2980 "table: {context} must be logical"
2981 ))),
2982 }
2983}
2984
2985fn nonnegative_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
2986 match value {
2987 Value::Int(value) if value.to_i64() >= 0 => Ok(value.to_i64() as usize),
2988 Value::Num(value)
2989 if value.is_finite()
2990 && *value >= 0.0
2991 && (value.round() - value).abs() <= f64::EPSILON =>
2992 {
2993 Ok(value.round() as usize)
2994 }
2995 _ => Err(invalid_argument(format!(
2996 "table: {context} must be a non-negative integer"
2997 ))),
2998 }
2999}
3000
3001fn string_list(value: &Value) -> BuiltinResult<Vec<String>> {
3002 match value {
3003 Value::String(text) => Ok(vec![text.clone()]),
3004 Value::CharArray(ca) if ca.rows == 1 => Ok(vec![ca.data.iter().collect()]),
3005 Value::StringArray(array) => Ok(array.data.clone()),
3006 Value::Cell(cell) => {
3007 let mut out = Vec::with_capacity(cell.data.len());
3008 for handle in &cell.data {
3009 let value = unsafe { &*handle.as_raw() };
3010 out.extend(string_list(value)?);
3011 }
3012 Ok(out)
3013 }
3014 _ => Err(invalid_argument(
3015 "table: expected string, string array, character vector, or cellstr",
3016 )),
3017 }
3018}
3019
3020fn variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
3021 let names = string_list(value)?;
3022 if names.is_empty() {
3023 return Err(invalid_variable("table: variable names must not be empty"));
3024 }
3025 Ok(make_unique_variable_names(names))
3026}
3027
/// Produces the default variable names `Var1`, `Var2`, ... `Var<count>`.
fn generated_variable_names(count: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(count);
    for idx in 1..=count {
        names.push(format!("Var{idx}"));
    }
    names
}
3031
3032fn make_unique_variable_names(names: Vec<String>) -> Vec<String> {
3033 make_unique_names(
3034 names
3035 .into_iter()
3036 .enumerate()
3037 .map(|(idx, name)| make_valid_variable_name(&name, idx + 1))
3038 .collect(),
3039 )
3040}
3041
/// Trims every name and makes the list unique, ignoring ASCII case.
///
/// Blank names become `Var{position}` (one-based). A name that collides with
/// an earlier one gets the first free `_2`, `_3`, ... suffix.
fn make_unique_names(names: Vec<String>) -> Vec<String> {
    let mut seen: HashSet<String> = HashSet::new();
    names
        .into_iter()
        .enumerate()
        .map(|(position, raw)| {
            let trimmed = raw.trim();
            let stem = if trimmed.is_empty() {
                format!("Var{}", position + 1)
            } else {
                trimmed.to_string()
            };
            let mut unique = stem.clone();
            let mut counter = 2usize;
            // `insert` returns false when the lowercase key is already taken.
            while !seen.insert(unique.to_ascii_lowercase()) {
                unique = format!("{stem}_{counter}");
                counter += 1;
            }
            unique
        })
        .collect()
}
3062
/// Rewrites `raw` into an ASCII identifier, or falls back to `Var{fallback_index}`.
///
/// After trimming, the first character is kept when it is an ASCII letter or
/// `_`, and later characters when they are ASCII alphanumeric or `_`. Each run
/// of other characters becomes a single `_`. Trailing underscores are removed.
/// If the result is empty or does not start with an ASCII letter, the fallback
/// name is returned instead.
fn make_valid_variable_name(raw: &str, fallback_index: usize) -> String {
    let mut sanitized = String::new();
    for (position, symbol) in raw.trim().chars().enumerate() {
        let accepted = symbol == '_'
            || if position == 0 {
                symbol.is_ascii_alphabetic()
            } else {
                symbol.is_ascii_alphanumeric()
            };
        if accepted {
            sanitized.push(symbol);
        } else if !sanitized.ends_with('_') {
            sanitized.push('_');
        }
    }
    let kept = sanitized.trim_end_matches('_').len();
    sanitized.truncate(kept);
    match sanitized.chars().next() {
        Some(first) if first.is_ascii_alphabetic() => sanitized,
        _ => format!("Var{fallback_index}"),
    }
}
3083
3084#[cfg(test)]
3085mod tests {
3086 use super::*;
3087 use futures::executor::block_on;
3088 use runmat_time::unix_timestamp_ms;
3089 use std::fs;
3090 use std::io::Write;
3091
3092 fn unique_path(prefix: &str) -> PathBuf {
3093 let mut path = std::env::temp_dir();
3094 path.push(format!(
3095 "runmat_{prefix}_{}_{}",
3096 std::process::id(),
3097 unix_timestamp_ms()
3098 ));
3099 path
3100 }
3101
3102 fn read_table(path: &Path, args: Vec<Value>) -> Value {
3103 block_on(readtable_builtin(
3104 Value::from(path.to_string_lossy().to_string()),
3105 args,
3106 ))
3107 .expect("readtable")
3108 }
3109
3110 fn read_table_err(path: &Path, args: Vec<Value>) -> RuntimeError {
3111 block_on(readtable_builtin(
3112 Value::from(path.to_string_lossy().to_string()),
3113 args,
3114 ))
3115 .expect_err("expected readtable failure")
3116 }
3117
3118 fn object(value: Value) -> ObjectInstance {
3119 match value {
3120 Value::Object(object) => object,
3121 other => panic!("expected table object, got {other:?}"),
3122 }
3123 }
3124
3125 #[test]
3126 fn readtable_imports_headered_numeric_and_text_columns() {
3127 let path = unique_path("readtable_basic");
3128 fs::write(&path, "Name,Score\nAda,10\nGrace,12\n").expect("write sample");
3129 let table = object(read_table(&path, Vec::new()));
3130 assert_eq!(
3131 table_variable_names_from_object(&table).unwrap(),
3132 vec!["Name".to_string(), "Score".to_string()]
3133 );
3134 match table_member_get(&table, &Value::from("Score")).unwrap() {
3135 Value::Tensor(tensor) => {
3136 assert_eq!(tensor.shape, vec![2, 1]);
3137 assert_eq!(tensor.data, vec![10.0, 12.0]);
3138 }
3139 other => panic!("expected tensor, got {other:?}"),
3140 }
3141 match table_member_get(&table, &Value::from("Name")).unwrap() {
3142 Value::StringArray(array) => {
3143 assert_eq!(array.data, vec!["Ada".to_string(), "Grace".to_string()]);
3144 }
3145 other => panic!("expected string array, got {other:?}"),
3146 }
3147 let _ = fs::remove_file(&path);
3148 }
3149
3150 #[test]
3151 fn readtable_auto_does_not_consume_headerless_numeric_rows() {
3152 let path = unique_path("readtable_headerless_numeric");
3153 fs::write(&path, "1,2\n3,4\n").expect("write sample");
3154 let table = object(read_table(&path, Vec::new()));
3155 assert_eq!(
3156 table_variable_names_from_object(&table).unwrap(),
3157 vec!["Var1".to_string(), "Var2".to_string()]
3158 );
3159 match table_member_get(&table, &Value::from("Var1")).unwrap() {
3160 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![1.0, 3.0]),
3161 other => panic!("expected tensor, got {other:?}"),
3162 }
3163 match table_member_get(&table, &Value::from("Var2")).unwrap() {
3164 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 4.0]),
3165 other => panic!("expected tensor, got {other:?}"),
3166 }
3167 let _ = fs::remove_file(&path);
3168 }
3169
3170 #[test]
3171 fn readtable_rejects_unknown_and_invalid_options() {
3172 let path = unique_path("readtable_invalid_options");
3173 fs::write(&path, "A\n1\n").expect("write sample");
3174 let err = read_table_err(
3175 &path,
3176 vec![Value::from("DefinitelyNotAnOption"), Value::from(1.0)],
3177 );
3178 assert!(err.message().contains("unsupported option"));
3179 let err = read_table_err(
3180 &path,
3181 vec![Value::from("VariableNamingRule"), Value::from("mangle")],
3182 );
3183 assert!(err.message().contains("unsupported VariableNamingRule"));
3184 let _ = fs::remove_file(&path);
3185 }
3186
3187 #[test]
3188 fn readtable_handles_quoted_delimiters_and_newlines() {
3189 let path = unique_path("readtable_quoted_newlines");
3190 fs::write(
3191 &path,
3192 "Name,Note\nAda,\"hello, world\"\nGrace,\"line one\nline two\"\n",
3193 )
3194 .expect("write sample");
3195 let table = object(read_table(&path, Vec::new()));
3196 match table_member_get(&table, &Value::from("Note")).unwrap() {
3197 Value::StringArray(array) => assert_eq!(
3198 array.data,
3199 vec!["hello, world".to_string(), "line one\nline two".to_string()]
3200 ),
3201 other => panic!("expected string array, got {other:?}"),
3202 }
3203 let _ = fs::remove_file(&path);
3204 }
3205
3206 #[test]
3207 fn readtable_supports_explicit_names_and_missing_tokens() {
3208 let path = unique_path("readtable_options");
3209 fs::write(&path, "1,NA\n2,4\n").expect("write sample");
3210 let names =
3211 StringArray::new(vec!["A".to_string(), "B".to_string()], vec![1, 2]).expect("names");
3212 let table = object(read_table(
3213 &path,
3214 vec![
3215 Value::from("ReadVariableNames"),
3216 Value::Bool(false),
3217 Value::from("VariableNames"),
3218 Value::StringArray(names),
3219 Value::from("TreatAsMissing"),
3220 Value::from("NA"),
3221 ],
3222 ));
3223 match table_member_get(&table, &Value::from("B")).unwrap() {
3224 Value::Tensor(tensor) => {
3225 assert!(tensor.data[0].is_nan());
3226 assert_eq!(tensor.data[1], 4.0);
3227 }
3228 other => panic!("expected tensor, got {other:?}"),
3229 }
3230 let _ = fs::remove_file(&path);
3231 }
3232
3233 #[test]
3234 fn readtable_preserves_variable_names_when_requested() {
3235 let path = unique_path("readtable_preserve_names");
3236 fs::write(&path, "daily revenue,total orders\n100,10\n").expect("write sample");
3237 let table = object(read_table(
3238 &path,
3239 vec![Value::from("VariableNamingRule"), Value::from("preserve")],
3240 ));
3241 assert_eq!(
3242 table_variable_names_from_object(&table).unwrap(),
3243 vec!["daily revenue".to_string(), "total orders".to_string()]
3244 );
3245 let _ = fs::remove_file(&path);
3246 }
3247
3248 fn write_zip_file(zip: &mut zip::ZipWriter<std::fs::File>, name: &str, contents: &str) {
3249 let options = zip::write::SimpleFileOptions::default()
3250 .compression_method(zip::CompressionMethod::Stored);
3251 zip.start_file(name, options).expect("start xlsx part");
3252 zip.write_all(contents.as_bytes()).expect("write xlsx part");
3253 }
3254
3255 fn write_minimal_xlsx(path: &Path) {
3256 let file = std::fs::File::create(path).expect("create xlsx");
3257 let mut zip = zip::ZipWriter::new(file);
3258 write_zip_file(
3259 &mut zip,
3260 "[Content_Types].xml",
3261 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3262<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
3263 <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
3264 <Default Extension="xml" ContentType="application/xml"/>
3265 <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
3266 <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
3267 <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
3268</Types>"#,
3269 );
3270 write_zip_file(
3271 &mut zip,
3272 "_rels/.rels",
3273 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3274<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
3275 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
3276</Relationships>"#,
3277 );
3278 write_zip_file(
3279 &mut zip,
3280 "xl/workbook.xml",
3281 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3282<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
3283 <sheets>
3284 <sheet name="Data" sheetId="1" r:id="rId1"/>
3285 </sheets>
3286</workbook>"#,
3287 );
3288 write_zip_file(
3289 &mut zip,
3290 "xl/_rels/workbook.xml.rels",
3291 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3292<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
3293 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
3294 <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
3295</Relationships>"#,
3296 );
3297 write_zip_file(
3298 &mut zip,
3299 "xl/styles.xml",
3300 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3301<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
3302 <fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
3303 <fills count="1"><fill><patternFill patternType="none"/></fill></fills>
3304 <borders count="1"><border/></borders>
3305 <cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>
3306 <cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellXfs>
3307</styleSheet>"#,
3308 );
3309 write_zip_file(
3310 &mut zip,
3311 "xl/worksheets/sheet1.xml",
3312 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
3313<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
3314 <sheetData>
3315 <row r="1">
3316 <c r="A1" t="inlineStr"><is><t>Date</t></is></c>
3317 <c r="B1" t="inlineStr"><is><t>Orders</t></is></c>
3318 <c r="C1" t="inlineStr"><is><t>Revenue</t></is></c>
3319 </row>
3320 <row r="2">
3321 <c r="A2" t="inlineStr"><is><t>2026-06-01</t></is></c>
3322 <c r="B2"><v>10</v></c>
3323 <c r="C2"><v>200</v></c>
3324 </row>
3325 <row r="3">
3326 <c r="A3" t="inlineStr"><is><t>2026-06-02</t></is></c>
3327 <c r="B3"><v>4</v></c>
3328 <c r="C3"><v>90</v></c>
3329 </row>
3330 </sheetData>
3331</worksheet>"#,
3332 );
3333 zip.finish().expect("finish xlsx");
3334 }
3335
3336 #[test]
3337 fn readtable_imports_xlsx_sheet_and_range() {
3338 let path = unique_path("readtable_spreadsheet");
3339 let path = path.with_extension("xlsx");
3340 write_minimal_xlsx(&path);
3341 let table = object(read_table(
3342 &path,
3343 vec![
3344 Value::from("Sheet"),
3345 Value::from("Data"),
3346 Value::from("Range"),
3347 Value::from("A1:C3"),
3348 ],
3349 ));
3350 assert_eq!(
3351 table_variable_names_from_object(&table).unwrap(),
3352 vec![
3353 "Date".to_string(),
3354 "Orders".to_string(),
3355 "Revenue".to_string()
3356 ]
3357 );
3358 match table_member_get(&table, &Value::from("Revenue")).unwrap() {
3359 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![200.0, 90.0]),
3360 other => panic!("expected tensor, got {other:?}"),
3361 }
3362 let _ = fs::remove_file(&path);
3363 }
3364
3365 #[test]
3366 fn table_properties_variable_names_rename_columns() {
3367 let a = Value::Tensor(Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap());
3368 let b = Value::Tensor(Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap());
3369 let mut table =
3370 object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
3371 let mut props = table_public_properties(&table).unwrap();
3372 props.insert(
3373 VARIABLE_NAMES,
3374 Value::StringArray(StringArray::new(vec!["X".into(), "Y".into()], vec![1, 2]).unwrap()),
3375 );
3376 table_member_set(&mut table, PROPERTIES_MEMBER, Value::Struct(props)).unwrap();
3377 assert_eq!(
3378 table_variable_names_from_object(&table).unwrap(),
3379 vec!["X".to_string(), "Y".to_string()]
3380 );
3381 }
3382
3383 #[test]
3384 fn table_paren_selects_rows_and_named_variables() {
3385 let a = Value::Tensor(Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap());
3386 let b = Value::Tensor(Tensor::new(vec![4.0, 5.0, 6.0], vec![3, 1]).unwrap());
3387 let table = object(table_from_columns(vec!["A".into(), "B".into()], vec![a, b]).unwrap());
3388 let selector = CellArray::new(
3389 vec![
3390 Value::Tensor(Tensor::new(vec![3.0, 1.0], vec![1, 2]).unwrap()),
3391 Value::Cell(CellArray::new(vec![Value::from("B")], 1, 1).unwrap()),
3392 ],
3393 1,
3394 2,
3395 )
3396 .unwrap();
3397 let subset = object(table_paren_get(&table, &Value::Cell(selector)).unwrap());
3398 assert_eq!(
3399 table_variable_names_from_object(&subset).unwrap(),
3400 vec!["B".to_string()]
3401 );
3402 match table_member_get(&subset, &Value::from("B")).unwrap() {
3403 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![6.0, 4.0]),
3404 other => panic!("expected tensor, got {other:?}"),
3405 }
3406 }
3407
3408 #[test]
3409 fn sortrows_preserves_row_names() {
3410 let values = Value::Tensor(Tensor::new(vec![2.0, 1.0], vec![2, 1]).unwrap());
3411 let table = table_from_columns_with_properties(
3412 vec!["X".into()],
3413 vec![values],
3414 Some(vec!["second".into(), "first".into()]),
3415 )
3416 .unwrap();
3417 let (sorted, _) = sortrows_table(table, &[Value::from("X")]).unwrap();
3418 let sorted = object(sorted);
3419 let props = table_public_properties(&sorted).unwrap();
3420 match props.fields.get(ROW_NAMES).unwrap() {
3421 Value::StringArray(array) => {
3422 assert_eq!(array.data, vec!["first".to_string(), "second".to_string()]);
3423 }
3424 other => panic!("expected row names, got {other:?}"),
3425 }
3426 }
3427
3428 #[test]
3429 fn groupsummary_mean_counts_groups() {
3430 let group = Value::StringArray(
3431 StringArray::new(vec!["a".into(), "b".into(), "a".into()], vec![3, 1]).unwrap(),
3432 );
3433 let value = Value::Tensor(Tensor::new(vec![2.0, 5.0, 4.0], vec![3, 1]).unwrap());
3434 let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
3435 let summary = groupsummary_impl(
3436 table,
3437 Value::from("G"),
3438 Value::from("mean"),
3439 vec![Value::from("X")],
3440 )
3441 .unwrap();
3442 let summary = object(summary);
3443 assert_eq!(
3444 table_variable_names_from_object(&summary).unwrap(),
3445 vec![
3446 "G".to_string(),
3447 "GroupCount".to_string(),
3448 "mean_X".to_string()
3449 ]
3450 );
3451 match table_member_get(&summary, &Value::from("mean_X")).unwrap() {
3452 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![3.0, 5.0]),
3453 other => panic!("expected tensor, got {other:?}"),
3454 }
3455 }
3456
3457 #[test]
3458 fn groupsummary_orders_numeric_groups_numerically() {
3459 let group = Value::Tensor(Tensor::new(vec![10.0, 2.0, 10.0], vec![3, 1]).unwrap());
3460 let value = Value::Tensor(Tensor::new(vec![1.0, 5.0, 3.0], vec![3, 1]).unwrap());
3461 let table = table_from_columns(vec!["G".into(), "X".into()], vec![group, value]).unwrap();
3462 let summary =
3463 object(groupsummary_impl(table, Value::from("G"), Value::from("sum"), vec![]).unwrap());
3464 match table_member_get(&summary, &Value::from("G")).unwrap() {
3465 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 10.0]),
3466 other => panic!("expected tensor, got {other:?}"),
3467 }
3468 match table_member_get(&summary, &Value::from("sum_X")).unwrap() {
3469 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![5.0, 4.0]),
3470 other => panic!("expected tensor, got {other:?}"),
3471 }
3472 }
3473}