1use std::cell::Cell;
4use std::cmp::Ordering;
5use std::collections::{BTreeMap, HashMap, HashSet};
6use std::io::{Cursor, Read};
7use std::path::{Path, PathBuf};
8
9use calamine::{open_workbook_auto_from_rs, Data as SpreadsheetData, Reader as SpreadsheetReader};
10use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
11use encoding_rs::{Encoding, UTF_8};
12use runmat_builtins::{
13 Access, BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
14 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
15 CellArray, CharArray, ClassDef, ComplexTensor, LogicalArray, MethodDef, NumericDType,
16 ObjectInstance, PropertyDef, StringArray, StructValue, Tensor, Value,
17};
18use runmat_filesystem::File;
19use runmat_macros::runtime_builtin;
20
21use crate::builtins::common::fs::expand_user_path;
22use crate::builtins::common::spec::{
23 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
24 ReductionNaN, ResidencyPolicy, ShapeRequirements,
25};
26use crate::{
27 build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError, OBJECT_INDEX_BRACE,
28 OBJECT_INDEX_MEMBER, OBJECT_INDEX_PAREN, OBJECT_SUBSASGN_METHOD, OBJECT_SUBSREF_METHOD,
29};
30
/// Class name used when registering the table class and when tagging table errors.
pub const TABLE_CLASS: &str = "table";
// Names of the two double-underscore fields; presumably the internal storage slots of a
// table object — NOTE(review): confirm against the object layout defined elsewhere.
const TABLE_VARIABLES_FIELD: &str = "__table_variables";
const TABLE_PROPERTIES_FIELD: &str = "__table_properties";
// Name of the single class property registered for the table class.
const PROPERTIES_MEMBER: &str = "Properties";
// Metadata key names; these look like the members of `T.Properties`
// — NOTE(review): confirm against `default_properties`.
const VARIABLE_NAMES: &str = "VariableNames";
const ROW_NAMES: &str = "RowNames";
const DIMENSION_NAMES: &str = "DimensionNames";
const VARIABLE_UNITS: &str = "VariableUnits";
const VARIABLE_DESCRIPTIONS: &str = "VariableDescriptions";
const DESCRIPTION: &str = "Description";
const USER_DATA: &str = "UserData";
// Default dimension labels (rows first, variables second, judging by the names).
const DEFAULT_ROW_DIM_NAME: &str = "Rows";
const DEFAULT_VARIABLE_DIM_NAME: &str = "Variables";
44
thread_local! {
    // Per-thread flag: set to `true` once `ensure_table_class_registered` has registered
    // the table class on this thread, so later calls return immediately.
    static TABLE_CLASS_REGISTERED: Cell<bool> = const { Cell::new(false) };
}
48
49const ANY_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
50 name: "out",
51 ty: BuiltinParamType::Any,
52 arity: BuiltinParamArity::Required,
53 default: None,
54 description: "Result value.",
55}];
56const NUM_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
57 name: "n",
58 ty: BuiltinParamType::IntegerScalar,
59 arity: BuiltinParamArity::Required,
60 default: None,
61 description: "Count.",
62}];
63const TABLE_INPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
64 name: "T",
65 ty: BuiltinParamType::Any,
66 arity: BuiltinParamArity::Required,
67 default: None,
68 description: "Table input.",
69}];
70const READTABLE_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
71 name: "filename",
72 ty: BuiltinParamType::StringScalar,
73 arity: BuiltinParamArity::Required,
74 default: None,
75 description: "Text or spreadsheet file path.",
76}];
77const READTABLE_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
78 BuiltinParamDescriptor {
79 name: "filename",
80 ty: BuiltinParamType::StringScalar,
81 arity: BuiltinParamArity::Required,
82 default: None,
83 description: "Text or spreadsheet file path.",
84 },
85 BuiltinParamDescriptor {
86 name: "nameValuePairs",
87 ty: BuiltinParamType::Any,
88 arity: BuiltinParamArity::Variadic,
89 default: None,
90 description: "Name-value import options.",
91 },
92];
93const SPREADSHEET_IMPORT_OPTIONS_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
94 name: "opts",
95 ty: BuiltinParamType::Any,
96 arity: BuiltinParamArity::Required,
97 default: None,
98 description: "Spreadsheet import options struct.",
99}];
100const SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 1] =
101 [BuiltinParamDescriptor {
102 name: "nameValuePairs",
103 ty: BuiltinParamType::Any,
104 arity: BuiltinParamArity::Variadic,
105 default: None,
106 description: "Name-value option pairs.",
107 }];
108const DETECT_IMPORT_OPTIONS_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
109 name: "opts",
110 ty: BuiltinParamType::Any,
111 arity: BuiltinParamArity::Required,
112 default: None,
113 description: "Detected import options struct accepted by readtable/readmatrix.",
114}];
115const DETECT_IMPORT_OPTIONS_INPUTS_FILENAME: [BuiltinParamDescriptor; 1] =
116 [BuiltinParamDescriptor {
117 name: "filename",
118 ty: BuiltinParamType::StringScalar,
119 arity: BuiltinParamArity::Required,
120 default: None,
121 description: "Text or spreadsheet file path to inspect.",
122 }];
123const DETECT_IMPORT_OPTIONS_INPUTS_NAME_VALUE: [BuiltinParamDescriptor; 2] = [
124 BuiltinParamDescriptor {
125 name: "filename",
126 ty: BuiltinParamType::StringScalar,
127 arity: BuiltinParamArity::Required,
128 default: None,
129 description: "Text or spreadsheet file path to inspect.",
130 },
131 BuiltinParamDescriptor {
132 name: "nameValuePairs",
133 ty: BuiltinParamType::Any,
134 arity: BuiltinParamArity::Variadic,
135 default: None,
136 description: "Detection overrides such as Delimiter, Range, Sheet, Encoding, or TextType.",
137 },
138];
139const TABLE_INPUTS_VALUES: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
140 name: "variables",
141 ty: BuiltinParamType::Any,
142 arity: BuiltinParamArity::Variadic,
143 default: None,
144 description: "Variables to assemble as table columns.",
145}];
146const GROUPSUMMARY_INPUTS: [BuiltinParamDescriptor; 4] = [
147 BuiltinParamDescriptor {
148 name: "T",
149 ty: BuiltinParamType::Any,
150 arity: BuiltinParamArity::Required,
151 default: None,
152 description: "Input table.",
153 },
154 BuiltinParamDescriptor {
155 name: "groupvars",
156 ty: BuiltinParamType::Any,
157 arity: BuiltinParamArity::Required,
158 default: None,
159 description: "Grouping variable name or names.",
160 },
161 BuiltinParamDescriptor {
162 name: "method",
163 ty: BuiltinParamType::Any,
164 arity: BuiltinParamArity::Required,
165 default: None,
166 description: "Summary method name or names.",
167 },
168 BuiltinParamDescriptor {
169 name: "datavars",
170 ty: BuiltinParamType::Any,
171 arity: BuiltinParamArity::Optional,
172 default: None,
173 description: "Data variable name or names.",
174 },
175];
176const OBJECT_INDEX_INPUTS: [BuiltinParamDescriptor; 3] = [
177 BuiltinParamDescriptor {
178 name: "obj",
179 ty: BuiltinParamType::Any,
180 arity: BuiltinParamArity::Required,
181 default: None,
182 description: "Table object receiver.",
183 },
184 BuiltinParamDescriptor {
185 name: "kind",
186 ty: BuiltinParamType::StringScalar,
187 arity: BuiltinParamArity::Required,
188 default: None,
189 description: "Index kind token.",
190 },
191 BuiltinParamDescriptor {
192 name: "payload",
193 ty: BuiltinParamType::Any,
194 arity: BuiltinParamArity::Required,
195 default: None,
196 description: "Index payload.",
197 },
198];
199const OBJECT_ASSIGN_INPUTS: [BuiltinParamDescriptor; 4] = [
200 BuiltinParamDescriptor {
201 name: "obj",
202 ty: BuiltinParamType::Any,
203 arity: BuiltinParamArity::Required,
204 default: None,
205 description: "Table object receiver.",
206 },
207 BuiltinParamDescriptor {
208 name: "kind",
209 ty: BuiltinParamType::StringScalar,
210 arity: BuiltinParamArity::Required,
211 default: None,
212 description: "Index kind token.",
213 },
214 BuiltinParamDescriptor {
215 name: "payload",
216 ty: BuiltinParamType::Any,
217 arity: BuiltinParamArity::Required,
218 default: None,
219 description: "Index payload.",
220 },
221 BuiltinParamDescriptor {
222 name: "rhs",
223 ty: BuiltinParamType::Any,
224 arity: BuiltinParamArity::Required,
225 default: None,
226 description: "Assigned value.",
227 },
228];
229
// Call signatures advertised for each builtin. Every entry pairs a display label with
// the input and output parameter tables declared above.

// readtable: filename only, or filename followed by name-value pairs.
const READTABLE_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename)",
        inputs: &READTABLE_INPUTS_FILENAME,
        outputs: &ANY_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "T = readtable(filename, nameValuePairs...)",
        inputs: &READTABLE_INPUTS_NAME_VALUE,
        outputs: &ANY_OUTPUT,
    },
];
// spreadsheetImportOptions: no arguments, or name-value pairs only.
const SPREADSHEET_IMPORT_OPTIONS_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "opts = spreadsheetImportOptions()",
        inputs: &[],
        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "opts = spreadsheetImportOptions(nameValuePairs...)",
        inputs: &SPREADSHEET_IMPORT_OPTIONS_INPUTS_NAME_VALUE,
        outputs: &SPREADSHEET_IMPORT_OPTIONS_OUTPUT,
    },
];
// detectImportOptions: filename only, or filename followed by name-value pairs.
const DETECT_IMPORT_OPTIONS_SIGNATURES: [BuiltinSignatureDescriptor; 2] = [
    BuiltinSignatureDescriptor {
        label: "opts = detectImportOptions(filename)",
        inputs: &DETECT_IMPORT_OPTIONS_INPUTS_FILENAME,
        outputs: &DETECT_IMPORT_OPTIONS_OUTPUT,
    },
    BuiltinSignatureDescriptor {
        label: "opts = detectImportOptions(filename, nameValuePairs...)",
        inputs: &DETECT_IMPORT_OPTIONS_INPUTS_NAME_VALUE,
        outputs: &DETECT_IMPORT_OPTIONS_OUTPUT,
    },
];
// table constructor: a variadic list of column variables.
const TABLE_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "T = table(variables...)",
    inputs: &TABLE_INPUTS_VALUES,
    outputs: &ANY_OUTPUT,
}];
// groupsummary: `datavars` is declared optional in GROUPSUMMARY_INPUTS.
const GROUPSUMMARY_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "G = groupsummary(T, groupvars, method, datavars)",
    inputs: &GROUPSUMMARY_INPUTS,
    outputs: &ANY_OUTPUT,
}];
// height / width: one input, one integer-scalar output.
const HEIGHT_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = height(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
const WIDTH_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "n = width(T)",
    inputs: &TABLE_INPUT,
    outputs: &NUM_OUTPUT,
}];
// Object indexing hooks: read access (subsref) and assignment (subsasgn).
const OBJECT_SUBSREF_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "out = table.subsref(obj, kind, payload)",
    inputs: &OBJECT_INDEX_INPUTS,
    outputs: &ANY_OUTPUT,
}];
const OBJECT_SUBSASGN_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "obj = table.subsasgn(obj, kind, payload, rhs)",
    inputs: &OBJECT_ASSIGN_INPUTS,
    outputs: &ANY_OUTPUT,
}];
296
// Error descriptors shared by the table builtins. Only the `identifier` field is read
// by the error helpers in this part of the file.

// Raised for invalid arguments or invalid table metadata.
const TABLE_ERROR_INVALID_ARGUMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_ARGUMENT",
    identifier: Some("RunMat:table:InvalidArgument"),
    when: "Arguments or table metadata are invalid.",
    message: "table: invalid argument",
};
// Raised for invalid table indexing (including unsupported index kinds).
const TABLE_ERROR_INVALID_INDEX: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_INDEX",
    identifier: Some("RunMat:table:InvalidIndex"),
    when: "Table indexing is invalid.",
    message: "table: invalid index",
};
// Raised for an invalid table variable name or value.
const TABLE_ERROR_INVALID_VARIABLE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.TABLE.INVALID_VARIABLE",
    identifier: Some("RunMat:table:InvalidVariable"),
    when: "A table variable name or value is invalid.",
    message: "table: invalid variable",
};
// Raised when readtable cannot open or read the requested file.
const TABLE_ERROR_IO: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.IO",
    identifier: Some("RunMat:readtable:IOError"),
    when: "readtable cannot open or read the requested file.",
    message: "readtable: file read failed",
};
// Raised when the file type is neither a text nor a spreadsheet format.
const TABLE_ERROR_UNSUPPORTED_FILE: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.READTABLE.UNSUPPORTED_FILE",
    identifier: Some("RunMat:readtable:UnsupportedFileType"),
    when: "readtable receives a file type outside the text or spreadsheet import backends.",
    message: "readtable: unsupported file type",
};
// Full error catalogue referenced by every builtin descriptor below.
const TABLE_ERRORS: [BuiltinErrorDescriptor; 5] = [
    TABLE_ERROR_INVALID_ARGUMENT,
    TABLE_ERROR_INVALID_INDEX,
    TABLE_ERROR_INVALID_VARIABLE,
    TABLE_ERROR_IO,
    TABLE_ERROR_UNSUPPORTED_FILE,
];
334
/// Descriptor for the `readtable` builtin.
pub const READTABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &READTABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `spreadsheetImportOptions` builtin.
pub const SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &SPREADSHEET_IMPORT_OPTIONS_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `detectImportOptions` builtin.
pub const DETECT_IMPORT_OPTIONS_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &DETECT_IMPORT_OPTIONS_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `table` constructor builtin.
pub const TABLE_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &TABLE_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `groupsummary` builtin.
pub const GROUPSUMMARY_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &GROUPSUMMARY_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `height` builtin.
pub const HEIGHT_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &HEIGHT_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `width` builtin.
pub const WIDTH_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &WIDTH_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `table.subsref` indexing hook (completion policy: method-only).
pub const TABLE_SUBSREF_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSREF_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
/// Descriptor for the `table.subsasgn` assignment hook (completion policy: method-only).
pub const TABLE_SUBSASGN_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &OBJECT_SUBSASGN_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::MethodOnly,
    errors: &TABLE_ERRORS,
};
389
/// GPU spec registered for the table builtins.
///
/// Declares no supported precisions and no provider hooks, with a
/// `GatherImmediately` residency policy; see `notes` for the rationale.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::table")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "table",
    op_kind: GpuOpKind::Custom("table"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Tables are host containers. GPU variables are gathered when tabular algorithms need row-wise access.",
};
405
/// Fusion spec registered for the table builtins.
///
/// Declares neither an elementwise nor a reduction kernel; see `notes`.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::table")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "table",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Tables are structured host containers and are not fusion operands.",
};
416
417fn table_error(error: &'static BuiltinErrorDescriptor, message: impl Into<String>) -> RuntimeError {
418 let mut builder = build_runtime_error(message).with_builtin(TABLE_CLASS);
419 if let Some(identifier) = error.identifier {
420 builder = builder.with_identifier(identifier);
421 }
422 builder.build()
423}
424
425fn table_error_with_source<E>(
426 error: &'static BuiltinErrorDescriptor,
427 message: impl Into<String>,
428 source: E,
429) -> RuntimeError
430where
431 E: std::error::Error + Send + Sync + 'static,
432{
433 let mut builder = build_runtime_error(message)
434 .with_builtin(TABLE_CLASS)
435 .with_source(source);
436 if let Some(identifier) = error.identifier {
437 builder = builder.with_identifier(identifier);
438 }
439 builder.build()
440}
441
442fn invalid_argument(message: impl Into<String>) -> RuntimeError {
443 table_error(&TABLE_ERROR_INVALID_ARGUMENT, message)
444}
445
446fn invalid_index(message: impl Into<String>) -> RuntimeError {
447 table_error(&TABLE_ERROR_INVALID_INDEX, message)
448}
449
450fn invalid_variable(message: impl Into<String>) -> RuntimeError {
451 table_error(&TABLE_ERROR_INVALID_VARIABLE, message)
452}
453
454fn map_control_flow(err: RuntimeError) -> RuntimeError {
455 let identifier = err.identifier().map(ToString::to_string);
456 let message = err.message().to_string();
457 let mut builder = build_runtime_error(message)
458 .with_builtin(TABLE_CLASS)
459 .with_source(err);
460 if let Some(identifier) = identifier {
461 builder = builder.with_identifier(identifier);
462 }
463 builder.build()
464}
465
466pub fn ensure_table_class_registered() {
467 TABLE_CLASS_REGISTERED.with(|registered| {
468 if registered.get() {
469 return;
470 }
471 let mut properties = HashMap::new();
472 properties.insert(
473 PROPERTIES_MEMBER.to_string(),
474 PropertyDef {
475 name: PROPERTIES_MEMBER.to_string(),
476 is_static: false,
477 is_constant: false,
478 is_dependent: false,
479 get_access: Access::Public,
480 set_access: Access::Public,
481 default_value: Some(Value::Struct(default_properties(Vec::new(), None))),
482 },
483 );
484
485 let mut methods = HashMap::new();
486 for name in [OBJECT_SUBSREF_METHOD, OBJECT_SUBSASGN_METHOD] {
487 methods.insert(
488 name.to_string(),
489 MethodDef {
490 name: name.to_string(),
491 is_static: false,
492 is_abstract: false,
493 is_sealed: false,
494 access: Access::Public,
495 function_name: format!("{TABLE_CLASS}.{name}"),
496 implicit_class_argument: None,
497 },
498 );
499 }
500
501 runmat_builtins::register_class(ClassDef {
502 name: TABLE_CLASS.to_string(),
503 parent: None,
504 properties,
505 methods,
506 });
507 registered.set(true);
508 });
509}
510
511#[runtime_builtin(
512 name = "table",
513 category = "table",
514 summary = "Create a table from named column variables.",
515 keywords = "table,VariableNames,RowNames,Properties",
516 accel = "cpu",
517 type_resolver(crate::builtins::io::type_resolvers::struct_type),
518 descriptor(crate::builtins::table::TABLE_DESCRIPTOR),
519 builtin_path = "crate::builtins::table"
520)]
521async fn table_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
522 ensure_table_class_registered();
523 let gathered = gather_values(&args).await?;
524 let (variables, options) = split_table_constructor_args(gathered)?;
525 let names = if let Some(names) = options.variable_names {
526 names
527 } else {
528 generated_variable_names(variables.len())
529 };
530 table_from_columns_with_properties(names, variables, options.row_names)
531}
532
533#[runtime_builtin(
534 name = "readtable",
535 category = "io/tabular",
536 summary = "Import tabular text or spreadsheet data into a table.",
537 keywords = "readtable,table,csv,tsv,xlsx,xls,ods,spreadsheet,VariableNames,RowNames,Sheet,Range",
538 accel = "cpu",
539 type_resolver(crate::builtins::io::type_resolvers::struct_type),
540 descriptor(crate::builtins::table::READTABLE_DESCRIPTOR),
541 builtin_path = "crate::builtins::table"
542)]
543async fn readtable_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
544 ensure_table_class_registered();
545 let path_value = gather_if_needed_async(&path)
546 .await
547 .map_err(map_control_flow)?;
548 let args = gather_values(&rest).await?;
549 let options = ReadTableOptions::parse(&args)?;
550 let resolved = resolve_path(&path_value)?;
551 read_table_from_file(&resolved, &options).await
552}
553
554#[runtime_builtin(
555 name = "spreadsheetImportOptions",
556 category = "io/tabular",
557 summary = "Create spreadsheet import options for readtable.",
558 keywords = "spreadsheetImportOptions,readtable,spreadsheet,xlsx,xls,DataRange,VariableTypes,VariableNames,NumVariables",
559 accel = "cpu",
560 type_resolver(crate::builtins::io::type_resolvers::struct_type),
561 descriptor(crate::builtins::table::SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR),
562 builtin_path = "crate::builtins::table"
563)]
564async fn spreadsheet_import_options_builtin(args: Vec<Value>) -> BuiltinResult<Value> {
565 let gathered = gather_values(&args).await?;
566 spreadsheet_import_options(gathered)
567}
568
569#[runtime_builtin(
570 name = "detectImportOptions",
571 category = "io/tabular",
572 summary = "Inspect a text or spreadsheet file and create import options.",
573 keywords = "detectImportOptions,readtable,readmatrix,csv,tsv,xlsx,Delimiter,VariableTypes,VariableNames",
574 accel = "cpu",
575 type_resolver(crate::builtins::io::type_resolvers::struct_type),
576 descriptor(crate::builtins::table::DETECT_IMPORT_OPTIONS_DESCRIPTOR),
577 builtin_path = "crate::builtins::table"
578)]
579async fn detect_import_options_builtin(path: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
580 let path_value = gather_if_needed_async(&path)
581 .await
582 .map_err(map_control_flow)?;
583 let args = gather_values(&rest).await?;
584 let options = ReadTableOptions::parse(&args)?;
585 let resolved = resolve_path(&path_value)?;
586 detect_import_options_from_file(&resolved, &options).await
587}
588
589#[runtime_builtin(
590 name = "height",
591 category = "table",
592 summary = "Return the number of rows in a table.",
593 keywords = "height,table,rows",
594 descriptor(crate::builtins::table::HEIGHT_DESCRIPTOR),
595 builtin_path = "crate::builtins::table"
596)]
597async fn height_builtin(value: Value) -> BuiltinResult<Value> {
598 let host = gather_if_needed_async(&value)
599 .await
600 .map_err(map_control_flow)?;
601 if let Some(object) = table_object(&host) {
602 return Ok(Value::Num(table_height(object)? as f64));
603 }
604 value_row_count(&host).map(|n| Value::Num(n as f64))
605}
606
607#[runtime_builtin(
608 name = "width",
609 category = "table",
610 summary = "Return the number of variables in a table.",
611 keywords = "width,table,variables",
612 descriptor(crate::builtins::table::WIDTH_DESCRIPTOR),
613 builtin_path = "crate::builtins::table"
614)]
615async fn width_builtin(value: Value) -> BuiltinResult<Value> {
616 let host = gather_if_needed_async(&value)
617 .await
618 .map_err(map_control_flow)?;
619 if let Some(object) = table_object(&host) {
620 return Ok(Value::Num(table_width(object)? as f64));
621 }
622 match host {
623 Value::Tensor(t) => Ok(Value::Num(t.cols() as f64)),
624 Value::ComplexTensor(t) => Ok(Value::Num(t.cols as f64)),
625 Value::StringArray(sa) => Ok(Value::Num(sa.cols() as f64)),
626 Value::LogicalArray(la) => Ok(Value::Num(la.shape.get(1).copied().unwrap_or(1) as f64)),
627 Value::Cell(ca) => Ok(Value::Num(ca.cols as f64)),
628 Value::CharArray(ca) => Ok(Value::Num(ca.cols as f64)),
629 _ => Ok(Value::Num(1.0)),
630 }
631}
632
633#[runtime_builtin(
634 name = "groupsummary",
635 category = "table",
636 summary = "Group table rows and compute summary statistics for data variables.",
637 keywords = "groupsummary,group,table,mean,sum,count,median,min,max",
638 accel = "cpu",
639 descriptor(crate::builtins::table::GROUPSUMMARY_DESCRIPTOR),
640 builtin_path = "crate::builtins::table"
641)]
642async fn groupsummary_builtin(
643 table: Value,
644 groupvars: Value,
645 method: Value,
646 rest: Vec<Value>,
647) -> BuiltinResult<Value> {
648 let table = gather_if_needed_async(&table)
649 .await
650 .map_err(map_control_flow)?;
651 let groupvars = gather_if_needed_async(&groupvars)
652 .await
653 .map_err(map_control_flow)?;
654 let method = gather_if_needed_async(&method)
655 .await
656 .map_err(map_control_flow)?;
657 let rest = gather_values(&rest).await?;
658 groupsummary_impl(table, groupvars, method, rest)
659}
660
661#[runtime_builtin(
662 name = "table.subsref",
663 descriptor(crate::builtins::table::TABLE_SUBSREF_DESCRIPTOR),
664 builtin_path = "crate::builtins::table"
665)]
666async fn table_subsref(obj: Value, kind: String, payload: Value) -> BuiltinResult<Value> {
667 let object = into_table_object(obj, "table.subsref")?;
668 match kind.as_str() {
669 OBJECT_INDEX_MEMBER => table_member_get(&object, &payload),
670 OBJECT_INDEX_PAREN => table_paren_get(&object, &payload),
671 OBJECT_INDEX_BRACE => table_brace_get(&object, &payload),
672 other => Err(invalid_index(format!(
673 "table.subsref: unsupported indexing kind '{other}'"
674 ))),
675 }
676}
677
678#[runtime_builtin(
679 name = "table.subsasgn",
680 descriptor(crate::builtins::table::TABLE_SUBSASGN_DESCRIPTOR),
681 builtin_path = "crate::builtins::table"
682)]
683async fn table_subsasgn(
684 obj: Value,
685 kind: String,
686 payload: Value,
687 rhs: Value,
688) -> BuiltinResult<Value> {
689 let mut object = into_table_object(obj, "table.subsasgn")?;
690 match kind.as_str() {
691 OBJECT_INDEX_MEMBER => {
692 let field = scalar_text(&payload, "table member")?;
693 table_member_set(&mut object, &field, rhs)?;
694 Ok(Value::Object(object))
695 }
696 OBJECT_INDEX_PAREN => table_paren_assign(object, &payload, rhs),
697 OBJECT_INDEX_BRACE => table_brace_assign(object, &payload, rhs),
698 other => Err(invalid_index(format!(
699 "table.subsasgn: unsupported indexing kind '{other}'"
700 ))),
701 }
702}
703
704async fn gather_values(values: &[Value]) -> BuiltinResult<Vec<Value>> {
705 let mut out = Vec::with_capacity(values.len());
706 for value in values {
707 out.push(
708 gather_if_needed_async(value)
709 .await
710 .map_err(map_control_flow)?,
711 );
712 }
713 Ok(out)
714}
715
/// Name-value options recognised by the `table(...)` constructor.
///
/// Both fields stay `None` unless the matching option appears in the argument list.
#[derive(Default)]
struct TableConstructorOptions {
    // Value of the `VariableNames` option, if supplied.
    variable_names: Option<Vec<String>>,
    // Value of the `RowNames` option, if supplied.
    row_names: Option<Vec<String>>,
}
721
722fn split_table_constructor_args(
723 args: Vec<Value>,
724) -> BuiltinResult<(Vec<Value>, TableConstructorOptions)> {
725 let mut variables = Vec::new();
726 let mut options = TableConstructorOptions::default();
727 let mut idx = 0usize;
728 while idx < args.len() {
729 if let Ok(name) = scalar_text(&args[idx], "table option") {
730 if idx + 1 < args.len() && is_table_constructor_option(&name) {
731 let value = &args[idx + 1];
732 if name.eq_ignore_ascii_case("VariableNames") {
733 options.variable_names = Some(variable_name_list(value)?);
734 } else if name.eq_ignore_ascii_case("RowNames") {
735 options.row_names = Some(string_list(value)?);
736 }
737 idx += 2;
738 continue;
739 }
740 }
741 variables.push(args[idx].clone());
742 idx += 1;
743 }
744 Ok((variables, options))
745}
746
/// Returns `true` when `name` is `VariableNames` or `RowNames`, ignoring ASCII case.
fn is_table_constructor_option(name: &str) -> bool {
    ["VariableNames", "RowNames"]
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}
750
/// Import options shared by `readtable` and `detectImportOptions`.
///
/// Populated by `ReadTableOptions::parse` from an optional leading options struct
/// followed by name-value pairs; each field corresponds to the option of the same name.
#[derive(Clone)]
struct ReadTableOptions {
    // `FileType` option.
    file_type: ImportFileType,
    // `Delimiter` option; `None` until the option is supplied.
    delimiter: Option<Delimiter>,
    // `ReadVariableNames` option; `None` until the option is supplied.
    read_variable_names: Option<bool>,
    // `ReadRowNames` option.
    read_row_names: bool,
    // `NumVariables` option; a supplied value of 0 is stored as `None`.
    num_variables: Option<usize>,
    // `VariableNames` option.
    variable_names: Option<Vec<String>>,
    // `VariableTypes` option.
    variable_types: Option<Vec<ImportVariableType>>,
    // `RowNames` option.
    row_names: Option<Vec<String>>,
    // `NumHeaderLines` option.
    num_header_lines: usize,
    // Set by either `Range` or `DataRange`; whichever is applied last wins.
    range: Option<RangeSpec>,
    // `Sheet` option.
    sheet: Option<SheetSelector>,
    // Set by `PreserveVariableNames`, or by `VariableNamingRule` ("preserve" / "modify").
    preserve_variable_names: bool,
    // `TreatAsMissing` tokens, stored trimmed and ASCII-lowercased; repeated options accumulate.
    treat_as_missing: HashSet<String>,
    // `EmptyLineRule` option ("read" / "skip").
    empty_line_rule: EmptyLineRule,
    // `TextType` option.
    text_type: TextImportType,
    // `Encoding` option, stored as given after passing `validate_encoding_label`.
    encoding: String,
    // `DatetimeType` option.
    datetime_type: DatetimeImportType,
}
771
impl Default for ReadTableOptions {
    /// Baseline options used before any user-supplied option is applied:
    /// automatic file type, nothing overridden, no header lines, empty lines skipped,
    /// string text type, UTF-8 encoding, and datetime import type.
    fn default() -> Self {
        Self {
            file_type: ImportFileType::Auto,
            delimiter: None,
            read_variable_names: None,
            read_row_names: false,
            num_variables: None,
            variable_names: None,
            variable_types: None,
            row_names: None,
            num_header_lines: 0,
            range: None,
            sheet: None,
            preserve_variable_names: false,
            treat_as_missing: HashSet::new(),
            empty_line_rule: EmptyLineRule::Skip,
            text_type: TextImportType::String,
            encoding: "utf-8".to_string(),
            datetime_type: DatetimeImportType::Datetime,
        }
    }
}
795
796impl ReadTableOptions {
797 fn parse(args: &[Value]) -> BuiltinResult<Self> {
798 let mut options = Self::default();
799 let mut idx = 0usize;
800 if let Some(Value::Struct(st)) = args.first() {
801 for (name, value) in &st.fields {
802 options.apply(name, value)?;
803 }
804 idx = 1;
805 }
806 while idx < args.len() {
807 if idx + 1 >= args.len() {
808 return Err(invalid_argument(
809 "readtable: name-value options must be provided in pairs",
810 ));
811 }
812 let name = scalar_text(&args[idx], "readtable option")?;
813 options.apply(&name, &args[idx + 1])?;
814 idx += 2;
815 }
816 Ok(options)
817 }
818
819 fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
820 if name.eq_ignore_ascii_case("FileType") {
821 self.file_type = ImportFileType::parse(value)?;
822 } else if name.eq_ignore_ascii_case("Delimiter") {
823 self.delimiter = Some(Delimiter::parse(value)?);
824 } else if name.eq_ignore_ascii_case("ReadVariableNames") {
825 self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
826 } else if name.eq_ignore_ascii_case("ReadRowNames") {
827 self.read_row_names = bool_scalar(value, "ReadRowNames")?;
828 } else if name.eq_ignore_ascii_case("NumVariables") {
829 let count = nonnegative_usize(value, "NumVariables")?;
830 self.num_variables = (count > 0).then_some(count);
831 } else if name.eq_ignore_ascii_case("VariableNames") {
832 self.variable_names = optional_raw_variable_name_list(value)?;
833 } else if name.eq_ignore_ascii_case("VariableTypes") {
834 self.variable_types = optional_variable_type_list(value)?;
835 } else if name.eq_ignore_ascii_case("RowNames") {
836 self.row_names = Some(string_list(value)?);
837 } else if name.eq_ignore_ascii_case("NumHeaderLines") {
838 self.num_header_lines = nonnegative_usize(value, "NumHeaderLines")?;
839 } else if name.eq_ignore_ascii_case("Range") {
840 self.range = Some(RangeSpec::parse(value)?);
841 } else if name.eq_ignore_ascii_case("DataRange") {
842 self.range = optional_range_spec(value)?;
843 } else if name.eq_ignore_ascii_case("Sheet") {
844 self.sheet = optional_sheet_selector(value)?;
845 } else if name.eq_ignore_ascii_case("TreatAsMissing") {
846 for token in string_list(value)? {
847 self.treat_as_missing
848 .insert(token.trim().to_ascii_lowercase());
849 }
850 } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
851 self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
852 } else if name.eq_ignore_ascii_case("VariableNamingRule") {
853 let rule = scalar_text(value, "VariableNamingRule")?;
854 if rule.eq_ignore_ascii_case("preserve") {
855 self.preserve_variable_names = true;
856 } else if rule.eq_ignore_ascii_case("modify") {
857 self.preserve_variable_names = false;
858 } else {
859 return Err(invalid_argument(format!(
860 "readtable: unsupported VariableNamingRule '{rule}'"
861 )));
862 }
863 } else if name.eq_ignore_ascii_case("EmptyLineRule") {
864 let rule = scalar_text(value, "EmptyLineRule")?;
865 self.empty_line_rule = if rule.eq_ignore_ascii_case("read") {
866 EmptyLineRule::Read
867 } else if rule.eq_ignore_ascii_case("skip") {
868 EmptyLineRule::Skip
869 } else {
870 return Err(invalid_argument(format!(
871 "readtable: unsupported EmptyLineRule '{rule}'"
872 )));
873 };
874 } else if name.eq_ignore_ascii_case("Encoding") {
875 let encoding = scalar_text(value, "Encoding")?;
876 validate_encoding_label(&encoding)?;
877 self.encoding = encoding;
878 } else if name.eq_ignore_ascii_case("TextType") {
879 self.text_type = TextImportType::parse(value, "readtable")?;
880 } else if name.eq_ignore_ascii_case("DatetimeType") {
881 self.datetime_type = DatetimeImportType::parse(value)?;
882 } else {
883 return Err(invalid_argument(format!(
884 "readtable: unsupported option '{name}'"
885 )));
886 }
887 Ok(())
888 }
889
890 fn is_missing(&self, token: &str) -> bool {
891 let trimmed = token.trim();
892 trimmed.is_empty()
893 || self
894 .treat_as_missing
895 .contains(&trimmed.to_ascii_lowercase())
896 }
897}
898
899fn spreadsheet_import_options(args: Vec<Value>) -> BuiltinResult<Value> {
900 if !args.len().is_multiple_of(2) {
901 return Err(invalid_argument(
902 "spreadsheetImportOptions: name-value options must be provided in pairs",
903 ));
904 }
905 let mut options = SpreadsheetImportOptions::default();
906 let mut idx = 0usize;
907 while idx < args.len() {
908 let name = scalar_text(&args[idx], "spreadsheetImportOptions option")?;
909 options.apply(&name, &args[idx + 1])?;
910 idx += 2;
911 }
912 Ok(Value::Struct(options.into_struct()?))
913}
914
915async fn detect_import_options_from_file(
916 path: &Path,
917 options: &ReadTableOptions,
918) -> BuiltinResult<Value> {
919 match options.file_type {
920 ImportFileType::Spreadsheet => detect_spreadsheet_import_options(path, options).await,
921 ImportFileType::Text => detect_text_import_options(path, options).await,
922 ImportFileType::Auto if is_spreadsheet_path(path) => {
923 detect_spreadsheet_import_options(path, options).await
924 }
925 ImportFileType::Auto => detect_text_import_options(path, options).await,
926 }
927}
928
929async fn detect_text_import_options(
930 path: &Path,
931 options: &ReadTableOptions,
932) -> BuiltinResult<Value> {
933 if options.sheet.is_some() {
934 return Err(invalid_argument(
935 "detectImportOptions: Sheet is only valid for spreadsheet files",
936 ));
937 }
938 let bytes = read_file_bytes(path).await?;
939 let text = strip_utf8_bom(decode_text_bytes(&bytes, &options.encoding)?);
940 let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
941 if let Some(first) = raw_lines.first_mut() {
942 if first.starts_with('\u{FEFF}') {
943 *first = first.trim_start_matches('\u{FEFF}').to_string();
944 }
945 }
946 let delimiter = options
947 .delimiter
948 .clone()
949 .or_else(|| detect_delimiter(&raw_lines))
950 .unwrap_or(Delimiter::Whitespace);
951 let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
952 if options.num_header_lines > 0 {
953 rows = rows.into_iter().skip(options.num_header_lines).collect();
954 }
955 if let Some(range) = options.range {
956 rows = apply_import_range(rows, range);
957 }
958 detected_options_from_rows(
959 ImportFileType::Text,
960 rows,
961 options,
962 Some(delimiter),
963 options.sheet.as_ref(),
964 )
965}
966
967async fn detect_spreadsheet_import_options(
968 path: &Path,
969 options: &ReadTableOptions,
970) -> BuiltinResult<Value> {
971 if options.delimiter.is_some() {
972 return Err(invalid_argument(
973 "detectImportOptions: Delimiter is only valid for text files",
974 ));
975 }
976 let bytes = read_file_bytes(path).await?;
977 let cursor = Cursor::new(bytes);
978 let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
979 table_error(
980 &TABLE_ERROR_UNSUPPORTED_FILE,
981 format!(
982 "detectImportOptions: unable to open spreadsheet '{}': {err}",
983 path.display()
984 ),
985 )
986 })?;
987 let range = match &options.sheet {
988 Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
989 invalid_argument(format!(
990 "detectImportOptions: unable to read sheet '{name}': {err:?}"
991 ))
992 })?,
993 Some(SheetSelector::Index(index)) => workbook
994 .worksheet_range_at(*index)
995 .ok_or_else(|| {
996 invalid_argument(format!(
997 "detectImportOptions: sheet index {} exceeds bounds",
998 index + 1
999 ))
1000 })?
1001 .map_err(|err| {
1002 invalid_argument(format!(
1003 "detectImportOptions: unable to read sheet {}: {err:?}",
1004 index + 1
1005 ))
1006 })?,
1007 None => workbook
1008 .worksheet_range_at(0)
1009 .ok_or_else(|| {
1010 invalid_argument("detectImportOptions: spreadsheet contains no worksheets")
1011 })?
1012 .map_err(|err| {
1013 invalid_argument(format!(
1014 "detectImportOptions: unable to read first sheet: {err:?}"
1015 ))
1016 })?,
1017 };
1018 let rows = spreadsheet_range_to_rows(&range, options)?;
1019 detected_options_from_rows(
1020 ImportFileType::Spreadsheet,
1021 rows,
1022 options,
1023 None,
1024 options.sheet.as_ref(),
1025 )
1026}
1027
1028fn detected_options_from_rows(
1029 file_type: ImportFileType,
1030 mut rows: Vec<Vec<ImportCell>>,
1031 options: &ReadTableOptions,
1032 delimiter: Option<Delimiter>,
1033 sheet: Option<&SheetSelector>,
1034) -> BuiltinResult<Value> {
1035 let mut variable_names = options.variable_names.clone();
1036 let read_variable_names = options
1037 .read_variable_names
1038 .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
1039 let header_rows_consumed = usize::from(read_variable_names && variable_names.is_none());
1040 if header_rows_consumed > 0 && !rows.is_empty() {
1041 variable_names = Some(
1042 rows.remove(0)
1043 .into_iter()
1044 .map(|cell| cell.display_text())
1045 .collect(),
1046 );
1047 }
1048
1049 let mut data_rows = rows;
1050 let mut data_variable_names = variable_names.clone();
1051 let row_name_header = if options.read_row_names {
1052 for row in &mut data_rows {
1053 if !row.is_empty() {
1054 row.remove(0);
1055 }
1056 }
1057 let mut header = None;
1058 if let Some(names) = data_variable_names.as_mut() {
1059 if !names.is_empty() {
1060 header = Some(names.remove(0));
1061 }
1062 }
1063 Some(
1064 header
1065 .filter(|name| !name.is_empty())
1066 .unwrap_or_else(|| "Row".to_string()),
1067 )
1068 } else {
1069 None
1070 };
1071
1072 let column_count = import_column_count(&data_rows, &data_variable_names, options)?;
1073 let data_names = import_variable_names(data_variable_names, column_count, options);
1074 let names = if let Some(row_name_header) = row_name_header {
1075 let mut names = Vec::with_capacity(data_names.len() + 1);
1076 names.push(row_name_header);
1077 names.extend(data_names);
1078 names
1079 } else {
1080 data_names
1081 };
1082 let types = detected_variable_type_labels(&data_rows, options, column_count)?;
1083 let output_num_header_lines = detected_output_header_lines(options, header_rows_consumed);
1084 let output_range = detected_output_range(options.range, header_rows_consumed);
1085
1086 let mut out = StructValue::new();
1087 out.insert("FileType", Value::String(import_file_type_label(file_type)));
1088 if let Some(delimiter) = delimiter {
1089 out.insert("Delimiter", Value::String(delimiter_label(&delimiter)));
1090 }
1091 out.insert("NumHeaderLines", Value::Num(output_num_header_lines as f64));
1092 out.insert("ReadVariableNames", Value::Bool(false));
1093 out.insert("ReadRowNames", Value::Bool(options.read_row_names));
1094 out.insert("NumVariables", Value::Num(column_count as f64));
1095 out.insert(
1096 "VariableNames",
1097 string_array_value(names, "detectImportOptions")?,
1098 );
1099 out.insert(
1100 "VariableTypes",
1101 string_array_value(types, "detectImportOptions")?,
1102 );
1103 if let Some(range) = output_range {
1104 out.insert("Range", range_spec_value(range)?);
1105 out.insert("DataRange", range_spec_value(range)?);
1106 }
1107 if let Some(sheet) = sheet {
1108 out.insert("Sheet", sheet_value(sheet));
1109 }
1110 let mut treat_as_missing = options.treat_as_missing.iter().cloned().collect::<Vec<_>>();
1111 treat_as_missing.sort();
1112 out.insert(
1113 "TreatAsMissing",
1114 string_array_value(treat_as_missing, "detectImportOptions")?,
1115 );
1116 out.insert(
1117 "PreserveVariableNames",
1118 Value::Bool(options.preserve_variable_names),
1119 );
1120 out.insert(
1121 "VariableNamingRule",
1122 Value::String(if options.preserve_variable_names {
1123 "preserve".to_string()
1124 } else {
1125 "modify".to_string()
1126 }),
1127 );
1128 out.insert(
1129 "EmptyLineRule",
1130 Value::String(
1131 match options.empty_line_rule {
1132 EmptyLineRule::Skip => "skip",
1133 EmptyLineRule::Read => "read",
1134 }
1135 .to_string(),
1136 ),
1137 );
1138 out.insert(
1139 "TextType",
1140 Value::String(
1141 match options.text_type {
1142 TextImportType::String => "string",
1143 TextImportType::Char => "char",
1144 }
1145 .to_string(),
1146 ),
1147 );
1148 out.insert(
1149 "DatetimeType",
1150 Value::String(
1151 match options.datetime_type {
1152 DatetimeImportType::Datetime => "datetime",
1153 DatetimeImportType::Text => "text",
1154 DatetimeImportType::ExcelDatenum => "exceldatenum",
1155 }
1156 .to_string(),
1157 ),
1158 );
1159 out.insert("Encoding", Value::String(options.encoding.clone()));
1160 Ok(Value::Struct(out))
1161}
1162
1163fn detected_variable_type_labels(
1164 rows: &[Vec<ImportCell>],
1165 options: &ReadTableOptions,
1166 column_count: usize,
1167) -> BuiltinResult<Vec<String>> {
1168 if let Some(requested) = &options.variable_types {
1169 let mut labels = requested
1170 .iter()
1171 .map(import_variable_type_label)
1172 .collect::<Vec<_>>();
1173 while labels.len() < column_count {
1174 labels.push("auto".to_string());
1175 }
1176 labels.truncate(column_count);
1177 return Ok(labels);
1178 }
1179 Ok((0..column_count)
1180 .map(|col| {
1181 let values = rows
1182 .iter()
1183 .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
1184 .collect::<Vec<_>>();
1185 infer_import_type_label(&values, options)
1186 })
1187 .collect())
1188}
1189
1190fn infer_import_type_label(values: &[ImportCell], options: &ReadTableOptions) -> String {
1191 if values
1192 .iter()
1193 .all(|value| is_detected_numeric(value, options))
1194 {
1195 return "double".to_string();
1196 }
1197 if values
1198 .iter()
1199 .all(|value| is_detected_logical(value, options))
1200 {
1201 return "logical".to_string();
1202 }
1203 if !matches!(options.datetime_type, DatetimeImportType::Text)
1204 && values
1205 .iter()
1206 .all(|value| is_detected_datetime(value, options))
1207 {
1208 return "datetime".to_string();
1209 }
1210 match options.text_type {
1211 TextImportType::String => "string".to_string(),
1212 TextImportType::Char => "char".to_string(),
1213 }
1214}
1215
1216fn is_detected_numeric(value: &ImportCell, options: &ReadTableOptions) -> bool {
1217 match value {
1218 ImportCell::Empty | ImportCell::Number(_) => true,
1219 ImportCell::Text(text) => {
1220 let token = unquote(text.trim()).trim();
1221 options.is_missing(token) || parse_numeric(token).is_some()
1222 }
1223 _ => false,
1224 }
1225}
1226
1227fn is_detected_logical(value: &ImportCell, options: &ReadTableOptions) -> bool {
1228 match value {
1229 ImportCell::Empty | ImportCell::Logical(_) => true,
1230 ImportCell::Text(text) => {
1231 let token = unquote(text.trim()).trim();
1232 options.is_missing(token) || parse_logical(token).is_some()
1233 }
1234 _ => false,
1235 }
1236}
1237
1238fn is_detected_datetime(value: &ImportCell, options: &ReadTableOptions) -> bool {
1239 match value {
1240 ImportCell::Empty | ImportCell::DateTime(_) => true,
1241 ImportCell::Text(text) => {
1242 let token = unquote(text.trim()).trim();
1243 options.is_missing(token) || parse_iso_datetime_to_datenum(token).is_some()
1244 }
1245 _ => false,
1246 }
1247}
1248
1249fn import_variable_type_label(kind: &ImportVariableType) -> String {
1250 match kind {
1251 ImportVariableType::Auto => "auto",
1252 ImportVariableType::Numeric(NumericDType::F64) => "double",
1253 ImportVariableType::Numeric(NumericDType::F32) => "single",
1254 ImportVariableType::Numeric(NumericDType::U8) => "uint8",
1255 ImportVariableType::Numeric(NumericDType::U16) => "uint16",
1256 ImportVariableType::Logical => "logical",
1257 ImportVariableType::Text(TextImportType::String) => "string",
1258 ImportVariableType::Text(TextImportType::Char) => "char",
1259 ImportVariableType::CellStr => "cellstr",
1260 ImportVariableType::Datetime => "datetime",
1261 ImportVariableType::Duration => "duration",
1262 }
1263 .to_string()
1264}
1265
1266fn detected_output_header_lines(options: &ReadTableOptions, header_rows_consumed: usize) -> usize {
1267 if options.range.is_some() {
1268 options.num_header_lines
1269 } else {
1270 options.num_header_lines + header_rows_consumed
1271 }
1272}
1273
1274fn detected_output_range(
1275 range: Option<RangeSpec>,
1276 header_rows_consumed: usize,
1277) -> Option<RangeSpec> {
1278 range.map(|mut range| {
1279 range.start_row = range.start_row.saturating_add(header_rows_consumed);
1280 range
1281 })
1282}
1283
1284fn import_file_type_label(file_type: ImportFileType) -> String {
1285 match file_type {
1286 ImportFileType::Text | ImportFileType::Auto => "text",
1287 ImportFileType::Spreadsheet => "spreadsheet",
1288 }
1289 .to_string()
1290}
1291
1292fn delimiter_label(delimiter: &Delimiter) -> String {
1293 match delimiter {
1294 Delimiter::Char('\t') => "\t".to_string(),
1295 Delimiter::Char(ch) => ch.to_string(),
1296 Delimiter::String(text) => text.clone(),
1297 Delimiter::Whitespace => "whitespace".to_string(),
1298 }
1299}
1300
1301fn sheet_value(sheet: &SheetSelector) -> Value {
1302 match sheet {
1303 SheetSelector::Name(name) => Value::String(name.clone()),
1304 SheetSelector::Index(index) => Value::Num((*index + 1) as f64),
1305 }
1306}
1307
1308fn range_spec_value(range: RangeSpec) -> BuiltinResult<Value> {
1309 Ok(Value::String(range_spec_text(range)))
1310}
1311
1312fn range_spec_text(range: RangeSpec) -> String {
1313 let has_end = range.end_row.is_some() || range.end_col.is_some();
1314 let include_start_col = range.start_col > 0 || range.end_col.is_some() || !has_end;
1315 let include_start_row = range.start_row > 0 || range.end_row.is_some() || !has_end;
1316 let start = range_ref_text(
1317 range.start_row,
1318 range.start_col,
1319 include_start_row,
1320 include_start_col,
1321 );
1322 if !has_end {
1323 return start;
1324 }
1325
1326 let end = range_ref_text(
1327 range.end_row.unwrap_or(0),
1328 range.end_col.unwrap_or(0),
1329 range.end_row.is_some(),
1330 range.end_col.is_some(),
1331 );
1332 format!("{start}:{end}")
1333}
1334
1335fn range_ref_text(row: usize, col: usize, include_row: bool, include_col: bool) -> String {
1336 let mut out = String::new();
1337 if include_col {
1338 out.push_str(&spreadsheet_column_label(col));
1339 }
1340 if include_row {
1341 out.push_str(&(row + 1).to_string());
1342 }
1343 out
1344}
1345
/// Converts a zero-based column index to spreadsheet letters
/// (`0` -> `A`, `25` -> `Z`, `26` -> `AA`).
fn spreadsheet_column_label(mut col: usize) -> String {
    // Letters are produced least-significant first and reversed at the end.
    let mut reversed = String::new();
    loop {
        let letter = b'A' + (col % 26) as u8;
        reversed.push(char::from(letter));
        if col < 26 {
            break;
        }
        // The scheme has no zero digit, so step down by one per carried position.
        col = col / 26 - 1;
    }
    reversed.chars().rev().collect()
}
1358
1359fn string_array_value(values: Vec<String>, context: &str) -> BuiltinResult<Value> {
1360 let len = values.len();
1361 StringArray::new(values, vec![1, len])
1362 .map(Value::StringArray)
1363 .map_err(|err| invalid_variable(format!("{context}: {err}")))
1364}
1365
1366#[derive(Clone)]
1367struct SpreadsheetImportOptions {
1368 num_variables: usize,
1369 read_variable_names: Option<bool>,
1370 read_row_names: bool,
1371 variable_names: Vec<String>,
1372 variable_types: Vec<String>,
1373 data_range: Option<Value>,
1374 sheet: Option<Value>,
1375 treat_as_missing: Vec<String>,
1376 preserve_variable_names: bool,
1377 empty_line_rule: String,
1378 text_type: String,
1379 datetime_type: String,
1380}
1381
1382impl Default for SpreadsheetImportOptions {
1383 fn default() -> Self {
1384 let num_variables = 0;
1385 Self {
1386 num_variables,
1387 read_variable_names: None,
1388 read_row_names: false,
1389 variable_names: Vec::new(),
1390 variable_types: Vec::new(),
1391 data_range: None,
1392 sheet: None,
1393 treat_as_missing: Vec::new(),
1394 preserve_variable_names: false,
1395 empty_line_rule: "skip".to_string(),
1396 text_type: "string".to_string(),
1397 datetime_type: "datetime".to_string(),
1398 }
1399 }
1400}
1401
1402impl SpreadsheetImportOptions {
1403 fn apply(&mut self, name: &str, value: &Value) -> BuiltinResult<()> {
1404 if name.eq_ignore_ascii_case("NumVariables") {
1405 self.resize_variables(positive_usize(value, "NumVariables")?);
1406 } else if name.eq_ignore_ascii_case("VariableNames") {
1407 self.variable_names = raw_variable_name_list(value)?;
1408 self.align_variable_metadata_count(self.variable_names.len(), "VariableNames")?;
1409 self.ensure_variable_metadata_len();
1410 } else if name.eq_ignore_ascii_case("VariableTypes") {
1411 let types = variable_type_names(value)?;
1412 self.variable_types = types;
1413 self.align_variable_metadata_count(self.variable_types.len(), "VariableTypes")?;
1414 self.ensure_variable_metadata_len();
1415 } else if name.eq_ignore_ascii_case("DataRange") || name.eq_ignore_ascii_case("Range") {
1416 self.data_range = if option_value_is_empty(value) {
1417 None
1418 } else {
1419 RangeSpec::parse(value)?;
1420 Some(value.clone())
1421 };
1422 } else if name.eq_ignore_ascii_case("Sheet") {
1423 self.sheet = if option_value_is_empty(value) {
1424 None
1425 } else {
1426 SheetSelector::parse(value)?;
1427 Some(value.clone())
1428 };
1429 } else if name.eq_ignore_ascii_case("ReadVariableNames") {
1430 self.read_variable_names = Some(bool_scalar(value, "ReadVariableNames")?);
1431 } else if name.eq_ignore_ascii_case("ReadRowNames") {
1432 self.read_row_names = bool_scalar(value, "ReadRowNames")?;
1433 } else if name.eq_ignore_ascii_case("TreatAsMissing") {
1434 self.treat_as_missing = string_list(value)?;
1435 } else if name.eq_ignore_ascii_case("PreserveVariableNames") {
1436 self.preserve_variable_names = bool_scalar(value, "PreserveVariableNames")?;
1437 } else if name.eq_ignore_ascii_case("VariableNamingRule") {
1438 let rule = scalar_text(value, "VariableNamingRule")?;
1439 if rule.eq_ignore_ascii_case("preserve") {
1440 self.preserve_variable_names = true;
1441 } else if rule.eq_ignore_ascii_case("modify") {
1442 self.preserve_variable_names = false;
1443 } else {
1444 return Err(invalid_argument(format!(
1445 "spreadsheetImportOptions: unsupported VariableNamingRule '{rule}'"
1446 )));
1447 }
1448 } else if name.eq_ignore_ascii_case("EmptyLineRule") {
1449 let rule = scalar_text(value, "EmptyLineRule")?;
1450 if !(rule.eq_ignore_ascii_case("read") || rule.eq_ignore_ascii_case("skip")) {
1451 return Err(invalid_argument(format!(
1452 "spreadsheetImportOptions: unsupported EmptyLineRule '{rule}'"
1453 )));
1454 }
1455 self.empty_line_rule = rule.to_ascii_lowercase();
1456 } else if name.eq_ignore_ascii_case("TextType") {
1457 let text_type = scalar_text(value, "TextType")?;
1458 if !(text_type.eq_ignore_ascii_case("string") || text_type.eq_ignore_ascii_case("char"))
1459 {
1460 return Err(invalid_argument(format!(
1461 "spreadsheetImportOptions: unsupported TextType '{text_type}'"
1462 )));
1463 }
1464 self.text_type = text_type.to_ascii_lowercase();
1465 } else if name.eq_ignore_ascii_case("DatetimeType") {
1466 let datetime_type = scalar_text(value, "DatetimeType")?;
1467 if !(datetime_type.eq_ignore_ascii_case("datetime")
1468 || datetime_type.eq_ignore_ascii_case("text")
1469 || datetime_type.eq_ignore_ascii_case("exceldatenum"))
1470 {
1471 return Err(invalid_argument(format!(
1472 "spreadsheetImportOptions: unsupported DatetimeType '{datetime_type}'"
1473 )));
1474 }
1475 self.datetime_type = datetime_type.to_ascii_lowercase();
1476 } else {
1477 return Err(invalid_argument(format!(
1478 "spreadsheetImportOptions: unsupported option '{name}'"
1479 )));
1480 }
1481 Ok(())
1482 }
1483
1484 fn resize_variables(&mut self, num_variables: usize) {
1485 self.num_variables = num_variables;
1486 if self.variable_names.len() > num_variables {
1487 self.variable_names.truncate(num_variables);
1488 }
1489 if self.variable_types.len() > num_variables {
1490 self.variable_types.truncate(num_variables);
1491 }
1492 self.ensure_variable_metadata_len();
1493 }
1494
1495 fn align_variable_metadata_count(&mut self, len: usize, field: &str) -> BuiltinResult<()> {
1496 if self.num_variables == 0 {
1497 self.num_variables = len;
1498 return Ok(());
1499 }
1500 if len > self.num_variables {
1501 return Err(invalid_argument(format!(
1502 "spreadsheetImportOptions: {field} length exceeds NumVariables"
1503 )));
1504 }
1505 Ok(())
1506 }
1507
1508 fn ensure_variable_metadata_len(&mut self) {
1509 if self.num_variables == 0 {
1510 return;
1511 }
1512 while self.variable_names.len() < self.num_variables {
1513 self.variable_names
1514 .push(format!("Var{}", self.variable_names.len() + 1));
1515 }
1516 self.variable_names.truncate(self.num_variables);
1517 while self.variable_types.len() < self.num_variables {
1518 self.variable_types.push("auto".to_string());
1519 }
1520 self.variable_types.truncate(self.num_variables);
1521 }
1522
1523 fn into_struct(mut self) -> BuiltinResult<StructValue> {
1524 self.ensure_variable_metadata_len();
1525 let mut out = StructValue::new();
1526 out.insert("FileType", Value::String("spreadsheet".to_string()));
1527 out.insert("NumVariables", Value::Num(self.num_variables as f64));
1528 if let Some(read_variable_names) = self.read_variable_names {
1529 out.insert("ReadVariableNames", Value::Bool(read_variable_names));
1530 }
1531 out.insert("ReadRowNames", Value::Bool(self.read_row_names));
1532 out.insert(
1533 "VariableNames",
1534 Value::StringArray(
1535 StringArray::new(
1536 self.variable_names.clone(),
1537 vec![1, self.variable_names.len()],
1538 )
1539 .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1540 ),
1541 );
1542 out.insert(
1543 "VariableTypes",
1544 Value::StringArray(
1545 StringArray::new(
1546 self.variable_types.clone(),
1547 vec![1, self.variable_types.len()],
1548 )
1549 .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1550 ),
1551 );
1552 out.insert(
1553 "DataRange",
1554 self.data_range
1555 .unwrap_or_else(|| Value::String(String::new())),
1556 );
1557 out.insert(
1558 "Sheet",
1559 self.sheet.unwrap_or_else(|| Value::String(String::new())),
1560 );
1561 out.insert(
1562 "TreatAsMissing",
1563 Value::StringArray(
1564 StringArray::new(
1565 self.treat_as_missing.clone(),
1566 vec![1, self.treat_as_missing.len()],
1567 )
1568 .map_err(|err| invalid_variable(format!("spreadsheetImportOptions: {err}")))?,
1569 ),
1570 );
1571 out.insert(
1572 "PreserveVariableNames",
1573 Value::Bool(self.preserve_variable_names),
1574 );
1575 out.insert(
1576 "VariableNamingRule",
1577 Value::String(if self.preserve_variable_names {
1578 "preserve".to_string()
1579 } else {
1580 "modify".to_string()
1581 }),
1582 );
1583 out.insert("EmptyLineRule", Value::String(self.empty_line_rule));
1584 out.insert("TextType", Value::String(self.text_type));
1585 out.insert("DatetimeType", Value::String(self.datetime_type));
1586 Ok(out)
1587 }
1588}
1589
1590#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1591enum ImportVariableType {
1592 Auto,
1593 Numeric(NumericDType),
1594 Logical,
1595 Text(TextImportType),
1596 CellStr,
1597 Datetime,
1598 Duration,
1599}
1600
1601impl ImportVariableType {
1602 fn parse(raw: &str) -> BuiltinResult<Self> {
1603 match raw.trim().to_ascii_lowercase().as_str() {
1604 "" | "auto" => Ok(Self::Auto),
1605 "double" => Ok(Self::Numeric(NumericDType::F64)),
1606 "single" => Ok(Self::Numeric(NumericDType::F32)),
1607 "uint8" => Ok(Self::Numeric(NumericDType::U8)),
1608 "uint16" => Ok(Self::Numeric(NumericDType::U16)),
1609 "logical" | "bool" | "boolean" => Ok(Self::Logical),
1610 "string" => Ok(Self::Text(TextImportType::String)),
1611 "char" => Ok(Self::Text(TextImportType::Char)),
1612 "cellstr" => Ok(Self::CellStr),
1613 "int8" | "int16" | "int32" | "int64" | "uint32" | "uint64" => {
1614 Err(invalid_argument(format!(
1615 "readtable: unsupported VariableTypes entry '{}'; RunMat table imports currently support double, single, uint8, and uint16 numeric arrays",
1616 raw.trim()
1617 )))
1618 }
1619 "categorical" => Err(invalid_argument(
1620 "readtable: unsupported VariableTypes entry 'categorical'; categorical arrays are not implemented in RunMat yet",
1621 )),
1622 "datetime" => Ok(Self::Datetime),
1623 "duration" => Ok(Self::Duration),
1624 other => Err(invalid_argument(format!(
1625 "readtable: unsupported VariableTypes entry '{other}'"
1626 ))),
1627 }
1628 }
1629
1630 fn canonical_label(raw: &str) -> BuiltinResult<String> {
1631 Self::parse(raw)?;
1632 let label = raw.trim().to_ascii_lowercase();
1633 Ok(if label.is_empty() {
1634 "auto".to_string()
1635 } else {
1636 label
1637 })
1638 }
1639}
1640
1641#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1642enum TextImportType {
1643 String,
1644 Char,
1645}
1646
1647impl TextImportType {
1648 fn parse(value: &Value, context: &str) -> BuiltinResult<Self> {
1649 let text_type = scalar_text(value, "TextType")?;
1650 match text_type.trim().to_ascii_lowercase().as_str() {
1651 "string" => Ok(Self::String),
1652 "char" => Ok(Self::Char),
1653 other => Err(invalid_argument(format!(
1654 "{context}: unsupported TextType '{other}'"
1655 ))),
1656 }
1657 }
1658}
1659
/// Value of the `EmptyLineRule` import option.
#[derive(Copy, Clone)]
enum EmptyLineRule {
    /// Selected by the option value `"skip"`.
    Skip,
    /// Selected by the option value `"read"`.
    Read,
}
1665
1666#[derive(Clone, Copy)]
1667enum DatetimeImportType {
1668 Datetime,
1669 Text,
1670 ExcelDatenum,
1671}
1672
1673impl DatetimeImportType {
1674 fn parse(value: &Value) -> BuiltinResult<Self> {
1675 let text = scalar_text(value, "DatetimeType")?;
1676 match text.trim().to_ascii_lowercase().as_str() {
1677 "datetime" => Ok(Self::Datetime),
1678 "text" => Ok(Self::Text),
1679 "exceldatenum" => Ok(Self::ExcelDatenum),
1680 other => Err(invalid_argument(format!(
1681 "readtable: unsupported DatetimeType '{other}'"
1682 ))),
1683 }
1684 }
1685}
1686
1687#[derive(Clone, Copy, PartialEq, Eq)]
1688enum ImportFileType {
1689 Auto,
1690 Text,
1691 Spreadsheet,
1692}
1693
1694impl ImportFileType {
1695 fn parse(value: &Value) -> BuiltinResult<Self> {
1696 let text = scalar_text(value, "FileType")?;
1697 match text.trim().to_ascii_lowercase().as_str() {
1698 "auto" => Ok(Self::Auto),
1699 "text" | "delimitedtext" | "delimited" => Ok(Self::Text),
1700 "spreadsheet" | "excel" => Ok(Self::Spreadsheet),
1701 other => Err(invalid_argument(format!(
1702 "readtable: unsupported FileType '{other}'"
1703 ))),
1704 }
1705 }
1706}
1707
1708#[derive(Clone)]
1709enum SheetSelector {
1710 Name(String),
1711 Index(usize),
1712}
1713
1714impl SheetSelector {
1715 fn parse(value: &Value) -> BuiltinResult<Self> {
1716 match value {
1717 Value::Int(i) if i.to_i64() >= 1 => Ok(Self::Index(i.to_i64() as usize - 1)),
1718 Value::Num(n)
1719 if n.is_finite() && *n >= 1.0 && (n.round() - n).abs() <= f64::EPSILON =>
1720 {
1721 Ok(Self::Index(n.round() as usize - 1))
1722 }
1723 _ => {
1724 let text = scalar_text(value, "Sheet")?;
1725 if text.trim().is_empty() {
1726 return Err(invalid_argument("readtable: Sheet must not be empty"));
1727 }
1728 Ok(Self::Name(text))
1729 }
1730 }
1731 }
1732}
1733
1734#[derive(Clone)]
1735enum Delimiter {
1736 Char(char),
1737 String(String),
1738 Whitespace,
1739}
1740
1741impl Delimiter {
1742 fn parse(value: &Value) -> BuiltinResult<Self> {
1743 let text = scalar_text(value, "Delimiter")?;
1744 if text.is_empty() {
1745 return Err(invalid_argument("readtable: Delimiter must not be empty"));
1746 }
1747 match text.trim().to_ascii_lowercase().as_str() {
1748 "tab" => Ok(Self::Char('\t')),
1749 "space" | "whitespace" => Ok(Self::Whitespace),
1750 "comma" => Ok(Self::Char(',')),
1751 "semicolon" => Ok(Self::Char(';')),
1752 "bar" | "pipe" => Ok(Self::Char('|')),
1753 _ if text.chars().count() == 1 => Ok(Self::Char(text.chars().next().unwrap())),
1754 _ => Ok(Self::String(text)),
1755 }
1756 }
1757}
1758
1759#[derive(Clone, Copy)]
1760struct RangeSpec {
1761 start_row: usize,
1762 start_col: usize,
1763 end_row: Option<usize>,
1764 end_col: Option<usize>,
1765}
1766
1767impl RangeSpec {
1768 fn parse(value: &Value) -> BuiltinResult<Self> {
1769 match value {
1770 Value::String(text) => Self::parse_text(text),
1771 Value::CharArray(ca) if ca.rows == 1 => {
1772 let text: String = ca.data.iter().collect();
1773 Self::parse_text(&text)
1774 }
1775 Value::StringArray(sa) if sa.data.len() == 1 => Self::parse_text(&sa.data[0]),
1776 Value::Tensor(t) if t.data.len() == 2 || t.data.len() == 4 => {
1777 let mut indices = Vec::with_capacity(t.data.len());
1778 for value in &t.data {
1779 indices.push(one_based_to_zero(*value, usize::MAX, "Range")?);
1780 }
1781 Ok(Self {
1782 start_row: indices[0],
1783 start_col: indices[1],
1784 end_row: indices.get(2).copied(),
1785 end_col: indices.get(3).copied(),
1786 })
1787 }
1788 _ => Err(invalid_argument(
1789 "readtable: Range must be a cell reference string or numeric vector",
1790 )),
1791 }
1792 }
1793
1794 fn parse_text(text: &str) -> BuiltinResult<Self> {
1795 let trimmed = text.trim();
1796 if trimmed.is_empty() {
1797 return Err(invalid_argument("readtable: Range must not be empty"));
1798 }
1799 let parts: Vec<&str> = trimmed.split(':').collect();
1800 if parts.len() > 2 {
1801 return Err(invalid_argument(format!(
1802 "readtable: invalid Range specification '{trimmed}'"
1803 )));
1804 }
1805 let start = parse_cell_ref(parts[0])?;
1806 let end = if parts.len() == 2 {
1807 Some(parse_cell_ref(parts[1])?)
1808 } else {
1809 None
1810 };
1811 Ok(Self {
1812 start_row: start.0.unwrap_or(0),
1813 start_col: start.1.unwrap_or(0),
1814 end_row: end.and_then(|item| item.0),
1815 end_col: end.and_then(|item| item.1),
1816 })
1817 }
1818}
1819
1820fn parse_cell_ref(token: &str) -> BuiltinResult<(Option<usize>, Option<usize>)> {
1821 let mut letters = String::new();
1822 let mut digits = String::new();
1823 for ch in token.trim().chars() {
1824 if ch == '$' {
1825 continue;
1826 }
1827 if ch.is_ascii_alphabetic() {
1828 letters.push(ch.to_ascii_uppercase());
1829 } else if ch.is_ascii_digit() {
1830 digits.push(ch);
1831 } else {
1832 return Err(invalid_argument(format!(
1833 "readtable: invalid Range component '{token}'"
1834 )));
1835 }
1836 }
1837 let col = if letters.is_empty() {
1838 None
1839 } else {
1840 let mut value = 0usize;
1841 for ch in letters.chars() {
1842 value = value
1843 .checked_mul(26)
1844 .and_then(|v| v.checked_add((ch as u8 - b'A' + 1) as usize))
1845 .ok_or_else(|| invalid_argument("readtable: Range column overflow"))?;
1846 }
1847 Some(value - 1)
1848 };
1849 let row = if digits.is_empty() {
1850 None
1851 } else {
1852 let parsed = digits
1853 .parse::<usize>()
1854 .map_err(|_| invalid_argument("readtable: invalid Range row"))?;
1855 if parsed == 0 {
1856 return Err(invalid_argument("readtable: Range rows are one-based"));
1857 }
1858 Some(parsed - 1)
1859 };
1860 Ok((row, col))
1861}
1862
1863fn resolve_path(value: &Value) -> BuiltinResult<PathBuf> {
1864 let text = scalar_text(value, "filename").map_err(|_| {
1865 table_error(
1866 &TABLE_ERROR_INVALID_ARGUMENT,
1867 "readtable: filename must be a string scalar or character vector",
1868 )
1869 })?;
1870 if text.trim().is_empty() {
1871 return Err(invalid_argument("readtable: filename must not be empty"));
1872 }
1873 let expanded =
1874 expand_user_path(&text, "readtable").map_err(|msg| invalid_argument(msg.to_string()))?;
1875 Ok(Path::new(&expanded).to_path_buf())
1876}
1877
1878async fn read_table_from_file(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1879 match options.file_type {
1880 ImportFileType::Spreadsheet => read_spreadsheet_table(path, options).await,
1881 ImportFileType::Text => read_text_table(path, options).await,
1882 ImportFileType::Auto if is_spreadsheet_path(path) => {
1883 read_spreadsheet_table(path, options).await
1884 }
1885 ImportFileType::Auto => read_text_table(path, options).await,
1886 }
1887}
1888
1889async fn read_text_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1890 if options.sheet.is_some() {
1891 return Err(invalid_argument(
1892 "readtable: Sheet is only valid for spreadsheet files",
1893 ));
1894 }
1895 let bytes = read_file_bytes(path).await?;
1896 let text = strip_utf8_bom(decode_text_bytes(&bytes, &options.encoding)?);
1897 let mut raw_lines = text.lines().map(ToString::to_string).collect::<Vec<_>>();
1898 if let Some(first) = raw_lines.first_mut() {
1899 if first.starts_with('\u{FEFF}') {
1900 *first = first.trim_start_matches('\u{FEFF}').to_string();
1901 }
1902 }
1903 let delimiter = options
1904 .delimiter
1905 .clone()
1906 .or_else(|| detect_delimiter(&raw_lines))
1907 .unwrap_or(Delimiter::Whitespace);
1908 let mut rows = parse_text_records(&text, &delimiter, options.empty_line_rule);
1909 if options.num_header_lines > 0 {
1910 rows = rows.into_iter().skip(options.num_header_lines).collect();
1911 }
1912 if let Some(range) = options.range {
1913 rows = apply_import_range(rows, range);
1914 }
1915 import_rows_to_table(rows, options)
1916}
1917
1918async fn read_spreadsheet_table(path: &Path, options: &ReadTableOptions) -> BuiltinResult<Value> {
1919 if options.delimiter.is_some() {
1920 return Err(invalid_argument(
1921 "readtable: Delimiter is only valid for text files",
1922 ));
1923 }
1924 let bytes = read_file_bytes(path).await?;
1925 let cursor = Cursor::new(bytes);
1926 let mut workbook = open_workbook_auto_from_rs(cursor).map_err(|err| {
1927 table_error(
1928 &TABLE_ERROR_UNSUPPORTED_FILE,
1929 format!(
1930 "readtable: unable to open spreadsheet '{}': {err}",
1931 path.display()
1932 ),
1933 )
1934 })?;
1935 let range = match &options.sheet {
1936 Some(SheetSelector::Name(name)) => workbook.worksheet_range(name).map_err(|err| {
1937 invalid_argument(format!("readtable: unable to read sheet '{name}': {err:?}"))
1938 })?,
1939 Some(SheetSelector::Index(index)) => workbook
1940 .worksheet_range_at(*index)
1941 .ok_or_else(|| {
1942 invalid_argument(format!(
1943 "readtable: sheet index {} exceeds bounds",
1944 index + 1
1945 ))
1946 })?
1947 .map_err(|err| {
1948 invalid_argument(format!(
1949 "readtable: unable to read sheet {}: {err:?}",
1950 index + 1
1951 ))
1952 })?,
1953 None => workbook
1954 .worksheet_range_at(0)
1955 .ok_or_else(|| invalid_argument("readtable: spreadsheet contains no worksheets"))?
1956 .map_err(|err| {
1957 invalid_argument(format!("readtable: unable to read first sheet: {err:?}"))
1958 })?,
1959 };
1960 let rows = spreadsheet_range_to_rows(&range, options)?;
1961 import_rows_to_table(rows, options)
1962}
1963
1964async fn read_file_bytes(path: &Path) -> BuiltinResult<Vec<u8>> {
1965 let mut file = File::open_async(path).await.map_err(|err| {
1966 table_error_with_source(
1967 &TABLE_ERROR_IO,
1968 format!("readtable: unable to open '{}': {err}", path.display()),
1969 err,
1970 )
1971 })?;
1972 let mut bytes = Vec::new();
1973 file.read_to_end(&mut bytes).map_err(|err| {
1974 table_error_with_source(
1975 &TABLE_ERROR_IO,
1976 format!("readtable: unable to read '{}': {err}", path.display()),
1977 err,
1978 )
1979 })?;
1980 Ok(bytes)
1981}
1982
/// Reports whether the extension of `path` names a spreadsheet format.
///
/// The comparison ignores ASCII case. Paths with no extension, or with an
/// extension that is not valid UTF-8, are not spreadsheets.
fn is_spreadsheet_path(path: &Path) -> bool {
    const SPREADSHEET_EXTENSIONS: [&str; 5] = ["xls", "xlsx", "xlsm", "xlsb", "ods"];
    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| {
            SPREADSHEET_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
1992
1993fn validate_encoding_label(label: &str) -> BuiltinResult<()> {
1994 encoding_for_label(label)
1995 .map(|_| ())
1996 .ok_or_else(|| invalid_argument(format!("readtable: unsupported Encoding '{label}'")))
1997}
1998
1999fn encoding_for_label(label: &str) -> Option<&'static Encoding> {
2000 let label = label.trim();
2001 if label.is_empty()
2002 || label.eq_ignore_ascii_case("auto")
2003 || label.eq_ignore_ascii_case("default")
2004 || label.eq_ignore_ascii_case("system")
2005 || label.eq_ignore_ascii_case("native")
2006 || label.eq_ignore_ascii_case("utf-8")
2007 || label.eq_ignore_ascii_case("utf8")
2008 || label.eq_ignore_ascii_case("unicode")
2009 {
2010 return Some(UTF_8);
2011 }
2012 Encoding::for_label(label.as_bytes())
2013}
2014
2015fn decode_text_bytes(bytes: &[u8], encoding: &str) -> BuiltinResult<String> {
2016 let (encoding, offset) = if encoding.trim().eq_ignore_ascii_case("auto") {
2017 Encoding::for_bom(bytes).unwrap_or((UTF_8, 0))
2018 } else {
2019 (
2020 encoding_for_label(encoding).ok_or_else(|| {
2021 invalid_argument(format!("readtable: unsupported Encoding '{encoding}'"))
2022 })?,
2023 0,
2024 )
2025 };
2026 let (decoded, _, had_errors) = encoding.decode(&bytes[offset..]);
2027 if had_errors {
2028 return Err(table_error(
2029 &TABLE_ERROR_IO,
2030 format!(
2031 "readtable: unable to decode file contents using encoding '{}'",
2032 encoding.name()
2033 ),
2034 ));
2035 }
2036 Ok(decoded.into_owned())
2037}
2038
/// Removes a single leading U+FEFF byte-order mark from `text`, if present.
///
/// Text without the mark is returned unchanged and without reallocation.
fn strip_utf8_bom(text: String) -> String {
    match text.strip_prefix('\u{FEFF}') {
        Some(rest) => rest.to_owned(),
        None => text,
    }
}
2044
/// One raw cell produced by the text or spreadsheet readers, before the
/// column-level type inference / conversion step.
#[derive(Clone, Debug)]
enum ImportCell {
    /// No content: blank text, or a spreadsheet cell with no value.
    Empty,
    /// Unparsed text, kept exactly as it was read.
    Text(String),
    /// Numeric value (spreadsheet integers and floats both land here).
    Number(f64),
    /// Boolean value taken from a spreadsheet cell.
    Logical(bool),
    /// Date/time as the `f64` produced by `datenum_from_naive` or
    /// `parse_iso_datetime_to_datenum` (presumably a serial date number).
    DateTime(f64),
    /// Spreadsheet error cell, holding the error's display text.
    Error(String),
}
2054
2055impl ImportCell {
2056 fn from_text(text: String) -> Self {
2057 if text.trim().is_empty() {
2058 Self::Empty
2059 } else {
2060 Self::Text(text)
2061 }
2062 }
2063
2064 fn display_text(&self) -> String {
2065 match self {
2066 Self::Empty => String::new(),
2067 Self::Text(text) => text.clone(),
2068 Self::Number(value) => format_key_number(*value),
2069 Self::Logical(value) => value.to_string(),
2070 Self::DateTime(serial) => format_key_number(*serial),
2071 Self::Error(text) => text.clone(),
2072 }
2073 }
2074
2075 fn is_missing(&self, options: &ReadTableOptions) -> bool {
2076 match self {
2077 Self::Empty => true,
2078 Self::Text(text) => options.is_missing(text),
2079 _ => false,
2080 }
2081 }
2082
2083 fn is_likely_data_token(&self, options: &ReadTableOptions) -> bool {
2084 match self {
2085 Self::Number(_) | Self::Logical(_) | Self::DateTime(_) => true,
2086 Self::Empty => false,
2087 Self::Text(text) => {
2088 let token = unquote(text.trim()).trim();
2089 options.is_missing(token)
2090 || parse_numeric(token).is_some()
2091 || parse_logical(token).is_some()
2092 || parse_iso_datetime_to_datenum(token).is_some()
2093 }
2094 Self::Error(_) => true,
2095 }
2096 }
2097}
2098
2099fn spreadsheet_cell_to_import(cell: &SpreadsheetData) -> ImportCell {
2100 match cell {
2101 SpreadsheetData::Empty => ImportCell::Empty,
2102 SpreadsheetData::Int(value) => ImportCell::Number(*value as f64),
2103 SpreadsheetData::Float(value) => ImportCell::Number(*value),
2104 SpreadsheetData::String(text) => ImportCell::Text(text.clone()),
2105 SpreadsheetData::Bool(value) => ImportCell::Logical(*value),
2106 SpreadsheetData::DateTime(value) => value
2107 .as_datetime()
2108 .map(crate::builtins::datetime::datenum_from_naive)
2109 .map(ImportCell::DateTime)
2110 .unwrap_or_else(|| ImportCell::Number(value.as_f64())),
2111 SpreadsheetData::DateTimeIso(text) => parse_iso_datetime_to_datenum(text)
2112 .map(ImportCell::DateTime)
2113 .unwrap_or_else(|| ImportCell::Text(text.clone())),
2114 SpreadsheetData::DurationIso(text) => ImportCell::Text(text.clone()),
2115 SpreadsheetData::Error(err) => ImportCell::Error(err.to_string()),
2116 }
2117}
2118
2119fn spreadsheet_range_to_rows(
2120 range: &calamine::Range<SpreadsheetData>,
2121 options: &ReadTableOptions,
2122) -> BuiltinResult<Vec<Vec<ImportCell>>> {
2123 if range.is_empty() {
2124 return Ok(Vec::new());
2125 }
2126 let Some((range_start_row, range_start_col)) = range.start() else {
2127 return Ok(Vec::new());
2128 };
2129 let Some((range_end_row, range_end_col)) = range.end() else {
2130 return Ok(Vec::new());
2131 };
2132 let start_row = options
2133 .range
2134 .map(|spec| checked_u32(spec.start_row, "Range row"))
2135 .transpose()?
2136 .unwrap_or(range_start_row);
2137 let start_col = options
2138 .range
2139 .map(|spec| checked_u32(spec.start_col, "Range column"))
2140 .transpose()?
2141 .unwrap_or(range_start_col);
2142 let end_row = options
2143 .range
2144 .and_then(|spec| spec.end_row)
2145 .map(|row| checked_u32(row, "Range row"))
2146 .transpose()?
2147 .unwrap_or(range_end_row);
2148 let end_col = options
2149 .range
2150 .and_then(|spec| spec.end_col)
2151 .map(|col| checked_u32(col, "Range column"))
2152 .transpose()?
2153 .unwrap_or(range_end_col);
2154 if start_row > end_row || start_col > end_col {
2155 return Ok(Vec::new());
2156 }
2157 let mut rows = Vec::new();
2158 for row_idx in start_row..=end_row {
2159 let mut row = Vec::new();
2160 for col_idx in start_col..=end_col {
2161 row.push(
2162 range
2163 .get_value((row_idx, col_idx))
2164 .map(spreadsheet_cell_to_import)
2165 .unwrap_or(ImportCell::Empty),
2166 );
2167 }
2168 if matches!(options.empty_line_rule, EmptyLineRule::Skip)
2169 && row.iter().all(|cell| cell.is_missing(options))
2170 {
2171 continue;
2172 }
2173 rows.push(row);
2174 }
2175 if options.num_header_lines > 0 {
2176 Ok(rows.into_iter().skip(options.num_header_lines).collect())
2177 } else {
2178 Ok(rows)
2179 }
2180}
2181
2182fn checked_u32(value: usize, context: &str) -> BuiltinResult<u32> {
2183 u32::try_from(value).map_err(|_| invalid_argument(format!("readtable: {context} overflow")))
2184}
2185
2186fn detect_delimiter(lines: &[String]) -> Option<Delimiter> {
2187 let candidates = [',', '\t', ';', '|'];
2188 let mut best: Option<(f64, Delimiter)> = None;
2189 for candidate in candidates {
2190 let counts = lines
2191 .iter()
2192 .take(32)
2193 .filter(|line| line.contains(candidate))
2194 .map(|line| split_with_char_delim(line, candidate).len())
2195 .filter(|count| *count >= 2)
2196 .collect::<Vec<_>>();
2197 if counts.is_empty() {
2198 continue;
2199 }
2200 let avg = counts.iter().copied().sum::<usize>() as f64 / counts.len() as f64;
2201 if avg >= 2.0
2202 && best
2203 .as_ref()
2204 .map(|(best_avg, _)| avg > *best_avg)
2205 .unwrap_or(true)
2206 {
2207 best = Some((avg, Delimiter::Char(candidate)));
2208 }
2209 }
2210 best.map(|(_, delimiter)| delimiter).or_else(|| {
2211 lines
2212 .iter()
2213 .take(32)
2214 .any(|line| line.split_whitespace().count() > 1)
2215 .then_some(Delimiter::Whitespace)
2216 })
2217}
2218
/// Splits one line on `delimiter`, honouring double-quoted sections.
///
/// Quote characters are dropped from the output. Inside quotes a doubled
/// quote (`""`) yields one literal quote and the delimiter is ordinary text.
/// The result always has at least one field.
fn split_with_char_delim(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut rest = line.chars().peekable();
    while let Some(ch) = rest.next() {
        match ch {
            // Escaped quote inside a quoted section: emit one quote, skip its twin.
            '"' if quoted && rest.peek() == Some(&'"') => {
                field.push('"');
                rest.next();
            }
            '"' => quoted = !quoted,
            c if c == delimiter && !quoted => fields.push(std::mem::take(&mut field)),
            c => field.push(c),
        }
    }
    fields.push(field);
    fields
}
2244
2245fn parse_text_records(
2246 text: &str,
2247 delimiter: &Delimiter,
2248 empty_line_rule: EmptyLineRule,
2249) -> Vec<Vec<ImportCell>> {
2250 match delimiter {
2251 Delimiter::Whitespace => parse_whitespace_records(text, empty_line_rule),
2252 Delimiter::Char(ch) => parse_delimited_records(text, &ch.to_string(), empty_line_rule),
2253 Delimiter::String(pattern) => parse_delimited_records(text, pattern, empty_line_rule),
2254 }
2255}
2256
2257fn parse_delimited_records(
2258 text: &str,
2259 delimiter: &str,
2260 empty_line_rule: EmptyLineRule,
2261) -> Vec<Vec<ImportCell>> {
2262 let mut records = Vec::new();
2263 let mut row = Vec::new();
2264 let mut current = String::new();
2265 let mut in_quotes = false;
2266 let mut idx = 0usize;
2267 while idx < text.len() {
2268 let ch = text[idx..].chars().next().expect("valid char boundary");
2269 if ch == '"' {
2270 if in_quotes && text[idx + ch.len_utf8()..].starts_with('"') {
2271 current.push('"');
2272 idx += ch.len_utf8() + 1;
2273 continue;
2274 }
2275 in_quotes = !in_quotes;
2276 idx += ch.len_utf8();
2277 continue;
2278 }
2279 if !in_quotes && !delimiter.is_empty() && text[idx..].starts_with(delimiter) {
2280 row.push(ImportCell::from_text(std::mem::take(&mut current)));
2281 idx += delimiter.len();
2282 continue;
2283 }
2284 if !in_quotes && (ch == '\n' || ch == '\r') {
2285 row.push(ImportCell::from_text(std::mem::take(&mut current)));
2286 push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
2287 idx += ch.len_utf8();
2288 if ch == '\r' && text[idx..].starts_with('\n') {
2289 idx += 1;
2290 }
2291 continue;
2292 }
2293 current.push(ch);
2294 idx += ch.len_utf8();
2295 }
2296 if !current.is_empty() || !row.is_empty() || text.ends_with(delimiter) {
2297 row.push(ImportCell::from_text(current));
2298 push_import_record(&mut records, row, empty_line_rule);
2299 }
2300 records
2301}
2302
2303fn parse_whitespace_records(text: &str, empty_line_rule: EmptyLineRule) -> Vec<Vec<ImportCell>> {
2304 let mut records = Vec::new();
2305 let mut row = Vec::new();
2306 let mut current = String::new();
2307 let mut in_quotes = false;
2308 let mut field_open = false;
2309 let mut chars = text.chars().peekable();
2310 while let Some(ch) = chars.next() {
2311 if ch == '"' {
2312 if in_quotes && chars.peek() == Some(&'"') {
2313 current.push('"');
2314 chars.next();
2315 } else {
2316 in_quotes = !in_quotes;
2317 }
2318 field_open = true;
2319 continue;
2320 }
2321 if !in_quotes && (ch == '\n' || ch == '\r') {
2322 if field_open || !current.is_empty() {
2323 row.push(ImportCell::from_text(std::mem::take(&mut current)));
2324 }
2325 field_open = false;
2326 push_import_record(&mut records, std::mem::take(&mut row), empty_line_rule);
2327 if ch == '\r' && chars.peek() == Some(&'\n') {
2328 chars.next();
2329 }
2330 continue;
2331 }
2332 if !in_quotes && ch.is_whitespace() {
2333 if field_open || !current.is_empty() {
2334 row.push(ImportCell::from_text(std::mem::take(&mut current)));
2335 field_open = false;
2336 }
2337 continue;
2338 }
2339 current.push(ch);
2340 field_open = true;
2341 }
2342 if field_open || !current.is_empty() {
2343 row.push(ImportCell::from_text(current));
2344 }
2345 if !row.is_empty() {
2346 push_import_record(&mut records, row, empty_line_rule);
2347 }
2348 records
2349}
2350
2351fn push_import_record(
2352 records: &mut Vec<Vec<ImportCell>>,
2353 row: Vec<ImportCell>,
2354 empty_line_rule: EmptyLineRule,
2355) {
2356 if matches!(empty_line_rule, EmptyLineRule::Skip)
2357 && row.iter().all(|cell| matches!(cell, ImportCell::Empty))
2358 {
2359 return;
2360 }
2361 records.push(row);
2362}
2363
2364fn apply_import_range(rows: Vec<Vec<ImportCell>>, range: RangeSpec) -> Vec<Vec<ImportCell>> {
2365 if rows.is_empty() {
2366 return rows;
2367 }
2368 let end_row = range
2369 .end_row
2370 .unwrap_or_else(|| rows.len().saturating_sub(1));
2371 let max_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
2372 let end_col = range.end_col.unwrap_or_else(|| max_cols.saturating_sub(1));
2373 rows.into_iter()
2374 .enumerate()
2375 .filter_map(|(idx, row)| {
2376 if idx < range.start_row || idx > end_row {
2377 return None;
2378 }
2379 let selected = (range.start_col..=end_col)
2380 .map(|col| row.get(col).cloned().unwrap_or(ImportCell::Empty))
2381 .collect::<Vec<_>>();
2382 Some(selected)
2383 })
2384 .collect()
2385}
2386
2387fn import_rows_to_table(
2388 mut rows: Vec<Vec<ImportCell>>,
2389 options: &ReadTableOptions,
2390) -> BuiltinResult<Value> {
2391 let mut variable_names = options.variable_names.clone();
2392 let read_variable_names = options
2393 .read_variable_names
2394 .unwrap_or_else(|| variable_names.is_none() && should_read_variable_names(&rows, options));
2395 if variable_names.is_none() && read_variable_names && !rows.is_empty() {
2396 variable_names = Some(
2397 rows.remove(0)
2398 .into_iter()
2399 .map(|cell| cell.display_text())
2400 .collect(),
2401 );
2402 }
2403
2404 let mut row_names = options.row_names.clone();
2405 if options.read_row_names && !rows.is_empty() {
2406 row_names = Some(
2407 rows.iter_mut()
2408 .map(|row| {
2409 if row.is_empty() {
2410 String::new()
2411 } else {
2412 row.remove(0).display_text()
2413 }
2414 })
2415 .collect(),
2416 );
2417 if let Some(names) = variable_names.as_mut() {
2418 if !names.is_empty() {
2419 names.remove(0);
2420 }
2421 }
2422 }
2423
2424 let column_count = import_column_count(&rows, &variable_names, options)?;
2425 let names = import_variable_names(variable_names, column_count, options);
2426
2427 let mut columns = Vec::with_capacity(names.len());
2428 for col in 0..names.len() {
2429 let values = rows
2430 .iter()
2431 .map(|row| row.get(col).cloned().unwrap_or(ImportCell::Empty))
2432 .collect::<Vec<_>>();
2433 let requested_type = options
2434 .variable_types
2435 .as_ref()
2436 .and_then(|types| types.get(col))
2437 .copied();
2438 columns.push(import_column(values, options, requested_type)?);
2439 }
2440 table_from_columns_with_properties(names, columns, row_names)
2441}
2442
2443fn import_column_count(
2444 rows: &[Vec<ImportCell>],
2445 variable_names: &Option<Vec<String>>,
2446 options: &ReadTableOptions,
2447) -> BuiltinResult<usize> {
2448 let data_cols = rows.iter().map(Vec::len).max().unwrap_or(0);
2449 let name_cols = variable_names.as_ref().map(Vec::len).unwrap_or(0);
2450 let type_cols = options.variable_types.as_ref().map(Vec::len).unwrap_or(0);
2451 if let Some(count) = options.num_variables {
2452 if name_cols > count {
2453 return Err(invalid_argument(
2454 "readtable: VariableNames length exceeds NumVariables",
2455 ));
2456 }
2457 if type_cols > count {
2458 return Err(invalid_argument(
2459 "readtable: VariableTypes length exceeds NumVariables",
2460 ));
2461 }
2462 return Ok(count);
2463 }
2464 Ok(data_cols.max(name_cols).max(type_cols))
2465}
2466
2467fn import_variable_names(
2468 variable_names: Option<Vec<String>>,
2469 column_count: usize,
2470 options: &ReadTableOptions,
2471) -> Vec<String> {
2472 match variable_names {
2473 Some(mut names) => {
2474 while names.len() < column_count {
2475 names.push(format!("Var{}", names.len() + 1));
2476 }
2477 names.truncate(column_count);
2478 if options.preserve_variable_names {
2479 make_unique_names(names)
2480 } else {
2481 make_unique_variable_names(names)
2482 }
2483 }
2484 None => generated_variable_names(column_count),
2485 }
2486}
2487
2488fn should_read_variable_names(rows: &[Vec<ImportCell>], options: &ReadTableOptions) -> bool {
2489 let Some(first) = rows.first() else {
2490 return false;
2491 };
2492 if first.is_empty() {
2493 return false;
2494 }
2495 let names = first
2496 .iter()
2497 .map(ImportCell::display_text)
2498 .map(|text| text.trim().to_string())
2499 .collect::<Vec<_>>();
2500 if names.iter().any(|name| name.is_empty()) {
2501 return false;
2502 }
2503 if first.iter().all(|cell| cell.is_likely_data_token(options)) {
2504 return false;
2505 }
2506 true
2507}
2508
2509fn import_column(
2510 values: Vec<ImportCell>,
2511 options: &ReadTableOptions,
2512 requested_type: Option<ImportVariableType>,
2513) -> BuiltinResult<Value> {
2514 match requested_type.unwrap_or(ImportVariableType::Auto) {
2515 ImportVariableType::Auto => infer_import_column(values, options),
2516 ImportVariableType::Numeric(dtype) => import_numeric_column(values, options, dtype),
2517 ImportVariableType::Logical => import_logical_column(values, options),
2518 ImportVariableType::Text(kind) => import_text_column(values, options, kind),
2519 ImportVariableType::CellStr => import_cellstr_column(values, options),
2520 ImportVariableType::Datetime => import_datetime_column(values, options),
2521 ImportVariableType::Duration => import_duration_column(values, options),
2522 }
2523}
2524
2525fn import_numeric_column(
2526 values: Vec<ImportCell>,
2527 options: &ReadTableOptions,
2528 dtype: NumericDType,
2529) -> BuiltinResult<Value> {
2530 let mut numeric = Vec::with_capacity(values.len());
2531 for value in &values {
2532 let parsed = numeric_from_import_cell(value, options, dtype.class_name())?;
2533 numeric.push(cast_import_numeric(parsed, dtype));
2534 }
2535 Tensor::new_with_dtype(numeric, vec![values.len(), 1], dtype)
2536 .map(Value::Tensor)
2537 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2538}
2539
2540fn numeric_from_import_cell(
2541 value: &ImportCell,
2542 options: &ReadTableOptions,
2543 context: &str,
2544) -> BuiltinResult<f64> {
2545 match value {
2546 ImportCell::Empty => Ok(f64::NAN),
2547 ImportCell::Number(value) => Ok(*value),
2548 ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2549 ImportCell::DateTime(serial) => Ok(*serial),
2550 ImportCell::Text(text) => {
2551 let token = unquote(text.trim()).trim();
2552 if options.is_missing(token) {
2553 Ok(f64::NAN)
2554 } else {
2555 parse_numeric(token).ok_or_else(|| {
2556 invalid_variable(format!("readtable: cannot import '{token}' as {context}"))
2557 })
2558 }
2559 }
2560 ImportCell::Error(text) => Err(invalid_variable(format!(
2561 "readtable: cannot import spreadsheet error '{text}' as {context}"
2562 ))),
2563 }
2564}
2565
2566fn cast_import_numeric(value: f64, dtype: NumericDType) -> f64 {
2567 match dtype {
2568 NumericDType::F64 => value,
2569 NumericDType::F32 => (value as f32) as f64,
2570 NumericDType::U8 => {
2571 if value.is_finite() {
2572 value.round().clamp(0.0, u8::MAX as f64)
2573 } else {
2574 0.0
2575 }
2576 }
2577 NumericDType::U16 => {
2578 if value.is_finite() {
2579 value.round().clamp(0.0, u16::MAX as f64)
2580 } else {
2581 0.0
2582 }
2583 }
2584 }
2585}
2586
2587fn import_logical_column(
2588 values: Vec<ImportCell>,
2589 options: &ReadTableOptions,
2590) -> BuiltinResult<Value> {
2591 let mut logical = Vec::with_capacity(values.len());
2592 for value in &values {
2593 logical.push(logical_from_import_cell(value, options)?);
2594 }
2595 LogicalArray::new(logical, vec![values.len(), 1])
2596 .map(Value::LogicalArray)
2597 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2598}
2599
2600fn logical_from_import_cell(value: &ImportCell, options: &ReadTableOptions) -> BuiltinResult<u8> {
2601 let flag = match value {
2602 ImportCell::Empty => false,
2603 ImportCell::Logical(value) => *value,
2604 ImportCell::Number(value) => *value != 0.0,
2605 ImportCell::DateTime(serial) => *serial != 0.0,
2606 ImportCell::Text(text) => {
2607 let token = unquote(text.trim()).trim();
2608 if options.is_missing(token) {
2609 false
2610 } else if let Some(value) = parse_logical(token) {
2611 value
2612 } else if let Some(value) = parse_numeric(token) {
2613 value != 0.0
2614 } else {
2615 return Err(invalid_variable(format!(
2616 "readtable: cannot import '{token}' as logical"
2617 )));
2618 }
2619 }
2620 ImportCell::Error(text) => {
2621 return Err(invalid_variable(format!(
2622 "readtable: cannot import spreadsheet error '{text}' as logical"
2623 )));
2624 }
2625 };
2626 Ok(u8::from(flag))
2627}
2628
2629fn import_text_column(
2630 values: Vec<ImportCell>,
2631 options: &ReadTableOptions,
2632 kind: TextImportType,
2633) -> BuiltinResult<Value> {
2634 let strings = import_text_values(values, options);
2635 match kind {
2636 TextImportType::String => StringArray::new(strings.clone(), vec![strings.len(), 1])
2637 .map(Value::StringArray)
2638 .map_err(|err| invalid_variable(format!("readtable: {err}"))),
2639 TextImportType::Char => import_char_column(strings),
2640 }
2641}
2642
2643fn import_text_values(values: Vec<ImportCell>, options: &ReadTableOptions) -> Vec<String> {
2644 values
2645 .into_iter()
2646 .map(|value| {
2647 if value.is_missing(options) {
2648 String::new()
2649 } else {
2650 unquote(value.display_text().trim()).to_string()
2651 }
2652 })
2653 .collect()
2654}
2655
2656fn import_char_column(strings: Vec<String>) -> BuiltinResult<Value> {
2657 let rows = strings.len();
2658 let cols = strings
2659 .iter()
2660 .map(|text| text.chars().count())
2661 .max()
2662 .unwrap_or(0);
2663 let mut data = vec![' '; rows * cols];
2664 for (row, text) in strings.iter().enumerate() {
2665 for (col, ch) in text.chars().enumerate() {
2666 data[row * cols + col] = ch;
2667 }
2668 }
2669 CharArray::new(data, rows, cols)
2670 .map(Value::CharArray)
2671 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2672}
2673
2674fn import_cellstr_column(
2675 values: Vec<ImportCell>,
2676 options: &ReadTableOptions,
2677) -> BuiltinResult<Value> {
2678 let strings = import_text_values(values, options);
2679 let rows = strings.len();
2680 let cells = strings
2681 .into_iter()
2682 .map(|text| Value::CharArray(CharArray::new_row(&text)))
2683 .collect::<Vec<_>>();
2684 CellArray::new(cells, rows, 1)
2685 .map(Value::Cell)
2686 .map_err(|err| invalid_variable(format!("readtable: {err}")))
2687}
2688
2689fn import_datetime_column(
2690 values: Vec<ImportCell>,
2691 options: &ReadTableOptions,
2692) -> BuiltinResult<Value> {
2693 if matches!(options.datetime_type, DatetimeImportType::Text) {
2694 return import_text_column(values, options, options.text_type);
2695 }
2696
2697 let mut serials = Vec::with_capacity(values.len());
2698 for value in &values {
2699 serials.push(datetime_serial_from_import_cell(value, options)?);
2700 }
2701 let tensor = Tensor::new(serials, vec![values.len(), 1])
2702 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2703 if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2704 Ok(Value::Tensor(tensor))
2705 } else {
2706 crate::builtins::datetime::datetime_object_from_serial_tensor(tensor, "yyyy-MM-dd HH:mm:ss")
2707 }
2708}
2709
2710fn datetime_serial_from_import_cell(
2711 value: &ImportCell,
2712 options: &ReadTableOptions,
2713) -> BuiltinResult<f64> {
2714 match value {
2715 ImportCell::Empty => Ok(f64::NAN),
2716 ImportCell::DateTime(serial) => Ok(*serial),
2717 ImportCell::Number(value) => Ok(*value),
2718 ImportCell::Text(text) => {
2719 let token = unquote(text.trim()).trim();
2720 if options.is_missing(token) {
2721 Ok(f64::NAN)
2722 } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2723 Ok(serial)
2724 } else if let Some(serial) = parse_numeric(token) {
2725 Ok(serial)
2726 } else {
2727 Err(invalid_variable(format!(
2728 "readtable: cannot import '{token}' as datetime"
2729 )))
2730 }
2731 }
2732 ImportCell::Logical(_) => Err(invalid_variable(
2733 "readtable: cannot import logical value as datetime",
2734 )),
2735 ImportCell::Error(text) => Err(invalid_variable(format!(
2736 "readtable: cannot import spreadsheet error '{text}' as datetime"
2737 ))),
2738 }
2739}
2740
2741fn import_duration_column(
2742 values: Vec<ImportCell>,
2743 options: &ReadTableOptions,
2744) -> BuiltinResult<Value> {
2745 let mut days = Vec::with_capacity(values.len());
2746 for value in &values {
2747 days.push(duration_days_from_import_cell(value, options)?);
2748 }
2749 let tensor = Tensor::new(days, vec![values.len(), 1])
2750 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2751 crate::builtins::duration::duration_object_from_days_tensor(
2752 tensor,
2753 crate::builtins::duration::DEFAULT_DURATION_FORMAT,
2754 )
2755}
2756
2757fn duration_days_from_import_cell(
2758 value: &ImportCell,
2759 options: &ReadTableOptions,
2760) -> BuiltinResult<f64> {
2761 match value {
2762 ImportCell::Empty => Ok(f64::NAN),
2763 ImportCell::Number(value) => Ok(*value),
2764 ImportCell::Logical(value) => Ok(if *value { 1.0 } else { 0.0 }),
2765 ImportCell::Text(text) => {
2766 let token = unquote(text.trim()).trim();
2767 if options.is_missing(token) {
2768 Ok(f64::NAN)
2769 } else {
2770 parse_duration_to_days(token).ok_or_else(|| {
2771 invalid_variable(format!("readtable: cannot import '{token}' as duration"))
2772 })
2773 }
2774 }
2775 ImportCell::DateTime(_) => Err(invalid_variable(
2776 "readtable: cannot import datetime value as duration",
2777 )),
2778 ImportCell::Error(text) => Err(invalid_variable(format!(
2779 "readtable: cannot import spreadsheet error '{text}' as duration"
2780 ))),
2781 }
2782}
2783
2784fn infer_import_column(
2785 values: Vec<ImportCell>,
2786 options: &ReadTableOptions,
2787) -> BuiltinResult<Value> {
2788 let mut numeric = Vec::with_capacity(values.len());
2789 let mut all_numeric = true;
2790 for value in &values {
2791 match value {
2792 ImportCell::Empty => numeric.push(f64::NAN),
2793 ImportCell::Number(value) => numeric.push(*value),
2794 ImportCell::Text(text) => {
2795 let token = unquote(text.trim()).trim();
2796 if options.is_missing(token) {
2797 numeric.push(f64::NAN);
2798 } else if let Some(value) = parse_numeric(token) {
2799 numeric.push(value);
2800 } else {
2801 all_numeric = false;
2802 break;
2803 }
2804 }
2805 _ => {
2806 all_numeric = false;
2807 break;
2808 }
2809 }
2810 }
2811 if all_numeric {
2812 return Tensor::new(numeric, vec![values.len(), 1])
2813 .map(Value::Tensor)
2814 .map_err(|err| invalid_variable(format!("readtable: {err}")));
2815 }
2816
2817 let mut logical = Vec::with_capacity(values.len());
2818 let mut all_logical = true;
2819 for value in &values {
2820 match value {
2821 ImportCell::Empty => logical.push(0),
2822 ImportCell::Logical(value) => logical.push(i32::from(*value) as u8),
2823 ImportCell::Text(text) => {
2824 let token = unquote(text.trim()).trim();
2825 if options.is_missing(token) {
2826 logical.push(0);
2827 } else if let Some(value) = parse_logical(token) {
2828 logical.push(i32::from(value) as u8);
2829 } else {
2830 all_logical = false;
2831 break;
2832 }
2833 }
2834 _ => {
2835 all_logical = false;
2836 break;
2837 }
2838 }
2839 }
2840 if all_logical {
2841 return LogicalArray::new(logical, vec![values.len(), 1])
2842 .map(Value::LogicalArray)
2843 .map_err(|err| invalid_variable(format!("readtable: {err}")));
2844 }
2845
2846 if !matches!(options.datetime_type, DatetimeImportType::Text) {
2847 let mut serials = Vec::with_capacity(values.len());
2848 let mut all_datetime = true;
2849 for value in &values {
2850 match value {
2851 ImportCell::Empty => serials.push(f64::NAN),
2852 ImportCell::DateTime(serial) => serials.push(*serial),
2853 ImportCell::Text(text) => {
2854 let token = unquote(text.trim()).trim();
2855 if options.is_missing(token) {
2856 serials.push(f64::NAN);
2857 } else if let Some(serial) = parse_iso_datetime_to_datenum(token) {
2858 serials.push(serial);
2859 } else {
2860 all_datetime = false;
2861 break;
2862 }
2863 }
2864 _ => {
2865 all_datetime = false;
2866 break;
2867 }
2868 }
2869 }
2870 if all_datetime {
2871 let tensor = Tensor::new(serials, vec![values.len(), 1])
2872 .map_err(|err| invalid_variable(format!("readtable: {err}")))?;
2873 if matches!(options.datetime_type, DatetimeImportType::ExcelDatenum) {
2874 return Ok(Value::Tensor(tensor));
2875 }
2876 return crate::builtins::datetime::datetime_object_from_serial_tensor(
2877 tensor,
2878 "yyyy-MM-dd HH:mm:ss",
2879 );
2880 }
2881 }
2882
2883 import_text_column(values, options, options.text_type)
2884}
2885
/// Parses a numeric token.
///
/// `nan`, `inf` and `infinity` (the latter two optionally signed) are
/// recognised without regard to ASCII case; everything else goes through
/// Rust's `f64` parser. The token is not trimmed here.
fn parse_numeric(token: &str) -> Option<f64> {
    let is_one_of = |words: &[&str]| words.iter().any(|word| token.eq_ignore_ascii_case(word));
    if token.eq_ignore_ascii_case("nan") {
        Some(f64::NAN)
    } else if is_one_of(&["inf", "+inf", "infinity", "+infinity"]) {
        Some(f64::INFINITY)
    } else if is_one_of(&["-inf", "-infinity"]) {
        Some(f64::NEG_INFINITY)
    } else {
        token.parse().ok()
    }
}
2894
/// Parses a logical word, ignoring ASCII case.
///
/// `true`, `t`, `yes` and `on` are true; `false`, `f`, `no` and `off` are
/// false; anything else (including `1` and `0`) is `None`.
fn parse_logical(token: &str) -> Option<bool> {
    const TRUE_WORDS: [&str; 4] = ["true", "t", "yes", "on"];
    const FALSE_WORDS: [&str; 4] = ["false", "f", "no", "off"];
    let is_one_of = |words: &[&str]| words.iter().any(|word| token.eq_ignore_ascii_case(word));
    if is_one_of(&TRUE_WORDS) {
        Some(true)
    } else if is_one_of(&FALSE_WORDS) {
        Some(false)
    } else {
        None
    }
}
2902
2903fn parse_duration_to_days(token: &str) -> Option<f64> {
2904 parse_numeric(token).or_else(|| parse_clock_duration_to_days(token))
2905}
2906
/// Parses a clock-style duration (`hh:mm` or `hh:mm:ss`) into days.
///
/// The token is trimmed and may start with one `+` or `-` sign, which applies
/// to the whole duration. Hours are unbounded; minutes and seconds must lie
/// in `[0, 60)`, and seconds may be fractional.
///
/// Returns `None` for empty input, a wrong number of fields, unparsable or
/// non-finite fields, out-of-range minutes or seconds, or a second sign after
/// the leading one. Such input (e.g. `"--1:30"`) used to be accepted with a
/// signed hours field and produced a nonsensical duration.
fn parse_clock_duration_to_days(token: &str) -> Option<f64> {
    let trimmed = token.trim();
    let (sign, body) = match trimmed.strip_prefix('-') {
        Some(rest) => (-1.0, rest),
        None => (1.0, trimmed.strip_prefix('+').unwrap_or(trimmed)),
    };
    // `f64` parsing would accept a signed hours field; only the single leading
    // sign handled above is legal.
    if body.starts_with(|ch: char| ch == '+' || ch == '-') {
        return None;
    }

    let mut fields = body.split(':');
    let hours = fields.next()?.parse::<f64>().ok()?;
    let minutes = fields.next()?.parse::<f64>().ok()?;
    let seconds = match fields.next() {
        Some(field) => field.parse::<f64>().ok()?,
        None => 0.0,
    };
    if fields.next().is_some() {
        return None;
    }

    // The range checks also reject NaN and infinite minutes/seconds.
    let in_clock_range = |part: f64| (0.0..60.0).contains(&part);
    if !hours.is_finite() || !in_clock_range(minutes) || !in_clock_range(seconds) {
        return None;
    }
    Some(sign * (hours * 3600.0 + minutes * 60.0 + seconds) / 86_400.0)
}
2943
2944fn parse_iso_datetime_to_datenum(token: &str) -> Option<f64> {
2945 let trimmed = token.trim();
2946 if trimmed.is_empty() {
2947 return None;
2948 }
2949 for format in [
2950 "%Y-%m-%dT%H:%M:%S%.f",
2951 "%Y-%m-%d %H:%M:%S%.f",
2952 "%Y/%m/%d %H:%M:%S%.f",
2953 "%m/%d/%Y %H:%M:%S%.f",
2954 ] {
2955 if let Ok(value) = NaiveDateTime::parse_from_str(trimmed, format) {
2956 return Some(crate::builtins::datetime::datenum_from_naive(value));
2957 }
2958 }
2959 for format in ["%Y-%m-%d", "%Y/%m/%d", "%m/%d/%Y"] {
2960 if let Ok(date) = NaiveDate::parse_from_str(trimmed, format) {
2961 return Some(crate::builtins::datetime::datenum_from_naive(
2962 date.and_time(NaiveTime::MIN),
2963 ));
2964 }
2965 }
2966 None
2967}
2968
/// Strips one pair of matching surrounding quotes (`"…"` or `'…'`).
///
/// The token is returned unchanged when it is not wrapped in a matching pair,
/// including the single-character tokens `"` and `'`.
fn unquote(token: &str) -> &str {
    ['"', '\'']
        .iter()
        .find_map(|&quote| token.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(token)
}
2980
2981fn default_properties(variable_names: Vec<String>, row_names: Option<Vec<String>>) -> StructValue {
2982 let mut props = StructValue::new();
2983 props.insert(
2984 VARIABLE_NAMES,
2985 Value::StringArray(
2986 StringArray::new(variable_names.clone(), vec![1, variable_names.len()])
2987 .expect("VariableNames shape is valid"),
2988 ),
2989 );
2990 props.insert(
2991 ROW_NAMES,
2992 row_names
2993 .map(|names| {
2994 Value::StringArray(
2995 StringArray::new(names.clone(), vec![names.len(), 1])
2996 .expect("RowNames shape is valid"),
2997 )
2998 })
2999 .unwrap_or_else(|| {
3000 Value::StringArray(StringArray::new(Vec::new(), vec![0, 1]).unwrap())
3001 }),
3002 );
3003 props.insert(
3004 DIMENSION_NAMES,
3005 Value::StringArray(
3006 StringArray::new(
3007 vec![
3008 DEFAULT_ROW_DIM_NAME.to_string(),
3009 DEFAULT_VARIABLE_DIM_NAME.to_string(),
3010 ],
3011 vec![1, 2],
3012 )
3013 .expect("DimensionNames shape is valid"),
3014 ),
3015 );
3016 props.insert(
3017 VARIABLE_UNITS,
3018 Value::StringArray(
3019 StringArray::new(
3020 vec![String::new(); variable_names.len()],
3021 vec![1, variable_names.len()],
3022 )
3023 .expect("VariableUnits shape is valid"),
3024 ),
3025 );
3026 props.insert(
3027 VARIABLE_DESCRIPTIONS,
3028 Value::StringArray(
3029 StringArray::new(
3030 vec![String::new(); variable_names.len()],
3031 vec![1, variable_names.len()],
3032 )
3033 .expect("VariableDescriptions shape is valid"),
3034 ),
3035 );
3036 props.insert(DESCRIPTION, Value::String(String::new()));
3037 props.insert(USER_DATA, Value::Tensor(Tensor::zeros(vec![0, 0])));
3038 props
3039}
3040
/// Builds a table value from parallel lists of variable names and columns.
///
/// Delegates to `table_from_columns_with_properties` with no row names, so the
/// validation and errors are those of that function.
pub fn table_from_columns(names: Vec<String>, columns: Vec<Value>) -> BuiltinResult<Value> {
    table_from_columns_with_properties(names, columns, None)
}
3044
3045fn table_from_columns_with_properties(
3046 names: Vec<String>,
3047 columns: Vec<Value>,
3048 row_names: Option<Vec<String>>,
3049) -> BuiltinResult<Value> {
3050 ensure_table_class_registered();
3051 if names.len() != columns.len() {
3052 return Err(invalid_variable(
3053 "table: number of variable names must match number of variables",
3054 ));
3055 }
3056 let names = make_unique_names(names);
3057 let height = validate_column_heights(&names, &columns)?;
3058 if let Some(row_names) = &row_names {
3059 if row_names.len() != height {
3060 return Err(invalid_variable(
3061 "table: number of row names must match table height",
3062 ));
3063 }
3064 }
3065 let mut variables = StructValue::new();
3066 for (name, value) in names.iter().cloned().zip(columns) {
3067 variables.insert(name, value);
3068 }
3069 let props = default_properties(names, row_names);
3070 let mut object = ObjectInstance::new(TABLE_CLASS.to_string());
3071 object
3072 .properties
3073 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3074 object.properties.insert(
3075 TABLE_PROPERTIES_FIELD.to_string(),
3076 Value::Struct(props.clone()),
3077 );
3078 object
3079 .properties
3080 .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
3081 Ok(Value::Object(object))
3082}
3083
3084fn validate_column_heights(names: &[String], columns: &[Value]) -> BuiltinResult<usize> {
3085 if columns.is_empty() {
3086 return Ok(0);
3087 }
3088 let height = value_row_count(&columns[0])?;
3089 for (name, value) in names.iter().zip(columns) {
3090 let rows = value_row_count(value)?;
3091 if rows != height {
3092 return Err(invalid_variable(format!(
3093 "table: variable '{name}' has {rows} rows but expected {height}"
3094 )));
3095 }
3096 }
3097 Ok(height)
3098}
3099
/// Returns `true` when `value` is an object of the table class, i.e. when
/// `table_object` yields `Some`.
pub fn is_table_value(value: &Value) -> bool {
    table_object(value).is_some()
}
3103
3104fn table_object(value: &Value) -> Option<&ObjectInstance> {
3105 match value {
3106 Value::Object(object) if object.is_class(TABLE_CLASS) => Some(object),
3107 _ => None,
3108 }
3109}
3110
3111fn into_table_object(value: Value, context: &str) -> BuiltinResult<ObjectInstance> {
3112 match value {
3113 Value::Object(object) if object.is_class(TABLE_CLASS) => Ok(object),
3114 other => Err(invalid_argument(format!(
3115 "{context}: expected table, got {other:?}"
3116 ))),
3117 }
3118}
3119
3120pub fn table_variables(object: &ObjectInstance) -> BuiltinResult<StructValue> {
3121 match object.properties.get(TABLE_VARIABLES_FIELD) {
3122 Some(Value::Struct(st)) => Ok(st.clone()),
3123 Some(other) => Err(invalid_variable(format!(
3124 "table: invalid internal variable storage {other:?}"
3125 ))),
3126 None => Ok(StructValue::new()),
3127 }
3128}
3129
3130pub fn table_variable_names_from_object(object: &ObjectInstance) -> BuiltinResult<Vec<String>> {
3131 let variables = table_variables(object)?;
3132 Ok(variables.fields.keys().cloned().collect())
3133}
3134
3135pub fn table_height(object: &ObjectInstance) -> BuiltinResult<usize> {
3136 let variables = table_variables(object)?;
3137 match variables.fields.values().next() {
3138 Some(value) => value_row_count(value),
3139 None => Ok(0),
3140 }
3141}
3142
3143pub fn table_width(object: &ObjectInstance) -> BuiltinResult<usize> {
3144 table_variables(object).map(|vars| vars.fields.len())
3145}
3146
3147fn table_public_properties(object: &ObjectInstance) -> BuiltinResult<StructValue> {
3148 match object
3149 .properties
3150 .get(TABLE_PROPERTIES_FIELD)
3151 .or_else(|| object.properties.get(PROPERTIES_MEMBER))
3152 {
3153 Some(Value::Struct(st)) => Ok(st.clone()),
3154 Some(other) => Err(invalid_variable(format!(
3155 "table: invalid Properties storage {other:?}"
3156 ))),
3157 None => Ok(default_properties(
3158 table_variable_names_from_object(object)?,
3159 None,
3160 )),
3161 }
3162}
3163
3164fn sync_table_properties(object: &mut ObjectInstance, props: StructValue) {
3165 object.properties.insert(
3166 TABLE_PROPERTIES_FIELD.to_string(),
3167 Value::Struct(props.clone()),
3168 );
3169 object
3170 .properties
3171 .insert(PROPERTIES_MEMBER.to_string(), Value::Struct(props));
3172}
3173
3174fn table_member_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
3175 let name = scalar_text(payload, "table member")?;
3176 if name == PROPERTIES_MEMBER {
3177 return Ok(Value::Struct(table_public_properties(object)?));
3178 }
3179 let variables = table_variables(object)?;
3180 variables
3181 .fields
3182 .get(&name)
3183 .cloned()
3184 .ok_or_else(|| invalid_variable(format!("table: unrecognized variable '{name}'")))
3185}
3186
3187fn table_member_set(object: &mut ObjectInstance, field: &str, rhs: Value) -> BuiltinResult<()> {
3188 if field == PROPERTIES_MEMBER {
3189 let Value::Struct(props) = rhs else {
3190 return Err(invalid_variable(
3191 "table: Properties assignment expects a scalar struct",
3192 ));
3193 };
3194 apply_properties(object, props)?;
3195 return Ok(());
3196 }
3197 let mut variables = table_variables(object)?;
3198 let mut names = table_variable_names_from_object(object)?;
3199 let height = table_height(object)?;
3200 let rhs_rows = value_row_count(&rhs)?;
3201 if !variables.fields.is_empty() && rhs_rows != height {
3202 return Err(invalid_variable(format!(
3203 "table: variable '{field}' has {rhs_rows} rows but table has {height}"
3204 )));
3205 }
3206 if !variables.fields.contains_key(field) {
3207 names.push(field.to_string());
3208 }
3209 variables.insert(field.to_string(), rhs);
3210 object
3211 .properties
3212 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3213 let mut props = table_public_properties(object)?;
3214 update_variable_metadata_names(&mut props, names)?;
3215 sync_table_properties(object, props);
3216 Ok(())
3217}
3218
3219fn apply_properties(object: &mut ObjectInstance, mut props: StructValue) -> BuiltinResult<()> {
3220 if let Some(value) = props.fields.get(VARIABLE_NAMES) {
3221 let names = variable_name_list(value)?;
3222 rename_table_variables(object, names.clone())?;
3223 update_variable_metadata_names(&mut props, names)?;
3224 }
3225 sync_table_properties(object, props);
3226 Ok(())
3227}
3228
3229fn rename_table_variables(
3230 object: &mut ObjectInstance,
3231 new_names: Vec<String>,
3232) -> BuiltinResult<()> {
3233 let old_names = table_variable_names_from_object(object)?;
3234 if old_names.len() != new_names.len() {
3235 return Err(invalid_variable(
3236 "table: VariableNames assignment must preserve variable count",
3237 ));
3238 }
3239 let new_names = make_unique_variable_names(new_names);
3240 let variables = table_variables(object)?;
3241 let mut renamed = StructValue::new();
3242 for (old, new) in old_names.iter().zip(new_names.iter()) {
3243 let value = variables
3244 .fields
3245 .get(old)
3246 .cloned()
3247 .ok_or_else(|| invalid_variable(format!("table: missing variable '{old}'")))?;
3248 renamed.insert(new.clone(), value);
3249 }
3250 object
3251 .properties
3252 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(renamed));
3253 Ok(())
3254}
3255
3256fn update_variable_metadata_names(
3257 props: &mut StructValue,
3258 names: Vec<String>,
3259) -> BuiltinResult<()> {
3260 props.insert(
3261 VARIABLE_NAMES,
3262 Value::StringArray(
3263 StringArray::new(names.clone(), vec![1, names.len()])
3264 .map_err(|err| invalid_variable(format!("table: {err}")))?,
3265 ),
3266 );
3267 for field in [VARIABLE_UNITS, VARIABLE_DESCRIPTIONS] {
3268 let existing = props.fields.get(field).cloned();
3269 let values = match existing {
3270 Some(Value::StringArray(mut array)) => {
3271 array.data.resize(names.len(), String::new());
3272 array.data.truncate(names.len());
3273 array.data
3274 }
3275 _ => vec![String::new(); names.len()],
3276 };
3277 props.insert(
3278 field,
3279 Value::StringArray(
3280 StringArray::new(values, vec![1, names.len()])
3281 .map_err(|err| invalid_variable(format!("table: {err}")))?,
3282 ),
3283 );
3284 }
3285 Ok(())
3286}
3287
3288fn table_paren_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
3289 let selectors = selector_values(payload)?;
3290 let rows = parse_row_selector(selectors.first(), table_height(object)?)?;
3291 let variable_names = table_variable_names_from_object(object)?;
3292 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
3293 let variables = table_variables(object)?;
3294 let mut out = Vec::with_capacity(selected_names.len());
3295 for name in &selected_names {
3296 let value = variables
3297 .fields
3298 .get(name)
3299 .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
3300 out.push(select_rows(value, &rows)?);
3301 }
3302 let row_names = selected_row_names(object, &rows)?;
3303 table_from_columns_with_properties(selected_names, out, row_names)
3304}
3305
3306fn table_brace_get(object: &ObjectInstance, payload: &Value) -> BuiltinResult<Value> {
3307 let subset = table_paren_get(object, payload)?;
3308 let object = into_table_object(subset, "table brace indexing")?;
3309 let variables = table_variables(&object)?;
3310 if variables.fields.len() == 1 {
3311 return variables
3312 .fields
3313 .values()
3314 .next()
3315 .cloned()
3316 .ok_or_else(|| invalid_variable("table: missing selected variable"));
3317 }
3318 let values = variables.fields.values().collect::<Vec<_>>();
3319 if values.iter().all(|value| matches!(value, Value::Tensor(_))) {
3320 return concatenate_numeric_columns(&values);
3321 }
3322 CellArray::new(
3323 values.into_iter().cloned().collect(),
3324 1,
3325 variables.fields.len(),
3326 )
3327 .map(Value::Cell)
3328 .map_err(|err| invalid_variable(format!("table: {err}")))
3329}
3330
3331fn table_paren_assign(
3332 mut object: ObjectInstance,
3333 payload: &Value,
3334 rhs: Value,
3335) -> BuiltinResult<Value> {
3336 let rhs_table = into_table_object(rhs, "table paren assignment")?;
3337 let selectors = selector_values(payload)?;
3338 let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
3339 let variable_names = table_variable_names_from_object(&object)?;
3340 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
3341 let rhs_names = table_variable_names_from_object(&rhs_table)?;
3342 if selected_names.len() != rhs_names.len() {
3343 return Err(invalid_variable(
3344 "table: assignment variable count must match selected variables",
3345 ));
3346 }
3347 let mut variables = table_variables(&object)?;
3348 let rhs_variables = table_variables(&rhs_table)?;
3349 for (target_name, rhs_name) in selected_names.iter().zip(rhs_names.iter()) {
3350 let current =
3351 variables.fields.get(target_name).cloned().ok_or_else(|| {
3352 invalid_variable(format!("table: missing variable '{target_name}'"))
3353 })?;
3354 let rhs_col =
3355 rhs_variables.fields.get(rhs_name).cloned().ok_or_else(|| {
3356 invalid_variable(format!("table: missing rhs variable '{rhs_name}'"))
3357 })?;
3358 variables.insert(target_name.clone(), assign_rows(current, &rows, rhs_col)?);
3359 }
3360 object
3361 .properties
3362 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3363 Ok(Value::Object(object))
3364}
3365
3366fn table_brace_assign(
3367 mut object: ObjectInstance,
3368 payload: &Value,
3369 rhs: Value,
3370) -> BuiltinResult<Value> {
3371 let selectors = selector_values(payload)?;
3372 let rows = parse_row_selector(selectors.first(), table_height(&object)?)?;
3373 let variable_names = table_variable_names_from_object(&object)?;
3374 let selected_names = parse_variable_selector(selectors.get(1), &variable_names)?;
3375 if selected_names.len() != 1 {
3376 return Err(invalid_variable(
3377 "table: brace assignment supports one variable at a time",
3378 ));
3379 }
3380 let mut variables = table_variables(&object)?;
3381 let target = selected_names[0].clone();
3382 let current = variables
3383 .fields
3384 .get(&target)
3385 .cloned()
3386 .ok_or_else(|| invalid_variable(format!("table: missing variable '{target}'")))?;
3387 variables.insert(target, assign_rows(current, &rows, rhs)?);
3388 object
3389 .properties
3390 .insert(TABLE_VARIABLES_FIELD.to_string(), Value::Struct(variables));
3391 Ok(Value::Object(object))
3392}
3393
/// Splits an indexing payload into its individual selector values.
///
/// A `Value::Cell` payload contributes one selector per element of `cell.data`
/// (cloned, in storage order); any other payload is treated as a single
/// selector. This function never returns an error.
fn selector_values(payload: &Value) -> BuiltinResult<Vec<Value>> {
    match payload {
        Value::Cell(cell) => {
            let mut out = Vec::with_capacity(cell.data.len());
            // NOTE(review): the element type of `cell.data` is not visible here;
            // `.clone()` is assumed to yield an owned `Value` — confirm.
            for handle in &cell.data {
                out.push(handle.clone());
            }
            Ok(out)
        }
        other => Ok(vec![other.clone()]),
    }
}
3406
3407fn parse_row_selector(selector: Option<&Value>, height: usize) -> BuiltinResult<Vec<usize>> {
3408 let Some(selector) = selector else {
3409 return Ok((0..height).collect());
3410 };
3411 if is_colon_selector(selector) {
3412 return Ok((0..height).collect());
3413 }
3414 if is_end_selector(selector) {
3415 return if height == 0 {
3416 Err(invalid_index(
3417 "table: end row index is invalid for empty table",
3418 ))
3419 } else {
3420 Ok(vec![height - 1])
3421 };
3422 }
3423 match selector {
3424 Value::Num(n) => Ok(vec![one_based_to_zero(*n, height, "row")?]),
3425 Value::Int(i) => Ok(vec![one_based_to_zero(i.to_f64(), height, "row")?]),
3426 Value::Tensor(tensor) => tensor
3427 .data
3428 .iter()
3429 .map(|value| one_based_to_zero(*value, height, "row"))
3430 .collect(),
3431 Value::LogicalArray(array) => {
3432 if array.data.len() != height {
3433 return Err(invalid_index(
3434 "table: logical row selector length must match table height",
3435 ));
3436 }
3437 Ok(array
3438 .data
3439 .iter()
3440 .enumerate()
3441 .filter_map(|(idx, value)| (*value != 0).then_some(idx))
3442 .collect())
3443 }
3444 other => Err(invalid_index(format!(
3445 "table: unsupported row selector {other:?}"
3446 ))),
3447 }
3448}
3449
3450fn parse_variable_selector(
3451 selector: Option<&Value>,
3452 names: &[String],
3453) -> BuiltinResult<Vec<String>> {
3454 let Some(selector) = selector else {
3455 return Ok(names.to_vec());
3456 };
3457 if is_colon_selector(selector) {
3458 return Ok(names.to_vec());
3459 }
3460 match selector {
3461 Value::String(_) | Value::CharArray(_) | Value::StringArray(_) | Value::Cell(_) => {
3462 let selected = string_list(selector)?;
3463 for name in &selected {
3464 if !names.contains(name) {
3465 return Err(invalid_variable(format!(
3466 "table: unrecognized variable '{name}'"
3467 )));
3468 }
3469 }
3470 Ok(selected)
3471 }
3472 Value::Num(n) => Ok(vec![name_at_index(names, *n)?]),
3473 Value::Int(i) => Ok(vec![name_at_index(names, i.to_f64())?]),
3474 Value::Tensor(tensor) => tensor
3475 .data
3476 .iter()
3477 .map(|value| name_at_index(names, *value))
3478 .collect(),
3479 Value::LogicalArray(array) => {
3480 if array.data.len() != names.len() {
3481 return Err(invalid_index(
3482 "table: logical variable selector length must match table width",
3483 ));
3484 }
3485 Ok(array
3486 .data
3487 .iter()
3488 .zip(names.iter())
3489 .filter_map(|(flag, name)| (*flag != 0).then_some(name.clone()))
3490 .collect())
3491 }
3492 other => Err(invalid_index(format!(
3493 "table: unsupported variable selector {other:?}"
3494 ))),
3495 }
3496}
3497
3498fn is_colon_selector(value: &Value) -> bool {
3499 scalar_text(value, "selector")
3500 .map(|text| text == ":")
3501 .unwrap_or(false)
3502}
3503
3504fn is_end_selector(value: &Value) -> bool {
3505 scalar_text(value, "selector")
3506 .map(|text| text == "end")
3507 .unwrap_or(false)
3508}
3509
3510fn name_at_index(names: &[String], value: f64) -> BuiltinResult<String> {
3511 let idx = one_based_to_zero(value, names.len(), "variable")?;
3512 Ok(names[idx].clone())
3513}
3514
3515fn one_based_to_zero(value: f64, len: usize, context: &str) -> BuiltinResult<usize> {
3516 if !value.is_finite() || value < 1.0 || (value.round() - value).abs() > f64::EPSILON {
3517 return Err(invalid_index(format!(
3518 "table: {context} indices must be positive finite integers"
3519 )));
3520 }
3521 let idx = value.round() as usize - 1;
3522 if idx >= len {
3523 return Err(invalid_index(format!(
3524 "table: {context} index exceeds bounds"
3525 )));
3526 }
3527 Ok(idx)
3528}
3529
3530fn selected_row_names(
3531 object: &ObjectInstance,
3532 rows: &[usize],
3533) -> BuiltinResult<Option<Vec<String>>> {
3534 let props = table_public_properties(object)?;
3535 let Some(value) = props.fields.get(ROW_NAMES) else {
3536 return Ok(None);
3537 };
3538 let names = string_list(value)?;
3539 if names.is_empty() {
3540 return Ok(None);
3541 }
3542 Ok(Some(
3543 rows.iter()
3544 .filter_map(|row| names.get(*row).cloned())
3545 .collect(),
3546 ))
3547}
3548
3549fn value_row_count(value: &Value) -> BuiltinResult<usize> {
3550 match value {
3551 Value::Tensor(tensor) => Ok(tensor.rows()),
3552 Value::ComplexTensor(tensor) => Ok(tensor.rows),
3553 Value::StringArray(array) => Ok(array.rows()),
3554 Value::LogicalArray(array) => Ok(array.shape.first().copied().unwrap_or(array.data.len())),
3555 Value::Cell(cell) => Ok(cell.rows),
3556 Value::CharArray(array) => Ok(array.rows),
3557 Value::Object(obj) if obj.is_class("datetime") => {
3558 crate::builtins::datetime::serials_from_datetime_value(value)
3559 .map(|tensor| tensor.rows())
3560 }
3561 Value::Object(obj) if obj.is_class("duration") => {
3562 crate::builtins::duration::duration_tensor_from_duration_value(value)
3563 .map(|tensor| tensor.rows())
3564 }
3565 Value::Object(obj) if obj.is_class(TABLE_CLASS) => table_height(obj),
3566 _ => Ok(1),
3567 }
3568}
3569
/// Builds a new column value containing only the given zero-based `rows` of
/// `value`, in the order listed (indices may repeat).
///
/// Every array-like variant produces a value of the same variant with
/// `rows.len()` rows and the original column count. `datetime` and `duration`
/// objects are converted to tensors, sliced, and rebuilt with the format taken
/// from the original value. Any other value is only accepted when the
/// selection is exactly `[0]`, in which case it is cloned.
///
/// # Errors
/// Returns an error when a row index falls outside the source data, when the
/// result array cannot be constructed, or when the value type does not support
/// row selection.
fn select_rows(value: &Value, rows: &[usize]) -> BuiltinResult<Value> {
    match value {
        Value::Tensor(tensor) => {
            let cols = tensor.cols();
            let mut data = Vec::with_capacity(rows.len() * cols);
            // Output is filled column by column.
            for col in 0..cols {
                for &row in rows {
                    data.push(tensor.get2(row, col).map_err(invalid_index)?);
                }
            }
            // The source dtype is carried over to the result.
            Tensor::new_with_dtype(data, vec![rows.len(), cols], tensor.dtype)
                .map(Value::Tensor)
                .map_err(invalid_variable)
        }
        Value::ComplexTensor(tensor) => {
            let mut data = Vec::with_capacity(rows.len() * tensor.cols);
            for col in 0..tensor.cols {
                for &row in rows {
                    // Element (row, col) is read at `row + col * rows`.
                    let idx = row + col * tensor.rows;
                    data.push(*tensor.data.get(idx).ok_or_else(|| {
                        invalid_index("table: complex variable row index out of bounds")
                    })?);
                }
            }
            ComplexTensor::new(data, vec![rows.len(), tensor.cols])
                .map(Value::ComplexTensor)
                .map_err(invalid_variable)
        }
        Value::StringArray(array) => {
            let cols = array.cols();
            let mut data = Vec::with_capacity(rows.len() * cols);
            for col in 0..cols {
                for &row in rows {
                    // Element (row, col) is read at `row + col * rows`.
                    let idx = row + col * array.rows();
                    data.push(array.data.get(idx).cloned().ok_or_else(|| {
                        invalid_index("table: string variable row index out of bounds")
                    })?);
                }
            }
            StringArray::new(data, vec![rows.len(), cols])
                .map(Value::StringArray)
                .map_err(invalid_variable)
        }
        Value::CharArray(array) => {
            let mut data = Vec::with_capacity(rows.len() * array.cols);
            for &row in rows {
                if row >= array.rows {
                    return Err(invalid_index(
                        "table: char variable row index out of bounds",
                    ));
                }
                // Unlike the other branches, a char row is copied as one
                // contiguous run of `array.cols` elements starting at
                // `row * array.cols`.
                let start = row * array.cols;
                data.extend_from_slice(&array.data[start..start + array.cols]);
            }
            CharArray::new(data, rows.len(), array.cols)
                .map(Value::CharArray)
                .map_err(invalid_variable)
        }
        Value::LogicalArray(array) => {
            // Without shape information the array is treated as a single column.
            let source_rows = array.shape.first().copied().unwrap_or(array.data.len());
            let cols = array.shape.get(1).copied().unwrap_or(1);
            let mut data = Vec::with_capacity(rows.len() * cols);
            for col in 0..cols {
                for &row in rows {
                    let idx = row + col * source_rows;
                    data.push(*array.data.get(idx).ok_or_else(|| {
                        invalid_index("table: logical variable row index out of bounds")
                    })?);
                }
            }
            LogicalArray::new(data, vec![rows.len(), cols])
                .map(Value::LogicalArray)
                .map_err(invalid_variable)
        }
        Value::Cell(cell) => {
            let mut data = Vec::with_capacity(rows.len() * cell.cols);
            // NOTE(review): elements are pushed column by column; confirm this
            // matches the element order `CellArray::new` expects.
            for col in 0..cell.cols {
                for &row in rows {
                    data.push(cell.get(row, col).map_err(invalid_index)?);
                }
            }
            CellArray::new(data, rows.len(), cell.cols)
                .map(Value::Cell)
                .map_err(invalid_variable)
        }
        Value::Object(obj) if obj.is_class("datetime") => {
            // Slice the underlying serial tensor, then rebuild a datetime
            // object using the original value's format.
            let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
            let selected = select_rows(&Value::Tensor(tensor), rows)?;
            match selected {
                Value::Tensor(tensor) => {
                    crate::builtins::datetime::datetime_object_from_serial_tensor(
                        tensor,
                        crate::builtins::datetime::datetime_format_from_value(value),
                    )
                }
                _ => unreachable!("select_rows tensor branch returns tensor"),
            }
        }
        Value::Object(obj) if obj.is_class("duration") => {
            // Same approach as for datetime, via the duration tensor.
            let tensor = crate::builtins::duration::duration_tensor_from_duration_value(value)?;
            let selected = select_rows(&Value::Tensor(tensor), rows)?;
            match selected {
                Value::Tensor(tensor) => {
                    crate::builtins::duration::duration_object_from_days_tensor(
                        tensor,
                        crate::builtins::duration::duration_format_from_value(value),
                    )
                }
                _ => unreachable!("select_rows tensor branch returns tensor"),
            }
        }
        // Any other value occupies one row, so only the selection `[0]` is valid.
        _ if rows.len() == 1 && rows[0] == 0 => Ok(value.clone()),
        other => Err(invalid_variable(format!(
            "table: row selection unsupported for variable {other:?}"
        ))),
    }
}
3687
3688fn assign_rows(mut current: Value, rows: &[usize], rhs: Value) -> BuiltinResult<Value> {
3689 if value_row_count(&rhs)? != rows.len() {
3690 return Err(invalid_variable(
3691 "table: assignment row count must match selected row count",
3692 ));
3693 }
3694 let replacing_all_rows = rows.len() == value_row_count(¤t)?;
3695 match (&mut current, rhs) {
3696 (Value::Tensor(target), Value::Tensor(source)) => {
3697 if target.cols() != source.cols() {
3698 return Err(invalid_variable(
3699 "table: tensor assignment column count mismatch",
3700 ));
3701 }
3702 for col in 0..target.cols() {
3703 for (src_row, &dst_row) in rows.iter().enumerate() {
3704 let value = source.get2(src_row, col).map_err(invalid_index)?;
3705 target.set2(dst_row, col, value).map_err(invalid_index)?;
3706 }
3707 }
3708 Ok(current)
3709 }
3710 (_, source) if replacing_all_rows => Ok(source),
3711 _ => Err(invalid_variable(
3712 "table: assignment for this variable type requires replacing all rows",
3713 )),
3714 }
3715}
3716
3717fn concatenate_numeric_columns(values: &[&Value]) -> BuiltinResult<Value> {
3718 let rows = values
3719 .first()
3720 .and_then(|value| match value {
3721 Value::Tensor(t) => Some(t.rows()),
3722 _ => None,
3723 })
3724 .unwrap_or(0);
3725 let cols = values
3726 .iter()
3727 .map(|value| match value {
3728 Value::Tensor(t) => Ok(t.cols()),
3729 _ => Err(invalid_variable("table: expected numeric variable")),
3730 })
3731 .collect::<BuiltinResult<Vec<_>>>()?;
3732 let total_cols: usize = cols.iter().sum();
3733 let mut data = Vec::with_capacity(rows * total_cols);
3734 for value in values {
3735 let Value::Tensor(tensor) = value else {
3736 return Err(invalid_variable("table: expected numeric variable"));
3737 };
3738 for col in 0..tensor.cols() {
3739 for row in 0..rows {
3740 data.push(tensor.get2(row, col).map_err(invalid_index)?);
3741 }
3742 }
3743 }
3744 Tensor::new(data, vec![rows, total_cols])
3745 .map(Value::Tensor)
3746 .map_err(invalid_variable)
3747}
3748
3749pub fn sortrows_table(value: Value, rest: &[Value]) -> BuiltinResult<(Value, Tensor)> {
3750 let object = into_table_object(value, "sortrows")?;
3751 let names = table_variable_names_from_object(&object)?;
3752 let sort_spec = SortSpec::parse(rest, &names)?;
3753 let height = table_height(&object)?;
3754 let variables = table_variables(&object)?;
3755 let mut indices: Vec<usize> = (0..height).collect();
3756 indices.sort_by(|&a, &b| {
3757 for key in &sort_spec.keys {
3758 let Some(value) = variables.fields.get(&key.name) else {
3759 continue;
3760 };
3761 let ord = compare_table_cells(value, a, b).unwrap_or(Ordering::Equal);
3762 let ord = if key.descending { ord.reverse() } else { ord };
3763 if ord != Ordering::Equal {
3764 return ord;
3765 }
3766 }
3767 a.cmp(&b)
3768 });
3769 let mut sorted_columns = Vec::with_capacity(names.len());
3770 for name in &names {
3771 let value = variables
3772 .fields
3773 .get(name)
3774 .ok_or_else(|| invalid_variable(format!("table: missing variable '{name}'")))?;
3775 sorted_columns.push(select_rows(value, &indices)?);
3776 }
3777 let row_names = selected_row_names(&object, &indices)?;
3778 let sorted = table_from_columns_with_properties(names, sorted_columns, row_names)?;
3779 let indices_tensor = Tensor::new(
3780 indices.iter().map(|idx| *idx as f64 + 1.0).collect(),
3781 vec![indices.len(), 1],
3782 )
3783 .map_err(invalid_variable)?;
3784 Ok((sorted, indices_tensor))
3785}
3786
/// Parsed `sortrows` arguments: the sort keys in priority order.
struct SortSpec {
    // Keys are consulted first to last; later keys only break ties.
    keys: Vec<SortKey>,
}
3790
/// One sort key: a table variable and the direction to sort it in.
struct SortKey {
    // Name of the table variable to compare.
    name: String,
    // When true, the comparison result for this key is reversed.
    descending: bool,
}
3795
3796impl SortSpec {
3797 fn parse(rest: &[Value], names: &[String]) -> BuiltinResult<Self> {
3798 let mut keys = if rest.is_empty() {
3799 names
3800 .iter()
3801 .map(|name| SortKey {
3802 name: name.clone(),
3803 descending: false,
3804 })
3805 .collect::<Vec<_>>()
3806 } else {
3807 parse_variable_selector(rest.first(), names)?
3808 .into_iter()
3809 .map(|name| SortKey {
3810 name,
3811 descending: false,
3812 })
3813 .collect()
3814 };
3815 if let Some(direction) = rest.get(1) {
3816 let directions = string_list(direction)?;
3817 if directions.len() == 1 {
3818 let descending = directions[0].eq_ignore_ascii_case("descend")
3819 || directions[0].eq_ignore_ascii_case("desc");
3820 for key in &mut keys {
3821 key.descending = descending;
3822 }
3823 } else {
3824 for (key, direction) in keys.iter_mut().zip(directions.iter()) {
3825 key.descending = direction.eq_ignore_ascii_case("descend")
3826 || direction.eq_ignore_ascii_case("desc");
3827 }
3828 }
3829 }
3830 Ok(Self { keys })
3831 }
3832}
3833
3834fn compare_table_cells(value: &Value, a: usize, b: usize) -> BuiltinResult<Ordering> {
3835 match value {
3836 Value::Tensor(tensor) => Ok(tensor
3837 .get2(a, 0)
3838 .map_err(invalid_index)?
3839 .partial_cmp(&tensor.get2(b, 0).map_err(invalid_index)?)
3840 .unwrap_or(Ordering::Greater)),
3841 Value::StringArray(array) => {
3842 let av = array.data.get(a).cloned().unwrap_or_default();
3843 let bv = array.data.get(b).cloned().unwrap_or_default();
3844 Ok(av.cmp(&bv))
3845 }
3846 Value::LogicalArray(array) => {
3847 let av = *array.data.get(a).unwrap_or(&0);
3848 let bv = *array.data.get(b).unwrap_or(&0);
3849 Ok(av.cmp(&bv))
3850 }
3851 Value::Object(obj) if obj.is_class("datetime") => {
3852 let tensor = crate::builtins::datetime::serials_from_datetime_value(value)?;
3853 Ok(tensor
3854 .data
3855 .get(a)
3856 .copied()
3857 .unwrap_or(f64::NAN)
3858 .partial_cmp(&tensor.data.get(b).copied().unwrap_or(f64::NAN))
3859 .unwrap_or(Ordering::Greater))
3860 }
3861 other => Ok(cell_key_string(other, a).cmp(&cell_key_string(other, b))),
3862 }
3863}
3864
/// A single grouping-key component with a total order, so that vectors of
/// atoms can serve as ordered map keys.
///
/// Atoms of different kinds are ordered by kind: missing, then logical, then
/// number, then text. Numbers use `f64::total_cmp`, so NaN compares equal to an
/// identical NaN and `-0.0` sorts before `0.0`.
#[derive(Clone, Debug)]
enum GroupAtom {
    Number(f64),
    Text(String),
    Logical(bool),
    Missing,
}

impl GroupAtom {
    /// Position of this atom's kind in the cross-kind ordering.
    fn rank(&self) -> u8 {
        match self {
            Self::Text(_) => 3,
            Self::Number(_) => 2,
            Self::Logical(_) => 1,
            Self::Missing => 0,
        }
    }
}

impl PartialEq for GroupAtom {
    /// Equality is defined through the total order so it stays consistent
    /// with `Ord` (including for NaN).
    fn eq(&self, other: &Self) -> bool {
        matches!(self.cmp(other), Ordering::Equal)
    }
}

impl Eq for GroupAtom {}

impl PartialOrd for GroupAtom {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for GroupAtom {
    /// Orders by kind first, then by payload within the same kind.
    fn cmp(&self, other: &Self) -> Ordering {
        self.rank()
            .cmp(&other.rank())
            .then_with(|| match (self, other) {
                (Self::Number(a), Self::Number(b)) => a.total_cmp(b),
                (Self::Text(a), Self::Text(b)) => a.cmp(b),
                (Self::Logical(a), Self::Logical(b)) => a.cmp(b),
                // Equal ranks with no payload to compare (both missing).
                _ => Ordering::Equal,
            })
    }
}
3913
3914fn cell_group_atom(value: &Value, row: usize) -> GroupAtom {
3915 match value {
3916 Value::Tensor(tensor) => tensor
3917 .get2(row, 0)
3918 .map(GroupAtom::Number)
3919 .unwrap_or(GroupAtom::Missing),
3920 Value::StringArray(array) => array
3921 .data
3922 .get(row)
3923 .cloned()
3924 .map(GroupAtom::Text)
3925 .unwrap_or(GroupAtom::Missing),
3926 Value::LogicalArray(array) => array
3927 .data
3928 .get(row)
3929 .map(|value| GroupAtom::Logical(*value != 0))
3930 .unwrap_or(GroupAtom::Missing),
3931 Value::Object(obj) if obj.is_class("datetime") => {
3932 crate::builtins::datetime::serials_from_datetime_value(value)
3933 .ok()
3934 .and_then(|tensor| tensor.data.get(row).copied())
3935 .map(GroupAtom::Number)
3936 .unwrap_or(GroupAtom::Missing)
3937 }
3938 other => GroupAtom::Text(cell_key_string(other, row)),
3939 }
3940}
3941
3942fn groupsummary_impl(
3943 table: Value,
3944 groupvars: Value,
3945 method: Value,
3946 rest: Vec<Value>,
3947) -> BuiltinResult<Value> {
3948 let object = into_table_object(table, "groupsummary")?;
3949 let names = table_variable_names_from_object(&object)?;
3950 let group_names = parse_variable_selector(Some(&groupvars), &names)?;
3951 let methods = string_list(&method)?;
3952 if methods.is_empty() {
3953 return Err(invalid_argument(
3954 "groupsummary: method list must not be empty",
3955 ));
3956 }
3957 let data_names = if let Some(value) = rest.first() {
3958 parse_variable_selector(Some(value), &names)?
3959 } else {
3960 names
3961 .iter()
3962 .filter(|name| !group_names.contains(name))
3963 .filter(|name| {
3964 table_variables(&object)
3965 .ok()
3966 .and_then(|vars| vars.fields.get(*name).cloned())
3967 .map(|value| matches!(value, Value::Tensor(_)))
3968 .unwrap_or(false)
3969 })
3970 .cloned()
3971 .collect()
3972 };
3973 let variables = table_variables(&object)?;
3974 let height = table_height(&object)?;
3975 let mut groups: BTreeMap<Vec<GroupAtom>, Vec<usize>> = BTreeMap::new();
3976 for row in 0..height {
3977 let key = group_names
3978 .iter()
3979 .map(|name| {
3980 variables
3981 .fields
3982 .get(name)
3983 .map(|value| cell_group_atom(value, row))
3984 .unwrap_or(GroupAtom::Missing)
3985 })
3986 .collect::<Vec<_>>();
3987 groups.entry(key).or_default().push(row);
3988 }
3989 let group_rows = groups
3990 .values()
3991 .filter_map(|rows| rows.first().copied())
3992 .collect::<Vec<_>>();
3993 let mut out_names = Vec::new();
3994 let mut out_columns = Vec::new();
3995 for name in &group_names {
3996 let value = variables.fields.get(name).ok_or_else(|| {
3997 invalid_variable(format!("groupsummary: missing group variable '{name}'"))
3998 })?;
3999 out_names.push(name.clone());
4000 out_columns.push(select_rows(value, &group_rows)?);
4001 }
4002 out_names.push("GroupCount".to_string());
4003 out_columns.push(Value::Tensor(
4004 Tensor::new(
4005 groups.values().map(|rows| rows.len() as f64).collect(),
4006 vec![groups.len(), 1],
4007 )
4008 .map_err(invalid_variable)?,
4009 ));
4010 for method in &methods {
4011 for name in &data_names {
4012 let value = variables.fields.get(name).ok_or_else(|| {
4013 invalid_variable(format!("groupsummary: missing data variable '{name}'"))
4014 })?;
4015 let values = summarize_groups(value, groups.values(), method)?;
4016 out_names.push(format!("{}_{}", method.to_ascii_lowercase(), name));
4017 out_columns.push(Value::Tensor(
4018 Tensor::new(values, vec![groups.len(), 1]).map_err(invalid_variable)?,
4019 ));
4020 }
4021 }
4022 table_from_columns(out_names, out_columns)
4023}
4024
4025fn summarize_groups<'a>(
4026 value: &Value,
4027 groups: impl Iterator<Item = &'a Vec<usize>>,
4028 method: &str,
4029) -> BuiltinResult<Vec<f64>> {
4030 let tensor = match value {
4031 Value::Tensor(tensor) if tensor.cols() == 1 => tensor,
4032 _ => {
4033 return Err(invalid_variable(
4034 "groupsummary: summary data variables must be numeric column vectors",
4035 ))
4036 }
4037 };
4038 groups
4039 .map(|rows| {
4040 let mut values = rows
4041 .iter()
4042 .map(|row| tensor.get2(*row, 0).map_err(invalid_index))
4043 .collect::<BuiltinResult<Vec<_>>>()?;
4044 values.retain(|value| !value.is_nan());
4045 let result = match method.to_ascii_lowercase().as_str() {
4046 "mean" => {
4047 if values.is_empty() {
4048 f64::NAN
4049 } else {
4050 values.iter().sum::<f64>() / values.len() as f64
4051 }
4052 }
4053 "sum" => values.iter().sum(),
4054 "min" => values.into_iter().fold(f64::INFINITY, f64::min),
4055 "max" => values.into_iter().fold(f64::NEG_INFINITY, f64::max),
4056 "median" => {
4057 if values.is_empty() {
4058 f64::NAN
4059 } else {
4060 values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
4061 let mid = values.len() / 2;
4062 if values.len() % 2 == 0 {
4063 (values[mid - 1] + values[mid]) / 2.0
4064 } else {
4065 values[mid]
4066 }
4067 }
4068 }
4069 "count" | "numel" => values.len() as f64,
4070 other => {
4071 return Err(invalid_argument(format!(
4072 "groupsummary: unsupported method '{other}'"
4073 )))
4074 }
4075 };
4076 Ok(result)
4077 })
4078 .collect()
4079}
4080
4081fn cell_key_string(value: &Value, row: usize) -> String {
4082 match value {
4083 Value::Tensor(tensor) => tensor
4084 .get2(row, 0)
4085 .map(format_key_number)
4086 .unwrap_or_default(),
4087 Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
4088 Value::LogicalArray(array) => array
4089 .data
4090 .get(row)
4091 .map(|value| value.to_string())
4092 .unwrap_or_default(),
4093 Value::Object(obj) if obj.is_class("datetime") => {
4094 crate::builtins::datetime::serials_from_datetime_value(value)
4095 .ok()
4096 .and_then(|tensor| tensor.data.get(row).copied())
4097 .map(format_key_number)
4098 .unwrap_or_default()
4099 }
4100 other => format!("{other}"),
4101 }
4102}
4103
4104pub fn table_display_text(value: &Value) -> BuiltinResult<String> {
4105 let object = match value {
4106 Value::Object(object) if object.is_class(TABLE_CLASS) => object,
4107 _ => return Err(invalid_argument("table display expects table object")),
4108 };
4109 let names = table_variable_names_from_object(object)?;
4110 let variables = table_variables(object)?;
4111 let rows = table_height(object)?;
4112 let preview = rows.min(12);
4113 let mut widths = names.iter().map(|name| name.len()).collect::<Vec<_>>();
4114 let rendered_cols = names
4115 .iter()
4116 .enumerate()
4117 .map(|(col, name)| {
4118 let value = variables
4119 .fields
4120 .get(name)
4121 .cloned()
4122 .unwrap_or_else(|| Value::String(String::new()));
4123 let cells = (0..preview)
4124 .map(|row| render_table_cell(&value, row))
4125 .collect::<Vec<_>>();
4126 for cell in &cells {
4127 widths[col] = widths[col].max(cell.len());
4128 }
4129 cells
4130 })
4131 .collect::<Vec<_>>();
4132
4133 let mut lines = Vec::new();
4134 lines.push(format!("{rows}x{} table", names.len()));
4135 if names.is_empty() {
4136 return Ok(lines.join("\n"));
4137 }
4138 let header = names
4139 .iter()
4140 .enumerate()
4141 .map(|(idx, name)| format!("{name:<width$}", width = widths[idx]))
4142 .collect::<Vec<_>>()
4143 .join(" ");
4144 lines.push(header);
4145 for row in 0..preview {
4146 lines.push(
4147 rendered_cols
4148 .iter()
4149 .enumerate()
4150 .map(|(col, cells)| format!("{:<width$}", cells[row], width = widths[col]))
4151 .collect::<Vec<_>>()
4152 .join(" "),
4153 );
4154 }
4155 if preview < rows {
4156 lines.push(format!("... {} more rows", rows - preview));
4157 }
4158 Ok(lines.join("\n"))
4159}
4160
4161pub fn table_summary_text(value: &Value) -> BuiltinResult<String> {
4162 let object = match value {
4163 Value::Object(object) if object.is_class(TABLE_CLASS) => object,
4164 _ => return Err(invalid_argument("table display expects table object")),
4165 };
4166 Ok(format!(
4167 "{}x{} table",
4168 table_height(object)?,
4169 table_width(object)?
4170 ))
4171}
4172
4173fn render_table_cell(value: &Value, row: usize) -> String {
4174 match value {
4175 Value::Tensor(tensor) => tensor
4176 .get2(row, 0)
4177 .map(format_table_number)
4178 .unwrap_or_default(),
4179 Value::StringArray(array) => array.data.get(row).cloned().unwrap_or_default(),
4180 Value::LogicalArray(array) => array
4181 .data
4182 .get(row)
4183 .map(|value| if *value != 0 { "true" } else { "false" }.to_string())
4184 .unwrap_or_default(),
4185 Value::Object(obj) if obj.is_class("datetime") => {
4186 crate::builtins::datetime::datetime_string_array(value)
4187 .ok()
4188 .flatten()
4189 .and_then(|array| array.data.get(row).cloned())
4190 .unwrap_or_else(|| value.to_string())
4191 }
4192 other => other.to_string(),
4193 }
4194}
4195
4196fn format_table_number(value: f64) -> String {
4197 if value.is_nan() {
4198 "NaN".to_string()
4199 } else if value.fract() == 0.0 && value.abs() < 1e15 {
4200 format!("{}", value as i64)
4201 } else {
4202 trim_float(format!("{value:.6}"))
4203 }
4204}
4205
4206fn format_key_number(value: f64) -> String {
4207 if value.is_nan() {
4208 "NaN".to_string()
4209 } else if value.is_infinite() {
4210 value.to_string()
4211 } else {
4212 trim_float(format!("{value:.17}"))
4213 }
4214}
4215
/// Drops trailing zeros after the first `.` in `text`, and the `.` itself when
/// nothing but zeros followed it. Text without a `.` is returned unchanged.
fn trim_float(mut text: String) -> String {
    let end = match text.find('.') {
        None => return text,
        Some(dot) => {
            // Length of the fractional part once its trailing zeros are gone.
            let kept = text[dot + 1..].trim_end_matches('0').len();
            if kept == 0 {
                dot
            } else {
                dot + 1 + kept
            }
        }
    };
    text.truncate(end);
    text
}
4229
4230fn scalar_text(value: &Value, context: &str) -> BuiltinResult<String> {
4231 match value {
4232 Value::String(text) => Ok(text.clone()),
4233 Value::CharArray(ca) if ca.rows == 1 => Ok(ca.data.iter().collect()),
4234 Value::StringArray(sa) if sa.data.len() == 1 => Ok(sa.data[0].clone()),
4235 _ => Err(invalid_argument(format!(
4236 "table: {context} must be a string scalar or character vector"
4237 ))),
4238 }
4239}
4240
4241fn bool_scalar(value: &Value, context: &str) -> BuiltinResult<bool> {
4242 match value {
4243 Value::Bool(flag) => Ok(*flag),
4244 Value::Int(value) => Ok(value.to_i64() != 0),
4245 Value::Num(value) if value.is_finite() => Ok(*value != 0.0),
4246 Value::String(_) | Value::CharArray(_) | Value::StringArray(_) => {
4247 let text = scalar_text(value, context)?;
4248 match text.to_ascii_lowercase().as_str() {
4249 "true" | "on" | "yes" => Ok(true),
4250 "false" | "off" | "no" => Ok(false),
4251 _ => Err(invalid_argument(format!(
4252 "table: {context} must be logical"
4253 ))),
4254 }
4255 }
4256 _ => Err(invalid_argument(format!(
4257 "table: {context} must be logical"
4258 ))),
4259 }
4260}
4261
4262fn nonnegative_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
4263 match value {
4264 Value::Int(value) if value.to_i64() >= 0 => Ok(value.to_i64() as usize),
4265 Value::Num(value)
4266 if value.is_finite()
4267 && *value >= 0.0
4268 && (value.round() - value).abs() <= f64::EPSILON =>
4269 {
4270 Ok(value.round() as usize)
4271 }
4272 _ => Err(invalid_argument(format!(
4273 "table: {context} must be a non-negative integer"
4274 ))),
4275 }
4276}
4277
4278fn positive_usize(value: &Value, context: &str) -> BuiltinResult<usize> {
4279 let value = nonnegative_usize(value, context)?;
4280 if value == 0 {
4281 return Err(invalid_argument(format!(
4282 "table: {context} must be a positive integer"
4283 )));
4284 }
4285 Ok(value)
4286}
4287
4288fn option_value_is_empty(value: &Value) -> bool {
4289 match value {
4290 Value::String(text) => text.trim().is_empty(),
4291 Value::CharArray(array) => {
4292 array.data.is_empty()
4293 || (array.rows == 1 && array.data.iter().all(|ch| ch.is_whitespace()))
4294 }
4295 Value::StringArray(array) => {
4296 array.data.is_empty() || (array.data.len() == 1 && array.data[0].trim().is_empty())
4297 }
4298 Value::Cell(cell) => {
4299 cell.data.is_empty() || cell.data.iter().all(|handle| option_value_is_empty(handle))
4300 }
4301 _ => false,
4302 }
4303}
4304
4305fn string_list(value: &Value) -> BuiltinResult<Vec<String>> {
4306 match value {
4307 Value::String(text) => Ok(vec![text.clone()]),
4308 Value::CharArray(ca) if ca.rows == 1 => Ok(vec![ca.data.iter().collect()]),
4309 Value::StringArray(array) => Ok(array.data.clone()),
4310 Value::Cell(cell) => {
4311 let mut out = Vec::with_capacity(cell.data.len());
4312 for handle in &cell.data {
4313 let value = handle;
4314 out.extend(string_list(value)?);
4315 }
4316 Ok(out)
4317 }
4318 _ => Err(invalid_argument(
4319 "table: expected string, string array, character vector, or cellstr",
4320 )),
4321 }
4322}
4323
4324fn optional_raw_variable_name_list(value: &Value) -> BuiltinResult<Option<Vec<String>>> {
4325 if option_value_is_empty(value) {
4326 Ok(None)
4327 } else {
4328 raw_variable_name_list(value).map(Some)
4329 }
4330}
4331
4332fn raw_variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
4333 let names = string_list(value)?;
4334 if names.is_empty() {
4335 return Err(invalid_variable("table: variable names must not be empty"));
4336 }
4337 Ok(names)
4338}
4339
4340fn variable_name_list(value: &Value) -> BuiltinResult<Vec<String>> {
4341 raw_variable_name_list(value).map(make_unique_variable_names)
4342}
4343
4344fn optional_variable_type_list(value: &Value) -> BuiltinResult<Option<Vec<ImportVariableType>>> {
4345 if option_value_is_empty(value) {
4346 Ok(None)
4347 } else {
4348 variable_type_list(value).map(Some)
4349 }
4350}
4351
4352fn variable_type_list(value: &Value) -> BuiltinResult<Vec<ImportVariableType>> {
4353 string_list(value)?
4354 .iter()
4355 .map(|raw| ImportVariableType::parse(raw))
4356 .collect()
4357}
4358
4359fn variable_type_names(value: &Value) -> BuiltinResult<Vec<String>> {
4360 string_list(value)?
4361 .iter()
4362 .map(|raw| ImportVariableType::canonical_label(raw))
4363 .collect()
4364}
4365
4366fn optional_range_spec(value: &Value) -> BuiltinResult<Option<RangeSpec>> {
4367 if option_value_is_empty(value) {
4368 Ok(None)
4369 } else {
4370 RangeSpec::parse(value).map(Some)
4371 }
4372}
4373
4374fn optional_sheet_selector(value: &Value) -> BuiltinResult<Option<SheetSelector>> {
4375 if option_value_is_empty(value) {
4376 Ok(None)
4377 } else {
4378 SheetSelector::parse(value).map(Some)
4379 }
4380}
4381
/// Produces the default names `Var1`, `Var2`, ..., `Var<count>`.
fn generated_variable_names(count: usize) -> Vec<String> {
    let mut names = Vec::with_capacity(count);
    for number in 1..=count {
        names.push(format!("Var{number}"));
    }
    names
}
4385
4386fn make_unique_variable_names(names: Vec<String>) -> Vec<String> {
4387 make_unique_names(
4388 names
4389 .into_iter()
4390 .enumerate()
4391 .map(|(idx, name)| make_valid_variable_name(&name, idx + 1))
4392 .collect(),
4393 )
4394}
4395
/// Makes names unique, comparing ASCII case-insensitively.
///
/// Names are trimmed; a blank name becomes `Var<position>` (1-based). A name
/// that clashes with an earlier one gets the first free suffix out of `_2`,
/// `_3`, ....
fn make_unique_names(names: Vec<String>) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut unique = Vec::with_capacity(names.len());
    for (position, name) in names.into_iter().enumerate() {
        let trimmed = name.trim();
        let base = if trimmed.is_empty() {
            format!("Var{}", position + 1)
        } else {
            trimmed.to_string()
        };
        let mut candidate = base.clone();
        let mut suffix = 1usize;
        // `insert` returns false while the lowercased candidate is taken.
        while !seen.insert(candidate.to_ascii_lowercase()) {
            suffix += 1;
            candidate = format!("{base}_{suffix}");
        }
        unique.push(candidate);
    }
    unique
}
4416
/// Turns arbitrary header text into an identifier-like variable name.
///
/// After trimming, characters that are not allowed (the first must be an ASCII
/// letter or `_`, later ones ASCII alphanumeric or `_`) are replaced by `_`,
/// without ever writing two replacement underscores in a row. Trailing
/// underscores are removed. If the result is empty or does not start with an
/// ASCII letter, `Var<fallback_index>` is returned instead.
fn make_valid_variable_name(raw: &str, fallback_index: usize) -> String {
    let mut cleaned = String::new();
    for (position, ch) in raw.trim().chars().enumerate() {
        let allowed = ch == '_'
            || if position == 0 {
                ch.is_ascii_alphabetic()
            } else {
                ch.is_ascii_alphanumeric()
            };
        if allowed {
            cleaned.push(ch);
        } else if !cleaned.ends_with('_') {
            cleaned.push('_');
        }
    }
    let cleaned = cleaned.trim_end_matches('_');
    match cleaned.chars().next() {
        Some(first) if first.is_ascii_alphabetic() => cleaned.to_string(),
        _ => format!("Var{fallback_index}"),
    }
}
4437
4438#[cfg(test)]
4439mod tests {
4440 use super::*;
4441 #[cfg(not(target_arch = "wasm32"))]
4442 use async_trait::async_trait;
4443 use futures::executor::block_on;
4444 #[cfg(not(target_arch = "wasm32"))]
4445 use runmat_filesystem::{
4446 DirEntry, FileHandle, FsMetadata, FsProvider, NativeFsProvider, OpenFlags,
4447 SandboxFsProvider,
4448 };
4449 use runmat_time::unix_timestamp_ms;
4450 use std::fs;
4451 #[cfg(not(target_arch = "wasm32"))]
4452 use std::io;
4453 use std::io::Write;
4454
4455 #[cfg(not(target_arch = "wasm32"))]
4456 struct PrefixSandboxProvider {
4457 prefix: &'static str,
4458 sandbox: SandboxFsProvider,
4459 native: NativeFsProvider,
4460 }
4461
4462 #[cfg(not(target_arch = "wasm32"))]
4463 impl PrefixSandboxProvider {
4464 fn is_virtual(&self, path: &Path) -> bool {
4465 path.to_string_lossy().starts_with(self.prefix)
4466 }
4467 }
4468
4469 #[cfg(not(target_arch = "wasm32"))]
4470 #[async_trait(?Send)]
4471 impl FsProvider for PrefixSandboxProvider {
4472 fn open(&self, path: &Path, flags: &OpenFlags) -> io::Result<Box<dyn FileHandle>> {
4473 if self.is_virtual(path) {
4474 self.sandbox.open(path, flags)
4475 } else {
4476 self.native.open(path, flags)
4477 }
4478 }
4479
4480 async fn read(&self, path: &Path) -> io::Result<Vec<u8>> {
4481 if self.is_virtual(path) {
4482 self.sandbox.read(path).await
4483 } else {
4484 self.native.read(path).await
4485 }
4486 }
4487
4488 async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
4489 if self.is_virtual(path) {
4490 self.sandbox.write(path, data).await
4491 } else {
4492 self.native.write(path, data).await
4493 }
4494 }
4495
4496 async fn remove_file(&self, path: &Path) -> io::Result<()> {
4497 if self.is_virtual(path) {
4498 self.sandbox.remove_file(path).await
4499 } else {
4500 self.native.remove_file(path).await
4501 }
4502 }
4503
4504 async fn metadata(&self, path: &Path) -> io::Result<FsMetadata> {
4505 if self.is_virtual(path) {
4506 self.sandbox.metadata(path).await
4507 } else {
4508 self.native.metadata(path).await
4509 }
4510 }
4511
4512 async fn symlink_metadata(&self, path: &Path) -> io::Result<FsMetadata> {
4513 if self.is_virtual(path) {
4514 self.sandbox.symlink_metadata(path).await
4515 } else {
4516 self.native.symlink_metadata(path).await
4517 }
4518 }
4519
4520 async fn read_dir(&self, path: &Path) -> io::Result<Vec<DirEntry>> {
4521 if self.is_virtual(path) {
4522 self.sandbox.read_dir(path).await
4523 } else {
4524 self.native.read_dir(path).await
4525 }
4526 }
4527
4528 async fn canonicalize(&self, path: &Path) -> io::Result<PathBuf> {
4529 if self.is_virtual(path) {
4530 self.sandbox.canonicalize(path).await
4531 } else {
4532 self.native.canonicalize(path).await
4533 }
4534 }
4535
4536 async fn create_dir(&self, path: &Path) -> io::Result<()> {
4537 if self.is_virtual(path) {
4538 self.sandbox.create_dir(path).await
4539 } else {
4540 self.native.create_dir(path).await
4541 }
4542 }
4543
4544 async fn create_dir_all(&self, path: &Path) -> io::Result<()> {
4545 if self.is_virtual(path) {
4546 self.sandbox.create_dir_all(path).await
4547 } else {
4548 self.native.create_dir_all(path).await
4549 }
4550 }
4551
4552 async fn remove_dir(&self, path: &Path) -> io::Result<()> {
4553 if self.is_virtual(path) {
4554 self.sandbox.remove_dir(path).await
4555 } else {
4556 self.native.remove_dir(path).await
4557 }
4558 }
4559
4560 async fn remove_dir_all(&self, path: &Path) -> io::Result<()> {
4561 if self.is_virtual(path) {
4562 self.sandbox.remove_dir_all(path).await
4563 } else {
4564 self.native.remove_dir_all(path).await
4565 }
4566 }
4567
4568 async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
4569 match (self.is_virtual(from), self.is_virtual(to)) {
4570 (true, true) => self.sandbox.rename(from, to).await,
4571 (false, false) => self.native.rename(from, to).await,
4572 _ => Err(io::Error::new(
4573 io::ErrorKind::Unsupported,
4574 "cross-provider rename is unsupported in test provider",
4575 )),
4576 }
4577 }
4578
4579 async fn set_readonly(&self, path: &Path, readonly: bool) -> io::Result<()> {
4580 if self.is_virtual(path) {
4581 self.sandbox.set_readonly(path, readonly).await
4582 } else {
4583 self.native.set_readonly(path, readonly).await
4584 }
4585 }
4586 }
4587
4588 fn unique_path(prefix: &str) -> PathBuf {
4589 let mut path = std::env::temp_dir();
4590 path.push(format!(
4591 "runmat_{prefix}_{}_{}",
4592 std::process::id(),
4593 unix_timestamp_ms()
4594 ));
4595 path
4596 }
4597
4598 fn read_table(path: &Path, args: Vec<Value>) -> Value {
4599 block_on(readtable_builtin(
4600 Value::from(path.to_string_lossy().to_string()),
4601 args,
4602 ))
4603 .expect("readtable")
4604 }
4605
4606 fn read_table_err(path: &Path, args: Vec<Value>) -> RuntimeError {
4607 block_on(readtable_builtin(
4608 Value::from(path.to_string_lossy().to_string()),
4609 args,
4610 ))
4611 .expect_err("expected readtable failure")
4612 }
4613
4614 fn spreadsheet_options(args: Vec<Value>) -> StructValue {
4615 match block_on(spreadsheet_import_options_builtin(args)).expect("spreadsheetImportOptions")
4616 {
4617 Value::Struct(options) => options,
4618 other => panic!("expected struct options, got {other:?}"),
4619 }
4620 }
4621
4622 fn detect_options(path: &Path, args: Vec<Value>) -> StructValue {
4623 match block_on(detect_import_options_builtin(
4624 Value::from(path.to_string_lossy().to_string()),
4625 args,
4626 ))
4627 .expect("detectImportOptions")
4628 {
4629 Value::Struct(options) => options,
4630 other => panic!("expected struct options, got {other:?}"),
4631 }
4632 }
4633
4634 fn char_row(array: &CharArray, row: usize) -> String {
4635 let start = row * array.cols;
4636 array.data[start..start + array.cols].iter().collect()
4637 }
4638
4639 fn object(value: Value) -> ObjectInstance {
4640 match value {
4641 Value::Object(object) => object,
4642 other => panic!("expected table object, got {other:?}"),
4643 }
4644 }
4645
4646 #[test]
4647 fn readtable_imports_headered_numeric_and_text_columns() {
4648 let path = unique_path("readtable_basic");
4649 fs::write(&path, "Name,Score\nAda,10\nGrace,12\n").expect("write sample");
4650 let table = object(read_table(&path, Vec::new()));
4651 assert_eq!(
4652 table_variable_names_from_object(&table).unwrap(),
4653 vec!["Name".to_string(), "Score".to_string()]
4654 );
4655 match table_member_get(&table, &Value::from("Score")).unwrap() {
4656 Value::Tensor(tensor) => {
4657 assert_eq!(tensor.shape, vec![2, 1]);
4658 assert_eq!(tensor.data, vec![10.0, 12.0]);
4659 }
4660 other => panic!("expected tensor, got {other:?}"),
4661 }
4662 match table_member_get(&table, &Value::from("Name")).unwrap() {
4663 Value::StringArray(array) => {
4664 assert_eq!(array.data, vec!["Ada".to_string(), "Grace".to_string()]);
4665 }
4666 other => panic!("expected string array, got {other:?}"),
4667 }
4668 let _ = fs::remove_file(&path);
4669 }
4670
4671 #[test]
4672 fn readtable_auto_does_not_consume_headerless_numeric_rows() {
4673 let path = unique_path("readtable_headerless_numeric");
4674 fs::write(&path, "1,2\n3,4\n").expect("write sample");
4675 let table = object(read_table(&path, Vec::new()));
4676 assert_eq!(
4677 table_variable_names_from_object(&table).unwrap(),
4678 vec!["Var1".to_string(), "Var2".to_string()]
4679 );
4680 match table_member_get(&table, &Value::from("Var1")).unwrap() {
4681 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![1.0, 3.0]),
4682 other => panic!("expected tensor, got {other:?}"),
4683 }
4684 match table_member_get(&table, &Value::from("Var2")).unwrap() {
4685 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![2.0, 4.0]),
4686 other => panic!("expected tensor, got {other:?}"),
4687 }
4688 let _ = fs::remove_file(&path);
4689 }
4690
4691 #[test]
4692 fn readtable_rejects_unknown_and_invalid_options() {
4693 let path = unique_path("readtable_invalid_options");
4694 fs::write(&path, "A\n1\n").expect("write sample");
4695 let err = read_table_err(
4696 &path,
4697 vec![Value::from("DefinitelyNotAnOption"), Value::from(1.0)],
4698 );
4699 assert!(err.message().contains("unsupported option"));
4700 let err = read_table_err(
4701 &path,
4702 vec![Value::from("VariableNamingRule"), Value::from("mangle")],
4703 );
4704 assert!(err.message().contains("unsupported VariableNamingRule"));
4705 let _ = fs::remove_file(&path);
4706 }
4707
4708 #[test]
4709 fn readtable_handles_quoted_delimiters_and_newlines() {
4710 let path = unique_path("readtable_quoted_newlines");
4711 fs::write(
4712 &path,
4713 "Name,Note\nAda,\"hello, world\"\nGrace,\"line one\nline two\"\n",
4714 )
4715 .expect("write sample");
4716 let table = object(read_table(&path, Vec::new()));
4717 match table_member_get(&table, &Value::from("Note")).unwrap() {
4718 Value::StringArray(array) => assert_eq!(
4719 array.data,
4720 vec!["hello, world".to_string(), "line one\nline two".to_string()]
4721 ),
4722 other => panic!("expected string array, got {other:?}"),
4723 }
4724 let _ = fs::remove_file(&path);
4725 }
4726
4727 #[test]
4728 fn readtable_supports_explicit_names_and_missing_tokens() {
4729 let path = unique_path("readtable_options");
4730 fs::write(&path, "1,NA\n2,4\n").expect("write sample");
4731 let names =
4732 StringArray::new(vec!["A".to_string(), "B".to_string()], vec![1, 2]).expect("names");
4733 let table = object(read_table(
4734 &path,
4735 vec![
4736 Value::from("ReadVariableNames"),
4737 Value::Bool(false),
4738 Value::from("VariableNames"),
4739 Value::StringArray(names),
4740 Value::from("TreatAsMissing"),
4741 Value::from("NA"),
4742 ],
4743 ));
4744 match table_member_get(&table, &Value::from("B")).unwrap() {
4745 Value::Tensor(tensor) => {
4746 assert!(tensor.data[0].is_nan());
4747 assert_eq!(tensor.data[1], 4.0);
4748 }
4749 other => panic!("expected tensor, got {other:?}"),
4750 }
4751 let _ = fs::remove_file(&path);
4752 }
4753
4754 #[test]
4755 fn readtable_preserves_variable_names_when_requested() {
4756 let path = unique_path("readtable_preserve_names");
4757 fs::write(&path, "daily revenue,total orders\n100,10\n").expect("write sample");
4758 let table = object(read_table(
4759 &path,
4760 vec![Value::from("VariableNamingRule"), Value::from("preserve")],
4761 ));
4762 assert_eq!(
4763 table_variable_names_from_object(&table).unwrap(),
4764 vec!["daily revenue".to_string(), "total orders".to_string()]
4765 );
4766 let _ = fs::remove_file(&path);
4767 }
4768
4769 fn write_zip_file(zip: &mut zip::ZipWriter<std::fs::File>, name: &str, contents: &str) {
4770 let options = zip::write::SimpleFileOptions::default()
4771 .compression_method(zip::CompressionMethod::Stored);
4772 zip.start_file(name, options).expect("start xlsx part");
4773 zip.write_all(contents.as_bytes()).expect("write xlsx part");
4774 }
4775
4776 fn write_minimal_xlsx(path: &Path) {
4777 let file = std::fs::File::create(path).expect("create xlsx");
4778 let mut zip = zip::ZipWriter::new(file);
4779 write_zip_file(
4780 &mut zip,
4781 "[Content_Types].xml",
4782 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4783<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
4784 <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
4785 <Default Extension="xml" ContentType="application/xml"/>
4786 <Override PartName="/xl/workbook.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"/>
4787 <Override PartName="/xl/worksheets/sheet1.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"/>
4788 <Override PartName="/xl/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"/>
4789</Types>"#,
4790 );
4791 write_zip_file(
4792 &mut zip,
4793 "_rels/.rels",
4794 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4795<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
4796 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="xl/workbook.xml"/>
4797</Relationships>"#,
4798 );
4799 write_zip_file(
4800 &mut zip,
4801 "xl/workbook.xml",
4802 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4803<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
4804 <sheets>
4805 <sheet name="Data" sheetId="1" r:id="rId1"/>
4806 </sheets>
4807</workbook>"#,
4808 );
4809 write_zip_file(
4810 &mut zip,
4811 "xl/_rels/workbook.xml.rels",
4812 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4813<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
4814 <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" Target="worksheets/sheet1.xml"/>
4815 <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
4816</Relationships>"#,
4817 );
4818 write_zip_file(
4819 &mut zip,
4820 "xl/styles.xml",
4821 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4822<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4823 <fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
4824 <fills count="1"><fill><patternFill patternType="none"/></fill></fills>
4825 <borders count="1"><border/></borders>
4826 <cellStyleXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellStyleXfs>
4827 <cellXfs count="1"><xf numFmtId="0" fontId="0" fillId="0" borderId="0"/></cellXfs>
4828</styleSheet>"#,
4829 );
4830 write_zip_file(
4831 &mut zip,
4832 "xl/worksheets/sheet1.xml",
4833 r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
4834<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
4835 <sheetData>
4836 <row r="1">
4837 <c r="A1" t="inlineStr"><is><t>Date</t></is></c>
4838 <c r="B1" t="inlineStr"><is><t>Orders</t></is></c>
4839 <c r="C1" t="inlineStr"><is><t>Revenue</t></is></c>
4840 </row>
4841 <row r="2">
4842 <c r="A2" t="inlineStr"><is><t>2026-06-01</t></is></c>
4843 <c r="B2"><v>10</v></c>
4844 <c r="C2"><v>200</v></c>
4845 </row>
4846 <row r="3">
4847 <c r="A3" t="inlineStr"><is><t>2026-06-02</t></is></c>
4848 <c r="B3"><v>4</v></c>
4849 <c r="C3"><v>90</v></c>
4850 </row>
4851 </sheetData>
4852</worksheet>"#,
4853 );
4854 zip.finish().expect("finish xlsx");
4855 }
4856
4857 #[test]
4858 fn readtable_imports_xlsx_sheet_and_range() {
4859 let path = unique_path("readtable_spreadsheet");
4860 let path = path.with_extension("xlsx");
4861 write_minimal_xlsx(&path);
4862 let table = object(read_table(
4863 &path,
4864 vec![
4865 Value::from("Sheet"),
4866 Value::from("Data"),
4867 Value::from("Range"),
4868 Value::from("A1:C3"),
4869 ],
4870 ));
4871 assert_eq!(
4872 table_variable_names_from_object(&table).unwrap(),
4873 vec![
4874 "Date".to_string(),
4875 "Orders".to_string(),
4876 "Revenue".to_string()
4877 ]
4878 );
4879 match table_member_get(&table, &Value::from("Revenue")).unwrap() {
4880 Value::Tensor(tensor) => assert_eq!(tensor.data, vec![200.0, 90.0]),
4881 other => panic!("expected tensor, got {other:?}"),
4882 }
4883 let _ = fs::remove_file(&path);
4884 }
4885
4886 #[test]
4887 fn spreadsheet_import_options_registers_public_descriptor() {
4888 assert!(runmat_builtins::builtin_function_by_name("spreadsheetImportOptions").is_some());
4889 let labels = SPREADSHEET_IMPORT_OPTIONS_DESCRIPTOR
4890 .signatures
4891 .iter()
4892 .map(|signature| signature.label)
4893 .collect::<Vec<_>>();
4894 assert!(labels.contains(&"opts = spreadsheetImportOptions()"));
4895 assert!(labels.contains(&"opts = spreadsheetImportOptions(nameValuePairs...)"));
4896 }
4897
4898 #[test]
4899 fn detect_import_options_registers_public_descriptor() {
4900 assert!(runmat_builtins::builtin_function_by_name("detectImportOptions").is_some());
4901 let labels = DETECT_IMPORT_OPTIONS_DESCRIPTOR
4902 .signatures
4903 .iter()
4904 .map(|signature| signature.label)
4905 .collect::<Vec<_>>();
4906 assert!(labels.contains(&"opts = detectImportOptions(filename)"));
4907 assert!(labels.contains(&"opts = detectImportOptions(filename, nameValuePairs...)"));
4908 }
4909
/// Detects import options for a semicolon-delimited text file, checks the
/// reported file type, delimiter, header settings, variable names and variable
/// types, then replays the detected struct through `read_table`.
#[test]
fn detect_import_options_infers_text_delimiter_names_and_types() {
    /// Builds an owned `Vec<String>` from string literals.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    let sample_path = unique_path("detect_import_options_text");
    let contents = "Name;Score;Flag;When\nAda;10;true;2026-06-01\nGrace;12;false;2026-06-02\n";
    fs::write(&sample_path, contents).expect("write sample");
    let detected = detect_options(&sample_path, Vec::new());

    // Scalar option fields and the values the detector is expected to report.
    let scalar_fields = [
        ("FileType", Value::from("text")),
        ("Delimiter", Value::from(";")),
        ("NumHeaderLines", Value::Num(1.0)),
        ("ReadVariableNames", Value::Bool(false)),
    ];
    for (field, expected) in &scalar_fields {
        assert_eq!(detected.fields.get(*field), Some(expected));
    }

    // String-array option fields: one entry per column of the sample.
    let array_fields = [
        ("VariableNames", strings(&["Name", "Score", "Flag", "When"])),
        (
            "VariableTypes",
            strings(&["string", "double", "logical", "datetime"]),
        ),
    ];
    for (field, expected) in &array_fields {
        let value = detected.fields.get(*field).unwrap();
        if let Value::StringArray(array) = value {
            assert_eq!(&array.data, expected);
        } else {
            panic!("expected string array, got {value:?}");
        }
    }

    // Replaying the detected struct must produce the same column names and data.
    let table = object(read_table(&sample_path, vec![Value::Struct(detected)]));
    let table_names = table_variable_names_from_object(&table).unwrap();
    assert_eq!(table_names, strings(&["Name", "Score", "Flag", "When"]));

    let score = table_member_get(&table, &Value::from("Score")).unwrap();
    if let Value::Tensor(tensor) = &score {
        assert_eq!(tensor.data, vec![10.0, 12.0]);
    } else {
        panic!("expected tensor, got {score:?}");
    }

    let _ = fs::remove_file(&sample_path);
}
4966
/// The struct returned by option detection can be handed to `readmatrix` as its
/// only extra argument, yielding the numeric body of the CSV sample.
#[test]
fn detect_import_options_struct_can_drive_readmatrix() {
    let csv_path = unique_path("detect_import_options_readmatrix");
    fs::write(&csv_path, "A,B\n1,2\n3,4\n").expect("write sample");
    let detected = detect_options(&csv_path, Vec::new());

    let filename = Value::from(csv_path.to_string_lossy().to_string());
    let pending = crate::builtins::io::tabular::readmatrix::readmatrix_builtin(
        filename,
        vec![Value::Struct(detected)],
    );
    let matrix = block_on(pending).expect("readmatrix");

    if let Value::Tensor(tensor) = &matrix {
        assert_eq!(tensor.shape, vec![2, 2]);
        // Expected data lists the first column (1, 3) before the second (2, 4).
        assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);
    } else {
        panic!("expected tensor, got {matrix:?}");
    }

    let _ = fs::remove_file(&csv_path);
}
4988
/// A leading U+FEFF in the file must not leak into the detected variable names,
/// nor into the table produced by replaying the detected options.
#[test]
fn detect_import_options_strips_bom_from_detected_names() {
    let bom_path = unique_path("detect_import_options_bom");
    fs::write(&bom_path, "\u{FEFF}A,B\n1,2\n3,4\n").expect("write sample");
    let detected = detect_options(&bom_path, Vec::new());
    let expected_names = vec!["A".to_string(), "B".to_string()];

    let names_value = detected.fields.get("VariableNames").unwrap();
    if let Value::StringArray(array) = names_value {
        assert_eq!(array.data, expected_names);
    } else {
        panic!("expected string array, got {names_value:?}");
    }

    let table = object(read_table(&bom_path, vec![Value::Struct(detected)]));
    let table_names = table_variable_names_from_object(&table).unwrap();
    assert_eq!(table_names, expected_names);

    let column_a = table_member_get(&table, &Value::from("A")).unwrap();
    if let Value::Tensor(tensor) = &column_a {
        assert_eq!(tensor.data, vec![1.0, 3.0]);
    } else {
        panic!("expected tensor, got {column_a:?}");
    }

    let _ = fs::remove_file(&bom_path);
}
5011
/// Column-only (`C:D`) and row-only (`2:3`) ranges passed to option detection
/// must be stored in a form that selects the same cells when replayed.
#[test]
fn detect_import_options_preserves_partial_ranges_for_replay() {
    let sample_path = unique_path("detect_import_options_partial_range");

    // Column-only range over a sample that has a header row.
    fs::write(&sample_path, "ID,A,B,C\nr1,1,2,3\nr2,4,5,6\nr3,7,8,9\n").expect("write sample");
    let column_request = vec![Value::from("Range"), Value::from("C:D")];
    let column_options = detect_options(&sample_path, column_request);
    let stored_column_range = column_options.fields.get("Range");
    assert_eq!(stored_column_range, Some(&Value::from("C2:D")));

    let column_table = object(read_table(
        &sample_path,
        vec![Value::Struct(column_options)],
    ));
    let column_names = table_variable_names_from_object(&column_table).unwrap();
    assert_eq!(column_names, vec!["B".to_string(), "C".to_string()]);
    let column_b = table_member_get(&column_table, &Value::from("B")).unwrap();
    if let Value::Tensor(tensor) = &column_b {
        assert_eq!(tensor.data, vec![2.0, 5.0, 8.0]);
    } else {
        panic!("expected tensor, got {column_b:?}");
    }

    // Row-only range over a purely numeric sample written to the same path.
    fs::write(&sample_path, "11,12\n21,22\n31,32\n41,42\n").expect("write numeric sample");
    let row_request = vec![Value::from("Range"), Value::from("2:3")];
    let row_options = detect_options(&sample_path, row_request);
    let stored_row_range = row_options.fields.get("Range");
    assert_eq!(stored_row_range, Some(&Value::from("2:3")));

    let row_table = object(read_table(&sample_path, vec![Value::Struct(row_options)]));
    let second_column = table_member_get(&row_table, &Value::from("Var2")).unwrap();
    if let Value::Tensor(tensor) = &second_column {
        assert_eq!(tensor.data, vec![22.0, 32.0]);
    } else {
        panic!("expected tensor, got {second_column:?}");
    }

    let _ = fs::remove_file(&sample_path);
}
5042
/// With `ReadRowNames` enabled, the detected options must replay through
/// `read_table` so that the first column becomes the table's row names.
#[test]
fn detect_import_options_read_row_names_replays_through_readtable() {
    let sample_path = unique_path("detect_import_options_row_names");
    fs::write(&sample_path, "Row,Name,Score\nr1,Ada,10\nr2,Grace,12\n").expect("write sample");
    let request = vec![Value::from("ReadRowNames"), Value::Bool(true)];
    let detected = detect_options(&sample_path, request);

    // The options report two variables while still listing all three header names.
    assert_eq!(detected.fields.get("NumVariables"), Some(&Value::Num(2.0)));
    let names_value = detected.fields.get("VariableNames").unwrap();
    if let Value::StringArray(array) = names_value {
        let all_headers = vec!["Row".to_string(), "Name".to_string(), "Score".to_string()];
        assert_eq!(array.data, all_headers);
    } else {
        panic!("expected string array, got {names_value:?}");
    }

    let table = object(read_table(&sample_path, vec![Value::Struct(detected)]));
    let data_names = table_variable_names_from_object(&table).unwrap();
    assert_eq!(data_names, vec!["Name".to_string(), "Score".to_string()]);

    let props = table_public_properties(&table).unwrap();
    let row_names = props.fields.get(ROW_NAMES).unwrap();
    if let Value::StringArray(array) = row_names {
        assert_eq!(array.data, vec!["r1".to_string(), "r2".to_string()]);
    } else {
        panic!("expected row names, got {row_names:?}");
    }

    let score = table_member_get(&table, &Value::from("Score")).unwrap();
    if let Value::Tensor(tensor) = &score {
        assert_eq!(tensor.data, vec![10.0, 12.0]);
    } else {
        panic!("expected tensor, got {score:?}");
    }

    let _ = fs::remove_file(&sample_path);
}
5074
/// Options detected with an explicit `Encoding` of `windows-1252` must still
/// drive `readmatrix` on a file whose header contains the raw byte 0xE9.
#[test]
fn detect_import_options_encoding_replays_through_readmatrix() {
    let encoded_path = unique_path("detect_import_options_encoding_readmatrix");
    fs::write(&encoded_path, b"Caf\xe9,Score\n1,2\n3,4\n").expect("write sample");

    let request = vec![Value::from("Encoding"), Value::from("windows-1252")];
    let detected = detect_options(&encoded_path, request);

    let filename = Value::from(encoded_path.to_string_lossy().to_string());
    let pending = crate::builtins::io::tabular::readmatrix::readmatrix_builtin(
        filename,
        vec![Value::Struct(detected)],
    );
    let matrix = block_on(pending).expect("readmatrix");

    if let Value::Tensor(tensor) = &matrix {
        assert_eq!(tensor.shape, vec![2, 2]);
        assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);
    } else {
        panic!("expected tensor, got {matrix:?}");
    }

    let _ = fs::remove_file(&encoded_path);
}
5099
/// Replays detected import options through `read_table` and `readmatrix` while a
/// `PrefixSandboxProvider` is installed as the active filesystem provider.
///
/// Both sample files are written through `runmat_filesystem::write_async`, so
/// detection and replay read them back through the same provider.
#[cfg(not(target_arch = "wasm32"))]
#[test]
fn detect_import_options_replays_through_filesystem_provider() {
    let root = unique_path("detect_import_options_provider_root");
    {
        // Both guards stay bound until the end of this scope.
        // NOTE(review): presumably dropping them restores the previous provider
        // and releases the override lock; confirm against `runmat_filesystem`.
        let _provider_lock = runmat_filesystem::provider_override_lock();
        let provider = PrefixSandboxProvider {
            prefix: "/provider",
            sandbox: SandboxFsProvider::new(root.clone()).expect("sandbox provider"),
            native: NativeFsProvider,
        };
        let _provider_guard =
            runmat_filesystem::replace_provider(std::sync::Arc::new(provider));
        block_on(runmat_filesystem::write_async(
            "/provider.csv",
            b"Name,Score\nAda,10\nGrace,12\n",
        ))
        .expect("write provider sample");

        let virtual_path = Path::new("/provider.csv");
        let options = detect_options(virtual_path, Vec::new());
        // `options` is not needed after this call, so it is moved rather than cloned.
        let table = object(read_table(virtual_path, vec![Value::Struct(options)]));
        assert_eq!(
            table_variable_names_from_object(&table).unwrap(),
            vec!["Name".to_string(), "Score".to_string()]
        );
        match table_member_get(&table, &Value::from("Score")).unwrap() {
            Value::Tensor(tensor) => assert_eq!(tensor.data, vec![10.0, 12.0]),
            other => panic!("expected tensor, got {other:?}"),
        }

        block_on(runmat_filesystem::write_async(
            "/provider_numeric.csv",
            b"A,B\n1,2\n3,4\n",
        ))
        .expect("write provider numeric sample");
        let matrix_options = detect_options(Path::new("/provider_numeric.csv"), Vec::new());
        let matrix = block_on(
            crate::builtins::io::tabular::readmatrix::readmatrix_builtin(
                Value::from("/provider_numeric.csv"),
                vec![Value::Struct(matrix_options)],
            ),
        )
        .expect("readmatrix");
        match matrix {
            Value::Tensor(tensor) => {
                assert_eq!(tensor.shape, vec![2, 2]);
                assert_eq!(tensor.data, vec![1.0, 3.0, 2.0, 4.0]);
            }
            other => panic!("expected tensor, got {other:?}"),
        }
    }
    // Best-effort cleanup of the sandbox directory once the override is gone.
    let _ = fs::remove_dir_all(&root);
}
5157
/// Caller-supplied name/value overrides (delimiter, header line count, naming
/// rule, text type) must be reflected in the detected options struct.
#[test]
fn detect_import_options_honors_overrides_and_range() {
    let sample_path = unique_path("detect_import_options_overrides");
    fs::write(&sample_path, "ignore me\nRaw A|Raw B\n5|yes\n6|no\n").expect("write sample");

    // Flatten the (name, value) pairs into the alternating argument list.
    let mut overrides = Vec::new();
    for (name, value) in [
        ("Delimiter", Value::from("|")),
        ("NumHeaderLines", Value::Num(1.0)),
        ("VariableNamingRule", Value::from("preserve")),
        ("TextType", Value::from("char")),
    ] {
        overrides.push(Value::from(name));
        overrides.push(value);
    }
    let detected = detect_options(&sample_path, overrides);

    // One header line was requested; the stored count is expected to be 2.
    let scalar_fields = [
        ("Delimiter", Value::from("|")),
        ("NumHeaderLines", Value::Num(2.0)),
        ("VariableNamingRule", Value::from("preserve")),
    ];
    for (field, expected) in &scalar_fields {
        assert_eq!(detected.fields.get(*field), Some(expected));
    }

    let names_value = detected.fields.get("VariableNames").unwrap();
    if let Value::StringArray(array) = names_value {
        assert_eq!(array.data, vec!["Raw A".to_string(), "Raw B".to_string()]);
    } else {
        panic!("expected string array, got {names_value:?}");
    }

    let types_value = detected.fields.get("VariableTypes").unwrap();
    if let Value::StringArray(array) = types_value {
        assert_eq!(
            array.data,
            vec!["double".to_string(), "logical".to_string()]
        );
    } else {
        panic!("expected string array, got {types_value:?}");
    }

    let _ = fs::remove_file(&sample_path);
}
5198
/// `spreadsheet_options` should return a struct that echoes the supplied
/// name/value pairs and fills in default `Var1`, `Var2` variable names.
#[test]
fn spreadsheet_import_options_builds_editable_options_struct() {
    let requested_types =
        StringArray::new(vec!["double".into(), "string".into()], vec![1, 2]).unwrap();
    let built = spreadsheet_options(vec![
        Value::from("NumVariables"),
        Value::Num(2.0),
        Value::from("VariableTypes"),
        Value::StringArray(requested_types),
        Value::from("DataRange"),
        Value::from("A2:B5"),
    ]);

    let scalar_fields = [
        ("FileType", Value::from("spreadsheet")),
        ("NumVariables", Value::Num(2.0)),
        ("DataRange", Value::from("A2:B5")),
    ];
    for (field, expected) in &scalar_fields {
        assert_eq!(built.fields.get(*field), Some(expected));
    }

    // Both string-array fields must be 1x2 rows with the listed entries.
    let array_fields = [
        ("VariableNames", ["Var1", "Var2"]),
        ("VariableTypes", ["double", "string"]),
    ];
    for (field, entries) in &array_fields {
        let value = built.fields.get(*field).unwrap();
        if let Value::StringArray(array) = value {
            let expected: Vec<String> = entries.iter().map(|entry| entry.to_string()).collect();
            assert_eq!(array.data, expected);
            assert_eq!(array.shape, vec![1, 2]);
        } else {
            panic!("expected string array, got {value:?}");
        }
    }
}
5232
/// A spreadsheet options struct edited after construction (sheet, data range,
/// names, types) must be honoured by `read_table` on the xlsx fixture.
#[test]
fn readtable_consumes_spreadsheet_import_options_struct() {
    let workbook_path = unique_path("readtable_spreadsheet_options").with_extension("xlsx");
    write_minimal_xlsx(&workbook_path);

    let mut options = spreadsheet_options(vec![Value::from("NumVariables"), Value::Num(1.0)]);
    let single_name = StringArray::new(vec!["Amount".into()], vec![1, 1]).unwrap();
    let single_type = StringArray::new(vec!["double".into()], vec![1, 1]).unwrap();
    // Apply the edits in the same order the fields are listed here.
    for (field, value) in [
        ("Sheet", Value::from("Data")),
        ("DataRange", Value::from("C2:C3")),
        ("VariableNames", Value::StringArray(single_name)),
        ("VariableTypes", Value::StringArray(single_type)),
    ] {
        options.insert(field, value);
    }

    let table = object(read_table(&workbook_path, vec![Value::Struct(options)]));
    let names = table_variable_names_from_object(&table).unwrap();
    assert_eq!(names, vec!["Amount".to_string()]);

    // NOTE(review): 200 and 90 are presumably the fixture's C2:C3 cells on the
    // "Data" sheet; confirm against `write_minimal_xlsx`.
    let amount = table_member_get(&table, &Value::from("Amount")).unwrap();
    if let Value::Tensor(tensor) = &amount {
        assert_eq!(tensor.shape, vec![2, 1]);
        assert_eq!(tensor.data, vec![200.0, 90.0]);
        assert_eq!(tensor.dtype, NumericDType::F64);
    } else {
        panic!("expected tensor, got {amount:?}");
    }

    let _ = fs::remove_file(&workbook_path);
}
5264
/// Passing an unmodified default spreadsheet options struct must not stop
/// `read_table` from taking variable names from the workbook's header row.
#[test]
fn readtable_default_spreadsheet_options_still_infers_headers() {
    let workbook_path =
        unique_path("readtable_default_spreadsheet_options").with_extension("xlsx");
    write_minimal_xlsx(&workbook_path);

    let defaults = spreadsheet_options(Vec::new());
    let table = object(read_table(&workbook_path, vec![Value::Struct(defaults)]));

    let expected_names: Vec<String> = ["Date", "Orders", "Revenue"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    let actual_names = table_variable_names_from_object(&table).unwrap();
    assert_eq!(actual_names, expected_names);

    let _ = fs::remove_file(&workbook_path);
}
5282
/// An explicit `VariableTypes` list must coerce each imported column to the
/// requested representation: single, logical, datetime and duration.
#[test]
fn readtable_variable_types_coerce_imported_columns() {
    let sample_path = unique_path("readtable_variable_types");
    let contents =
        "Value,Flag,When,Elapsed\n1.5,true,2026-06-01,01:30:00\n2.25,false,2026-06-02,02:00:00\n";
    fs::write(&sample_path, contents).expect("write sample");

    let type_names: Vec<String> = ["single", "logical", "datetime", "duration"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    let types = StringArray::new(type_names, vec![1, 4]).unwrap();
    let arguments = vec![Value::from("VariableTypes"), Value::StringArray(types)];
    let table = object(read_table(&sample_path, arguments));

    let value_column = table_member_get(&table, &Value::from("Value")).unwrap();
    if let Value::Tensor(tensor) = &value_column {
        assert_eq!(tensor.dtype, NumericDType::F32);
        assert_eq!(tensor.data, vec![1.5, 2.25]);
    } else {
        panic!("expected tensor, got {value_column:?}");
    }

    let flag_column = table_member_get(&table, &Value::from("Flag")).unwrap();
    if let Value::LogicalArray(array) = &flag_column {
        assert_eq!(array.data, vec![1, 0]);
    } else {
        panic!("expected logical array, got {flag_column:?}");
    }

    let when_column = table_member_get(&table, &Value::from("When")).unwrap();
    if let Value::Object(instance) = &when_column {
        assert!(instance.is_class("datetime"));
    } else {
        panic!("expected datetime object, got {when_column:?}");
    }

    let elapsed_column = table_member_get(&table, &Value::from("Elapsed")).unwrap();
    if let Value::Object(instance) = &elapsed_column {
        assert!(instance.is_class("duration"));
    } else {
        panic!("expected duration object, got {elapsed_column:?}");
    }

    let _ = fs::remove_file(&sample_path);
}
5326
/// With `VariableNamingRule` set to `preserve`, explicitly supplied names that
/// contain spaces must come through `read_table` unchanged.
#[test]
fn readtable_preserves_explicit_import_variable_names_when_requested() {
    let sample_path = unique_path("readtable_preserve_explicit_names");
    fs::write(&sample_path, "100,10\n125,12\n").expect("write sample");

    let requested = vec!["daily revenue".to_string(), "total orders".to_string()];
    let names = StringArray::new(requested.clone(), vec![1, 2]).unwrap();
    let arguments = vec![
        Value::from("ReadVariableNames"),
        Value::Bool(false),
        Value::from("VariableNames"),
        Value::StringArray(names),
        Value::from("VariableNamingRule"),
        Value::from("preserve"),
    ];
    let table = object(read_table(&sample_path, arguments));

    let actual = table_variable_names_from_object(&table).unwrap();
    assert_eq!(actual, requested);

    let _ = fs::remove_file(&sample_path);
}
5353
/// With `TextType` set to `char`, a text column must be imported as a char
/// matrix with one row per record.
///
/// The sample names are "Ada" and "Grace", and the matrix is asserted to be
/// 2x5, so the shorter name is expected back space-padded to five characters.
#[test]
fn readtable_text_type_char_imports_text_columns_as_char_matrix() {
    let path = unique_path("readtable_text_type_char");
    fs::write(&path, "Name\nAda\nGrace\n").expect("write sample");
    let table = object(read_table(
        &path,
        vec![Value::from("TextType"), Value::from("char")],
    ));
    match table_member_get(&table, &Value::from("Name")).unwrap() {
        Value::CharArray(array) => {
            assert_eq!(array.rows, 2);
            assert_eq!(array.cols, 5);
            // Build the padded expectation from the column width so it cannot
            // disagree with the `cols` assertion above.
            assert_eq!(char_row(&array, 0), format!("{:<5}", "Ada"));
            assert_eq!(char_row(&array, 1), "Grace");
        }
        other => panic!("expected char array, got {other:?}"),
    }
    let _ = fs::remove_file(&path);
}
5373
/// A `cellstr` variable type must import the column as a 2x1 cell array whose
/// elements are char row vectors.
#[test]
fn readtable_variable_types_cellstr_imports_cell_column() {
    let sample_path = unique_path("readtable_variable_types_cellstr");
    fs::write(&sample_path, "Name\nAda\nGrace\n").expect("write sample");

    let types = StringArray::new(vec!["cellstr".to_string()], vec![1, 1]).unwrap();
    let arguments = vec![Value::from("VariableTypes"), Value::StringArray(types)];
    let table = object(read_table(&sample_path, arguments));

    let name_column = table_member_get(&table, &Value::from("Name")).unwrap();
    if let Value::Cell(cell) = &name_column {
        assert_eq!(cell.rows, 2);
        assert_eq!(cell.cols, 1);
        let first = cell.get(0, 0).unwrap();
        assert_eq!(first, Value::CharArray(CharArray::new_row("Ada")));
        let second = cell.get(1, 0).unwrap();
        assert_eq!(second, Value::CharArray(CharArray::new_row("Grace")));
    } else {
        panic!("expected cell array, got {name_column:?}");
    }

    let _ = fs::remove_file(&sample_path);
}
5400
/// Variable types that this test treats as unsupported (`int8`, `categorical`)
/// must be rejected with an error naming the offending entry.
#[test]
fn readtable_rejects_unrepresented_import_variable_types() {
    let sample_path = unique_path("readtable_unsupported_variable_types");
    fs::write(&sample_path, "A\n1\n").expect("write sample");

    // Each case pairs a requested type with the fragment its error must contain.
    let cases = [
        ("int8", "unsupported VariableTypes entry 'int8'"),
        ("categorical", "unsupported VariableTypes entry 'categorical'"),
    ];
    for (type_name, expected_fragment) in cases {
        let requested = StringArray::new(vec![type_name.to_string()], vec![1, 1]).unwrap();
        let arguments = vec![Value::from("VariableTypes"), Value::StringArray(requested)];
        let err = read_table_err(&sample_path, arguments);
        assert!(err.message().contains(expected_fragment));
    }

    let _ = fs::remove_file(&sample_path);
}
5429
/// Writing a new `VariableNames` entry through the table's `Properties` member
/// must rename the table's columns.
#[test]
fn table_properties_variable_names_rename_columns() {
    let first_column = Value::Tensor(Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap());
    let second_column = Value::Tensor(Tensor::new(vec![3.0, 4.0], vec![2, 1]).unwrap());
    let built = table_from_columns(
        vec!["A".into(), "B".into()],
        vec![first_column, second_column],
    )
    .unwrap();
    let mut table = object(built);

    // Read the properties, replace the names, and write the struct back.
    let mut props = table_public_properties(&table).unwrap();
    let renamed = StringArray::new(vec!["X".into(), "Y".into()], vec![1, 2]).unwrap();
    props.insert(VARIABLE_NAMES, Value::StringArray(renamed));
    table_member_set(&mut table, PROPERTIES_MEMBER, Value::Struct(props)).unwrap();

    let names = table_variable_names_from_object(&table).unwrap();
    assert_eq!(names, vec!["X".to_string(), "Y".to_string()]);
}
5447
/// Paren indexing with a row index vector and a cell of variable names must
/// return a sub-table holding the chosen rows, in the requested order.
#[test]
fn table_paren_selects_rows_and_named_variables() {
    let column_a = Value::Tensor(Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap());
    let column_b = Value::Tensor(Tensor::new(vec![4.0, 5.0, 6.0], vec![3, 1]).unwrap());
    let built =
        table_from_columns(vec!["A".into(), "B".into()], vec![column_a, column_b]).unwrap();
    let table = object(built);

    // Selector: rows [3, 1], variables {"B"}.
    let row_selector = Value::Tensor(Tensor::new(vec![3.0, 1.0], vec![1, 2]).unwrap());
    let variable_selector = Value::Cell(CellArray::new(vec![Value::from("B")], 1, 1).unwrap());
    let selector = CellArray::new(vec![row_selector, variable_selector], 1, 2).unwrap();

    let subset = object(table_paren_get(&table, &Value::Cell(selector)).unwrap());
    let subset_names = table_variable_names_from_object(&subset).unwrap();
    assert_eq!(subset_names, vec!["B".to_string()]);

    let selected = table_member_get(&subset, &Value::from("B")).unwrap();
    if let Value::Tensor(tensor) = &selected {
        assert_eq!(tensor.data, vec![6.0, 4.0]);
    } else {
        panic!("expected tensor, got {selected:?}");
    }
}
5472
/// Sorting a table by a variable must reorder its row names together with the
/// data rows.
#[test]
fn sortrows_preserves_row_names() {
    let column = Value::Tensor(Tensor::new(vec![2.0, 1.0], vec![2, 1]).unwrap());
    let unsorted = table_from_columns_with_properties(
        vec!["X".into()],
        vec![column],
        Some(vec!["second".into(), "first".into()]),
    )
    .unwrap();

    let sort_keys = [Value::from("X")];
    let (sorted_value, _) = sortrows_table(unsorted, &sort_keys).unwrap();
    let sorted = object(sorted_value);

    let props = table_public_properties(&sorted).unwrap();
    let row_names = props.fields.get(ROW_NAMES).unwrap();
    if let Value::StringArray(array) = row_names {
        assert_eq!(array.data, vec!["first".to_string(), "second".to_string()]);
    } else {
        panic!("expected row names, got {row_names:?}");
    }
}
5492
/// Grouping by a string variable with the `mean` method must emit the group
/// key, a `GroupCount` column and a `mean_X` column holding per-group means.
#[test]
fn groupsummary_mean_counts_groups() {
    let keys = StringArray::new(vec!["a".into(), "b".into(), "a".into()], vec![3, 1]).unwrap();
    let group_column = Value::StringArray(keys);
    let value_column = Value::Tensor(Tensor::new(vec![2.0, 5.0, 4.0], vec![3, 1]).unwrap());
    let table = table_from_columns(
        vec!["G".into(), "X".into()],
        vec![group_column, value_column],
    )
    .unwrap();

    let summary_value = groupsummary_impl(
        table,
        Value::from("G"),
        Value::from("mean"),
        vec![Value::from("X")],
    )
    .unwrap();
    let summary = object(summary_value);

    let expected_names: Vec<String> = ["G", "GroupCount", "mean_X"]
        .iter()
        .map(|name| name.to_string())
        .collect();
    let actual_names = table_variable_names_from_object(&summary).unwrap();
    assert_eq!(actual_names, expected_names);

    // Group "a" averages 2 and 4; group "b" holds only 5.
    let means = table_member_get(&summary, &Value::from("mean_X")).unwrap();
    if let Value::Tensor(tensor) = &means {
        assert_eq!(tensor.data, vec![3.0, 5.0]);
    } else {
        panic!("expected tensor, got {means:?}");
    }
}
5521
/// Numeric group keys must be listed in numeric order (2 before 10), with the
/// per-group sums following the same order.
#[test]
fn groupsummary_orders_numeric_groups_numerically() {
    let group_column = Value::Tensor(Tensor::new(vec![10.0, 2.0, 10.0], vec![3, 1]).unwrap());
    let value_column = Value::Tensor(Tensor::new(vec![1.0, 5.0, 3.0], vec![3, 1]).unwrap());
    let table = table_from_columns(
        vec!["G".into(), "X".into()],
        vec![group_column, value_column],
    )
    .unwrap();

    let summary_value =
        groupsummary_impl(table, Value::from("G"), Value::from("sum"), vec![]).unwrap();
    let summary = object(summary_value);

    let group_keys = table_member_get(&summary, &Value::from("G")).unwrap();
    if let Value::Tensor(tensor) = &group_keys {
        assert_eq!(tensor.data, vec![2.0, 10.0]);
    } else {
        panic!("expected tensor, got {group_keys:?}");
    }

    let sums = table_member_get(&summary, &Value::from("sum_X")).unwrap();
    if let Value::Tensor(tensor) = &sums {
        assert_eq!(tensor.data, vec![5.0, 4.0]);
    } else {
        panic!("expected tensor, got {sums:?}");
    }
}
5538}