1use std::collections::HashSet;
4
5use runmat_builtins::{CellArray, CharArray, StringArray, Value};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::map_control_flow_with_builtin;
9use crate::builtins::common::spec::{
10 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
11 ReductionNaN, ResidencyPolicy, ShapeRequirements,
12};
13use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
14use crate::builtins::strings::type_resolvers::string_array_type;
15use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
16
// GPU metadata registered for the `split` builtin.
//
// Per the `notes` field, `split` executes on the CPU and GPU-resident inputs
// are gathered to host memory before splitting; accordingly no precisions or
// provider hooks are listed and the residency policy is `GatherImmediately`.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::transform::split")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "split",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Executes on the CPU; GPU-resident inputs are gathered to host memory before splitting.",
};
32
// Fusion metadata registered for the `split` builtin.
//
// Per the `notes` field, this string transformation is not eligible for
// fusion planning, so neither an elementwise nor a reduction descriptor is
// provided.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::transform::split")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "split",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "String transformation builtin; not eligible for fusion planning and always gathers GPU inputs.",
};
43
/// Name attached to every runtime error raised by this builtin.
const BUILTIN_NAME: &str = "split";
/// Raised when the text argument is not one of the supported text containers.
const ARG_TYPE_ERROR: &str =
    "split: first argument must be a string scalar, string array, character array, or cell array of character vectors";
/// Raised when the delimiter argument is not one of the supported text containers.
const DELIMITER_TYPE_ERROR: &str =
    "split: delimiter input must be a string scalar, string array, character array, or cell array of character vectors";
/// Raised when an option name is not followed by a value.
const NAME_VALUE_PAIR_ERROR: &str = "split: name-value arguments must be supplied in pairs";
/// Raised when an argument in option-name position is not a recognised name.
const UNKNOWN_NAME_ERROR: &str =
    "split: unrecognized name-value argument; supported names are 'CollapseDelimiters' and 'IncludeDelimiters'";
/// Raised when the delimiter list is empty or contains an empty delimiter.
const EMPTY_DELIMITER_ERROR: &str = "split: delimiters must contain at least one character";
/// Raised when a cell element cannot be read as a single piece of text.
const CELL_ELEMENT_ERROR: &str =
    "split: cell array elements must be string scalars or character vectors";
55
56fn runtime_error_for(message: impl Into<String>) -> RuntimeError {
57 build_runtime_error(message)
58 .with_builtin(BUILTIN_NAME)
59 .build()
60}
61
/// Passes `err` through `map_control_flow_with_builtin` together with this
/// builtin's name; used on errors produced while gathering arguments.
fn map_flow(err: RuntimeError) -> RuntimeError {
    map_control_flow_with_builtin(err, BUILTIN_NAME)
}
65
66#[runtime_builtin(
67 name = "split",
68 category = "strings/transform",
69 summary = "Split strings, character arrays, and cell arrays into substrings using delimiters.",
70 keywords = "split,strsplit,delimiter,CollapseDelimiters,IncludeDelimiters",
71 accel = "sink",
72 type_resolver(string_array_type),
73 builtin_path = "crate::builtins::strings::transform::split"
74)]
75async fn split_builtin(text: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
76 let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
77 let mut args: Vec<Value> = Vec::with_capacity(rest.len());
78 for arg in rest {
79 args.push(gather_if_needed_async(&arg).await.map_err(map_flow)?);
80 }
81
82 let options = SplitOptions::parse(&args)?;
83 let matrix = TextMatrix::from_value(text)?;
84 matrix.into_split_result(&options)
85}
86
/// How the input text is cut into tokens.
#[derive(Clone)]
enum DelimiterSpec {
    /// No delimiter argument was supplied: split on Unicode whitespace.
    Whitespace,
    /// Explicit literal delimiters. `SplitOptions::parse` stores them
    /// de-duplicated, non-empty and sorted longest-first.
    Patterns(Vec<String>),
}
92
/// Fully resolved options for one `split` call.
#[derive(Clone)]
struct SplitOptions {
    /// Delimiters to split on.
    delimiters: DelimiterSpec,
    /// When true, a run of adjacent delimiters acts as one delimiter and
    /// empty tokens are dropped.
    collapse_delimiters: bool,
    /// When true, the matched delimiter text is emitted between tokens.
    include_delimiters: bool,
}
99
100impl SplitOptions {
101 fn parse(args: &[Value]) -> BuiltinResult<Self> {
102 let mut index = 0usize;
103 let mut delimiters = DelimiterSpec::Whitespace;
104
105 if index < args.len() && !is_name_key(&args[index]) {
106 let list = extract_delimiters(&args[index])?;
107 if list.is_empty() {
108 return Err(runtime_error_for(EMPTY_DELIMITER_ERROR));
109 }
110 let mut seen = HashSet::new();
111 let mut patterns: Vec<String> = Vec::new();
112 for pattern in list {
113 if pattern.is_empty() {
114 return Err(runtime_error_for(EMPTY_DELIMITER_ERROR));
115 }
116 if seen.insert(pattern.clone()) {
117 patterns.push(pattern);
118 }
119 }
120 patterns.sort_by_key(|pat| std::cmp::Reverse(pat.len()));
121 delimiters = DelimiterSpec::Patterns(patterns);
122 index += 1;
123 }
124
125 let mut collapse = match delimiters {
126 DelimiterSpec::Whitespace => true,
127 DelimiterSpec::Patterns(_) => false,
128 };
129 let mut include = false;
130
131 while index < args.len() {
132 let name = match name_key(&args[index]) {
133 Some(NameKey::CollapseDelimiters) => NameKey::CollapseDelimiters,
134 Some(NameKey::IncludeDelimiters) => NameKey::IncludeDelimiters,
135 None => return Err(runtime_error_for(UNKNOWN_NAME_ERROR)),
136 };
137 index += 1;
138 if index >= args.len() {
139 return Err(runtime_error_for(NAME_VALUE_PAIR_ERROR));
140 }
141 let value = &args[index];
142 index += 1;
143
144 match name {
145 NameKey::CollapseDelimiters => {
146 collapse = parse_bool(value, "CollapseDelimiters")?;
147 }
148 NameKey::IncludeDelimiters => {
149 include = parse_bool(value, "IncludeDelimiters")?;
150 }
151 }
152 }
153
154 Ok(Self {
155 delimiters,
156 collapse_delimiters: collapse,
157 include_delimiters: include,
158 })
159 }
160}
161
/// Input text normalised to a 2-D grid of strings.
///
/// `into_split_result` reads `data` as `data[row + col * rows]`.
struct TextMatrix {
    /// The text elements.
    data: Vec<String>,
    /// Number of rows in the grid.
    rows: usize,
    /// Number of columns in the grid.
    cols: usize,
}
167
168impl TextMatrix {
169 fn from_value(value: Value) -> BuiltinResult<Self> {
170 match value {
171 Value::String(text) => Ok(Self {
172 data: vec![text],
173 rows: 1,
174 cols: 1,
175 }),
176 Value::StringArray(array) => Ok(Self {
177 data: array.data,
178 rows: array.rows,
179 cols: array.cols,
180 }),
181 Value::CharArray(array) => Self::from_char_array(array),
182 Value::Cell(cell) => Self::from_cell_array(cell),
183 _ => Err(runtime_error_for(ARG_TYPE_ERROR)),
184 }
185 }
186
187 fn from_char_array(array: CharArray) -> BuiltinResult<Self> {
188 let CharArray { data, rows, cols } = array;
189 if rows == 0 {
190 return Ok(Self {
191 data: Vec::new(),
192 rows: 0,
193 cols: 1,
194 });
195 }
196 let mut strings = Vec::with_capacity(rows);
197 for row in 0..rows {
198 strings.push(char_row_to_string_slice(&data, cols, row));
199 }
200 Ok(Self {
201 data: strings,
202 rows,
203 cols: 1,
204 })
205 }
206
207 fn from_cell_array(cell: CellArray) -> BuiltinResult<Self> {
208 let CellArray {
209 data, rows, cols, ..
210 } = cell;
211 let mut strings = Vec::with_capacity(data.len());
212 for col in 0..cols {
213 for row in 0..rows {
214 let idx = row * cols + col;
215 let value_ref: &Value = &data[idx];
216 strings.push(
217 cell_element_to_string(value_ref)
218 .ok_or_else(|| runtime_error_for(CELL_ELEMENT_ERROR))?,
219 );
220 }
221 }
222 Ok(Self {
223 data: strings,
224 rows,
225 cols,
226 })
227 }
228
229 fn into_split_result(self, options: &SplitOptions) -> BuiltinResult<Value> {
230 let TextMatrix { data, rows, cols } = self;
231
232 if data.is_empty() {
233 let block_cols = if cols == 0 { 0 } else { 1 };
234 let shape = if cols == 0 {
235 vec![rows, 0]
236 } else {
237 vec![rows, cols * block_cols]
238 };
239 let array = StringArray::new(Vec::new(), shape)
240 .map_err(|e| runtime_error_for(format!("{BUILTIN_NAME}: {e}")))?;
241 return Ok(Value::StringArray(array));
242 }
243
244 let mut per_element: Vec<Vec<String>> = Vec::with_capacity(data.len());
245 let mut max_tokens = 0usize;
246 for text in &data {
247 let tokens = split_text(text, options);
248 max_tokens = max_tokens.max(tokens.len());
249 per_element.push(tokens);
250 }
251 if max_tokens == 0 {
252 max_tokens = 1;
253 }
254 let block_cols = max_tokens;
255 let result_cols = block_cols * cols.max(1);
256 let total = rows * result_cols;
257 let missing = "<missing>".to_string();
258 let mut output = vec![missing.clone(); total];
259
260 for col in 0..cols.max(1) {
261 for row in 0..rows {
262 let element_index = if cols == 0 { row } else { row + col * rows };
263 if element_index >= per_element.len() {
264 continue;
265 }
266 let tokens = &per_element[element_index];
267 for t in 0..block_cols {
268 let out_col = if cols == 0 { t } else { col * block_cols + t };
269 let out_index = row + out_col * rows;
270 if out_index >= output.len() {
271 continue;
272 }
273 if t < tokens.len() {
274 output[out_index] = tokens[t].clone();
275 } else {
276 output[out_index] = missing.clone();
277 }
278 }
279 }
280 }
281
282 let shape = vec![rows, result_cols];
283 let array = StringArray::new(output, shape)
284 .map_err(|e| runtime_error_for(format!("{BUILTIN_NAME}: {e}")))?;
285 Ok(Value::StringArray(array))
286 }
287}
288
289fn split_text(text: &str, options: &SplitOptions) -> Vec<String> {
290 if is_missing_string(text) {
291 return vec![text.to_string()];
292 }
293 match &options.delimiters {
294 DelimiterSpec::Whitespace => split_whitespace(text, options),
295 DelimiterSpec::Patterns(patterns) => split_by_patterns(text, patterns, options),
296 }
297}
298
299fn split_whitespace(text: &str, options: &SplitOptions) -> Vec<String> {
300 if text.is_empty() {
301 return vec![String::new()];
302 }
303
304 let mut parts: Vec<String> = Vec::new();
305 let mut idx = 0usize;
306 let mut last = 0usize;
307 let len = text.len();
308
309 while idx < len {
310 let ch = text[idx..].chars().next().unwrap();
311 let width = ch.len_utf8();
312 if !ch.is_whitespace() {
313 idx += width;
314 continue;
315 }
316
317 let token = &text[last..idx];
318 if !token.is_empty() || !options.collapse_delimiters {
319 parts.push(token.to_string());
320 }
321
322 let run_end = advance_whitespace(text, idx);
323 if options.include_delimiters {
324 if options.collapse_delimiters {
325 parts.push(text[idx..run_end].to_string());
326 } else {
327 parts.push(text[idx..idx + width].to_string());
328 }
329 }
330
331 if options.collapse_delimiters {
332 idx = run_end;
333 last = run_end;
334 } else {
335 idx += width;
336 last = idx;
337 }
338 }
339
340 let tail = &text[last..];
341 if !tail.is_empty() || !options.collapse_delimiters {
342 parts.push(tail.to_string());
343 }
344 if parts.is_empty() {
345 parts.push(String::new());
346 }
347 parts
348}
349
350fn split_by_patterns(text: &str, patterns: &[String], options: &SplitOptions) -> Vec<String> {
351 if patterns.is_empty() {
352 return vec![text.to_string()];
353 }
354
355 let mut parts: Vec<String> = Vec::new();
356 let mut idx = 0usize;
357 let mut last = 0usize;
358 while idx < text.len() {
359 if let Some(pattern) = patterns
360 .iter()
361 .find(|candidate| text[idx..].starts_with(candidate.as_str()))
362 {
363 let token = &text[last..idx];
364 if !token.is_empty() || !options.collapse_delimiters {
365 parts.push(token.to_string());
366 }
367
368 let pat_len = pattern.len();
369 if options.collapse_delimiters {
370 let mut run_end = idx + pat_len;
371 while run_end < text.len() {
372 if let Some(next) = patterns
373 .iter()
374 .find(|candidate| text[run_end..].starts_with(candidate.as_str()))
375 {
376 let len = next.len();
377 if len == 0 {
378 break;
379 }
380 run_end += len;
381 } else {
382 break;
383 }
384 }
385 if options.include_delimiters {
386 parts.push(text[idx..run_end].to_string());
387 }
388 idx = run_end;
389 last = run_end;
390 } else {
391 if options.include_delimiters {
392 parts.push(text[idx..idx + pat_len].to_string());
393 }
394 idx += pat_len;
395 last = idx;
396 }
397
398 continue;
399 }
400 let ch = text[idx..].chars().next().unwrap();
401 idx += ch.len_utf8();
402 }
403 let tail = &text[last..];
404 if !tail.is_empty() || !options.collapse_delimiters {
405 parts.push(tail.to_string());
406 }
407 if parts.is_empty() {
408 parts.push(String::new());
409 }
410 parts
411}
412
/// Returns the byte index of the first non-whitespace character at or after
/// `start`, or `text.len()` if only whitespace remains.
///
/// A `start` at or beyond the end of `text` is returned unchanged.
fn advance_whitespace(text: &str, start: usize) -> usize {
    if start >= text.len() {
        return start;
    }
    match text[start..].find(|ch: char| !ch.is_whitespace()) {
        Some(offset) => start + offset,
        None => text.len(),
    }
}
423
424fn extract_delimiters(value: &Value) -> BuiltinResult<Vec<String>> {
425 match value {
426 Value::String(text) => Ok(vec![text.clone()]),
427 Value::StringArray(array) => Ok(array.data.clone()),
428 Value::CharArray(array) => {
429 if array.rows == 0 {
430 return Ok(Vec::new());
431 }
432 let mut entries = Vec::with_capacity(array.rows);
433 for row in 0..array.rows {
434 entries.push(char_row_to_string_slice(&array.data, array.cols, row));
435 }
436 Ok(entries)
437 }
438 Value::Cell(cell) => {
439 let mut entries = Vec::with_capacity(cell.data.len());
440 for element in &cell.data {
441 entries.push(
442 cell_element_to_string(element)
443 .ok_or_else(|| runtime_error_for(CELL_ELEMENT_ERROR))?,
444 );
445 }
446 Ok(entries)
447 }
448 _ => Err(runtime_error_for(DELIMITER_TYPE_ERROR)),
449 }
450}
451
452fn cell_element_to_string(value: &Value) -> Option<String> {
453 match value {
454 Value::String(text) => Some(text.clone()),
455 Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
456 Value::CharArray(array) if array.rows <= 1 => {
457 if array.rows == 0 {
458 Some(String::new())
459 } else {
460 Some(char_row_to_string_slice(&array.data, array.cols, 0))
461 }
462 }
463 _ => None,
464 }
465}
466
467fn value_to_scalar_string(value: &Value) -> Option<String> {
468 match value {
469 Value::String(text) => Some(text.clone()),
470 Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
471 Value::CharArray(array) if array.rows <= 1 => {
472 if array.rows == 0 {
473 Some(String::new())
474 } else {
475 Some(char_row_to_string_slice(&array.data, array.cols, 0))
476 }
477 }
478 Value::Cell(cell) if cell.data.len() == 1 => cell_element_to_string(&cell.data[0]),
479 _ => None,
480 }
481}
482
483fn parse_bool(value: &Value, name: &str) -> BuiltinResult<bool> {
484 match value {
485 Value::Bool(b) => Ok(*b),
486 Value::Int(i) => Ok(i.to_i64() != 0),
487 Value::Num(n) => Ok(*n != 0.0),
488 Value::LogicalArray(array) => {
489 if array.data.len() == 1 {
490 Ok(array.data[0] != 0)
491 } else {
492 Err(runtime_error_for(format!(
493 "{BUILTIN_NAME}: value for '{}' must be logical true or false",
494 name
495 )))
496 }
497 }
498 Value::Tensor(tensor) => {
499 if tensor.data.len() == 1 {
500 Ok(tensor.data[0] != 0.0)
501 } else {
502 Err(runtime_error_for(format!(
503 "{BUILTIN_NAME}: value for '{}' must be logical true or false",
504 name
505 )))
506 }
507 }
508 _ => {
509 if let Some(text) = value_to_scalar_string(value) {
510 let lowered = text.trim().to_ascii_lowercase();
511 match lowered.as_str() {
512 "true" | "on" | "yes" => Ok(true),
513 "false" | "off" | "no" => Ok(false),
514 _ => Err(runtime_error_for(format!(
515 "{BUILTIN_NAME}: value for '{}' must be logical true or false",
516 name
517 ))),
518 }
519 } else {
520 Err(runtime_error_for(format!(
521 "{BUILTIN_NAME}: value for '{}' must be logical true or false",
522 name
523 )))
524 }
525 }
526 }
527}
528
/// The name-value option names understood by `split`.
#[derive(PartialEq, Eq)]
enum NameKey {
    /// The `CollapseDelimiters` option.
    CollapseDelimiters,
    /// The `IncludeDelimiters` option.
    IncludeDelimiters,
}
534
/// Returns true when `value` is text that `name_key` recognises as an option name.
fn is_name_key(value: &Value) -> bool {
    name_key(value).is_some()
}
538
539fn name_key(value: &Value) -> Option<NameKey> {
540 value_to_scalar_string(value).and_then(|text| {
541 let lowered = text.trim().to_ascii_lowercase();
542 match lowered.as_str() {
543 "collapsedelimiters" => Some(NameKey::CollapseDelimiters),
544 "includedelimiters" => Some(NameKey::IncludeDelimiters),
545 _ => None,
546 }
547 })
548}
549
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use runmat_builtins::{CellArray, LogicalArray, ResolveContext, Tensor, Type};

    /// Drives the async builtin to completion on the current thread.
    fn split_builtin(text: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
        futures::executor::block_on(super::split_builtin(text, rest))
    }

    /// Shorthand for a string-scalar `Value`.
    fn str_value(text: &str) -> Value {
        Value::String(text.to_string())
    }

    /// Asserts that `result` is a string array with the given shape and
    /// contents.
    #[track_caller]
    fn assert_string_array(result: Value, shape: &[usize], data: &[&str]) {
        match result {
            Value::StringArray(array) => {
                assert_eq!(array.shape, shape.to_vec());
                let expected: Vec<String> = data.iter().map(|item| item.to_string()).collect();
                assert_eq!(array.data, expected);
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_string_whitespace_default() {
        let result =
            split_builtin(str_value("RunMat Accelerate Planner"), Vec::new()).expect("split");
        assert_string_array(result, &[1, 3], &["RunMat", "Accelerate", "Planner"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_string_custom_delimiter() {
        let result =
            split_builtin(str_value("alpha,beta,gamma"), vec![str_value(",")]).expect("split");
        assert_string_array(result, &[1, 3], &["alpha", "beta", "gamma"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_include_delimiters_true() {
        let delimiters =
            StringArray::new(vec!["+".to_string(), "-".to_string()], vec![1, 2]).unwrap();
        let args = vec![
            Value::StringArray(delimiters),
            str_value("IncludeDelimiters"),
            Value::Bool(true),
        ];
        let result = split_builtin(str_value("A+B-C"), args).expect("split");
        assert_string_array(result, &[1, 5], &["A", "+", "B", "-", "C"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_include_delimiters_whitespace_collapse_default() {
        let args = vec![str_value("IncludeDelimiters"), Value::Bool(true)];
        let result = split_builtin(str_value("A B"), args).expect("split");
        assert_string_array(result, &[1, 3], &["A", " ", "B"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_patterns_include_delimiters_collapse_true() {
        let args = vec![
            str_value(","),
            str_value("IncludeDelimiters"),
            Value::Bool(true),
            str_value("CollapseDelimiters"),
            Value::Bool(true),
        ];
        let result = split_builtin(str_value("a,,b"), args).expect("split");
        assert_string_array(result, &[1, 3], &["a", ",,", "b"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_collapse_false_preserves_empty_segments() {
        let args = vec![
            str_value(","),
            str_value("CollapseDelimiters"),
            Value::Bool(false),
        ];
        let result = split_builtin(str_value("one,,three,"), args).expect("split");
        assert_string_array(result, &[1, 4], &["one", "", "three", ""]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_character_array_rows() {
        let mut row1: Vec<char> = "GPU Accelerate".chars().collect();
        let mut row2: Vec<char> = "Ignition Engine".chars().collect();
        let width = row1.len().max(row2.len());
        row1.resize(width, ' ');
        row2.resize(width, ' ');
        let mut data = row1;
        data.extend(row2);
        let input = Value::CharArray(CharArray::new(data, 2, width).unwrap());
        let result = split_builtin(input, Vec::new()).expect("split");
        assert_string_array(
            result,
            &[2, 2],
            &["GPU", "Ignition", "Accelerate", "Engine"],
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_string_array_multiple_columns() {
        let data = vec![
            "RunMat Core".to_string(),
            "Ignition Interpreter".to_string(),
            "Accelerate Engine".to_string(),
            "<missing>".to_string(),
        ];
        let input = Value::StringArray(StringArray::new(data, vec![2, 2]).unwrap());
        let result = split_builtin(input, Vec::new()).expect("split");
        assert_string_array(
            result,
            &[2, 4],
            &[
                "RunMat",
                "Ignition",
                "Core",
                "Interpreter",
                "Accelerate",
                "<missing>",
                "Engine",
                "<missing>",
            ],
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_cell_array_outputs_string_array() {
        let values = vec![str_value("RunMat Snapshot"), str_value("Fusion Planner")];
        let cell = crate::make_cell(values, 2, 1).expect("cell");
        let result = split_builtin(cell, vec![str_value(" ")]).expect("split");
        assert_string_array(
            result,
            &[2, 2],
            &["RunMat", "Fusion", "Snapshot", "Planner"],
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_cell_array_multiple_columns() {
        let values = vec![
            str_value("alpha beta"),
            str_value("gamma"),
            str_value("delta epsilon"),
            str_value("<missing>"),
        ];
        let cell = crate::make_cell(values, 2, 2).expect("cell");
        let result = split_builtin(cell, Vec::new()).expect("split");
        assert_string_array(
            result,
            &[2, 4],
            &[
                "alpha",
                "delta",
                "beta",
                "epsilon",
                "gamma",
                "<missing>",
                "<missing>",
                "<missing>",
            ],
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_missing_string_propagates() {
        let result = split_builtin(str_value("<missing>"), Vec::new()).expect("split");
        assert_string_array(result, &[1, 1], &["<missing>"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_invalid_name_value_pair_errors() {
        let args = vec![str_value("CollapseDelimiters")];
        let err = split_builtin(str_value("abc"), args).unwrap_err();
        assert!(err.to_string().contains("name-value"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_invalid_text_argument_errors() {
        let err = split_builtin(Value::Num(1.0), Vec::new()).unwrap_err();
        assert!(err.to_string().contains("first argument"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_invalid_delimiter_type_errors() {
        let err = split_builtin(str_value("abc"), vec![Value::Num(1.0)]).unwrap_err();
        assert!(err.to_string().contains("delimiter input"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_empty_delimiter_errors() {
        let err =
            split_builtin(str_value("abc"), vec![Value::String(String::new())]).unwrap_err();
        assert!(err.to_string().contains("at least one character"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_unknown_name_argument_errors() {
        let args = vec![str_value("UnknownOption"), Value::Bool(true)];
        let err = split_builtin(str_value("abc"), args).unwrap_err();
        assert!(err.to_string().contains("unrecognized"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_collapse_delimiters_accepts_logical_array() {
        let logical = LogicalArray::new(vec![1u8], vec![1]).unwrap();
        let args = vec![
            str_value(","),
            str_value("CollapseDelimiters"),
            Value::LogicalArray(logical),
        ];
        let result = split_builtin(str_value("a,,b"), args).expect("split");
        assert_string_array(result, &[1, 2], &["a", "b"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_include_delimiters_accepts_tensor_scalar() {
        let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
        let args = vec![
            str_value(","),
            str_value("IncludeDelimiters"),
            Value::Tensor(tensor),
        ];
        let result = split_builtin(str_value("a,b"), args).expect("split");
        assert_string_array(result, &[1, 3], &["a", ",", "b"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn split_cell_array_mixed_inputs() {
        let handles: Vec<_> = vec![
            runmat_gc::gc_allocate(str_value("alpha beta")).unwrap(),
            runmat_gc::gc_allocate(Value::CharArray(
                CharArray::new("gamma".chars().collect(), 1, 5).unwrap(),
            ))
            .unwrap(),
        ];
        let cell =
            Value::Cell(CellArray::new_handles(handles, 1, 2).expect("cell array construction"));
        let result = split_builtin(cell, Vec::new()).expect("split");
        assert_string_array(result, &[1, 4], &["alpha", "beta", "gamma", "<missing>"]);
    }

    #[test]
    fn split_type_is_string_array() {
        assert_eq!(
            string_array_type(&[Type::String], &ResolveContext::new(Vec::new())),
            Type::cell_of(Type::String)
        );
    }
}