1use std::collections::HashSet;
4
5use runmat_builtins::{CellArray, CharArray, StringArray, Value};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::spec::{
9 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
10 ReductionNaN, ResidencyPolicy, ShapeRequirements,
11};
12use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
16
17#[cfg(feature = "doc_export")]
18pub const DOC_MD: &str = r#"---
19title: "split"
20category: "strings/transform"
21keywords: ["split", "string split", "text split", "delimiters", "collapse delimiters", "include delimiters"]
22summary: "Split strings, character arrays, and cell arrays into substrings using delimiters."
23references:
24 - https://www.mathworks.com/help/matlab/ref/split.html
25gpu_support:
26 elementwise: false
27 reduction: false
28 precisions: []
29 broadcasting: "none"
30 notes: "Executes on the CPU; GPU-resident arguments are gathered to host memory prior to splitting."
31fusion:
32 elementwise: false
33 reduction: false
34 max_inputs: 2
35 constants: "inline"
36requires_feature: null
37tested:
38 unit: "builtins::strings::transform::split::tests"
39 integration: "builtins::strings::transform::split::tests::split_cell_array_mixed_inputs"
40---
41
42# What does the `split` function do in MATLAB / RunMat?
43`split(text)` breaks text into substrings separated by delimiters. The input can be a string scalar,
44string array, character array, or a cell array of character vectors—`split` mirrors MATLAB behaviour
45across each of these representations. When you omit the delimiter argument, `split` collapses
46whitespace runs and returns the remaining tokens as a string array.
47
48## How does the `split` function behave in MATLAB / RunMat?
49- The default delimiter is whitespace (`isspace`), and consecutive whitespace is treated as a single
50 separator (equivalent to `'CollapseDelimiters', true`).
51- When you supply explicit delimiters, they can be a string scalar, string array, character array
52 (rows), or a cell array of character vectors. Delimiters are matched left to right and the longest
53 delimiter wins when several candidates match at the same position.
54- `'CollapseDelimiters'` controls whether consecutive delimiters generate empty substrings. The default
55 is `false` when you specify explicit delimiters and `true` when you rely on the whitespace default.
56- `'IncludeDelimiters'` inserts the matched delimiters as separate elements in the output string array.
- Outputs are string arrays. For scalar inputs, the result is a row vector. For string/character arrays,
  the first dimension matches the number of rows in the input and additional columns are appended to
  accommodate the longest token list. Elements that yield fewer tokens are padded with `<missing>`.
60- Missing string scalars propagate unchanged.
61
62## `split` Function GPU Execution Behaviour
63`split` executes on the CPU. When the input or delimiter arguments reside on the GPU, RunMat gathers
64them to host memory before performing the split so the results match MATLAB exactly. Providers do not
65need to implement custom kernels for this builtin today.
66
67## GPU residency in RunMat (Do I need `gpuArray`?)
68String manipulation currently runs on the host. If text data lives on the GPU (for example after a
69gathered computation), `split` automatically fetches it. You never need to move text explicitly before
70calling this builtin.
71
72## Examples of using the `split` function in MATLAB / RunMat
73
74### Split A String On Whitespace
75```matlab
76txt = "RunMat Accelerate Planner";
77pieces = split(txt);
78```
79Expected output:
80```matlab
81pieces = 1×3 string
82 "RunMat" "Accelerate" "Planner"
83```
84
85### Split A String Using A Custom Delimiter
86```matlab
87csv = "alpha,beta,gamma";
88tokens = split(csv, ",");
89```
90Expected output:
91```matlab
92tokens = 1×3 string
93 "alpha" "beta" "gamma"
94```
95
96### Include Delimiters In The Output
97```matlab
98expr = "A+B-C";
99segments = split(expr, ["+", "-"], "IncludeDelimiters", true);
100```
101Expected output:
102```matlab
103segments = 1×5 string
104 "A" "+" "B" "-" "C"
105```
106
107### Preserve Empty Segments When CollapseDelimiters Is False
108```matlab
109values = "one,,three,";
110parts = split(values, ",", "CollapseDelimiters", false);
111```
112Expected output:
113```matlab
114parts = 1×4 string
115 "one" "" "three" ""
116```
117
118### Split Each Row Of A Character Array
119```matlab
120rows = char("GPU Accelerate", "Ignition Interpreter");
121result = split(rows);
122```
123Expected output:
124```matlab
125result = 2×2 string
126 "GPU" "Accelerate"
127 "Ignition" "Interpreter"
128```
129
130### Split Elements Of A Cell Array
131```matlab
132C = {'RunMat Snapshot'; "Fusion Planner"};
133out = split(C, " ");
134```
135Expected output:
136```matlab
137out = 2×2 string
138 "RunMat" "Snapshot"
139 "Fusion" "Planner"
140```
141
142### Handle Missing String Inputs
143```matlab
names = ["RunMat"; "<missing>"; "Accelerate Engine"];
145split_names = split(names);
146```
147Expected output:
148```matlab
149split_names = 3×2 string
150 "RunMat" "<missing>"
151 "<missing>" "<missing>"
152 "Accelerate" "Engine"
153```
154
155## FAQ
156
157### What delimiters does `split` use by default?
158When you omit the second argument, `split` treats any Unicode whitespace as a delimiter and collapses
159consecutive whitespace runs so they produce a single split point.
160
161### How do explicit delimiters change the defaults?
162Providing explicit delimiters switches the default for `'CollapseDelimiters'` to `false`, matching MATLAB.
163You can override that behaviour with a name-value pair.
164
165### What happens when `'IncludeDelimiters'` is `true`?
166Matched delimiters are inserted between tokens in the output string array, preserving their original
167order. Tokens still expand to fill rows and columns, with missing values used for padding.
168
169### How is the output sized for string arrays?
170The number of rows matches the input. Columns are added to accommodate the longest token list observed
171across all elements. Shorter rows are padded with `<missing>`.
172
173### How does `split` handle missing strings?
174Missing string scalars propagate unchanged. When padding is required, `<missing>` is used so MATLAB and
175RunMat stay aligned.
176
177### Can I provide empty delimiters?
178No. Empty delimiters are disallowed, matching MATLAB's input validation. Specify at least one character
179per delimiter.
180
181### Which argument types are accepted as delimiters?
182You may pass string scalars, string arrays, character arrays (each row is a delimiter), or cell arrays
183containing string scalars or character vectors.
184
185## See Also
186[strsplit](../../search/strsplit), [replace](./replace), [lower](./lower), [upper](./upper), [strip](./strip)
187
188## Source & Feedback
189- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/split.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/split.rs)
190- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
191"#;
192
193pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
194 name: "split",
195 op_kind: GpuOpKind::Custom("string-transform"),
196 supported_precisions: &[],
197 broadcast: BroadcastSemantics::None,
198 provider_hooks: &[],
199 constant_strategy: ConstantStrategy::InlineLiteral,
200 residency: ResidencyPolicy::GatherImmediately,
201 nan_mode: ReductionNaN::Include,
202 two_pass_threshold: None,
203 workgroup_size: None,
204 accepts_nan_mode: false,
205 notes: "Executes on the CPU; GPU-resident inputs are gathered to host memory before splitting.",
206};
207
208register_builtin_gpu_spec!(GPU_SPEC);
209
210pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
211 name: "split",
212 shape: ShapeRequirements::Any,
213 constant_strategy: ConstantStrategy::InlineLiteral,
214 elementwise: None,
215 reduction: None,
216 emits_nan: false,
217 notes: "String transformation builtin; not eligible for fusion planning and always gathers GPU inputs.",
218};
219
220register_builtin_fusion_spec!(FUSION_SPEC);
221
222#[cfg(feature = "doc_export")]
223register_builtin_doc_text!("split", DOC_MD);
224
// ---- Argument-type errors -------------------------------------------------

/// Returned when the text argument is not one of the supported text containers.
const ARG_TYPE_ERROR: &str =
    "split: first argument must be a string scalar, string array, character array, or cell array of character vectors";

/// Returned when the delimiter argument is not one of the supported text containers.
const DELIMITER_TYPE_ERROR: &str =
    "split: delimiter input must be a string scalar, string array, character array, or cell array of character vectors";

/// Returned when a cell element is neither a string scalar nor a character vector.
const CELL_ELEMENT_ERROR: &str =
    "split: cell array elements must be string scalars or character vectors";

/// Returned when the delimiter list is empty or contains an empty delimiter.
const EMPTY_DELIMITER_ERROR: &str = "split: delimiters must contain at least one character";

// ---- Name-value errors ----------------------------------------------------

/// Returned when an option name is not followed by a value.
const NAME_VALUE_PAIR_ERROR: &str = "split: name-value arguments must be supplied in pairs";

/// Returned when an option name is not one of the two supported names.
const UNKNOWN_NAME_ERROR: &str =
    "split: unrecognized name-value argument; supported names are 'CollapseDelimiters' and 'IncludeDelimiters'";
235
236#[runtime_builtin(
237 name = "split",
238 category = "strings/transform",
239 summary = "Split strings, character arrays, and cell arrays into substrings using delimiters.",
240 keywords = "split,strsplit,delimiter,CollapseDelimiters,IncludeDelimiters",
241 accel = "sink"
242)]
243fn split_builtin(text: Value, rest: Vec<Value>) -> Result<Value, String> {
244 let text = gather_if_needed(&text).map_err(|e| format!("split: {e}"))?;
245 let mut args: Vec<Value> = Vec::with_capacity(rest.len());
246 for arg in rest {
247 args.push(gather_if_needed(&arg).map_err(|e| format!("split: {e}"))?);
248 }
249
250 let options = SplitOptions::parse(&args)?;
251 let matrix = TextMatrix::from_value(text)?;
252 matrix.into_split_result(&options)
253}
254
/// How split points are located in the input text.
#[derive(Debug, Clone, PartialEq, Eq)]
enum DelimiterSpec {
    /// Split on whitespace characters (used when no delimiter argument is given).
    Whitespace,
    /// Split on any of the listed literal delimiter strings.
    Patterns(Vec<String>),
}

/// Fully resolved options controlling a single `split` call.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SplitOptions {
    /// Delimiters used to find split points.
    delimiters: DelimiterSpec,
    /// When `true`, a run of consecutive delimiters is treated as one separator.
    collapse_delimiters: bool,
    /// When `true`, matched delimiters are emitted as output elements as well.
    include_delimiters: bool,
}
267
268impl SplitOptions {
269 fn parse(args: &[Value]) -> Result<Self, String> {
270 let mut index = 0usize;
271 let mut delimiters = DelimiterSpec::Whitespace;
272
273 if index < args.len() && !is_name_key(&args[index]) {
274 let list = extract_delimiters(&args[index])?;
275 if list.is_empty() {
276 return Err(EMPTY_DELIMITER_ERROR.to_string());
277 }
278 let mut seen = HashSet::new();
279 let mut patterns: Vec<String> = Vec::new();
280 for pattern in list {
281 if pattern.is_empty() {
282 return Err(EMPTY_DELIMITER_ERROR.to_string());
283 }
284 if seen.insert(pattern.clone()) {
285 patterns.push(pattern);
286 }
287 }
288 patterns.sort_by_key(|pat| std::cmp::Reverse(pat.len()));
289 delimiters = DelimiterSpec::Patterns(patterns);
290 index += 1;
291 }
292
293 let mut collapse = match delimiters {
294 DelimiterSpec::Whitespace => true,
295 DelimiterSpec::Patterns(_) => false,
296 };
297 let mut include = false;
298
299 while index < args.len() {
300 let name = match name_key(&args[index]) {
301 Some(NameKey::CollapseDelimiters) => NameKey::CollapseDelimiters,
302 Some(NameKey::IncludeDelimiters) => NameKey::IncludeDelimiters,
303 None => return Err(UNKNOWN_NAME_ERROR.to_string()),
304 };
305 index += 1;
306 if index >= args.len() {
307 return Err(NAME_VALUE_PAIR_ERROR.to_string());
308 }
309 let value = &args[index];
310 index += 1;
311
312 match name {
313 NameKey::CollapseDelimiters => {
314 collapse = parse_bool(value, "CollapseDelimiters")?;
315 }
316 NameKey::IncludeDelimiters => {
317 include = parse_bool(value, "IncludeDelimiters")?;
318 }
319 }
320 }
321
322 Ok(Self {
323 delimiters,
324 collapse_delimiters: collapse,
325 include_delimiters: include,
326 })
327 }
328}
329
/// Text input normalised to a flat list of strings plus a 2-D extent.
#[derive(Debug)]
struct TextMatrix {
    /// One string per element of the input.
    data: Vec<String>,
    /// Number of rows in the input.
    rows: usize,
    /// Number of columns in the input.
    cols: usize,
}
335
336impl TextMatrix {
337 fn from_value(value: Value) -> Result<Self, String> {
338 match value {
339 Value::String(text) => Ok(Self {
340 data: vec![text],
341 rows: 1,
342 cols: 1,
343 }),
344 Value::StringArray(array) => Ok(Self {
345 data: array.data,
346 rows: array.rows,
347 cols: array.cols,
348 }),
349 Value::CharArray(array) => Self::from_char_array(array),
350 Value::Cell(cell) => Self::from_cell_array(cell),
351 _ => Err(ARG_TYPE_ERROR.to_string()),
352 }
353 }
354
355 fn from_char_array(array: CharArray) -> Result<Self, String> {
356 let CharArray { data, rows, cols } = array;
357 if rows == 0 {
358 return Ok(Self {
359 data: Vec::new(),
360 rows: 0,
361 cols: 1,
362 });
363 }
364 let mut strings = Vec::with_capacity(rows);
365 for row in 0..rows {
366 strings.push(char_row_to_string_slice(&data, cols, row));
367 }
368 Ok(Self {
369 data: strings,
370 rows,
371 cols: 1,
372 })
373 }
374
375 fn from_cell_array(cell: CellArray) -> Result<Self, String> {
376 let CellArray {
377 data, rows, cols, ..
378 } = cell;
379 let mut strings = Vec::with_capacity(data.len());
380 for col in 0..cols {
381 for row in 0..rows {
382 let idx = row * cols + col;
383 let value_ref: &Value = &data[idx];
384 strings.push(
385 cell_element_to_string(value_ref)
386 .ok_or_else(|| CELL_ELEMENT_ERROR.to_string())?,
387 );
388 }
389 }
390 Ok(Self {
391 data: strings,
392 rows,
393 cols,
394 })
395 }
396
397 fn into_split_result(self, options: &SplitOptions) -> Result<Value, String> {
398 let TextMatrix { data, rows, cols } = self;
399
400 if data.is_empty() {
401 let block_cols = if cols == 0 { 0 } else { 1 };
402 let shape = if cols == 0 {
403 vec![rows, 0]
404 } else {
405 vec![rows, cols * block_cols]
406 };
407 let array = StringArray::new(Vec::new(), shape).map_err(|e| format!("split: {e}"))?;
408 return Ok(Value::StringArray(array));
409 }
410
411 let mut per_element: Vec<Vec<String>> = Vec::with_capacity(data.len());
412 let mut max_tokens = 0usize;
413 for text in &data {
414 let tokens = split_text(text, options);
415 max_tokens = max_tokens.max(tokens.len());
416 per_element.push(tokens);
417 }
418 if max_tokens == 0 {
419 max_tokens = 1;
420 }
421 let block_cols = max_tokens;
422 let result_cols = block_cols * cols.max(1);
423 let total = rows * result_cols;
424 let missing = "<missing>".to_string();
425 let mut output = vec![missing.clone(); total];
426
427 for col in 0..cols.max(1) {
428 for row in 0..rows {
429 let element_index = if cols == 0 { row } else { row + col * rows };
430 if element_index >= per_element.len() {
431 continue;
432 }
433 let tokens = &per_element[element_index];
434 for t in 0..block_cols {
435 let out_col = if cols == 0 { t } else { col * block_cols + t };
436 let out_index = row + out_col * rows;
437 if out_index >= output.len() {
438 continue;
439 }
440 if t < tokens.len() {
441 output[out_index] = tokens[t].clone();
442 } else {
443 output[out_index] = missing.clone();
444 }
445 }
446 }
447 }
448
449 let shape = vec![rows, result_cols];
450 let array = StringArray::new(output, shape).map_err(|e| format!("split: {e}"))?;
451 Ok(Value::StringArray(array))
452 }
453}
454
455fn split_text(text: &str, options: &SplitOptions) -> Vec<String> {
456 if is_missing_string(text) {
457 return vec![text.to_string()];
458 }
459 match &options.delimiters {
460 DelimiterSpec::Whitespace => split_whitespace(text, options),
461 DelimiterSpec::Patterns(patterns) => split_by_patterns(text, patterns, options),
462 }
463}
464
465fn split_whitespace(text: &str, options: &SplitOptions) -> Vec<String> {
466 if text.is_empty() {
467 return vec![String::new()];
468 }
469
470 let mut parts: Vec<String> = Vec::new();
471 let mut idx = 0usize;
472 let mut last = 0usize;
473 let len = text.len();
474
475 while idx < len {
476 let ch = text[idx..].chars().next().unwrap();
477 let width = ch.len_utf8();
478 if !ch.is_whitespace() {
479 idx += width;
480 continue;
481 }
482
483 let token = &text[last..idx];
484 if !token.is_empty() || !options.collapse_delimiters {
485 parts.push(token.to_string());
486 }
487
488 let run_end = advance_whitespace(text, idx);
489 if options.include_delimiters {
490 if options.collapse_delimiters {
491 parts.push(text[idx..run_end].to_string());
492 } else {
493 parts.push(text[idx..idx + width].to_string());
494 }
495 }
496
497 if options.collapse_delimiters {
498 idx = run_end;
499 last = run_end;
500 } else {
501 idx += width;
502 last = idx;
503 }
504 }
505
506 let tail = &text[last..];
507 if !tail.is_empty() || !options.collapse_delimiters {
508 parts.push(tail.to_string());
509 }
510 if parts.is_empty() {
511 parts.push(String::new());
512 }
513 parts
514}
515
516fn split_by_patterns(text: &str, patterns: &[String], options: &SplitOptions) -> Vec<String> {
517 if patterns.is_empty() {
518 return vec![text.to_string()];
519 }
520
521 let mut parts: Vec<String> = Vec::new();
522 let mut idx = 0usize;
523 let mut last = 0usize;
524 while idx < text.len() {
525 if let Some(pattern) = patterns
526 .iter()
527 .find(|candidate| text[idx..].starts_with(candidate.as_str()))
528 {
529 let token = &text[last..idx];
530 if !token.is_empty() || !options.collapse_delimiters {
531 parts.push(token.to_string());
532 }
533
534 let pat_len = pattern.len();
535 if options.collapse_delimiters {
536 let mut run_end = idx + pat_len;
537 while run_end < text.len() {
538 if let Some(next) = patterns
539 .iter()
540 .find(|candidate| text[run_end..].starts_with(candidate.as_str()))
541 {
542 let len = next.len();
543 if len == 0 {
544 break;
545 }
546 run_end += len;
547 } else {
548 break;
549 }
550 }
551 if options.include_delimiters {
552 parts.push(text[idx..run_end].to_string());
553 }
554 idx = run_end;
555 last = run_end;
556 } else {
557 if options.include_delimiters {
558 parts.push(text[idx..idx + pat_len].to_string());
559 }
560 idx += pat_len;
561 last = idx;
562 }
563
564 continue;
565 }
566 let ch = text[idx..].chars().next().unwrap();
567 idx += ch.len_utf8();
568 }
569 let tail = &text[last..];
570 if !tail.is_empty() || !options.collapse_delimiters {
571 parts.push(tail.to_string());
572 }
573 if parts.is_empty() {
574 parts.push(String::new());
575 }
576 parts
577}
578
/// Returns the byte offset of the first non-whitespace character at or after
/// `start`, or `text.len()` when only whitespace remains. A `start` at or beyond the
/// end of `text` is returned unchanged.
fn advance_whitespace(text: &str, start: usize) -> usize {
    if start >= text.len() {
        return start;
    }
    text[start..]
        .char_indices()
        .find(|(_, ch)| !ch.is_whitespace())
        .map_or(text.len(), |(offset, _)| start + offset)
}
589
590fn extract_delimiters(value: &Value) -> Result<Vec<String>, String> {
591 match value {
592 Value::String(text) => Ok(vec![text.clone()]),
593 Value::StringArray(array) => Ok(array.data.clone()),
594 Value::CharArray(array) => {
595 if array.rows == 0 {
596 return Ok(Vec::new());
597 }
598 let mut entries = Vec::with_capacity(array.rows);
599 for row in 0..array.rows {
600 entries.push(char_row_to_string_slice(&array.data, array.cols, row));
601 }
602 Ok(entries)
603 }
604 Value::Cell(cell) => {
605 let mut entries = Vec::with_capacity(cell.data.len());
606 for element in &cell.data {
607 entries.push(
608 cell_element_to_string(element)
609 .ok_or_else(|| CELL_ELEMENT_ERROR.to_string())?,
610 );
611 }
612 Ok(entries)
613 }
614 _ => Err(DELIMITER_TYPE_ERROR.to_string()),
615 }
616}
617
618fn cell_element_to_string(value: &Value) -> Option<String> {
619 match value {
620 Value::String(text) => Some(text.clone()),
621 Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
622 Value::CharArray(array) if array.rows <= 1 => {
623 if array.rows == 0 {
624 Some(String::new())
625 } else {
626 Some(char_row_to_string_slice(&array.data, array.cols, 0))
627 }
628 }
629 _ => None,
630 }
631}
632
633fn value_to_scalar_string(value: &Value) -> Option<String> {
634 match value {
635 Value::String(text) => Some(text.clone()),
636 Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
637 Value::CharArray(array) if array.rows <= 1 => {
638 if array.rows == 0 {
639 Some(String::new())
640 } else {
641 Some(char_row_to_string_slice(&array.data, array.cols, 0))
642 }
643 }
644 Value::Cell(cell) if cell.data.len() == 1 => cell_element_to_string(&cell.data[0]),
645 _ => None,
646 }
647}
648
649fn parse_bool(value: &Value, name: &str) -> Result<bool, String> {
650 match value {
651 Value::Bool(b) => Ok(*b),
652 Value::Int(i) => Ok(i.to_i64() != 0),
653 Value::Num(n) => Ok(*n != 0.0),
654 Value::LogicalArray(array) => {
655 if array.data.len() == 1 {
656 Ok(array.data[0] != 0)
657 } else {
658 Err(format!(
659 "split: value for '{}' must be logical true or false",
660 name
661 ))
662 }
663 }
664 Value::Tensor(tensor) => {
665 if tensor.data.len() == 1 {
666 Ok(tensor.data[0] != 0.0)
667 } else {
668 Err(format!(
669 "split: value for '{}' must be logical true or false",
670 name
671 ))
672 }
673 }
674 _ => {
675 if let Some(text) = value_to_scalar_string(value) {
676 let lowered = text.trim().to_ascii_lowercase();
677 match lowered.as_str() {
678 "true" | "on" | "yes" => Ok(true),
679 "false" | "off" | "no" => Ok(false),
680 _ => Err(format!(
681 "split: value for '{}' must be logical true or false",
682 name
683 )),
684 }
685 } else {
686 Err(format!(
687 "split: value for '{}' must be logical true or false",
688 name
689 ))
690 }
691 }
692 }
693}
694
/// Option names recognised in the name-value portion of a `split` call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NameKey {
    /// The `CollapseDelimiters` option.
    CollapseDelimiters,
    /// The `IncludeDelimiters` option.
    IncludeDelimiters,
}
700
701fn is_name_key(value: &Value) -> bool {
702 name_key(value).is_some()
703}
704
705fn name_key(value: &Value) -> Option<NameKey> {
706 value_to_scalar_string(value).and_then(|text| {
707 let lowered = text.trim().to_ascii_lowercase();
708 match lowered.as_str() {
709 "collapsedelimiters" => Some(NameKey::CollapseDelimiters),
710 "includedelimiters" => Some(NameKey::IncludeDelimiters),
711 _ => None,
712 }
713 })
714}
715
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;
    use runmat_builtins::{CellArray, LogicalArray, Tensor};

    /// Wraps a string slice in a `Value::String`.
    fn string_value(text: &str) -> Value {
        Value::String(text.to_string())
    }

    /// Asserts that `result` is a string array with the given shape and flat data.
    fn assert_string_array(result: Value, shape: &[usize], expected: &[&str]) {
        match result {
            Value::StringArray(array) => {
                assert_eq!(array.shape, shape.to_vec());
                let expected: Vec<String> = expected.iter().map(|s| s.to_string()).collect();
                assert_eq!(array.data, expected);
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    #[test]
    fn split_string_whitespace_default() {
        let result =
            split_builtin(string_value("RunMat Accelerate Planner"), Vec::new()).expect("split");
        assert_string_array(result, &[1, 3], &["RunMat", "Accelerate", "Planner"]);
    }

    #[test]
    fn split_string_custom_delimiter() {
        let result = split_builtin(string_value("alpha,beta,gamma"), vec![string_value(",")])
            .expect("split");
        assert_string_array(result, &[1, 3], &["alpha", "beta", "gamma"]);
    }

    #[test]
    fn split_include_delimiters_true() {
        let delimiters =
            StringArray::new(vec!["+".to_string(), "-".to_string()], vec![1, 2]).unwrap();
        let args = vec![
            Value::StringArray(delimiters),
            string_value("IncludeDelimiters"),
            Value::Bool(true),
        ];
        let result = split_builtin(string_value("A+B-C"), args).expect("split");
        assert_string_array(result, &[1, 5], &["A", "+", "B", "-", "C"]);
    }

    #[test]
    fn split_include_delimiters_whitespace_collapse_default() {
        let args = vec![string_value("IncludeDelimiters"), Value::Bool(true)];
        let result = split_builtin(string_value("A B"), args).expect("split");
        assert_string_array(result, &[1, 3], &["A", " ", "B"]);
    }

    #[test]
    fn split_patterns_include_delimiters_collapse_true() {
        let args = vec![
            string_value(","),
            string_value("IncludeDelimiters"),
            Value::Bool(true),
            string_value("CollapseDelimiters"),
            Value::Bool(true),
        ];
        let result = split_builtin(string_value("a,,b"), args).expect("split");
        assert_string_array(result, &[1, 3], &["a", ",,", "b"]);
    }

    #[test]
    fn split_collapse_false_preserves_empty_segments() {
        let args = vec![
            string_value(","),
            string_value("CollapseDelimiters"),
            Value::Bool(false),
        ];
        let result = split_builtin(string_value("one,,three,"), args).expect("split");
        assert_string_array(result, &[1, 4], &["one", "", "three", ""]);
    }

    #[test]
    fn split_character_array_rows() {
        let lines = ["GPU Accelerate", "Ignition Engine"];
        let width = lines
            .iter()
            .map(|line| line.chars().count())
            .max()
            .unwrap();

        // Pad every row with spaces to the common width and lay the rows out back to back.
        let mut data: Vec<char> = Vec::with_capacity(lines.len() * width);
        for line in lines.iter() {
            let mut row: Vec<char> = line.chars().collect();
            row.resize(width, ' ');
            data.extend(row);
        }

        let input = Value::CharArray(CharArray::new(data, 2, width).unwrap());
        let result = split_builtin(input, Vec::new()).expect("split");
        assert_string_array(
            result,
            &[2, 2],
            &["GPU", "Ignition", "Accelerate", "Engine"],
        );
    }

    #[test]
    fn split_string_array_multiple_columns() {
        let data = vec![
            "RunMat Core".to_string(),
            "Ignition Interpreter".to_string(),
            "Accelerate Engine".to_string(),
            "<missing>".to_string(),
        ];
        let input = Value::StringArray(StringArray::new(data, vec![2, 2]).unwrap());
        let result = split_builtin(input, Vec::new()).expect("split");
        assert_string_array(
            result,
            &[2, 4],
            &[
                "RunMat",
                "Ignition",
                "Core",
                "Interpreter",
                "Accelerate",
                "<missing>",
                "Engine",
                "<missing>",
            ],
        );
    }

    #[test]
    fn split_cell_array_outputs_string_array() {
        let values = vec![
            string_value("RunMat Snapshot"),
            string_value("Fusion Planner"),
        ];
        let cell = crate::make_cell(values, 2, 1).expect("cell");
        let result = split_builtin(cell, vec![string_value(" ")]).expect("split");
        assert_string_array(
            result,
            &[2, 2],
            &["RunMat", "Fusion", "Snapshot", "Planner"],
        );
    }

    #[test]
    fn split_cell_array_multiple_columns() {
        let values = vec![
            string_value("alpha beta"),
            string_value("gamma"),
            string_value("delta epsilon"),
            string_value("<missing>"),
        ];
        let cell = crate::make_cell(values, 2, 2).expect("cell");
        let result = split_builtin(cell, Vec::new()).expect("split");
        assert_string_array(
            result,
            &[2, 4],
            &[
                "alpha",
                "delta",
                "beta",
                "epsilon",
                "gamma",
                "<missing>",
                "<missing>",
                "<missing>",
            ],
        );
    }

    #[test]
    fn split_missing_string_propagates() {
        let result = split_builtin(string_value("<missing>"), Vec::new()).expect("split");
        assert_string_array(result, &[1, 1], &["<missing>"]);
    }

    #[test]
    fn split_invalid_name_value_pair_errors() {
        let args = vec![string_value("CollapseDelimiters")];
        let err = split_builtin(string_value("abc"), args).unwrap_err();
        assert!(err.contains("name-value"));
    }

    #[test]
    fn split_invalid_text_argument_errors() {
        let err = split_builtin(Value::Num(1.0), Vec::new()).unwrap_err();
        assert!(err.contains("first argument"));
    }

    #[test]
    fn split_invalid_delimiter_type_errors() {
        let err = split_builtin(string_value("abc"), vec![Value::Num(1.0)]).unwrap_err();
        assert!(err.contains("delimiter input"));
    }

    #[test]
    fn split_empty_delimiter_errors() {
        let err =
            split_builtin(string_value("abc"), vec![Value::String(String::new())]).unwrap_err();
        assert!(err.contains("at least one character"));
    }

    #[test]
    fn split_unknown_name_argument_errors() {
        let args = vec![string_value("UnknownOption"), Value::Bool(true)];
        let err = split_builtin(string_value("abc"), args).unwrap_err();
        assert!(err.contains("unrecognized"));
    }

    #[test]
    fn split_collapse_delimiters_accepts_logical_array() {
        let logical = LogicalArray::new(vec![1u8], vec![1]).unwrap();
        let args = vec![
            string_value(","),
            string_value("CollapseDelimiters"),
            Value::LogicalArray(logical),
        ];
        let result = split_builtin(string_value("a,,b"), args).expect("split");
        assert_string_array(result, &[1, 2], &["a", "b"]);
    }

    #[test]
    fn split_include_delimiters_accepts_tensor_scalar() {
        let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
        let args = vec![
            string_value(","),
            string_value("IncludeDelimiters"),
            Value::Tensor(tensor),
        ];
        let result = split_builtin(string_value("a,b"), args).expect("split");
        assert_string_array(result, &[1, 3], &["a", ",", "b"]);
    }

    #[test]
    fn split_cell_array_mixed_inputs() {
        let char_vector = CharArray::new("gamma".chars().collect(), 1, 5).unwrap();
        let handles: Vec<_> = vec![
            runmat_gc::gc_allocate(string_value("alpha beta")).unwrap(),
            runmat_gc::gc_allocate(Value::CharArray(char_vector)).unwrap(),
        ];
        let cell =
            Value::Cell(CellArray::new_handles(handles, 1, 2).expect("cell array construction"));
        let result = split_builtin(cell, Vec::new()).expect("split");
        assert_string_array(result, &[1, 4], &["alpha", "beta", "gamma", "<missing>"]);
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}