runmat_runtime/builtins/strings/transform/
split.rs

1//! MATLAB-compatible `split` builtin with GPU-aware semantics for RunMat.
2
3use std::collections::HashSet;
4
5use runmat_builtins::{CellArray, CharArray, StringArray, Value};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::spec::{
9    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
10    ReductionNaN, ResidencyPolicy, ShapeRequirements,
11};
12use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
13#[cfg(feature = "doc_export")]
14use crate::register_builtin_doc_text;
15use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
16
17#[cfg(feature = "doc_export")]
18pub const DOC_MD: &str = r#"---
19title: "split"
20category: "strings/transform"
21keywords: ["split", "string split", "text split", "delimiters", "collapse delimiters", "include delimiters"]
22summary: "Split strings, character arrays, and cell arrays into substrings using delimiters."
23references:
24  - https://www.mathworks.com/help/matlab/ref/split.html
25gpu_support:
26  elementwise: false
27  reduction: false
28  precisions: []
29  broadcasting: "none"
30  notes: "Executes on the CPU; GPU-resident arguments are gathered to host memory prior to splitting."
31fusion:
32  elementwise: false
33  reduction: false
34  max_inputs: 2
35  constants: "inline"
36requires_feature: null
37tested:
38  unit: "builtins::strings::transform::split::tests"
39  integration: "builtins::strings::transform::split::tests::split_cell_array_mixed_inputs"
40---
41
42# What does the `split` function do in MATLAB / RunMat?
43`split(text)` breaks text into substrings separated by delimiters. The input can be a string scalar,
44string array, character array, or a cell array of character vectors—`split` mirrors MATLAB behaviour
45across each of these representations. When you omit the delimiter argument, `split` collapses
46whitespace runs and returns the remaining tokens as a string array.
47
48## How does the `split` function behave in MATLAB / RunMat?
49- The default delimiter is whitespace (`isspace`), and consecutive whitespace is treated as a single
50  separator (equivalent to `'CollapseDelimiters', true`).
51- When you supply explicit delimiters, they can be a string scalar, string array, character array
52  (rows), or a cell array of character vectors. Delimiters are matched left to right and the longest
53  delimiter wins when several candidates match at the same position.
54- `'CollapseDelimiters'` controls whether consecutive delimiters generate empty substrings. The default
55  is `false` when you specify explicit delimiters and `true` when you rely on the whitespace default.
56- `'IncludeDelimiters'` inserts the matched delimiters as separate elements in the output string array.
57- Outputs are string arrays. For scalar inputs, the result is a row vector. For string/character arrays,
58  the first dimension matches the number of rows in the input and additional columns are appended to
59  accommodate the longest token list. Missing values are padded with `<missing>`.
60- Missing string scalars propagate unchanged.
61
62## `split` Function GPU Execution Behaviour
63`split` executes on the CPU. When the input or delimiter arguments reside on the GPU, RunMat gathers
64them to host memory before performing the split so the results match MATLAB exactly. Providers do not
65need to implement custom kernels for this builtin today.
66
67## GPU residency in RunMat (Do I need `gpuArray`?)
68String manipulation currently runs on the host. If text data lives on the GPU (for example after a
69gathered computation), `split` automatically fetches it. You never need to move text explicitly before
70calling this builtin.
71
72## Examples of using the `split` function in MATLAB / RunMat
73
74### Split A String On Whitespace
75```matlab
76txt = "RunMat Accelerate Planner";
77pieces = split(txt);
78```
79Expected output:
80```matlab
81pieces = 1×3 string
82    "RunMat"    "Accelerate"    "Planner"
83```
84
85### Split A String Using A Custom Delimiter
86```matlab
87csv = "alpha,beta,gamma";
88tokens = split(csv, ",");
89```
90Expected output:
91```matlab
92tokens = 1×3 string
93    "alpha"    "beta"    "gamma"
94```
95
96### Include Delimiters In The Output
97```matlab
98expr = "A+B-C";
99segments = split(expr, ["+", "-"], "IncludeDelimiters", true);
100```
101Expected output:
102```matlab
103segments = 1×5 string
104    "A"    "+"    "B"    "-"    "C"
105```
106
107### Preserve Empty Segments When CollapseDelimiters Is False
108```matlab
109values = "one,,three,";
110parts = split(values, ",", "CollapseDelimiters", false);
111```
112Expected output:
113```matlab
114parts = 1×4 string
115    "one"    ""    "three"    ""
116```
117
118### Split Each Row Of A Character Array
119```matlab
120rows = char("GPU Accelerate", "Ignition Interpreter");
121result = split(rows);
122```
123Expected output:
124```matlab
125result = 2×2 string
126    "GPU"          "Accelerate"
127    "Ignition"     "Interpreter"
128```
129
130### Split Elements Of A Cell Array
131```matlab
132C = {'RunMat Snapshot'; "Fusion Planner"};
133out = split(C, " ");
134```
135Expected output:
136```matlab
137out = 2×2 string
138    "RunMat"    "Snapshot"
139    "Fusion"    "Planner"
140```
141
142### Handle Missing String Inputs
143```matlab
names = ["RunMat"; "<missing>"; "Accelerate Engine"];
145split_names = split(names);
146```
147Expected output:
148```matlab
149split_names = 3×2 string
150    "RunMat"        "<missing>"
151    "<missing>"     "<missing>"
152    "Accelerate"    "Engine"
153```
154
155## FAQ
156
157### What delimiters does `split` use by default?
158When you omit the second argument, `split` treats any Unicode whitespace as a delimiter and collapses
159consecutive whitespace runs so they produce a single split point.
160
161### How do explicit delimiters change the defaults?
162Providing explicit delimiters switches the default for `'CollapseDelimiters'` to `false`, matching MATLAB.
163You can override that behaviour with a name-value pair.
164
165### What happens when `'IncludeDelimiters'` is `true`?
166Matched delimiters are inserted between tokens in the output string array, preserving their original
167order. Tokens still expand to fill rows and columns, with missing values used for padding.
168
169### How is the output sized for string arrays?
170The number of rows matches the input. Columns are added to accommodate the longest token list observed
171across all elements. Shorter rows are padded with `<missing>`.
172
173### How does `split` handle missing strings?
174Missing string scalars propagate unchanged. When padding is required, `<missing>` is used so MATLAB and
175RunMat stay aligned.
176
177### Can I provide empty delimiters?
178No. Empty delimiters are disallowed, matching MATLAB's input validation. Specify at least one character
179per delimiter.
180
181### Which argument types are accepted as delimiters?
182You may pass string scalars, string arrays, character arrays (each row is a delimiter), or cell arrays
183containing string scalars or character vectors.
184
185## See Also
186[strsplit](../../search/strsplit), [replace](./replace), [lower](./lower), [upper](./upper), [strip](./strip)
187
188## Source & Feedback
189- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/split.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/split.rs)
190- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
191"#;
192
193pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
194    name: "split",
195    op_kind: GpuOpKind::Custom("string-transform"),
196    supported_precisions: &[],
197    broadcast: BroadcastSemantics::None,
198    provider_hooks: &[],
199    constant_strategy: ConstantStrategy::InlineLiteral,
200    residency: ResidencyPolicy::GatherImmediately,
201    nan_mode: ReductionNaN::Include,
202    two_pass_threshold: None,
203    workgroup_size: None,
204    accepts_nan_mode: false,
205    notes: "Executes on the CPU; GPU-resident inputs are gathered to host memory before splitting.",
206};
207
208register_builtin_gpu_spec!(GPU_SPEC);
209
210pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
211    name: "split",
212    shape: ShapeRequirements::Any,
213    constant_strategy: ConstantStrategy::InlineLiteral,
214    elementwise: None,
215    reduction: None,
216    emits_nan: false,
217    notes: "String transformation builtin; not eligible for fusion planning and always gathers GPU inputs.",
218};
219
220register_builtin_fusion_spec!(FUSION_SPEC);
221
222#[cfg(feature = "doc_export")]
223register_builtin_doc_text!("split", DOC_MD);
224
// User-facing error messages returned by `split`. Each one is prefixed with the
// builtin name so the failing call can be identified from the message alone.

/// The first (text) argument has an unsupported type.
const ARG_TYPE_ERROR: &str =
    "split: first argument must be a string scalar, string array, character array, or cell array of character vectors";
/// The delimiter argument has an unsupported type.
const DELIMITER_TYPE_ERROR: &str =
    "split: delimiter input must be a string scalar, string array, character array, or cell array of character vectors";
/// A name was supplied without a following value.
const NAME_VALUE_PAIR_ERROR: &str = "split: name-value arguments must be supplied in pairs";
/// An argument in name position is not one of the recognised option names.
const UNKNOWN_NAME_ERROR: &str =
    "split: unrecognized name-value argument; supported names are 'CollapseDelimiters' and 'IncludeDelimiters'";
/// The delimiter list is empty or contains an empty delimiter.
const EMPTY_DELIMITER_ERROR: &str = "split: delimiters must contain at least one character";
/// A cell element is neither a string scalar nor a character vector.
const CELL_ELEMENT_ERROR: &str =
    "split: cell array elements must be string scalars or character vectors";
235
236#[runtime_builtin(
237    name = "split",
238    category = "strings/transform",
239    summary = "Split strings, character arrays, and cell arrays into substrings using delimiters.",
240    keywords = "split,strsplit,delimiter,CollapseDelimiters,IncludeDelimiters",
241    accel = "sink"
242)]
243fn split_builtin(text: Value, rest: Vec<Value>) -> Result<Value, String> {
244    let text = gather_if_needed(&text).map_err(|e| format!("split: {e}"))?;
245    let mut args: Vec<Value> = Vec::with_capacity(rest.len());
246    for arg in rest {
247        args.push(gather_if_needed(&arg).map_err(|e| format!("split: {e}"))?);
248    }
249
250    let options = SplitOptions::parse(&args)?;
251    let matrix = TextMatrix::from_value(text)?;
252    matrix.into_split_result(&options)
253}
254
/// Which delimiters `split` looks for.
#[derive(Clone)]
enum DelimiterSpec {
    /// No delimiter argument was supplied: split on whitespace characters.
    Whitespace,
    /// Explicit delimiter strings. `SplitOptions::parse` stores them
    /// de-duplicated and ordered longest first.
    Patterns(Vec<String>),
}
260
261#[derive(Clone)]
262struct SplitOptions {
263    delimiters: DelimiterSpec,
264    collapse_delimiters: bool,
265    include_delimiters: bool,
266}
267
268impl SplitOptions {
269    fn parse(args: &[Value]) -> Result<Self, String> {
270        let mut index = 0usize;
271        let mut delimiters = DelimiterSpec::Whitespace;
272
273        if index < args.len() && !is_name_key(&args[index]) {
274            let list = extract_delimiters(&args[index])?;
275            if list.is_empty() {
276                return Err(EMPTY_DELIMITER_ERROR.to_string());
277            }
278            let mut seen = HashSet::new();
279            let mut patterns: Vec<String> = Vec::new();
280            for pattern in list {
281                if pattern.is_empty() {
282                    return Err(EMPTY_DELIMITER_ERROR.to_string());
283                }
284                if seen.insert(pattern.clone()) {
285                    patterns.push(pattern);
286                }
287            }
288            patterns.sort_by_key(|pat| std::cmp::Reverse(pat.len()));
289            delimiters = DelimiterSpec::Patterns(patterns);
290            index += 1;
291        }
292
293        let mut collapse = match delimiters {
294            DelimiterSpec::Whitespace => true,
295            DelimiterSpec::Patterns(_) => false,
296        };
297        let mut include = false;
298
299        while index < args.len() {
300            let name = match name_key(&args[index]) {
301                Some(NameKey::CollapseDelimiters) => NameKey::CollapseDelimiters,
302                Some(NameKey::IncludeDelimiters) => NameKey::IncludeDelimiters,
303                None => return Err(UNKNOWN_NAME_ERROR.to_string()),
304            };
305            index += 1;
306            if index >= args.len() {
307                return Err(NAME_VALUE_PAIR_ERROR.to_string());
308            }
309            let value = &args[index];
310            index += 1;
311
312            match name {
313                NameKey::CollapseDelimiters => {
314                    collapse = parse_bool(value, "CollapseDelimiters")?;
315                }
316                NameKey::IncludeDelimiters => {
317                    include = parse_bool(value, "IncludeDelimiters")?;
318                }
319            }
320        }
321
322        Ok(Self {
323            delimiters,
324            collapse_delimiters: collapse,
325            include_delimiters: include,
326        })
327    }
328}
329
330struct TextMatrix {
331    data: Vec<String>,
332    rows: usize,
333    cols: usize,
334}
335
336impl TextMatrix {
337    fn from_value(value: Value) -> Result<Self, String> {
338        match value {
339            Value::String(text) => Ok(Self {
340                data: vec![text],
341                rows: 1,
342                cols: 1,
343            }),
344            Value::StringArray(array) => Ok(Self {
345                data: array.data,
346                rows: array.rows,
347                cols: array.cols,
348            }),
349            Value::CharArray(array) => Self::from_char_array(array),
350            Value::Cell(cell) => Self::from_cell_array(cell),
351            _ => Err(ARG_TYPE_ERROR.to_string()),
352        }
353    }
354
355    fn from_char_array(array: CharArray) -> Result<Self, String> {
356        let CharArray { data, rows, cols } = array;
357        if rows == 0 {
358            return Ok(Self {
359                data: Vec::new(),
360                rows: 0,
361                cols: 1,
362            });
363        }
364        let mut strings = Vec::with_capacity(rows);
365        for row in 0..rows {
366            strings.push(char_row_to_string_slice(&data, cols, row));
367        }
368        Ok(Self {
369            data: strings,
370            rows,
371            cols: 1,
372        })
373    }
374
375    fn from_cell_array(cell: CellArray) -> Result<Self, String> {
376        let CellArray {
377            data, rows, cols, ..
378        } = cell;
379        let mut strings = Vec::with_capacity(data.len());
380        for col in 0..cols {
381            for row in 0..rows {
382                let idx = row * cols + col;
383                let value_ref: &Value = &data[idx];
384                strings.push(
385                    cell_element_to_string(value_ref)
386                        .ok_or_else(|| CELL_ELEMENT_ERROR.to_string())?,
387                );
388            }
389        }
390        Ok(Self {
391            data: strings,
392            rows,
393            cols,
394        })
395    }
396
397    fn into_split_result(self, options: &SplitOptions) -> Result<Value, String> {
398        let TextMatrix { data, rows, cols } = self;
399
400        if data.is_empty() {
401            let block_cols = if cols == 0 { 0 } else { 1 };
402            let shape = if cols == 0 {
403                vec![rows, 0]
404            } else {
405                vec![rows, cols * block_cols]
406            };
407            let array = StringArray::new(Vec::new(), shape).map_err(|e| format!("split: {e}"))?;
408            return Ok(Value::StringArray(array));
409        }
410
411        let mut per_element: Vec<Vec<String>> = Vec::with_capacity(data.len());
412        let mut max_tokens = 0usize;
413        for text in &data {
414            let tokens = split_text(text, options);
415            max_tokens = max_tokens.max(tokens.len());
416            per_element.push(tokens);
417        }
418        if max_tokens == 0 {
419            max_tokens = 1;
420        }
421        let block_cols = max_tokens;
422        let result_cols = block_cols * cols.max(1);
423        let total = rows * result_cols;
424        let missing = "<missing>".to_string();
425        let mut output = vec![missing.clone(); total];
426
427        for col in 0..cols.max(1) {
428            for row in 0..rows {
429                let element_index = if cols == 0 { row } else { row + col * rows };
430                if element_index >= per_element.len() {
431                    continue;
432                }
433                let tokens = &per_element[element_index];
434                for t in 0..block_cols {
435                    let out_col = if cols == 0 { t } else { col * block_cols + t };
436                    let out_index = row + out_col * rows;
437                    if out_index >= output.len() {
438                        continue;
439                    }
440                    if t < tokens.len() {
441                        output[out_index] = tokens[t].clone();
442                    } else {
443                        output[out_index] = missing.clone();
444                    }
445                }
446            }
447        }
448
449        let shape = vec![rows, result_cols];
450        let array = StringArray::new(output, shape).map_err(|e| format!("split: {e}"))?;
451        Ok(Value::StringArray(array))
452    }
453}
454
455fn split_text(text: &str, options: &SplitOptions) -> Vec<String> {
456    if is_missing_string(text) {
457        return vec![text.to_string()];
458    }
459    match &options.delimiters {
460        DelimiterSpec::Whitespace => split_whitespace(text, options),
461        DelimiterSpec::Patterns(patterns) => split_by_patterns(text, patterns, options),
462    }
463}
464
465fn split_whitespace(text: &str, options: &SplitOptions) -> Vec<String> {
466    if text.is_empty() {
467        return vec![String::new()];
468    }
469
470    let mut parts: Vec<String> = Vec::new();
471    let mut idx = 0usize;
472    let mut last = 0usize;
473    let len = text.len();
474
475    while idx < len {
476        let ch = text[idx..].chars().next().unwrap();
477        let width = ch.len_utf8();
478        if !ch.is_whitespace() {
479            idx += width;
480            continue;
481        }
482
483        let token = &text[last..idx];
484        if !token.is_empty() || !options.collapse_delimiters {
485            parts.push(token.to_string());
486        }
487
488        let run_end = advance_whitespace(text, idx);
489        if options.include_delimiters {
490            if options.collapse_delimiters {
491                parts.push(text[idx..run_end].to_string());
492            } else {
493                parts.push(text[idx..idx + width].to_string());
494            }
495        }
496
497        if options.collapse_delimiters {
498            idx = run_end;
499            last = run_end;
500        } else {
501            idx += width;
502            last = idx;
503        }
504    }
505
506    let tail = &text[last..];
507    if !tail.is_empty() || !options.collapse_delimiters {
508        parts.push(tail.to_string());
509    }
510    if parts.is_empty() {
511        parts.push(String::new());
512    }
513    parts
514}
515
516fn split_by_patterns(text: &str, patterns: &[String], options: &SplitOptions) -> Vec<String> {
517    if patterns.is_empty() {
518        return vec![text.to_string()];
519    }
520
521    let mut parts: Vec<String> = Vec::new();
522    let mut idx = 0usize;
523    let mut last = 0usize;
524    while idx < text.len() {
525        if let Some(pattern) = patterns
526            .iter()
527            .find(|candidate| text[idx..].starts_with(candidate.as_str()))
528        {
529            let token = &text[last..idx];
530            if !token.is_empty() || !options.collapse_delimiters {
531                parts.push(token.to_string());
532            }
533
534            let pat_len = pattern.len();
535            if options.collapse_delimiters {
536                let mut run_end = idx + pat_len;
537                while run_end < text.len() {
538                    if let Some(next) = patterns
539                        .iter()
540                        .find(|candidate| text[run_end..].starts_with(candidate.as_str()))
541                    {
542                        let len = next.len();
543                        if len == 0 {
544                            break;
545                        }
546                        run_end += len;
547                    } else {
548                        break;
549                    }
550                }
551                if options.include_delimiters {
552                    parts.push(text[idx..run_end].to_string());
553                }
554                idx = run_end;
555                last = run_end;
556            } else {
557                if options.include_delimiters {
558                    parts.push(text[idx..idx + pat_len].to_string());
559                }
560                idx += pat_len;
561                last = idx;
562            }
563
564            continue;
565        }
566        let ch = text[idx..].chars().next().unwrap();
567        idx += ch.len_utf8();
568    }
569    let tail = &text[last..];
570    if !tail.is_empty() || !options.collapse_delimiters {
571        parts.push(tail.to_string());
572    }
573    if parts.is_empty() {
574        parts.push(String::new());
575    }
576    parts
577}
578
/// Returns the byte index of the first non-whitespace character at or after
/// `start`, or `text.len()` when only whitespace remains.
///
/// A `start` at or beyond the end of `text` is returned unchanged.
fn advance_whitespace(text: &str, start: usize) -> usize {
    if start >= text.len() {
        return start;
    }
    // Sum the UTF-8 widths of the leading whitespace characters.
    let run_len: usize = text[start..]
        .chars()
        .take_while(|ch| ch.is_whitespace())
        .map(char::len_utf8)
        .sum();
    start + run_len
}
589
590fn extract_delimiters(value: &Value) -> Result<Vec<String>, String> {
591    match value {
592        Value::String(text) => Ok(vec![text.clone()]),
593        Value::StringArray(array) => Ok(array.data.clone()),
594        Value::CharArray(array) => {
595            if array.rows == 0 {
596                return Ok(Vec::new());
597            }
598            let mut entries = Vec::with_capacity(array.rows);
599            for row in 0..array.rows {
600                entries.push(char_row_to_string_slice(&array.data, array.cols, row));
601            }
602            Ok(entries)
603        }
604        Value::Cell(cell) => {
605            let mut entries = Vec::with_capacity(cell.data.len());
606            for element in &cell.data {
607                entries.push(
608                    cell_element_to_string(element)
609                        .ok_or_else(|| CELL_ELEMENT_ERROR.to_string())?,
610                );
611            }
612            Ok(entries)
613        }
614        _ => Err(DELIMITER_TYPE_ERROR.to_string()),
615    }
616}
617
618fn cell_element_to_string(value: &Value) -> Option<String> {
619    match value {
620        Value::String(text) => Some(text.clone()),
621        Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
622        Value::CharArray(array) if array.rows <= 1 => {
623            if array.rows == 0 {
624                Some(String::new())
625            } else {
626                Some(char_row_to_string_slice(&array.data, array.cols, 0))
627            }
628        }
629        _ => None,
630    }
631}
632
633fn value_to_scalar_string(value: &Value) -> Option<String> {
634    match value {
635        Value::String(text) => Some(text.clone()),
636        Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
637        Value::CharArray(array) if array.rows <= 1 => {
638            if array.rows == 0 {
639                Some(String::new())
640            } else {
641                Some(char_row_to_string_slice(&array.data, array.cols, 0))
642            }
643        }
644        Value::Cell(cell) if cell.data.len() == 1 => cell_element_to_string(&cell.data[0]),
645        _ => None,
646    }
647}
648
649fn parse_bool(value: &Value, name: &str) -> Result<bool, String> {
650    match value {
651        Value::Bool(b) => Ok(*b),
652        Value::Int(i) => Ok(i.to_i64() != 0),
653        Value::Num(n) => Ok(*n != 0.0),
654        Value::LogicalArray(array) => {
655            if array.data.len() == 1 {
656                Ok(array.data[0] != 0)
657            } else {
658                Err(format!(
659                    "split: value for '{}' must be logical true or false",
660                    name
661                ))
662            }
663        }
664        Value::Tensor(tensor) => {
665            if tensor.data.len() == 1 {
666                Ok(tensor.data[0] != 0.0)
667            } else {
668                Err(format!(
669                    "split: value for '{}' must be logical true or false",
670                    name
671                ))
672            }
673        }
674        _ => {
675            if let Some(text) = value_to_scalar_string(value) {
676                let lowered = text.trim().to_ascii_lowercase();
677                match lowered.as_str() {
678                    "true" | "on" | "yes" => Ok(true),
679                    "false" | "off" | "no" => Ok(false),
680                    _ => Err(format!(
681                        "split: value for '{}' must be logical true or false",
682                        name
683                    )),
684                }
685            } else {
686                Err(format!(
687                    "split: value for '{}' must be logical true or false",
688                    name
689                ))
690            }
691        }
692    }
693}
694
/// Option names accepted in the name-value part of a `split` call.
#[derive(PartialEq, Eq)]
enum NameKey {
    /// The `CollapseDelimiters` option.
    CollapseDelimiters,
    /// The `IncludeDelimiters` option.
    IncludeDelimiters,
}
700
701fn is_name_key(value: &Value) -> bool {
702    name_key(value).is_some()
703}
704
705fn name_key(value: &Value) -> Option<NameKey> {
706    value_to_scalar_string(value).and_then(|text| {
707        let lowered = text.trim().to_ascii_lowercase();
708        match lowered.as_str() {
709            "collapsedelimiters" => Some(NameKey::CollapseDelimiters),
710            "includedelimiters" => Some(NameKey::IncludeDelimiters),
711            _ => None,
712        }
713    })
714}
715
716#[cfg(test)]
717mod tests {
718    use super::*;
719    #[cfg(feature = "doc_export")]
720    use crate::builtins::common::test_support;
721    use runmat_builtins::{CellArray, LogicalArray, Tensor};
722
723    #[test]
724    fn split_string_whitespace_default() {
725        let input = Value::String("RunMat Accelerate Planner".to_string());
726        let result = split_builtin(input, Vec::new()).expect("split");
727        match result {
728            Value::StringArray(array) => {
729                assert_eq!(array.shape, vec![1, 3]);
730                assert_eq!(
731                    array.data,
732                    vec![
733                        "RunMat".to_string(),
734                        "Accelerate".to_string(),
735                        "Planner".to_string()
736                    ]
737                );
738            }
739            other => panic!("expected string array, got {other:?}"),
740        }
741    }
742
743    #[test]
744    fn split_string_custom_delimiter() {
745        let input = Value::String("alpha,beta,gamma".to_string());
746        let args = vec![Value::String(",".to_string())];
747        let result = split_builtin(input, args).expect("split");
748        match result {
749            Value::StringArray(array) => {
750                assert_eq!(array.shape, vec![1, 3]);
751                assert_eq!(
752                    array.data,
753                    vec!["alpha".to_string(), "beta".to_string(), "gamma".to_string()]
754                );
755            }
756            other => panic!("expected string array, got {other:?}"),
757        }
758    }
759
760    #[test]
761    fn split_include_delimiters_true() {
762        let input = Value::String("A+B-C".to_string());
763        let args = vec![
764            Value::StringArray(
765                StringArray::new(vec!["+".to_string(), "-".to_string()], vec![1, 2]).unwrap(),
766            ),
767            Value::String("IncludeDelimiters".to_string()),
768            Value::Bool(true),
769        ];
770        let result = split_builtin(input, args).expect("split");
771        match result {
772            Value::StringArray(array) => {
773                assert_eq!(array.shape, vec![1, 5]);
774                assert_eq!(
775                    array.data,
776                    vec![
777                        "A".to_string(),
778                        "+".to_string(),
779                        "B".to_string(),
780                        "-".to_string(),
781                        "C".to_string()
782                    ]
783                );
784            }
785            other => panic!("expected string array, got {other:?}"),
786        }
787    }
788
789    #[test]
790    fn split_include_delimiters_whitespace_collapse_default() {
791        let input = Value::String("A  B".to_string());
792        let args = vec![
793            Value::String("IncludeDelimiters".to_string()),
794            Value::Bool(true),
795        ];
796        let result = split_builtin(input, args).expect("split");
797        match result {
798            Value::StringArray(array) => {
799                assert_eq!(array.shape, vec![1, 3]);
800                assert_eq!(
801                    array.data,
802                    vec!["A".to_string(), "  ".to_string(), "B".to_string()]
803                );
804            }
805            other => panic!("expected string array, got {other:?}"),
806        }
807    }
808
809    #[test]
810    fn split_patterns_include_delimiters_collapse_true() {
811        let input = Value::String("a,,b".to_string());
812        let args = vec![
813            Value::String(",".to_string()),
814            Value::String("IncludeDelimiters".to_string()),
815            Value::Bool(true),
816            Value::String("CollapseDelimiters".to_string()),
817            Value::Bool(true),
818        ];
819        let result = split_builtin(input, args).expect("split");
820        match result {
821            Value::StringArray(array) => {
822                assert_eq!(array.shape, vec![1, 3]);
823                assert_eq!(
824                    array.data,
825                    vec!["a".to_string(), ",,".to_string(), "b".to_string()]
826                );
827            }
828            other => panic!("expected string array, got {other:?}"),
829        }
830    }
831
832    #[test]
833    fn split_collapse_false_preserves_empty_segments() {
834        let input = Value::String("one,,three,".to_string());
835        let args = vec![
836            Value::String(",".to_string()),
837            Value::String("CollapseDelimiters".to_string()),
838            Value::Bool(false),
839        ];
840        let result = split_builtin(input, args).expect("split");
841        match result {
842            Value::StringArray(array) => {
843                assert_eq!(array.shape, vec![1, 4]);
844                assert_eq!(
845                    array.data,
846                    vec![
847                        "one".to_string(),
848                        "".to_string(),
849                        "three".to_string(),
850                        "".to_string()
851                    ]
852                );
853            }
854            other => panic!("expected string array, got {other:?}"),
855        }
856    }
857
858    #[test]
859    fn split_character_array_rows() {
860        let mut row1: Vec<char> = "GPU Accelerate".chars().collect();
861        let mut row2: Vec<char> = "Ignition Engine".chars().collect();
862        let width = row1.len().max(row2.len());
863        row1.resize(width, ' ');
864        row2.resize(width, ' ');
865        let mut data = row1;
866        data.extend(row2);
867        let char_array = CharArray::new(data, 2, width).unwrap();
868        let input = Value::CharArray(char_array);
869        let result = split_builtin(input, Vec::new()).expect("split");
870        match result {
871            Value::StringArray(array) => {
872                assert_eq!(array.shape, vec![2, 2]);
873                assert_eq!(
874                    array.data,
875                    vec![
876                        "GPU".to_string(),
877                        "Ignition".to_string(),
878                        "Accelerate".to_string(),
879                        "Engine".to_string()
880                    ]
881                );
882            }
883            other => panic!("expected string array, got {other:?}"),
884        }
885    }
886
887    #[test]
888    fn split_string_array_multiple_columns() {
889        let data = vec![
890            "RunMat Core".to_string(),
891            "Ignition Interpreter".to_string(),
892            "Accelerate Engine".to_string(),
893            "<missing>".to_string(),
894        ];
895        let array = StringArray::new(data, vec![2, 2]).unwrap();
896        let input = Value::StringArray(array);
897        let result = split_builtin(input, Vec::new()).expect("split");
898        match result {
899            Value::StringArray(array) => {
900                assert_eq!(array.shape, vec![2, 4]);
901                assert_eq!(
902                    array.data,
903                    vec![
904                        "RunMat".to_string(),
905                        "Ignition".to_string(),
906                        "Core".to_string(),
907                        "Interpreter".to_string(),
908                        "Accelerate".to_string(),
909                        "<missing>".to_string(),
910                        "Engine".to_string(),
911                        "<missing>".to_string()
912                    ]
913                );
914            }
915            other => panic!("expected string array, got {other:?}"),
916        }
917    }
918
919    #[test]
920    fn split_cell_array_outputs_string_array() {
921        let values = vec![
922            Value::String("RunMat Snapshot".to_string()),
923            Value::String("Fusion Planner".to_string()),
924        ];
925        let cell = crate::make_cell(values, 2, 1).expect("cell");
926        let result = split_builtin(cell, vec![Value::String(" ".to_string())]).expect("split");
927        match result {
928            Value::StringArray(array) => {
929                assert_eq!(array.shape, vec![2, 2]);
930                assert_eq!(
931                    array.data,
932                    vec![
933                        "RunMat".to_string(),
934                        "Fusion".to_string(),
935                        "Snapshot".to_string(),
936                        "Planner".to_string()
937                    ]
938                );
939            }
940            other => panic!("expected string array, got {other:?}"),
941        }
942    }
943
944    #[test]
945    fn split_cell_array_multiple_columns() {
946        let values = vec![
947            Value::String("alpha beta".to_string()),
948            Value::String("gamma".to_string()),
949            Value::String("delta epsilon".to_string()),
950            Value::String("<missing>".to_string()),
951        ];
952        let cell = crate::make_cell(values, 2, 2).expect("cell");
953        let result = split_builtin(cell, Vec::new()).expect("split");
954        match result {
955            Value::StringArray(array) => {
956                assert_eq!(array.shape, vec![2, 4]);
957                assert_eq!(
958                    array.data,
959                    vec![
960                        "alpha".to_string(),
961                        "delta".to_string(),
962                        "beta".to_string(),
963                        "epsilon".to_string(),
964                        "gamma".to_string(),
965                        "<missing>".to_string(),
966                        "<missing>".to_string(),
967                        "<missing>".to_string()
968                    ]
969                );
970            }
971            other => panic!("expected string array, got {other:?}"),
972        }
973    }
974
975    #[test]
976    fn split_missing_string_propagates() {
977        let input = Value::String("<missing>".to_string());
978        let result = split_builtin(input, Vec::new()).expect("split");
979        match result {
980            Value::StringArray(array) => {
981                assert_eq!(array.shape, vec![1, 1]);
982                assert_eq!(array.data, vec!["<missing>".to_string()]);
983            }
984            other => panic!("expected string array, got {other:?}"),
985        }
986    }
987
988    #[test]
989    fn split_invalid_name_value_pair_errors() {
990        let input = Value::String("abc".to_string());
991        let args = vec![Value::String("CollapseDelimiters".to_string())];
992        let err = split_builtin(input, args).unwrap_err();
993        assert!(err.contains("name-value"));
994    }
995
996    #[test]
997    fn split_invalid_text_argument_errors() {
998        let err = split_builtin(Value::Num(1.0), Vec::new()).unwrap_err();
999        assert!(err.contains("first argument"));
1000    }
1001
1002    #[test]
1003    fn split_invalid_delimiter_type_errors() {
1004        let err =
1005            split_builtin(Value::String("abc".to_string()), vec![Value::Num(1.0)]).unwrap_err();
1006        assert!(err.contains("delimiter input"));
1007    }
1008
1009    #[test]
1010    fn split_empty_delimiter_errors() {
1011        let err = split_builtin(
1012            Value::String("abc".to_string()),
1013            vec![Value::String(String::new())],
1014        )
1015        .unwrap_err();
1016        assert!(err.contains("at least one character"));
1017    }
1018
1019    #[test]
1020    fn split_unknown_name_argument_errors() {
1021        let err = split_builtin(
1022            Value::String("abc".to_string()),
1023            vec![
1024                Value::String("UnknownOption".to_string()),
1025                Value::Bool(true),
1026            ],
1027        )
1028        .unwrap_err();
1029        assert!(err.contains("unrecognized"));
1030    }
1031
1032    #[test]
1033    fn split_collapse_delimiters_accepts_logical_array() {
1034        let logical = LogicalArray::new(vec![1u8], vec![1]).unwrap();
1035        let args = vec![
1036            Value::String(",".to_string()),
1037            Value::String("CollapseDelimiters".to_string()),
1038            Value::LogicalArray(logical),
1039        ];
1040        let result = split_builtin(Value::String("a,,b".to_string()), args).expect("split");
1041        match result {
1042            Value::StringArray(array) => {
1043                assert_eq!(array.shape, vec![1, 2]);
1044                assert_eq!(array.data, vec!["a".to_string(), "b".to_string()]);
1045            }
1046            other => panic!("expected string array, got {other:?}"),
1047        }
1048    }
1049
1050    #[test]
1051    fn split_include_delimiters_accepts_tensor_scalar() {
1052        let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
1053        let args = vec![
1054            Value::String(",".to_string()),
1055            Value::String("IncludeDelimiters".to_string()),
1056            Value::Tensor(tensor),
1057        ];
1058        let result = split_builtin(Value::String("a,b".to_string()), args).expect("split");
1059        match result {
1060            Value::StringArray(array) => {
1061                assert_eq!(array.shape, vec![1, 3]);
1062                assert_eq!(
1063                    array.data,
1064                    vec!["a".to_string(), ",".to_string(), "b".to_string()]
1065                );
1066            }
1067            other => panic!("expected string array, got {other:?}"),
1068        }
1069    }
1070
1071    #[test]
1072    fn split_cell_array_mixed_inputs() {
1073        let handles: Vec<_> = vec![
1074            runmat_gc::gc_allocate(Value::String("alpha beta".to_string())).unwrap(),
1075            runmat_gc::gc_allocate(Value::CharArray(
1076                CharArray::new("gamma".chars().collect(), 1, 5).unwrap(),
1077            ))
1078            .unwrap(),
1079        ];
1080        let cell =
1081            Value::Cell(CellArray::new_handles(handles, 1, 2).expect("cell array construction"));
1082        let result = split_builtin(cell, Vec::new()).expect("split");
1083        match result {
1084            Value::StringArray(array) => {
1085                assert_eq!(array.shape, vec![1, 4]);
1086                assert_eq!(
1087                    array.data,
1088                    vec![
1089                        "alpha".to_string(),
1090                        "beta".to_string(),
1091                        "gamma".to_string(),
1092                        "<missing>".to_string()
1093                    ]
1094                );
1095            }
1096            other => panic!("expected string array, got {other:?}"),
1097        }
1098    }
1099
1100    #[test]
1101    #[cfg(feature = "doc_export")]
1102    fn doc_examples_present() {
1103        let blocks = test_support::doc_examples(DOC_MD);
1104        assert!(!blocks.is_empty());
1105    }
1106}