Skip to main content

runmat_runtime/builtins/strings/transform/
split.rs

1//! MATLAB-compatible `split` builtin with GPU-aware semantics for RunMat.
2
3use std::collections::HashSet;
4
5use runmat_builtins::{CellArray, CharArray, StringArray, Value};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::map_control_flow_with_builtin;
9use crate::builtins::common::spec::{
10    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
11    ReductionNaN, ResidencyPolicy, ShapeRequirements,
12};
13use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
14use crate::builtins::strings::type_resolvers::string_array_type;
15use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
16
17#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::transform::split")]
18pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
19    name: "split",
20    op_kind: GpuOpKind::Custom("string-transform"),
21    supported_precisions: &[],
22    broadcast: BroadcastSemantics::None,
23    provider_hooks: &[],
24    constant_strategy: ConstantStrategy::InlineLiteral,
25    residency: ResidencyPolicy::GatherImmediately,
26    nan_mode: ReductionNaN::Include,
27    two_pass_threshold: None,
28    workgroup_size: None,
29    accepts_nan_mode: false,
30    notes: "Executes on the CPU; GPU-resident inputs are gathered to host memory before splitting.",
31};
32
33#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::transform::split")]
34pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
35    name: "split",
36    shape: ShapeRequirements::Any,
37    constant_strategy: ConstantStrategy::InlineLiteral,
38    elementwise: None,
39    reduction: None,
40    emits_nan: false,
41    notes: "String transformation builtin; not eligible for fusion planning and always gathers GPU inputs.",
42};
43
/// Name attached to every runtime error raised by this builtin.
const BUILTIN_NAME: &str = "split";

/// Raised when the text argument is not a supported text container.
const ARG_TYPE_ERROR: &str =
    "split: first argument must be a string scalar, string array, character array, or cell array of character vectors";

/// Raised when the delimiter argument is not a supported text container.
const DELIMITER_TYPE_ERROR: &str =
    "split: delimiter input must be a string scalar, string array, character array, or cell array of character vectors";

/// Raised when a recognised option name is not followed by a value.
const NAME_VALUE_PAIR_ERROR: &str = "split: name-value arguments must be supplied in pairs";

/// Raised when an argument in option-name position is not a known name.
const UNKNOWN_NAME_ERROR: &str =
    "split: unrecognized name-value argument; supported names are 'CollapseDelimiters' and 'IncludeDelimiters'";

/// Raised when the delimiter list is empty or contains an empty delimiter.
const EMPTY_DELIMITER_ERROR: &str = "split: delimiters must contain at least one character";

/// Raised when a cell element cannot be converted to a single string.
const CELL_ELEMENT_ERROR: &str =
    "split: cell array elements must be string scalars or character vectors";
55
56fn runtime_error_for(message: impl Into<String>) -> RuntimeError {
57    build_runtime_error(message)
58        .with_builtin(BUILTIN_NAME)
59        .build()
60}
61
62fn map_flow(err: RuntimeError) -> RuntimeError {
63    map_control_flow_with_builtin(err, BUILTIN_NAME)
64}
65
66#[runtime_builtin(
67    name = "split",
68    category = "strings/transform",
69    summary = "Split strings, character arrays, and cell arrays into substrings using delimiters.",
70    keywords = "split,strsplit,delimiter,CollapseDelimiters,IncludeDelimiters",
71    accel = "sink",
72    type_resolver(string_array_type),
73    builtin_path = "crate::builtins::strings::transform::split"
74)]
75async fn split_builtin(text: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
76    let text = gather_if_needed_async(&text).await.map_err(map_flow)?;
77    let mut args: Vec<Value> = Vec::with_capacity(rest.len());
78    for arg in rest {
79        args.push(gather_if_needed_async(&arg).await.map_err(map_flow)?);
80    }
81
82    let options = SplitOptions::parse(&args)?;
83    let matrix = TextMatrix::from_value(text)?;
84    matrix.into_split_result(&options)
85}
86
/// How the input text is cut into tokens.
#[derive(Clone)]
enum DelimiterSpec {
    /// No delimiter argument was supplied: split on Unicode whitespace.
    Whitespace,
    /// Split on any of these literal patterns (stored longest first by the parser).
    Patterns(Vec<String>),
}
92
93#[derive(Clone)]
94struct SplitOptions {
95    delimiters: DelimiterSpec,
96    collapse_delimiters: bool,
97    include_delimiters: bool,
98}
99
100impl SplitOptions {
101    fn parse(args: &[Value]) -> BuiltinResult<Self> {
102        let mut index = 0usize;
103        let mut delimiters = DelimiterSpec::Whitespace;
104
105        if index < args.len() && !is_name_key(&args[index]) {
106            let list = extract_delimiters(&args[index])?;
107            if list.is_empty() {
108                return Err(runtime_error_for(EMPTY_DELIMITER_ERROR));
109            }
110            let mut seen = HashSet::new();
111            let mut patterns: Vec<String> = Vec::new();
112            for pattern in list {
113                if pattern.is_empty() {
114                    return Err(runtime_error_for(EMPTY_DELIMITER_ERROR));
115                }
116                if seen.insert(pattern.clone()) {
117                    patterns.push(pattern);
118                }
119            }
120            patterns.sort_by_key(|pat| std::cmp::Reverse(pat.len()));
121            delimiters = DelimiterSpec::Patterns(patterns);
122            index += 1;
123        }
124
125        let mut collapse = match delimiters {
126            DelimiterSpec::Whitespace => true,
127            DelimiterSpec::Patterns(_) => false,
128        };
129        let mut include = false;
130
131        while index < args.len() {
132            let name = match name_key(&args[index]) {
133                Some(NameKey::CollapseDelimiters) => NameKey::CollapseDelimiters,
134                Some(NameKey::IncludeDelimiters) => NameKey::IncludeDelimiters,
135                None => return Err(runtime_error_for(UNKNOWN_NAME_ERROR)),
136            };
137            index += 1;
138            if index >= args.len() {
139                return Err(runtime_error_for(NAME_VALUE_PAIR_ERROR));
140            }
141            let value = &args[index];
142            index += 1;
143
144            match name {
145                NameKey::CollapseDelimiters => {
146                    collapse = parse_bool(value, "CollapseDelimiters")?;
147                }
148                NameKey::IncludeDelimiters => {
149                    include = parse_bool(value, "IncludeDelimiters")?;
150                }
151            }
152        }
153
154        Ok(Self {
155            delimiters,
156            collapse_delimiters: collapse,
157            include_delimiters: include,
158        })
159    }
160}
161
/// Text input normalised to a 2-D grid of strings.
struct TextMatrix {
    /// One string per element; `into_split_result` indexes it as `row + col * rows`.
    data: Vec<String>,
    /// Number of rows in the grid.
    rows: usize,
    /// Number of columns in the grid.
    cols: usize,
}
167
168impl TextMatrix {
169    fn from_value(value: Value) -> BuiltinResult<Self> {
170        match value {
171            Value::String(text) => Ok(Self {
172                data: vec![text],
173                rows: 1,
174                cols: 1,
175            }),
176            Value::StringArray(array) => Ok(Self {
177                data: array.data,
178                rows: array.rows,
179                cols: array.cols,
180            }),
181            Value::CharArray(array) => Self::from_char_array(array),
182            Value::Cell(cell) => Self::from_cell_array(cell),
183            _ => Err(runtime_error_for(ARG_TYPE_ERROR)),
184        }
185    }
186
187    fn from_char_array(array: CharArray) -> BuiltinResult<Self> {
188        let CharArray { data, rows, cols } = array;
189        if rows == 0 {
190            return Ok(Self {
191                data: Vec::new(),
192                rows: 0,
193                cols: 1,
194            });
195        }
196        let mut strings = Vec::with_capacity(rows);
197        for row in 0..rows {
198            strings.push(char_row_to_string_slice(&data, cols, row));
199        }
200        Ok(Self {
201            data: strings,
202            rows,
203            cols: 1,
204        })
205    }
206
207    fn from_cell_array(cell: CellArray) -> BuiltinResult<Self> {
208        let CellArray {
209            data, rows, cols, ..
210        } = cell;
211        let mut strings = Vec::with_capacity(data.len());
212        for col in 0..cols {
213            for row in 0..rows {
214                let idx = row * cols + col;
215                let value_ref: &Value = &data[idx];
216                strings.push(
217                    cell_element_to_string(value_ref)
218                        .ok_or_else(|| runtime_error_for(CELL_ELEMENT_ERROR))?,
219                );
220            }
221        }
222        Ok(Self {
223            data: strings,
224            rows,
225            cols,
226        })
227    }
228
229    fn into_split_result(self, options: &SplitOptions) -> BuiltinResult<Value> {
230        let TextMatrix { data, rows, cols } = self;
231
232        if data.is_empty() {
233            let block_cols = if cols == 0 { 0 } else { 1 };
234            let shape = if cols == 0 {
235                vec![rows, 0]
236            } else {
237                vec![rows, cols * block_cols]
238            };
239            let array = StringArray::new(Vec::new(), shape)
240                .map_err(|e| runtime_error_for(format!("{BUILTIN_NAME}: {e}")))?;
241            return Ok(Value::StringArray(array));
242        }
243
244        let mut per_element: Vec<Vec<String>> = Vec::with_capacity(data.len());
245        let mut max_tokens = 0usize;
246        for text in &data {
247            let tokens = split_text(text, options);
248            max_tokens = max_tokens.max(tokens.len());
249            per_element.push(tokens);
250        }
251        if max_tokens == 0 {
252            max_tokens = 1;
253        }
254        let block_cols = max_tokens;
255        let result_cols = block_cols * cols.max(1);
256        let total = rows * result_cols;
257        let missing = "<missing>".to_string();
258        let mut output = vec![missing.clone(); total];
259
260        for col in 0..cols.max(1) {
261            for row in 0..rows {
262                let element_index = if cols == 0 { row } else { row + col * rows };
263                if element_index >= per_element.len() {
264                    continue;
265                }
266                let tokens = &per_element[element_index];
267                for t in 0..block_cols {
268                    let out_col = if cols == 0 { t } else { col * block_cols + t };
269                    let out_index = row + out_col * rows;
270                    if out_index >= output.len() {
271                        continue;
272                    }
273                    if t < tokens.len() {
274                        output[out_index] = tokens[t].clone();
275                    } else {
276                        output[out_index] = missing.clone();
277                    }
278                }
279            }
280        }
281
282        let shape = vec![rows, result_cols];
283        let array = StringArray::new(output, shape)
284            .map_err(|e| runtime_error_for(format!("{BUILTIN_NAME}: {e}")))?;
285        Ok(Value::StringArray(array))
286    }
287}
288
289fn split_text(text: &str, options: &SplitOptions) -> Vec<String> {
290    if is_missing_string(text) {
291        return vec![text.to_string()];
292    }
293    match &options.delimiters {
294        DelimiterSpec::Whitespace => split_whitespace(text, options),
295        DelimiterSpec::Patterns(patterns) => split_by_patterns(text, patterns, options),
296    }
297}
298
299fn split_whitespace(text: &str, options: &SplitOptions) -> Vec<String> {
300    if text.is_empty() {
301        return vec![String::new()];
302    }
303
304    let mut parts: Vec<String> = Vec::new();
305    let mut idx = 0usize;
306    let mut last = 0usize;
307    let len = text.len();
308
309    while idx < len {
310        let ch = text[idx..].chars().next().unwrap();
311        let width = ch.len_utf8();
312        if !ch.is_whitespace() {
313            idx += width;
314            continue;
315        }
316
317        let token = &text[last..idx];
318        if !token.is_empty() || !options.collapse_delimiters {
319            parts.push(token.to_string());
320        }
321
322        let run_end = advance_whitespace(text, idx);
323        if options.include_delimiters {
324            if options.collapse_delimiters {
325                parts.push(text[idx..run_end].to_string());
326            } else {
327                parts.push(text[idx..idx + width].to_string());
328            }
329        }
330
331        if options.collapse_delimiters {
332            idx = run_end;
333            last = run_end;
334        } else {
335            idx += width;
336            last = idx;
337        }
338    }
339
340    let tail = &text[last..];
341    if !tail.is_empty() || !options.collapse_delimiters {
342        parts.push(tail.to_string());
343    }
344    if parts.is_empty() {
345        parts.push(String::new());
346    }
347    parts
348}
349
350fn split_by_patterns(text: &str, patterns: &[String], options: &SplitOptions) -> Vec<String> {
351    if patterns.is_empty() {
352        return vec![text.to_string()];
353    }
354
355    let mut parts: Vec<String> = Vec::new();
356    let mut idx = 0usize;
357    let mut last = 0usize;
358    while idx < text.len() {
359        if let Some(pattern) = patterns
360            .iter()
361            .find(|candidate| text[idx..].starts_with(candidate.as_str()))
362        {
363            let token = &text[last..idx];
364            if !token.is_empty() || !options.collapse_delimiters {
365                parts.push(token.to_string());
366            }
367
368            let pat_len = pattern.len();
369            if options.collapse_delimiters {
370                let mut run_end = idx + pat_len;
371                while run_end < text.len() {
372                    if let Some(next) = patterns
373                        .iter()
374                        .find(|candidate| text[run_end..].starts_with(candidate.as_str()))
375                    {
376                        let len = next.len();
377                        if len == 0 {
378                            break;
379                        }
380                        run_end += len;
381                    } else {
382                        break;
383                    }
384                }
385                if options.include_delimiters {
386                    parts.push(text[idx..run_end].to_string());
387                }
388                idx = run_end;
389                last = run_end;
390            } else {
391                if options.include_delimiters {
392                    parts.push(text[idx..idx + pat_len].to_string());
393                }
394                idx += pat_len;
395                last = idx;
396            }
397
398            continue;
399        }
400        let ch = text[idx..].chars().next().unwrap();
401        idx += ch.len_utf8();
402    }
403    let tail = &text[last..];
404    if !tail.is_empty() || !options.collapse_delimiters {
405        parts.push(tail.to_string());
406    }
407    if parts.is_empty() {
408        parts.push(String::new());
409    }
410    parts
411}
412
/// Returns the byte offset of the first non-whitespace character at or after
/// `start`, or `text.len()` if only whitespace remains.
///
/// A `start` at or beyond the end of `text` is returned unchanged.
fn advance_whitespace(text: &str, mut start: usize) -> usize {
    if start < text.len() {
        start += text[start..]
            .chars()
            .take_while(|ch| ch.is_whitespace())
            .map(char::len_utf8)
            .sum::<usize>();
    }
    start
}
423
424fn extract_delimiters(value: &Value) -> BuiltinResult<Vec<String>> {
425    match value {
426        Value::String(text) => Ok(vec![text.clone()]),
427        Value::StringArray(array) => Ok(array.data.clone()),
428        Value::CharArray(array) => {
429            if array.rows == 0 {
430                return Ok(Vec::new());
431            }
432            let mut entries = Vec::with_capacity(array.rows);
433            for row in 0..array.rows {
434                entries.push(char_row_to_string_slice(&array.data, array.cols, row));
435            }
436            Ok(entries)
437        }
438        Value::Cell(cell) => {
439            let mut entries = Vec::with_capacity(cell.data.len());
440            for element in &cell.data {
441                entries.push(
442                    cell_element_to_string(element)
443                        .ok_or_else(|| runtime_error_for(CELL_ELEMENT_ERROR))?,
444                );
445            }
446            Ok(entries)
447        }
448        _ => Err(runtime_error_for(DELIMITER_TYPE_ERROR)),
449    }
450}
451
452fn cell_element_to_string(value: &Value) -> Option<String> {
453    match value {
454        Value::String(text) => Some(text.clone()),
455        Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
456        Value::CharArray(array) if array.rows <= 1 => {
457            if array.rows == 0 {
458                Some(String::new())
459            } else {
460                Some(char_row_to_string_slice(&array.data, array.cols, 0))
461            }
462        }
463        _ => None,
464    }
465}
466
467fn value_to_scalar_string(value: &Value) -> Option<String> {
468    match value {
469        Value::String(text) => Some(text.clone()),
470        Value::StringArray(array) if array.data.len() == 1 => Some(array.data[0].clone()),
471        Value::CharArray(array) if array.rows <= 1 => {
472            if array.rows == 0 {
473                Some(String::new())
474            } else {
475                Some(char_row_to_string_slice(&array.data, array.cols, 0))
476            }
477        }
478        Value::Cell(cell) if cell.data.len() == 1 => cell_element_to_string(&cell.data[0]),
479        _ => None,
480    }
481}
482
483fn parse_bool(value: &Value, name: &str) -> BuiltinResult<bool> {
484    match value {
485        Value::Bool(b) => Ok(*b),
486        Value::Int(i) => Ok(i.to_i64() != 0),
487        Value::Num(n) => Ok(*n != 0.0),
488        Value::LogicalArray(array) => {
489            if array.data.len() == 1 {
490                Ok(array.data[0] != 0)
491            } else {
492                Err(runtime_error_for(format!(
493                    "{BUILTIN_NAME}: value for '{}' must be logical true or false",
494                    name
495                )))
496            }
497        }
498        Value::Tensor(tensor) => {
499            if tensor.data.len() == 1 {
500                Ok(tensor.data[0] != 0.0)
501            } else {
502                Err(runtime_error_for(format!(
503                    "{BUILTIN_NAME}: value for '{}' must be logical true or false",
504                    name
505                )))
506            }
507        }
508        _ => {
509            if let Some(text) = value_to_scalar_string(value) {
510                let lowered = text.trim().to_ascii_lowercase();
511                match lowered.as_str() {
512                    "true" | "on" | "yes" => Ok(true),
513                    "false" | "off" | "no" => Ok(false),
514                    _ => Err(runtime_error_for(format!(
515                        "{BUILTIN_NAME}: value for '{}' must be logical true or false",
516                        name
517                    ))),
518                }
519            } else {
520                Err(runtime_error_for(format!(
521                    "{BUILTIN_NAME}: value for '{}' must be logical true or false",
522                    name
523                )))
524            }
525        }
526    }
527}
528
/// Option names recognised in the name-value portion of the argument list.
#[derive(PartialEq, Eq)]
enum NameKey {
    /// The `CollapseDelimiters` option.
    CollapseDelimiters,
    /// The `IncludeDelimiters` option.
    IncludeDelimiters,
}
534
535fn is_name_key(value: &Value) -> bool {
536    name_key(value).is_some()
537}
538
539fn name_key(value: &Value) -> Option<NameKey> {
540    value_to_scalar_string(value).and_then(|text| {
541        let lowered = text.trim().to_ascii_lowercase();
542        match lowered.as_str() {
543            "collapsedelimiters" => Some(NameKey::CollapseDelimiters),
544            "includedelimiters" => Some(NameKey::IncludeDelimiters),
545            _ => None,
546        }
547    })
548}
549
550#[cfg(test)]
551pub(crate) mod tests {
552    use super::*;
553    use runmat_builtins::{CellArray, LogicalArray, ResolveContext, Tensor, Type};
554
555    fn split_builtin(text: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
556        futures::executor::block_on(super::split_builtin(text, rest))
557    }
558
559    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
560    #[test]
561    fn split_string_whitespace_default() {
562        let input = Value::String("RunMat Accelerate Planner".to_string());
563        let result = split_builtin(input, Vec::new()).expect("split");
564        match result {
565            Value::StringArray(array) => {
566                assert_eq!(array.shape, vec![1, 3]);
567                assert_eq!(
568                    array.data,
569                    vec![
570                        "RunMat".to_string(),
571                        "Accelerate".to_string(),
572                        "Planner".to_string()
573                    ]
574                );
575            }
576            other => panic!("expected string array, got {other:?}"),
577        }
578    }
579
580    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
581    #[test]
582    fn split_string_custom_delimiter() {
583        let input = Value::String("alpha,beta,gamma".to_string());
584        let args = vec![Value::String(",".to_string())];
585        let result = split_builtin(input, args).expect("split");
586        match result {
587            Value::StringArray(array) => {
588                assert_eq!(array.shape, vec![1, 3]);
589                assert_eq!(
590                    array.data,
591                    vec!["alpha".to_string(), "beta".to_string(), "gamma".to_string()]
592                );
593            }
594            other => panic!("expected string array, got {other:?}"),
595        }
596    }
597
598    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
599    #[test]
600    fn split_include_delimiters_true() {
601        let input = Value::String("A+B-C".to_string());
602        let args = vec![
603            Value::StringArray(
604                StringArray::new(vec!["+".to_string(), "-".to_string()], vec![1, 2]).unwrap(),
605            ),
606            Value::String("IncludeDelimiters".to_string()),
607            Value::Bool(true),
608        ];
609        let result = split_builtin(input, args).expect("split");
610        match result {
611            Value::StringArray(array) => {
612                assert_eq!(array.shape, vec![1, 5]);
613                assert_eq!(
614                    array.data,
615                    vec![
616                        "A".to_string(),
617                        "+".to_string(),
618                        "B".to_string(),
619                        "-".to_string(),
620                        "C".to_string()
621                    ]
622                );
623            }
624            other => panic!("expected string array, got {other:?}"),
625        }
626    }
627
628    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
629    #[test]
630    fn split_include_delimiters_whitespace_collapse_default() {
631        let input = Value::String("A  B".to_string());
632        let args = vec![
633            Value::String("IncludeDelimiters".to_string()),
634            Value::Bool(true),
635        ];
636        let result = split_builtin(input, args).expect("split");
637        match result {
638            Value::StringArray(array) => {
639                assert_eq!(array.shape, vec![1, 3]);
640                assert_eq!(
641                    array.data,
642                    vec!["A".to_string(), "  ".to_string(), "B".to_string()]
643                );
644            }
645            other => panic!("expected string array, got {other:?}"),
646        }
647    }
648
649    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
650    #[test]
651    fn split_patterns_include_delimiters_collapse_true() {
652        let input = Value::String("a,,b".to_string());
653        let args = vec![
654            Value::String(",".to_string()),
655            Value::String("IncludeDelimiters".to_string()),
656            Value::Bool(true),
657            Value::String("CollapseDelimiters".to_string()),
658            Value::Bool(true),
659        ];
660        let result = split_builtin(input, args).expect("split");
661        match result {
662            Value::StringArray(array) => {
663                assert_eq!(array.shape, vec![1, 3]);
664                assert_eq!(
665                    array.data,
666                    vec!["a".to_string(), ",,".to_string(), "b".to_string()]
667                );
668            }
669            other => panic!("expected string array, got {other:?}"),
670        }
671    }
672
673    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
674    #[test]
675    fn split_collapse_false_preserves_empty_segments() {
676        let input = Value::String("one,,three,".to_string());
677        let args = vec![
678            Value::String(",".to_string()),
679            Value::String("CollapseDelimiters".to_string()),
680            Value::Bool(false),
681        ];
682        let result = split_builtin(input, args).expect("split");
683        match result {
684            Value::StringArray(array) => {
685                assert_eq!(array.shape, vec![1, 4]);
686                assert_eq!(
687                    array.data,
688                    vec![
689                        "one".to_string(),
690                        "".to_string(),
691                        "three".to_string(),
692                        "".to_string()
693                    ]
694                );
695            }
696            other => panic!("expected string array, got {other:?}"),
697        }
698    }
699
700    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
701    #[test]
702    fn split_character_array_rows() {
703        let mut row1: Vec<char> = "GPU Accelerate".chars().collect();
704        let mut row2: Vec<char> = "Ignition Engine".chars().collect();
705        let width = row1.len().max(row2.len());
706        row1.resize(width, ' ');
707        row2.resize(width, ' ');
708        let mut data = row1;
709        data.extend(row2);
710        let char_array = CharArray::new(data, 2, width).unwrap();
711        let input = Value::CharArray(char_array);
712        let result = split_builtin(input, Vec::new()).expect("split");
713        match result {
714            Value::StringArray(array) => {
715                assert_eq!(array.shape, vec![2, 2]);
716                assert_eq!(
717                    array.data,
718                    vec![
719                        "GPU".to_string(),
720                        "Ignition".to_string(),
721                        "Accelerate".to_string(),
722                        "Engine".to_string()
723                    ]
724                );
725            }
726            other => panic!("expected string array, got {other:?}"),
727        }
728    }
729
730    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
731    #[test]
732    fn split_string_array_multiple_columns() {
733        let data = vec![
734            "RunMat Core".to_string(),
735            "Ignition Interpreter".to_string(),
736            "Accelerate Engine".to_string(),
737            "<missing>".to_string(),
738        ];
739        let array = StringArray::new(data, vec![2, 2]).unwrap();
740        let input = Value::StringArray(array);
741        let result = split_builtin(input, Vec::new()).expect("split");
742        match result {
743            Value::StringArray(array) => {
744                assert_eq!(array.shape, vec![2, 4]);
745                assert_eq!(
746                    array.data,
747                    vec![
748                        "RunMat".to_string(),
749                        "Ignition".to_string(),
750                        "Core".to_string(),
751                        "Interpreter".to_string(),
752                        "Accelerate".to_string(),
753                        "<missing>".to_string(),
754                        "Engine".to_string(),
755                        "<missing>".to_string()
756                    ]
757                );
758            }
759            other => panic!("expected string array, got {other:?}"),
760        }
761    }
762
763    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
764    #[test]
765    fn split_cell_array_outputs_string_array() {
766        let values = vec![
767            Value::String("RunMat Snapshot".to_string()),
768            Value::String("Fusion Planner".to_string()),
769        ];
770        let cell = crate::make_cell(values, 2, 1).expect("cell");
771        let result = split_builtin(cell, vec![Value::String(" ".to_string())]).expect("split");
772        match result {
773            Value::StringArray(array) => {
774                assert_eq!(array.shape, vec![2, 2]);
775                assert_eq!(
776                    array.data,
777                    vec![
778                        "RunMat".to_string(),
779                        "Fusion".to_string(),
780                        "Snapshot".to_string(),
781                        "Planner".to_string()
782                    ]
783                );
784            }
785            other => panic!("expected string array, got {other:?}"),
786        }
787    }
788
789    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
790    #[test]
791    fn split_cell_array_multiple_columns() {
792        let values = vec![
793            Value::String("alpha beta".to_string()),
794            Value::String("gamma".to_string()),
795            Value::String("delta epsilon".to_string()),
796            Value::String("<missing>".to_string()),
797        ];
798        let cell = crate::make_cell(values, 2, 2).expect("cell");
799        let result = split_builtin(cell, Vec::new()).expect("split");
800        match result {
801            Value::StringArray(array) => {
802                assert_eq!(array.shape, vec![2, 4]);
803                assert_eq!(
804                    array.data,
805                    vec![
806                        "alpha".to_string(),
807                        "delta".to_string(),
808                        "beta".to_string(),
809                        "epsilon".to_string(),
810                        "gamma".to_string(),
811                        "<missing>".to_string(),
812                        "<missing>".to_string(),
813                        "<missing>".to_string()
814                    ]
815                );
816            }
817            other => panic!("expected string array, got {other:?}"),
818        }
819    }
820
821    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
822    #[test]
823    fn split_missing_string_propagates() {
824        let input = Value::String("<missing>".to_string());
825        let result = split_builtin(input, Vec::new()).expect("split");
826        match result {
827            Value::StringArray(array) => {
828                assert_eq!(array.shape, vec![1, 1]);
829                assert_eq!(array.data, vec!["<missing>".to_string()]);
830            }
831            other => panic!("expected string array, got {other:?}"),
832        }
833    }
834
835    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
836    #[test]
837    fn split_invalid_name_value_pair_errors() {
838        let input = Value::String("abc".to_string());
839        let args = vec![Value::String("CollapseDelimiters".to_string())];
840        let err = split_builtin(input, args).unwrap_err();
841        assert!(err.to_string().contains("name-value"));
842    }
843
844    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
845    #[test]
846    fn split_invalid_text_argument_errors() {
847        let err = split_builtin(Value::Num(1.0), Vec::new()).unwrap_err();
848        assert!(err.to_string().contains("first argument"));
849    }
850
851    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
852    #[test]
853    fn split_invalid_delimiter_type_errors() {
854        let err =
855            split_builtin(Value::String("abc".to_string()), vec![Value::Num(1.0)]).unwrap_err();
856        assert!(err.to_string().contains("delimiter input"));
857    }
858
859    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
860    #[test]
861    fn split_empty_delimiter_errors() {
862        let err = split_builtin(
863            Value::String("abc".to_string()),
864            vec![Value::String(String::new())],
865        )
866        .unwrap_err();
867        assert!(err.to_string().contains("at least one character"));
868    }
869
870    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
871    #[test]
872    fn split_unknown_name_argument_errors() {
873        let err = split_builtin(
874            Value::String("abc".to_string()),
875            vec![
876                Value::String("UnknownOption".to_string()),
877                Value::Bool(true),
878            ],
879        )
880        .unwrap_err();
881        assert!(err.to_string().contains("unrecognized"));
882    }
883
884    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
885    #[test]
886    fn split_collapse_delimiters_accepts_logical_array() {
887        let logical = LogicalArray::new(vec![1u8], vec![1]).unwrap();
888        let args = vec![
889            Value::String(",".to_string()),
890            Value::String("CollapseDelimiters".to_string()),
891            Value::LogicalArray(logical),
892        ];
893        let result = split_builtin(Value::String("a,,b".to_string()), args).expect("split");
894        match result {
895            Value::StringArray(array) => {
896                assert_eq!(array.shape, vec![1, 2]);
897                assert_eq!(array.data, vec!["a".to_string(), "b".to_string()]);
898            }
899            other => panic!("expected string array, got {other:?}"),
900        }
901    }
902
903    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
904    #[test]
905    fn split_include_delimiters_accepts_tensor_scalar() {
906        let tensor = Tensor::new(vec![1.0], vec![1, 1]).unwrap();
907        let args = vec![
908            Value::String(",".to_string()),
909            Value::String("IncludeDelimiters".to_string()),
910            Value::Tensor(tensor),
911        ];
912        let result = split_builtin(Value::String("a,b".to_string()), args).expect("split");
913        match result {
914            Value::StringArray(array) => {
915                assert_eq!(array.shape, vec![1, 3]);
916                assert_eq!(
917                    array.data,
918                    vec!["a".to_string(), ",".to_string(), "b".to_string()]
919                );
920            }
921            other => panic!("expected string array, got {other:?}"),
922        }
923    }
924
925    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
926    #[test]
927    fn split_cell_array_mixed_inputs() {
928        let handles: Vec<_> = vec![
929            runmat_gc::gc_allocate(Value::String("alpha beta".to_string())).unwrap(),
930            runmat_gc::gc_allocate(Value::CharArray(
931                CharArray::new("gamma".chars().collect(), 1, 5).unwrap(),
932            ))
933            .unwrap(),
934        ];
935        let cell =
936            Value::Cell(CellArray::new_handles(handles, 1, 2).expect("cell array construction"));
937        let result = split_builtin(cell, Vec::new()).expect("split");
938        match result {
939            Value::StringArray(array) => {
940                assert_eq!(array.shape, vec![1, 4]);
941                assert_eq!(
942                    array.data,
943                    vec![
944                        "alpha".to_string(),
945                        "beta".to_string(),
946                        "gamma".to_string(),
947                        "<missing>".to_string()
948                    ]
949                );
950            }
951            other => panic!("expected string array, got {other:?}"),
952        }
953    }
954
955    #[test]
956    fn split_type_is_string_array() {
957        assert_eq!(
958            string_array_type(&[Type::String], &ResolveContext::new(Vec::new())),
959            Type::cell_of(Type::String)
960        );
961    }
962}