Skip to main content

runmat_runtime/builtins/cells/core/
cellstr.rs

1//! MATLAB-compatible `cellstr` builtin implemented for the modern RunMat runtime.
2
3use runmat_builtins::{CellArray, CharArray, StringArray, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::cells::type_resolvers::cellstr_type;
7use crate::builtins::common::spec::{
8    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
9    ReductionNaN, ResidencyPolicy, ShapeRequirements,
10};
11use crate::dispatcher::gather_if_needed_async;
12use crate::{build_runtime_error, make_cell, make_cell_with_shape, BuiltinResult, RuntimeError};
13
/// Message used when the top-level argument of `cellstr` is not one of the text-like
/// values that `cellstr_builtin` converts.
14const ERR_INPUT_NOT_TEXT: &str =
15    "cellstr: input must be a character array, string array, or cell array of character vectors";
/// Message used when an element of a cell-array argument cannot be turned into a
/// character vector by `coerce_to_char_vector`.
16const ERR_CELL_CONTENT_NOT_TEXT: &str =
17    "cellstr: cell array elements must be character vectors or string scalars";
18
// GPU dispatch metadata for `cellstr`, registered through `register_gpu_spec`.
// The spec lists no supported precisions and no provider hooks and requests
// `ResidencyPolicy::GatherImmediately`; its `notes` text describes the builtin as a
// host-only conversion whose output is always a host cell array.
19#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::cells::core::cellstr")]
20pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
21    name: "cellstr",
22    op_kind: GpuOpKind::Custom("text-convert"),
23    supported_precisions: &[],
24    broadcast: BroadcastSemantics::None,
25    provider_hooks: &[],
26    constant_strategy: ConstantStrategy::InlineLiteral,
27    residency: ResidencyPolicy::GatherImmediately,
    // NOTE(review): the NaN/threshold/workgroup fields below are presumably placeholders
    // for a text conversion with no numeric kernel — confirm against `BuiltinGpuSpec`.
28    nan_mode: ReductionNaN::Include,
29    two_pass_threshold: None,
30    workgroup_size: None,
31    accepts_nan_mode: false,
32    notes: "Host-only text conversion. Inputs originating on the GPU are gathered before processing, and the output is always a host cell array.",
33};
34
// Fusion metadata for `cellstr`, registered through `register_fusion_spec`.
// No elementwise or reduction descriptor is provided; per the `notes` text the builtin
// terminates fusion because it yields a host-resident cell array.
35#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::cells::core::cellstr")]
36pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
37    name: "cellstr",
38    shape: ShapeRequirements::Any,
39    constant_strategy: ConstantStrategy::InlineLiteral,
40    elementwise: None,
41    reduction: None,
42    emits_nan: false,
43    notes:
44        "Terminates fusion because the result is a host-resident cell array of character vectors.",
45};
46
/// Error identifier attached when the top-level argument is rejected (also used for the
/// string-array shape-mismatch error).
47const IDENT_INVALID_INPUT: &str = "RunMat:cellstr:InvalidInput";
/// Error identifier attached when a cell element is rejected by `coerce_to_char_vector`.
48const IDENT_INVALID_CONTENTS: &str = "RunMat:cellstr:InvalidContents";
49
/// Builds a [`RuntimeError`] from `message`, tagged with the builtin name `"cellstr"`.
///
/// No error identifier is attached; see `cellstr_error_with_identifier` for that variant.
50fn cellstr_error(message: impl Into<String>) -> RuntimeError {
51    build_runtime_error(message).with_builtin("cellstr").build()
52}
53
/// Builds a [`RuntimeError`] from `message`, tagged with the builtin name `"cellstr"` and
/// carrying the supplied error `identifier`.
54fn cellstr_error_with_identifier(message: impl Into<String>, identifier: &str) -> RuntimeError {
55    build_runtime_error(message)
56        .with_builtin("cellstr")
57        .with_identifier(identifier)
58        .build()
59}
60
61#[runtime_builtin(
62    name = "cellstr",
63    category = "cells/core",
64    summary = "Convert text to a cell array of character vectors.",
65    keywords = "cellstr,text,character,string,conversion",
66    accel = "gather",
67    type_resolver(cellstr_type),
68    builtin_path = "crate::builtins::cells::core::cellstr"
69)]
70async fn cellstr_builtin(value: Value) -> crate::BuiltinResult<Value> {
71    let host = gather_if_needed_async(&value).await?;
72    match host {
73        Value::CharArray(ca) => cellstr_from_char_array(ca),
74        Value::StringArray(sa) => cellstr_from_string_array(sa),
75        Value::String(text) => cellstr_from_string(text),
76        Value::Cell(cell) => cellstr_from_cell(cell).await,
77        Value::LogicalArray(_)
78        | Value::Bool(_)
79        | Value::Int(_)
80        | Value::Num(_)
81        | Value::Tensor(_)
82        | Value::Complex(_, _)
83        | Value::ComplexTensor(_)
84        | Value::Struct(_)
85        | Value::Object(_)
86        | Value::HandleObject(_)
87        | Value::Listener(_)
88        | Value::FunctionHandle(_)
89        | Value::Closure(_)
90        | Value::ClassRef(_)
91        | Value::MException(_)
92        | Value::OutputList(_) => Err(cellstr_error_with_identifier(
93            ERR_INPUT_NOT_TEXT,
94            IDENT_INVALID_INPUT,
95        )),
96        Value::GpuTensor(_) => Err(cellstr_error_with_identifier(
97            "cellstr: input must be gathered to the host before conversion",
98            IDENT_INVALID_INPUT,
99        )),
100    }
101}
102
103fn cellstr_from_string(text: String) -> BuiltinResult<Value> {
104    let row = Value::CharArray(CharArray::new_row(&text));
105    make_cell(vec![row], 1, 1).map_err(|e| cellstr_error(format!("cellstr: {e}")))
106}
107
108fn cellstr_from_char_array(ca: CharArray) -> BuiltinResult<Value> {
109    let rows = ca.rows;
110    let cols = ca.cols;
111    if rows == 0 {
112        return make_cell(Vec::new(), 0, 1).map_err(|e| cellstr_error(format!("cellstr: {e}")));
113    }
114    let mut values = Vec::with_capacity(rows);
115    for row in 0..rows {
116        let start = row * cols;
117        let end = start + cols;
118        let slice = &ca.data[start..end];
119        let trimmed = trim_trailing_spaces(slice);
120        values.push(Value::CharArray(CharArray::new_row(&trimmed)));
121    }
122    make_cell(values, rows, 1).map_err(|e| cellstr_error(format!("cellstr: {e}")))
123}
124
125fn cellstr_from_string_array(sa: StringArray) -> BuiltinResult<Value> {
126    let shape = if sa.shape.is_empty() {
127        vec![sa.rows.max(1), sa.cols.max(1)]
128    } else {
129        sa.shape.clone()
130    };
131    let total = shape.iter().product::<usize>();
132    if total == 0 {
133        return make_cell_with_shape(Vec::new(), shape)
134            .map_err(|e| cellstr_error(format!("cellstr: {e}")));
135    }
136    if total != sa.data.len() {
137        return Err(cellstr_error_with_identifier(
138            "cellstr: internal string array shape mismatch",
139            IDENT_INVALID_INPUT,
140        ));
141    }
142    let mut values = Vec::with_capacity(total);
143    for row_major in 0..total {
144        let coords = linear_to_multi_row_major(row_major, &shape);
145        let column_major = multi_to_linear_column_major(&coords, &shape);
146        let text = sa.data[column_major].clone();
147        values.push(Value::CharArray(CharArray::new_row(&text)));
148    }
149    make_cell_with_shape(values, shape).map_err(|e| cellstr_error(format!("cellstr: {e}")))
150}
151
152async fn cellstr_from_cell(cell: CellArray) -> BuiltinResult<Value> {
153    let mut values = Vec::with_capacity(cell.data.len());
154    for ptr in &cell.data {
155        let element = unsafe { &*ptr.as_raw() };
156        let gathered = gather_if_needed_async(element).await?;
157        values.push(coerce_to_char_vector(gathered)?);
158    }
159    make_cell_with_shape(values, cell.shape.clone())
160        .map_err(|e| cellstr_error(format!("cellstr: {e}")))
161}
162
163fn coerce_to_char_vector(value: Value) -> BuiltinResult<Value> {
164    match value {
165        Value::CharArray(ca) => {
166            if ca.rows == 1 || (ca.rows == 0 && ca.cols == 0) {
167                Ok(Value::CharArray(ca))
168            } else {
169                Err(cellstr_error_with_identifier(
170                    ERR_CELL_CONTENT_NOT_TEXT,
171                    IDENT_INVALID_CONTENTS,
172                ))
173            }
174        }
175        Value::String(text) => Ok(Value::CharArray(CharArray::new_row(&text))),
176        Value::StringArray(sa) => {
177            if sa.data.len() == 1 {
178                Ok(Value::CharArray(CharArray::new_row(&sa.data[0])))
179            } else {
180                Err(cellstr_error_with_identifier(
181                    ERR_CELL_CONTENT_NOT_TEXT,
182                    IDENT_INVALID_CONTENTS,
183                ))
184            }
185        }
186        Value::Num(_)
187        | Value::Int(_)
188        | Value::Bool(_)
189        | Value::Tensor(_)
190        | Value::LogicalArray(_)
191        | Value::Complex(_, _)
192        | Value::ComplexTensor(_)
193        | Value::GpuTensor(_) => Err(cellstr_error_with_identifier(
194            ERR_CELL_CONTENT_NOT_TEXT,
195            IDENT_INVALID_CONTENTS,
196        )),
197        Value::Cell(_) | Value::Struct(_) | Value::Object(_) | Value::HandleObject(_) => Err(
198            cellstr_error_with_identifier(ERR_CELL_CONTENT_NOT_TEXT, IDENT_INVALID_CONTENTS),
199        ),
200        other => Err(cellstr_error_with_identifier(
201            format!("cellstr: unsupported cell element {other:?}"),
202            IDENT_INVALID_CONTENTS,
203        )),
204    }
205}
206
/// Collects `chars` into a `String`, dropping any run of `' '` characters at the end.
///
/// Only the space character (U+0020) is removed; tabs and other whitespace are kept, as
/// are spaces that are not part of the trailing run.
fn trim_trailing_spaces(chars: &[char]) -> String {
    // One past the last non-space character, or 0 when the slice is empty or all spaces.
    let keep = chars
        .iter()
        .rposition(|&ch| ch != ' ')
        .map_or(0, |last| last + 1);
    chars[..keep].iter().collect()
}
214
/// Decomposes a row-major linear `index` into one coordinate per dimension of `shape`.
///
/// The last dimension varies fastest. A dimension with extent 0 contributes coordinate 0
/// and does not consume any of the index. An empty `shape` yields an empty vector.
fn linear_to_multi_row_major(mut index: usize, shape: &[usize]) -> Vec<usize> {
    let mut coords = vec![0usize; shape.len()];
    // Walk the dimensions from last to first, peeling off one coordinate per step.
    for (slot, &extent) in coords.iter_mut().zip(shape).rev() {
        if extent != 0 {
            *slot = index % extent;
            index /= extent;
        }
    }
    coords
}
230
/// Folds per-dimension `coords` into a column-major linear offset for `shape`.
///
/// The first dimension varies fastest. If any visited dimension has extent 0 the result
/// is 0. `shape` is indexed by coordinate position, so it must be at least as long as
/// `coords`.
fn multi_to_linear_column_major(coords: &[usize], shape: &[usize]) -> usize {
    let mut offset = 0usize;
    let mut step = 1usize;
    for (axis, coord) in coords.iter().copied().enumerate() {
        match shape[axis] {
            0 => return 0,
            extent => {
                offset += coord * step;
                step *= extent;
            }
        }
    }
    offset
}
244
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use futures::executor::block_on;

    /// Blocking wrapper so the synchronous tests can drive the async builtin.
    fn cellstr_builtin(value: Value) -> BuiltinResult<Value> {
        block_on(super::cellstr_builtin(value))
    }

    /// Runs `cellstr` on `input` and unwraps the cell array it is expected to return.
    fn cellstr_cell(input: Value) -> CellArray {
        match cellstr_builtin(input).expect("cellstr") {
            Value::Cell(cell) => cell,
            other => panic!("expected cell result, got {other:?}"),
        }
    }

    /// Runs `cellstr` on `input`, expects a failure, and returns the error text.
    fn cellstr_error_text(input: Value) -> String {
        cellstr_builtin(input)
            .expect_err("expected error")
            .to_string()
    }

    /// Flattens a cell of char arrays into strings, in the cell's storage order.
    fn cell_to_strings(cell: &CellArray) -> Vec<String> {
        cell.data
            .iter()
            .map(|ptr| match unsafe { &*ptr.as_raw() } {
                Value::CharArray(ca) => ca.data.iter().collect(),
                other => panic!("expected CharArray in cell, found {other:?}"),
            })
            .collect()
    }

    /// Borrows the char array stored at `index` in the cell's data.
    fn char_element(cell: &CellArray, index: usize) -> &CharArray {
        match unsafe { &*cell.data[index].as_raw() } {
            Value::CharArray(row) => row,
            other => panic!("expected CharArray, got {other:?}"),
        }
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn converts_char_matrix_and_trims() {
        let data: Vec<char> = "cat dog fox ".chars().collect();
        let ca = CharArray::new(data, 3, 4).expect("char array");
        let cell = cellstr_cell(Value::CharArray(ca));
        assert_eq!((cell.rows, cell.cols), (3, 1));
        assert_eq!(cell_to_strings(&cell), ["cat", "dog", "fox"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn converts_string_array_with_shape() {
        let data: Vec<String> = ["north", "south", "east", "west"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let sa = StringArray::new(data, vec![2, 2]).expect("string array");
        let cell = cellstr_cell(Value::StringArray(sa));
        assert_eq!((cell.rows, cell.cols), (2, 2));
        assert_eq!(cell_to_strings(&cell), ["north", "east", "south", "west"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn converts_string_scalar() {
        let cell = cellstr_cell(Value::String("RunMat".to_string()));
        assert_eq!((cell.rows, cell.cols), (1, 1));
        assert_eq!(cell_to_strings(&cell), ["RunMat"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn normalises_cell_elements() {
        let alpha = Value::CharArray(CharArray::new_row("alpha"));
        let beta = Value::String("beta".to_string());
        let input = crate::make_cell(vec![alpha, beta], 1, 2).expect("cell");
        let cell = cellstr_cell(input);
        assert_eq!((cell.rows, cell.cols), (1, 2));
        assert_eq!(cell_to_strings(&cell), ["alpha", "beta"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rejects_non_text_cell_element() {
        let input = crate::make_cell(vec![Value::Num(1.0)], 1, 1).expect("cell");
        let err = cellstr_error_text(input);
        assert!(err.contains("cell array elements must be"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rejects_multirow_char_element() {
        let ca = CharArray::new(vec!['a', 'b', 'c', 'd'], 2, 2).expect("char array");
        let input = crate::make_cell(vec![Value::CharArray(ca)], 1, 1).expect("cell");
        let err = cellstr_error_text(input);
        assert!(err.contains("cell array elements must be"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rejects_non_text_input() {
        let err = cellstr_error_text(Value::Num(std::f64::consts::PI));
        assert!(err.contains("input must be"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn handles_empty_char_array() {
        let ca = CharArray::new(Vec::new(), 0, 5).expect("empty char");
        let cell = cellstr_cell(Value::CharArray(ca));
        assert_eq!((cell.rows, cell.cols), (0, 1));
        assert!(cell.data.is_empty());
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_row_of_spaces_becomes_empty_vector() {
        let ca = CharArray::new(vec![' '; 3], 1, 3).expect("char array");
        let cell = cellstr_cell(Value::CharArray(ca));
        assert_eq!((cell.rows, cell.cols), (1, 1));
        let row = char_element(&cell, 0);
        assert_eq!((row.rows, row.cols), (1, 0));
        assert!(row.data.is_empty());
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn cell_elements_preserve_trailing_spaces() {
        let ca = CharArray::new(vec!['a', ' ', ' '], 1, 3).expect("char array");
        let input = crate::make_cell(vec![Value::CharArray(ca.clone())], 1, 1).expect("cell");
        let cell = cellstr_cell(input);
        assert_eq!((cell.rows, cell.cols), (1, 1));
        let row = char_element(&cell, 0);
        assert_eq!((row.rows, row.cols), (ca.rows, ca.cols));
        assert_eq!(row.data, ca.data);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_array_missing_value_converts() {
        let sa = StringArray::new(vec!["<missing>".to_string()], vec![1, 1]).expect("string array");
        let cell = cellstr_cell(Value::StringArray(sa));
        assert_eq!(cell_to_strings(&cell), ["<missing>"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn empty_string_array_produces_empty_cell_shape() {
        let sa = StringArray::new(Vec::new(), vec![0, 2]).expect("string array");
        let cell = cellstr_cell(Value::StringArray(sa));
        assert_eq!((cell.rows, cell.cols), (0, 2));
        assert!(cell.data.is_empty());
    }
}