Skip to main content

runmat_runtime/builtins/strings/core/
char.rs

1//! MATLAB-compatible `char` builtin with GPU-aware conversion semantics for RunMat.
2
3use runmat_builtins::{CellArray, CharArray, LogicalArray, StringArray, Tensor, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::common::map_control_flow_with_builtin;
7use crate::builtins::common::spec::{
8    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
9    ReductionNaN, ResidencyPolicy, ShapeRequirements,
10};
11use crate::builtins::strings::type_resolvers::string_array_type;
12use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
13
14#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::core::char")]
15pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
16    name: "char",
17    op_kind: GpuOpKind::Custom("conversion"),
18    supported_precisions: &[],
19    broadcast: BroadcastSemantics::None,
20    provider_hooks: &[],
21    constant_strategy: ConstantStrategy::InlineLiteral,
22    residency: ResidencyPolicy::GatherImmediately,
23    nan_mode: ReductionNaN::Include,
24    two_pass_threshold: None,
25    workgroup_size: None,
26    accepts_nan_mode: false,
27    notes:
28        "Conversion always runs on the CPU; GPU tensors are gathered before building the result.",
29};
30
31#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::core::char")]
32pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
33    name: "char",
34    shape: ShapeRequirements::Any,
35    constant_strategy: ConstantStrategy::InlineLiteral,
36    elementwise: None,
37    reduction: None,
38    emits_nan: false,
39    notes: "Character materialisation runs outside of fusion; results always live on the host.",
40};
41
42fn char_flow(message: impl Into<String>) -> RuntimeError {
43    build_runtime_error(message).with_builtin("char").build()
44}
45
46fn remap_char_flow(err: RuntimeError) -> RuntimeError {
47    map_control_flow_with_builtin(err, "char")
48}
49
50#[runtime_builtin(
51    name = "char",
52    category = "strings/core",
53    summary = "Convert numeric codes, strings, and cell contents into a character array.",
54    keywords = "char,character,string,gpu",
55    accel = "conversion",
56    type_resolver(string_array_type),
57    builtin_path = "crate::builtins::strings::core::char"
58)]
59async fn char_builtin(rest: Vec<Value>) -> crate::BuiltinResult<Value> {
60    if rest.is_empty() {
61        let empty =
62            CharArray::new(Vec::new(), 0, 0).map_err(|e| char_flow(format!("char: {e}")))?;
63        return Ok(Value::CharArray(empty));
64    }
65
66    let mut rows: Vec<Vec<char>> = Vec::new();
67    let mut max_width = 0usize;
68
69    for arg in rest {
70        let gathered = gather_if_needed_async(&arg)
71            .await
72            .map_err(remap_char_flow)?;
73        let mut produced = value_to_char_rows(&gathered)?;
74        for row in &produced {
75            if row.len() > max_width {
76                max_width = row.len();
77            }
78        }
79        rows.append(&mut produced);
80    }
81
82    if rows.is_empty() {
83        let empty =
84            CharArray::new(Vec::new(), 0, 0).map_err(|e| char_flow(format!("char: {e}")))?;
85        return Ok(Value::CharArray(empty));
86    }
87
88    let cols = max_width;
89    let total_rows = rows.len();
90    let mut data = vec![' '; total_rows * cols];
91    for (row_idx, row) in rows.into_iter().enumerate() {
92        for (col_idx, ch) in row.into_iter().enumerate() {
93            if col_idx < cols {
94                data[row_idx * cols + col_idx] = ch;
95            }
96        }
97    }
98
99    let array =
100        CharArray::new(data, total_rows, cols).map_err(|e| char_flow(format!("char: {e}")))?;
101    Ok(Value::CharArray(array))
102}
103
104fn value_to_char_rows(value: &Value) -> BuiltinResult<Vec<Vec<char>>> {
105    if let Some(array) = crate::builtins::datetime::datetime_char_array(value)
106        .map_err(|err| char_flow(err.message().to_string()))?
107    {
108        return Ok(char_array_rows(&array));
109    }
110    if let Some(array) = crate::builtins::duration::duration_char_array(value)
111        .map_err(|err| char_flow(err.message().to_string()))?
112    {
113        return Ok(char_array_rows(&array));
114    }
115    match value {
116        Value::CharArray(ca) => Ok(char_array_rows(ca)),
117        Value::String(s) => Ok(vec![s.chars().collect()]),
118        Value::StringArray(sa) => string_array_rows(sa),
119        Value::Num(n) => Ok(vec![vec![number_to_char(*n)?]]),
120        Value::Int(i) => {
121            let as_double = i.to_f64();
122            Ok(vec![vec![number_to_char(as_double)?]])
123        }
124        Value::Bool(b) => {
125            let code = if *b { 1.0 } else { 0.0 };
126            Ok(vec![vec![number_to_char(code)?]])
127        }
128        Value::Tensor(t) => tensor_rows(t),
129        Value::LogicalArray(la) => logical_rows(la),
130        Value::Cell(ca) => cell_rows(ca),
131        Value::GpuTensor(_) => Err(char_flow("char: expected host data after gather")),
132        Value::Complex(_, _) | Value::ComplexTensor(_) => {
133            Err(char_flow("char: complex inputs are not supported"))
134        }
135        Value::Struct(_)
136        | Value::Object(_)
137        | Value::HandleObject(_)
138        | Value::Listener(_)
139        | Value::FunctionHandle(_)
140        | Value::Closure(_)
141        | Value::ClassRef(_)
142        | Value::MException(_)
143        | Value::OutputList(_) => Err(char_flow(format!(
144            "char: unsupported input type {:?}",
145            value
146        ))),
147    }
148}
149
150fn char_array_rows(ca: &CharArray) -> Vec<Vec<char>> {
151    let mut rows = Vec::with_capacity(ca.rows);
152    for r in 0..ca.rows {
153        let mut row = Vec::with_capacity(ca.cols);
154        for c in 0..ca.cols {
155            row.push(ca.data[r * ca.cols + c]);
156        }
157        rows.push(row);
158    }
159    rows
160}
161
162fn string_array_rows(sa: &StringArray) -> BuiltinResult<Vec<Vec<char>>> {
163    ensure_two_dimensional(&sa.shape, "char")?;
164    if sa.data.is_empty() {
165        return Ok(Vec::new());
166    }
167    let mut rows = Vec::with_capacity(sa.data.len());
168    let rows_count = sa.rows();
169    let cols_count = sa.cols();
170    if rows_count == 0 || cols_count == 0 {
171        return Ok(Vec::new());
172    }
173    for c in 0..cols_count {
174        for r in 0..rows_count {
175            let idx = r + c * rows_count;
176            rows.push(sa.data[idx].chars().collect());
177        }
178    }
179    Ok(rows)
180}
181
182fn tensor_rows(t: &Tensor) -> BuiltinResult<Vec<Vec<char>>> {
183    ensure_two_dimensional(&t.shape, "char")?;
184    let (rows, cols) = infer_rows_cols(&t.shape, t.data.len());
185    if rows == 0 {
186        return Ok(Vec::new());
187    }
188    let mut out = Vec::with_capacity(rows);
189    for r in 0..rows {
190        let mut row = Vec::with_capacity(cols);
191        for c in 0..cols {
192            if cols == 0 {
193                continue;
194            }
195            let idx = r + c * rows;
196            let value = t.data[idx];
197            row.push(number_to_char(value)?);
198        }
199        out.push(row);
200    }
201    Ok(out)
202}
203
204fn logical_rows(la: &LogicalArray) -> BuiltinResult<Vec<Vec<char>>> {
205    ensure_two_dimensional(&la.shape, "char")?;
206    let (rows, cols) = infer_rows_cols(&la.shape, la.data.len());
207    if rows == 0 {
208        return Ok(Vec::new());
209    }
210    let mut out = Vec::with_capacity(rows);
211    for r in 0..rows {
212        let mut row = Vec::with_capacity(cols);
213        for c in 0..cols {
214            if cols == 0 {
215                continue;
216            }
217            let idx = r + c * rows;
218            let code = if la.data[idx] != 0 { 1.0 } else { 0.0 };
219            row.push(number_to_char(code)?);
220        }
221        out.push(row);
222    }
223    Ok(out)
224}
225
226fn cell_rows(ca: &CellArray) -> BuiltinResult<Vec<Vec<char>>> {
227    let mut rows = Vec::with_capacity(ca.data.len());
228    for ptr in &ca.data {
229        let element = (**ptr).clone();
230        let mut converted = value_to_char_rows(&element)?;
231        match converted.len() {
232            0 => rows.push(Vec::new()),
233            1 => rows.push(converted.remove(0)),
234            _ => {
235                return Err(char_flow(
236                    "char: cell elements must be character vectors or string scalars",
237                ))
238            }
239        }
240    }
241    Ok(rows)
242}
243
244fn number_to_char(value: f64) -> BuiltinResult<char> {
245    if !value.is_finite() {
246        return Err(char_flow("char: numeric inputs must be finite"));
247    }
248    let rounded = value.round();
249    if (value - rounded).abs() > 1e-9 {
250        return Err(char_flow(format!(
251            "char: numeric inputs must be integers in the Unicode range (got {value})"
252        )));
253    }
254    if rounded < 0.0 {
255        return Err(char_flow(format!(
256            "char: negative code points are invalid (got {rounded})"
257        )));
258    }
259    if rounded > 0x10FFFF as f64 {
260        return Err(char_flow(format!(
261            "char: code point {} exceeds Unicode range",
262            rounded as u64
263        )));
264    }
265    let code = rounded as u32;
266    char::from_u32(code).ok_or_else(|| char_flow(format!("char: invalid code point {code}")))
267}
268
269fn ensure_two_dimensional(shape: &[usize], context: &str) -> BuiltinResult<()> {
270    if shape.len() <= 2 {
271        return Ok(());
272    }
273    if shape.iter().skip(2).all(|&d| d == 1) {
274        return Ok(());
275    }
276    Err(char_flow(format!("{context}: inputs must be 2-D")))
277}
278
/// Derives a `(rows, cols)` pair from a shape vector.
///
/// * An empty shape is `(0, 0)` when `len` is zero and `(1, 1)` otherwise.
/// * A one-element shape is treated as a single row of that many columns.
/// * Longer shapes use their first two entries; later entries are ignored.
///
/// `len` is only consulted for the empty-shape case.
fn infer_rows_cols(shape: &[usize], len: usize) -> (usize, usize) {
    match shape {
        [] if len == 0 => (0, 0),
        [] => (1, 1),
        [cols] => (1, *cols),
        [rows, cols, ..] => (*rows, *cols),
    }
}
297
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use runmat_builtins::{ResolveContext, StringArray, Type};

    /// Synchronous wrapper that drives the async builtin to completion.
    fn char_builtin(rest: Vec<Value>) -> BuiltinResult<Value> {
        futures::executor::block_on(super::char_builtin(rest))
    }

    /// Extracts the message text of a runtime error.
    fn error_message(err: crate::RuntimeError) -> String {
        err.message().to_string()
    }

    /// Unwraps a `Value::CharArray`, panicking on any other variant.
    fn expect_char_array(value: Value) -> CharArray {
        match value {
            Value::CharArray(ca) => ca,
            other => panic!("expected char array, got {other:?}"),
        }
    }

    /// Collects the characters of `text` for comparison against `CharArray::data`.
    fn chars_of(text: &str) -> Vec<char> {
        text.chars().collect()
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_no_arguments_returns_empty() {
        let ca = expect_char_array(char_builtin(Vec::new()).expect("char"));
        assert_eq!(ca.rows, 0);
        assert_eq!(ca.cols, 0);
        assert!(ca.data.is_empty());
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_from_string_scalar() {
        let value = Value::String("RunMat".to_string());
        let ca = expect_char_array(char_builtin(vec![value]).expect("char"));
        assert_eq!(ca.rows, 1);
        assert_eq!(ca.cols, 6);
        assert_eq!(ca.data, chars_of("RunMat"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_from_numeric_tensor() {
        let tensor =
            Tensor::new(vec![82.0, 85.0, 78.0, 77.0, 65.0, 84.0], vec![1, 6]).expect("tensor");
        let ca = expect_char_array(char_builtin(vec![Value::Tensor(tensor)]).expect("char"));
        assert_eq!(ca.rows, 1);
        assert_eq!(ca.cols, 6);
        assert_eq!(ca.data, chars_of("RUNMAT"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_from_string_array_with_padding() {
        let data = vec!["cat".to_string(), "giraffe".to_string()];
        let sa = StringArray::new(data, vec![2, 1]).expect("string array");
        let ca = expect_char_array(
            char_builtin(vec![Value::StringArray(sa)]).expect("char from string array"),
        );
        assert_eq!(ca.rows, 2);
        assert_eq!(ca.cols, 7);
        assert_eq!(
            ca.data,
            vec!['c', 'a', 't', ' ', ' ', ' ', ' ', 'g', 'i', 'r', 'a', 'f', 'f', 'e']
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_from_cell_array_of_strings() {
        let elements = vec![
            Value::from("north"),
            Value::from("east"),
            Value::from("west"),
        ];
        let cell = CellArray::new(elements, 3, 1).expect("cell array");
        let ca = expect_char_array(char_builtin(vec![Value::Cell(cell)]).expect("char"));
        assert_eq!(ca.rows, 3);
        assert_eq!(ca.cols, 5);
        assert_eq!(
            ca.data,
            vec!['n', 'o', 'r', 't', 'h', 'e', 'a', 's', 't', ' ', 'w', 'e', 's', 't', ' ']
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_numeric_and_text_arguments_concatenate() {
        let text = Value::String("hi".to_string());
        let codes = Tensor::new(vec![65.0, 66.0], vec![1, 2]).expect("tensor");
        let ca = expect_char_array(char_builtin(vec![text, Value::Tensor(codes)]).expect("char"));
        assert_eq!(ca.rows, 2);
        assert_eq!(ca.cols, 2);
        assert_eq!(ca.data, vec!['h', 'i', 'A', 'B']);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_gpu_tensor_round_trip() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![82.0, 85.0, 78.0], vec![1, 3]).expect("tensor");
            let view = runmat_accelerate_api::HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let ca = expect_char_array(char_builtin(vec![Value::GpuTensor(handle)]).expect("char"));
            assert_eq!(ca.rows, 1);
            assert_eq!(ca.cols, 3);
            assert_eq!(ca.data, vec!['R', 'U', 'N']);
        });
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_rejects_non_integer_numeric() {
        let outcome = char_builtin(vec![Value::Num(65.5)]);
        let err = error_message(outcome.expect_err("non-integer numeric"));
        assert!(err.contains("integers"), "unexpected error message: {err}");
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_rejects_high_dimension_tensor() {
        let tensor =
            Tensor::new(vec![65.0, 66.0], vec![1, 1, 2]).expect("tensor construction failed");
        let outcome = char_builtin(vec![Value::Tensor(tensor)]);
        let err = error_message(outcome.expect_err("should reject >2D tensor"));
        assert!(err.contains("2-D"), "expected dimension error, got {err}");
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_string_array_column_major_order() {
        let data: Vec<String> = ["c0r0", "c0r1", "c1r0", "c1r1"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let sa = StringArray::new(data, vec![2, 2]).expect("string array");
        let ca = expect_char_array(char_builtin(vec![Value::StringArray(sa)]).expect("char"));
        assert_eq!(ca.rows, 4);
        assert_eq!(ca.cols, 4);
        assert_eq!(ca.data, "c0r0c0r1c1r0c1r1".chars().collect::<Vec<char>>());
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_rejects_high_dimension_string_array() {
        let sa = StringArray::new(vec!["a".to_string(), "b".to_string()], vec![1, 1, 2])
            .expect("string array");
        let outcome = char_builtin(vec![Value::StringArray(sa)]);
        let err = error_message(outcome.expect_err("should reject >2D string array"));
        assert!(err.contains("2-D"), "expected dimension error, got {err}");
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_rejects_complex_input() {
        let outcome = char_builtin(vec![Value::Complex(1.0, 2.0)]);
        let err = error_message(outcome.expect_err("complex input"));
        assert!(
            err.contains("complex"),
            "expected complex error message, got {err}"
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    #[cfg(feature = "wgpu")]
    fn char_wgpu_numeric_codes_matches_cpu() {
        use runmat_accelerate::backend::wgpu::provider::{
            register_wgpu_provider, WgpuProviderOptions,
        };

        // The registration result is deliberately ignored, as in the other
        // provider-backed tests of this module.
        let _ = register_wgpu_provider(WgpuProviderOptions::default());

        let tensor = Tensor::new(vec![82.0, 85.0, 78.0], vec![1, 3]).unwrap();
        let cpu = char_builtin(vec![Value::Tensor(tensor.clone())]).expect("char cpu");

        let view = runmat_accelerate_api::HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        };
        let handle = runmat_accelerate_api::provider()
            .expect("wgpu provider")
            .upload(&view)
            .expect("upload");
        let gpu = char_builtin(vec![Value::GpuTensor(handle)]).expect("char gpu");

        match (cpu, gpu) {
            (Value::CharArray(expected), Value::CharArray(actual)) => {
                assert_eq!(actual, expected);
            }
            other => panic!("unexpected results {other:?}"),
        }
    }

    #[test]
    fn char_type_is_string_array() {
        let resolved = string_array_type(&[Type::Num], &ResolveContext::new(Vec::new()));
        assert_eq!(resolved, Type::cell_of(Type::String));
    }
}