1use runmat_builtins::{CellArray, CharArray, LogicalArray, StringArray, Tensor, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::common::spec::{
7 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
8 ReductionNaN, ResidencyPolicy, ShapeRequirements,
9};
10#[cfg(feature = "doc_export")]
11use crate::register_builtin_doc_text;
12use crate::{gather_if_needed, register_builtin_fusion_spec, register_builtin_gpu_spec};
13
14#[cfg(feature = "doc_export")]
15pub const DOC_MD: &str = r#"---
16title: "char"
17category: "strings/core"
18keywords: ["char", "character array", "string conversion", "padding", "gpu"]
19summary: "Convert text, numeric codes, and cell contents into MATLAB-style character arrays."
20references:
21 - https://www.mathworks.com/help/matlab/ref/char.html
22gpu_support:
23 elementwise: false
24 reduction: false
25 precisions: []
26 broadcasting: "none"
27 notes: "Conversion always runs on the CPU; GPU tensors are gathered to host memory before building the character array."
28fusion:
29 elementwise: false
30 reduction: false
31 max_inputs: 0
32 constants: "inline"
33requires_feature: null
34tested:
35 unit: "builtins::strings::core::char::tests"
36 integration: "builtins::strings::core::char::tests::char_gpu_tensor_round_trip"
37---
38
39# What does the `char` function do in MATLAB / RunMat?
40`char` converts its inputs into a character array. Numeric inputs are interpreted as Unicode code
41points, text inputs become rows of characters, and cell elements or string scalars are padded with
42spaces when necessary so every row in the result has the same width.
43
44## How does the `char` function behave in MATLAB / RunMat?
- `char()` with no arguments returns a `0×0` character array.
46- Numeric arrays must be real integers. The output character array has the same shape (up to two
47 dimensions) as the numeric input.
48- String scalars and character vectors become individual rows. Rows are padded on the right with
49 spaces to match the longest row.
50- String arrays with one or two dimensions contribute one row per element using MATLAB's
51 column-major ordering.
52- Cell arrays must contain character vectors or string scalars. Each element becomes exactly one
53 row in the result.
54- Inputs may be mixed and are vertically concatenated in the order they appear.
55- Complex inputs are unsupported and raise MATLAB-compatible errors.
56
57## `char` Function GPU Execution Behaviour
58`char` gathers GPU tensors back to host memory using the active RunMat Accelerate provider before
59performing any conversion. The resulting character array always resides in host memory; providers
60do not need to supply specialised kernels.
61
62## GPU residency in RunMat (Do I need `gpuArray`?)
63You usually do **not** need to call `gpuArray` manually for `char`. The runtime recognises that this
64builtin materialises text on the host, gathers GPU tensors automatically, and keeps the character
65array in CPU memory. Wrap the result in `gpuArray(char(...))` only when you explicitly want the
66characters back on the device for subsequent GPU pipelines.
67
68## Examples of using the `char` function in MATLAB / RunMat
69
70### Converting a string scalar to a character row
71```matlab
72name = char("RunMat");
73```
74Expected output:
75```matlab
76name =
77 'RunMat'
78```
79
80### Building a character matrix from multiple rows
81```matlab
82rows = char("alpha", "beta");
83```
84Expected output:
85```matlab
86rows =
87 'alpha'
88 'beta '
89```
90
91### Transforming numeric codes to characters
92```matlab
93codes = [77 65 84 76 65 66];
94letters = char(codes);
95```
96Expected output:
97```matlab
98letters =
99 'MATLAB'
100```
101
102### Padding a string array into a character matrix
103```matlab
104animals = ["cat"; "giraffe"];
105C = char(animals);
106```
107Expected output:
108```matlab
109C =
110 'cat '
111 'giraffe'
112```
113
114### Creating rows from a cell array of character vectors
115```matlab
116dirs = {'north', 'east', 'west'};
117chart = char(dirs);
118```
119Expected output:
120```matlab
121chart =
122 'north'
123 'east '
124 'west '
125```
126
127### Converting GPU-resident codes back to text
128```matlab
129G = gpuArray([82 85 78 77 65 84]);
130label = char(G);
131```
132Expected output:
133```matlab
134label =
135 'RUNMAT'
136```
137RunMat downloads the numeric data from the GPU before constructing the character array.
138
139## FAQ
140
141### Does `char` accept numeric arrays with more than two dimensions?
142No. Numeric inputs must be scalars, vectors, or two-dimensional matrices. Higher-dimensional arrays
143raise an error so MATLAB's behaviour is preserved.
144
145### How are rows padded when lengths differ?
146Each row is right-padded with space characters so every row in the result has the same width as the
147longest row that was produced.
148
149### Can I convert cell arrays that contain empty text?
150Yes. Empty strings or character vectors become rows with zero columns; they still participate in
151padding when combined with longer rows.
152
153### What happens if a numeric value is not an integer?
154The builtin rejects non-integer numeric values. Use `round`, `floor`, or `uint32` beforehand if you
155need to convert floating-point values into valid code points.
156
157### Are code points above the Basic Multilingual Plane supported?
158Yes. Any integer that represents a valid Unicode scalar value (`0..0x10FFFF`, excluding surrogates)
159is accepted and converted to the corresponding character.
160
161### Can `char` convert complex numbers?
162No. Complex values are not supported because MATLAB also rejects them. Convert the data to real
163values before calling `char`.
164
165### Does `char` keep characters on the GPU?
166No. After conversion the result is a CPU-resident character array. Use `gpuArray(char(...))` if you
167need to move the result back to the device.
168
169## See Also
170[string](./string), [gpuArray](../../acceleration/gpu/gpuArray), [gather](../../acceleration/gpu/gather)
171
172## Source & Feedback
173- The full source code for the implementation of the `char` function is available at: [`crates/runmat-runtime/src/builtins/strings/core/char.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/core/char.rs)
174- Found a bug or behavioral difference? Please [open an issue](https://github.com/runmat-org/runmat/issues/new/choose) with details and a minimal repro.
175"#;
176
/// GPU execution metadata for the `char` builtin.
///
/// Describes `char` as a custom "conversion" operation with no supported GPU precisions, no
/// broadcasting and no provider hooks; the residency policy is `GatherImmediately`.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "char",
    op_kind: GpuOpKind::Custom("conversion"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Conversion always runs on the CPU; GPU tensors are gathered before building the result.",
};

// Hand the spec to the crate's GPU-spec registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
194
/// Fusion metadata for the `char` builtin.
///
/// Both the `elementwise` and `reduction` entries are `None`; per the `notes` field, character
/// materialisation runs outside of fusion and the result lives on the host.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "char",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "Character materialisation runs outside of fusion; results always live on the host.",
};

// Hand the spec to the crate's fusion-spec registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);
206
// Only compiled with the `doc_export` feature: associates the Markdown in `DOC_MD` with the
// builtin name "char".
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("char", DOC_MD);
209
210#[runtime_builtin(
211 name = "char",
212 category = "strings/core",
213 summary = "Convert numeric codes, strings, and cell contents into a character array.",
214 keywords = "char,character,string,gpu",
215 accel = "conversion"
216)]
217fn char_builtin(rest: Vec<Value>) -> Result<Value, String> {
218 if rest.is_empty() {
219 let empty = CharArray::new(Vec::new(), 0, 0).map_err(|e| format!("char: {e}"))?;
220 return Ok(Value::CharArray(empty));
221 }
222
223 let mut rows: Vec<Vec<char>> = Vec::new();
224 let mut max_width = 0usize;
225
226 for arg in rest {
227 let gathered = gather_if_needed(&arg)?;
228 let mut produced = value_to_char_rows(&gathered)?;
229 for row in &produced {
230 if row.len() > max_width {
231 max_width = row.len();
232 }
233 }
234 rows.append(&mut produced);
235 }
236
237 if rows.is_empty() {
238 let empty = CharArray::new(Vec::new(), 0, 0).map_err(|e| format!("char: {e}"))?;
239 return Ok(Value::CharArray(empty));
240 }
241
242 let cols = max_width;
243 let total_rows = rows.len();
244 let mut data = vec![' '; total_rows * cols];
245 for (row_idx, row) in rows.into_iter().enumerate() {
246 for (col_idx, ch) in row.into_iter().enumerate() {
247 if col_idx < cols {
248 data[row_idx * cols + col_idx] = ch;
249 }
250 }
251 }
252
253 let array = CharArray::new(data, total_rows, cols).map_err(|e| format!("char: {e}"))?;
254 Ok(Value::CharArray(array))
255}
256
257fn value_to_char_rows(value: &Value) -> Result<Vec<Vec<char>>, String> {
258 match value {
259 Value::CharArray(ca) => Ok(char_array_rows(ca)),
260 Value::String(s) => Ok(vec![s.chars().collect()]),
261 Value::StringArray(sa) => string_array_rows(sa),
262 Value::Num(n) => Ok(vec![vec![number_to_char(*n)?]]),
263 Value::Int(i) => {
264 let as_double = i.to_f64();
265 Ok(vec![vec![number_to_char(as_double)?]])
266 }
267 Value::Bool(b) => {
268 let code = if *b { 1.0 } else { 0.0 };
269 Ok(vec![vec![number_to_char(code)?]])
270 }
271 Value::Tensor(t) => tensor_rows(t),
272 Value::LogicalArray(la) => logical_rows(la),
273 Value::Cell(ca) => cell_rows(ca),
274 Value::GpuTensor(_) => Err("char: expected host data after gather".to_string()),
275 Value::Complex(_, _) | Value::ComplexTensor(_) => {
276 Err("char: complex inputs are not supported".to_string())
277 }
278 Value::Struct(_)
279 | Value::Object(_)
280 | Value::HandleObject(_)
281 | Value::Listener(_)
282 | Value::FunctionHandle(_)
283 | Value::Closure(_)
284 | Value::ClassRef(_)
285 | Value::MException(_) => Err(format!("char: unsupported input type {:?}", value)),
286 }
287}
288
289fn char_array_rows(ca: &CharArray) -> Vec<Vec<char>> {
290 let mut rows = Vec::with_capacity(ca.rows);
291 for r in 0..ca.rows {
292 let mut row = Vec::with_capacity(ca.cols);
293 for c in 0..ca.cols {
294 row.push(ca.data[r * ca.cols + c]);
295 }
296 rows.push(row);
297 }
298 rows
299}
300
301fn string_array_rows(sa: &StringArray) -> Result<Vec<Vec<char>>, String> {
302 ensure_two_dimensional(&sa.shape, "char")?;
303 if sa.data.is_empty() {
304 return Ok(Vec::new());
305 }
306 let mut rows = Vec::with_capacity(sa.data.len());
307 let rows_count = sa.rows();
308 let cols_count = sa.cols();
309 if rows_count == 0 || cols_count == 0 {
310 return Ok(Vec::new());
311 }
312 for c in 0..cols_count {
313 for r in 0..rows_count {
314 let idx = r + c * rows_count;
315 rows.push(sa.data[idx].chars().collect());
316 }
317 }
318 Ok(rows)
319}
320
321fn tensor_rows(t: &Tensor) -> Result<Vec<Vec<char>>, String> {
322 ensure_two_dimensional(&t.shape, "char")?;
323 let (rows, cols) = infer_rows_cols(&t.shape, t.data.len());
324 if rows == 0 {
325 return Ok(Vec::new());
326 }
327 let mut out = Vec::with_capacity(rows);
328 for r in 0..rows {
329 let mut row = Vec::with_capacity(cols);
330 for c in 0..cols {
331 if cols == 0 {
332 continue;
333 }
334 let idx = r + c * rows;
335 let value = t.data[idx];
336 row.push(number_to_char(value)?);
337 }
338 out.push(row);
339 }
340 Ok(out)
341}
342
343fn logical_rows(la: &LogicalArray) -> Result<Vec<Vec<char>>, String> {
344 ensure_two_dimensional(&la.shape, "char")?;
345 let (rows, cols) = infer_rows_cols(&la.shape, la.data.len());
346 if rows == 0 {
347 return Ok(Vec::new());
348 }
349 let mut out = Vec::with_capacity(rows);
350 for r in 0..rows {
351 let mut row = Vec::with_capacity(cols);
352 for c in 0..cols {
353 if cols == 0 {
354 continue;
355 }
356 let idx = r + c * rows;
357 let code = if la.data[idx] != 0 { 1.0 } else { 0.0 };
358 row.push(number_to_char(code)?);
359 }
360 out.push(row);
361 }
362 Ok(out)
363}
364
365fn cell_rows(ca: &CellArray) -> Result<Vec<Vec<char>>, String> {
366 let mut rows = Vec::with_capacity(ca.data.len());
367 for ptr in &ca.data {
368 let element = (**ptr).clone();
369 let mut converted = value_to_char_rows(&element)?;
370 match converted.len() {
371 0 => rows.push(Vec::new()),
372 1 => rows.push(converted.remove(0)),
373 _ => {
374 return Err(
375 "char: cell elements must be character vectors or string scalars".to_string(),
376 )
377 }
378 }
379 }
380 Ok(rows)
381}
382
/// Interprets a floating-point value as a Unicode code point.
///
/// The value must be finite and within `1e-9` of an integer; it is rounded to that integer before
/// conversion.
///
/// # Errors
/// Returns a `char:`-prefixed message when the value is NaN or infinite, is not (nearly) an
/// integer, is negative, exceeds `0x10FFFF`, or is not a valid Unicode scalar value (for example
/// a surrogate).
fn number_to_char(value: f64) -> Result<char, String> {
    const MAX_CODE_POINT: f64 = 0x10FFFF as f64;

    if !value.is_finite() {
        return Err("char: numeric inputs must be finite".to_string());
    }
    let rounded = value.round();
    if (value - rounded).abs() > 1e-9 {
        return Err(format!(
            "char: numeric inputs must be integers in the Unicode range (got {value})"
        ));
    }
    if rounded < 0.0 {
        return Err(format!(
            "char: negative code points are invalid (got {rounded})"
        ));
    }
    if rounded > MAX_CODE_POINT {
        // Print the float itself: an `as u64` cast saturates and would misreport huge inputs.
        return Err(format!("char: code point {rounded} exceeds Unicode range"));
    }
    // In range by the checks above, so the cast is exact.
    let code = rounded as u32;
    char::from_u32(code).ok_or_else(|| format!("char: invalid code point {code}"))
}
407
/// Checks that `shape` describes at most a two-dimensional array.
///
/// Dimensions beyond the second are tolerated only when every one of them equals 1.
///
/// # Errors
/// Returns `"{context}: inputs must be 2-D"` otherwise.
fn ensure_two_dimensional(shape: &[usize], context: &str) -> Result<(), String> {
    match shape {
        [_, _, trailing @ ..] if trailing.iter().any(|&extent| extent != 1) => {
            Err(format!("{context}: inputs must be 2-D"))
        }
        _ => Ok(()),
    }
}
417
/// Derives a `(rows, cols)` pair from an array shape.
///
/// - An empty shape is `(0, 0)` when `len` is zero and `(1, 1)` otherwise.
/// - A one-element shape is treated as a single row of that many columns.
/// - For two or more dimensions the first two extents are used; any further dimensions are
///   ignored here.
fn infer_rows_cols(shape: &[usize], len: usize) -> (usize, usize) {
    match *shape {
        [] if len == 0 => (0, 0),
        [] => (1, 1),
        [cols] => (1, cols),
        [rows, cols, ..] => (rows, cols),
    }
}
436
#[cfg(test)]
mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use runmat_builtins::StringArray;

    /// Unwraps a `Value::CharArray`, panicking with a diagnostic for any other variant.
    #[track_caller]
    fn expect_char_array(value: Value) -> CharArray {
        match value {
            Value::CharArray(ca) => ca,
            other => panic!("expected char array, got {other:?}"),
        }
    }

    /// Asserts the dimensions of a char result and that its buffer spells out `text`.
    #[track_caller]
    fn assert_char_matrix(value: Value, rows: usize, cols: usize, text: &str) {
        let ca = expect_char_array(value);
        assert_eq!(ca.rows, rows);
        assert_eq!(ca.cols, cols);
        assert_eq!(ca.data, text.chars().collect::<Vec<char>>());
    }

    #[test]
    fn char_no_arguments_returns_empty() {
        let result = char_builtin(Vec::new()).expect("char");
        assert_char_matrix(result, 0, 0, "");
    }

    #[test]
    fn char_from_string_scalar() {
        let value = Value::String("RunMat".to_string());
        let result = char_builtin(vec![value]).expect("char");
        assert_char_matrix(result, 1, 6, "RunMat");
    }

    #[test]
    fn char_from_numeric_tensor() {
        let tensor =
            Tensor::new(vec![82.0, 85.0, 78.0, 77.0, 65.0, 84.0], vec![1, 6]).expect("tensor");
        let result = char_builtin(vec![Value::Tensor(tensor)]).expect("char");
        assert_char_matrix(result, 1, 6, "RUNMAT");
    }

    #[test]
    fn char_from_string_array_with_padding() {
        let data = vec!["cat".to_string(), "giraffe".to_string()];
        let sa = StringArray::new(data, vec![2, 1]).expect("string array");
        let result = char_builtin(vec![Value::StringArray(sa)]).expect("char from string array");
        // "cat" is padded with four spaces to match the width of "giraffe".
        assert_char_matrix(result, 2, 7, "cat    giraffe");
    }

    #[test]
    fn char_from_cell_array_of_strings() {
        let cell = CellArray::new(
            vec![
                Value::from("north"),
                Value::from("east"),
                Value::from("west"),
            ],
            3,
            1,
        )
        .expect("cell array");
        let result = char_builtin(vec![Value::Cell(cell)]).expect("char");
        // "east" and "west" each gain one trailing space.
        assert_char_matrix(result, 3, 5, "northeast west ");
    }

    #[test]
    fn char_numeric_and_text_arguments_concatenate() {
        let text = Value::String("hi".to_string());
        let codes = Tensor::new(vec![65.0, 66.0], vec![1, 2]).expect("tensor");
        let result = char_builtin(vec![text, Value::Tensor(codes)]).expect("char");
        assert_char_matrix(result, 2, 2, "hiAB");
    }

    #[test]
    fn char_gpu_tensor_round_trip() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![82.0, 85.0, 78.0], vec![1, 3]).expect("tensor");
            let view = runmat_accelerate_api::HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let result = char_builtin(vec![Value::GpuTensor(handle)]).expect("char");
            assert_char_matrix(result, 1, 3, "RUN");
        });
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }

    #[test]
    fn char_rejects_non_integer_numeric() {
        let err = char_builtin(vec![Value::Num(65.5)]).expect_err("non-integer numeric");
        assert!(err.contains("integers"), "unexpected error message: {err}");
    }

    #[test]
    fn char_rejects_high_dimension_tensor() {
        let tensor =
            Tensor::new(vec![65.0, 66.0], vec![1, 1, 2]).expect("tensor construction failed");
        let err = char_builtin(vec![Value::Tensor(tensor)]).expect_err("should reject >2D tensor");
        assert!(err.contains("2-D"), "expected dimension error, got {err}");
    }

    #[test]
    fn char_string_array_column_major_order() {
        let data = ["c0r0", "c0r1", "c1r0", "c1r1"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let sa = StringArray::new(data, vec![2, 2]).expect("string array");
        let result = char_builtin(vec![Value::StringArray(sa)]).expect("char");
        assert_char_matrix(result, 4, 4, "c0r0c0r1c1r0c1r1");
    }

    #[test]
    fn char_rejects_high_dimension_string_array() {
        let sa = StringArray::new(vec!["a".to_string(), "b".to_string()], vec![1, 1, 2])
            .expect("string array");
        let err =
            char_builtin(vec![Value::StringArray(sa)]).expect_err("should reject >2D string array");
        assert!(err.contains("2-D"), "expected dimension error, got {err}");
    }

    #[test]
    fn char_rejects_complex_input() {
        let err = char_builtin(vec![Value::Complex(1.0, 2.0)]).expect_err("complex input");
        assert!(
            err.contains("complex"),
            "expected complex error message, got {err}"
        );
    }

    #[test]
    #[cfg(feature = "wgpu")]
    fn char_wgpu_numeric_codes_matches_cpu() {
        use runmat_accelerate::backend::wgpu::provider::{
            register_wgpu_provider, WgpuProviderOptions,
        };

        let _ = register_wgpu_provider(WgpuProviderOptions::default());

        let tensor = Tensor::new(vec![82.0, 85.0, 78.0], vec![1, 3]).unwrap();
        let cpu = char_builtin(vec![Value::Tensor(tensor.clone())]).expect("char cpu");

        let view = runmat_accelerate_api::HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        };
        let handle = runmat_accelerate_api::provider()
            .expect("wgpu provider")
            .upload(&view)
            .expect("upload");
        let gpu = char_builtin(vec![Value::GpuTensor(handle)]).expect("char gpu");

        match (cpu, gpu) {
            (Value::CharArray(expected), Value::CharArray(actual)) => {
                assert_eq!(actual, expected);
            }
            other => panic!("unexpected results {other:?}"),
        }
    }
}