Skip to main content

runmat_runtime/builtins/strings/core/
string.rs

1//! MATLAB-compatible `string` builtin with GPU-aware conversion semantics for RunMat.
2
3use runmat_builtins::{
4    CharArray, ComplexTensor, IntValue, LogicalArray, StringArray, Tensor, Value,
5};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::format::{complex_to_string, format_variadic, number_to_string};
9use crate::builtins::common::map_control_flow_with_builtin;
10use crate::builtins::common::spec::{
11    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
12    ReductionNaN, ResidencyPolicy, ShapeRequirements,
13};
14use crate::builtins::common::tensor;
15use crate::builtins::strings::type_resolvers::string_array_type;
16use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
17
18#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::core::string")]
19pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
20    name: "string",
21    op_kind: GpuOpKind::Custom("conversion"),
22    supported_precisions: &[],
23    broadcast: BroadcastSemantics::None,
24    provider_hooks: &[],
25    constant_strategy: ConstantStrategy::InlineLiteral,
26    residency: ResidencyPolicy::GatherImmediately,
27    nan_mode: ReductionNaN::Include,
28    two_pass_threshold: None,
29    workgroup_size: None,
30    accepts_nan_mode: false,
31    notes: "Always converts on the CPU; GPU tensors are gathered to host memory before conversion.",
32};
33
34#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::core::string")]
35pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
36    name: "string",
37    shape: ShapeRequirements::Any,
38    constant_strategy: ConstantStrategy::InlineLiteral,
39    elementwise: None,
40    reduction: None,
41    emits_nan: false,
42    notes:
43        "Conversion builtin; not eligible for fusion and always materialises host string arrays.",
44};
45
46#[runtime_builtin(
47    name = "string",
48    category = "strings/core",
49    summary = "Convert numeric, logical, and text inputs into MATLAB string arrays.",
50    keywords = "string,convert,text,char,gpu",
51    accel = "sink",
52    type_resolver(string_array_type),
53    builtin_path = "crate::builtins::strings::core::string"
54)]
55async fn string_builtin(value: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
56    if rest.is_empty() {
57        let gathered = gather_if_needed_async(&value)
58            .await
59            .map_err(|flow| remap_string_flow(flow))?;
60        let array = convert_to_string_array(gathered, StringEncoding::Utf8).await?;
61        return Ok(Value::StringArray(array));
62    }
63
64    let mut args = rest;
65    let format_value = gather_if_needed_async(&value)
66        .await
67        .map_err(|flow| remap_string_flow(flow))?;
68
69    if args.len() == 1 {
70        let arg = args.pop().unwrap();
71        let gathered_arg = gather_if_needed_async(&arg)
72            .await
73            .map_err(|flow| remap_string_flow(flow))?;
74        if let Some(encoding) = try_encoding_argument(&format_value, &gathered_arg)? {
75            let array = convert_to_string_array(format_value, encoding).await?;
76            return Ok(Value::StringArray(array));
77        }
78        let formatted = format_from_spec(format_value, vec![gathered_arg]).await?;
79        return Ok(Value::StringArray(formatted));
80    }
81
82    let mut gathered_args = Vec::with_capacity(args.len());
83    for arg in args {
84        gathered_args.push(
85            gather_if_needed_async(&arg)
86                .await
87                .map_err(|flow| remap_string_flow(flow))?,
88        );
89    }
90    let formatted = format_from_spec(format_value, gathered_args).await?;
91    Ok(Value::StringArray(formatted))
92}
93
/// Character encodings understood by the `string` builtin.
///
/// UTF-8 is the only variant; `parse_encoding_text` maps every accepted encoding name to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringEncoding {
    /// UTF-8 text.
    Utf8,
}
98
99fn try_encoding_argument(
100    first: &Value,
101    candidate: &Value,
102) -> BuiltinResult<Option<StringEncoding>> {
103    if !matches!(
104        first,
105        Value::CharArray(_) | Value::String(_) | Value::StringArray(_) | Value::Cell(_)
106    ) {
107        return Ok(None);
108    }
109    if has_format_placeholders(first) {
110        return Ok(None);
111    }
112    if let Value::Cell(cell) = first {
113        if !cell_contains_only_text_scalars(cell) {
114            return Ok(None);
115        }
116    }
117    let Some(text) = value_to_scalar_text(candidate) else {
118        return Ok(None);
119    };
120    parse_encoding_text(&text).map(Some)
121}
122
123fn parse_encoding_text(raw: &str) -> BuiltinResult<StringEncoding> {
124    let trimmed = raw.trim();
125    let lowered = trimmed.to_ascii_lowercase();
126    match lowered.as_str() {
127        "utf-8" | "utf8" | "unicode" | "system" => Ok(StringEncoding::Utf8),
128        _ => Err(string_flow(format!(
129            "string: unsupported character encoding '{trimmed}'; only UTF-8 is available"
130        ))),
131    }
132}
133
134fn cell_contains_only_text_scalars(cell: &runmat_builtins::CellArray) -> bool {
135    cell.data.iter().all(|ptr| match &**ptr {
136        Value::String(_) => true,
137        Value::StringArray(sa) => sa.data.len() <= 1,
138        Value::CharArray(ca) => ca.rows <= 1,
139        _ => false,
140    })
141}
142
/// Reports whether `text` contains a printf-style conversion.
///
/// A conversion is a `%` followed by optional flags (`+`, `-`, `0`, `#`), optional width digits,
/// an optional `.` with precision digits, and then an ASCII letter. `%%` is skipped as an
/// escaped percent sign.
fn text_has_format_placeholder(text: &str) -> bool {
    let mut cursor = text.chars().peekable();
    while let Some(current) = cursor.next() {
        if current != '%' {
            continue;
        }
        // "%%" is a literal percent sign, not the start of a conversion.
        if cursor.next_if_eq(&'%').is_some() {
            continue;
        }
        // Flags.
        while cursor
            .next_if(|c| matches!(c, '+' | '-' | '0' | '#'))
            .is_some()
        {}
        // Field width.
        while cursor.next_if(|c| c.is_ascii_digit()).is_some() {}
        // Precision.
        if cursor.next_if_eq(&'.').is_some() {
            while cursor.next_if(|c| c.is_ascii_digit()).is_some() {}
        }
        // The conversion letter is only inspected; scanning resumes from it if it is not one.
        if matches!(cursor.peek(), Some(c) if c.is_ascii_alphabetic()) {
            return true;
        }
    }
    false
}
173
174fn has_format_placeholders(value: &Value) -> bool {
175    match value {
176        Value::String(s) => text_has_format_placeholder(s),
177        Value::StringArray(sa) => sa.data.iter().any(|s| text_has_format_placeholder(s)),
178        Value::CharArray(ca) => {
179            for row in 0..ca.rows {
180                let mut row_str = String::with_capacity(ca.cols);
181                for col in 0..ca.cols {
182                    row_str.push(ca.data[row * ca.cols + col]);
183                }
184                if text_has_format_placeholder(&row_str) {
185                    return true;
186                }
187            }
188            false
189        }
190        Value::Cell(cell) => {
191            for ptr in &cell.data {
192                let element = (**ptr).clone();
193                if has_format_placeholders(&element) {
194                    return true;
195                }
196            }
197            false
198        }
199        _ => false,
200    }
201}
202
/// Format specification text extracted from a `string`/format call.
#[derive(Debug)]
pub(crate) struct FormatSpecData {
    /// One format string per element of the specification argument.
    pub(crate) specs: Vec<String>,
    /// Shape recorded for the specification argument.
    pub(crate) shape: Vec<usize>,
}
207
208struct ArgumentData {
209    values: Vec<Value>,
210    shape: Vec<usize>,
211}
212
213fn string_flow(message: impl Into<String>) -> RuntimeError {
214    build_runtime_error(message).with_builtin("string").build()
215}
216
217fn remap_string_flow(err: RuntimeError) -> RuntimeError {
218    map_control_flow_with_builtin(err, "string")
219}
220
221pub(crate) async fn format_from_spec(
222    format_value: Value,
223    args: Vec<Value>,
224) -> crate::BuiltinResult<StringArray> {
225    let spec = extract_format_spec(format_value).await?;
226    let mut arguments = Vec::with_capacity(args.len());
227    for arg in args {
228        arguments.push(extract_argument_data(arg).await?);
229    }
230
231    let (target_len, mut target_shape) = resolve_target_shape(&spec, &arguments)?;
232
233    if target_len == 0 {
234        let shape = if target_shape.is_empty() {
235            if spec.shape.is_empty() {
236                vec![0, 0]
237            } else {
238                spec.shape.clone()
239            }
240        } else {
241            target_shape
242        };
243        return StringArray::new(Vec::new(), shape)
244            .map_err(|e| string_flow(format!("string: {e}")));
245    }
246
247    let spec_len = spec.specs.len();
248    if spec_len == 0 {
249        return Err(string_flow(
250            "string: formatSpec must contain at least one element when formatting with data",
251        ));
252    }
253
254    for arg in &arguments {
255        if target_len > 0 && arg.values.is_empty() {
256            return Err(string_flow(
257                "string: format data arguments must be scalars or match formatSpec size",
258            ));
259        }
260    }
261
262    let mut output = Vec::with_capacity(target_len);
263    for idx in 0..target_len {
264        let spec_idx = if spec_len == 1 { 0 } else { idx };
265        let spec_str = &spec.specs[spec_idx];
266        let mut per_call = Vec::with_capacity(arguments.len());
267        for arg in &arguments {
268            let value =
269                match arg.values.len() {
270                    0 => continue,
271                    1 => arg.values[0].clone(),
272                    len if len == target_len => arg.values[idx].clone(),
273                    _ => return Err(string_flow(
274                        "string: format data arguments must be scalars or match formatSpec size",
275                    )),
276                };
277            per_call.push(value);
278        }
279        let formatted =
280            format_variadic(spec_str, &per_call).map_err(|flow| remap_string_flow(flow))?;
281        output.push(formatted);
282    }
283
284    if target_shape.is_empty() {
285        target_shape = if spec_len > 1 {
286            spec.shape.clone()
287        } else {
288            vec![target_len, 1]
289        };
290    }
291
292    if tensor::element_count(&target_shape) != target_len {
293        target_shape = vec![target_len, 1];
294    }
295
296    StringArray::new(output, target_shape).map_err(|e| string_flow(format!("string: {e}")))
297}
298
299fn resolve_target_shape(
300    spec: &FormatSpecData,
301    args: &[ArgumentData],
302) -> BuiltinResult<(usize, Vec<usize>)> {
303    let mut target_len = spec.specs.len();
304    let mut target_shape = if target_len > 1 || (target_len == 1 && !spec.shape.is_empty()) {
305        spec.shape.clone()
306    } else {
307        Vec::new()
308    };
309
310    for arg in args {
311        let len = arg.values.len();
312        if len == 0 {
313            continue;
314        }
315        if target_len == 0 {
316            target_len = len;
317            target_shape = arg.shape.clone();
318            continue;
319        }
320        if len == 1 {
321            continue;
322        }
323        if target_len == 1 {
324            target_len = len;
325            target_shape = arg.shape.clone();
326            continue;
327        }
328        if len != target_len {
329            return Err(string_flow(
330                "string: format data arguments must be scalars or match formatSpec size",
331            ));
332        }
333        if target_shape.is_empty() && len > 1 {
334            target_shape = arg.shape.clone();
335        }
336    }
337
338    if target_len == 0 {
339        let shape = if spec.shape.is_empty() {
340            vec![0, 0]
341        } else {
342            spec.shape.clone()
343        };
344        return Ok((0, shape));
345    }
346
347    if target_shape.is_empty() {
348        target_shape = if spec.shape.is_empty() {
349            vec![target_len, 1]
350        } else {
351            spec.shape.clone()
352        };
353        if spec.specs.len() == 1 && tensor::element_count(&target_shape) != target_len {
354            target_shape = vec![target_len, 1];
355        }
356    }
357
358    if tensor::element_count(&target_shape) != target_len {
359        target_shape = vec![target_len, 1];
360    }
361
362    Ok((target_len, target_shape))
363}
364
365pub(crate) async fn extract_format_spec(value: Value) -> BuiltinResult<FormatSpecData> {
366    match value {
367        Value::String(s) => Ok(FormatSpecData {
368            specs: vec![s],
369            shape: vec![1, 1],
370        }),
371        Value::StringArray(sa) => Ok(FormatSpecData {
372            specs: sa.data.clone(),
373            shape: sa.shape.clone(),
374        }),
375        Value::CharArray(ca) => {
376            let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
377            Ok(FormatSpecData {
378                specs: array.data,
379                shape: array.shape,
380            })
381        }
382        Value::Cell(cell) => {
383            let mut specs = Vec::with_capacity(cell.data.len());
384            for col in 0..cell.cols {
385                for row in 0..cell.rows {
386                    let idx = row * cell.cols + col;
387                    let element = &cell.data[idx];
388                    let value = (**element).clone();
389                    let gathered = gather_if_needed_async(&value)
390                        .await
391                        .map_err(|flow| remap_string_flow(flow))?;
392                    let text = value_to_scalar_text(&gathered).ok_or_else(|| {
393                        string_flow("string: formatSpec cell elements must be text scalars")
394                    })?;
395                    specs.push(text);
396                }
397            }
398            Ok(FormatSpecData {
399                specs,
400                shape: vec![cell.rows, cell.cols],
401            })
402        }
403        _ => Err(string_flow(
404            "string: formatSpec must be text (string, char, or cellstr)",
405        )),
406    }
407}
408
409#[async_recursion::async_recursion(?Send)]
410async fn extract_argument_data(value: Value) -> BuiltinResult<ArgumentData> {
411    match value {
412        Value::String(s) => Ok(ArgumentData {
413            values: vec![Value::String(s)],
414            shape: vec![1, 1],
415        }),
416        Value::StringArray(sa) => Ok(ArgumentData {
417            values: sa.data.into_iter().map(Value::String).collect(),
418            shape: sa.shape,
419        }),
420        Value::CharArray(ca) => {
421            let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
422            Ok(ArgumentData {
423                values: array.data.into_iter().map(Value::String).collect(),
424                shape: array.shape,
425            })
426        }
427        Value::Num(n) => Ok(ArgumentData {
428            values: vec![Value::Num(n)],
429            shape: vec![1, 1],
430        }),
431        Value::Int(i) => Ok(ArgumentData {
432            values: vec![Value::Int(i)],
433            shape: vec![1, 1],
434        }),
435        Value::Bool(b) => Ok(ArgumentData {
436            values: vec![Value::Num(if b { 1.0 } else { 0.0 })],
437            shape: vec![1, 1],
438        }),
439        Value::Tensor(t) => Ok(ArgumentData {
440            values: t.data.into_iter().map(Value::Num).collect(),
441            shape: t.shape,
442        }),
443        Value::Complex(re, im) => Ok(ArgumentData {
444            values: vec![Value::String(complex_to_string(re, im))],
445            shape: vec![1, 1],
446        }),
447        Value::ComplexTensor(t) => Ok(ArgumentData {
448            values: t
449                .data
450                .into_iter()
451                .map(|(re, im)| Value::String(complex_to_string(re, im)))
452                .collect(),
453            shape: t.shape,
454        }),
455        Value::LogicalArray(la) => Ok(ArgumentData {
456            values: la
457                .data
458                .into_iter()
459                .map(|byte| Value::Num(if byte != 0 { 1.0 } else { 0.0 }))
460                .collect(),
461            shape: la.shape,
462        }),
463        Value::Cell(cell) => {
464            let mut values = Vec::with_capacity(cell.data.len());
465            for col in 0..cell.cols {
466                for row in 0..cell.rows {
467                    let idx = row * cell.cols + col;
468                    let element = &cell.data[idx];
469                    let value = (**element).clone();
470                    let gathered = gather_if_needed_async(&value)
471                        .await
472                        .map_err(|flow| remap_string_flow(flow))?;
473                    let value = match gathered {
474                        Value::String(s) => Value::String(s),
475                        Value::StringArray(sa) if sa.data.len() == 1 => {
476                            Value::String(sa.data[0].clone())
477                        }
478                        Value::CharArray(ca) => {
479                            if ca.rows != 1 {
480                                return Err(string_flow(
481                                    "string: cell format arguments must contain char row vectors",
482                                ));
483                            }
484                            let mut row_str = String::with_capacity(ca.cols);
485                            for ch in ca.data {
486                                row_str.push(ch);
487                            }
488                            Value::String(row_str)
489                        }
490                        Value::Num(n) => Value::Num(n),
491                        Value::Int(i) => Value::Int(i),
492                        Value::Bool(b) => Value::Num(if b { 1.0 } else { 0.0 }),
493                        Value::Tensor(t) => {
494                            if t.data.len() != 1 {
495                                return Err(string_flow(
496                                    "string: cell format arguments must contain scalar values",
497                                ));
498                            }
499                            Value::Num(t.data[0])
500                        }
501                        Value::LogicalArray(la) => {
502                            if la.data.len() != 1 {
503                                return Err(string_flow(
504                                    "string: cell format arguments must contain scalar values",
505                                ));
506                            }
507                            Value::Num(if la.data[0] != 0 { 1.0 } else { 0.0 })
508                        }
509                        Value::Complex(re, im) => Value::String(complex_to_string(re, im)),
510                        Value::ComplexTensor(t) => {
511                            if t.data.len() != 1 {
512                                return Err(string_flow(
513                                    "string: cell format arguments must contain scalar values",
514                                ));
515                            }
516                            let (re, im) = t.data[0];
517                            Value::String(complex_to_string(re, im))
518                        }
519                        other => {
520                            return Err(string_flow(format!(
521                                "string: unsupported cell format argument {other:?}; expected scalar text or numeric values"
522                            )))
523                        }
524                    };
525                    values.push(value);
526                }
527            }
528            Ok(ArgumentData {
529                values,
530                shape: vec![cell.rows, cell.cols],
531            })
532        }
533        Value::GpuTensor(handle) => {
534            let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
535                .await
536                .map_err(|flow| remap_string_flow(flow))?;
537            extract_argument_data(gathered).await
538        }
539        Value::MException(_)
540        | Value::HandleObject(_)
541        | Value::Object(_)
542        | Value::Listener(_)
543        | Value::Struct(_)
544        | Value::OutputList(_) => Err(string_flow("string: unsupported format argument type")),
545        Value::FunctionHandle(_) | Value::Closure(_) | Value::ClassRef(_) => {
546            Err(string_flow("string: unsupported format argument type"))
547        }
548    }
549}
550
551#[async_recursion::async_recursion(?Send)]
552async fn convert_to_string_array(
553    value: Value,
554    encoding: StringEncoding,
555) -> BuiltinResult<StringArray> {
556    if let Some(array) = crate::builtins::datetime::datetime_string_array(&value)
557        .map_err(|err| string_flow(err.message().to_string()))?
558    {
559        return Ok(array);
560    }
561    if let Some(array) = crate::builtins::duration::duration_string_array(&value)
562        .map_err(|err| string_flow(err.message().to_string()))?
563    {
564        return Ok(array);
565    }
566    match value {
567        Value::String(s) => string_scalar(s),
568        Value::StringArray(sa) => Ok(sa),
569        Value::CharArray(ca) => char_array_to_string_array(ca, encoding),
570        Value::Tensor(tensor) => tensor_to_string_array(tensor),
571        Value::ComplexTensor(tensor) => complex_tensor_to_string_array(tensor),
572        Value::LogicalArray(logical) => logical_array_to_string_array(logical),
573        Value::Cell(cell) => cell_array_to_string_array(cell, encoding).await,
574        Value::Num(n) => string_scalar(number_to_string(n)),
575        Value::Int(i) => string_scalar(int_value_to_string(&i)),
576        Value::Bool(b) => string_scalar(bool_to_string(b).to_string()),
577        Value::Complex(re, im) => string_scalar(complex_to_string(re, im)),
578        Value::GpuTensor(handle) => {
579            // Defensive fallback: gather and retry.
580            let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
581                .await
582                .map_err(|flow| remap_string_flow(flow))?;
583            convert_to_string_array(gathered, encoding).await
584        }
585        Value::Object(_) | Value::HandleObject(_) | Value::Listener(_) => Err(string_flow(
586            "string: unsupported conversion from handle-based objects. Use class-specific formatters.",
587        )),
588        Value::Struct(_) => Err(string_flow(
589            "string: structs are not supported for automatic conversion",
590        )),
591        Value::FunctionHandle(_)
592        | Value::Closure(_)
593        | Value::ClassRef(_)
594        | Value::MException(_)
595        | Value::OutputList(_) => Err(
596            string_flow("string: unsupported conversion for function or exception handles"),
597        ),
598    }
599}
600
601fn string_scalar<S: Into<String>>(text: S) -> BuiltinResult<StringArray> {
602    StringArray::new(vec![text.into()], vec![1, 1]).map_err(|e| string_flow(format!("string: {e}")))
603}
604
605fn value_to_scalar_text(value: &Value) -> Option<String> {
606    match value {
607        Value::String(s) => Some(s.clone()),
608        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
609        Value::CharArray(ca) if ca.rows == 1 => Some(ca.data.iter().collect()),
610        _ => None,
611    }
612}
613
614fn char_array_to_string_array(
615    array: CharArray,
616    _encoding: StringEncoding,
617) -> BuiltinResult<StringArray> {
618    let mut rows: Vec<String> = Vec::with_capacity(array.rows);
619    for r in 0..array.rows {
620        let mut row = String::with_capacity(array.cols);
621        for c in 0..array.cols {
622            row.push(array.data[r * array.cols + c]);
623        }
624        rows.push(row);
625    }
626    let shape = if array.rows == 0 {
627        vec![0, 1]
628    } else {
629        vec![array.rows, 1]
630    };
631    StringArray::new(rows, shape).map_err(|e| string_flow(format!("string: {e}")))
632}
633
634fn tensor_to_string_array(tensor: Tensor) -> BuiltinResult<StringArray> {
635    let mut strings = Vec::with_capacity(tensor.data.len());
636    for &value in &tensor.data {
637        strings.push(number_to_string(value));
638    }
639    StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
640}
641
642fn complex_tensor_to_string_array(tensor: ComplexTensor) -> BuiltinResult<StringArray> {
643    let mut strings = Vec::with_capacity(tensor.data.len());
644    for &(re, im) in &tensor.data {
645        strings.push(complex_to_string(re, im));
646    }
647    StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
648}
649
650fn logical_array_to_string_array(logical: LogicalArray) -> BuiltinResult<StringArray> {
651    let mut strings = Vec::with_capacity(logical.data.len());
652    for &byte in &logical.data {
653        strings.push(bool_to_string(byte != 0).to_string());
654    }
655    StringArray::new(strings, logical.shape).map_err(|e| string_flow(format!("string: {e}")))
656}
657
658async fn cell_array_to_string_array(
659    cell: runmat_builtins::CellArray,
660    _encoding: StringEncoding,
661) -> BuiltinResult<StringArray> {
662    let mut strings = Vec::with_capacity(cell.data.len());
663    for col in 0..cell.cols {
664        for row in 0..cell.rows {
665            let idx = row * cell.cols + col;
666            let element = &cell.data[idx];
667            let value = (**element).clone();
668            let gathered = gather_if_needed_async(&value)
669                .await
670                .map_err(|flow| remap_string_flow(flow))?;
671            strings.push(cell_element_to_string(&gathered)?);
672        }
673    }
674    StringArray::new(strings, vec![cell.rows, cell.cols])
675        .map_err(|e| string_flow(format!("string: {e}")))
676}
677
678fn cell_element_to_string(value: &Value) -> BuiltinResult<String> {
679    if let Some(array) = crate::builtins::datetime::datetime_string_array(value)
680        .map_err(|err| string_flow(err.message().to_string()))?
681    {
682        if array.data.len() == 1 {
683            return Ok(array.data[0].clone());
684        }
685        return Err(string_flow("string: cell datetime values must be scalar"));
686    }
687    if let Some(array) = crate::builtins::duration::duration_string_array(value)
688        .map_err(|err| string_flow(err.message().to_string()))?
689    {
690        if array.data.len() == 1 {
691            return Ok(array.data[0].clone());
692        }
693        return Err(string_flow("string: cell duration values must be scalar"));
694    }
695    match value {
696        Value::String(s) => Ok(s.clone()),
697        Value::StringArray(sa) => {
698            if sa.data.len() == 1 {
699                Ok(sa.data[0].clone())
700            } else {
701                Err(string_flow(
702                    "string: cell elements must contain string scalars, not string arrays",
703                ))
704            }
705        }
706        Value::CharArray(ca) => {
707            if ca.rows == 1 {
708                Ok(ca.data.iter().collect())
709            } else {
710                Err(string_flow(
711                    "string: cell character arrays must be row vectors",
712                ))
713            }
714        }
715        Value::Num(n) => Ok(number_to_string(*n)),
716        Value::Int(i) => Ok(int_value_to_string(i)),
717        Value::Bool(b) => Ok(bool_to_string(*b).to_string()),
718        Value::LogicalArray(array) => {
719            if array.data.len() == 1 {
720                Ok(bool_to_string(array.data[0] != 0).to_string())
721            } else {
722                Err(string_flow("string: cell logical values must be scalar"))
723            }
724        }
725        Value::Tensor(t) => {
726            if t.data.len() == 1 {
727                Ok(number_to_string(t.data[0]))
728            } else {
729                Err(string_flow("string: cell numeric values must be scalar"))
730            }
731        }
732        Value::Complex(re, im) => Ok(complex_to_string(*re, *im)),
733        Value::ComplexTensor(t) => {
734            if t.data.len() == 1 {
735                let (re, im) = t.data[0];
736                Ok(complex_to_string(re, im))
737            } else {
738                Err(string_flow("string: cell complex values must be scalar"))
739            }
740        }
741        other => Err(string_flow(format!(
742            "string: unsupported cell element type {:?}; expected text or scalar values",
743            other
744        ))),
745    }
746}
747
/// Returns the text used for a logical value: `"true"` or `"false"`.
fn bool_to_string(value: bool) -> &'static str {
    match value {
        true => "true",
        false => "false",
    }
}
755
756fn int_value_to_string(value: &IntValue) -> String {
757    match value {
758        IntValue::I8(v) => v.to_string(),
759        IntValue::I16(v) => v.to_string(),
760        IntValue::I32(v) => v.to_string(),
761        IntValue::I64(v) => v.to_string(),
762        IntValue::U8(v) => v.to_string(),
763        IntValue::U16(v) => v.to_string(),
764        IntValue::U32(v) => v.to_string(),
765        IntValue::U64(v) => v.to_string(),
766    }
767}
768
769#[cfg(test)]
770pub(crate) mod tests {
771    use super::*;
772    use crate::builtins::common::test_support;
773    use runmat_builtins::{CellArray, IntValue, ResolveContext, StringArray, StructValue, Type};
774
775    fn string_builtin(value: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
776        futures::executor::block_on(super::string_builtin(value, rest))
777    }
778
779    fn error_message(err: crate::RuntimeError) -> String {
780        err.message().to_string()
781    }
782
783    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
784    #[test]
785    fn string_from_numeric_scalar() {
786        let out = string_builtin(Value::Num(42.0), Vec::new()).expect("string");
787        match out {
788            Value::StringArray(sa) => {
789                assert_eq!(sa.shape, vec![1, 1]);
790                assert_eq!(sa.data, vec!["42".to_string()]);
791            }
792            other => panic!("expected string array, got {other:?}"),
793        }
794    }
795
796    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
797    #[test]
798    fn string_from_numeric_tensor_preserves_shape() {
799        let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![2, 2]).unwrap();
800        let out = string_builtin(Value::Tensor(tensor), Vec::new()).expect("string");
801        match out {
802            Value::StringArray(sa) => {
803                assert_eq!(sa.shape, vec![2, 2]);
804                assert_eq!(sa.data, vec!["1", "2", "3", "4"]);
805            }
806            other => panic!("expected string array, got {other:?}"),
807        }
808    }
809
810    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
811    #[test]
812    fn string_from_logical_array_uses_boolean_text() {
813        let logical = LogicalArray::new(vec![1, 0, 1], vec![1, 3]).unwrap();
814        let out = string_builtin(Value::LogicalArray(logical), Vec::new()).expect("string");
815        match out {
816            Value::StringArray(sa) => {
817                assert_eq!(sa.shape, vec![1, 3]);
818                assert_eq!(sa.data, vec!["true", "false", "true"]);
819            }
820            other => panic!("expected string array, got {other:?}"),
821        }
822    }
823
824    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
825    #[test]
826    fn string_from_char_array_produces_column_vector() {
827        let chars = CharArray::new("abc".chars().collect(), 1, 3).unwrap();
828        let out = string_builtin(Value::CharArray(chars), Vec::new()).expect("string");
829        match out {
830            Value::StringArray(sa) => {
831                assert_eq!(sa.shape, vec![1, 1]);
832                assert_eq!(sa.data, vec!["abc"]);
833            }
834            other => panic!("expected string array, got {other:?}"),
835        }
836    }
837
838    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
839    #[test]
840    fn string_from_cell_array() {
841        let cell = CellArray::new(vec![Value::Bool(true), Value::Int(IntValue::I32(7))], 1, 2)
842            .expect("cell array");
843        let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
844        match out {
845            Value::StringArray(sa) => {
846                assert_eq!(sa.shape, vec![1, 2]);
847                assert_eq!(sa.data, vec!["true", "7"]);
848            }
849            other => panic!("expected string array, got {other:?}"),
850        }
851    }
852
853    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
854    #[test]
855    fn string_from_cell_array_column_major() {
856        let cell = CellArray::new(
857            vec![
858                Value::Int(IntValue::I32(1)),
859                Value::Int(IntValue::I32(2)),
860                Value::Int(IntValue::I32(3)),
861                Value::Int(IntValue::I32(4)),
862            ],
863            2,
864            2,
865        )
866        .expect("cell array");
867        let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
868        match out {
869            Value::StringArray(sa) => {
870                assert_eq!(sa.shape, vec![2, 2]);
871                assert_eq!(sa.data, vec!["1", "3", "2", "4"]);
872            }
873            other => panic!("expected string array, got {other:?}"),
874        }
875    }
876
877    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
878    #[test]
879    fn string_cell_element_requires_scalar_numeric() {
880        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
881        let cell =
882            CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell with numeric tensor");
883        let err = error_message(string_builtin(Value::Cell(cell), Vec::new()).unwrap_err());
884        assert!(err.contains("cell numeric values must be scalar"));
885    }
886
887    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
888    #[test]
889    fn string_rejects_struct_input() {
890        let err = error_message(
891            string_builtin(Value::Struct(StructValue::new()), Vec::new()).expect_err("string"),
892        );
893        assert!(err.contains("structs are not supported"));
894    }
895
896    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
897    #[test]
898    fn string_errors_on_unsupported_encoding() {
899        let err = error_message(
900            string_builtin(
901                Value::CharArray(CharArray::new_row("abc")),
902                vec![Value::from("UTF-16")],
903            )
904            .unwrap_err(),
905        );
906        assert!(
907            err.contains("unsupported character encoding"),
908            "unexpected error message: {err}"
909        );
910    }
911
912    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
913    #[test]
914    fn string_accepts_system_encoding_alias() {
915        let out = string_builtin(
916            Value::CharArray(CharArray::new_row("hello")),
917            vec![Value::from("system")],
918        )
919        .expect("string");
920        match out {
921            Value::StringArray(sa) => {
922                assert_eq!(sa.shape, vec![1, 1]);
923                assert_eq!(sa.data, vec!["hello"]);
924            }
925            other => panic!("expected string array, got {other:?}"),
926        }
927    }
928
929    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
930    #[test]
931    fn string_encoding_allows_percent_literal() {
932        let out = string_builtin(
933            Value::CharArray(CharArray::new_row("100% Done")),
934            vec![Value::from("utf8")],
935        )
936        .expect("string");
937        match out {
938            Value::StringArray(sa) => {
939                assert_eq!(sa.shape, vec![1, 1]);
940                assert_eq!(sa.data, vec!["100% Done"]);
941            }
942            other => panic!("expected string array, got {other:?}"),
943        }
944    }
945
946    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
947    #[test]
948    fn string_format_spec_cell_requires_text_scalars() {
949        let cell = CellArray::new(vec![Value::Num(1.0)], 1, 1).expect("cell");
950        let err = error_message(
951            string_builtin(Value::Cell(cell), vec![Value::from("data")]).expect_err("string"),
952        );
953        assert!(
954            err.contains("formatSpec cell elements must be text scalars"),
955            "unexpected error: {err}"
956        );
957    }
958
959    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
960    #[test]
961    fn string_format_cell_argument_requires_scalar_values() {
962        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
963        let cell = CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell argument values");
964        let err = error_message(
965            string_builtin(Value::from("%d"), vec![Value::Cell(cell)]).expect_err("string"),
966        );
967        assert!(err.contains("cell format arguments must contain scalar values"));
968    }
969
970    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
971    #[test]
972    fn string_handles_large_unsigned_int() {
973        let value = Value::Int(IntValue::U64(u64::MAX));
974        let out = string_builtin(value, Vec::new()).expect("string");
975        match out {
976            Value::StringArray(sa) => {
977                assert_eq!(sa.shape, vec![1, 1]);
978                assert_eq!(sa.data, vec![u64::MAX.to_string()]);
979            }
980            other => panic!("expected string array, got {other:?}"),
981        }
982    }
983
984    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
985    #[test]
986    fn string_format_numeric_scalar() {
987        let out = string_builtin(Value::from("%d"), vec![Value::Num(7.0)]).expect("string");
988        match out {
989            Value::StringArray(sa) => {
990                assert_eq!(sa.shape, vec![1, 1]);
991                assert_eq!(sa.data, vec!["7"]);
992            }
993            other => panic!("expected string array, got {other:?}"),
994        }
995    }
996
997    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
998    #[test]
999    fn string_format_broadcast_over_tensor() {
1000        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![1, 3]).unwrap();
1001        let out =
1002            string_builtin(Value::from("Trial %d"), vec![Value::Tensor(tensor)]).expect("string");
1003        match out {
1004            Value::StringArray(sa) => {
1005                assert_eq!(sa.shape, vec![1, 3]);
1006                assert_eq!(sa.data, vec!["Trial 1", "Trial 2", "Trial 3"]);
1007            }
1008            other => panic!("expected string array, got {other:?}"),
1009        }
1010    }
1011
1012    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1013    #[test]
1014    fn string_format_string_array_spec_alignment() {
1015        let spec = StringArray::new(vec!["[%d]".into(), "Value %d".into()], vec![1, 2]).unwrap();
1016        let tensor = Tensor::new(vec![5.0, 6.0], vec![1, 2]).unwrap();
1017        let out =
1018            string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).expect("string");
1019        match out {
1020            Value::StringArray(sa) => {
1021                assert_eq!(sa.shape, vec![1, 2]);
1022                assert_eq!(sa.data, vec!["[5]", "Value 6"]);
1023            }
1024            other => panic!("expected string array, got {other:?}"),
1025        }
1026    }
1027
1028    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1029    #[test]
1030    fn string_format_prefers_placeholders_over_encoding_hint() {
1031        let out = string_builtin(Value::from("%s"), vec![Value::from("UTF-8")]).expect("string");
1032        match out {
1033            Value::StringArray(sa) => {
1034                assert_eq!(sa.shape, vec![1, 1]);
1035                assert_eq!(sa.data, vec!["UTF-8"]);
1036            }
1037            other => panic!("expected string array, got {other:?}"),
1038        }
1039    }
1040
1041    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1042    #[test]
1043    fn string_format_mismatched_lengths_errors() {
1044        let spec = StringArray::new(vec!["%d".into(), "%d".into()], vec![2, 1]).unwrap();
1045        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
1046        let err = error_message(
1047            string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).unwrap_err(),
1048        );
1049        assert!(err.contains("must be scalars or match formatSpec size"));
1050    }
1051
1052    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1053    #[test]
1054    fn string_gpu_numeric_tensor() {
1055        test_support::with_test_provider(|provider| {
1056            let tensor = Tensor::new(vec![10.0, 20.0], vec![1, 2]).unwrap();
1057            let view = runmat_accelerate_api::HostTensorView {
1058                data: &tensor.data,
1059                shape: &tensor.shape,
1060            };
1061            let handle = provider.upload(&view).expect("upload");
1062            let result = string_builtin(Value::GpuTensor(handle), Vec::new())
1063                .expect("gpu string conversion");
1064            match result {
1065                Value::StringArray(sa) => {
1066                    assert_eq!(sa.shape, vec![1, 2]);
1067                    assert_eq!(sa.data, vec!["10", "20"]);
1068                }
1069                other => panic!("expected string array, got {other:?}"),
1070            }
1071        });
1072    }
1073
1074    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1075    #[test]
1076    #[cfg(feature = "wgpu")]
1077    fn string_wgpu_numeric_tensor_matches_cpu() {
1078        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
1079            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
1080        );
1081        let tensor = Tensor::new(vec![4.0, 5.0, 6.0], vec![1, 3]).unwrap();
1082        let cpu = string_builtin(Value::Tensor(tensor.clone()), Vec::new())
1083            .expect("cpu string conversion");
1084        let view = runmat_accelerate_api::HostTensorView {
1085            data: &tensor.data,
1086            shape: &tensor.shape,
1087        };
1088        let handle = runmat_accelerate_api::provider()
1089            .unwrap()
1090            .upload(&view)
1091            .expect("gpu upload");
1092        let gpu =
1093            string_builtin(Value::GpuTensor(handle), Vec::new()).expect("gpu string conversion");
1094        match (cpu, gpu) {
1095            (Value::StringArray(expect), Value::StringArray(actual)) => {
1096                assert_eq!(actual.shape, expect.shape);
1097                assert_eq!(actual.data, expect.data);
1098            }
1099            other => panic!("unexpected results {other:?}"),
1100        }
1101    }
1102
1103    #[test]
1104    fn string_type_is_string_array() {
1105        assert_eq!(
1106            string_array_type(&[Type::Num], &ResolveContext::new(Vec::new())),
1107            Type::cell_of(Type::String)
1108        );
1109    }
1110}