Skip to main content

runmat_runtime/builtins/strings/core/
string.rs

1//! MATLAB-compatible `string` builtin with GPU-aware conversion semantics for RunMat.
2
3use runmat_builtins::{
4    CharArray, ComplexTensor, IntValue, LogicalArray, StringArray, Tensor, Value,
5};
6use runmat_macros::runtime_builtin;
7
8use crate::builtins::common::format::format_variadic;
9use crate::builtins::common::map_control_flow_with_builtin;
10use crate::builtins::common::spec::{
11    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
12    ReductionNaN, ResidencyPolicy, ShapeRequirements,
13};
14use crate::builtins::common::tensor;
15use crate::builtins::strings::type_resolvers::string_array_type;
16use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
17
18#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::core::string")]
19pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
20    name: "string",
21    op_kind: GpuOpKind::Custom("conversion"),
22    supported_precisions: &[],
23    broadcast: BroadcastSemantics::None,
24    provider_hooks: &[],
25    constant_strategy: ConstantStrategy::InlineLiteral,
26    residency: ResidencyPolicy::GatherImmediately,
27    nan_mode: ReductionNaN::Include,
28    two_pass_threshold: None,
29    workgroup_size: None,
30    accepts_nan_mode: false,
31    notes: "Always converts on the CPU; GPU tensors are gathered to host memory before conversion.",
32};
33
34#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::core::string")]
35pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
36    name: "string",
37    shape: ShapeRequirements::Any,
38    constant_strategy: ConstantStrategy::InlineLiteral,
39    elementwise: None,
40    reduction: None,
41    emits_nan: false,
42    notes:
43        "Conversion builtin; not eligible for fusion and always materialises host string arrays.",
44};
45
46#[runtime_builtin(
47    name = "string",
48    category = "strings/core",
49    summary = "Convert numeric, logical, and text inputs into MATLAB string arrays.",
50    keywords = "string,convert,text,char,gpu",
51    accel = "sink",
52    type_resolver(string_array_type),
53    builtin_path = "crate::builtins::strings::core::string"
54)]
55async fn string_builtin(value: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
56    if rest.is_empty() {
57        let gathered = gather_if_needed_async(&value)
58            .await
59            .map_err(|flow| remap_string_flow(flow))?;
60        let array = convert_to_string_array(gathered, StringEncoding::Utf8).await?;
61        return Ok(Value::StringArray(array));
62    }
63
64    let mut args = rest;
65    let format_value = gather_if_needed_async(&value)
66        .await
67        .map_err(|flow| remap_string_flow(flow))?;
68
69    if args.len() == 1 {
70        let arg = args.pop().unwrap();
71        let gathered_arg = gather_if_needed_async(&arg)
72            .await
73            .map_err(|flow| remap_string_flow(flow))?;
74        if let Some(encoding) = try_encoding_argument(&format_value, &gathered_arg)? {
75            let array = convert_to_string_array(format_value, encoding).await?;
76            return Ok(Value::StringArray(array));
77        }
78        let formatted = format_from_spec(format_value, vec![gathered_arg]).await?;
79        return Ok(Value::StringArray(formatted));
80    }
81
82    let mut gathered_args = Vec::with_capacity(args.len());
83    for arg in args {
84        gathered_args.push(
85            gather_if_needed_async(&arg)
86                .await
87                .map_err(|flow| remap_string_flow(flow))?,
88        );
89    }
90    let formatted = format_from_spec(format_value, gathered_args).await?;
91    Ok(Value::StringArray(formatted))
92}
93
/// Character encodings the `string` builtin can be asked to use.
///
/// UTF-8 is the only variant; `parse_encoding_text` maps every accepted
/// encoding name onto it.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum StringEncoding {
    /// UTF-8 text.
    Utf8,
}
98
99fn try_encoding_argument(
100    first: &Value,
101    candidate: &Value,
102) -> BuiltinResult<Option<StringEncoding>> {
103    if !matches!(
104        first,
105        Value::CharArray(_) | Value::String(_) | Value::StringArray(_) | Value::Cell(_)
106    ) {
107        return Ok(None);
108    }
109    if has_format_placeholders(first) {
110        return Ok(None);
111    }
112    if let Value::Cell(cell) = first {
113        if !cell_contains_only_text_scalars(cell) {
114            return Ok(None);
115        }
116    }
117    let Some(text) = value_to_scalar_text(candidate) else {
118        return Ok(None);
119    };
120    parse_encoding_text(&text).map(Some)
121}
122
123fn parse_encoding_text(raw: &str) -> BuiltinResult<StringEncoding> {
124    let trimmed = raw.trim();
125    let lowered = trimmed.to_ascii_lowercase();
126    match lowered.as_str() {
127        "utf-8" | "utf8" | "unicode" | "system" => Ok(StringEncoding::Utf8),
128        _ => Err(string_flow(format!(
129            "string: unsupported character encoding '{trimmed}'; only UTF-8 is available"
130        ))),
131    }
132}
133
134fn cell_contains_only_text_scalars(cell: &runmat_builtins::CellArray) -> bool {
135    cell.data.iter().all(|ptr| match &**ptr {
136        Value::String(_) => true,
137        Value::StringArray(sa) => sa.data.len() <= 1,
138        Value::CharArray(ca) => ca.rows <= 1,
139        _ => false,
140    })
141}
142
/// Reports whether `text` contains a printf-style conversion placeholder.
///
/// A placeholder is a `%` followed by any run of the flags `+`, `-`, `0`, `#`,
/// an optional run of width digits, an optional `.` plus precision digits, and
/// then an ASCII letter. A doubled `%%` is an escaped percent sign and is
/// skipped.
// NOTE(review): the space flag and `*` widths are not recognised here, so
// "% d" is not reported as a placeholder - confirm that is intended.
fn text_has_format_placeholder(text: &str) -> bool {
    let mut cursor = text.chars().peekable();
    loop {
        // Advance to the next '%' (or finish when the text is exhausted).
        match cursor.next() {
            Some('%') => {}
            Some(_) => continue,
            None => return false,
        }

        // "%%" is a literal percent sign, not a placeholder.
        if cursor.next_if_eq(&'%').is_some() {
            continue;
        }

        // Flags, then width digits, then optional ".precision".
        while cursor
            .next_if(|c| matches!(*c, '+' | '-' | '0' | '#'))
            .is_some()
        {}
        while cursor.next_if(char::is_ascii_digit).is_some() {}
        if cursor.next_if_eq(&'.').is_some() {
            while cursor.next_if(char::is_ascii_digit).is_some() {}
        }

        // The conversion letter is only peeked: if it is not a letter the
        // character is re-examined by the next iteration (it may be a '%').
        if matches!(cursor.peek(), Some(conv) if conv.is_ascii_alphabetic()) {
            return true;
        }
    }
}
173
174fn has_format_placeholders(value: &Value) -> bool {
175    match value {
176        Value::String(s) => text_has_format_placeholder(s),
177        Value::StringArray(sa) => sa.data.iter().any(|s| text_has_format_placeholder(s)),
178        Value::CharArray(ca) => {
179            for row in 0..ca.rows {
180                let mut row_str = String::with_capacity(ca.cols);
181                for col in 0..ca.cols {
182                    row_str.push(ca.data[row * ca.cols + col]);
183                }
184                if text_has_format_placeholder(&row_str) {
185                    return true;
186                }
187            }
188            false
189        }
190        Value::Cell(cell) => {
191            for ptr in &cell.data {
192                let element = (**ptr).clone();
193                if has_format_placeholders(&element) {
194                    return true;
195                }
196            }
197            false
198        }
199        _ => false,
200    }
201}
202
/// Format specifications extracted from the first argument of a formatting
/// call, flattened into a list together with the shape they came from.
pub(crate) struct FormatSpecData {
    /// Dimensions associated with `specs` (for example `[1, 1]` for a scalar
    /// string, or `[rows, cols]` for a cell array).
    pub(crate) shape: Vec<usize>,
    /// One format string per element.
    pub(crate) specs: Vec<String>,
}
207
208struct ArgumentData {
209    values: Vec<Value>,
210    shape: Vec<usize>,
211}
212
213fn string_flow(message: impl Into<String>) -> RuntimeError {
214    build_runtime_error(message).with_builtin("string").build()
215}
216
217fn remap_string_flow(err: RuntimeError) -> RuntimeError {
218    map_control_flow_with_builtin(err, "string")
219}
220
221pub(crate) async fn format_from_spec(
222    format_value: Value,
223    args: Vec<Value>,
224) -> crate::BuiltinResult<StringArray> {
225    let spec = extract_format_spec(format_value).await?;
226    let mut arguments = Vec::with_capacity(args.len());
227    for arg in args {
228        arguments.push(extract_argument_data(arg).await?);
229    }
230
231    let (target_len, mut target_shape) = resolve_target_shape(&spec, &arguments)?;
232
233    if target_len == 0 {
234        let shape = if target_shape.is_empty() {
235            if spec.shape.is_empty() {
236                vec![0, 0]
237            } else {
238                spec.shape.clone()
239            }
240        } else {
241            target_shape
242        };
243        return StringArray::new(Vec::new(), shape)
244            .map_err(|e| string_flow(format!("string: {e}")));
245    }
246
247    let spec_len = spec.specs.len();
248    if spec_len == 0 {
249        return Err(string_flow(
250            "string: formatSpec must contain at least one element when formatting with data",
251        ));
252    }
253
254    for arg in &arguments {
255        if target_len > 0 && arg.values.is_empty() {
256            return Err(string_flow(
257                "string: format data arguments must be scalars or match formatSpec size",
258            ));
259        }
260    }
261
262    let mut output = Vec::with_capacity(target_len);
263    for idx in 0..target_len {
264        let spec_idx = if spec_len == 1 { 0 } else { idx };
265        let spec_str = &spec.specs[spec_idx];
266        let mut per_call = Vec::with_capacity(arguments.len());
267        for arg in &arguments {
268            let value =
269                match arg.values.len() {
270                    0 => continue,
271                    1 => arg.values[0].clone(),
272                    len if len == target_len => arg.values[idx].clone(),
273                    _ => return Err(string_flow(
274                        "string: format data arguments must be scalars or match formatSpec size",
275                    )),
276                };
277            per_call.push(value);
278        }
279        let formatted =
280            format_variadic(spec_str, &per_call).map_err(|flow| remap_string_flow(flow))?;
281        output.push(formatted);
282    }
283
284    if target_shape.is_empty() {
285        target_shape = if spec_len > 1 {
286            spec.shape.clone()
287        } else {
288            vec![target_len, 1]
289        };
290    }
291
292    if tensor::element_count(&target_shape) != target_len {
293        target_shape = vec![target_len, 1];
294    }
295
296    StringArray::new(output, target_shape).map_err(|e| string_flow(format!("string: {e}")))
297}
298
299fn resolve_target_shape(
300    spec: &FormatSpecData,
301    args: &[ArgumentData],
302) -> BuiltinResult<(usize, Vec<usize>)> {
303    let mut target_len = spec.specs.len();
304    let mut target_shape = if target_len > 1 || (target_len == 1 && !spec.shape.is_empty()) {
305        spec.shape.clone()
306    } else {
307        Vec::new()
308    };
309
310    for arg in args {
311        let len = arg.values.len();
312        if len == 0 {
313            continue;
314        }
315        if target_len == 0 {
316            target_len = len;
317            target_shape = arg.shape.clone();
318            continue;
319        }
320        if len == 1 {
321            continue;
322        }
323        if target_len == 1 {
324            target_len = len;
325            target_shape = arg.shape.clone();
326            continue;
327        }
328        if len != target_len {
329            return Err(string_flow(
330                "string: format data arguments must be scalars or match formatSpec size",
331            ));
332        }
333        if target_shape.is_empty() && len > 1 {
334            target_shape = arg.shape.clone();
335        }
336    }
337
338    if target_len == 0 {
339        let shape = if spec.shape.is_empty() {
340            vec![0, 0]
341        } else {
342            spec.shape.clone()
343        };
344        return Ok((0, shape));
345    }
346
347    if target_shape.is_empty() {
348        target_shape = if spec.shape.is_empty() {
349            vec![target_len, 1]
350        } else {
351            spec.shape.clone()
352        };
353        if spec.specs.len() == 1 && tensor::element_count(&target_shape) != target_len {
354            target_shape = vec![target_len, 1];
355        }
356    }
357
358    if tensor::element_count(&target_shape) != target_len {
359        target_shape = vec![target_len, 1];
360    }
361
362    Ok((target_len, target_shape))
363}
364
365pub(crate) async fn extract_format_spec(value: Value) -> BuiltinResult<FormatSpecData> {
366    match value {
367        Value::String(s) => Ok(FormatSpecData {
368            specs: vec![s],
369            shape: vec![1, 1],
370        }),
371        Value::StringArray(sa) => Ok(FormatSpecData {
372            specs: sa.data.clone(),
373            shape: sa.shape.clone(),
374        }),
375        Value::CharArray(ca) => {
376            let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
377            Ok(FormatSpecData {
378                specs: array.data,
379                shape: array.shape,
380            })
381        }
382        Value::Cell(cell) => {
383            let mut specs = Vec::with_capacity(cell.data.len());
384            for col in 0..cell.cols {
385                for row in 0..cell.rows {
386                    let idx = row * cell.cols + col;
387                    let element = &cell.data[idx];
388                    let value = (**element).clone();
389                    let gathered = gather_if_needed_async(&value)
390                        .await
391                        .map_err(|flow| remap_string_flow(flow))?;
392                    let text = value_to_scalar_text(&gathered).ok_or_else(|| {
393                        string_flow("string: formatSpec cell elements must be text scalars")
394                    })?;
395                    specs.push(text);
396                }
397            }
398            Ok(FormatSpecData {
399                specs,
400                shape: vec![cell.rows, cell.cols],
401            })
402        }
403        _ => Err(string_flow(
404            "string: formatSpec must be text (string, char, or cellstr)",
405        )),
406    }
407}
408
409#[async_recursion::async_recursion(?Send)]
410async fn extract_argument_data(value: Value) -> BuiltinResult<ArgumentData> {
411    match value {
412        Value::String(s) => Ok(ArgumentData {
413            values: vec![Value::String(s)],
414            shape: vec![1, 1],
415        }),
416        Value::StringArray(sa) => Ok(ArgumentData {
417            values: sa.data.into_iter().map(Value::String).collect(),
418            shape: sa.shape,
419        }),
420        Value::CharArray(ca) => {
421            let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
422            Ok(ArgumentData {
423                values: array.data.into_iter().map(Value::String).collect(),
424                shape: array.shape,
425            })
426        }
427        Value::Num(n) => Ok(ArgumentData {
428            values: vec![Value::Num(n)],
429            shape: vec![1, 1],
430        }),
431        Value::Int(i) => Ok(ArgumentData {
432            values: vec![Value::Int(i)],
433            shape: vec![1, 1],
434        }),
435        Value::Bool(b) => Ok(ArgumentData {
436            values: vec![Value::Num(if b { 1.0 } else { 0.0 })],
437            shape: vec![1, 1],
438        }),
439        Value::Tensor(t) => Ok(ArgumentData {
440            values: t.data.into_iter().map(Value::Num).collect(),
441            shape: t.shape,
442        }),
443        Value::Complex(re, im) => Ok(ArgumentData {
444            values: vec![Value::String(Value::Complex(re, im).to_string())],
445            shape: vec![1, 1],
446        }),
447        Value::ComplexTensor(t) => Ok(ArgumentData {
448            values: t
449                .data
450                .into_iter()
451                .map(|(re, im)| Value::String(Value::Complex(re, im).to_string()))
452                .collect(),
453            shape: t.shape,
454        }),
455        Value::LogicalArray(la) => Ok(ArgumentData {
456            values: la
457                .data
458                .into_iter()
459                .map(|byte| Value::Num(if byte != 0 { 1.0 } else { 0.0 }))
460                .collect(),
461            shape: la.shape,
462        }),
463        Value::Cell(cell) => {
464            let mut values = Vec::with_capacity(cell.data.len());
465            for col in 0..cell.cols {
466                for row in 0..cell.rows {
467                    let idx = row * cell.cols + col;
468                    let element = &cell.data[idx];
469                    let value = (**element).clone();
470                    let gathered = gather_if_needed_async(&value)
471                        .await
472                        .map_err(|flow| remap_string_flow(flow))?;
473                    let value = match gathered {
474                        Value::String(s) => Value::String(s),
475                        Value::StringArray(sa) if sa.data.len() == 1 => {
476                            Value::String(sa.data[0].clone())
477                        }
478                        Value::CharArray(ca) => {
479                            if ca.rows != 1 {
480                                return Err(string_flow(
481                                    "string: cell format arguments must contain char row vectors",
482                                ));
483                            }
484                            let mut row_str = String::with_capacity(ca.cols);
485                            for ch in ca.data {
486                                row_str.push(ch);
487                            }
488                            Value::String(row_str)
489                        }
490                        Value::Num(n) => Value::Num(n),
491                        Value::Int(i) => Value::Int(i),
492                        Value::Bool(b) => Value::Num(if b { 1.0 } else { 0.0 }),
493                        Value::Tensor(t) => {
494                            if t.data.len() != 1 {
495                                return Err(string_flow(
496                                    "string: cell format arguments must contain scalar values",
497                                ));
498                            }
499                            Value::Num(t.data[0])
500                        }
501                        Value::LogicalArray(la) => {
502                            if la.data.len() != 1 {
503                                return Err(string_flow(
504                                    "string: cell format arguments must contain scalar values",
505                                ));
506                            }
507                            Value::Num(if la.data[0] != 0 { 1.0 } else { 0.0 })
508                        }
509                        Value::Complex(re, im) => {
510                            Value::String(Value::Complex(re, im).to_string())
511                        }
512                        Value::ComplexTensor(t) => {
513                            if t.data.len() != 1 {
514                                return Err(string_flow(
515                                    "string: cell format arguments must contain scalar values",
516                                ));
517                            }
518                            let (re, im) = t.data[0];
519                            Value::String(Value::Complex(re, im).to_string())
520                        }
521                        other => {
522                            return Err(string_flow(format!(
523                                "string: unsupported cell format argument {other:?}; expected scalar text or numeric values"
524                            )))
525                        }
526                    };
527                    values.push(value);
528                }
529            }
530            Ok(ArgumentData {
531                values,
532                shape: vec![cell.rows, cell.cols],
533            })
534        }
535        Value::GpuTensor(handle) => {
536            let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
537                .await
538                .map_err(|flow| remap_string_flow(flow))?;
539            extract_argument_data(gathered).await
540        }
541        Value::MException(_)
542        | Value::HandleObject(_)
543        | Value::Object(_)
544        | Value::Listener(_)
545        | Value::Struct(_)
546        | Value::OutputList(_) => Err(string_flow("string: unsupported format argument type")),
547        Value::FunctionHandle(_) | Value::Closure(_) | Value::ClassRef(_) => {
548            Err(string_flow("string: unsupported format argument type"))
549        }
550    }
551}
552
553#[async_recursion::async_recursion(?Send)]
554async fn convert_to_string_array(
555    value: Value,
556    encoding: StringEncoding,
557) -> BuiltinResult<StringArray> {
558    if let Some(array) = crate::builtins::datetime::datetime_string_array(&value)
559        .map_err(|err| string_flow(err.message().to_string()))?
560    {
561        return Ok(array);
562    }
563    if let Some(array) = crate::builtins::duration::duration_string_array(&value)
564        .map_err(|err| string_flow(err.message().to_string()))?
565    {
566        return Ok(array);
567    }
568    match value {
569        Value::String(s) => string_scalar(s),
570        Value::StringArray(sa) => Ok(sa),
571        Value::CharArray(ca) => char_array_to_string_array(ca, encoding),
572        Value::Tensor(tensor) => tensor_to_string_array(tensor),
573        Value::ComplexTensor(tensor) => complex_tensor_to_string_array(tensor),
574        Value::LogicalArray(logical) => logical_array_to_string_array(logical),
575        Value::Cell(cell) => cell_array_to_string_array(cell, encoding).await,
576        Value::Num(n) => string_scalar(Value::Num(n).to_string()),
577        Value::Int(i) => string_scalar(int_value_to_string(&i)),
578        Value::Bool(b) => string_scalar(bool_to_string(b).to_string()),
579        Value::Complex(re, im) => string_scalar(Value::Complex(re, im).to_string()),
580        Value::GpuTensor(handle) => {
581            // Defensive fallback: gather and retry.
582            let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
583                .await
584                .map_err(|flow| remap_string_flow(flow))?;
585            convert_to_string_array(gathered, encoding).await
586        }
587        Value::Object(_) | Value::HandleObject(_) | Value::Listener(_) => Err(string_flow(
588            "string: unsupported conversion from handle-based objects. Use class-specific formatters.",
589        )),
590        Value::Struct(_) => Err(string_flow(
591            "string: structs are not supported for automatic conversion",
592        )),
593        Value::FunctionHandle(_)
594        | Value::Closure(_)
595        | Value::ClassRef(_)
596        | Value::MException(_)
597        | Value::OutputList(_) => Err(
598            string_flow("string: unsupported conversion for function or exception handles"),
599        ),
600    }
601}
602
603fn string_scalar<S: Into<String>>(text: S) -> BuiltinResult<StringArray> {
604    StringArray::new(vec![text.into()], vec![1, 1]).map_err(|e| string_flow(format!("string: {e}")))
605}
606
607fn value_to_scalar_text(value: &Value) -> Option<String> {
608    match value {
609        Value::String(s) => Some(s.clone()),
610        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
611        Value::CharArray(ca) if ca.rows == 1 => Some(ca.data.iter().collect()),
612        _ => None,
613    }
614}
615
616fn char_array_to_string_array(
617    array: CharArray,
618    _encoding: StringEncoding,
619) -> BuiltinResult<StringArray> {
620    let mut rows: Vec<String> = Vec::with_capacity(array.rows);
621    for r in 0..array.rows {
622        let mut row = String::with_capacity(array.cols);
623        for c in 0..array.cols {
624            row.push(array.data[r * array.cols + c]);
625        }
626        rows.push(row);
627    }
628    let shape = if array.rows == 0 {
629        vec![0, 1]
630    } else {
631        vec![array.rows, 1]
632    };
633    StringArray::new(rows, shape).map_err(|e| string_flow(format!("string: {e}")))
634}
635
636fn tensor_to_string_array(tensor: Tensor) -> BuiltinResult<StringArray> {
637    let mut strings = Vec::with_capacity(tensor.data.len());
638    for &value in &tensor.data {
639        strings.push(Value::Num(value).to_string());
640    }
641    StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
642}
643
644fn complex_tensor_to_string_array(tensor: ComplexTensor) -> BuiltinResult<StringArray> {
645    let mut strings = Vec::with_capacity(tensor.data.len());
646    for &(re, im) in &tensor.data {
647        strings.push(Value::Complex(re, im).to_string());
648    }
649    StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
650}
651
652fn logical_array_to_string_array(logical: LogicalArray) -> BuiltinResult<StringArray> {
653    let mut strings = Vec::with_capacity(logical.data.len());
654    for &byte in &logical.data {
655        strings.push(bool_to_string(byte != 0).to_string());
656    }
657    StringArray::new(strings, logical.shape).map_err(|e| string_flow(format!("string: {e}")))
658}
659
660async fn cell_array_to_string_array(
661    cell: runmat_builtins::CellArray,
662    _encoding: StringEncoding,
663) -> BuiltinResult<StringArray> {
664    let mut strings = Vec::with_capacity(cell.data.len());
665    for col in 0..cell.cols {
666        for row in 0..cell.rows {
667            let idx = row * cell.cols + col;
668            let element = &cell.data[idx];
669            let value = (**element).clone();
670            let gathered = gather_if_needed_async(&value)
671                .await
672                .map_err(|flow| remap_string_flow(flow))?;
673            strings.push(cell_element_to_string(&gathered)?);
674        }
675    }
676    StringArray::new(strings, vec![cell.rows, cell.cols])
677        .map_err(|e| string_flow(format!("string: {e}")))
678}
679
680fn cell_element_to_string(value: &Value) -> BuiltinResult<String> {
681    if let Some(array) = crate::builtins::datetime::datetime_string_array(value)
682        .map_err(|err| string_flow(err.message().to_string()))?
683    {
684        if array.data.len() == 1 {
685            return Ok(array.data[0].clone());
686        }
687        return Err(string_flow("string: cell datetime values must be scalar"));
688    }
689    if let Some(array) = crate::builtins::duration::duration_string_array(value)
690        .map_err(|err| string_flow(err.message().to_string()))?
691    {
692        if array.data.len() == 1 {
693            return Ok(array.data[0].clone());
694        }
695        return Err(string_flow("string: cell duration values must be scalar"));
696    }
697    match value {
698        Value::String(s) => Ok(s.clone()),
699        Value::StringArray(sa) => {
700            if sa.data.len() == 1 {
701                Ok(sa.data[0].clone())
702            } else {
703                Err(string_flow(
704                    "string: cell elements must contain string scalars, not string arrays",
705                ))
706            }
707        }
708        Value::CharArray(ca) => {
709            if ca.rows == 1 {
710                Ok(ca.data.iter().collect())
711            } else {
712                Err(string_flow(
713                    "string: cell character arrays must be row vectors",
714                ))
715            }
716        }
717        Value::Num(n) => Ok(Value::Num(*n).to_string()),
718        Value::Int(i) => Ok(int_value_to_string(i)),
719        Value::Bool(b) => Ok(bool_to_string(*b).to_string()),
720        Value::LogicalArray(array) => {
721            if array.data.len() == 1 {
722                Ok(bool_to_string(array.data[0] != 0).to_string())
723            } else {
724                Err(string_flow("string: cell logical values must be scalar"))
725            }
726        }
727        Value::Tensor(t) => {
728            if t.data.len() == 1 {
729                Ok(Value::Num(t.data[0]).to_string())
730            } else {
731                Err(string_flow("string: cell numeric values must be scalar"))
732            }
733        }
734        Value::Complex(re, im) => Ok(Value::Complex(*re, *im).to_string()),
735        Value::ComplexTensor(t) => {
736            if t.data.len() == 1 {
737                let (re, im) = t.data[0];
738                Ok(Value::Complex(re, im).to_string())
739            } else {
740                Err(string_flow("string: cell complex values must be scalar"))
741            }
742        }
743        other => Err(string_flow(format!(
744            "string: unsupported cell element type {:?}; expected text or scalar values",
745            other
746        ))),
747    }
748}
749
/// Returns the text form of a boolean: `"true"` or `"false"`.
fn bool_to_string(value: bool) -> &'static str {
    match value {
        true => "true",
        false => "false",
    }
}
757
758fn int_value_to_string(value: &IntValue) -> String {
759    match value {
760        IntValue::I8(v) => v.to_string(),
761        IntValue::I16(v) => v.to_string(),
762        IntValue::I32(v) => v.to_string(),
763        IntValue::I64(v) => v.to_string(),
764        IntValue::U8(v) => v.to_string(),
765        IntValue::U16(v) => v.to_string(),
766        IntValue::U32(v) => v.to_string(),
767        IntValue::U64(v) => v.to_string(),
768    }
769}
770
771#[cfg(test)]
772pub(crate) mod tests {
773    use super::*;
774    use crate::builtins::common::test_support;
775    use runmat_builtins::{CellArray, IntValue, ResolveContext, StringArray, StructValue, Type};
776
777    fn string_builtin(value: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
778        futures::executor::block_on(super::string_builtin(value, rest))
779    }
780
781    fn error_message(err: crate::RuntimeError) -> String {
782        err.message().to_string()
783    }
784
785    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
786    #[test]
787    fn string_from_numeric_scalar() {
788        let out = string_builtin(Value::Num(42.0), Vec::new()).expect("string");
789        match out {
790            Value::StringArray(sa) => {
791                assert_eq!(sa.shape, vec![1, 1]);
792                assert_eq!(sa.data, vec!["42".to_string()]);
793            }
794            other => panic!("expected string array, got {other:?}"),
795        }
796    }
797
798    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
799    #[test]
800    fn string_from_numeric_tensor_preserves_shape() {
801        let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![2, 2]).unwrap();
802        let out = string_builtin(Value::Tensor(tensor), Vec::new()).expect("string");
803        match out {
804            Value::StringArray(sa) => {
805                assert_eq!(sa.shape, vec![2, 2]);
806                assert_eq!(sa.data, vec!["1", "2", "3", "4"]);
807            }
808            other => panic!("expected string array, got {other:?}"),
809        }
810    }
811
812    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
813    #[test]
814    fn string_from_logical_array_uses_boolean_text() {
815        let logical = LogicalArray::new(vec![1, 0, 1], vec![1, 3]).unwrap();
816        let out = string_builtin(Value::LogicalArray(logical), Vec::new()).expect("string");
817        match out {
818            Value::StringArray(sa) => {
819                assert_eq!(sa.shape, vec![1, 3]);
820                assert_eq!(sa.data, vec!["true", "false", "true"]);
821            }
822            other => panic!("expected string array, got {other:?}"),
823        }
824    }
825
826    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
827    #[test]
828    fn string_from_char_array_produces_column_vector() {
829        let chars = CharArray::new("abc".chars().collect(), 1, 3).unwrap();
830        let out = string_builtin(Value::CharArray(chars), Vec::new()).expect("string");
831        match out {
832            Value::StringArray(sa) => {
833                assert_eq!(sa.shape, vec![1, 1]);
834                assert_eq!(sa.data, vec!["abc"]);
835            }
836            other => panic!("expected string array, got {other:?}"),
837        }
838    }
839
840    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
841    #[test]
842    fn string_from_cell_array() {
843        let cell = CellArray::new(vec![Value::Bool(true), Value::Int(IntValue::I32(7))], 1, 2)
844            .expect("cell array");
845        let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
846        match out {
847            Value::StringArray(sa) => {
848                assert_eq!(sa.shape, vec![1, 2]);
849                assert_eq!(sa.data, vec!["true", "7"]);
850            }
851            other => panic!("expected string array, got {other:?}"),
852        }
853    }
854
855    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
856    #[test]
857    fn string_from_cell_array_column_major() {
858        let cell = CellArray::new(
859            vec![
860                Value::Int(IntValue::I32(1)),
861                Value::Int(IntValue::I32(2)),
862                Value::Int(IntValue::I32(3)),
863                Value::Int(IntValue::I32(4)),
864            ],
865            2,
866            2,
867        )
868        .expect("cell array");
869        let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
870        match out {
871            Value::StringArray(sa) => {
872                assert_eq!(sa.shape, vec![2, 2]);
873                assert_eq!(sa.data, vec!["1", "3", "2", "4"]);
874            }
875            other => panic!("expected string array, got {other:?}"),
876        }
877    }
878
879    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
880    #[test]
881    fn string_cell_element_requires_scalar_numeric() {
882        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
883        let cell =
884            CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell with numeric tensor");
885        let err = error_message(string_builtin(Value::Cell(cell), Vec::new()).unwrap_err());
886        assert!(err.contains("cell numeric values must be scalar"));
887    }
888
889    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
890    #[test]
891    fn string_rejects_struct_input() {
892        let err = error_message(
893            string_builtin(Value::Struct(StructValue::new()), Vec::new()).expect_err("string"),
894        );
895        assert!(err.contains("structs are not supported"));
896    }
897
898    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
899    #[test]
900    fn string_errors_on_unsupported_encoding() {
901        let err = error_message(
902            string_builtin(
903                Value::CharArray(CharArray::new_row("abc")),
904                vec![Value::from("UTF-16")],
905            )
906            .unwrap_err(),
907        );
908        assert!(
909            err.contains("unsupported character encoding"),
910            "unexpected error message: {err}"
911        );
912    }
913
914    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
915    #[test]
916    fn string_accepts_system_encoding_alias() {
917        let out = string_builtin(
918            Value::CharArray(CharArray::new_row("hello")),
919            vec![Value::from("system")],
920        )
921        .expect("string");
922        match out {
923            Value::StringArray(sa) => {
924                assert_eq!(sa.shape, vec![1, 1]);
925                assert_eq!(sa.data, vec!["hello"]);
926            }
927            other => panic!("expected string array, got {other:?}"),
928        }
929    }
930
931    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
932    #[test]
933    fn string_encoding_allows_percent_literal() {
934        let out = string_builtin(
935            Value::CharArray(CharArray::new_row("100% Done")),
936            vec![Value::from("utf8")],
937        )
938        .expect("string");
939        match out {
940            Value::StringArray(sa) => {
941                assert_eq!(sa.shape, vec![1, 1]);
942                assert_eq!(sa.data, vec!["100% Done"]);
943            }
944            other => panic!("expected string array, got {other:?}"),
945        }
946    }
947
948    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
949    #[test]
950    fn string_format_spec_cell_requires_text_scalars() {
951        let cell = CellArray::new(vec![Value::Num(1.0)], 1, 1).expect("cell");
952        let err = error_message(
953            string_builtin(Value::Cell(cell), vec![Value::from("data")]).expect_err("string"),
954        );
955        assert!(
956            err.contains("formatSpec cell elements must be text scalars"),
957            "unexpected error: {err}"
958        );
959    }
960
961    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
962    #[test]
963    fn string_format_cell_argument_requires_scalar_values() {
964        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
965        let cell = CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell argument values");
966        let err = error_message(
967            string_builtin(Value::from("%d"), vec![Value::Cell(cell)]).expect_err("string"),
968        );
969        assert!(err.contains("cell format arguments must contain scalar values"));
970    }
971
972    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
973    #[test]
974    fn string_handles_large_unsigned_int() {
975        let value = Value::Int(IntValue::U64(u64::MAX));
976        let out = string_builtin(value, Vec::new()).expect("string");
977        match out {
978            Value::StringArray(sa) => {
979                assert_eq!(sa.shape, vec![1, 1]);
980                assert_eq!(sa.data, vec![u64::MAX.to_string()]);
981            }
982            other => panic!("expected string array, got {other:?}"),
983        }
984    }
985
986    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
987    #[test]
988    fn string_format_numeric_scalar() {
989        let out = string_builtin(Value::from("%d"), vec![Value::Num(7.0)]).expect("string");
990        match out {
991            Value::StringArray(sa) => {
992                assert_eq!(sa.shape, vec![1, 1]);
993                assert_eq!(sa.data, vec!["7"]);
994            }
995            other => panic!("expected string array, got {other:?}"),
996        }
997    }
998
999    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1000    #[test]
1001    fn string_format_broadcast_over_tensor() {
1002        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![1, 3]).unwrap();
1003        let out =
1004            string_builtin(Value::from("Trial %d"), vec![Value::Tensor(tensor)]).expect("string");
1005        match out {
1006            Value::StringArray(sa) => {
1007                assert_eq!(sa.shape, vec![1, 3]);
1008                assert_eq!(sa.data, vec!["Trial 1", "Trial 2", "Trial 3"]);
1009            }
1010            other => panic!("expected string array, got {other:?}"),
1011        }
1012    }
1013
1014    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1015    #[test]
1016    fn string_format_string_array_spec_alignment() {
1017        let spec = StringArray::new(vec!["[%d]".into(), "Value %d".into()], vec![1, 2]).unwrap();
1018        let tensor = Tensor::new(vec![5.0, 6.0], vec![1, 2]).unwrap();
1019        let out =
1020            string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).expect("string");
1021        match out {
1022            Value::StringArray(sa) => {
1023                assert_eq!(sa.shape, vec![1, 2]);
1024                assert_eq!(sa.data, vec!["[5]", "Value 6"]);
1025            }
1026            other => panic!("expected string array, got {other:?}"),
1027        }
1028    }
1029
1030    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1031    #[test]
1032    fn string_format_prefers_placeholders_over_encoding_hint() {
1033        let out = string_builtin(Value::from("%s"), vec![Value::from("UTF-8")]).expect("string");
1034        match out {
1035            Value::StringArray(sa) => {
1036                assert_eq!(sa.shape, vec![1, 1]);
1037                assert_eq!(sa.data, vec!["UTF-8"]);
1038            }
1039            other => panic!("expected string array, got {other:?}"),
1040        }
1041    }
1042
1043    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1044    #[test]
1045    fn string_format_mismatched_lengths_errors() {
1046        let spec = StringArray::new(vec!["%d".into(), "%d".into()], vec![2, 1]).unwrap();
1047        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
1048        let err = error_message(
1049            string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).unwrap_err(),
1050        );
1051        assert!(err.contains("must be scalars or match formatSpec size"));
1052    }
1053
1054    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1055    #[test]
1056    fn string_gpu_numeric_tensor() {
1057        test_support::with_test_provider(|provider| {
1058            let tensor = Tensor::new(vec![10.0, 20.0], vec![1, 2]).unwrap();
1059            let view = runmat_accelerate_api::HostTensorView {
1060                data: &tensor.data,
1061                shape: &tensor.shape,
1062            };
1063            let handle = provider.upload(&view).expect("upload");
1064            let result = string_builtin(Value::GpuTensor(handle), Vec::new())
1065                .expect("gpu string conversion");
1066            match result {
1067                Value::StringArray(sa) => {
1068                    assert_eq!(sa.shape, vec![1, 2]);
1069                    assert_eq!(sa.data, vec!["10", "20"]);
1070                }
1071                other => panic!("expected string array, got {other:?}"),
1072            }
1073        });
1074    }
1075
1076    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1077    #[test]
1078    #[cfg(feature = "wgpu")]
1079    fn string_wgpu_numeric_tensor_matches_cpu() {
1080        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
1081            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
1082        );
1083        let tensor = Tensor::new(vec![4.0, 5.0, 6.0], vec![1, 3]).unwrap();
1084        let cpu = string_builtin(Value::Tensor(tensor.clone()), Vec::new())
1085            .expect("cpu string conversion");
1086        let view = runmat_accelerate_api::HostTensorView {
1087            data: &tensor.data,
1088            shape: &tensor.shape,
1089        };
1090        let handle = runmat_accelerate_api::provider()
1091            .unwrap()
1092            .upload(&view)
1093            .expect("gpu upload");
1094        let gpu =
1095            string_builtin(Value::GpuTensor(handle), Vec::new()).expect("gpu string conversion");
1096        match (cpu, gpu) {
1097            (Value::StringArray(expect), Value::StringArray(actual)) => {
1098                assert_eq!(actual.shape, expect.shape);
1099                assert_eq!(actual.data, expect.data);
1100            }
1101            other => panic!("unexpected results {other:?}"),
1102        }
1103    }
1104
1105    #[test]
1106    fn string_type_is_string_array() {
1107        assert_eq!(
1108            string_array_type(&[Type::Num], &ResolveContext::new(Vec::new())),
1109            Type::cell_of(Type::String)
1110        );
1111    }
1112}