Skip to main content

runmat_runtime/builtins/strings/core/
string.rs

1//! MATLAB-compatible `string` builtin with GPU-aware conversion semantics for RunMat.
2
3use runmat_builtins::{
4    BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
5    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
6    CharArray, ComplexTensor, IntValue, LogicalArray, StringArray, Tensor, Value,
7};
8use runmat_macros::runtime_builtin;
9
10use crate::builtins::common::format::{complex_to_string, format_variadic, number_to_string};
11use crate::builtins::common::map_control_flow_with_builtin;
12use crate::builtins::common::spec::{
13    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
14    ReductionNaN, ResidencyPolicy, ShapeRequirements,
15};
16use crate::builtins::common::tensor;
17use crate::builtins::strings::type_resolvers::string_array_type;
18use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
19
/// Output parameter list used by every `string` signature: the single result `S`.
const STRING_OUTPUT_S: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "S",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "String scalar/array result.",
}];
27
/// Input parameter list for the `S = string(X)` signature: one required value `X`.
const STRING_INPUTS_VALUE: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "X",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Input value to convert to string array.",
}];
35
/// Input parameter list for the `S = string(X, encoding)` signature: the required
/// value `X` followed by an optional encoding name whose documented default is `"UTF-8"`.
const STRING_INPUTS_VALUE_ENCODING: [BuiltinParamDescriptor; 2] = [
    BuiltinParamDescriptor {
        name: "X",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Input value to convert to string array.",
    },
    BuiltinParamDescriptor {
        name: "encoding",
        ty: BuiltinParamType::StringScalar,
        arity: BuiltinParamArity::Optional,
        default: Some("\"UTF-8\""),
        description: "Character encoding (UTF-8 aliases supported).",
    },
];
52
/// Input parameter list for the `S = string(formatSpec, A...)` signature: a required
/// format specification followed by a variadic list of data arguments.
const STRING_INPUTS_FORMAT: [BuiltinParamDescriptor; 2] = [
    BuiltinParamDescriptor {
        name: "formatSpec",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Required,
        default: None,
        description: "Format specification text/cell/string array.",
    },
    BuiltinParamDescriptor {
        name: "A",
        ty: BuiltinParamType::Any,
        arity: BuiltinParamArity::Variadic,
        default: None,
        description: "Formatting data arguments.",
    },
];
69
/// The three call forms advertised for `string`: plain conversion, conversion with
/// an explicit encoding, and formatting from a format specification plus data.
/// Every form shares the same single output, `STRING_OUTPUT_S`.
const STRING_SIGNATURES: [BuiltinSignatureDescriptor; 3] = [
    BuiltinSignatureDescriptor {
        label: "S = string(X)",
        inputs: &STRING_INPUTS_VALUE,
        outputs: &STRING_OUTPUT_S,
    },
    BuiltinSignatureDescriptor {
        label: "S = string(X, encoding)",
        inputs: &STRING_INPUTS_VALUE_ENCODING,
        outputs: &STRING_OUTPUT_S,
    },
    BuiltinSignatureDescriptor {
        label: "S = string(formatSpec, A...)",
        inputs: &STRING_INPUTS_FORMAT,
        outputs: &STRING_OUTPUT_S,
    },
];
87
/// The single error descriptor used by this builtin. Its `identifier` is attached
/// to every error built through `string_error_with_detail`, and its `message` is
/// the prefix applied to details that do not already start with `string:`.
const STRING_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.STRING.INVALID_INPUT",
    identifier: Some("RunMat:string:InvalidInput"),
    when: "Input conversion/formatting/encoding constraints are violated.",
    message: "string: invalid input",
};
94
/// Error table referenced by `STRING_DESCRIPTOR`; currently a single entry.
const STRING_ERRORS: [BuiltinErrorDescriptor; 1] = [STRING_ERROR_INVALID_INPUT];
96
/// Public descriptor for the `string` builtin, tying together its signatures and
/// error table. It is referenced by path from the `descriptor(...)` argument of the
/// `#[runtime_builtin]` attribute on `string_builtin`.
pub const STRING_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &STRING_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &STRING_ERRORS,
};
103
// GPU metadata for `string`. The spec declares no supported precisions and no
// provider hooks, and uses the `GatherImmediately` residency policy; per `notes`,
// the conversion always runs on the CPU after GPU tensors are gathered to the host.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::core::string")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "string",
    op_kind: GpuOpKind::Custom("conversion"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes: "Always converts on the CPU; GPU tensors are gathered to host memory before conversion.",
};
119
// Fusion metadata for `string`. No elementwise or reduction kernel is supplied;
// per `notes`, the builtin is not eligible for fusion and always produces host
// string arrays.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::core::string")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "string",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes:
        "Conversion builtin; not eligible for fusion and always materialises host string arrays.",
};
131
132#[runtime_builtin(
133    name = "string",
134    category = "strings/core",
135    summary = "Convert numeric, logical, and text inputs into string arrays.",
136    keywords = "string,convert,text,char,gpu",
137    accel = "sink",
138    type_resolver(string_array_type),
139    descriptor(crate::builtins::strings::core::string::STRING_DESCRIPTOR),
140    builtin_path = "crate::builtins::strings::core::string"
141)]
142async fn string_builtin(value: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
143    if rest.is_empty() {
144        let gathered = gather_if_needed_async(&value)
145            .await
146            .map_err(|flow| remap_string_flow(flow))?;
147        let array = convert_to_string_array(gathered, StringEncoding::Utf8).await?;
148        return Ok(Value::StringArray(array));
149    }
150
151    let mut args = rest;
152    let format_value = gather_if_needed_async(&value)
153        .await
154        .map_err(|flow| remap_string_flow(flow))?;
155
156    if args.len() == 1 {
157        let arg = args.pop().unwrap();
158        let gathered_arg = gather_if_needed_async(&arg)
159            .await
160            .map_err(|flow| remap_string_flow(flow))?;
161        if let Some(encoding) = try_encoding_argument(&format_value, &gathered_arg)? {
162            let array = convert_to_string_array(format_value, encoding).await?;
163            return Ok(Value::StringArray(array));
164        }
165        let formatted = format_from_spec(format_value, vec![gathered_arg]).await?;
166        return Ok(Value::StringArray(formatted));
167    }
168
169    let mut gathered_args = Vec::with_capacity(args.len());
170    for arg in args {
171        gathered_args.push(
172            gather_if_needed_async(&arg)
173                .await
174                .map_err(|flow| remap_string_flow(flow))?,
175        );
176    }
177    let formatted = format_from_spec(format_value, gathered_args).await?;
178    Ok(Value::StringArray(formatted))
179}
180
/// Character encodings understood by the `string` builtin. UTF-8 is the only
/// variant; `parse_encoding_text` maps every accepted alias onto it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum StringEncoding {
    /// UTF-8 text.
    Utf8,
}
185
186fn try_encoding_argument(
187    first: &Value,
188    candidate: &Value,
189) -> BuiltinResult<Option<StringEncoding>> {
190    if !matches!(
191        first,
192        Value::CharArray(_) | Value::String(_) | Value::StringArray(_) | Value::Cell(_)
193    ) {
194        return Ok(None);
195    }
196    if has_format_placeholders(first) {
197        return Ok(None);
198    }
199    if let Value::Cell(cell) = first {
200        if !cell_contains_only_text_scalars(cell) {
201            return Ok(None);
202        }
203    }
204    let Some(text) = value_to_scalar_text(candidate) else {
205        return Ok(None);
206    };
207    parse_encoding_text(&text).map(Some)
208}
209
210fn parse_encoding_text(raw: &str) -> BuiltinResult<StringEncoding> {
211    let trimmed = raw.trim();
212    let lowered = trimmed.to_ascii_lowercase();
213    match lowered.as_str() {
214        "utf-8" | "utf8" | "unicode" | "system" => Ok(StringEncoding::Utf8),
215        _ => Err(string_flow(format!(
216            "string: unsupported character encoding '{trimmed}'; only UTF-8 is available"
217        ))),
218    }
219}
220
221fn cell_contains_only_text_scalars(cell: &runmat_builtins::CellArray) -> bool {
222    cell.data.iter().all(|ptr| match &**ptr {
223        Value::String(_) => true,
224        Value::StringArray(sa) => sa.data.len() <= 1,
225        Value::CharArray(ca) => ca.rows <= 1,
226        _ => false,
227    })
228}
229
/// Reports whether `text` contains at least one printf-style conversion.
///
/// A conversion is a `%` followed by optional flags (`+`, `-`, `0`, `#`), an
/// optional decimal width, an optional `.precision`, and then an ASCII letter.
/// `%%` is an escaped percent sign and never counts. A `%` that is not completed
/// by a letter is ignored and scanning resumes at the character that stopped it.
fn text_has_format_placeholder(text: &str) -> bool {
    let mut remaining = text;
    while let Some(pos) = remaining.find('%') {
        // '%' is a single byte, so `pos + 1` is always a char boundary.
        let after_percent = &remaining[pos + 1..];

        // Escaped percent sign: skip both characters.
        if let Some(tail) = after_percent.strip_prefix('%') {
            remaining = tail;
            continue;
        }

        let after_flags =
            after_percent.trim_start_matches(|c: char| matches!(c, '+' | '-' | '0' | '#'));
        let after_width = after_flags.trim_start_matches(|c: char| c.is_ascii_digit());
        let after_precision = match after_width.strip_prefix('.') {
            Some(tail) => tail.trim_start_matches(|c: char| c.is_ascii_digit()),
            None => after_width,
        };

        if matches!(after_precision.chars().next(), Some(conv) if conv.is_ascii_alphabetic()) {
            return true;
        }

        // Not a conversion; the character that ended the scan has not been
        // consumed and may itself be a new '%'.
        remaining = after_precision;
    }
    false
}
260
261fn has_format_placeholders(value: &Value) -> bool {
262    match value {
263        Value::String(s) => text_has_format_placeholder(s),
264        Value::StringArray(sa) => sa.data.iter().any(|s| text_has_format_placeholder(s)),
265        Value::CharArray(ca) => {
266            for row in 0..ca.rows {
267                let mut row_str = String::with_capacity(ca.cols);
268                for col in 0..ca.cols {
269                    row_str.push(ca.data[row * ca.cols + col]);
270                }
271                if text_has_format_placeholder(&row_str) {
272                    return true;
273                }
274            }
275            false
276        }
277        Value::Cell(cell) => {
278            for ptr in &cell.data {
279                let element = (**ptr).clone();
280                if has_format_placeholders(&element) {
281                    return true;
282                }
283            }
284            false
285        }
286        _ => false,
287    }
288}
289
/// A format specification flattened into individual format strings plus the
/// shape of the value they were extracted from (see `extract_format_spec`).
pub(crate) struct FormatSpecData {
    /// One format string per element of the original value.
    pub(crate) specs: Vec<String>,
    /// Dimensions of the original value.
    pub(crate) shape: Vec<usize>,
}
294
/// A format data argument flattened into scalar values plus the shape of the
/// value they were extracted from (see `extract_argument_data`).
struct ArgumentData {
    /// One scalar `Value` per element of the original argument.
    values: Vec<Value>,
    /// Dimensions of the original argument.
    shape: Vec<usize>,
}
299
/// Builds the builtin's standard error from `message`, using the
/// `STRING_ERROR_INVALID_INPUT` descriptor for the identifier and prefix rules.
fn string_flow(message: impl Into<String>) -> RuntimeError {
    string_error_with_detail(&STRING_ERROR_INVALID_INPUT, message)
}
303
304fn string_error_with_detail(
305    error: &'static BuiltinErrorDescriptor,
306    detail: impl Into<String>,
307) -> RuntimeError {
308    let detail = detail.into();
309    let message = if detail.starts_with("string:") {
310        detail
311    } else {
312        format!("{}: {detail}", error.message)
313    };
314    let mut builder = build_runtime_error(message).with_builtin("string");
315    if let Some(identifier) = error.identifier {
316        builder = builder.with_identifier(identifier);
317    }
318    builder.build()
319}
320
/// Passes an error raised by a helper (gathering, formatting) through
/// `map_control_flow_with_builtin` with the builtin name `"string"`.
// NOTE(review): presumably this re-attributes the error to `string` — confirm
// against `map_control_flow_with_builtin`.
fn remap_string_flow(err: RuntimeError) -> RuntimeError {
    map_control_flow_with_builtin(err, "string")
}
324
325pub(crate) async fn format_from_spec(
326    format_value: Value,
327    args: Vec<Value>,
328) -> crate::BuiltinResult<StringArray> {
329    let spec = extract_format_spec(format_value).await?;
330    let mut arguments = Vec::with_capacity(args.len());
331    for arg in args {
332        arguments.push(extract_argument_data(arg).await?);
333    }
334
335    let (target_len, mut target_shape) = resolve_target_shape(&spec, &arguments)?;
336
337    if target_len == 0 {
338        let shape = if target_shape.is_empty() {
339            if spec.shape.is_empty() {
340                vec![0, 0]
341            } else {
342                spec.shape.clone()
343            }
344        } else {
345            target_shape
346        };
347        return StringArray::new(Vec::new(), shape)
348            .map_err(|e| string_flow(format!("string: {e}")));
349    }
350
351    let spec_len = spec.specs.len();
352    if spec_len == 0 {
353        return Err(string_flow(
354            "string: formatSpec must contain at least one element when formatting with data",
355        ));
356    }
357
358    for arg in &arguments {
359        if target_len > 0 && arg.values.is_empty() {
360            return Err(string_flow(
361                "string: format data arguments must be scalars or match formatSpec size",
362            ));
363        }
364    }
365
366    let mut output = Vec::with_capacity(target_len);
367    for idx in 0..target_len {
368        let spec_idx = if spec_len == 1 { 0 } else { idx };
369        let spec_str = &spec.specs[spec_idx];
370        let mut per_call = Vec::with_capacity(arguments.len());
371        for arg in &arguments {
372            let value =
373                match arg.values.len() {
374                    0 => continue,
375                    1 => arg.values[0].clone(),
376                    len if len == target_len => arg.values[idx].clone(),
377                    _ => return Err(string_flow(
378                        "string: format data arguments must be scalars or match formatSpec size",
379                    )),
380                };
381            per_call.push(value);
382        }
383        let formatted =
384            format_variadic(spec_str, &per_call).map_err(|flow| remap_string_flow(flow))?;
385        output.push(formatted);
386    }
387
388    if target_shape.is_empty() {
389        target_shape = if spec_len > 1 {
390            spec.shape.clone()
391        } else {
392            vec![target_len, 1]
393        };
394    }
395
396    if tensor::element_count(&target_shape) != target_len {
397        target_shape = vec![target_len, 1];
398    }
399
400    StringArray::new(output, target_shape).map_err(|e| string_flow(format!("string: {e}")))
401}
402
403fn resolve_target_shape(
404    spec: &FormatSpecData,
405    args: &[ArgumentData],
406) -> BuiltinResult<(usize, Vec<usize>)> {
407    let mut target_len = spec.specs.len();
408    let mut target_shape = if target_len > 1 || (target_len == 1 && !spec.shape.is_empty()) {
409        spec.shape.clone()
410    } else {
411        Vec::new()
412    };
413
414    for arg in args {
415        let len = arg.values.len();
416        if len == 0 {
417            continue;
418        }
419        if target_len == 0 {
420            target_len = len;
421            target_shape = arg.shape.clone();
422            continue;
423        }
424        if len == 1 {
425            continue;
426        }
427        if target_len == 1 {
428            target_len = len;
429            target_shape = arg.shape.clone();
430            continue;
431        }
432        if len != target_len {
433            return Err(string_flow(
434                "string: format data arguments must be scalars or match formatSpec size",
435            ));
436        }
437        if target_shape.is_empty() && len > 1 {
438            target_shape = arg.shape.clone();
439        }
440    }
441
442    if target_len == 0 {
443        let shape = if spec.shape.is_empty() {
444            vec![0, 0]
445        } else {
446            spec.shape.clone()
447        };
448        return Ok((0, shape));
449    }
450
451    if target_shape.is_empty() {
452        target_shape = if spec.shape.is_empty() {
453            vec![target_len, 1]
454        } else {
455            spec.shape.clone()
456        };
457        if spec.specs.len() == 1 && tensor::element_count(&target_shape) != target_len {
458            target_shape = vec![target_len, 1];
459        }
460    }
461
462    if tensor::element_count(&target_shape) != target_len {
463        target_shape = vec![target_len, 1];
464    }
465
466    Ok((target_len, target_shape))
467}
468
469pub(crate) async fn extract_format_spec(value: Value) -> BuiltinResult<FormatSpecData> {
470    match value {
471        Value::String(s) => Ok(FormatSpecData {
472            specs: vec![s],
473            shape: vec![1, 1],
474        }),
475        Value::StringArray(sa) => Ok(FormatSpecData {
476            specs: sa.data.clone(),
477            shape: sa.shape.clone(),
478        }),
479        Value::CharArray(ca) => {
480            let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
481            Ok(FormatSpecData {
482                specs: array.data,
483                shape: array.shape,
484            })
485        }
486        Value::Cell(cell) => {
487            let mut specs = Vec::with_capacity(cell.data.len());
488            for col in 0..cell.cols {
489                for row in 0..cell.rows {
490                    let idx = row * cell.cols + col;
491                    let element = &cell.data[idx];
492                    let value = (**element).clone();
493                    let gathered = gather_if_needed_async(&value)
494                        .await
495                        .map_err(|flow| remap_string_flow(flow))?;
496                    let text = value_to_scalar_text(&gathered).ok_or_else(|| {
497                        string_flow("string: formatSpec cell elements must be text scalars")
498                    })?;
499                    specs.push(text);
500                }
501            }
502            Ok(FormatSpecData {
503                specs,
504                shape: vec![cell.rows, cell.cols],
505            })
506        }
507        _ => Err(string_flow(
508            "string: formatSpec must be text (string, char, or cellstr)",
509        )),
510    }
511}
512
513#[async_recursion::async_recursion(?Send)]
514async fn extract_argument_data(value: Value) -> BuiltinResult<ArgumentData> {
515    match value {
516        Value::String(s) => Ok(ArgumentData {
517            values: vec![Value::String(s)],
518            shape: vec![1, 1],
519        }),
520        Value::StringArray(sa) => Ok(ArgumentData {
521            values: sa.data.into_iter().map(Value::String).collect(),
522            shape: sa.shape,
523        }),
524        Value::CharArray(ca) => {
525            let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
526            Ok(ArgumentData {
527                values: array.data.into_iter().map(Value::String).collect(),
528                shape: array.shape,
529            })
530        }
531        Value::Num(n) => Ok(ArgumentData {
532            values: vec![Value::Num(n)],
533            shape: vec![1, 1],
534        }),
535        Value::Int(i) => Ok(ArgumentData {
536            values: vec![Value::Int(i)],
537            shape: vec![1, 1],
538        }),
539        Value::Bool(b) => Ok(ArgumentData {
540            values: vec![Value::Num(if b { 1.0 } else { 0.0 })],
541            shape: vec![1, 1],
542        }),
543        Value::Tensor(t) => Ok(ArgumentData {
544            values: t.data.into_iter().map(Value::Num).collect(),
545            shape: t.shape,
546        }),
547        Value::Complex(re, im) => Ok(ArgumentData {
548            values: vec![Value::String(complex_to_string(re, im))],
549            shape: vec![1, 1],
550        }),
551        Value::ComplexTensor(t) => Ok(ArgumentData {
552            values: t
553                .data
554                .into_iter()
555                .map(|(re, im)| Value::String(complex_to_string(re, im)))
556                .collect(),
557            shape: t.shape,
558        }),
559        Value::LogicalArray(la) => Ok(ArgumentData {
560            values: la
561                .data
562                .into_iter()
563                .map(|byte| Value::Num(if byte != 0 { 1.0 } else { 0.0 }))
564                .collect(),
565            shape: la.shape,
566        }),
567        Value::Cell(cell) => {
568            let mut values = Vec::with_capacity(cell.data.len());
569            for col in 0..cell.cols {
570                for row in 0..cell.rows {
571                    let idx = row * cell.cols + col;
572                    let element = &cell.data[idx];
573                    let value = (**element).clone();
574                    let gathered = gather_if_needed_async(&value)
575                        .await
576                        .map_err(|flow| remap_string_flow(flow))?;
577                    let value = match gathered {
578                        Value::String(s) => Value::String(s),
579                        Value::StringArray(sa) if sa.data.len() == 1 => {
580                            Value::String(sa.data[0].clone())
581                        }
582                        Value::CharArray(ca) => {
583                            if ca.rows != 1 {
584                                return Err(string_flow(
585                                    "string: cell format arguments must contain char row vectors",
586                                ));
587                            }
588                            let mut row_str = String::with_capacity(ca.cols);
589                            for ch in ca.data {
590                                row_str.push(ch);
591                            }
592                            Value::String(row_str)
593                        }
594                        Value::Num(n) => Value::Num(n),
595                        Value::Int(i) => Value::Int(i),
596                        Value::Bool(b) => Value::Num(if b { 1.0 } else { 0.0 }),
597                        Value::Tensor(t) => {
598                            if t.data.len() != 1 {
599                                return Err(string_flow(
600                                    "string: cell format arguments must contain scalar values",
601                                ));
602                            }
603                            Value::Num(t.data[0])
604                        }
605                        Value::LogicalArray(la) => {
606                            if la.data.len() != 1 {
607                                return Err(string_flow(
608                                    "string: cell format arguments must contain scalar values",
609                                ));
610                            }
611                            Value::Num(if la.data[0] != 0 { 1.0 } else { 0.0 })
612                        }
613                        Value::Complex(re, im) => Value::String(complex_to_string(re, im)),
614                        Value::ComplexTensor(t) => {
615                            if t.data.len() != 1 {
616                                return Err(string_flow(
617                                    "string: cell format arguments must contain scalar values",
618                                ));
619                            }
620                            let (re, im) = t.data[0];
621                            Value::String(complex_to_string(re, im))
622                        }
623                        other => {
624                            return Err(string_flow(format!(
625                                "string: unsupported cell format argument {other:?}; expected scalar text or numeric values"
626                            )))
627                        }
628                    };
629                    values.push(value);
630                }
631            }
632            Ok(ArgumentData {
633                values,
634                shape: vec![cell.rows, cell.cols],
635            })
636        }
637        Value::GpuTensor(handle) => {
638            let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
639                .await
640                .map_err(|flow| remap_string_flow(flow))?;
641            extract_argument_data(gathered).await
642        }
643        Value::MException(_)
644        | Value::HandleObject(_)
645        | Value::Object(_)
646        | Value::Listener(_)
647        | Value::Struct(_)
648        | Value::OutputList(_) => Err(string_flow("string: unsupported format argument type")),
649        Value::FunctionHandle(_)
650        | Value::ExternalFunctionHandle(_)
651        | Value::MethodFunctionHandle(_)
652        | Value::BoundFunctionHandle { .. }
653        | Value::Closure(_)
654        | Value::ClassRef(_) => Err(string_flow("string: unsupported format argument type")),
655    }
656}
657
658#[async_recursion::async_recursion(?Send)]
659async fn convert_to_string_array(
660    value: Value,
661    encoding: StringEncoding,
662) -> BuiltinResult<StringArray> {
663    if let Some(array) = crate::builtins::datetime::datetime_string_array(&value)
664        .map_err(|err| string_flow(err.message().to_string()))?
665    {
666        return Ok(array);
667    }
668    if let Some(array) = crate::builtins::duration::duration_string_array(&value)
669        .map_err(|err| string_flow(err.message().to_string()))?
670    {
671        return Ok(array);
672    }
673    match value {
674        Value::String(s) => string_scalar(s),
675        Value::StringArray(sa) => Ok(sa),
676        Value::CharArray(ca) => char_array_to_string_array(ca, encoding),
677        Value::Tensor(tensor) => tensor_to_string_array(tensor),
678        Value::ComplexTensor(tensor) => complex_tensor_to_string_array(tensor),
679        Value::LogicalArray(logical) => logical_array_to_string_array(logical),
680        Value::Cell(cell) => cell_array_to_string_array(cell, encoding).await,
681        Value::Num(n) => string_scalar(number_to_string(n)),
682        Value::Int(i) => string_scalar(int_value_to_string(&i)),
683        Value::Bool(b) => string_scalar(bool_to_string(b).to_string()),
684        Value::Complex(re, im) => string_scalar(complex_to_string(re, im)),
685        Value::GpuTensor(handle) => {
686            // Defensive fallback: gather and retry.
687            let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
688                .await
689                .map_err(|flow| remap_string_flow(flow))?;
690            convert_to_string_array(gathered, encoding).await
691        }
692        Value::Object(_) | Value::HandleObject(_) | Value::Listener(_) => Err(string_flow(
693            "string: unsupported conversion from handle-based objects. Use class-specific formatters.",
694        )),
695        Value::Struct(_) => Err(string_flow(
696            "string: structs are not supported for automatic conversion",
697        )),
698        Value::FunctionHandle(_) | Value::ExternalFunctionHandle(_) | Value::MethodFunctionHandle(_) | Value::BoundFunctionHandle { .. }
699        | Value::Closure(_)
700        | Value::ClassRef(_)
701        | Value::MException(_)
702        | Value::OutputList(_) => Err(
703            string_flow("string: unsupported conversion for function or exception handles"),
704        ),
705    }
706}
707
708fn string_scalar<S: Into<String>>(text: S) -> BuiltinResult<StringArray> {
709    StringArray::new(vec![text.into()], vec![1, 1]).map_err(|e| string_flow(format!("string: {e}")))
710}
711
712fn value_to_scalar_text(value: &Value) -> Option<String> {
713    match value {
714        Value::String(s) => Some(s.clone()),
715        Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
716        Value::CharArray(ca) if ca.rows == 1 => Some(ca.data.iter().collect()),
717        _ => None,
718    }
719}
720
721fn char_array_to_string_array(
722    array: CharArray,
723    _encoding: StringEncoding,
724) -> BuiltinResult<StringArray> {
725    let mut rows: Vec<String> = Vec::with_capacity(array.rows);
726    for r in 0..array.rows {
727        let mut row = String::with_capacity(array.cols);
728        for c in 0..array.cols {
729            row.push(array.data[r * array.cols + c]);
730        }
731        rows.push(row);
732    }
733    let shape = if array.rows == 0 {
734        vec![0, 1]
735    } else {
736        vec![array.rows, 1]
737    };
738    StringArray::new(rows, shape).map_err(|e| string_flow(format!("string: {e}")))
739}
740
741fn tensor_to_string_array(tensor: Tensor) -> BuiltinResult<StringArray> {
742    let mut strings = Vec::with_capacity(tensor.data.len());
743    for &value in &tensor.data {
744        strings.push(number_to_string(value));
745    }
746    StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
747}
748
749fn complex_tensor_to_string_array(tensor: ComplexTensor) -> BuiltinResult<StringArray> {
750    let mut strings = Vec::with_capacity(tensor.data.len());
751    for &(re, im) in &tensor.data {
752        strings.push(complex_to_string(re, im));
753    }
754    StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
755}
756
757fn logical_array_to_string_array(logical: LogicalArray) -> BuiltinResult<StringArray> {
758    let mut strings = Vec::with_capacity(logical.data.len());
759    for &byte in &logical.data {
760        strings.push(bool_to_string(byte != 0).to_string());
761    }
762    StringArray::new(strings, logical.shape).map_err(|e| string_flow(format!("string: {e}")))
763}
764
765async fn cell_array_to_string_array(
766    cell: runmat_builtins::CellArray,
767    _encoding: StringEncoding,
768) -> BuiltinResult<StringArray> {
769    let mut strings = Vec::with_capacity(cell.data.len());
770    for col in 0..cell.cols {
771        for row in 0..cell.rows {
772            let idx = row * cell.cols + col;
773            let element = &cell.data[idx];
774            let value = (**element).clone();
775            let gathered = gather_if_needed_async(&value)
776                .await
777                .map_err(|flow| remap_string_flow(flow))?;
778            strings.push(cell_element_to_string(&gathered)?);
779        }
780    }
781    StringArray::new(strings, vec![cell.rows, cell.cols])
782        .map_err(|e| string_flow(format!("string: {e}")))
783}
784
785fn cell_element_to_string(value: &Value) -> BuiltinResult<String> {
786    if let Some(array) = crate::builtins::datetime::datetime_string_array(value)
787        .map_err(|err| string_flow(err.message().to_string()))?
788    {
789        if array.data.len() == 1 {
790            return Ok(array.data[0].clone());
791        }
792        return Err(string_flow("string: cell datetime values must be scalar"));
793    }
794    if let Some(array) = crate::builtins::duration::duration_string_array(value)
795        .map_err(|err| string_flow(err.message().to_string()))?
796    {
797        if array.data.len() == 1 {
798            return Ok(array.data[0].clone());
799        }
800        return Err(string_flow("string: cell duration values must be scalar"));
801    }
802    match value {
803        Value::String(s) => Ok(s.clone()),
804        Value::StringArray(sa) => {
805            if sa.data.len() == 1 {
806                Ok(sa.data[0].clone())
807            } else {
808                Err(string_flow(
809                    "string: cell elements must contain string scalars, not string arrays",
810                ))
811            }
812        }
813        Value::CharArray(ca) => {
814            if ca.rows == 1 {
815                Ok(ca.data.iter().collect())
816            } else {
817                Err(string_flow(
818                    "string: cell character arrays must be row vectors",
819                ))
820            }
821        }
822        Value::Num(n) => Ok(number_to_string(*n)),
823        Value::Int(i) => Ok(int_value_to_string(i)),
824        Value::Bool(b) => Ok(bool_to_string(*b).to_string()),
825        Value::LogicalArray(array) => {
826            if array.data.len() == 1 {
827                Ok(bool_to_string(array.data[0] != 0).to_string())
828            } else {
829                Err(string_flow("string: cell logical values must be scalar"))
830            }
831        }
832        Value::Tensor(t) => {
833            if t.data.len() == 1 {
834                Ok(number_to_string(t.data[0]))
835            } else {
836                Err(string_flow("string: cell numeric values must be scalar"))
837            }
838        }
839        Value::Complex(re, im) => Ok(complex_to_string(*re, *im)),
840        Value::ComplexTensor(t) => {
841            if t.data.len() == 1 {
842                let (re, im) = t.data[0];
843                Ok(complex_to_string(re, im))
844            } else {
845                Err(string_flow("string: cell complex values must be scalar"))
846            }
847        }
848        other => Err(string_flow(format!(
849            "string: unsupported cell element type {:?}; expected text or scalar values",
850            other
851        ))),
852    }
853}
854
/// Maps a boolean to its lowercase text form, `"true"` or `"false"`.
fn bool_to_string(value: bool) -> &'static str {
    match value {
        true => "true",
        false => "false",
    }
}
862
863fn int_value_to_string(value: &IntValue) -> String {
864    match value {
865        IntValue::I8(v) => v.to_string(),
866        IntValue::I16(v) => v.to_string(),
867        IntValue::I32(v) => v.to_string(),
868        IntValue::I64(v) => v.to_string(),
869        IntValue::U8(v) => v.to_string(),
870        IntValue::U16(v) => v.to_string(),
871        IntValue::U32(v) => v.to_string(),
872        IntValue::U64(v) => v.to_string(),
873    }
874}
875
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use crate::builtins::common::test_support;
    use runmat_builtins::{CellArray, IntValue, ResolveContext, StringArray, StructValue, Type};

    /// Blocks on the future returned by the `string` builtin so tests can call it synchronously.
    fn string_builtin(value: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
        let pending = super::string_builtin(value, rest);
        futures::executor::block_on(pending)
    }

    /// Extracts the message text of a runtime error as an owned string.
    fn error_message(err: crate::RuntimeError) -> String {
        err.message().to_string()
    }

    /// Unwraps a `Value::StringArray`, panicking on any other variant.
    #[track_caller]
    fn expect_string_array(value: Value) -> StringArray {
        match value {
            Value::StringArray(sa) => sa,
            other => panic!("expected string array, got {other:?}"),
        }
    }

    /// Asserts that `value` is a string array with the given shape and element text.
    #[track_caller]
    fn assert_string_array(value: Value, shape: &[usize], data: &[&str]) {
        let sa = expect_string_array(value);
        assert_eq!(sa.shape, shape);
        assert_eq!(sa.data, data);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_from_numeric_scalar() {
        let out = string_builtin(Value::Num(42.0), Vec::new()).expect("string");
        assert_string_array(out, &[1, 1], &["42"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_from_numeric_tensor_preserves_shape() {
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![2, 2]).unwrap();
        let out = string_builtin(Value::Tensor(tensor), Vec::new()).expect("string");
        assert_string_array(out, &[2, 2], &["1", "2", "3", "4"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_from_logical_array_uses_boolean_text() {
        let logical = LogicalArray::new(vec![1, 0, 1], vec![1, 3]).unwrap();
        let out = string_builtin(Value::LogicalArray(logical), Vec::new()).expect("string");
        assert_string_array(out, &[1, 3], &["true", "false", "true"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_from_char_array_produces_column_vector() {
        let chars = CharArray::new("abc".chars().collect(), 1, 3).unwrap();
        let out = string_builtin(Value::CharArray(chars), Vec::new()).expect("string");
        assert_string_array(out, &[1, 1], &["abc"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_from_cell_array() {
        let elements = vec![Value::Bool(true), Value::Int(IntValue::I32(7))];
        let cell = CellArray::new(elements, 1, 2).expect("cell array");
        let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
        assert_string_array(out, &[1, 2], &["true", "7"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_from_cell_array_column_major() {
        let elements: Vec<Value> = [1, 2, 3, 4]
            .into_iter()
            .map(|n| Value::Int(IntValue::I32(n)))
            .collect();
        let cell = CellArray::new(elements, 2, 2).expect("cell array");
        let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
        // The expected element order differs from the order the cell was built in.
        assert_string_array(out, &[2, 2], &["1", "3", "2", "4"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_cell_element_requires_scalar_numeric() {
        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let cell =
            CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell with numeric tensor");
        let failure = string_builtin(Value::Cell(cell), Vec::new()).unwrap_err();
        let err = error_message(failure);
        assert!(err.contains("cell numeric values must be scalar"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_rejects_struct_input() {
        let failure =
            string_builtin(Value::Struct(StructValue::new()), Vec::new()).expect_err("string");
        let err = error_message(failure);
        assert!(err.contains("structs are not supported"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_errors_on_unsupported_encoding() {
        let input = Value::CharArray(CharArray::new_row("abc"));
        let failure = string_builtin(input, vec![Value::from("UTF-16")]).unwrap_err();
        let err = error_message(failure);
        assert!(
            err.contains("unsupported character encoding"),
            "unexpected error message: {err}"
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_accepts_system_encoding_alias() {
        let input = Value::CharArray(CharArray::new_row("hello"));
        let out = string_builtin(input, vec![Value::from("system")]).expect("string");
        assert_string_array(out, &[1, 1], &["hello"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_encoding_allows_percent_literal() {
        let input = Value::CharArray(CharArray::new_row("100% Done"));
        let out = string_builtin(input, vec![Value::from("utf8")]).expect("string");
        assert_string_array(out, &[1, 1], &["100% Done"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_spec_cell_requires_text_scalars() {
        let cell = CellArray::new(vec![Value::Num(1.0)], 1, 1).expect("cell");
        let failure =
            string_builtin(Value::Cell(cell), vec![Value::from("data")]).expect_err("string");
        let err = error_message(failure);
        assert!(
            err.contains("formatSpec cell elements must be text scalars"),
            "unexpected error: {err}"
        );
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_cell_argument_requires_scalar_values() {
        let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
        let cell = CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell argument values");
        let failure =
            string_builtin(Value::from("%d"), vec![Value::Cell(cell)]).expect_err("string");
        let err = error_message(failure);
        assert!(err.contains("cell format arguments must contain scalar values"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_handles_large_unsigned_int() {
        let value = Value::Int(IntValue::U64(u64::MAX));
        let out = string_builtin(value, Vec::new()).expect("string");
        let sa = expect_string_array(out);
        assert_eq!(sa.shape, vec![1, 1]);
        assert_eq!(sa.data, vec![u64::MAX.to_string()]);
    }

    #[test]
    fn string_descriptor_signatures_cover_core_forms() {
        let mut labels: Vec<&str> = Vec::new();
        for signature in STRING_DESCRIPTOR.signatures.iter() {
            labels.push(signature.label);
        }
        assert_eq!(
            labels,
            vec![
                "S = string(X)",
                "S = string(X, encoding)",
                "S = string(formatSpec, A...)",
            ]
        );

        let mut codes: Vec<&str> = Vec::new();
        for error in STRING_DESCRIPTOR.errors.iter() {
            codes.push(error.code);
        }
        assert_eq!(codes, vec!["RM.STRING.INVALID_INPUT"]);
    }

    #[test]
    fn string_struct_input_uses_stable_identifier() {
        let failure = string_builtin(Value::Struct(StructValue::new()), Vec::new()).unwrap_err();
        assert_eq!(failure.identifier(), Some("RunMat:string:InvalidInput"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_numeric_scalar() {
        let out = string_builtin(Value::from("%d"), vec![Value::Num(7.0)]).expect("string");
        assert_string_array(out, &[1, 1], &["7"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_broadcast_over_tensor() {
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![1, 3]).unwrap();
        let out =
            string_builtin(Value::from("Trial %d"), vec![Value::Tensor(tensor)]).expect("string");
        assert_string_array(out, &[1, 3], &["Trial 1", "Trial 2", "Trial 3"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_string_array_spec_alignment() {
        let spec = StringArray::new(vec!["[%d]".into(), "Value %d".into()], vec![1, 2]).unwrap();
        let tensor = Tensor::new(vec![5.0, 6.0], vec![1, 2]).unwrap();
        let out =
            string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).expect("string");
        assert_string_array(out, &[1, 2], &["[5]", "Value 6"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_prefers_placeholders_over_encoding_hint() {
        // The second argument is a valid-looking encoding name, but the expected
        // output shows it being consumed as the `%s` format argument instead.
        let out = string_builtin(Value::from("%s"), vec![Value::from("UTF-8")]).expect("string");
        assert_string_array(out, &[1, 1], &["UTF-8"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_format_mismatched_lengths_errors() {
        let spec = StringArray::new(vec!["%d".into(), "%d".into()], vec![2, 1]).unwrap();
        let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
        let failure =
            string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).unwrap_err();
        let err = error_message(failure);
        assert!(err.contains("must be scalars or match formatSpec size"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_gpu_numeric_tensor() {
        test_support::with_test_provider(|provider| {
            let tensor = Tensor::new(vec![10.0, 20.0], vec![1, 2]).unwrap();
            let view = runmat_accelerate_api::HostTensorView {
                data: &tensor.data,
                shape: &tensor.shape,
            };
            let handle = provider.upload(&view).expect("upload");
            let result = string_builtin(Value::GpuTensor(handle), Vec::new())
                .expect("gpu string conversion");
            assert_string_array(result, &[1, 2], &["10", "20"]);
        });
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    #[cfg(feature = "wgpu")]
    fn string_wgpu_numeric_tensor_matches_cpu() {
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let tensor = Tensor::new(vec![4.0, 5.0, 6.0], vec![1, 3]).unwrap();

        // Reference result computed from the host tensor.
        let cpu = string_builtin(Value::Tensor(tensor.clone()), Vec::new())
            .expect("cpu string conversion");

        // Same data uploaded through the registered provider.
        let view = runmat_accelerate_api::HostTensorView {
            data: &tensor.data,
            shape: &tensor.shape,
        };
        let handle = runmat_accelerate_api::provider()
            .unwrap()
            .upload(&view)
            .expect("gpu upload");
        let gpu =
            string_builtin(Value::GpuTensor(handle), Vec::new()).expect("gpu string conversion");

        match (cpu, gpu) {
            (Value::StringArray(expect), Value::StringArray(actual)) => {
                assert_eq!(actual.shape, expect.shape);
                assert_eq!(actual.data, expect.data);
            }
            other => panic!("unexpected results {other:?}"),
        }
    }

    #[test]
    fn string_type_is_string_array() {
        let resolved = string_array_type(&[Type::Num], &ResolveContext::new(Vec::new()));
        assert_eq!(resolved, Type::cell_of(Type::String));
    }
}