1use runmat_builtins::{
4 BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
5 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
6 CharArray, ComplexTensor, IntValue, LogicalArray, StringArray, Tensor, Value,
7};
8use runmat_macros::runtime_builtin;
9
10use crate::builtins::common::format::{complex_to_string, format_variadic, number_to_string};
11use crate::builtins::common::map_control_flow_with_builtin;
12use crate::builtins::common::spec::{
13 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
14 ReductionNaN, ResidencyPolicy, ShapeRequirements,
15};
16use crate::builtins::common::tensor;
17use crate::builtins::strings::type_resolvers::string_array_type;
18use crate::{build_runtime_error, gather_if_needed_async, BuiltinResult, RuntimeError};
19
20const STRING_OUTPUT_S: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
21 name: "S",
22 ty: BuiltinParamType::Any,
23 arity: BuiltinParamArity::Required,
24 default: None,
25 description: "String scalar/array result.",
26}];
27
28const STRING_INPUTS_VALUE: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
29 name: "X",
30 ty: BuiltinParamType::Any,
31 arity: BuiltinParamArity::Required,
32 default: None,
33 description: "Input value to convert to string array.",
34}];
35
36const STRING_INPUTS_VALUE_ENCODING: [BuiltinParamDescriptor; 2] = [
37 BuiltinParamDescriptor {
38 name: "X",
39 ty: BuiltinParamType::Any,
40 arity: BuiltinParamArity::Required,
41 default: None,
42 description: "Input value to convert to string array.",
43 },
44 BuiltinParamDescriptor {
45 name: "encoding",
46 ty: BuiltinParamType::StringScalar,
47 arity: BuiltinParamArity::Optional,
48 default: Some("\"UTF-8\""),
49 description: "Character encoding (UTF-8 aliases supported).",
50 },
51];
52
53const STRING_INPUTS_FORMAT: [BuiltinParamDescriptor; 2] = [
54 BuiltinParamDescriptor {
55 name: "formatSpec",
56 ty: BuiltinParamType::Any,
57 arity: BuiltinParamArity::Required,
58 default: None,
59 description: "Format specification text/cell/string array.",
60 },
61 BuiltinParamDescriptor {
62 name: "A",
63 ty: BuiltinParamType::Any,
64 arity: BuiltinParamArity::Variadic,
65 default: None,
66 description: "Formatting data arguments.",
67 },
68];
69
70const STRING_SIGNATURES: [BuiltinSignatureDescriptor; 3] = [
71 BuiltinSignatureDescriptor {
72 label: "S = string(X)",
73 inputs: &STRING_INPUTS_VALUE,
74 outputs: &STRING_OUTPUT_S,
75 },
76 BuiltinSignatureDescriptor {
77 label: "S = string(X, encoding)",
78 inputs: &STRING_INPUTS_VALUE_ENCODING,
79 outputs: &STRING_OUTPUT_S,
80 },
81 BuiltinSignatureDescriptor {
82 label: "S = string(formatSpec, A...)",
83 inputs: &STRING_INPUTS_FORMAT,
84 outputs: &STRING_OUTPUT_S,
85 },
86];
87
88const STRING_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
89 code: "RM.STRING.INVALID_INPUT",
90 identifier: Some("RunMat:string:InvalidInput"),
91 when: "Input conversion/formatting/encoding constraints are violated.",
92 message: "string: invalid input",
93};
94
95const STRING_ERRORS: [BuiltinErrorDescriptor; 1] = [STRING_ERROR_INVALID_INPUT];
96
97pub const STRING_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
98 signatures: &STRING_SIGNATURES,
99 output_mode: BuiltinOutputMode::Fixed,
100 completion_policy: BuiltinCompletionPolicy::Public,
101 errors: &STRING_ERRORS,
102};
103
104#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::core::string")]
105pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
106 name: "string",
107 op_kind: GpuOpKind::Custom("conversion"),
108 supported_precisions: &[],
109 broadcast: BroadcastSemantics::None,
110 provider_hooks: &[],
111 constant_strategy: ConstantStrategy::InlineLiteral,
112 residency: ResidencyPolicy::GatherImmediately,
113 nan_mode: ReductionNaN::Include,
114 two_pass_threshold: None,
115 workgroup_size: None,
116 accepts_nan_mode: false,
117 notes: "Always converts on the CPU; GPU tensors are gathered to host memory before conversion.",
118};
119
120#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::core::string")]
121pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
122 name: "string",
123 shape: ShapeRequirements::Any,
124 constant_strategy: ConstantStrategy::InlineLiteral,
125 elementwise: None,
126 reduction: None,
127 emits_nan: false,
128 notes:
129 "Conversion builtin; not eligible for fusion and always materialises host string arrays.",
130};
131
132#[runtime_builtin(
133 name = "string",
134 category = "strings/core",
135 summary = "Convert numeric, logical, and text inputs into string arrays.",
136 keywords = "string,convert,text,char,gpu",
137 accel = "sink",
138 type_resolver(string_array_type),
139 descriptor(crate::builtins::strings::core::string::STRING_DESCRIPTOR),
140 builtin_path = "crate::builtins::strings::core::string"
141)]
142async fn string_builtin(value: Value, rest: Vec<Value>) -> crate::BuiltinResult<Value> {
143 if rest.is_empty() {
144 let gathered = gather_if_needed_async(&value)
145 .await
146 .map_err(|flow| remap_string_flow(flow))?;
147 let array = convert_to_string_array(gathered, StringEncoding::Utf8).await?;
148 return Ok(Value::StringArray(array));
149 }
150
151 let mut args = rest;
152 let format_value = gather_if_needed_async(&value)
153 .await
154 .map_err(|flow| remap_string_flow(flow))?;
155
156 if args.len() == 1 {
157 let arg = args.pop().unwrap();
158 let gathered_arg = gather_if_needed_async(&arg)
159 .await
160 .map_err(|flow| remap_string_flow(flow))?;
161 if let Some(encoding) = try_encoding_argument(&format_value, &gathered_arg)? {
162 let array = convert_to_string_array(format_value, encoding).await?;
163 return Ok(Value::StringArray(array));
164 }
165 let formatted = format_from_spec(format_value, vec![gathered_arg]).await?;
166 return Ok(Value::StringArray(formatted));
167 }
168
169 let mut gathered_args = Vec::with_capacity(args.len());
170 for arg in args {
171 gathered_args.push(
172 gather_if_needed_async(&arg)
173 .await
174 .map_err(|flow| remap_string_flow(flow))?,
175 );
176 }
177 let formatted = format_from_spec(format_value, gathered_args).await?;
178 Ok(Value::StringArray(formatted))
179}
180
/// Character encodings accepted by `string(X, encoding)`.
///
/// UTF-8 is the only variant; `parse_encoding_text` maps every accepted alias
/// onto it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringEncoding {
    Utf8,
}
185
186fn try_encoding_argument(
187 first: &Value,
188 candidate: &Value,
189) -> BuiltinResult<Option<StringEncoding>> {
190 if !matches!(
191 first,
192 Value::CharArray(_) | Value::String(_) | Value::StringArray(_) | Value::Cell(_)
193 ) {
194 return Ok(None);
195 }
196 if has_format_placeholders(first) {
197 return Ok(None);
198 }
199 if let Value::Cell(cell) = first {
200 if !cell_contains_only_text_scalars(cell) {
201 return Ok(None);
202 }
203 }
204 let Some(text) = value_to_scalar_text(candidate) else {
205 return Ok(None);
206 };
207 parse_encoding_text(&text).map(Some)
208}
209
210fn parse_encoding_text(raw: &str) -> BuiltinResult<StringEncoding> {
211 let trimmed = raw.trim();
212 let lowered = trimmed.to_ascii_lowercase();
213 match lowered.as_str() {
214 "utf-8" | "utf8" | "unicode" | "system" => Ok(StringEncoding::Utf8),
215 _ => Err(string_flow(format!(
216 "string: unsupported character encoding '{trimmed}'; only UTF-8 is available"
217 ))),
218 }
219}
220
221fn cell_contains_only_text_scalars(cell: &runmat_builtins::CellArray) -> bool {
222 cell.data.iter().all(|ptr| match &**ptr {
223 Value::String(_) => true,
224 Value::StringArray(sa) => sa.data.len() <= 1,
225 Value::CharArray(ca) => ca.rows <= 1,
226 _ => false,
227 })
228}
229
/// Reports whether `text` contains something shaped like a printf conversion:
/// `%`, optional flags (`+ - 0 #`), optional width digits, optional `.` and
/// precision digits, then an ASCII letter.
///
/// `%%` is treated as an escaped percent sign. A `%` that is not followed by a
/// letter after the optional parts (for example `"100% Done"`) is not a
/// placeholder; scanning simply resumes at the character that stopped the match.
fn text_has_format_placeholder(text: &str) -> bool {
    let mut cursor = text.chars().peekable();
    while let Some(current) = cursor.next() {
        if current != '%' {
            continue;
        }
        // Escaped percent: consume the second '%' and keep scanning.
        if cursor.next_if_eq(&'%').is_some() {
            continue;
        }
        // Flags.
        while cursor
            .next_if(|flag| matches!(flag, '+' | '-' | '0' | '#'))
            .is_some()
        {}
        // Width.
        while cursor.next_if(char::is_ascii_digit).is_some() {}
        // Precision.
        if cursor.next_if_eq(&'.').is_some() {
            while cursor.next_if(char::is_ascii_digit).is_some() {}
        }
        // Conversion letter; only peeked so a non-letter is rescanned above.
        if cursor.peek().map_or(false, |conv| conv.is_ascii_alphabetic()) {
            return true;
        }
    }
    false
}
260
261fn has_format_placeholders(value: &Value) -> bool {
262 match value {
263 Value::String(s) => text_has_format_placeholder(s),
264 Value::StringArray(sa) => sa.data.iter().any(|s| text_has_format_placeholder(s)),
265 Value::CharArray(ca) => {
266 for row in 0..ca.rows {
267 let mut row_str = String::with_capacity(ca.cols);
268 for col in 0..ca.cols {
269 row_str.push(ca.data[row * ca.cols + col]);
270 }
271 if text_has_format_placeholder(&row_str) {
272 return true;
273 }
274 }
275 false
276 }
277 Value::Cell(cell) => {
278 for ptr in &cell.data {
279 let element = (**ptr).clone();
280 if has_format_placeholders(&element) {
281 return true;
282 }
283 }
284 false
285 }
286 _ => false,
287 }
288}
289
/// Format specifications extracted from a `formatSpec` argument.
///
/// `specs` holds one format string per element and `shape` records the
/// dimensions of the value they came from (see `extract_format_spec`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FormatSpecData {
    pub(crate) specs: Vec<String>,
    pub(crate) shape: Vec<usize>,
}
294
295struct ArgumentData {
296 values: Vec<Value>,
297 shape: Vec<usize>,
298}
299
300fn string_flow(message: impl Into<String>) -> RuntimeError {
301 string_error_with_detail(&STRING_ERROR_INVALID_INPUT, message)
302}
303
304fn string_error_with_detail(
305 error: &'static BuiltinErrorDescriptor,
306 detail: impl Into<String>,
307) -> RuntimeError {
308 let detail = detail.into();
309 let message = if detail.starts_with("string:") {
310 detail
311 } else {
312 format!("{}: {detail}", error.message)
313 };
314 let mut builder = build_runtime_error(message).with_builtin("string");
315 if let Some(identifier) = error.identifier {
316 builder = builder.with_identifier(identifier);
317 }
318 builder.build()
319}
320
321fn remap_string_flow(err: RuntimeError) -> RuntimeError {
322 map_control_flow_with_builtin(err, "string")
323}
324
325pub(crate) async fn format_from_spec(
326 format_value: Value,
327 args: Vec<Value>,
328) -> crate::BuiltinResult<StringArray> {
329 let spec = extract_format_spec(format_value).await?;
330 let mut arguments = Vec::with_capacity(args.len());
331 for arg in args {
332 arguments.push(extract_argument_data(arg).await?);
333 }
334
335 let (target_len, mut target_shape) = resolve_target_shape(&spec, &arguments)?;
336
337 if target_len == 0 {
338 let shape = if target_shape.is_empty() {
339 if spec.shape.is_empty() {
340 vec![0, 0]
341 } else {
342 spec.shape.clone()
343 }
344 } else {
345 target_shape
346 };
347 return StringArray::new(Vec::new(), shape)
348 .map_err(|e| string_flow(format!("string: {e}")));
349 }
350
351 let spec_len = spec.specs.len();
352 if spec_len == 0 {
353 return Err(string_flow(
354 "string: formatSpec must contain at least one element when formatting with data",
355 ));
356 }
357
358 for arg in &arguments {
359 if target_len > 0 && arg.values.is_empty() {
360 return Err(string_flow(
361 "string: format data arguments must be scalars or match formatSpec size",
362 ));
363 }
364 }
365
366 let mut output = Vec::with_capacity(target_len);
367 for idx in 0..target_len {
368 let spec_idx = if spec_len == 1 { 0 } else { idx };
369 let spec_str = &spec.specs[spec_idx];
370 let mut per_call = Vec::with_capacity(arguments.len());
371 for arg in &arguments {
372 let value =
373 match arg.values.len() {
374 0 => continue,
375 1 => arg.values[0].clone(),
376 len if len == target_len => arg.values[idx].clone(),
377 _ => return Err(string_flow(
378 "string: format data arguments must be scalars or match formatSpec size",
379 )),
380 };
381 per_call.push(value);
382 }
383 let formatted =
384 format_variadic(spec_str, &per_call).map_err(|flow| remap_string_flow(flow))?;
385 output.push(formatted);
386 }
387
388 if target_shape.is_empty() {
389 target_shape = if spec_len > 1 {
390 spec.shape.clone()
391 } else {
392 vec![target_len, 1]
393 };
394 }
395
396 if tensor::element_count(&target_shape) != target_len {
397 target_shape = vec![target_len, 1];
398 }
399
400 StringArray::new(output, target_shape).map_err(|e| string_flow(format!("string: {e}")))
401}
402
403fn resolve_target_shape(
404 spec: &FormatSpecData,
405 args: &[ArgumentData],
406) -> BuiltinResult<(usize, Vec<usize>)> {
407 let mut target_len = spec.specs.len();
408 let mut target_shape = if target_len > 1 || (target_len == 1 && !spec.shape.is_empty()) {
409 spec.shape.clone()
410 } else {
411 Vec::new()
412 };
413
414 for arg in args {
415 let len = arg.values.len();
416 if len == 0 {
417 continue;
418 }
419 if target_len == 0 {
420 target_len = len;
421 target_shape = arg.shape.clone();
422 continue;
423 }
424 if len == 1 {
425 continue;
426 }
427 if target_len == 1 {
428 target_len = len;
429 target_shape = arg.shape.clone();
430 continue;
431 }
432 if len != target_len {
433 return Err(string_flow(
434 "string: format data arguments must be scalars or match formatSpec size",
435 ));
436 }
437 if target_shape.is_empty() && len > 1 {
438 target_shape = arg.shape.clone();
439 }
440 }
441
442 if target_len == 0 {
443 let shape = if spec.shape.is_empty() {
444 vec![0, 0]
445 } else {
446 spec.shape.clone()
447 };
448 return Ok((0, shape));
449 }
450
451 if target_shape.is_empty() {
452 target_shape = if spec.shape.is_empty() {
453 vec![target_len, 1]
454 } else {
455 spec.shape.clone()
456 };
457 if spec.specs.len() == 1 && tensor::element_count(&target_shape) != target_len {
458 target_shape = vec![target_len, 1];
459 }
460 }
461
462 if tensor::element_count(&target_shape) != target_len {
463 target_shape = vec![target_len, 1];
464 }
465
466 Ok((target_len, target_shape))
467}
468
469pub(crate) async fn extract_format_spec(value: Value) -> BuiltinResult<FormatSpecData> {
470 match value {
471 Value::String(s) => Ok(FormatSpecData {
472 specs: vec![s],
473 shape: vec![1, 1],
474 }),
475 Value::StringArray(sa) => Ok(FormatSpecData {
476 specs: sa.data.clone(),
477 shape: sa.shape.clone(),
478 }),
479 Value::CharArray(ca) => {
480 let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
481 Ok(FormatSpecData {
482 specs: array.data,
483 shape: array.shape,
484 })
485 }
486 Value::Cell(cell) => {
487 let mut specs = Vec::with_capacity(cell.data.len());
488 for col in 0..cell.cols {
489 for row in 0..cell.rows {
490 let idx = row * cell.cols + col;
491 let element = &cell.data[idx];
492 let value = (**element).clone();
493 let gathered = gather_if_needed_async(&value)
494 .await
495 .map_err(|flow| remap_string_flow(flow))?;
496 let text = value_to_scalar_text(&gathered).ok_or_else(|| {
497 string_flow("string: formatSpec cell elements must be text scalars")
498 })?;
499 specs.push(text);
500 }
501 }
502 Ok(FormatSpecData {
503 specs,
504 shape: vec![cell.rows, cell.cols],
505 })
506 }
507 _ => Err(string_flow(
508 "string: formatSpec must be text (string, char, or cellstr)",
509 )),
510 }
511}
512
513#[async_recursion::async_recursion(?Send)]
514async fn extract_argument_data(value: Value) -> BuiltinResult<ArgumentData> {
515 match value {
516 Value::String(s) => Ok(ArgumentData {
517 values: vec![Value::String(s)],
518 shape: vec![1, 1],
519 }),
520 Value::StringArray(sa) => Ok(ArgumentData {
521 values: sa.data.into_iter().map(Value::String).collect(),
522 shape: sa.shape,
523 }),
524 Value::CharArray(ca) => {
525 let array = char_array_to_string_array(ca, StringEncoding::Utf8)?;
526 Ok(ArgumentData {
527 values: array.data.into_iter().map(Value::String).collect(),
528 shape: array.shape,
529 })
530 }
531 Value::Num(n) => Ok(ArgumentData {
532 values: vec![Value::Num(n)],
533 shape: vec![1, 1],
534 }),
535 Value::Int(i) => Ok(ArgumentData {
536 values: vec![Value::Int(i)],
537 shape: vec![1, 1],
538 }),
539 Value::Bool(b) => Ok(ArgumentData {
540 values: vec![Value::Num(if b { 1.0 } else { 0.0 })],
541 shape: vec![1, 1],
542 }),
543 Value::Tensor(t) => Ok(ArgumentData {
544 values: t.data.into_iter().map(Value::Num).collect(),
545 shape: t.shape,
546 }),
547 Value::Complex(re, im) => Ok(ArgumentData {
548 values: vec![Value::String(complex_to_string(re, im))],
549 shape: vec![1, 1],
550 }),
551 Value::ComplexTensor(t) => Ok(ArgumentData {
552 values: t
553 .data
554 .into_iter()
555 .map(|(re, im)| Value::String(complex_to_string(re, im)))
556 .collect(),
557 shape: t.shape,
558 }),
559 Value::LogicalArray(la) => Ok(ArgumentData {
560 values: la
561 .data
562 .into_iter()
563 .map(|byte| Value::Num(if byte != 0 { 1.0 } else { 0.0 }))
564 .collect(),
565 shape: la.shape,
566 }),
567 Value::Cell(cell) => {
568 let mut values = Vec::with_capacity(cell.data.len());
569 for col in 0..cell.cols {
570 for row in 0..cell.rows {
571 let idx = row * cell.cols + col;
572 let element = &cell.data[idx];
573 let value = (**element).clone();
574 let gathered = gather_if_needed_async(&value)
575 .await
576 .map_err(|flow| remap_string_flow(flow))?;
577 let value = match gathered {
578 Value::String(s) => Value::String(s),
579 Value::StringArray(sa) if sa.data.len() == 1 => {
580 Value::String(sa.data[0].clone())
581 }
582 Value::CharArray(ca) => {
583 if ca.rows != 1 {
584 return Err(string_flow(
585 "string: cell format arguments must contain char row vectors",
586 ));
587 }
588 let mut row_str = String::with_capacity(ca.cols);
589 for ch in ca.data {
590 row_str.push(ch);
591 }
592 Value::String(row_str)
593 }
594 Value::Num(n) => Value::Num(n),
595 Value::Int(i) => Value::Int(i),
596 Value::Bool(b) => Value::Num(if b { 1.0 } else { 0.0 }),
597 Value::Tensor(t) => {
598 if t.data.len() != 1 {
599 return Err(string_flow(
600 "string: cell format arguments must contain scalar values",
601 ));
602 }
603 Value::Num(t.data[0])
604 }
605 Value::LogicalArray(la) => {
606 if la.data.len() != 1 {
607 return Err(string_flow(
608 "string: cell format arguments must contain scalar values",
609 ));
610 }
611 Value::Num(if la.data[0] != 0 { 1.0 } else { 0.0 })
612 }
613 Value::Complex(re, im) => Value::String(complex_to_string(re, im)),
614 Value::ComplexTensor(t) => {
615 if t.data.len() != 1 {
616 return Err(string_flow(
617 "string: cell format arguments must contain scalar values",
618 ));
619 }
620 let (re, im) = t.data[0];
621 Value::String(complex_to_string(re, im))
622 }
623 other => {
624 return Err(string_flow(format!(
625 "string: unsupported cell format argument {other:?}; expected scalar text or numeric values"
626 )))
627 }
628 };
629 values.push(value);
630 }
631 }
632 Ok(ArgumentData {
633 values,
634 shape: vec![cell.rows, cell.cols],
635 })
636 }
637 Value::GpuTensor(handle) => {
638 let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
639 .await
640 .map_err(|flow| remap_string_flow(flow))?;
641 extract_argument_data(gathered).await
642 }
643 Value::MException(_)
644 | Value::HandleObject(_)
645 | Value::Object(_)
646 | Value::Listener(_)
647 | Value::Struct(_)
648 | Value::OutputList(_) => Err(string_flow("string: unsupported format argument type")),
649 Value::FunctionHandle(_)
650 | Value::ExternalFunctionHandle(_)
651 | Value::MethodFunctionHandle(_)
652 | Value::BoundFunctionHandle { .. }
653 | Value::Closure(_)
654 | Value::ClassRef(_) => Err(string_flow("string: unsupported format argument type")),
655 }
656}
657
658#[async_recursion::async_recursion(?Send)]
659async fn convert_to_string_array(
660 value: Value,
661 encoding: StringEncoding,
662) -> BuiltinResult<StringArray> {
663 if let Some(array) = crate::builtins::datetime::datetime_string_array(&value)
664 .map_err(|err| string_flow(err.message().to_string()))?
665 {
666 return Ok(array);
667 }
668 if let Some(array) = crate::builtins::duration::duration_string_array(&value)
669 .map_err(|err| string_flow(err.message().to_string()))?
670 {
671 return Ok(array);
672 }
673 match value {
674 Value::String(s) => string_scalar(s),
675 Value::StringArray(sa) => Ok(sa),
676 Value::CharArray(ca) => char_array_to_string_array(ca, encoding),
677 Value::Tensor(tensor) => tensor_to_string_array(tensor),
678 Value::ComplexTensor(tensor) => complex_tensor_to_string_array(tensor),
679 Value::LogicalArray(logical) => logical_array_to_string_array(logical),
680 Value::Cell(cell) => cell_array_to_string_array(cell, encoding).await,
681 Value::Num(n) => string_scalar(number_to_string(n)),
682 Value::Int(i) => string_scalar(int_value_to_string(&i)),
683 Value::Bool(b) => string_scalar(bool_to_string(b).to_string()),
684 Value::Complex(re, im) => string_scalar(complex_to_string(re, im)),
685 Value::GpuTensor(handle) => {
686 let gathered = gather_if_needed_async(&Value::GpuTensor(handle))
688 .await
689 .map_err(|flow| remap_string_flow(flow))?;
690 convert_to_string_array(gathered, encoding).await
691 }
692 Value::Object(_) | Value::HandleObject(_) | Value::Listener(_) => Err(string_flow(
693 "string: unsupported conversion from handle-based objects. Use class-specific formatters.",
694 )),
695 Value::Struct(_) => Err(string_flow(
696 "string: structs are not supported for automatic conversion",
697 )),
698 Value::FunctionHandle(_) | Value::ExternalFunctionHandle(_) | Value::MethodFunctionHandle(_) | Value::BoundFunctionHandle { .. }
699 | Value::Closure(_)
700 | Value::ClassRef(_)
701 | Value::MException(_)
702 | Value::OutputList(_) => Err(
703 string_flow("string: unsupported conversion for function or exception handles"),
704 ),
705 }
706}
707
708fn string_scalar<S: Into<String>>(text: S) -> BuiltinResult<StringArray> {
709 StringArray::new(vec![text.into()], vec![1, 1]).map_err(|e| string_flow(format!("string: {e}")))
710}
711
712fn value_to_scalar_text(value: &Value) -> Option<String> {
713 match value {
714 Value::String(s) => Some(s.clone()),
715 Value::StringArray(sa) if sa.data.len() == 1 => Some(sa.data[0].clone()),
716 Value::CharArray(ca) if ca.rows == 1 => Some(ca.data.iter().collect()),
717 _ => None,
718 }
719}
720
721fn char_array_to_string_array(
722 array: CharArray,
723 _encoding: StringEncoding,
724) -> BuiltinResult<StringArray> {
725 let mut rows: Vec<String> = Vec::with_capacity(array.rows);
726 for r in 0..array.rows {
727 let mut row = String::with_capacity(array.cols);
728 for c in 0..array.cols {
729 row.push(array.data[r * array.cols + c]);
730 }
731 rows.push(row);
732 }
733 let shape = if array.rows == 0 {
734 vec![0, 1]
735 } else {
736 vec![array.rows, 1]
737 };
738 StringArray::new(rows, shape).map_err(|e| string_flow(format!("string: {e}")))
739}
740
741fn tensor_to_string_array(tensor: Tensor) -> BuiltinResult<StringArray> {
742 let mut strings = Vec::with_capacity(tensor.data.len());
743 for &value in &tensor.data {
744 strings.push(number_to_string(value));
745 }
746 StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
747}
748
749fn complex_tensor_to_string_array(tensor: ComplexTensor) -> BuiltinResult<StringArray> {
750 let mut strings = Vec::with_capacity(tensor.data.len());
751 for &(re, im) in &tensor.data {
752 strings.push(complex_to_string(re, im));
753 }
754 StringArray::new(strings, tensor.shape).map_err(|e| string_flow(format!("string: {e}")))
755}
756
757fn logical_array_to_string_array(logical: LogicalArray) -> BuiltinResult<StringArray> {
758 let mut strings = Vec::with_capacity(logical.data.len());
759 for &byte in &logical.data {
760 strings.push(bool_to_string(byte != 0).to_string());
761 }
762 StringArray::new(strings, logical.shape).map_err(|e| string_flow(format!("string: {e}")))
763}
764
765async fn cell_array_to_string_array(
766 cell: runmat_builtins::CellArray,
767 _encoding: StringEncoding,
768) -> BuiltinResult<StringArray> {
769 let mut strings = Vec::with_capacity(cell.data.len());
770 for col in 0..cell.cols {
771 for row in 0..cell.rows {
772 let idx = row * cell.cols + col;
773 let element = &cell.data[idx];
774 let value = (**element).clone();
775 let gathered = gather_if_needed_async(&value)
776 .await
777 .map_err(|flow| remap_string_flow(flow))?;
778 strings.push(cell_element_to_string(&gathered)?);
779 }
780 }
781 StringArray::new(strings, vec![cell.rows, cell.cols])
782 .map_err(|e| string_flow(format!("string: {e}")))
783}
784
785fn cell_element_to_string(value: &Value) -> BuiltinResult<String> {
786 if let Some(array) = crate::builtins::datetime::datetime_string_array(value)
787 .map_err(|err| string_flow(err.message().to_string()))?
788 {
789 if array.data.len() == 1 {
790 return Ok(array.data[0].clone());
791 }
792 return Err(string_flow("string: cell datetime values must be scalar"));
793 }
794 if let Some(array) = crate::builtins::duration::duration_string_array(value)
795 .map_err(|err| string_flow(err.message().to_string()))?
796 {
797 if array.data.len() == 1 {
798 return Ok(array.data[0].clone());
799 }
800 return Err(string_flow("string: cell duration values must be scalar"));
801 }
802 match value {
803 Value::String(s) => Ok(s.clone()),
804 Value::StringArray(sa) => {
805 if sa.data.len() == 1 {
806 Ok(sa.data[0].clone())
807 } else {
808 Err(string_flow(
809 "string: cell elements must contain string scalars, not string arrays",
810 ))
811 }
812 }
813 Value::CharArray(ca) => {
814 if ca.rows == 1 {
815 Ok(ca.data.iter().collect())
816 } else {
817 Err(string_flow(
818 "string: cell character arrays must be row vectors",
819 ))
820 }
821 }
822 Value::Num(n) => Ok(number_to_string(*n)),
823 Value::Int(i) => Ok(int_value_to_string(i)),
824 Value::Bool(b) => Ok(bool_to_string(*b).to_string()),
825 Value::LogicalArray(array) => {
826 if array.data.len() == 1 {
827 Ok(bool_to_string(array.data[0] != 0).to_string())
828 } else {
829 Err(string_flow("string: cell logical values must be scalar"))
830 }
831 }
832 Value::Tensor(t) => {
833 if t.data.len() == 1 {
834 Ok(number_to_string(t.data[0]))
835 } else {
836 Err(string_flow("string: cell numeric values must be scalar"))
837 }
838 }
839 Value::Complex(re, im) => Ok(complex_to_string(*re, *im)),
840 Value::ComplexTensor(t) => {
841 if t.data.len() == 1 {
842 let (re, im) = t.data[0];
843 Ok(complex_to_string(re, im))
844 } else {
845 Err(string_flow("string: cell complex values must be scalar"))
846 }
847 }
848 other => Err(string_flow(format!(
849 "string: unsupported cell element type {:?}; expected text or scalar values",
850 other
851 ))),
852 }
853}
854
/// Text used for logical values: `"true"` or `"false"`.
fn bool_to_string(value: bool) -> &'static str {
    match value {
        true => "true",
        false => "false",
    }
}
862
863fn int_value_to_string(value: &IntValue) -> String {
864 match value {
865 IntValue::I8(v) => v.to_string(),
866 IntValue::I16(v) => v.to_string(),
867 IntValue::I32(v) => v.to_string(),
868 IntValue::I64(v) => v.to_string(),
869 IntValue::U8(v) => v.to_string(),
870 IntValue::U16(v) => v.to_string(),
871 IntValue::U32(v) => v.to_string(),
872 IntValue::U64(v) => v.to_string(),
873 }
874}
875
876#[cfg(test)]
877pub(crate) mod tests {
878 use super::*;
879 use crate::builtins::common::test_support;
880 use runmat_builtins::{CellArray, IntValue, ResolveContext, StringArray, StructValue, Type};
881
882 fn string_builtin(value: Value, rest: Vec<Value>) -> BuiltinResult<Value> {
883 futures::executor::block_on(super::string_builtin(value, rest))
884 }
885
886 fn error_message(err: crate::RuntimeError) -> String {
887 err.message().to_string()
888 }
889
890 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
891 #[test]
892 fn string_from_numeric_scalar() {
893 let out = string_builtin(Value::Num(42.0), Vec::new()).expect("string");
894 match out {
895 Value::StringArray(sa) => {
896 assert_eq!(sa.shape, vec![1, 1]);
897 assert_eq!(sa.data, vec!["42".to_string()]);
898 }
899 other => panic!("expected string array, got {other:?}"),
900 }
901 }
902
903 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
904 #[test]
905 fn string_from_numeric_tensor_preserves_shape() {
906 let tensor = Tensor::new(vec![1.0, 2.0, 3.0, 4.0], vec![2, 2]).unwrap();
907 let out = string_builtin(Value::Tensor(tensor), Vec::new()).expect("string");
908 match out {
909 Value::StringArray(sa) => {
910 assert_eq!(sa.shape, vec![2, 2]);
911 assert_eq!(sa.data, vec!["1", "2", "3", "4"]);
912 }
913 other => panic!("expected string array, got {other:?}"),
914 }
915 }
916
917 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
918 #[test]
919 fn string_from_logical_array_uses_boolean_text() {
920 let logical = LogicalArray::new(vec![1, 0, 1], vec![1, 3]).unwrap();
921 let out = string_builtin(Value::LogicalArray(logical), Vec::new()).expect("string");
922 match out {
923 Value::StringArray(sa) => {
924 assert_eq!(sa.shape, vec![1, 3]);
925 assert_eq!(sa.data, vec!["true", "false", "true"]);
926 }
927 other => panic!("expected string array, got {other:?}"),
928 }
929 }
930
931 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
932 #[test]
933 fn string_from_char_array_produces_column_vector() {
934 let chars = CharArray::new("abc".chars().collect(), 1, 3).unwrap();
935 let out = string_builtin(Value::CharArray(chars), Vec::new()).expect("string");
936 match out {
937 Value::StringArray(sa) => {
938 assert_eq!(sa.shape, vec![1, 1]);
939 assert_eq!(sa.data, vec!["abc"]);
940 }
941 other => panic!("expected string array, got {other:?}"),
942 }
943 }
944
945 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
946 #[test]
947 fn string_from_cell_array() {
948 let cell = CellArray::new(vec![Value::Bool(true), Value::Int(IntValue::I32(7))], 1, 2)
949 .expect("cell array");
950 let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
951 match out {
952 Value::StringArray(sa) => {
953 assert_eq!(sa.shape, vec![1, 2]);
954 assert_eq!(sa.data, vec!["true", "7"]);
955 }
956 other => panic!("expected string array, got {other:?}"),
957 }
958 }
959
960 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
961 #[test]
962 fn string_from_cell_array_column_major() {
963 let cell = CellArray::new(
964 vec![
965 Value::Int(IntValue::I32(1)),
966 Value::Int(IntValue::I32(2)),
967 Value::Int(IntValue::I32(3)),
968 Value::Int(IntValue::I32(4)),
969 ],
970 2,
971 2,
972 )
973 .expect("cell array");
974 let out = string_builtin(Value::Cell(cell), Vec::new()).expect("string");
975 match out {
976 Value::StringArray(sa) => {
977 assert_eq!(sa.shape, vec![2, 2]);
978 assert_eq!(sa.data, vec!["1", "3", "2", "4"]);
979 }
980 other => panic!("expected string array, got {other:?}"),
981 }
982 }
983
984 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
985 #[test]
986 fn string_cell_element_requires_scalar_numeric() {
987 let tensor = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
988 let cell =
989 CellArray::new(vec![Value::Tensor(tensor)], 1, 1).expect("cell with numeric tensor");
990 let err = error_message(string_builtin(Value::Cell(cell), Vec::new()).unwrap_err());
991 assert!(err.contains("cell numeric values must be scalar"));
992 }
993
994 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
995 #[test]
996 fn string_rejects_struct_input() {
997 let err = error_message(
998 string_builtin(Value::Struct(StructValue::new()), Vec::new()).expect_err("string"),
999 );
1000 assert!(err.contains("structs are not supported"));
1001 }
1002
1003 #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
1004 #[test]
1005 fn string_errors_on_unsupported_encoding() {
1006 let err = error_message(
1007 string_builtin(
1008 Value::CharArray(CharArray::new_row("abc")),
1009 vec![Value::from("UTF-16")],
1010 )
1011 .unwrap_err(),
1012 );
1013 assert!(
1014 err.contains("unsupported character encoding"),
1015 "unexpected error message: {err}"
1016 );
1017 }
1018
// The "system" encoding name is accepted; the char row converts to a 1x1 string array.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_accepts_system_encoding_alias() {
    let source = Value::CharArray(CharArray::new_row("hello"));
    let encoding = vec![Value::from("system")];
    let strings = match string_builtin(source, encoding).expect("string") {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 1]);
    assert_eq!(strings.data, vec!["hello"]);
}
1035
// With an encoding argument ("utf8"), a '%' in the text is preserved verbatim in the output.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_encoding_allows_percent_literal() {
    let source = Value::CharArray(CharArray::new_row("100% Done"));
    let encoding = vec![Value::from("utf8")];
    let strings = match string_builtin(source, encoding).expect("string") {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 1]);
    assert_eq!(strings.data, vec!["100% Done"]);
}
1052
// A cell used as formatSpec must contain text scalars; a numeric element is rejected.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_spec_cell_requires_text_scalars() {
    let spec = CellArray::new(vec![Value::Num(1.0)], 1, 1).expect("cell");
    let failure = string_builtin(Value::Cell(spec), vec![Value::from("data")]).expect_err("string");
    let err = error_message(failure);
    assert!(
        err.contains("formatSpec cell elements must be text scalars"),
        "unexpected error: {err}"
    );
}
1065
// A cell passed as a format argument may only hold scalar values; a 2x1 tensor is rejected.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_cell_argument_requires_scalar_values() {
    let column = Tensor::new(vec![1.0, 2.0], vec![2, 1]).unwrap();
    let cell = CellArray::new(vec![Value::Tensor(column)], 1, 1).expect("cell argument values");
    let err = error_message(
        string_builtin(Value::from("%d"), vec![Value::Cell(cell)]).expect_err("string"),
    );
    // Echo the actual text on failure so a wording change is easy to diagnose.
    assert!(
        err.contains("cell format arguments must contain scalar values"),
        "unexpected error message: {err}"
    );
}
1076
// `u64::MAX` must convert to its exact decimal text.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_handles_large_unsigned_int() {
    let input = Value::Int(IntValue::U64(u64::MAX));
    let strings = match string_builtin(input, Vec::new()).expect("string") {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 1]);
    assert_eq!(strings.data, vec![u64::MAX.to_string()]);
}
1090
// Pins the signature labels and error codes exposed by `STRING_DESCRIPTOR`, in order.
#[test]
fn string_descriptor_signatures_cover_core_forms() {
    let mut labels: Vec<&str> = Vec::new();
    for signature in STRING_DESCRIPTOR.signatures.iter() {
        labels.push(signature.label);
    }
    let expected_labels = vec![
        "S = string(X)",
        "S = string(X, encoding)",
        "S = string(formatSpec, A...)",
    ];
    assert_eq!(labels, expected_labels);

    let mut codes: Vec<&str> = Vec::new();
    for error in STRING_DESCRIPTOR.errors.iter() {
        codes.push(error.code);
    }
    assert_eq!(codes, vec!["RM.STRING.INVALID_INPUT"]);
}
1114
// The error raised for struct input must carry the `RunMat:string:InvalidInput` identifier.
#[test]
fn string_struct_input_uses_stable_identifier() {
    let input = Value::Struct(StructValue::new());
    let failure = string_builtin(input, Vec::new()).unwrap_err();
    let identifier = failure.identifier();
    assert_eq!(identifier, Some("RunMat:string:InvalidInput"));
}
1120
// Formatting a single number through "%d" produces a 1x1 string array.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_numeric_scalar() {
    let spec = Value::from("%d");
    let args = vec![Value::Num(7.0)];
    let strings = match string_builtin(spec, args).expect("string") {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 1]);
    assert_eq!(strings.data, vec!["7"]);
}
1133
// A scalar formatSpec is applied to each element of a 1x3 tensor argument.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_broadcast_over_tensor() {
    let trials = Tensor::new(vec![1.0, 2.0, 3.0], vec![1, 3]).unwrap();
    let formatted =
        string_builtin(Value::from("Trial %d"), vec![Value::Tensor(trials)]).expect("string");
    let strings = match formatted {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 3]);
    assert_eq!(strings.data, vec!["Trial 1", "Trial 2", "Trial 3"]);
}
1148
// A 1x2 string-array formatSpec is paired element-by-element with a 1x2 tensor argument.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_string_array_spec_alignment() {
    let formats = StringArray::new(vec!["[%d]".into(), "Value %d".into()], vec![1, 2]).unwrap();
    let values = Tensor::new(vec![5.0, 6.0], vec![1, 2]).unwrap();
    let formatted =
        string_builtin(Value::StringArray(formats), vec![Value::Tensor(values)]).expect("string");
    let strings = match formatted {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 2]);
    assert_eq!(strings.data, vec!["[5]", "Value 6"]);
}
1164
// When the first argument is "%s", a second argument of "UTF-8" is formatted as data
// rather than being consumed as an encoding name.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_prefers_placeholders_over_encoding_hint() {
    let spec = Value::from("%s");
    let args = vec![Value::from("UTF-8")];
    let strings = match string_builtin(spec, args).expect("string") {
        Value::StringArray(sa) => sa,
        other => panic!("expected string array, got {other:?}"),
    };
    assert_eq!(strings.shape, vec![1, 1]);
    assert_eq!(strings.data, vec!["UTF-8"]);
}
1177
// A 2x1 formatSpec combined with a 3x1 argument has incompatible sizes and must error.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_format_mismatched_lengths_errors() {
    let spec = StringArray::new(vec!["%d".into(), "%d".into()], vec![2, 1]).unwrap();
    let tensor = Tensor::new(vec![1.0, 2.0, 3.0], vec![3, 1]).unwrap();
    let err = error_message(
        string_builtin(Value::StringArray(spec), vec![Value::Tensor(tensor)]).unwrap_err(),
    );
    // Echo the actual text on failure so a wording change is easy to diagnose.
    assert!(
        err.contains("must be scalars or match formatSpec size"),
        "unexpected error message: {err}"
    );
}
1188
// A tensor uploaded through the test provider converts to strings like a host tensor would.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
fn string_gpu_numeric_tensor() {
    test_support::with_test_provider(|provider| {
        let host = Tensor::new(vec![10.0, 20.0], vec![1, 2]).unwrap();
        let host_view = runmat_accelerate_api::HostTensorView {
            data: &host.data,
            shape: &host.shape,
        };
        let device_handle = provider.upload(&host_view).expect("upload");
        let converted = string_builtin(Value::GpuTensor(device_handle), Vec::new())
            .expect("gpu string conversion");
        let strings = match converted {
            Value::StringArray(sa) => sa,
            other => panic!("expected string array, got {other:?}"),
        };
        assert_eq!(strings.shape, vec![1, 2]);
        assert_eq!(strings.data, vec!["10", "20"]);
    });
}
1210
// With the wgpu provider registered, converting an uploaded tensor must give the same
// shape and text as converting the host tensor directly.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[test]
#[cfg(feature = "wgpu")]
fn string_wgpu_numeric_tensor_matches_cpu() {
    // The registration result is intentionally discarded.
    let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
        runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
    );
    let host = Tensor::new(vec![4.0, 5.0, 6.0], vec![1, 3]).unwrap();
    let cpu_result =
        string_builtin(Value::Tensor(host.clone()), Vec::new()).expect("cpu string conversion");
    let host_view = runmat_accelerate_api::HostTensorView {
        data: &host.data,
        shape: &host.shape,
    };
    let active_provider = runmat_accelerate_api::provider().unwrap();
    let device_handle = active_provider.upload(&host_view).expect("gpu upload");
    let gpu_result = string_builtin(Value::GpuTensor(device_handle), Vec::new())
        .expect("gpu string conversion");
    let (expect, actual) = match (cpu_result, gpu_result) {
        (Value::StringArray(expect), Value::StringArray(actual)) => (expect, actual),
        other => panic!("unexpected results {other:?}"),
    };
    assert_eq!(actual.shape, expect.shape);
    assert_eq!(actual.data, expect.data);
}
1239
// The type resolver is expected to report a cell of `Type::String` for a numeric argument.
#[test]
fn string_type_is_string_array() {
    let context = ResolveContext::new(Vec::new());
    let resolved = string_array_type(&[Type::Num], &context);
    assert_eq!(resolved, Type::cell_of(Type::String));
}
1247}