1use runmat_builtins::{CellArray, CharArray, StringArray, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::cells::type_resolvers::cellstr_type;
7use crate::builtins::common::spec::{
8 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
9 ReductionNaN, ResidencyPolicy, ShapeRequirements,
10};
11use crate::dispatcher::gather_if_needed_async;
12use crate::{build_runtime_error, make_cell, make_cell_with_shape, BuiltinResult, RuntimeError};
13
/// Error text for a top-level `cellstr` argument that is not text
/// (returned by the builtin entry point for every non-text `Value` variant).
const ERR_INPUT_NOT_TEXT: &str =
    "cellstr: input must be a character array, string array, or cell array of character vectors";

/// Error text for an element of a cell-array argument that cannot be used as
/// a character vector or string scalar.
const ERR_CELL_CONTENT_NOT_TEXT: &str =
    "cellstr: cell array elements must be character vectors or string scalars";
18
19#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::cells::core::cellstr")]
20pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
21 name: "cellstr",
22 op_kind: GpuOpKind::Custom("text-convert"),
23 supported_precisions: &[],
24 broadcast: BroadcastSemantics::None,
25 provider_hooks: &[],
26 constant_strategy: ConstantStrategy::InlineLiteral,
27 residency: ResidencyPolicy::GatherImmediately,
28 nan_mode: ReductionNaN::Include,
29 two_pass_threshold: None,
30 workgroup_size: None,
31 accepts_nan_mode: false,
32 notes: "Host-only text conversion. Inputs originating on the GPU are gathered before processing, and the output is always a host cell array.",
33};
34
35#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::cells::core::cellstr")]
36pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
37 name: "cellstr",
38 shape: ShapeRequirements::Any,
39 constant_strategy: ConstantStrategy::InlineLiteral,
40 elementwise: None,
41 reduction: None,
42 emits_nan: false,
43 notes:
44 "Terminates fusion because the result is a host-resident cell array of character vectors.",
45};
46
/// Error identifier attached when the top-level `cellstr` argument is rejected.
const IDENT_INVALID_INPUT: &str = "RunMat:cellstr:InvalidInput";

/// Error identifier attached when an element of a cell-array argument is rejected.
const IDENT_INVALID_CONTENTS: &str = "RunMat:cellstr:InvalidContents";
49
50fn cellstr_error(message: impl Into<String>) -> RuntimeError {
51 build_runtime_error(message).with_builtin("cellstr").build()
52}
53
54fn cellstr_error_with_identifier(message: impl Into<String>, identifier: &str) -> RuntimeError {
55 build_runtime_error(message)
56 .with_builtin("cellstr")
57 .with_identifier(identifier)
58 .build()
59}
60
61#[runtime_builtin(
62 name = "cellstr",
63 category = "cells/core",
64 summary = "Convert text to a cell array of character vectors.",
65 keywords = "cellstr,text,character,string,conversion",
66 accel = "gather",
67 type_resolver(cellstr_type),
68 builtin_path = "crate::builtins::cells::core::cellstr"
69)]
70async fn cellstr_builtin(value: Value) -> crate::BuiltinResult<Value> {
71 let host = gather_if_needed_async(&value).await?;
72 match host {
73 Value::CharArray(ca) => cellstr_from_char_array(ca),
74 Value::StringArray(sa) => cellstr_from_string_array(sa),
75 Value::String(text) => cellstr_from_string(text),
76 Value::Cell(cell) => cellstr_from_cell(cell).await,
77 Value::LogicalArray(_)
78 | Value::Bool(_)
79 | Value::Int(_)
80 | Value::Num(_)
81 | Value::Tensor(_)
82 | Value::Complex(_, _)
83 | Value::ComplexTensor(_)
84 | Value::Struct(_)
85 | Value::Object(_)
86 | Value::HandleObject(_)
87 | Value::Listener(_)
88 | Value::FunctionHandle(_)
89 | Value::Closure(_)
90 | Value::ClassRef(_)
91 | Value::MException(_)
92 | Value::OutputList(_) => Err(cellstr_error_with_identifier(
93 ERR_INPUT_NOT_TEXT,
94 IDENT_INVALID_INPUT,
95 )),
96 Value::GpuTensor(_) => Err(cellstr_error_with_identifier(
97 "cellstr: input must be gathered to the host before conversion",
98 IDENT_INVALID_INPUT,
99 )),
100 }
101}
102
103fn cellstr_from_string(text: String) -> BuiltinResult<Value> {
104 let row = Value::CharArray(CharArray::new_row(&text));
105 make_cell(vec![row], 1, 1).map_err(|e| cellstr_error(format!("cellstr: {e}")))
106}
107
108fn cellstr_from_char_array(ca: CharArray) -> BuiltinResult<Value> {
109 let rows = ca.rows;
110 let cols = ca.cols;
111 if rows == 0 {
112 return make_cell(Vec::new(), 0, 1).map_err(|e| cellstr_error(format!("cellstr: {e}")));
113 }
114 let mut values = Vec::with_capacity(rows);
115 for row in 0..rows {
116 let start = row * cols;
117 let end = start + cols;
118 let slice = &ca.data[start..end];
119 let trimmed = trim_trailing_spaces(slice);
120 values.push(Value::CharArray(CharArray::new_row(&trimmed)));
121 }
122 make_cell(values, rows, 1).map_err(|e| cellstr_error(format!("cellstr: {e}")))
123}
124
125fn cellstr_from_string_array(sa: StringArray) -> BuiltinResult<Value> {
126 let shape = if sa.shape.is_empty() {
127 vec![sa.rows.max(1), sa.cols.max(1)]
128 } else {
129 sa.shape.clone()
130 };
131 let total = shape.iter().product::<usize>();
132 if total == 0 {
133 return make_cell_with_shape(Vec::new(), shape)
134 .map_err(|e| cellstr_error(format!("cellstr: {e}")));
135 }
136 if total != sa.data.len() {
137 return Err(cellstr_error_with_identifier(
138 "cellstr: internal string array shape mismatch",
139 IDENT_INVALID_INPUT,
140 ));
141 }
142 let mut values = Vec::with_capacity(total);
143 for row_major in 0..total {
144 let coords = linear_to_multi_row_major(row_major, &shape);
145 let column_major = multi_to_linear_column_major(&coords, &shape);
146 let text = sa.data[column_major].clone();
147 values.push(Value::CharArray(CharArray::new_row(&text)));
148 }
149 make_cell_with_shape(values, shape).map_err(|e| cellstr_error(format!("cellstr: {e}")))
150}
151
152async fn cellstr_from_cell(cell: CellArray) -> BuiltinResult<Value> {
153 let mut values = Vec::with_capacity(cell.data.len());
154 for ptr in &cell.data {
155 let element = unsafe { &*ptr.as_raw() };
156 let gathered = gather_if_needed_async(element).await?;
157 values.push(coerce_to_char_vector(gathered)?);
158 }
159 make_cell_with_shape(values, cell.shape.clone())
160 .map_err(|e| cellstr_error(format!("cellstr: {e}")))
161}
162
163fn coerce_to_char_vector(value: Value) -> BuiltinResult<Value> {
164 match value {
165 Value::CharArray(ca) => {
166 if ca.rows == 1 || (ca.rows == 0 && ca.cols == 0) {
167 Ok(Value::CharArray(ca))
168 } else {
169 Err(cellstr_error_with_identifier(
170 ERR_CELL_CONTENT_NOT_TEXT,
171 IDENT_INVALID_CONTENTS,
172 ))
173 }
174 }
175 Value::String(text) => Ok(Value::CharArray(CharArray::new_row(&text))),
176 Value::StringArray(sa) => {
177 if sa.data.len() == 1 {
178 Ok(Value::CharArray(CharArray::new_row(&sa.data[0])))
179 } else {
180 Err(cellstr_error_with_identifier(
181 ERR_CELL_CONTENT_NOT_TEXT,
182 IDENT_INVALID_CONTENTS,
183 ))
184 }
185 }
186 Value::Num(_)
187 | Value::Int(_)
188 | Value::Bool(_)
189 | Value::Tensor(_)
190 | Value::LogicalArray(_)
191 | Value::Complex(_, _)
192 | Value::ComplexTensor(_)
193 | Value::GpuTensor(_) => Err(cellstr_error_with_identifier(
194 ERR_CELL_CONTENT_NOT_TEXT,
195 IDENT_INVALID_CONTENTS,
196 )),
197 Value::Cell(_) | Value::Struct(_) | Value::Object(_) | Value::HandleObject(_) => Err(
198 cellstr_error_with_identifier(ERR_CELL_CONTENT_NOT_TEXT, IDENT_INVALID_CONTENTS),
199 ),
200 other => Err(cellstr_error_with_identifier(
201 format!("cellstr: unsupported cell element {other:?}"),
202 IDENT_INVALID_CONTENTS,
203 )),
204 }
205}
206
/// Collects `chars` into a `String`, dropping any run of `' '` at the end.
///
/// Only the ASCII space character is removed; other whitespace (tabs,
/// newlines, ...) and leading or interior spaces are kept.
fn trim_trailing_spaces(chars: &[char]) -> String {
    // One past the last non-space character, or 0 when there is none.
    let kept = chars
        .iter()
        .rposition(|&ch| ch != ' ')
        .map_or(0, |last| last + 1);
    chars[..kept].iter().collect()
}
214
/// Splits a row-major linear `index` into one coordinate per dimension of
/// `shape` (the last dimension varies fastest).
///
/// A dimension with extent 0 always gets coordinate 0 and does not consume
/// any part of `index`. An empty `shape` yields an empty vector.
fn linear_to_multi_row_major(mut index: usize, shape: &[usize]) -> Vec<usize> {
    // Peel coordinates off starting with the fastest-varying (last) axis,
    // then flip the result back into axis order.
    let mut coords: Vec<usize> = shape
        .iter()
        .rev()
        .map(|&extent| {
            if extent == 0 {
                0
            } else {
                let coord = index % extent;
                index /= extent;
                coord
            }
        })
        .collect();
    coords.reverse();
    coords
}
230
/// Folds per-dimension `coords` into a column-major linear index for `shape`
/// (the first dimension varies fastest).
///
/// Returns 0 as soon as a dimension with extent 0 is encountered, and 0 for
/// empty `coords`.
///
/// # Panics
/// Panics if `coords` has more entries than `shape`.
fn multi_to_linear_column_major(coords: &[usize], shape: &[usize]) -> usize {
    // Accumulator is (offset so far, stride of the current axis); `None`
    // signals that a zero-extent axis was hit.
    let folded = coords
        .iter()
        .enumerate()
        .try_fold((0usize, 1usize), |(offset, stride), (axis, &coord)| {
            match shape[axis] {
                0 => None,
                extent => Some((offset + coord * stride, stride * extent)),
            }
        });
    folded.map_or(0, |(offset, _)| offset)
}
244
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use futures::executor::block_on;

    /// Drives the async builtin to completion so tests can call it synchronously.
    fn cellstr_builtin(value: Value) -> BuiltinResult<Value> {
        block_on(super::cellstr_builtin(value))
    }

    /// Unwraps a `Value::Cell`, panicking on any other variant.
    fn expect_cell(value: Value) -> CellArray {
        match value {
            Value::Cell(cell) => cell,
            other => panic!("expected cell result, got {other:?}"),
        }
    }

    /// Runs `cellstr` on `value`, requires it to fail, and returns the rendered error.
    fn error_text(value: Value) -> String {
        cellstr_builtin(value)
            .expect_err("expected error")
            .to_string()
    }

    /// Returns the first element of `cell`, which must be a char array.
    fn first_char_row(cell: &CellArray) -> &CharArray {
        // SAFETY (assumption, confirm against the cell handle type's contract):
        // the pointer is expected to stay valid while `cell` is borrowed.
        match unsafe { &*cell.data[0].as_raw() } {
            Value::CharArray(row) => row,
            other => panic!("expected CharArray, got {other:?}"),
        }
    }

    /// Collects every element of `cell` (in storage order) as a `String`.
    fn cell_to_strings(cell: &CellArray) -> Vec<String> {
        let mut texts = Vec::with_capacity(cell.data.len());
        for handle in &cell.data {
            // SAFETY (assumption, confirm against the cell handle type's contract):
            // the pointer is expected to stay valid while `cell` is borrowed.
            match unsafe { &*handle.as_raw() } {
                Value::CharArray(ca) => texts.push(ca.data.iter().collect()),
                other => panic!("expected CharArray in cell, found {other:?}"),
            }
        }
        texts
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn converts_char_matrix_and_trims() {
        let data: Vec<char> = "cat dog fox ".chars().collect();
        let ca = CharArray::new(data, 3, 4).expect("char array");
        let cell = expect_cell(cellstr_builtin(Value::CharArray(ca)).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (3, 1));
        assert_eq!(cell_to_strings(&cell), ["cat", "dog", "fox"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn converts_string_array_with_shape() {
        let data: Vec<String> = ["north", "south", "east", "west"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let sa = StringArray::new(data, vec![2, 2]).expect("string array");
        let cell = expect_cell(cellstr_builtin(Value::StringArray(sa)).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (2, 2));
        assert_eq!(cell_to_strings(&cell), ["north", "east", "south", "west"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn converts_string_scalar() {
        let result = cellstr_builtin(Value::String("RunMat".to_string())).expect("cellstr");
        let cell = expect_cell(result);
        assert_eq!((cell.rows, cell.cols), (1, 1));
        assert_eq!(cell_to_strings(&cell), ["RunMat"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn normalises_cell_elements() {
        let elements = vec![
            Value::CharArray(CharArray::new_row("alpha")),
            Value::String("beta".to_string()),
        ];
        let input = crate::make_cell(elements, 1, 2).expect("cell");
        let cell = expect_cell(cellstr_builtin(input).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (1, 2));
        assert_eq!(cell_to_strings(&cell), ["alpha", "beta"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rejects_non_text_cell_element() {
        let input = crate::make_cell(vec![Value::Num(1.0)], 1, 1).expect("cell");
        let err = error_text(input);
        assert!(err.contains("cell array elements must be"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rejects_multirow_char_element() {
        let ca = CharArray::new(vec!['a', 'b', 'c', 'd'], 2, 2).expect("char array");
        let input = crate::make_cell(vec![Value::CharArray(ca)], 1, 1).expect("cell");
        let err = error_text(input);
        assert!(err.contains("cell array elements must be"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn rejects_non_text_input() {
        let err = error_text(Value::Num(std::f64::consts::PI));
        assert!(err.contains("input must be"));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn handles_empty_char_array() {
        let ca = CharArray::new(Vec::new(), 0, 5).expect("empty char");
        let cell = expect_cell(cellstr_builtin(Value::CharArray(ca)).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (0, 1));
        assert!(cell.data.is_empty());
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn char_row_of_spaces_becomes_empty_vector() {
        let ca = CharArray::new(vec![' '; 3], 1, 3).expect("char array");
        let cell = expect_cell(cellstr_builtin(Value::CharArray(ca)).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (1, 1));
        let row = first_char_row(&cell);
        assert_eq!((row.rows, row.cols), (1, 0));
        assert!(row.data.is_empty());
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn cell_elements_preserve_trailing_spaces() {
        let ca = CharArray::new(vec!['a', ' ', ' '], 1, 3).expect("char array");
        let input = crate::make_cell(vec![Value::CharArray(ca.clone())], 1, 1).expect("cell");
        let cell = expect_cell(cellstr_builtin(input).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (1, 1));
        let row = first_char_row(&cell);
        assert_eq!((row.rows, row.cols), (ca.rows, ca.cols));
        assert_eq!(row.data, ca.data);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn string_array_missing_value_converts() {
        let sa = StringArray::new(vec!["<missing>".to_string()], vec![1, 1]).expect("string array");
        let cell = expect_cell(cellstr_builtin(Value::StringArray(sa)).expect("cellstr"));
        assert_eq!(cell_to_strings(&cell), ["<missing>"]);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn empty_string_array_produces_empty_cell_shape() {
        let sa = StringArray::new(Vec::new(), vec![0, 2]).expect("string array");
        let cell = expect_cell(cellstr_builtin(Value::StringArray(sa)).expect("cellstr"));
        assert_eq!((cell.rows, cell.cols), (0, 2));
        assert!(cell.data.is_empty());
    }
}