1use runmat_builtins::{CellArray, CharArray, StringArray, Value};
3use runmat_macros::runtime_builtin;
4
5use crate::builtins::common::spec::{
6 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
7 ReductionNaN, ResidencyPolicy, ShapeRequirements,
8};
9use crate::builtins::strings::common::{char_row_to_string_slice, uppercase_preserving_missing};
10#[cfg(feature = "doc_export")]
11use crate::register_builtin_doc_text;
12use crate::{gather_if_needed, make_cell, register_builtin_fusion_spec, register_builtin_gpu_spec};
13
/// Markdown reference page for the `upper` builtin: YAML front matter followed by behaviour
/// notes, worked examples, and an FAQ. Only compiled when the `doc_export` feature is enabled;
/// this file passes it to `register_builtin_doc_text!` under the name "upper".
#[cfg(feature = "doc_export")]
pub const DOC_MD: &str = r#"---
title: "upper"
category: "strings/transform"
keywords: ["upper", "uppercase", "convert to uppercase", "string case", "character arrays"]
summary: "Convert strings, character arrays, and cell arrays of character vectors to uppercase."
references:
 - https://www.mathworks.com/help/matlab/ref/upper.html
gpu_support:
 elementwise: false
 reduction: false
 precisions: []
 broadcasting: "none"
 notes: "Runs on the CPU; GPU-resident inputs are gathered before conversion to keep MATLAB parity."
fusion:
 elementwise: false
 reduction: false
 max_inputs: 1
 constants: "inline"
requires_feature: null
tested:
 unit: "builtins::strings::transform::upper::tests"
 integration: "builtins::strings::transform::upper::tests::upper_cell_array_mixed_content"
---

# What does the `upper` function do in MATLAB / RunMat?
`upper(text)` converts every alphabetic character in `text` to uppercase. It accepts string scalars,
string arrays, character arrays, and cell arrays of character vectors, mirroring MATLAB behaviour.
Non-alphabetic characters are returned unchanged.

## How does the `upper` function behave in MATLAB / RunMat?
- String inputs stay as strings. String arrays preserve their size, orientation, and missing values.
- Character arrays are processed row by row. The result remains a rectangular char array; if any row
 grows after uppercasing (for example `'ß' → "SS"`), the array widens and shorter rows are padded with spaces.
- Cell arrays must contain string scalars or character vectors. The result is a cell array of the same size
 with each element converted to uppercase; other types raise MATLAB-compatible errors.
- Missing string scalars (`string(missing)`) remain `<missing>` so downstream code behaves like MATLAB.
- Inputs that are numeric, logical, structs, or GPU tensors raise MATLAB-compatible type errors.

## `upper` Function GPU Execution Behaviour
`upper` executes on the CPU. Text values currently reside in host memory, so providers do not offer device
kernels for this builtin. When you pass a container that still holds GPU handles (for example, a struct
whose string fields were gathered lazily), RunMat gathers those handles before performing the conversion.
If you store characters as numeric code points on the GPU, gather and convert them to text before calling
`upper`.

## GPU residency in RunMat (Do I need `gpuArray`?)
RunMat keeps text data in host memory, so you typically work with ordinary string and character arrays.
When text originates from GPU computations (for example, numeric code points produced by kernels), gather
those values to the host and convert them to text before calling `upper`.

## Examples of using the `upper` function in MATLAB / RunMat

### Convert a string scalar to uppercase
```matlab
txt = "RunMat";
result = upper(txt);
```
Expected output:
```matlab
result = "RUNMAT"
```

### Uppercase each element of a string array
```matlab
labels = ["north" "South"; "East" "west"];
uppered = upper(labels);
```
Expected output:
```matlab
uppered = 2×2 string
 "NORTH" "SOUTH"
 "EAST" "WEST"
```

### Uppercase character array rows while preserving shape
```matlab
animals = char("cat", "doge");
result = upper(animals);
```
Expected output:
```matlab
result =

 2×4 char array

 'CAT '
 'DOGE'
```

### Uppercase a cell array of character vectors
```matlab
C = {'hello', 'World'};
out = upper(C);
```
Expected output:
```matlab
out = 1×2 cell array
 {'HELLO'} {'WORLD'}
```

### Keep missing strings as missing
```matlab
vals = ["data", string(missing), "gpu"];
converted = upper(vals);
```
Expected output:
```matlab
converted = 1×3 string
 "DATA" <missing> "GPU"
```

### Handle text stored on a GPU input
```matlab
codes = gpuArray(uint16('runmat'));
txt = char(gather(codes));
result = upper(txt);
```
Expected output:
```matlab
result = 'RUNMAT'
```

## FAQ

### Does `upper` change non-alphabetic characters?
No. Digits, punctuation, whitespace, and symbols remain untouched. Only alphabetic code points that have
distinct uppercase forms are converted.

### What happens to character array dimensions?
RunMat uppercases each row independently and pads with spaces when an uppercase mapping increases the row
length. This mirrors MATLAB’s behaviour so the result always has rectangular dimensions.

### Can I pass numeric arrays to `upper`?
No. Passing numeric, logical, or struct inputs raises a MATLAB-compatible error. Convert the data to a string
or character array first (for example with `string` or `char`).

### How are missing strings handled?
Missing string scalars remain `<missing>` and are returned unchanged. This matches MATLAB’s handling of
missing values in text processing functions.

### Will `upper` ever execute on the GPU?
Not today. The builtin gathers GPU-resident data automatically and performs the conversion on the CPU so the
results match MATLAB exactly. Providers may add device-side kernels in the future, but behaviour will remain
compatible.

## See Also
[lower](./lower), [string](../core/string), [char](../core/char), [regexprep](../regex/regexprep), [strcmpi](../search/strcmpi)

## Source & Feedback
- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/upper.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/upper.rs)
- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
"#;
167
/// GPU execution metadata for `upper`.
///
/// Declares a custom "string-transform" operation with no supported precisions and no provider
/// hooks, and a residency policy of gathering inputs immediately, i.e. the builtin is described
/// as CPU-only.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "upper",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Executes on the CPU; GPU-resident inputs are gathered to host memory before conversion.",
};

// Hand the spec to the crate's GPU-spec registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
185
/// Fusion metadata for `upper`.
///
/// Both the elementwise and reduction descriptors are `None`, marking the builtin as not
/// eligible for fusion (see `notes`).
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "upper",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "String transformation builtin; not eligible for fusion and always gathers GPU inputs.",
};

// Hand the spec to the crate's fusion-spec registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);

// The Markdown page is only registered when documentation export is compiled in.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("upper", DOC_MD);
200
/// Error returned when the top-level argument is not a string, string array, character array,
/// or cell array.
const ARG_TYPE_ERROR: &str =
    "upper: first argument must be a string array, character array, or cell array of character vectors";
/// Error returned when a cell element is neither a string scalar nor a character vector.
const CELL_ELEMENT_ERROR: &str =
    "upper: cell array elements must be string scalars or character vectors";
205
206#[runtime_builtin(
207 name = "upper",
208 category = "strings/transform",
209 summary = "Convert strings, character arrays, and cell arrays of character vectors to uppercase.",
210 keywords = "upper,uppercase,strings,character array,text",
211 accel = "sink"
212)]
213fn upper_builtin(value: Value) -> Result<Value, String> {
214 let gathered = gather_if_needed(&value).map_err(|e| format!("upper: {e}"))?;
215 match gathered {
216 Value::String(text) => Ok(Value::String(uppercase_preserving_missing(text))),
217 Value::StringArray(array) => upper_string_array(array),
218 Value::CharArray(array) => upper_char_array(array),
219 Value::Cell(cell) => upper_cell_array(cell),
220 _ => Err(ARG_TYPE_ERROR.to_string()),
221 }
222}
223
224fn upper_string_array(array: StringArray) -> Result<Value, String> {
225 let StringArray { data, shape, .. } = array;
226 let uppered = data
227 .into_iter()
228 .map(uppercase_preserving_missing)
229 .collect::<Vec<_>>();
230 let upper_array = StringArray::new(uppered, shape).map_err(|e| format!("upper: {e}"))?;
231 Ok(Value::StringArray(upper_array))
232}
233
234fn upper_char_array(array: CharArray) -> Result<Value, String> {
235 let CharArray { data, rows, cols } = array;
236 if rows == 0 || cols == 0 {
237 return Ok(Value::CharArray(CharArray { data, rows, cols }));
238 }
239
240 let mut upper_rows = Vec::with_capacity(rows);
241 let mut target_cols = cols;
242 for row in 0..rows {
243 let text = char_row_to_string_slice(&data, cols, row).to_uppercase();
244 let len = text.chars().count();
245 target_cols = target_cols.max(len);
246 upper_rows.push(text);
247 }
248
249 let mut upper_data = Vec::with_capacity(rows * target_cols);
250 for row_text in upper_rows {
251 let mut chars: Vec<char> = row_text.chars().collect();
252 if chars.len() < target_cols {
253 chars.resize(target_cols, ' ');
254 }
255 upper_data.extend(chars.into_iter());
256 }
257
258 CharArray::new(upper_data, rows, target_cols)
259 .map(Value::CharArray)
260 .map_err(|e| format!("upper: {e}"))
261}
262
263fn upper_cell_array(cell: CellArray) -> Result<Value, String> {
264 let CellArray {
265 data, rows, cols, ..
266 } = cell;
267 let mut upper_values = Vec::with_capacity(rows * cols);
268 for row in 0..rows {
269 for col in 0..cols {
270 let idx = row * cols + col;
271 let upper = upper_cell_element(&data[idx])?;
272 upper_values.push(upper);
273 }
274 }
275 make_cell(upper_values, rows, cols).map_err(|e| format!("upper: {e}"))
276}
277
278fn upper_cell_element(value: &Value) -> Result<Value, String> {
279 match value {
280 Value::String(text) => Ok(Value::String(uppercase_preserving_missing(text.clone()))),
281 Value::StringArray(sa) if sa.data.len() == 1 => Ok(Value::String(
282 uppercase_preserving_missing(sa.data[0].clone()),
283 )),
284 Value::CharArray(ca) if ca.rows <= 1 => upper_char_array(ca.clone()),
285 Value::CharArray(_) => Err(CELL_ELEMENT_ERROR.to_string()),
286 _ => Err(CELL_ELEMENT_ERROR.to_string()),
287 }
288}
289
/// Unit tests for the `upper` builtin, covering each accepted input kind, the error paths, and
/// the widening/padding behaviour of character arrays.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    // A string scalar is uppercased and stays a string scalar.
    #[test]
    fn upper_string_scalar_value() {
        let result = upper_builtin(Value::String("RunMat".into())).expect("upper");
        assert_eq!(result, Value::String("RUNMAT".into()));
    }

    // String arrays keep their shape and leave the `<missing>` marker untouched.
    #[test]
    fn upper_string_array_preserves_shape() {
        let array = StringArray::new(
            vec![
                "gpu".into(),
                "accel".into(),
                "<missing>".into(),
                "MiXeD".into(),
            ],
            vec![2, 2],
        )
        .unwrap();
        let result = upper_builtin(Value::StringArray(array)).expect("upper");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![2, 2]);
                assert_eq!(
                    sa.data,
                    vec![
                        String::from("GPU"),
                        String::from("ACCEL"),
                        String::from("<missing>"),
                        String::from("MIXED")
                    ]
                );
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    // Multi-row character arrays are converted row by row without changing dimensions.
    #[test]
    fn upper_char_array_multiple_rows() {
        let data: Vec<char> = vec!['c', 'a', 't', 'd', 'o', 'g'];
        let array = CharArray::new(data, 2, 3).unwrap();
        let result = upper_builtin(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 2);
                assert_eq!(ca.cols, 3);
                assert_eq!(ca.data, vec!['C', 'A', 'T', 'D', 'O', 'G']);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // Trailing spaces already present in the input survive the conversion.
    #[test]
    fn upper_char_vector_handles_padding() {
        let array = CharArray::new_row("hello ");
        let result = upper_builtin(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 1);
                assert_eq!(ca.cols, 6);
                let expected: Vec<char> = "HELLO ".chars().collect();
                assert_eq!(ca.data, expected);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // 'ß' uppercases to "SS", so a single row grows by one column.
    #[test]
    fn upper_char_array_unicode_expansion_extends_width() {
        let data: Vec<char> = vec!['ß', 'a'];
        let array = CharArray::new(data, 1, 2).unwrap();
        let result = upper_builtin(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 1);
                assert_eq!(ca.cols, 3);
                let expected: Vec<char> = vec!['S', 'S', 'A'];
                assert_eq!(ca.data, expected);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // When only one row expands, the other rows are padded with spaces to the new width.
    #[test]
    fn upper_char_array_expansion_pads_shorter_rows() {
        let data: Vec<char> = vec!['ß', 'a', 'b', 'c'];
        let array = CharArray::new(data, 2, 2).unwrap();
        let result = upper_builtin(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 2);
                assert_eq!(ca.cols, 3);
                assert_eq!(ca.data, vec!['S', 'S', 'A', 'B', 'C', ' ']);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // Cell elements keep their own kind: char vectors stay char arrays, strings stay strings.
    #[test]
    fn upper_cell_array_mixed_content() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("run")),
                Value::String("Mat".into()),
            ],
            1,
            2,
        )
        .unwrap();
        let result = upper_builtin(Value::Cell(cell)).expect("upper");
        match result {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("RUN")));
                assert_eq!(second, Value::String("MAT".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // A one-element string array inside a cell comes back as a string scalar.
    #[test]
    fn upper_cell_string_array_scalar_becomes_string() {
        let scalar = StringArray::new(vec!["abc".into()], vec![1, 1]).unwrap();
        let cell = CellArray::new(vec![Value::StringArray(scalar)], 1, 1).unwrap();
        let result = upper_builtin(Value::Cell(cell)).expect("upper");
        match result {
            Value::Cell(out) => {
                let element = out.get(0, 0).unwrap();
                assert_eq!(element, Value::String("ABC".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // Non-text top-level inputs are rejected with the argument type error.
    #[test]
    fn upper_errors_on_invalid_input() {
        let err = upper_builtin(Value::Num(1.0)).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
    }

    // Non-text cell elements are rejected with the cell element error.
    #[test]
    fn upper_cell_errors_on_invalid_element() {
        let cell = CellArray::new(vec![Value::Num(1.0)], 1, 1).unwrap();
        let err = upper_builtin(Value::Cell(cell)).unwrap_err();
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    // Only character vectors (at most one row) are accepted as cell elements.
    #[test]
    fn upper_cell_rejects_multirow_char_array() {
        let matrix = CharArray::new(vec!['a', 'b'], 2, 1).unwrap();
        let cell = CellArray::new(vec![Value::CharArray(matrix)], 1, 1).unwrap();
        let err = upper_builtin(Value::Cell(cell)).unwrap_err();
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    // The `<missing>` marker is passed through unchanged for string scalars.
    #[test]
    fn upper_preserves_missing_string() {
        let result = upper_builtin(Value::String("<missing>".into())).expect("upper");
        assert_eq!(result, Value::String("<missing>".into()));
    }

    // An empty 1x0 character vector inside a cell is accepted and returned as-is.
    #[test]
    fn upper_cell_allows_empty_char_vector() {
        let empty_char = CharArray::new(Vec::new(), 1, 0).unwrap();
        let cell = CellArray::new(vec![Value::CharArray(empty_char.clone())], 1, 1).unwrap();
        let result = upper_builtin(Value::Cell(cell)).expect("upper");
        match result {
            Value::Cell(out) => {
                let element = out.get(0, 0).unwrap();
                assert_eq!(element, Value::CharArray(empty_char));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // A numeric GPU tensor ends in the same type error as a host numeric input.
    #[test]
    #[cfg(feature = "wgpu")]
    fn upper_gpu_tensor_input_gathers_then_errors() {
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let provider = runmat_accelerate_api::provider().expect("wgpu provider");
        let data = [1.0f64, 2.0];
        let shape = [2usize, 1usize];
        let handle = provider
            .upload(&runmat_accelerate_api::HostTensorView {
                data: &data,
                shape: &shape,
            })
            .expect("upload");
        let err = upper_builtin(Value::GpuTensor(handle.clone())).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
        provider.free(&handle).ok();
    }

    // The exported Markdown page must contain at least one example block.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}