Skip to main content

runmat_runtime/builtins/strings/transform/
upper.rs

1//! MATLAB-compatible `upper` builtin with GPU-aware semantics for RunMat.
2use runmat_builtins::{
3    BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
4    BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
5    CellArray, CharArray, StringArray, Value,
6};
7use runmat_macros::runtime_builtin;
8
9use crate::builtins::common::map_control_flow_with_builtin;
10use crate::builtins::common::spec::{
11    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
12    ReductionNaN, ResidencyPolicy, ShapeRequirements,
13};
14use crate::builtins::strings::common::{char_row_to_string_slice, uppercase_preserving_missing};
15use crate::builtins::strings::type_resolvers::text_preserve_type;
16use crate::{build_runtime_error, gather_if_needed_async, make_cell, BuiltinResult, RuntimeError};
17
/// GPU capability spec registered for the `upper` builtin.
///
/// Lists no supported precisions and no provider hooks, and uses
/// `ResidencyPolicy::GatherImmediately`; as `notes` states, the conversion
/// runs on the CPU after GPU-resident inputs are gathered to host memory.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::transform::upper")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "upper",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Executes on the CPU; GPU-resident inputs are gathered to host memory before conversion.",
};
34
/// Fusion spec registered for the `upper` builtin.
///
/// Both `elementwise` and `reduction` are `None`; `notes` records that the
/// builtin is not eligible for fusion.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::transform::upper")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "upper",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "String transformation builtin; not eligible for fusion and always gathers GPU inputs.",
};
45
/// Builtin name attached to runtime errors raised in this module and used as the
/// prefix of formatted internal-error messages.
const BUILTIN_NAME: &str = "upper";
47
/// Descriptor for the single required output, `out`, of `upper`.
const UPPER_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "out",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Uppercased text preserving input container kind and shape.",
}];
55
/// Descriptor for the single required input, `str`, of `upper`.
const UPPER_INPUTS: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "str",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "String/char/cell text input to transform.",
}];
63
/// The one advertised call form, `out = upper(str)`, tying `UPPER_INPUTS` to `UPPER_OUTPUT`.
const UPPER_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "out = upper(str)",
    inputs: &UPPER_INPUTS,
    outputs: &UPPER_OUTPUT,
}];
69
/// Error returned by `upper_builtin` when the gathered argument is not a string scalar,
/// string array, char array, or cell array.
const UPPER_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.UPPER.INVALID_INPUT",
    identifier: Some("RunMat:upper:InvalidInput"),
    when: "Input is not a string array, character array, or cell array of text scalars.",
    message:
        "upper: first argument must be a string array, character array, or cell array of character vectors",
};
77
/// Error returned by `upper_cell_element` for a cell element that is neither a string scalar,
/// a one-element string array, nor a char array with at most one row.
const UPPER_ERROR_CELL_ELEMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.UPPER.CELL_ELEMENT",
    identifier: Some("RunMat:upper:CellElement"),
    when: "Cell array contains a non-text element or non-row char array element.",
    message: "upper: cell array elements must be string scalars or character vectors",
};
84
/// Error descriptor used when rebuilding the output container fails.
///
/// Call sites in this module supply their own `"upper: {e}"` message and take only the
/// identifier from this descriptor, so `message` below is not the text they emit.
const UPPER_ERROR_INTERNAL: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.UPPER.INTERNAL",
    identifier: Some("RunMat:upper:InternalError"),
    when: "Internal output container construction failed.",
    message: "upper: internal error",
};
91
/// All error descriptors advertised through `UPPER_DESCRIPTOR`.
const UPPER_ERRORS: [BuiltinErrorDescriptor; 3] = [
    UPPER_ERROR_INVALID_INPUT,
    UPPER_ERROR_CELL_ELEMENT,
    UPPER_ERROR_INTERNAL,
];
97
/// Public descriptor for `upper`: its signatures and error table, with a fixed output mode
/// and public completion policy. Referenced by the `descriptor(...)` argument of the
/// `runtime_builtin` attribute on `upper_builtin`.
pub const UPPER_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &UPPER_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &UPPER_ERRORS,
};
104
105fn map_flow(err: RuntimeError) -> RuntimeError {
106    map_control_flow_with_builtin(err, BUILTIN_NAME)
107}
108
109fn upper_error_with_message(
110    message: impl Into<String>,
111    error: &'static BuiltinErrorDescriptor,
112) -> RuntimeError {
113    let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
114    if let Some(identifier) = error.identifier {
115        builder = builder.with_identifier(identifier);
116    }
117    builder.build()
118}
119
120fn upper_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
121    upper_error_with_message(error.message, error)
122}
123
124#[runtime_builtin(
125    name = "upper",
126    category = "strings/transform",
127    summary = "Convert text inputs to uppercase character forms.",
128    keywords = "upper,uppercase,strings,character array,text",
129    accel = "sink",
130    type_resolver(text_preserve_type),
131    descriptor(crate::builtins::strings::transform::upper::UPPER_DESCRIPTOR),
132    builtin_path = "crate::builtins::strings::transform::upper"
133)]
134async fn upper_builtin(value: Value) -> BuiltinResult<Value> {
135    let gathered = gather_if_needed_async(&value).await.map_err(map_flow)?;
136    match gathered {
137        Value::String(text) => Ok(Value::String(uppercase_preserving_missing(text))),
138        Value::StringArray(array) => upper_string_array(array),
139        Value::CharArray(array) => upper_char_array(array),
140        Value::Cell(cell) => upper_cell_array(cell),
141        _ => Err(upper_error(&UPPER_ERROR_INVALID_INPUT)),
142    }
143}
144
145fn upper_string_array(array: StringArray) -> BuiltinResult<Value> {
146    let StringArray { data, shape, .. } = array;
147    let uppered = data
148        .into_iter()
149        .map(uppercase_preserving_missing)
150        .collect::<Vec<_>>();
151    let upper_array = StringArray::new(uppered, shape).map_err(|e| {
152        upper_error_with_message(format!("{BUILTIN_NAME}: {e}"), &UPPER_ERROR_INTERNAL)
153    })?;
154    Ok(Value::StringArray(upper_array))
155}
156
157fn upper_char_array(array: CharArray) -> BuiltinResult<Value> {
158    let CharArray { data, rows, cols } = array;
159    if rows == 0 || cols == 0 {
160        return Ok(Value::CharArray(CharArray { data, rows, cols }));
161    }
162
163    let mut upper_rows = Vec::with_capacity(rows);
164    let mut target_cols = cols;
165    for row in 0..rows {
166        let text = char_row_to_string_slice(&data, cols, row).to_uppercase();
167        let len = text.chars().count();
168        target_cols = target_cols.max(len);
169        upper_rows.push(text);
170    }
171
172    let mut upper_data = Vec::with_capacity(rows * target_cols);
173    for row_text in upper_rows {
174        let mut chars: Vec<char> = row_text.chars().collect();
175        if chars.len() < target_cols {
176            chars.resize(target_cols, ' ');
177        }
178        upper_data.extend(chars.into_iter());
179    }
180
181    CharArray::new(upper_data, rows, target_cols)
182        .map(Value::CharArray)
183        .map_err(|e| {
184            upper_error_with_message(format!("{BUILTIN_NAME}: {e}"), &UPPER_ERROR_INTERNAL)
185        })
186}
187
188fn upper_cell_array(cell: CellArray) -> BuiltinResult<Value> {
189    let CellArray {
190        data, rows, cols, ..
191    } = cell;
192    let mut upper_values = Vec::with_capacity(rows * cols);
193    for row in 0..rows {
194        for col in 0..cols {
195            let idx = row * cols + col;
196            let upper = upper_cell_element(&data[idx])?;
197            upper_values.push(upper);
198        }
199    }
200    make_cell(upper_values, rows, cols).map_err(|e| {
201        upper_error_with_message(format!("{BUILTIN_NAME}: {e}"), &UPPER_ERROR_INTERNAL)
202    })
203}
204
205fn upper_cell_element(value: &Value) -> BuiltinResult<Value> {
206    match value {
207        Value::String(text) => Ok(Value::String(uppercase_preserving_missing(text.clone()))),
208        Value::StringArray(sa) if sa.data.len() == 1 => Ok(Value::String(
209            uppercase_preserving_missing(sa.data[0].clone()),
210        )),
211        Value::CharArray(ca) if ca.rows <= 1 => upper_char_array(ca.clone()),
212        Value::CharArray(_) => Err(upper_error(&UPPER_ERROR_CELL_ELEMENT)),
213        _ => Err(upper_error(&UPPER_ERROR_CELL_ELEMENT)),
214    }
215}
216
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use runmat_builtins::{ResolveContext, Type};

    /// Runs the async `upper_builtin` to completion on the calling thread.
    fn run_upper(value: Value) -> BuiltinResult<Value> {
        let pending = upper_builtin(value);
        futures::executor::block_on(pending)
    }

    /// Unwraps a char-array result, panicking with the actual value otherwise.
    fn expect_char_array(value: Value) -> CharArray {
        match value {
            Value::CharArray(chars) => chars,
            other => panic!("expected char array, got {other:?}"),
        }
    }

    /// Unwraps a cell-array result, panicking with the actual value otherwise.
    fn expect_cell(value: Value) -> CellArray {
        match value {
            Value::Cell(cell) => cell,
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_string_scalar_value() {
        let input = Value::String("RunMat".into());
        let uppered = run_upper(input).expect("upper");
        assert_eq!(uppered, Value::String("RUNMAT".into()));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_string_array_preserves_shape() {
        let inputs: Vec<String> = ["gpu", "accel", "<missing>", "MiXeD"]
            .iter()
            .map(|text| text.to_string())
            .collect();
        let array = StringArray::new(inputs, vec![2, 2]).unwrap();

        let uppered = match run_upper(Value::StringArray(array)).expect("upper") {
            Value::StringArray(strings) => strings,
            other => panic!("expected string array, got {other:?}"),
        };

        let expected: Vec<String> = ["GPU", "ACCEL", "<missing>", "MIXED"]
            .iter()
            .map(|text| text.to_string())
            .collect();
        assert_eq!(uppered.shape, vec![2, 2]);
        assert_eq!(uppered.data, expected);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_char_array_multiple_rows() {
        let letters: Vec<char> = "catdog".chars().collect();
        let input = CharArray::new(letters, 2, 3).unwrap();

        let uppered = expect_char_array(run_upper(Value::CharArray(input)).expect("upper"));

        assert_eq!(uppered.rows, 2);
        assert_eq!(uppered.cols, 3);
        assert_eq!(uppered.data, vec!['C', 'A', 'T', 'D', 'O', 'G']);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_char_vector_handles_padding() {
        let input = CharArray::new_row("hello ");

        let uppered = expect_char_array(run_upper(Value::CharArray(input)).expect("upper"));

        let expected: Vec<char> = "HELLO ".chars().collect();
        assert_eq!(uppered.rows, 1);
        assert_eq!(uppered.cols, 6);
        assert_eq!(uppered.data, expected);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_char_array_unicode_expansion_extends_width() {
        let letters: Vec<char> = ['ß', 'a'].to_vec();
        let input = CharArray::new(letters, 1, 2).unwrap();

        let uppered = expect_char_array(run_upper(Value::CharArray(input)).expect("upper"));

        let expected: Vec<char> = vec!['S', 'S', 'A'];
        assert_eq!(uppered.rows, 1);
        assert_eq!(uppered.cols, 3);
        assert_eq!(uppered.data, expected);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_cell_array_mixed_content() {
        let elements = vec![
            Value::CharArray(CharArray::new_row("run")),
            Value::String("Mat".into()),
        ];
        let input = CellArray::new(elements, 1, 2).unwrap();

        let out = expect_cell(run_upper(Value::Cell(input)).expect("upper"));

        let first = out.get(0, 0).unwrap();
        let second = out.get(0, 1).unwrap();
        assert_eq!(first, Value::CharArray(CharArray::new_row("RUN")));
        assert_eq!(second, Value::String("MAT".into()));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_errors_on_invalid_input() {
        let failure = run_upper(Value::Num(1.0)).unwrap_err();
        assert_eq!(failure.to_string(), UPPER_ERROR_INVALID_INPUT.message);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_cell_errors_on_invalid_element() {
        let input = CellArray::new(vec![Value::Num(1.0)], 1, 1).unwrap();
        let failure = run_upper(Value::Cell(input)).unwrap_err();
        assert_eq!(failure.to_string(), UPPER_ERROR_CELL_ELEMENT.message);
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_preserves_missing_string() {
        let input = Value::String("<missing>".into());
        let uppered = run_upper(input).expect("upper");
        assert_eq!(uppered, Value::String("<missing>".into()));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_cell_allows_empty_char_vector() {
        let empty_char = CharArray::new(Vec::new(), 1, 0).unwrap();
        let input = CellArray::new(vec![Value::CharArray(empty_char.clone())], 1, 1).unwrap();

        let out = expect_cell(run_upper(Value::Cell(input)).expect("upper"));

        let element = out.get(0, 0).unwrap();
        assert_eq!(element, Value::CharArray(empty_char));
    }

    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    #[cfg(feature = "wgpu")]
    fn upper_gpu_tensor_input_gathers_then_errors() {
        // Registration result is deliberately ignored, as in every run of this test.
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let provider = runmat_accelerate_api::provider().expect("wgpu provider");

        let data = [1.0f64, 2.0];
        let shape = [2usize, 1usize];
        let view = runmat_accelerate_api::HostTensorView {
            data: &data,
            shape: &shape,
        };
        let handle = provider.upload(&view).expect("upload");

        let failure = run_upper(Value::GpuTensor(handle.clone())).unwrap_err();
        assert_eq!(failure.to_string(), UPPER_ERROR_INVALID_INPUT.message);
        provider.free(&handle).ok();
    }

    #[test]
    fn upper_type_preserves_text() {
        let ctx = ResolveContext::new(Vec::new());
        let resolved = text_preserve_type(&[Type::String], &ctx);
        assert_eq!(resolved, Type::String);
    }
}