1use runmat_builtins::{
3 BuiltinCompletionPolicy, BuiltinDescriptor, BuiltinErrorDescriptor, BuiltinOutputMode,
4 BuiltinParamArity, BuiltinParamDescriptor, BuiltinParamType, BuiltinSignatureDescriptor,
5 CellArray, CharArray, StringArray, Value,
6};
7use runmat_macros::runtime_builtin;
8
9use crate::builtins::common::map_control_flow_with_builtin;
10use crate::builtins::common::spec::{
11 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
12 ReductionNaN, ResidencyPolicy, ShapeRequirements,
13};
14use crate::builtins::strings::common::{char_row_to_string_slice, uppercase_preserving_missing};
15use crate::builtins::strings::type_resolvers::text_preserve_type;
16use crate::{build_runtime_error, gather_if_needed_async, make_cell, BuiltinResult, RuntimeError};
17
// GPU execution metadata registered for the `upper` builtin.
//
// The spec declares no provider hooks and no supported precisions, and its
// residency policy is `GatherImmediately`: as the `notes` field states,
// GPU-resident inputs are gathered to host memory and converted on the CPU.
#[runmat_macros::register_gpu_spec(builtin_path = "crate::builtins::strings::transform::upper")]
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "upper",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Executes on the CPU; GPU-resident inputs are gathered to host memory before conversion.",
};
34
// Fusion metadata registered for the `upper` builtin.
//
// Neither an elementwise nor a reduction template is supplied (both `None`);
// per the `notes` field this builtin is not eligible for fusion.
#[runmat_macros::register_fusion_spec(builtin_path = "crate::builtins::strings::transform::upper")]
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "upper",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "String transformation builtin; not eligible for fusion and always gathers GPU inputs.",
};
45
/// Builtin name used when tagging runtime errors and as the prefix of the
/// `upper: <cause>` messages formatted for internal failures.
const BUILTIN_NAME: &str = "upper";
47
/// Descriptor of the single output (`out`) listed in the `upper` signature.
const UPPER_OUTPUT: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "out",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "Uppercased text preserving input container kind and shape.",
}];
55
/// Descriptor of the single required input (`str`) listed in the `upper` signature.
///
/// The declared type is `Any`; the accepted kinds are enforced at runtime by
/// the match in `upper_builtin`.
const UPPER_INPUTS: [BuiltinParamDescriptor; 1] = [BuiltinParamDescriptor {
    name: "str",
    ty: BuiltinParamType::Any,
    arity: BuiltinParamArity::Required,
    default: None,
    description: "String/char/cell text input to transform.",
}];
63
/// The one call form of the builtin, `out = upper(str)`, wiring together
/// [`UPPER_INPUTS`] and [`UPPER_OUTPUT`].
const UPPER_SIGNATURES: [BuiltinSignatureDescriptor; 1] = [BuiltinSignatureDescriptor {
    label: "out = upper(str)",
    inputs: &UPPER_INPUTS,
    outputs: &UPPER_OUTPUT,
}];
69
/// Error reported by `upper_builtin` when the (gathered) argument is none of
/// the text kinds it matches on: string scalar, string array, char array, or cell.
const UPPER_ERROR_INVALID_INPUT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.UPPER.INVALID_INPUT",
    identifier: Some("RunMat:upper:InvalidInput"),
    when: "Input is not a string array, character array, or cell array of text scalars.",
    message:
        "upper: first argument must be a string array, character array, or cell array of character vectors",
};
77
/// Error reported by `upper_cell_element` when a cell entry is not a string
/// scalar, a one-element string array, or a char array with at most one row.
const UPPER_ERROR_CELL_ELEMENT: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.UPPER.CELL_ELEMENT",
    identifier: Some("RunMat:upper:CellElement"),
    when: "Cell array contains a non-text element or non-row char array element.",
    message: "upper: cell array elements must be string scalars or character vectors",
};
84
/// Error used when rebuilding the output container (`StringArray::new`,
/// `CharArray::new`, `make_cell`) fails.
///
/// The call sites in this file pass their own `upper: <cause>` text to
/// `upper_error_with_message`, so only this descriptor's `identifier` reaches
/// the resulting error; the canned `message` below is not emitted by them.
const UPPER_ERROR_INTERNAL: BuiltinErrorDescriptor = BuiltinErrorDescriptor {
    code: "RM.UPPER.INTERNAL",
    identifier: Some("RunMat:upper:InternalError"),
    when: "Internal output container construction failed.",
    message: "upper: internal error",
};
91
/// Every error descriptor published for `upper`; referenced from
/// [`UPPER_DESCRIPTOR`]'s `errors` field.
const UPPER_ERRORS: [BuiltinErrorDescriptor; 3] = [
    UPPER_ERROR_INVALID_INPUT,
    UPPER_ERROR_CELL_ELEMENT,
    UPPER_ERROR_INTERNAL,
];
97
/// Public descriptor for the `upper` builtin: its signatures, output mode,
/// completion policy, and error catalogue.
///
/// Referenced by path from the `descriptor(...)` argument of the
/// `#[runtime_builtin]` attribute on `upper_builtin`.
pub const UPPER_DESCRIPTOR: BuiltinDescriptor = BuiltinDescriptor {
    signatures: &UPPER_SIGNATURES,
    output_mode: BuiltinOutputMode::Fixed,
    completion_policy: BuiltinCompletionPolicy::Public,
    errors: &UPPER_ERRORS,
};
104
105fn map_flow(err: RuntimeError) -> RuntimeError {
106 map_control_flow_with_builtin(err, BUILTIN_NAME)
107}
108
109fn upper_error_with_message(
110 message: impl Into<String>,
111 error: &'static BuiltinErrorDescriptor,
112) -> RuntimeError {
113 let mut builder = build_runtime_error(message).with_builtin(BUILTIN_NAME);
114 if let Some(identifier) = error.identifier {
115 builder = builder.with_identifier(identifier);
116 }
117 builder.build()
118}
119
120fn upper_error(error: &'static BuiltinErrorDescriptor) -> RuntimeError {
121 upper_error_with_message(error.message, error)
122}
123
124#[runtime_builtin(
125 name = "upper",
126 category = "strings/transform",
127 summary = "Convert text inputs to uppercase character forms.",
128 keywords = "upper,uppercase,strings,character array,text",
129 accel = "sink",
130 type_resolver(text_preserve_type),
131 descriptor(crate::builtins::strings::transform::upper::UPPER_DESCRIPTOR),
132 builtin_path = "crate::builtins::strings::transform::upper"
133)]
134async fn upper_builtin(value: Value) -> BuiltinResult<Value> {
135 let gathered = gather_if_needed_async(&value).await.map_err(map_flow)?;
136 match gathered {
137 Value::String(text) => Ok(Value::String(uppercase_preserving_missing(text))),
138 Value::StringArray(array) => upper_string_array(array),
139 Value::CharArray(array) => upper_char_array(array),
140 Value::Cell(cell) => upper_cell_array(cell),
141 _ => Err(upper_error(&UPPER_ERROR_INVALID_INPUT)),
142 }
143}
144
145fn upper_string_array(array: StringArray) -> BuiltinResult<Value> {
146 let StringArray { data, shape, .. } = array;
147 let uppered = data
148 .into_iter()
149 .map(uppercase_preserving_missing)
150 .collect::<Vec<_>>();
151 let upper_array = StringArray::new(uppered, shape).map_err(|e| {
152 upper_error_with_message(format!("{BUILTIN_NAME}: {e}"), &UPPER_ERROR_INTERNAL)
153 })?;
154 Ok(Value::StringArray(upper_array))
155}
156
157fn upper_char_array(array: CharArray) -> BuiltinResult<Value> {
158 let CharArray { data, rows, cols } = array;
159 if rows == 0 || cols == 0 {
160 return Ok(Value::CharArray(CharArray { data, rows, cols }));
161 }
162
163 let mut upper_rows = Vec::with_capacity(rows);
164 let mut target_cols = cols;
165 for row in 0..rows {
166 let text = char_row_to_string_slice(&data, cols, row).to_uppercase();
167 let len = text.chars().count();
168 target_cols = target_cols.max(len);
169 upper_rows.push(text);
170 }
171
172 let mut upper_data = Vec::with_capacity(rows * target_cols);
173 for row_text in upper_rows {
174 let mut chars: Vec<char> = row_text.chars().collect();
175 if chars.len() < target_cols {
176 chars.resize(target_cols, ' ');
177 }
178 upper_data.extend(chars.into_iter());
179 }
180
181 CharArray::new(upper_data, rows, target_cols)
182 .map(Value::CharArray)
183 .map_err(|e| {
184 upper_error_with_message(format!("{BUILTIN_NAME}: {e}"), &UPPER_ERROR_INTERNAL)
185 })
186}
187
188fn upper_cell_array(cell: CellArray) -> BuiltinResult<Value> {
189 let CellArray {
190 data, rows, cols, ..
191 } = cell;
192 let mut upper_values = Vec::with_capacity(rows * cols);
193 for row in 0..rows {
194 for col in 0..cols {
195 let idx = row * cols + col;
196 let upper = upper_cell_element(&data[idx])?;
197 upper_values.push(upper);
198 }
199 }
200 make_cell(upper_values, rows, cols).map_err(|e| {
201 upper_error_with_message(format!("{BUILTIN_NAME}: {e}"), &UPPER_ERROR_INTERNAL)
202 })
203}
204
205fn upper_cell_element(value: &Value) -> BuiltinResult<Value> {
206 match value {
207 Value::String(text) => Ok(Value::String(uppercase_preserving_missing(text.clone()))),
208 Value::StringArray(sa) if sa.data.len() == 1 => Ok(Value::String(
209 uppercase_preserving_missing(sa.data[0].clone()),
210 )),
211 Value::CharArray(ca) if ca.rows <= 1 => upper_char_array(ca.clone()),
212 Value::CharArray(_) => Err(upper_error(&UPPER_ERROR_CELL_ELEMENT)),
213 _ => Err(upper_error(&UPPER_ERROR_CELL_ELEMENT)),
214 }
215}
216
// Unit tests for the `upper` builtin.
//
// Every test carries the `wasm_bindgen_test` attribute on wasm32 in addition
// to `#[test]`, so the same set of tests is registered on both kinds of target.
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use runmat_builtins::{ResolveContext, Type};

    // Drives the async builtin to completion on the current thread.
    fn run_upper(value: Value) -> BuiltinResult<Value> {
        futures::executor::block_on(upper_builtin(value))
    }

    // A string scalar comes back as an uppercased string scalar.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_string_scalar_value() {
        let result = run_upper(Value::String("RunMat".into())).expect("upper");
        assert_eq!(result, Value::String("RUNMAT".into()));
    }

    // Shape is kept and the `<missing>` marker is left untouched.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_string_array_preserves_shape() {
        let array = StringArray::new(
            vec![
                "gpu".into(),
                "accel".into(),
                "<missing>".into(),
                "MiXeD".into(),
            ],
            vec![2, 2],
        )
        .unwrap();
        let result = run_upper(Value::StringArray(array)).expect("upper");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![2, 2]);
                assert_eq!(
                    sa.data,
                    vec![
                        String::from("GPU"),
                        String::from("ACCEL"),
                        String::from("<missing>"),
                        String::from("MIXED")
                    ]
                );
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    // A 2x3 char array keeps its dimensions when no character expands.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_char_array_multiple_rows() {
        let data: Vec<char> = vec!['c', 'a', 't', 'd', 'o', 'g'];
        let array = CharArray::new(data, 2, 3).unwrap();
        let result = run_upper(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 2);
                assert_eq!(ca.cols, 3);
                assert_eq!(ca.data, vec!['C', 'A', 'T', 'D', 'O', 'G']);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // Trailing whitespace in a char row is preserved.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_char_vector_handles_padding() {
        let array = CharArray::new_row("hello ");
        let result = run_upper(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 1);
                assert_eq!(ca.cols, 6);
                let expected: Vec<char> = "HELLO ".chars().collect();
                assert_eq!(ca.data, expected);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // `ß` uppercases to two characters, so the row grows from 2 to 3 columns.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_char_array_unicode_expansion_extends_width() {
        let data: Vec<char> = vec!['ß', 'a'];
        let array = CharArray::new(data, 1, 2).unwrap();
        let result = run_upper(Value::CharArray(array)).expect("upper");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 1);
                assert_eq!(ca.cols, 3);
                let expected: Vec<char> = vec!['S', 'S', 'A'];
                assert_eq!(ca.data, expected);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // Each cell element keeps its own kind (char row vs. string scalar).
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_cell_array_mixed_content() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("run")),
                Value::String("Mat".into()),
            ],
            1,
            2,
        )
        .unwrap();
        let result = run_upper(Value::Cell(cell)).expect("upper");
        match result {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("RUN")));
                assert_eq!(second, Value::String("MAT".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // A numeric argument is rejected with the invalid-input message.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_errors_on_invalid_input() {
        let err = run_upper(Value::Num(1.0)).unwrap_err();
        assert_eq!(err.to_string(), UPPER_ERROR_INVALID_INPUT.message);
    }

    // A numeric element inside a cell is rejected with the cell-element message.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_cell_errors_on_invalid_element() {
        let cell = CellArray::new(vec![Value::Num(1.0)], 1, 1).unwrap();
        let err = run_upper(Value::Cell(cell)).unwrap_err();
        assert_eq!(err.to_string(), UPPER_ERROR_CELL_ELEMENT.message);
    }

    // The `<missing>` marker is not uppercased.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_preserves_missing_string() {
        let result = run_upper(Value::String("<missing>".into())).expect("upper");
        assert_eq!(result, Value::String("<missing>".into()));
    }

    // A 1x0 char array inside a cell is accepted and returned unchanged.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_cell_allows_empty_char_vector() {
        let empty_char = CharArray::new(Vec::new(), 1, 0).unwrap();
        let cell = CellArray::new(vec![Value::CharArray(empty_char.clone())], 1, 1).unwrap();
        let result = run_upper(Value::Cell(cell)).expect("upper");
        match result {
            Value::Cell(out) => {
                let element = out.get(0, 0).unwrap();
                assert_eq!(element, Value::CharArray(empty_char));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // A numeric GPU tensor is passed through the gather step and then rejected
    // like any other non-text input. Only built with the `wgpu` feature.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    #[cfg(feature = "wgpu")]
    fn upper_gpu_tensor_input_gathers_then_errors() {
        let _ = runmat_accelerate::backend::wgpu::provider::register_wgpu_provider(
            runmat_accelerate::backend::wgpu::provider::WgpuProviderOptions::default(),
        );
        let provider = runmat_accelerate_api::provider().expect("wgpu provider");
        let data = [1.0f64, 2.0];
        let shape = [2usize, 1usize];
        let handle = provider
            .upload(&runmat_accelerate_api::HostTensorView {
                data: &data,
                shape: &shape,
            })
            .expect("upload");
        let err = run_upper(Value::GpuTensor(handle.clone())).unwrap_err();
        assert_eq!(err.to_string(), UPPER_ERROR_INVALID_INPUT.message);
        provider.free(&handle).ok();
    }

    // The type resolver maps a string input type to a string output type.
    // Carries the same wasm32 attribute as every other test in this module so
    // it is not the one test left out of the wasm-bindgen test run.
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
    #[test]
    fn upper_type_preserves_text() {
        assert_eq!(
            text_preserve_type(&[Type::String], &ResolveContext::new(Vec::new())),
            Type::String
        );
    }
}