1use runmat_builtins::{CellArray, CharArray, StringArray, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::common::spec::{
7 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
8 ReductionNaN, ResidencyPolicy, ShapeRequirements,
9};
10use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
11#[cfg(feature = "doc_export")]
12use crate::register_builtin_doc_text;
13use crate::{gather_if_needed, make_cell, register_builtin_fusion_spec, register_builtin_gpu_spec};
14
15#[cfg(feature = "doc_export")]
16pub const DOC_MD: &str = r###"---
17title: "strtrim"
18category: "strings/transform"
19keywords: ["strtrim", "trim whitespace", "leading spaces", "trailing spaces", "character arrays", "string arrays"]
20summary: "Remove leading and trailing whitespace from strings, character arrays, and cell arrays."
21references:
22 - https://www.mathworks.com/help/matlab/ref/strtrim.html
23gpu_support:
24 elementwise: false
25 reduction: false
26 precisions: []
27 broadcasting: "none"
28 notes: "Executes on the CPU; GPU-resident inputs are gathered automatically before trimming."
29fusion:
30 elementwise: false
31 reduction: false
32 max_inputs: 1
33 constants: "inline"
34requires_feature: null
35tested:
36 unit: "builtins::strings::transform::strtrim::tests"
37 integration: "builtins::strings::transform::strtrim::tests::strtrim_cell_array_mixed_content"
38---
39
40# What does the `strtrim` function do in MATLAB / RunMat?
41`strtrim(text)` removes leading and trailing whitespace characters from `text`. The input can be a
42string scalar, string array, character array, or a cell array of character vectors, mirroring MATLAB
43behaviour. Internal whitespace is preserved exactly as provided.
44
45## How does the `strtrim` function behave in MATLAB / RunMat?
46- Whitespace is defined via MATLAB's `isspace`, so spaces, tabs, newlines, and other Unicode
47 whitespace code points are removed from both ends of each element.
48- String scalars and arrays keep their type and shape. Missing string scalars (`<missing>`) remain
49 missing and are returned unchanged.
50- Character arrays are trimmed row by row. The result keeps the original number of rows and shrinks
51 the column count to the longest trimmed row, padding shorter rows with spaces so the output stays
52 rectangular.
53- Cell arrays must contain string scalars or character vectors. Results preserve the original cell
54 layout with each element trimmed.
55- Numeric, logical, or structured inputs raise MATLAB-compatible type errors.
56
57## `strtrim` Function GPU Execution Behaviour
58`strtrim` runs on the CPU. When the input (or any nested element) resides on the GPU, RunMat gathers
59it to host memory before trimming so the output matches MATLAB exactly. Providers do not need to
60implement device kernels for this builtin today.
61
62## GPU residency in RunMat (Do I need `gpuArray`?)
63You do not need to call `gpuArray` or `gather` manually. RunMat automatically gathers any GPU-resident
64text data before applying `strtrim`, so the builtin behaves the same regardless of where the data lives.
65
66## Examples of using the `strtrim` function in MATLAB / RunMat
67
68### Trim Leading And Trailing Spaces From A String Scalar
69```matlab
70name = " RunMat ";
71clean = strtrim(name);
72```
73Expected output:
74```matlab
75clean = "RunMat"
76```
77
78### Remove Extra Whitespace From Each Element Of A String Array
79```matlab
80labels = [" Alpha "; "Beta "; " Gamma"];
81trimmed = strtrim(labels);
82```
83Expected output:
84```matlab
85trimmed = 3×1 string
86 "Alpha"
87 "Beta"
88 "Gamma"
89```
90
91### Trim Character Array Rows While Preserving Shape
92```matlab
93animals = char(' cat ', 'dog', ' cow ');
94result = strtrim(animals);
95```
96Expected output:
97```matlab
98result =
99
100 3×3 char array
101
102 'cat'
103 'dog'
104 'cow'
105```
106
107### Trim Tabs And Newlines Alongside Spaces
108```matlab
109text = "\tMetrics " + newline;
110clean = strtrim(text);
111```
112Expected output:
113```matlab
114clean = "Metrics"
115```
116
117### Trim Each Element Of A Cell Array Of Character Vectors
118```matlab
119pieces = {' GPU ', " Accelerate", 'RunMat '};
120out = strtrim(pieces);
121```
122Expected output:
123```matlab
124out = 1×3 cell array
125 {'GPU'} {"Accelerate"} {'RunMat'}
126```
127
128### Preserve Missing String Scalars
129```matlab
130vals = [" ok "; "<missing>"; " trimmed "];
131trimmed = strtrim(vals);
132```
133Expected output:
134```matlab
trimmed = 3×1 string
136 "ok"
137 <missing>
138 "trimmed"
139```
140
141## FAQ
142
143### Does `strtrim` modify internal whitespace?
144No. Only leading and trailing whitespace is removed; interior spacing remains intact.
145
146### Which characters count as whitespace?
147`strtrim` removes code points that MATLAB's `isspace` recognises, including spaces, tabs, newlines,
148carriage returns, and many Unicode space separators.
149
150### How are character arrays resized?
151Each row is trimmed independently. The output keeps the same number of rows and shrinks the width to
152match the longest trimmed row, padding shorter rows with spaces if necessary.
153
154### What happens to missing strings?
155Missing string scalars (`string(missing)`) remain `<missing>` exactly as in MATLAB.
156
157### Can I pass numeric or logical arrays to `strtrim`?
158No. Passing non-text inputs raises a MATLAB-compatible error indicating that text input is required.
159
160### How does `strtrim` differ from `strip`?
161`strtrim` always removes leading and trailing whitespace. `strip` is newer and adds options for custom
162characters and directional trimming; use it when you need finer control.
163
164## See Also
165[strip](./strip), [upper](./upper), [lower](./lower)
166
167## Source & Feedback
168- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/strtrim.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/strtrim.rs)
169- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
170"###;
171
/// GPU execution metadata for the `strtrim` builtin.
///
/// The spec declares no supported precisions and no provider hooks, uses the
/// `GatherImmediately` residency policy, and does not accept a NaN mode. The `notes`
/// field carries the human-readable summary of that behaviour.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "strtrim",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Executes on the CPU; GPU-resident inputs are gathered to host memory before trimming whitespace.",
};

// Hands `GPU_SPEC` to the crate-level registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
189
/// Fusion metadata for the `strtrim` builtin.
///
/// Both the `elementwise` and `reduction` entries are `None` and `emits_nan` is `false`;
/// the `notes` field states that the builtin is not eligible for fusion.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "strtrim",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes: "String transformation builtin; not eligible for fusion and always gathers GPU inputs.",
};

// Hands `FUSION_SPEC` to the crate-level registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);
201
// Passes the Markdown in `DOC_MD` to `register_builtin_doc_text!` under the name "strtrim".
// Compiled only when the `doc_export` feature is enabled, matching the gate on `DOC_MD`.
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("strtrim", DOC_MD);
204
/// Error text returned by `strtrim_builtin` when the argument is not a string scalar,
/// string array, character array, or cell array.
const ARG_TYPE_ERROR: &str =
    "strtrim: first argument must be a string array, character array, or cell array of character vectors";
/// Error text returned by `strtrim_cell_element` when a cell element is neither a string
/// scalar nor a character array with at most one row.
const CELL_ELEMENT_ERROR: &str =
    "strtrim: cell array elements must be string scalars or character vectors";
209
210#[runtime_builtin(
211 name = "strtrim",
212 category = "strings/transform",
213 summary = "Remove leading and trailing whitespace from strings, character arrays, and cell arrays.",
214 keywords = "strtrim,trim,whitespace,strings,character array,text",
215 accel = "sink"
216)]
217fn strtrim_builtin(value: Value) -> Result<Value, String> {
218 let gathered = gather_if_needed(&value).map_err(|e| format!("strtrim: {e}"))?;
219 match gathered {
220 Value::String(text) => Ok(Value::String(trim_string(text))),
221 Value::StringArray(array) => strtrim_string_array(array),
222 Value::CharArray(array) => strtrim_char_array(array),
223 Value::Cell(cell) => strtrim_cell_array(cell),
224 _ => Err(ARG_TYPE_ERROR.to_string()),
225 }
226}
227
228fn trim_string(text: String) -> String {
229 if is_missing_string(&text) {
230 text
231 } else {
232 trim_whitespace(&text)
233 }
234}
235
236fn strtrim_string_array(array: StringArray) -> Result<Value, String> {
237 let StringArray { data, shape, .. } = array;
238 let trimmed = data.into_iter().map(trim_string).collect::<Vec<_>>();
239 let out = StringArray::new(trimmed, shape).map_err(|e| format!("strtrim: {e}"))?;
240 Ok(Value::StringArray(out))
241}
242
243fn strtrim_char_array(array: CharArray) -> Result<Value, String> {
244 let CharArray { data, rows, cols } = array;
245 if rows == 0 {
246 return Ok(Value::CharArray(CharArray { data, rows, cols }));
247 }
248
249 let mut trimmed_rows: Vec<Vec<char>> = Vec::with_capacity(rows);
250 let mut target_cols: usize = 0;
251 for row in 0..rows {
252 let text = char_row_to_string_slice(&data, cols, row);
253 let trimmed = trim_whitespace(&text);
254 let chars: Vec<char> = trimmed.chars().collect();
255 target_cols = target_cols.max(chars.len());
256 trimmed_rows.push(chars);
257 }
258
259 let mut new_data: Vec<char> = Vec::with_capacity(rows * target_cols);
260 for mut chars in trimmed_rows {
261 if chars.len() < target_cols {
262 chars.resize(target_cols, ' ');
263 }
264 new_data.extend(chars);
265 }
266
267 CharArray::new(new_data, rows, target_cols)
268 .map(Value::CharArray)
269 .map_err(|e| format!("strtrim: {e}"))
270}
271
272fn strtrim_cell_array(cell: CellArray) -> Result<Value, String> {
273 let CellArray {
274 data, rows, cols, ..
275 } = cell;
276 let mut trimmed_values = Vec::with_capacity(rows * cols);
277 for value in &data {
278 let trimmed = strtrim_cell_element(value)?;
279 trimmed_values.push(trimmed);
280 }
281 make_cell(trimmed_values, rows, cols).map_err(|e| format!("strtrim: {e}"))
282}
283
284fn strtrim_cell_element(value: &Value) -> Result<Value, String> {
285 match gather_if_needed(value).map_err(|e| format!("strtrim: {e}"))? {
286 Value::String(text) => Ok(Value::String(trim_string(text))),
287 Value::StringArray(sa) if sa.data.len() == 1 => {
288 let text = sa.data.into_iter().next().unwrap();
289 Ok(Value::String(trim_string(text)))
290 }
291 Value::CharArray(ca) if ca.rows <= 1 => {
292 if ca.rows == 0 {
293 return Ok(Value::CharArray(ca));
294 }
295 let source = char_row_to_string_slice(&ca.data, ca.cols, 0);
296 let trimmed = trim_whitespace(&source);
297 let chars: Vec<char> = trimmed.chars().collect();
298 let cols = chars.len();
299 CharArray::new(chars, ca.rows, cols)
300 .map(Value::CharArray)
301 .map_err(|e| format!("strtrim: {e}"))
302 }
303 Value::CharArray(_) => Err(CELL_ELEMENT_ERROR.to_string()),
304 _ => Err(CELL_ELEMENT_ERROR.to_string()),
305 }
306}
307
/// Returns an owned copy of `text` with leading and trailing whitespace removed.
///
/// Whitespace here is whatever the standard library treats as Unicode whitespace;
/// characters in the interior of the text are never touched.
fn trim_whitespace(text: &str) -> String {
    text.trim().to_owned()
}
312
#[cfg(test)]
mod tests {
    //! Unit tests for the `strtrim` builtin, driven through `strtrim_builtin`.

    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    /// A padded string scalar loses its surrounding spaces.
    #[test]
    fn strtrim_string_scalar_trims_whitespace() {
        let result =
            strtrim_builtin(Value::String("  RunMat  ".into())).expect("strtrim string scalar");
        assert_eq!(result, Value::String("RunMat".into()));
    }

    /// A 2x2 string array keeps its shape, and the `<missing>` entry is passed through.
    #[test]
    fn strtrim_string_array_preserves_shape() {
        let array = StringArray::new(
            vec![
                " one ".into(),
                "<missing>".into(),
                "two".into(),
                " three ".into(),
            ],
            vec![2, 2],
        )
        .unwrap();
        let result = strtrim_builtin(Value::StringArray(array)).expect("strtrim string array");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![2, 2]);
                assert_eq!(
                    sa.data,
                    vec![
                        String::from("one"),
                        String::from("<missing>"),
                        String::from("two"),
                        String::from("three")
                    ]
                );
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    /// Every row of a character matrix is trimmed and the width shrinks accordingly.
    #[test]
    fn strtrim_char_array_multiple_rows() {
        // Dimensions are derived from the fixture so the buffer length always equals
        // rows * cols, whatever padding the literals carry.
        let rows = ["  cat  ", "  dog  "];
        let cols = rows[0].chars().count();
        assert!(rows.iter().all(|row| row.chars().count() == cols));
        let data: Vec<char> = rows.iter().flat_map(|row| row.chars()).collect();
        let array = CharArray::new(data, rows.len(), cols).unwrap();
        let result = strtrim_builtin(Value::CharArray(array)).expect("strtrim char array");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 2);
                assert_eq!(ca.cols, 3);
                assert_eq!(ca.data, vec!['c', 'a', 't', 'd', 'o', 'g']);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    /// A row made only of spaces collapses to a 1x0 character array.
    #[test]
    fn strtrim_char_array_all_whitespace_yields_zero_width() {
        // Exactly three spaces, stated by count so the data matches the 1x3 dimensions.
        let array = CharArray::new(vec![' '; 3], 1, 3).unwrap();
        let result = strtrim_builtin(Value::CharArray(array)).expect("strtrim char whitespace");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 1);
                assert_eq!(ca.cols, 0);
                assert!(ca.data.is_empty());
            }
            other => panic!("expected empty char array, got {other:?}"),
        }
    }

    /// Character-vector and string-scalar elements of a cell are trimmed and keep their type.
    #[test]
    fn strtrim_cell_array_mixed_content() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("  GPU  ")),
                Value::String("  Accelerate  ".into()),
            ],
            1,
            2,
        )
        .unwrap();
        let result = strtrim_builtin(Value::Cell(cell)).expect("strtrim cell array");
        match result {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("GPU")));
                assert_eq!(second, Value::String("Accelerate".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    /// The `<missing>` marker is returned unchanged.
    #[test]
    fn strtrim_preserves_missing_strings() {
        let result =
            strtrim_builtin(Value::String("<missing>".into())).expect("strtrim missing string");
        assert_eq!(result, Value::String("<missing>".into()));
    }

    /// Tabs and newlines at the ends are removed along with spaces.
    #[test]
    fn strtrim_handles_tabs_and_newlines() {
        let input = Value::String("\tMetrics \n".into());
        let result = strtrim_builtin(input).expect("strtrim tab/newline");
        assert_eq!(result, Value::String("Metrics".into()));
    }

    /// Non-ASCII whitespace (U+00A0, U+2003) at the ends is removed as well.
    #[test]
    fn strtrim_trims_unicode_whitespace() {
        let input = Value::String("\u{00A0}RunMat\u{2003}".into());
        let result = strtrim_builtin(input).expect("strtrim unicode whitespace");
        assert_eq!(result, Value::String("RunMat".into()));
    }

    /// A 0x0 character array is returned as-is.
    #[test]
    fn strtrim_char_array_zero_rows_stable() {
        let array = CharArray::new(Vec::new(), 0, 0).unwrap();
        let result = strtrim_builtin(Value::CharArray(array.clone())).expect("strtrim 0x0 char");
        assert_eq!(result, Value::CharArray(array));
    }

    /// A 1x1 string array inside a cell is accepted and comes back as a string scalar.
    #[test]
    fn strtrim_cell_array_accepts_string_scalar() {
        let scalar = StringArray::new(vec![" padded ".into()], vec![1, 1]).unwrap();
        let cell = CellArray::new(vec![Value::StringArray(scalar)], 1, 1).unwrap();
        let trimmed = strtrim_builtin(Value::Cell(cell)).expect("strtrim cell string scalar");
        match trimmed {
            Value::Cell(out) => {
                let value = out.get(0, 0).expect("cell element");
                assert_eq!(value, Value::String("padded".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    /// A numeric cell element is rejected with the cell-element error.
    #[test]
    fn strtrim_cell_array_rejects_non_text() {
        let cell = CellArray::new(vec![Value::Num(5.0)], 1, 1).unwrap();
        let err = strtrim_builtin(Value::Cell(cell)).expect_err("strtrim cell non-text");
        assert!(err.contains("cell array elements"));
    }

    /// A numeric top-level argument is rejected with a `strtrim` error.
    #[test]
    fn strtrim_errors_on_invalid_input() {
        let err = strtrim_builtin(Value::Num(1.0)).unwrap_err();
        assert!(err.contains("strtrim"));
    }

    /// The embedded documentation contains at least one example block.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}