runmat_runtime/builtins/strings/transform/
strtrim.rs

1//! MATLAB-compatible `strtrim` builtin with GPU-aware semantics for RunMat.
2
3use runmat_builtins::{CellArray, CharArray, StringArray, Value};
4use runmat_macros::runtime_builtin;
5
6use crate::builtins::common::spec::{
7    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
8    ReductionNaN, ResidencyPolicy, ShapeRequirements,
9};
10use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
11#[cfg(feature = "doc_export")]
12use crate::register_builtin_doc_text;
13use crate::{gather_if_needed, make_cell, register_builtin_fusion_spec, register_builtin_gpu_spec};
14
15#[cfg(feature = "doc_export")]
16pub const DOC_MD: &str = r###"---
17title: "strtrim"
18category: "strings/transform"
19keywords: ["strtrim", "trim whitespace", "leading spaces", "trailing spaces", "character arrays", "string arrays"]
20summary: "Remove leading and trailing whitespace from strings, character arrays, and cell arrays."
21references:
22  - https://www.mathworks.com/help/matlab/ref/strtrim.html
23gpu_support:
24  elementwise: false
25  reduction: false
26  precisions: []
27  broadcasting: "none"
28  notes: "Executes on the CPU; GPU-resident inputs are gathered automatically before trimming."
29fusion:
30  elementwise: false
31  reduction: false
32  max_inputs: 1
33  constants: "inline"
34requires_feature: null
35tested:
36  unit: "builtins::strings::transform::strtrim::tests"
37  integration: "builtins::strings::transform::strtrim::tests::strtrim_cell_array_mixed_content"
38---
39
40# What does the `strtrim` function do in MATLAB / RunMat?
41`strtrim(text)` removes leading and trailing whitespace characters from `text`. The input can be a
42string scalar, string array, character array, or a cell array of character vectors, mirroring MATLAB
43behaviour. Internal whitespace is preserved exactly as provided.
44
45## How does the `strtrim` function behave in MATLAB / RunMat?
46- Whitespace is defined via MATLAB's `isspace`, so spaces, tabs, newlines, and other Unicode
47  whitespace code points are removed from both ends of each element.
48- String scalars and arrays keep their type and shape. Missing string scalars (`<missing>`) remain
49  missing and are returned unchanged.
50- Character arrays are trimmed row by row. The result keeps the original number of rows and shrinks
51  the column count to the longest trimmed row, padding shorter rows with spaces so the output stays
52  rectangular.
53- Cell arrays must contain string scalars or character vectors. Results preserve the original cell
54  layout with each element trimmed.
55- Numeric, logical, or structured inputs raise MATLAB-compatible type errors.
56
57## `strtrim` Function GPU Execution Behaviour
58`strtrim` runs on the CPU. When the input (or any nested element) resides on the GPU, RunMat gathers
59it to host memory before trimming so the output matches MATLAB exactly. Providers do not need to
60implement device kernels for this builtin today.
61
62## GPU residency in RunMat (Do I need `gpuArray`?)
63You do not need to call `gpuArray` or `gather` manually. RunMat automatically gathers any GPU-resident
64text data before applying `strtrim`, so the builtin behaves the same regardless of where the data lives.
65
66## Examples of using the `strtrim` function in MATLAB / RunMat
67
68### Trim Leading And Trailing Spaces From A String Scalar
69```matlab
70name = "   RunMat   ";
71clean = strtrim(name);
72```
73Expected output:
74```matlab
75clean = "RunMat"
76```
77
78### Remove Extra Whitespace From Each Element Of A String Array
79```matlab
80labels = ["  Alpha  "; "Beta   "; "   Gamma"];
81trimmed = strtrim(labels);
82```
83Expected output:
84```matlab
85trimmed = 3×1 string
86    "Alpha"
87    "Beta"
88    "Gamma"
89```
90
91### Trim Character Array Rows While Preserving Shape
92```matlab
93animals = char('  cat   ', 'dog', ' cow ');
94result = strtrim(animals);
95```
96Expected output:
97```matlab
98result =
99
100  3×3 char array
101
102    'cat'
103    'dog'
104    'cow'
105```
106
107### Trim Tabs And Newlines Alongside Spaces
108```matlab
109text = "\tMetrics " + newline;
110clean = strtrim(text);
111```
112Expected output:
113```matlab
114clean = "Metrics"
115```
116
117### Trim Each Element Of A Cell Array Of Character Vectors
118```matlab
119pieces = {'  GPU  ', " Accelerate", 'RunMat   '};
120out = strtrim(pieces);
121```
122Expected output:
123```matlab
124out = 1×3 cell array
125    {'GPU'}    {"Accelerate"}    {'RunMat'}
126```
127
128### Preserve Missing String Scalars
129```matlab
130vals = [" ok "; "<missing>"; " trimmed "];
131trimmed = strtrim(vals);
132```
133Expected output:
134```matlab
trimmed = 3×1 string
136    "ok"
137    <missing>
138    "trimmed"
139```
140
141## FAQ
142
143### Does `strtrim` modify internal whitespace?
144No. Only leading and trailing whitespace is removed; interior spacing remains intact.
145
146### Which characters count as whitespace?
147`strtrim` removes code points that MATLAB's `isspace` recognises, including spaces, tabs, newlines,
148carriage returns, and many Unicode space separators.
149
150### How are character arrays resized?
151Each row is trimmed independently. The output keeps the same number of rows and shrinks the width to
152match the longest trimmed row, padding shorter rows with spaces if necessary.
153
154### What happens to missing strings?
155Missing string scalars (`string(missing)`) remain `<missing>` exactly as in MATLAB.
156
157### Can I pass numeric or logical arrays to `strtrim`?
158No. Passing non-text inputs raises a MATLAB-compatible error indicating that text input is required.
159
160### How does `strtrim` differ from `strip`?
161`strtrim` always removes leading and trailing whitespace. `strip` is newer and adds options for custom
162characters and directional trimming; use it when you need finer control.
163
164## See Also
165[strip](./strip), [upper](./upper), [lower](./lower)
166
167## Source & Feedback
168- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/strtrim.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/strtrim.rs)
169- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
170"###;
171
172pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
173    name: "strtrim",
174    op_kind: GpuOpKind::Custom("string-transform"),
175    supported_precisions: &[],
176    broadcast: BroadcastSemantics::None,
177    provider_hooks: &[],
178    constant_strategy: ConstantStrategy::InlineLiteral,
179    residency: ResidencyPolicy::GatherImmediately,
180    nan_mode: ReductionNaN::Include,
181    two_pass_threshold: None,
182    workgroup_size: None,
183    accepts_nan_mode: false,
184    notes:
185        "Executes on the CPU; GPU-resident inputs are gathered to host memory before trimming whitespace.",
186};
187
188register_builtin_gpu_spec!(GPU_SPEC);
189
190pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
191    name: "strtrim",
192    shape: ShapeRequirements::Any,
193    constant_strategy: ConstantStrategy::InlineLiteral,
194    elementwise: None,
195    reduction: None,
196    emits_nan: false,
197    notes: "String transformation builtin; not eligible for fusion and always gathers GPU inputs.",
198};
199
200register_builtin_fusion_spec!(FUSION_SPEC);
201
202#[cfg(feature = "doc_export")]
203register_builtin_doc_text!("strtrim", DOC_MD);
204
/// Error returned when the top-level argument is not one of the text types
/// handled by `strtrim_builtin`.
const ARG_TYPE_ERROR: &str =
    "strtrim: first argument must be a string array, character array, or cell array of character vectors";

/// Error returned when a cell element is neither a string scalar nor a
/// character vector.
const CELL_ELEMENT_ERROR: &str =
    "strtrim: cell array elements must be string scalars or character vectors";
209
210#[runtime_builtin(
211    name = "strtrim",
212    category = "strings/transform",
213    summary = "Remove leading and trailing whitespace from strings, character arrays, and cell arrays.",
214    keywords = "strtrim,trim,whitespace,strings,character array,text",
215    accel = "sink"
216)]
217fn strtrim_builtin(value: Value) -> Result<Value, String> {
218    let gathered = gather_if_needed(&value).map_err(|e| format!("strtrim: {e}"))?;
219    match gathered {
220        Value::String(text) => Ok(Value::String(trim_string(text))),
221        Value::StringArray(array) => strtrim_string_array(array),
222        Value::CharArray(array) => strtrim_char_array(array),
223        Value::Cell(cell) => strtrim_cell_array(cell),
224        _ => Err(ARG_TYPE_ERROR.to_string()),
225    }
226}
227
228fn trim_string(text: String) -> String {
229    if is_missing_string(&text) {
230        text
231    } else {
232        trim_whitespace(&text)
233    }
234}
235
236fn strtrim_string_array(array: StringArray) -> Result<Value, String> {
237    let StringArray { data, shape, .. } = array;
238    let trimmed = data.into_iter().map(trim_string).collect::<Vec<_>>();
239    let out = StringArray::new(trimmed, shape).map_err(|e| format!("strtrim: {e}"))?;
240    Ok(Value::StringArray(out))
241}
242
243fn strtrim_char_array(array: CharArray) -> Result<Value, String> {
244    let CharArray { data, rows, cols } = array;
245    if rows == 0 {
246        return Ok(Value::CharArray(CharArray { data, rows, cols }));
247    }
248
249    let mut trimmed_rows: Vec<Vec<char>> = Vec::with_capacity(rows);
250    let mut target_cols: usize = 0;
251    for row in 0..rows {
252        let text = char_row_to_string_slice(&data, cols, row);
253        let trimmed = trim_whitespace(&text);
254        let chars: Vec<char> = trimmed.chars().collect();
255        target_cols = target_cols.max(chars.len());
256        trimmed_rows.push(chars);
257    }
258
259    let mut new_data: Vec<char> = Vec::with_capacity(rows * target_cols);
260    for mut chars in trimmed_rows {
261        if chars.len() < target_cols {
262            chars.resize(target_cols, ' ');
263        }
264        new_data.extend(chars);
265    }
266
267    CharArray::new(new_data, rows, target_cols)
268        .map(Value::CharArray)
269        .map_err(|e| format!("strtrim: {e}"))
270}
271
272fn strtrim_cell_array(cell: CellArray) -> Result<Value, String> {
273    let CellArray {
274        data, rows, cols, ..
275    } = cell;
276    let mut trimmed_values = Vec::with_capacity(rows * cols);
277    for value in &data {
278        let trimmed = strtrim_cell_element(value)?;
279        trimmed_values.push(trimmed);
280    }
281    make_cell(trimmed_values, rows, cols).map_err(|e| format!("strtrim: {e}"))
282}
283
284fn strtrim_cell_element(value: &Value) -> Result<Value, String> {
285    match gather_if_needed(value).map_err(|e| format!("strtrim: {e}"))? {
286        Value::String(text) => Ok(Value::String(trim_string(text))),
287        Value::StringArray(sa) if sa.data.len() == 1 => {
288            let text = sa.data.into_iter().next().unwrap();
289            Ok(Value::String(trim_string(text)))
290        }
291        Value::CharArray(ca) if ca.rows <= 1 => {
292            if ca.rows == 0 {
293                return Ok(Value::CharArray(ca));
294            }
295            let source = char_row_to_string_slice(&ca.data, ca.cols, 0);
296            let trimmed = trim_whitespace(&source);
297            let chars: Vec<char> = trimmed.chars().collect();
298            let cols = chars.len();
299            CharArray::new(chars, ca.rows, cols)
300                .map(Value::CharArray)
301                .map_err(|e| format!("strtrim: {e}"))
302        }
303        Value::CharArray(_) => Err(CELL_ELEMENT_ERROR.to_string()),
304        _ => Err(CELL_ELEMENT_ERROR.to_string()),
305    }
306}
307
/// Returns an owned copy of `text` with leading and trailing whitespace
/// removed, where whitespace is whatever `char::is_whitespace` accepts.
/// Interior whitespace is left untouched.
fn trim_whitespace(text: &str) -> String {
    text.trim().to_owned()
}
312
/// Unit tests for the `strtrim` builtin.
///
/// Error-path tests compare against the exact error constants so that an
/// unrelated failure carrying the `strtrim` prefix cannot satisfy them.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    #[test]
    fn strtrim_string_scalar_trims_whitespace() {
        let result =
            strtrim_builtin(Value::String("  RunMat  ".into())).expect("strtrim string scalar");
        assert_eq!(result, Value::String("RunMat".into()));
    }

    #[test]
    fn strtrim_preserves_interior_whitespace() {
        let result =
            strtrim_builtin(Value::String("  a  b  ".into())).expect("strtrim interior spaces");
        assert_eq!(result, Value::String("a  b".into()));
    }

    #[test]
    fn strtrim_string_array_preserves_shape() {
        let array = StringArray::new(
            vec![
                " one ".into(),
                "<missing>".into(),
                "two".into(),
                " three ".into(),
            ],
            vec![2, 2],
        )
        .unwrap();
        let result = strtrim_builtin(Value::StringArray(array)).expect("strtrim string array");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![2, 2]);
                assert_eq!(
                    sa.data,
                    vec![
                        String::from("one"),
                        String::from("<missing>"),
                        String::from("two"),
                        String::from("three")
                    ]
                );
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    #[test]
    fn strtrim_char_array_multiple_rows() {
        let data: Vec<char> = "  cat  ".chars().chain(" dog   ".chars()).collect();
        let array = CharArray::new(data, 2, 7).unwrap();
        let result = strtrim_builtin(Value::CharArray(array)).expect("strtrim char array");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 2);
                assert_eq!(ca.cols, 3);
                assert_eq!(ca.data, vec!['c', 'a', 't', 'd', 'o', 'g']);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    #[test]
    fn strtrim_char_array_pads_shorter_rows() {
        // Rows trim to different lengths, so the shorter one must be padded
        // with trailing spaces up to the width of the longest row.
        let data: Vec<char> = "  cat  ".chars().chain(" horse ".chars()).collect();
        let array = CharArray::new(data, 2, 7).unwrap();
        let result = strtrim_builtin(Value::CharArray(array)).expect("strtrim char padding");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 2);
                assert_eq!(ca.cols, 5);
                let expected: Vec<char> = "cat  horse".chars().collect();
                assert_eq!(ca.data, expected);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    #[test]
    fn strtrim_char_array_all_whitespace_yields_zero_width() {
        let array = CharArray::new("   ".chars().collect(), 1, 3).unwrap();
        let result = strtrim_builtin(Value::CharArray(array)).expect("strtrim char whitespace");
        match result {
            Value::CharArray(ca) => {
                assert_eq!(ca.rows, 1);
                assert_eq!(ca.cols, 0);
                assert!(ca.data.is_empty());
            }
            other => panic!("expected empty char array, got {other:?}"),
        }
    }

    #[test]
    fn strtrim_cell_array_mixed_content() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("  GPU  ")),
                Value::String(" Accelerate ".into()),
            ],
            1,
            2,
        )
        .unwrap();
        let result = strtrim_builtin(Value::Cell(cell)).expect("strtrim cell array");
        match result {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("GPU")));
                assert_eq!(second, Value::String("Accelerate".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[test]
    fn strtrim_preserves_missing_strings() {
        let result =
            strtrim_builtin(Value::String("<missing>".into())).expect("strtrim missing string");
        assert_eq!(result, Value::String("<missing>".into()));
    }

    #[test]
    fn strtrim_handles_tabs_and_newlines() {
        let input = Value::String("\tMetrics \n".into());
        let result = strtrim_builtin(input).expect("strtrim tab/newline");
        assert_eq!(result, Value::String("Metrics".into()));
    }

    #[test]
    fn strtrim_trims_unicode_whitespace() {
        let input = Value::String("\u{00A0}RunMat\u{2003}".into());
        let result = strtrim_builtin(input).expect("strtrim unicode whitespace");
        assert_eq!(result, Value::String("RunMat".into()));
    }

    #[test]
    fn strtrim_char_array_zero_rows_stable() {
        let array = CharArray::new(Vec::new(), 0, 0).unwrap();
        let result = strtrim_builtin(Value::CharArray(array.clone())).expect("strtrim 0x0 char");
        assert_eq!(result, Value::CharArray(array));
    }

    #[test]
    fn strtrim_cell_array_accepts_string_scalar() {
        let scalar = StringArray::new(vec![" padded ".into()], vec![1, 1]).unwrap();
        let cell = CellArray::new(vec![Value::StringArray(scalar)], 1, 1).unwrap();
        let trimmed = strtrim_builtin(Value::Cell(cell)).expect("strtrim cell string scalar");
        match trimmed {
            Value::Cell(out) => {
                let value = out.get(0, 0).expect("cell element");
                assert_eq!(value, Value::String("padded".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[test]
    fn strtrim_cell_array_rejects_non_text() {
        let cell = CellArray::new(vec![Value::Num(5.0)], 1, 1).unwrap();
        let err = strtrim_builtin(Value::Cell(cell)).expect_err("strtrim cell non-text");
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    #[test]
    fn strtrim_cell_array_rejects_multirow_char_array() {
        // Only character vectors are valid cell elements; a 2x2 char matrix is not.
        let matrix = CharArray::new("abcd".chars().collect(), 2, 2).unwrap();
        let cell = CellArray::new(vec![Value::CharArray(matrix)], 1, 1).unwrap();
        let err = strtrim_builtin(Value::Cell(cell)).expect_err("strtrim cell char matrix");
        assert_eq!(err, CELL_ELEMENT_ERROR);
    }

    #[test]
    fn strtrim_errors_on_invalid_input() {
        let err = strtrim_builtin(Value::Num(1.0)).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}