runmat_runtime/builtins/strings/transform/
erase.rs

1//! MATLAB-compatible `erase` builtin with GPU-aware semantics for RunMat.
2use runmat_builtins::{CellArray, CharArray, StringArray, Value};
3use runmat_macros::runtime_builtin;
4
5use crate::builtins::common::spec::{
6    BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
7    ReductionNaN, ResidencyPolicy, ShapeRequirements,
8};
9use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
10#[cfg(feature = "doc_export")]
11use crate::register_builtin_doc_text;
12use crate::{
13    gather_if_needed, make_cell_with_shape, register_builtin_fusion_spec, register_builtin_gpu_spec,
14};
15
16#[cfg(feature = "doc_export")]
17pub const DOC_MD: &str = r#"---
18title: "erase"
19category: "strings/transform"
20keywords: ["erase", "remove substring", "delete text", "string manipulation", "character array"]
21summary: "Remove substring occurrences from strings, character arrays, and cell arrays with MATLAB-compatible semantics."
22references:
23  - https://www.mathworks.com/help/matlab/ref/erase.html
24gpu_support:
25  elementwise: false
26  reduction: false
27  precisions: []
28  broadcasting: "none"
29  notes: "Runs on the CPU; RunMat gathers GPU-resident text before removing substrings."
30fusion:
31  elementwise: false
32  reduction: false
33  max_inputs: 2
34  constants: "inline"
35requires_feature: null
36tested:
37  unit: "builtins::strings::transform::erase::tests::erase_string_array_shape_mismatch_applies_all_patterns"
38  integration: "builtins::strings::transform::erase::tests::erase_cell_array_mixed_content"
39---
40
41# What does the `erase` function do in MATLAB / RunMat?
42`erase(text, pattern)` removes every occurrence of `pattern` from `text`. The builtin accepts string
43scalars, string arrays, character arrays, and cell arrays of character vectors or string scalars,
44mirroring MATLAB behaviour. When `pattern` is an array, `erase` removes every occurrence of each
45pattern entry; the `text` and `pattern` arguments do not need to be the same size.
46
47## How does the `erase` function behave in MATLAB / RunMat?
48- String inputs stay as strings. Missing string scalars (`<missing>`) propagate unchanged.
49- String arrays preserve their size and orientation. Each element has every supplied pattern removed.
50- Character arrays are processed row by row. Rows shrink as characters are removed and are padded with
51  spaces so the result remains a rectangular char array.
- Cell arrays must contain string scalars or character vectors. The result is a cell array with the same
  shape; each element keeps its type (string or character vector) and has the matching substrings removed.
54- The `pattern` input can be a string scalar, string array, character array, or cell array of character
55  vectors/string scalars. Provide either a scalar pattern or a list; an empty list leaves `text` unchanged.
56- Pattern values are treated literally—no regular expressions are used. Use [`replace`](./replace) or the
57  regex builtins for pattern-based removal.
58
59## `erase` Function GPU Execution Behaviour
60`erase` executes on the CPU. When any argument is GPU-resident, RunMat gathers it to host memory before
61removing substrings. Outputs are returned on the host as well. Providers do not need to implement device
62kernels for this builtin, and the fusion planner treats it as a sink to avoid keeping text on the GPU.
63
64## GPU residency in RunMat (Do I need `gpuArray`?)
65No. `erase` automatically gathers GPU inputs and produces host results. You never need to move text to or
66from the GPU manually for this builtin, and `gpuArray` inputs are handled transparently.
67
68## Examples of using the `erase` function in MATLAB / RunMat
69
70### Remove a single word from a string scalar
71```matlab
72txt = "RunMat accelerates MATLAB code";
73clean = erase(txt, "accelerates ");
74```
75Expected output:
76```matlab
77clean = "RunMat MATLAB code"
78```
79
80### Remove multiple substrings from each element of a string array
81```matlab
82labels = ["GPU pipeline"; "CPU pipeline"];
83result = erase(labels, ["GPU ", "CPU "]);
84```
85Expected output:
86```matlab
87result = 2×1 string
88    "pipeline"
89    "pipeline"
90```
91
92### Erase characters from a character array while preserving padding
93```matlab
94chars = char("workspace", "snapshots");
95trimmed = erase(chars, "s");
96```
97Expected output:
98```matlab
99trimmed =
100
101  2×8 char array
102
103    'workpace'
104    'napshot '
105```
106
107### Remove substrings from a cell array of text
108```matlab
109C = {'Kernel Planner', "GPU Fusion"};
110out = erase(C, ["Kernel ", "GPU "]);
111```
112Expected output:
113```matlab
114out = 1×2 cell array
115    {'Planner'}    {"Fusion"}
116```
117
118### Provide an empty pattern list to leave the text unchanged
119```matlab
120data = ["alpha", "beta"];
121unchanged = erase(data, string.empty);
122```
123Expected output:
124```matlab
125unchanged = 1×2 string
126    "alpha"    "beta"
127```
128
129### Remove delimiters before splitting text
130```matlab
131path = "runmat/bin:runmat/lib";
132clean = erase(path, ":");
133parts = split(clean, "runmat/");
134```
135Expected output:
136```matlab
137clean = "runmat/binrunmat/lib"
138parts = 1×3 string
139    ""    "bin"    "lib"
140```
141
142## FAQ
143
144### Can I remove multiple patterns at once?
145Yes. Supply `pattern` as a string array or cell array. Each pattern is removed in order from every element
146of the input text.
147
148### What happens if `pattern` is empty?
149An empty pattern list leaves the input unchanged. Empty string patterns are ignored because removing empty
150text would have no effect.
151
152### Does `erase` modify the original data?
153No. It returns a new value with substrings removed. The input variables remain unchanged.
154
155### How are missing string scalars handled?
156They propagate unchanged. Calling `erase` on `<missing>` returns `<missing>`, matching MATLAB.
157
158### Can `erase` operate on GPU-resident data?
159Indirectly. RunMat automatically gathers GPU values to the host, performs the removal, and returns a host
160result. No explicit `gpuArray` calls are required.
161
162### How do I remove substrings using patterns or regular expressions?
163Use `replace` for literal substitution or `regexprep` for regular expressions when you need pattern-based
164removal rather than literal substring erasure.
165
166## See Also
167[replace](./replace), [strrep](./strrep), [split](./split), [regexprep](../regex/regexprep), [string](../core/string)
168
169## Source & Feedback
170- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/erase.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/erase.rs)
171- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
172"#;
173
174pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
175    name: "erase",
176    op_kind: GpuOpKind::Custom("string-transform"),
177    supported_precisions: &[],
178    broadcast: BroadcastSemantics::None,
179    provider_hooks: &[],
180    constant_strategy: ConstantStrategy::InlineLiteral,
181    residency: ResidencyPolicy::GatherImmediately,
182    nan_mode: ReductionNaN::Include,
183    two_pass_threshold: None,
184    workgroup_size: None,
185    accepts_nan_mode: false,
186    notes:
187        "Executes on the CPU; GPU-resident inputs are gathered to host memory before substrings are removed.",
188};
189
190register_builtin_gpu_spec!(GPU_SPEC);
191
192pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
193    name: "erase",
194    shape: ShapeRequirements::Any,
195    constant_strategy: ConstantStrategy::InlineLiteral,
196    elementwise: None,
197    reduction: None,
198    emits_nan: false,
199    notes:
200        "String manipulation builtin; not eligible for fusion plans and always gathers GPU inputs before execution.",
201};
202
203register_builtin_fusion_spec!(FUSION_SPEC);
204
205#[cfg(feature = "doc_export")]
206register_builtin_doc_text!("erase", DOC_MD);
207
/// Error returned when the `text` argument is not a string, string array, char array, or cell.
const ARG_TYPE_ERROR: &str =
    "erase: first argument must be a string array, character array, or cell array of character vectors";
/// Error returned when the `pattern` argument is not a string, string array, char array, or cell.
const PATTERN_TYPE_ERROR: &str =
    "erase: second argument must be a string array, character array, or cell array of character vectors";
/// Error returned when a cell element (in either argument) is not a string scalar or a char
/// array with at most one row.
const CELL_ELEMENT_ERROR: &str =
    "erase: cell array elements must be string scalars or character vectors";
214
215#[runtime_builtin(
216    name = "erase",
217    category = "strings/transform",
218    summary = "Remove substring occurrences from strings, character arrays, and cell arrays.",
219    keywords = "erase,remove substring,strings,character array,text",
220    accel = "sink"
221)]
222fn erase_builtin(text: Value, pattern: Value) -> Result<Value, String> {
223    let text = gather_if_needed(&text).map_err(|e| format!("erase: {e}"))?;
224    let pattern = gather_if_needed(&pattern).map_err(|e| format!("erase: {e}"))?;
225
226    let patterns = PatternList::from_value(&pattern)?;
227
228    match text {
229        Value::String(s) => Ok(Value::String(erase_string_scalar(s, &patterns))),
230        Value::StringArray(sa) => erase_string_array(sa, &patterns),
231        Value::CharArray(ca) => erase_char_array(ca, &patterns),
232        Value::Cell(cell) => erase_cell_array(cell, &patterns),
233        _ => Err(ARG_TYPE_ERROR.to_string()),
234    }
235}
236
237struct PatternList {
238    entries: Vec<String>,
239}
240
241impl PatternList {
242    fn from_value(value: &Value) -> Result<Self, String> {
243        let entries = match value {
244            Value::String(text) => vec![text.clone()],
245            Value::StringArray(array) => array.data.clone(),
246            Value::CharArray(array) => {
247                if array.rows == 0 {
248                    Vec::new()
249                } else {
250                    let mut list = Vec::with_capacity(array.rows);
251                    for row in 0..array.rows {
252                        list.push(char_row_to_string_slice(&array.data, array.cols, row));
253                    }
254                    list
255                }
256            }
257            Value::Cell(cell) => {
258                let mut list = Vec::with_capacity(cell.data.len());
259                for handle in &cell.data {
260                    match &**handle {
261                        Value::String(text) => list.push(text.clone()),
262                        Value::StringArray(sa) if sa.data.len() == 1 => {
263                            list.push(sa.data[0].clone());
264                        }
265                        Value::CharArray(ca) if ca.rows == 0 => list.push(String::new()),
266                        Value::CharArray(ca) if ca.rows == 1 => {
267                            list.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
268                        }
269                        Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
270                        _ => return Err(CELL_ELEMENT_ERROR.to_string()),
271                    }
272                }
273                list
274            }
275            _ => return Err(PATTERN_TYPE_ERROR.to_string()),
276        };
277        Ok(Self { entries })
278    }
279
280    fn apply(&self, input: &str) -> String {
281        if self.entries.is_empty() {
282            return input.to_string();
283        }
284        let mut current = input.to_string();
285        for pattern in &self.entries {
286            if pattern.is_empty() {
287                continue;
288            }
289            if current.is_empty() {
290                break;
291            }
292            current = current.replace(pattern, "");
293        }
294        current
295    }
296}
297
298fn erase_string_scalar(text: String, patterns: &PatternList) -> String {
299    if is_missing_string(&text) {
300        text
301    } else {
302        patterns.apply(&text)
303    }
304}
305
306fn erase_string_array(array: StringArray, patterns: &PatternList) -> Result<Value, String> {
307    let StringArray { data, shape, .. } = array;
308    let mut erased = Vec::with_capacity(data.len());
309    for entry in data {
310        if is_missing_string(&entry) {
311            erased.push(entry);
312        } else {
313            erased.push(patterns.apply(&entry));
314        }
315    }
316    StringArray::new(erased, shape)
317        .map(Value::StringArray)
318        .map_err(|e| format!("erase: {e}"))
319}
320
321fn erase_char_array(array: CharArray, patterns: &PatternList) -> Result<Value, String> {
322    let CharArray { data, rows, cols } = array;
323    if rows == 0 {
324        return Ok(Value::CharArray(CharArray { data, rows, cols }));
325    }
326
327    let mut processed: Vec<String> = Vec::with_capacity(rows);
328    let mut target_cols = 0usize;
329    for row in 0..rows {
330        let slice = char_row_to_string_slice(&data, cols, row);
331        let erased = patterns.apply(&slice);
332        let len = erased.chars().count();
333        if len > target_cols {
334            target_cols = len;
335        }
336        processed.push(erased);
337    }
338
339    let mut flattened: Vec<char> = Vec::with_capacity(rows * target_cols);
340    for row_text in processed {
341        let mut chars: Vec<char> = row_text.chars().collect();
342        if chars.len() < target_cols {
343            chars.resize(target_cols, ' ');
344        }
345        flattened.extend(chars);
346    }
347
348    CharArray::new(flattened, rows, target_cols)
349        .map(Value::CharArray)
350        .map_err(|e| format!("erase: {e}"))
351}
352
353fn erase_cell_array(cell: CellArray, patterns: &PatternList) -> Result<Value, String> {
354    let shape = cell.shape.clone();
355    let mut values = Vec::with_capacity(cell.data.len());
356    for handle in &cell.data {
357        values.push(erase_cell_element(handle, patterns)?);
358    }
359    make_cell_with_shape(values, shape).map_err(|e| format!("erase: {e}"))
360}
361
362fn erase_cell_element(value: &Value, patterns: &PatternList) -> Result<Value, String> {
363    match value {
364        Value::String(text) => Ok(Value::String(erase_string_scalar(text.clone(), patterns))),
365        Value::StringArray(sa) if sa.data.len() == 1 => Ok(Value::String(erase_string_scalar(
366            sa.data[0].clone(),
367            patterns,
368        ))),
369        Value::CharArray(ca) if ca.rows == 0 => Ok(Value::CharArray(ca.clone())),
370        Value::CharArray(ca) if ca.rows == 1 => {
371            let slice = char_row_to_string_slice(&ca.data, ca.cols, 0);
372            let erased = patterns.apply(&slice);
373            Ok(Value::CharArray(CharArray::new_row(&erased)))
374        }
375        Value::CharArray(_) => Err(CELL_ELEMENT_ERROR.to_string()),
376        _ => Err(CELL_ELEMENT_ERROR.to_string()),
377    }
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    /// Wraps a literal in a string-scalar `Value`.
    fn text(literal: &str) -> Value {
        Value::String(literal.to_string())
    }

    /// Builds a string array from literals with the given shape.
    fn string_array(items: &[&str], shape: &[usize]) -> StringArray {
        let data: Vec<String> = items.iter().map(|item| item.to_string()).collect();
        StringArray::new(data, shape.to_vec()).unwrap()
    }

    /// Unwraps a string-array result or fails the test.
    fn expect_string_array(value: Value) -> StringArray {
        match value {
            Value::StringArray(sa) => sa,
            other => panic!("expected string array, got {other:?}"),
        }
    }

    /// Unwraps a char-array result or fails the test, naming what was expected.
    fn expect_char_array(value: Value, what: &str) -> CharArray {
        match value {
            Value::CharArray(ca) => ca,
            other => panic!("expected {what}, got {other:?}"),
        }
    }

    /// Unwraps a cell-array result or fails the test.
    fn expect_cell(value: Value) -> CellArray {
        match value {
            Value::Cell(cell) => cell,
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    #[test]
    fn erase_string_scalar_single_pattern() {
        let result = erase_builtin(text("RunMat runtime"), text(" runtime")).expect("erase");
        assert_eq!(result, text("RunMat"));
    }

    #[test]
    fn erase_string_array_multiple_patterns() {
        let strings = string_array(&["gpu", "cpu", "<missing>"], &[3, 1]);
        let patterns = string_array(&["g", "c"], &[2, 1]);
        let result = erase_builtin(Value::StringArray(strings), Value::StringArray(patterns))
            .expect("erase");
        let out = expect_string_array(result);
        assert_eq!(out.shape, vec![3, 1]);
        assert_eq!(
            out.data,
            vec![
                "pu".to_string(),
                "pu".to_string(),
                "<missing>".to_string()
            ]
        );
    }

    #[test]
    fn erase_string_array_shape_mismatch_applies_all_patterns() {
        let strings = string_array(&["GPU kernel", "CPU kernel"], &[2, 1]);
        let patterns = string_array(&["GPU ", "CPU "], &[1, 2]);
        let result = erase_builtin(Value::StringArray(strings), Value::StringArray(patterns))
            .expect("erase");
        let out = expect_string_array(result);
        assert_eq!(out.shape, vec![2, 1]);
        assert_eq!(out.data, vec!["kernel".to_string(), "kernel".to_string()]);
    }

    #[test]
    fn erase_char_array_adjusts_width() {
        let chars = CharArray::new("matrix".chars().collect(), 1, 6).unwrap();
        let result = erase_builtin(Value::CharArray(chars), text("tr")).expect("erase");
        let out = expect_char_array(result, "char array");
        assert_eq!(out.rows, 1);
        assert_eq!(out.cols, 4);
        let expected: Vec<char> = "maix".chars().collect();
        assert_eq!(out.data, expected);
    }

    #[test]
    fn erase_char_array_handles_full_removal() {
        let chars = CharArray::new_row("abc");
        let result = erase_builtin(Value::CharArray(chars), text("abc")).expect("erase");
        let out = expect_char_array(result, "empty char array");
        assert_eq!(out.rows, 1);
        assert_eq!(out.cols, 0);
        assert!(out.data.is_empty());
    }

    #[test]
    fn erase_char_array_multiple_rows_sequential_patterns() {
        // Two 12-character rows stored back to back.
        let data: Vec<char> = "GPU pipelineCPU pipeline".chars().collect();
        let chars = CharArray::new(data, 2, 12).unwrap();
        let patterns = CharArray::new_row("GPU ");
        let result =
            erase_builtin(Value::CharArray(chars), Value::CharArray(patterns)).expect("erase");
        let out = expect_char_array(result, "char array");
        assert_eq!(out.rows, 2);
        assert_eq!(out.cols, 12);
        let first = char_row_to_string_slice(&out.data, out.cols, 0);
        let second = char_row_to_string_slice(&out.data, out.cols, 1);
        assert_eq!(first.trim_end(), "pipeline");
        assert_eq!(second.trim_end(), "CPU pipeline");
    }

    #[test]
    fn erase_cell_array_mixed_content() {
        let subject = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("Kernel Planner")),
                text("GPU Fusion"),
            ],
            1,
            2,
        )
        .unwrap();
        let patterns = CellArray::new(vec![text("Kernel "), text("GPU ")], 1, 2).unwrap();
        let result = erase_builtin(Value::Cell(subject), Value::Cell(patterns)).expect("erase");
        let out = expect_cell(result);
        assert_eq!(
            out.get(0, 0).unwrap(),
            Value::CharArray(CharArray::new_row("Planner"))
        );
        assert_eq!(out.get(0, 1).unwrap(), text("Fusion"));
    }

    #[test]
    fn erase_cell_array_preserves_shape() {
        let subject = CellArray::new(
            vec![text("alpha"), text("beta"), text("gamma"), text("delta")],
            2,
            2,
        )
        .unwrap();
        let patterns = string_array(&["a"], &[1, 1]);
        let result =
            erase_builtin(Value::Cell(subject), Value::StringArray(patterns)).expect("erase");
        let out = expect_cell(result);
        assert_eq!(out.rows, 2);
        assert_eq!(out.cols, 2);
        assert_eq!(out.get(0, 0).unwrap(), text("lph"));
        assert_eq!(out.get(1, 1).unwrap(), text("delt"));
    }

    #[test]
    fn erase_preserves_missing_string() {
        let result = erase_builtin(text("<missing>"), text("missing")).expect("erase");
        assert_eq!(result, text("<missing>"));
    }

    #[test]
    fn erase_allows_empty_pattern_list() {
        let strings = string_array(&["alpha", "beta"], &[2, 1]);
        let expected = Value::StringArray(strings.clone());
        let no_patterns = StringArray::new(Vec::<String>::new(), vec![0, 0]).unwrap();
        let result = erase_builtin(
            Value::StringArray(strings),
            Value::StringArray(no_patterns),
        )
        .expect("erase");
        assert_eq!(result, expected);
    }

    #[test]
    fn erase_errors_on_invalid_first_argument() {
        let err = erase_builtin(Value::Num(1.0), text("a")).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
    }

    #[test]
    fn erase_errors_on_invalid_pattern_type() {
        let err = erase_builtin(text("abc"), Value::Num(1.0)).unwrap_err();
        assert_eq!(err, PATTERN_TYPE_ERROR);
    }

    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}