1use runmat_builtins::{CellArray, CharArray, StringArray, Value};
3use runmat_macros::runtime_builtin;
4
5use crate::builtins::common::spec::{
6 BroadcastSemantics, BuiltinFusionSpec, BuiltinGpuSpec, ConstantStrategy, GpuOpKind,
7 ReductionNaN, ResidencyPolicy, ShapeRequirements,
8};
9use crate::builtins::strings::common::{char_row_to_string_slice, is_missing_string};
10#[cfg(feature = "doc_export")]
11use crate::register_builtin_doc_text;
12use crate::{
13 gather_if_needed, make_cell_with_shape, register_builtin_fusion_spec, register_builtin_gpu_spec,
14};
15
16#[cfg(feature = "doc_export")]
17pub const DOC_MD: &str = r#"---
18title: "erase"
19category: "strings/transform"
20keywords: ["erase", "remove substring", "delete text", "string manipulation", "character array"]
21summary: "Remove substring occurrences from strings, character arrays, and cell arrays with MATLAB-compatible semantics."
22references:
23 - https://www.mathworks.com/help/matlab/ref/erase.html
24gpu_support:
25 elementwise: false
26 reduction: false
27 precisions: []
28 broadcasting: "none"
29 notes: "Runs on the CPU; RunMat gathers GPU-resident text before removing substrings."
30fusion:
31 elementwise: false
32 reduction: false
33 max_inputs: 2
34 constants: "inline"
35requires_feature: null
36tested:
37 unit: "builtins::strings::transform::erase::tests::erase_string_array_shape_mismatch_applies_all_patterns"
38 integration: "builtins::strings::transform::erase::tests::erase_cell_array_mixed_content"
39---
40
41# What does the `erase` function do in MATLAB / RunMat?
42`erase(text, pattern)` removes every occurrence of `pattern` from `text`. The builtin accepts string
43scalars, string arrays, character arrays, and cell arrays of character vectors or string scalars,
44mirroring MATLAB behaviour. When `pattern` is an array, `erase` removes every occurrence of each
45pattern entry; the `text` and `pattern` arguments do not need to be the same size.
46
47## How does the `erase` function behave in MATLAB / RunMat?
48- String inputs stay as strings. Missing string scalars (`<missing>`) propagate unchanged.
49- String arrays preserve their size and orientation. Each element has every supplied pattern removed.
50- Character arrays are processed row by row. Rows shrink as characters are removed and are padded with
51 spaces so the result remains a rectangular char array.
52- Cell arrays must contain string scalars or character vectors. The result is a cell array with the same
53 shape whose elements reflect the removed substrings.
54- The `pattern` input can be a string scalar, string array, character array, or cell array of character
55 vectors/string scalars. Provide either a scalar pattern or a list; an empty list leaves `text` unchanged.
- Pattern values are treated literally—no regular expressions are used. Use [`replace`](./replace) for literal
  substitution or the regex builtins for pattern-based removal.
58
59## `erase` Function GPU Execution Behaviour
60`erase` executes on the CPU. When any argument is GPU-resident, RunMat gathers it to host memory before
61removing substrings. Outputs are returned on the host as well. Providers do not need to implement device
62kernels for this builtin, and the fusion planner treats it as a sink to avoid keeping text on the GPU.
63
64## GPU residency in RunMat (Do I need `gpuArray`?)
65No. `erase` automatically gathers GPU inputs and produces host results. You never need to move text to or
66from the GPU manually for this builtin, and `gpuArray` inputs are handled transparently.
67
68## Examples of using the `erase` function in MATLAB / RunMat
69
70### Remove a single word from a string scalar
71```matlab
72txt = "RunMat accelerates MATLAB code";
73clean = erase(txt, "accelerates ");
74```
75Expected output:
76```matlab
77clean = "RunMat MATLAB code"
78```
79
80### Remove multiple substrings from each element of a string array
81```matlab
82labels = ["GPU pipeline"; "CPU pipeline"];
83result = erase(labels, ["GPU ", "CPU "]);
84```
85Expected output:
86```matlab
87result = 2×1 string
88 "pipeline"
89 "pipeline"
90```
91
92### Erase characters from a character array while preserving padding
93```matlab
94chars = char("workspace", "snapshots");
95trimmed = erase(chars, "s");
96```
97Expected output:
98```matlab
99trimmed =
100
101 2×8 char array
102
103 'workpace'
    'naphot  '
105```
106
107### Remove substrings from a cell array of text
108```matlab
109C = {'Kernel Planner', "GPU Fusion"};
110out = erase(C, ["Kernel ", "GPU "]);
111```
112Expected output:
113```matlab
114out = 1×2 cell array
115 {'Planner'} {"Fusion"}
116```
117
118### Provide an empty pattern list to leave the text unchanged
119```matlab
120data = ["alpha", "beta"];
121unchanged = erase(data, string.empty);
122```
123Expected output:
124```matlab
125unchanged = 1×2 string
126 "alpha" "beta"
127```
128
129### Remove delimiters before splitting text
130```matlab
131path = "runmat/bin:runmat/lib";
132clean = erase(path, ":");
133parts = split(clean, "runmat/");
134```
135Expected output:
136```matlab
137clean = "runmat/binrunmat/lib"
138parts = 1×3 string
139 "" "bin" "lib"
140```
141
142## FAQ
143
144### Can I remove multiple patterns at once?
145Yes. Supply `pattern` as a string array or cell array. Each pattern is removed in order from every element
146of the input text.
147
148### What happens if `pattern` is empty?
149An empty pattern list leaves the input unchanged. Empty string patterns are ignored because removing empty
150text would have no effect.
151
152### Does `erase` modify the original data?
153No. It returns a new value with substrings removed. The input variables remain unchanged.
154
155### How are missing string scalars handled?
156They propagate unchanged. Calling `erase` on `<missing>` returns `<missing>`, matching MATLAB.
157
158### Can `erase` operate on GPU-resident data?
159Indirectly. RunMat automatically gathers GPU values to the host, performs the removal, and returns a host
160result. No explicit `gpuArray` calls are required.
161
162### How do I remove substrings using patterns or regular expressions?
163Use `replace` for literal substitution or `regexprep` for regular expressions when you need pattern-based
164removal rather than literal substring erasure.
165
166## See Also
167[replace](./replace), [strrep](./strrep), [split](./split), [regexprep](../regex/regexprep), [string](../core/string)
168
169## Source & Feedback
170- Implementation: [`crates/runmat-runtime/src/builtins/strings/transform/erase.rs`](https://github.com/runmat-org/runmat/blob/main/crates/runmat-runtime/src/builtins/strings/transform/erase.rs)
171- Found an issue? Please [open a GitHub issue](https://github.com/runmat-org/runmat/issues/new/choose) with a minimal reproduction.
172"#;
173
/// GPU dispatch metadata for the `erase` builtin.
///
/// The spec lists no supported precisions and no provider hooks, and uses the
/// `GatherImmediately` residency policy. Per `notes`, the builtin executes on
/// the CPU after GPU-resident inputs have been gathered to host memory.
pub const GPU_SPEC: BuiltinGpuSpec = BuiltinGpuSpec {
    name: "erase",
    op_kind: GpuOpKind::Custom("string-transform"),
    supported_precisions: &[],
    broadcast: BroadcastSemantics::None,
    provider_hooks: &[],
    constant_strategy: ConstantStrategy::InlineLiteral,
    residency: ResidencyPolicy::GatherImmediately,
    nan_mode: ReductionNaN::Include,
    two_pass_threshold: None,
    workgroup_size: None,
    accepts_nan_mode: false,
    notes:
        "Executes on the CPU; GPU-resident inputs are gathered to host memory before substrings are removed.",
};

// Hands the spec above to the crate's GPU-spec registration macro.
register_builtin_gpu_spec!(GPU_SPEC);
191
/// Fusion-planner metadata for the `erase` builtin.
///
/// Neither an elementwise nor a reduction descriptor is provided; per `notes`,
/// the builtin is not eligible for fusion plans and always gathers GPU inputs
/// before execution.
pub const FUSION_SPEC: BuiltinFusionSpec = BuiltinFusionSpec {
    name: "erase",
    shape: ShapeRequirements::Any,
    constant_strategy: ConstantStrategy::InlineLiteral,
    elementwise: None,
    reduction: None,
    emits_nan: false,
    notes:
        "String manipulation builtin; not eligible for fusion plans and always gathers GPU inputs before execution.",
};

// Hands the spec above to the crate's fusion-spec registration macro.
register_builtin_fusion_spec!(FUSION_SPEC);
204
// Only compiled with the `doc_export` feature: associates the Markdown help
// text in `DOC_MD` with the builtin name "erase".
#[cfg(feature = "doc_export")]
register_builtin_doc_text!("erase", DOC_MD);
207
/// Error returned when the `text` argument is not a string, string array,
/// character array, or cell array.
const ARG_TYPE_ERROR: &str =
    "erase: first argument must be a string array, character array, or cell array of character vectors";
/// Error returned when the `pattern` argument is not a string, string array,
/// character array, or cell array.
const PATTERN_TYPE_ERROR: &str =
    "erase: second argument must be a string array, character array, or cell array of character vectors";
/// Error returned when a cell element (in either argument) is not a string
/// scalar, a one-element string array, or a character array with at most one row.
const CELL_ELEMENT_ERROR: &str =
    "erase: cell array elements must be string scalars or character vectors";
214
215#[runtime_builtin(
216 name = "erase",
217 category = "strings/transform",
218 summary = "Remove substring occurrences from strings, character arrays, and cell arrays.",
219 keywords = "erase,remove substring,strings,character array,text",
220 accel = "sink"
221)]
222fn erase_builtin(text: Value, pattern: Value) -> Result<Value, String> {
223 let text = gather_if_needed(&text).map_err(|e| format!("erase: {e}"))?;
224 let pattern = gather_if_needed(&pattern).map_err(|e| format!("erase: {e}"))?;
225
226 let patterns = PatternList::from_value(&pattern)?;
227
228 match text {
229 Value::String(s) => Ok(Value::String(erase_string_scalar(s, &patterns))),
230 Value::StringArray(sa) => erase_string_array(sa, &patterns),
231 Value::CharArray(ca) => erase_char_array(ca, &patterns),
232 Value::Cell(cell) => erase_cell_array(cell, &patterns),
233 _ => Err(ARG_TYPE_ERROR.to_string()),
234 }
235}
236
/// Flat list of literal substrings to remove, in the order they were supplied.
struct PatternList {
    /// One entry per pattern; empty entries are skipped when the list is applied.
    entries: Vec<String>,
}
240
241impl PatternList {
242 fn from_value(value: &Value) -> Result<Self, String> {
243 let entries = match value {
244 Value::String(text) => vec![text.clone()],
245 Value::StringArray(array) => array.data.clone(),
246 Value::CharArray(array) => {
247 if array.rows == 0 {
248 Vec::new()
249 } else {
250 let mut list = Vec::with_capacity(array.rows);
251 for row in 0..array.rows {
252 list.push(char_row_to_string_slice(&array.data, array.cols, row));
253 }
254 list
255 }
256 }
257 Value::Cell(cell) => {
258 let mut list = Vec::with_capacity(cell.data.len());
259 for handle in &cell.data {
260 match &**handle {
261 Value::String(text) => list.push(text.clone()),
262 Value::StringArray(sa) if sa.data.len() == 1 => {
263 list.push(sa.data[0].clone());
264 }
265 Value::CharArray(ca) if ca.rows == 0 => list.push(String::new()),
266 Value::CharArray(ca) if ca.rows == 1 => {
267 list.push(char_row_to_string_slice(&ca.data, ca.cols, 0));
268 }
269 Value::CharArray(_) => return Err(CELL_ELEMENT_ERROR.to_string()),
270 _ => return Err(CELL_ELEMENT_ERROR.to_string()),
271 }
272 }
273 list
274 }
275 _ => return Err(PATTERN_TYPE_ERROR.to_string()),
276 };
277 Ok(Self { entries })
278 }
279
280 fn apply(&self, input: &str) -> String {
281 if self.entries.is_empty() {
282 return input.to_string();
283 }
284 let mut current = input.to_string();
285 for pattern in &self.entries {
286 if pattern.is_empty() {
287 continue;
288 }
289 if current.is_empty() {
290 break;
291 }
292 current = current.replace(pattern, "");
293 }
294 current
295 }
296}
297
298fn erase_string_scalar(text: String, patterns: &PatternList) -> String {
299 if is_missing_string(&text) {
300 text
301 } else {
302 patterns.apply(&text)
303 }
304}
305
306fn erase_string_array(array: StringArray, patterns: &PatternList) -> Result<Value, String> {
307 let StringArray { data, shape, .. } = array;
308 let mut erased = Vec::with_capacity(data.len());
309 for entry in data {
310 if is_missing_string(&entry) {
311 erased.push(entry);
312 } else {
313 erased.push(patterns.apply(&entry));
314 }
315 }
316 StringArray::new(erased, shape)
317 .map(Value::StringArray)
318 .map_err(|e| format!("erase: {e}"))
319}
320
321fn erase_char_array(array: CharArray, patterns: &PatternList) -> Result<Value, String> {
322 let CharArray { data, rows, cols } = array;
323 if rows == 0 {
324 return Ok(Value::CharArray(CharArray { data, rows, cols }));
325 }
326
327 let mut processed: Vec<String> = Vec::with_capacity(rows);
328 let mut target_cols = 0usize;
329 for row in 0..rows {
330 let slice = char_row_to_string_slice(&data, cols, row);
331 let erased = patterns.apply(&slice);
332 let len = erased.chars().count();
333 if len > target_cols {
334 target_cols = len;
335 }
336 processed.push(erased);
337 }
338
339 let mut flattened: Vec<char> = Vec::with_capacity(rows * target_cols);
340 for row_text in processed {
341 let mut chars: Vec<char> = row_text.chars().collect();
342 if chars.len() < target_cols {
343 chars.resize(target_cols, ' ');
344 }
345 flattened.extend(chars);
346 }
347
348 CharArray::new(flattened, rows, target_cols)
349 .map(Value::CharArray)
350 .map_err(|e| format!("erase: {e}"))
351}
352
353fn erase_cell_array(cell: CellArray, patterns: &PatternList) -> Result<Value, String> {
354 let shape = cell.shape.clone();
355 let mut values = Vec::with_capacity(cell.data.len());
356 for handle in &cell.data {
357 values.push(erase_cell_element(handle, patterns)?);
358 }
359 make_cell_with_shape(values, shape).map_err(|e| format!("erase: {e}"))
360}
361
362fn erase_cell_element(value: &Value, patterns: &PatternList) -> Result<Value, String> {
363 match value {
364 Value::String(text) => Ok(Value::String(erase_string_scalar(text.clone(), patterns))),
365 Value::StringArray(sa) if sa.data.len() == 1 => Ok(Value::String(erase_string_scalar(
366 sa.data[0].clone(),
367 patterns,
368 ))),
369 Value::CharArray(ca) if ca.rows == 0 => Ok(Value::CharArray(ca.clone())),
370 Value::CharArray(ca) if ca.rows == 1 => {
371 let slice = char_row_to_string_slice(&ca.data, ca.cols, 0);
372 let erased = patterns.apply(&slice);
373 Ok(Value::CharArray(CharArray::new_row(&erased)))
374 }
375 Value::CharArray(_) => Err(CELL_ELEMENT_ERROR.to_string()),
376 _ => Err(CELL_ELEMENT_ERROR.to_string()),
377 }
378}
379
// Unit tests for the `erase` builtin. Each test calls `erase_builtin` directly
// with host values and checks the returned value or error message.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(feature = "doc_export")]
    use crate::builtins::common::test_support;

    // A single pattern is removed from a string scalar.
    #[test]
    fn erase_string_scalar_single_pattern() {
        let result = erase_builtin(
            Value::String("RunMat runtime".into()),
            Value::String(" runtime".into()),
        )
        .expect("erase");
        assert_eq!(result, Value::String("RunMat".into()));
    }

    // Every pattern is applied to every element; the "<missing>" element is
    // expected to come back unchanged and the 3x1 shape is preserved.
    #[test]
    fn erase_string_array_multiple_patterns() {
        let strings = StringArray::new(
            vec!["gpu".into(), "cpu".into(), "<missing>".into()],
            vec![3, 1],
        )
        .unwrap();
        let result = erase_builtin(
            Value::StringArray(strings),
            Value::StringArray(StringArray::new(vec!["g".into(), "c".into()], vec![2, 1]).unwrap()),
        )
        .expect("erase");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![3, 1]);
                assert_eq!(
                    sa.data,
                    vec![
                        String::from("pu"),
                        String::from("pu"),
                        String::from("<missing>")
                    ]
                );
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    // Text is 2x1 while the pattern list is 1x2: shapes need not match, and
    // the output keeps the shape of the text argument.
    #[test]
    fn erase_string_array_shape_mismatch_applies_all_patterns() {
        let strings =
            StringArray::new(vec!["GPU kernel".into(), "CPU kernel".into()], vec![2, 1]).unwrap();
        let patterns = StringArray::new(vec!["GPU ".into(), "CPU ".into()], vec![1, 2]).unwrap();
        let result = erase_builtin(Value::StringArray(strings), Value::StringArray(patterns))
            .expect("erase");
        match result {
            Value::StringArray(sa) => {
                assert_eq!(sa.shape, vec![2, 1]);
                assert_eq!(
                    sa.data,
                    vec![String::from("kernel"), String::from("kernel")]
                );
            }
            other => panic!("expected string array, got {other:?}"),
        }
    }

    // Removing "tr" from a 1x6 row shrinks the output to 1x4.
    #[test]
    fn erase_char_array_adjusts_width() {
        let chars = CharArray::new("matrix".chars().collect(), 1, 6).unwrap();
        let result =
            erase_builtin(Value::CharArray(chars), Value::String("tr".into())).expect("erase");
        match result {
            Value::CharArray(out) => {
                assert_eq!(out.rows, 1);
                assert_eq!(out.cols, 4);
                let expected: Vec<char> = "maix".chars().collect();
                assert_eq!(out.data, expected);
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // Erasing the whole row leaves a 1x0 character array.
    #[test]
    fn erase_char_array_handles_full_removal() {
        let chars = CharArray::new_row("abc");
        let result = erase_builtin(Value::CharArray(chars.clone()), Value::String("abc".into()))
            .expect("erase");
        match result {
            Value::CharArray(out) => {
                assert_eq!(out.rows, 1);
                assert_eq!(out.cols, 0);
                assert!(out.data.is_empty());
            }
            other => panic!("expected empty char array, got {other:?}"),
        }
    }

    // Two-row input where only the first row contains the pattern: the output
    // keeps the width of the longest remaining row (12) and the shortened row
    // is padded, hence the `trim_end` before comparing.
    #[test]
    fn erase_char_array_multiple_rows_sequential_patterns() {
        let chars = CharArray::new(
            vec![
                'G', 'P', 'U', ' ', 'p', 'i', 'p', 'e', 'l', 'i', 'n', 'e', 'C', 'P', 'U', ' ',
                'p', 'i', 'p', 'e', 'l', 'i', 'n', 'e',
            ],
            2,
            12,
        )
        .unwrap();
        let patterns = CharArray::new_row("GPU ");
        let result =
            erase_builtin(Value::CharArray(chars), Value::CharArray(patterns)).expect("erase");
        match result {
            Value::CharArray(out) => {
                assert_eq!(out.rows, 2);
                assert_eq!(out.cols, 12);
                let first = char_row_to_string_slice(&out.data, out.cols, 0);
                let second = char_row_to_string_slice(&out.data, out.cols, 1);
                assert_eq!(first.trim_end(), "pipeline");
                assert_eq!(second.trim_end(), "CPU pipeline");
            }
            other => panic!("expected char array, got {other:?}"),
        }
    }

    // A cell holding a char row and a string scalar: each element keeps its
    // own kind (char array stays char array, string stays string).
    #[test]
    fn erase_cell_array_mixed_content() {
        let cell = CellArray::new(
            vec![
                Value::CharArray(CharArray::new_row("Kernel Planner")),
                Value::String("GPU Fusion".into()),
            ],
            1,
            2,
        )
        .unwrap();
        let result = erase_builtin(
            Value::Cell(cell),
            Value::Cell(
                CellArray::new(
                    vec![
                        Value::String("Kernel ".into()),
                        Value::String("GPU ".into()),
                    ],
                    1,
                    2,
                )
                .unwrap(),
            ),
        )
        .expect("erase");
        match result {
            Value::Cell(out) => {
                let first = out.get(0, 0).unwrap();
                let second = out.get(0, 1).unwrap();
                assert_eq!(first, Value::CharArray(CharArray::new_row("Planner")));
                assert_eq!(second, Value::String("Fusion".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // A 2x2 cell stays 2x2; two of its elements are spot-checked.
    #[test]
    fn erase_cell_array_preserves_shape() {
        let cell = CellArray::new(
            vec![
                Value::String("alpha".into()),
                Value::String("beta".into()),
                Value::String("gamma".into()),
                Value::String("delta".into()),
            ],
            2,
            2,
        )
        .unwrap();
        let patterns = StringArray::new(vec!["a".into()], vec![1, 1]).unwrap();
        let result = erase_builtin(Value::Cell(cell), Value::StringArray(patterns)).expect("erase");
        match result {
            Value::Cell(out) => {
                assert_eq!(out.rows, 2);
                assert_eq!(out.cols, 2);
                assert_eq!(out.get(0, 0).unwrap(), Value::String("lph".into()));
                assert_eq!(out.get(1, 1).unwrap(), Value::String("delt".into()));
            }
            other => panic!("expected cell array, got {other:?}"),
        }
    }

    // The "<missing>" scalar is returned unchanged even though the pattern
    // "missing" occurs inside its text.
    #[test]
    fn erase_preserves_missing_string() {
        let result = erase_builtin(
            Value::String("<missing>".into()),
            Value::String("missing".into()),
        )
        .expect("erase");
        assert_eq!(result, Value::String("<missing>".into()));
    }

    // An empty (0x0) pattern array leaves the text unchanged.
    #[test]
    fn erase_allows_empty_pattern_list() {
        let strings = StringArray::new(vec!["alpha".into(), "beta".into()], vec![2, 1]).unwrap();
        let pattern = StringArray::new(Vec::<String>::new(), vec![0, 0]).unwrap();
        let result = erase_builtin(
            Value::StringArray(strings.clone()),
            Value::StringArray(pattern),
        )
        .expect("erase");
        assert_eq!(result, Value::StringArray(strings));
    }

    // A numeric first argument is rejected with the text-argument error.
    #[test]
    fn erase_errors_on_invalid_first_argument() {
        let err = erase_builtin(Value::Num(1.0), Value::String("a".into())).unwrap_err();
        assert_eq!(err, ARG_TYPE_ERROR);
    }

    // A numeric pattern is rejected with the pattern-argument error.
    #[test]
    fn erase_errors_on_invalid_pattern_type() {
        let err = erase_builtin(Value::String("abc".into()), Value::Num(1.0)).unwrap_err();
        assert_eq!(err, PATTERN_TYPE_ERROR);
    }

    // With `doc_export` enabled, checks that `test_support::doc_examples`
    // returns a non-empty collection for `DOC_MD`.
    #[test]
    #[cfg(feature = "doc_export")]
    fn doc_examples_present() {
        let blocks = test_support::doc_examples(DOC_MD);
        assert!(!blocks.is_empty());
    }
}